├── .gitignore ├── .gitmodules ├── CODE_OF_CONDUCT.md ├── CONTRIBUTING.md ├── LICENSE ├── README.md ├── assets └── exploring_exploration.png ├── baselines.patch ├── configs ├── exploration │ ├── mp3d_test_config.yaml │ ├── mp3d_train_config.yaml │ ├── mp3d_val_config.yaml │ ├── ppo_pose_test.yaml │ ├── ppo_pose_train.yaml │ └── ppo_pose_val.yaml ├── pose_estimation │ ├── mp3d_test_config.yaml │ ├── mp3d_val_config.yaml │ ├── ppo_pose_test.yaml │ └── ppo_pose_val.yaml ├── pretrain_imitation │ ├── mp3d_train_oracle_landmarks_config.yaml │ ├── mp3d_train_oracle_objects_config.yaml │ ├── mp3d_train_oracle_random_config.yaml │ ├── ppo_pose_train_oracle_landmarks.yaml │ ├── ppo_pose_train_oracle_objects.yaml │ └── ppo_pose_train_oracle_random.yaml ├── pretrain_reconstruction │ ├── mp3d_train_config.yaml │ ├── mp3d_val_config.yaml │ ├── ppo_pose_train.yaml │ └── ppo_pose_val.yaml └── reconstruction_exploration │ ├── mp3d_test_config.yaml │ ├── mp3d_train_config.yaml │ ├── mp3d_val_config.yaml │ ├── ppo_pose_test.yaml │ ├── ppo_pose_train.yaml │ └── ppo_pose_val.yaml ├── environments ├── gym-avd │ ├── .gitignore │ ├── README.md │ ├── gym_avd │ │ ├── .gitignore │ │ ├── __init__.py │ │ ├── assets │ │ │ └── maps_topdown_agent_sprite │ │ │ │ └── 100x100.png │ │ ├── demos │ │ │ ├── exploration_demo.py │ │ │ ├── pointnav_demo.py │ │ │ ├── pose_estimation_demo.py │ │ │ ├── reconstruction_demo.py │ │ │ └── utils.py │ │ └── envs │ │ │ ├── __init__.py │ │ │ ├── avd_base_env.py │ │ │ ├── avd_nav_env.py │ │ │ ├── avd_occ_base_env.py │ │ │ ├── avd_pose_env.py │ │ │ ├── avd_recon_env.py │ │ │ ├── config.py │ │ │ └── utils.py │ ├── preprocess_raw_data.py │ ├── requirements.txt │ └── setup.py └── habitat │ ├── .gitignore │ ├── README.md │ ├── habitat_api.patch │ └── habitat_sim.patch ├── evaluate_pose_estimation.py ├── evaluate_reconstruction.py ├── evaluate_visitation.py ├── exploring_exploration ├── .gitignore ├── algo │ ├── __init__.py │ ├── imitation.py │ ├── ppo.py │ └── supervised_reconstruction.py ├── arguments.py ├── envs │ ├── __init__.py │ ├── avd.py │ └── habitat.py ├── models │ ├── .gitignore │ ├── __init__.py │ ├── curiosity.py │ ├── exploration.py │ ├── frontier_agent.py │ ├── navigation.py │ ├── pose_estimation.py │ └── reconstruction.py └── utils │ ├── common.py │ ├── distributions.py │ ├── eval.py │ ├── geometry.py │ ├── median_pooling.py │ ├── metrics.py │ ├── pose_estimation.py │ ├── reconstruction.py │ ├── reconstruction_eval.py │ ├── storage.py │ └── visualization.py ├── pretrain_imitation.py ├── pretrain_reconstruction.py ├── reconstruction_data_generation ├── avd │ ├── .gitignore │ └── gather_uniform_points.py ├── generate_imagenet_clusters.py └── mp3d │ ├── .gitignore │ ├── configs │ ├── pointnav_mp3d_test.yaml │ ├── pointnav_mp3d_train.yaml │ └── pointnav_mp3d_val.yaml │ ├── extract_data_script.sh │ └── generate_uniform_points.py ├── requirements.txt ├── train_curiosity_exploration.py ├── train_exploration.py └── train_reconstruction_exploration.py /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | wheels/ 23 | *.egg-info/ 24 | .installed.cfg 25 | *.egg 26 | MANIFEST 27 | 28 | # PyInstaller 29 | # Usually these files are written by a python 
script from a template 30 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 31 | *.manifest 32 | *.spec 33 | 34 | # Installer logs 35 | pip-log.txt 36 | pip-delete-this-directory.txt 37 | 38 | # Unit test / coverage reports 39 | htmlcov/ 40 | .tox/ 41 | .coverage 42 | .coverage.* 43 | .cache 44 | nosetests.xml 45 | coverage.xml 46 | *.cover 47 | .hypothesis/ 48 | .pytest_cache/ 49 | 50 | # Translations 51 | *.mo 52 | *.pot 53 | 54 | # Django stuff: 55 | *.log 56 | local_settings.py 57 | db.sqlite3 58 | 59 | # Flask stuff: 60 | instance/ 61 | .webassets-cache 62 | 63 | # Scrapy stuff: 64 | .scrapy 65 | 66 | # Sphinx documentation 67 | docs/_build/ 68 | 69 | # PyBuilder 70 | target/ 71 | 72 | # Jupyter Notebook 73 | .ipynb_checkpoints 74 | 75 | # pyenv 76 | .python-version 77 | 78 | # celery beat schedule file 79 | celerybeat-schedule 80 | 81 | # SageMath parsed files 82 | *.sage.py 83 | 84 | # Environments 85 | .env 86 | .venv 87 | env/ 88 | venv/ 89 | ENV/ 90 | env.bak/ 91 | venv.bak/ 92 | 93 | # Spyder project settings 94 | .spyderproject 95 | .spyproject 96 | 97 | # Rope project settings 98 | .ropeproject 99 | 100 | # mkdocs documentation 101 | /site 102 | 103 | trained_models/ 104 | .fuse_hidden* 105 | 106 | # ctags 107 | tags 108 | 109 | *.swp 110 | logs 111 | *.mp4 112 | data/ 113 | tests/ 114 | pretrained_models 115 | *_results/ 116 | scripts 117 | baselines 118 | -------------------------------------------------------------------------------- /.gitmodules: -------------------------------------------------------------------------------- 1 | [submodule "exploring_exploration/models/astar_pycpp"] 2 | path = exploring_exploration/models/astar_pycpp 3 | url = git@github.com:srama2512/astar_pycpp.git 4 | branch = master 5 | -------------------------------------------------------------------------------- /CODE_OF_CONDUCT.md: -------------------------------------------------------------------------------- 1 | # Code of Conduct 2 | 3 | Facebook has adopted a Code of Conduct that we expect project participants to adhere to. 4 | Please read the [full text](https://code.fb.com/codeofconduct/) 5 | so that you can understand what actions will and will not be tolerated. 6 | -------------------------------------------------------------------------------- /CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | # Contributing to exploring_exploration 2 | We want to make contributing to this project as easy and transparent as 3 | possible. 4 | 5 | ## Pull Requests 6 | We actively welcome your pull requests. 7 | 8 | 1. Fork the repo and create your branch from `master`. 9 | 2. If you've added code that should be tested, add tests. 10 | 3. If you've changed APIs, update the documentation. 11 | 4. Ensure the test suite passes. 12 | 5. Make sure your code lints. 13 | 6. If you haven't already, complete the Contributor License Agreement ("CLA"). 14 | 15 | ## Contributor License Agreement ("CLA") 16 | In order to accept your pull request, we need you to submit a CLA. You only need 17 | to do this once to work on any of Facebook's open source projects. 18 | 19 | Complete your CLA here: 20 | 21 | ## Issues 22 | We use GitHub issues to track public bugs. Please ensure your description is 23 | clear and has sufficient instructions to be able to reproduce the issue. 24 | 25 | Facebook has a [bounty program](https://www.facebook.com/whitehat/) for the safe 26 | disclosure of security bugs. 
In those cases, please go through the process 27 | outlined on that page and do not file a public issue. 28 | 29 | ## Coding Style 30 | * 80 character line length 31 | 32 | ## License 33 | By contributing to exploring_exploration, you agree that your contributions will be licensed 34 | under the LICENSE file in the root directory of this source tree. 35 | -------------------------------------------------------------------------------- /assets/exploring_exploration.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/facebookresearch/exploring_exploration/09d3f9b8703162fcc0974989e60f8cd5b47d4d39/assets/exploring_exploration.png -------------------------------------------------------------------------------- /baselines.patch: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | # All rights reserved. 3 | # This source code is licensed under the license found in the 4 | # LICENSE file in the root directory of this source tree. 5 | 6 | diff --git a/baselines/bench/monitor.py b/baselines/bench/monitor.py 7 | index 0db473a..f2d93ac 100644 8 | --- a/baselines/bench/monitor.py 9 | +++ b/baselines/bench/monitor.py 10 | @@ -76,6 +76,7 @@ class Monitor(Wrapper): 11 | self.total_steps += 1 12 | 13 | def close(self): 14 | + super().close() 15 | if self.f is not None: 16 | self.f.close() 17 | 18 | diff --git a/baselines/common/vec_env/dummy_vec_env.py b/baselines/common/vec_env/dummy_vec_env.py 19 | index 60db11d..387ea11 100644 20 | --- a/baselines/common/vec_env/dummy_vec_env.py 21 | +++ b/baselines/common/vec_env/dummy_vec_env.py 22 | @@ -61,6 +61,13 @@ class DummyVecEnv(VecEnv): 23 | self._save_obs(e, obs) 24 | return self._obs_from_buf() 25 | 26 | + def close_extras(self): 27 | + """ 28 | + Clean up the extra resources, beyond what's in this base class. 29 | + Only runs when not self.closed(). 
30 | + """ 31 | + self.envs[0].close() 32 | + 33 | def _save_obs(self, e, obs): 34 | for k in self.keys: 35 | if k is None: 36 | diff --git a/baselines/common/vec_env/subproc_vec_env.py b/baselines/common/vec_env/subproc_vec_env.py 37 | index 4dc4d2c..a1ec19c 100644 38 | --- a/baselines/common/vec_env/subproc_vec_env.py 39 | +++ b/baselines/common/vec_env/subproc_vec_env.py 40 | @@ -70,13 +70,29 @@ class SubprocVecEnv(VecEnv): 41 | results = [remote.recv() for remote in self.remotes] 42 | self.waiting = False 43 | obs, rews, dones, infos = zip(*results) 44 | - return np.stack(obs), np.stack(rews), np.stack(dones), infos 45 | + if isinstance(obs[0], dict): 46 | + obs_output = { 47 | + key: np.stack([obs_[key] for obs_ in obs]) 48 | + for key in obs[0].keys() 49 | + } 50 | + else: 51 | + obs_output = np.stack(obs) 52 | + return obs_output, np.stack(rews), np.stack(dones), infos 53 | 54 | def reset(self): 55 | self._assert_not_closed() 56 | for remote in self.remotes: 57 | remote.send(('reset', None)) 58 | - return np.stack([remote.recv() for remote in self.remotes]) 59 | + 60 | + obs = [remote.recv() for remote in self.remotes] 61 | + if isinstance(obs[0], dict): 62 | + obs_output = { 63 | + key: np.stack([obs_[key] for obs_ in obs]) 64 | + for key in obs[0].keys() 65 | + } 66 | + else: 67 | + obs_output = np.stack(obs) 68 | + return obs_output 69 | 70 | def close_extras(self): 71 | self.closed = True 72 | -------------------------------------------------------------------------------- /configs/exploration/mp3d_test_config.yaml: -------------------------------------------------------------------------------- 1 | ENVIRONMENT: 2 | MAX_EPISODE_STEPS: 1001 3 | SIMULATOR: 4 | TYPE: "Sim-v1" 5 | AGENT_0: 6 | SENSORS: ['RGB_SENSOR', 'DEPTH_SENSOR', 'FINE_OCC_SENSOR', 'COARSE_OCC_SENSOR'] 7 | ACTION_SPACE_CONFIG: "v2" 8 | HABITAT_SIM_V0: 9 | GPU_DEVICE_ID: 0 10 | RGB_SENSOR: 11 | WIDTH: 84 12 | HEIGHT: 84 13 | DEPTH_SENSOR: 14 | WIDTH: 84 15 | HEIGHT: 84 16 | OCCUPANCY_MAPS: 17 | MAP_SCALE: 0.1 18 | MAP_SIZE: 800 19 | MAX_DEPTH: 3 20 | SMALL_MAP_RANGE: 20 21 | LARGE_MAP_RANGE: 100 22 | HEIGHT_LOWER: 0.2 23 | HEIGHT_UPPER: 1.5 24 | FINE_OCC_SENSOR: 25 | WIDTH: 84 26 | HEIGHT: 84 27 | COARSE_OCC_SENSOR: 28 | WIDTH: 84 29 | HEIGHT: 84 30 | OBJECT_ANNOTATIONS: 31 | IS_AVAILABLE: True 32 | PATH: 'environments/habitat/habitat-api/data/object_annotations/mp3d' 33 | TASK: 34 | TYPE: Pose-v0 35 | SENSORS: ['DELTA_SENSOR', 'ORACLE_ACTION_SENSOR', 'COLLISION_SENSOR'] 36 | ORACLE_ACTION_SENSOR: 37 | ORACLE_TYPE: 'random' 38 | MEASUREMENTS: ['AREA_COVERED', 'TOP_DOWN_MAP_POSE', 'OPSR', 'OBJECTS_COVERED_GEOMETRIC'] 39 | TOP_DOWN_MAP_POSE: 40 | TYPE: TopDownMapPose 41 | FOG_OF_WAR: 42 | DRAW: True 43 | FOV: 90 44 | VISIBILITY_DIST: 4.0 45 | DATASET: 46 | TYPE: PoseEstimation-v1 47 | SPLIT: test 48 | SCENES_DIR: environments/habitat/habitat-api/data/scene_datasets 49 | DATA_PATH: environments/habitat/habitat-api/data/datasets/pose_estimation/mp3d/v1/{split}/{split}.json.gz 50 | SHUFFLE_DATASET: False 51 | -------------------------------------------------------------------------------- /configs/exploration/mp3d_train_config.yaml: -------------------------------------------------------------------------------- 1 | ENVIRONMENT: 2 | MAX_EPISODE_STEPS: 501 3 | SIMULATOR: 4 | TYPE: "Sim-v1" 5 | AGENT_0: 6 | SENSORS: ['RGB_SENSOR', 'DEPTH_SENSOR', 'FINE_OCC_SENSOR', 'COARSE_OCC_SENSOR'] 7 | ACTION_SPACE_CONFIG: "v2" 8 | HABITAT_SIM_V0: 9 | GPU_DEVICE_ID: 0 10 | RGB_SENSOR: 11 | WIDTH: 84 12 | HEIGHT: 84 13 | DEPTH_SENSOR: 
14 | WIDTH: 84 15 | HEIGHT: 84 16 | OCCUPANCY_MAPS: 17 | MAP_SCALE: 0.1 18 | MAP_SIZE: 800 19 | MAX_DEPTH: 3 20 | SMALL_MAP_RANGE: 20 21 | LARGE_MAP_RANGE: 100 22 | HEIGHT_LOWER: 0.2 23 | HEIGHT_UPPER: 1.5 24 | FINE_OCC_SENSOR: 25 | WIDTH: 84 26 | HEIGHT: 84 27 | COARSE_OCC_SENSOR: 28 | WIDTH: 84 29 | HEIGHT: 84 30 | OBJECT_ANNOTATIONS: 31 | IS_AVAILABLE: True 32 | PATH: 'environments/habitat/habitat-api/data/object_annotations/mp3d' 33 | TASK: 34 | TYPE: Pose-v0 35 | SENSORS: ['DELTA_SENSOR', 'ORACLE_ACTION_SENSOR', 'COLLISION_SENSOR'] 36 | ORACLE_ACTION_SENSOR: 37 | ORACLE_TYPE: 'random' 38 | MEASUREMENTS: ['AREA_COVERED', 'NOVELTY_REWARD', 'COVERAGE_NOVELTY_REWARD'] 39 | DATASET: 40 | TYPE: PoseEstimation-v1 41 | SPLIT: train 42 | SCENES_DIR: environments/habitat/habitat-api/data/scene_datasets 43 | DATA_PATH: environments/habitat/habitat-api/data/datasets/pose_estimation/mp3d/v1/{split}/{split}.json.gz 44 | -------------------------------------------------------------------------------- /configs/exploration/mp3d_val_config.yaml: -------------------------------------------------------------------------------- 1 | ENVIRONMENT: 2 | MAX_EPISODE_STEPS: 1001 3 | SIMULATOR: 4 | TYPE: "Sim-v1" 5 | AGENT_0: 6 | SENSORS: ['RGB_SENSOR', 'DEPTH_SENSOR', 'FINE_OCC_SENSOR', 'COARSE_OCC_SENSOR'] 7 | ACTION_SPACE_CONFIG: "v2" 8 | HABITAT_SIM_V0: 9 | GPU_DEVICE_ID: 0 10 | RGB_SENSOR: 11 | WIDTH: 84 12 | HEIGHT: 84 13 | DEPTH_SENSOR: 14 | WIDTH: 84 15 | HEIGHT: 84 16 | OCCUPANCY_MAPS: 17 | MAP_SCALE: 0.1 18 | MAP_SIZE: 800 19 | MAX_DEPTH: 3 20 | SMALL_MAP_RANGE: 20 21 | LARGE_MAP_RANGE: 100 22 | HEIGHT_LOWER: 0.2 23 | HEIGHT_UPPER: 1.5 24 | FINE_OCC_SENSOR: 25 | WIDTH: 84 26 | HEIGHT: 84 27 | COARSE_OCC_SENSOR: 28 | WIDTH: 84 29 | HEIGHT: 84 30 | OBJECT_ANNOTATIONS: 31 | IS_AVAILABLE: True 32 | PATH: 'environments/habitat/habitat-api/data/object_annotations/mp3d' 33 | TASK: 34 | TYPE: Pose-v0 35 | SENSORS: ['DELTA_SENSOR', 'ORACLE_ACTION_SENSOR', 'COLLISION_SENSOR'] 36 | ORACLE_ACTION_SENSOR: 37 | ORACLE_TYPE: 'random' 38 | MEASUREMENTS: ['AREA_COVERED', 'TOP_DOWN_MAP_POSE', 'OPSR', 'OBJECTS_COVERED_GEOMETRIC'] 39 | TOP_DOWN_MAP_POSE: 40 | TYPE: TopDownMapPose 41 | DATASET: 42 | TYPE: PoseEstimation-v1 43 | SPLIT: val 44 | SCENES_DIR: environments/habitat/habitat-api/data/scene_datasets 45 | DATA_PATH: environments/habitat/habitat-api/data/datasets/pose_estimation/mp3d/v1/{split}/{split}.json.gz 46 | SHUFFLE_DATASET: False 47 | -------------------------------------------------------------------------------- /configs/exploration/ppo_pose_test.yaml: -------------------------------------------------------------------------------- 1 | BASE_TASK_CONFIG_PATH: "configs/exploration/mp3d_test_config.yaml" 2 | TRAINER: 3 | TRAINER_NAME: "ppo" 4 | RL: 5 | PPO: 6 | # general options 7 | num_processes: 1 8 | task_config: "configs/exploration/mp3d_test_config.yaml" 9 | sensors: "RGB_SENSOR,DEPTH_SENSOR,FINE_OCC_SENSOR,COARSE_OCC_SENSOR" 10 | -------------------------------------------------------------------------------- /configs/exploration/ppo_pose_train.yaml: -------------------------------------------------------------------------------- 1 | BASE_TASK_CONFIG_PATH: "configs/exploration/mp3d_train_config.yaml" 2 | TRAINER: 3 | TRAINER_NAME: "ppo" 4 | RL: 5 | PPO: 6 | # general options 7 | num_processes: 8 8 | task_config: "configs/exploration/mp3d_train_config.yaml" 9 | sensors: "RGB_SENSOR,DEPTH_SENSOR,FINE_OCC_SENSOR,COARSE_OCC_SENSOR" 10 | 
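The `ppo_pose_*.yaml` files above are thin trainer wrappers: each points `BASE_TASK_CONFIG_PATH`/`task_config` at the matching `mp3d_*_config.yaml` task file and lists the sensors the policy consumes. Below is a minimal, illustrative sketch (not repository code) of how that pairing can be inspected; it assumes PyYAML is installed and the repository root is the working directory, and it does not reproduce the training scripts' own config loader.

```
import yaml

def check_config_pair(trainer_yaml_path):
    # Load the trainer wrapper and the task config it points to.
    with open(trainer_yaml_path) as f:
        trainer_cfg = yaml.safe_load(f)
    ppo_cfg = trainer_cfg["TRAINER"]["RL"]["PPO"]
    with open(ppo_cfg["task_config"]) as f:
        task_cfg = yaml.safe_load(f)
    # The sensors string in the wrapper should match the simulator's sensor list.
    trainer_sensors = set(ppo_cfg["sensors"].split(","))
    sim_sensors = set(task_cfg["SIMULATOR"]["AGENT_0"]["SENSORS"])
    print("num_processes:", ppo_cfg["num_processes"])
    print("sensors match simulator:", trainer_sensors == sim_sensors)

check_config_pair("configs/exploration/ppo_pose_train.yaml")
```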
-------------------------------------------------------------------------------- /configs/exploration/ppo_pose_val.yaml: -------------------------------------------------------------------------------- 1 | BASE_TASK_CONFIG_PATH: "configs/exploration/mp3d_val_config.yaml" 2 | TRAINER: 3 | TRAINER_NAME: "ppo" 4 | RL: 5 | PPO: 6 | # general options 7 | num_processes: 1 8 | task_config: "configs/exploration/mp3d_val_config.yaml" 9 | sensors: "RGB_SENSOR,DEPTH_SENSOR,FINE_OCC_SENSOR,COARSE_OCC_SENSOR" 10 | -------------------------------------------------------------------------------- /configs/pose_estimation/mp3d_test_config.yaml: -------------------------------------------------------------------------------- 1 | ENVIRONMENT: 2 | MAX_EPISODE_STEPS: 1001 3 | SIMULATOR: 4 | TYPE: "Sim-v1" 5 | AGENT_0: 6 | SENSORS: ['RGB_SENSOR', 'DEPTH_SENSOR', 'FINE_OCC_SENSOR', 'COARSE_OCC_SENSOR'] 7 | ACTION_SPACE_CONFIG: "v2" 8 | HABITAT_SIM_V0: 9 | GPU_DEVICE_ID: 0 10 | RGB_SENSOR: 11 | WIDTH: 84 12 | HEIGHT: 84 13 | DEPTH_SENSOR: 14 | WIDTH: 84 15 | HEIGHT: 84 16 | OCCUPANCY_MAPS: 17 | MAP_SCALE: 0.1 18 | MAP_SIZE: 800 19 | MAX_DEPTH: 3 20 | SMALL_MAP_RANGE: 20 21 | LARGE_MAP_RANGE: 100 22 | HEIGHT_LOWER: 0.2 23 | HEIGHT_UPPER: 1.5 24 | FINE_OCC_SENSOR: 25 | WIDTH: 84 26 | HEIGHT: 84 27 | COARSE_OCC_SENSOR: 28 | WIDTH: 84 29 | HEIGHT: 84 30 | OBJECT_ANNOTATIONS: 31 | IS_AVAILABLE: True 32 | PATH: 'environments/habitat/habitat-api/data/object_annotations/mp3d' 33 | TASK: 34 | TYPE: Pose-v0 35 | SENSORS: ['DELTA_SENSOR', 'ORACLE_ACTION_SENSOR', 'COLLISION_SENSOR', 'POSE_RGB_SENSOR', 'POSE_REGRESS_SENSOR'] 36 | POSE_RGB_SENSOR: 37 | NREF: 20 38 | POSE_REGRESS_SENSOR: 39 | NREF: 20 40 | ORACLE_ACTION_SENSOR: 41 | ORACLE_TYPE: 'random' 42 | MEASUREMENTS: ['AREA_COVERED', 'TOP_DOWN_MAP_POSE', 'OPSR', 'OBJECTS_COVERED_GEOMETRIC'] 43 | TOP_DOWN_MAP_POSE: 44 | TYPE: TopDownMapPose 45 | FOG_OF_WAR: 46 | DRAW: True 47 | FOV: 90 48 | VISIBILITY_DIST: 4.0 49 | DATASET: 50 | TYPE: PoseEstimation-v1 51 | SPLIT: test 52 | SCENES_DIR: environments/habitat/habitat-api/data/scene_datasets 53 | DATA_PATH: environments/habitat/habitat-api/data/datasets/pose_estimation/mp3d/v1/{split}/{split}.json.gz 54 | SHUFFLE_DATASET: False 55 | -------------------------------------------------------------------------------- /configs/pose_estimation/mp3d_val_config.yaml: -------------------------------------------------------------------------------- 1 | ENVIRONMENT: 2 | MAX_EPISODE_STEPS: 1001 3 | SIMULATOR: 4 | TYPE: "Sim-v1" 5 | AGENT_0: 6 | SENSORS: ['RGB_SENSOR', 'DEPTH_SENSOR', 'FINE_OCC_SENSOR', 'COARSE_OCC_SENSOR'] 7 | ACTION_SPACE_CONFIG: "v2" 8 | HABITAT_SIM_V0: 9 | GPU_DEVICE_ID: 0 10 | RGB_SENSOR: 11 | WIDTH: 84 12 | HEIGHT: 84 13 | DEPTH_SENSOR: 14 | WIDTH: 84 15 | HEIGHT: 84 16 | OCCUPANCY_MAPS: 17 | MAP_SCALE: 0.1 18 | MAP_SIZE: 800 19 | MAX_DEPTH: 3 20 | SMALL_MAP_RANGE: 20 21 | LARGE_MAP_RANGE: 100 22 | HEIGHT_LOWER: 0.2 23 | HEIGHT_UPPER: 1.5 24 | FINE_OCC_SENSOR: 25 | WIDTH: 84 26 | HEIGHT: 84 27 | COARSE_OCC_SENSOR: 28 | WIDTH: 84 29 | HEIGHT: 84 30 | OBJECT_ANNOTATIONS: 31 | IS_AVAILABLE: True 32 | PATH: 'environments/habitat/habitat-api/data/object_annotations/mp3d' 33 | TASK: 34 | TYPE: Pose-v0 35 | SENSORS: ['DELTA_SENSOR', 'ORACLE_ACTION_SENSOR', 'COLLISION_SENSOR', 'POSE_RGB_SENSOR', 'POSE_REGRESS_SENSOR'] 36 | POSE_RGB_SENSOR: 37 | NREF: 20 38 | POSE_REGRESS_SENSOR: 39 | NREF: 20 40 | ORACLE_ACTION_SENSOR: 41 | ORACLE_TYPE: 'random' 42 | MEASUREMENTS: ['AREA_COVERED', 'TOP_DOWN_MAP_POSE', 'OPSR', 
'OBJECTS_COVERED_GEOMETRIC'] 43 | TOP_DOWN_MAP_POSE: 44 | TYPE: TopDownMapPose 45 | DATASET: 46 | TYPE: PoseEstimation-v1 47 | SPLIT: val 48 | SCENES_DIR: environments/habitat/habitat-api/data/scene_datasets 49 | DATA_PATH: environments/habitat/habitat-api/data/datasets/pose_estimation/mp3d/v1/{split}/{split}.json.gz 50 | SHUFFLE_DATASET: False 51 | -------------------------------------------------------------------------------- /configs/pose_estimation/ppo_pose_test.yaml: -------------------------------------------------------------------------------- 1 | BASE_TASK_CONFIG_PATH: "configs/pose_estimation/mp3d_test_config.yaml" 2 | TRAINER: 3 | TRAINER_NAME: "ppo" 4 | RL: 5 | PPO: 6 | # general options 7 | num_processes: 1 8 | task_config: "configs/pose_estimation/mp3d_test_config.yaml" 9 | sensors: "RGB_SENSOR,DEPTH_SENSOR,FINE_OCC_SENSOR,COARSE_OCC_SENSOR" 10 | -------------------------------------------------------------------------------- /configs/pose_estimation/ppo_pose_val.yaml: -------------------------------------------------------------------------------- 1 | BASE_TASK_CONFIG_PATH: "configs/pose_estimation/mp3d_val_config.yaml" 2 | TRAINER: 3 | TRAINER_NAME: "ppo" 4 | RL: 5 | PPO: 6 | # general options 7 | num_processes: 1 8 | task_config: "configs/pose_estimation/mp3d_val_config.yaml" 9 | sensors: "RGB_SENSOR,DEPTH_SENSOR,FINE_OCC_SENSOR,COARSE_OCC_SENSOR" 10 | -------------------------------------------------------------------------------- /configs/pretrain_imitation/mp3d_train_oracle_landmarks_config.yaml: -------------------------------------------------------------------------------- 1 | ENVIRONMENT: 2 | MAX_EPISODE_STEPS: 501 3 | SIMULATOR: 4 | TYPE: "Sim-v1" 5 | AGENT_0: 6 | SENSORS: ['RGB_SENSOR', 'DEPTH_SENSOR', 'FINE_OCC_SENSOR', 'COARSE_OCC_SENSOR'] 7 | ACTION_SPACE_CONFIG: "v2" 8 | HABITAT_SIM_V0: 9 | GPU_DEVICE_ID: 0 10 | RGB_SENSOR: 11 | WIDTH: 84 12 | HEIGHT: 84 13 | DEPTH_SENSOR: 14 | WIDTH: 84 15 | HEIGHT: 84 16 | OCCUPANCY_MAPS: 17 | MAP_SCALE: 0.1 18 | MAP_SIZE: 800 19 | MAX_DEPTH: 3 20 | SMALL_MAP_RANGE: 20 21 | LARGE_MAP_RANGE: 100 22 | HEIGHT_LOWER: 0.2 23 | HEIGHT_UPPER: 1.5 24 | FINE_OCC_SENSOR: 25 | WIDTH: 84 26 | HEIGHT: 84 27 | COARSE_OCC_SENSOR: 28 | WIDTH: 84 29 | HEIGHT: 84 30 | OBJECT_ANNOTATIONS: 31 | IS_AVAILABLE: True 32 | PATH: 'environments/habitat/habitat-api/data/object_annotations/mp3d' 33 | TASK: 34 | TYPE: Pose-v0 35 | SENSORS: ['DELTA_SENSOR', 'ORACLE_ACTION_SENSOR', 'COLLISION_SENSOR'] 36 | ORACLE_ACTION_SENSOR: 37 | ORACLE_TYPE: 'pose' 38 | MEASUREMENTS: ['AREA_COVERED', 'NOVELTY_REWARD', 'COVERAGE_NOVELTY_REWARD'] 39 | DATASET: 40 | TYPE: PoseEstimation-v1 41 | SPLIT: train 42 | SCENES_DIR: environments/habitat/habitat-api/data/scene_datasets 43 | DATA_PATH: environments/habitat/habitat-api/data/datasets/pose_estimation/mp3d/v1/{split}/{split}.json.gz 44 | -------------------------------------------------------------------------------- /configs/pretrain_imitation/mp3d_train_oracle_objects_config.yaml: -------------------------------------------------------------------------------- 1 | ENVIRONMENT: 2 | MAX_EPISODE_STEPS: 501 3 | SIMULATOR: 4 | TYPE: "Sim-v1" 5 | AGENT_0: 6 | SENSORS: ['RGB_SENSOR', 'DEPTH_SENSOR', 'FINE_OCC_SENSOR', 'COARSE_OCC_SENSOR'] 7 | ACTION_SPACE_CONFIG: "v2" 8 | HABITAT_SIM_V0: 9 | GPU_DEVICE_ID: 0 10 | RGB_SENSOR: 11 | WIDTH: 84 12 | HEIGHT: 84 13 | DEPTH_SENSOR: 14 | WIDTH: 84 15 | HEIGHT: 84 16 | OCCUPANCY_MAPS: 17 | MAP_SCALE: 0.1 18 | MAP_SIZE: 800 19 | MAX_DEPTH: 3 20 | SMALL_MAP_RANGE: 20 21 | 
LARGE_MAP_RANGE: 100 22 | HEIGHT_LOWER: 0.2 23 | HEIGHT_UPPER: 1.5 24 | FINE_OCC_SENSOR: 25 | WIDTH: 84 26 | HEIGHT: 84 27 | COARSE_OCC_SENSOR: 28 | WIDTH: 84 29 | HEIGHT: 84 30 | OBJECT_ANNOTATIONS: 31 | IS_AVAILABLE: True 32 | PATH: 'environments/habitat/habitat-api/data/object_annotations/mp3d' 33 | TASK: 34 | TYPE: Pose-v0 35 | SENSORS: ['DELTA_SENSOR', 'ORACLE_ACTION_SENSOR', 'COLLISION_SENSOR'] 36 | ORACLE_ACTION_SENSOR: 37 | ORACLE_TYPE: 'object' 38 | MEASUREMENTS: ['AREA_COVERED', 'NOVELTY_REWARD', 'COVERAGE_NOVELTY_REWARD'] 39 | DATASET: 40 | TYPE: PoseEstimation-v1 41 | SPLIT: train 42 | SCENES_DIR: environments/habitat/habitat-api/data/scene_datasets 43 | DATA_PATH: environments/habitat/habitat-api/data/datasets/pose_estimation/mp3d/v1/{split}/{split}.json.gz 44 | -------------------------------------------------------------------------------- /configs/pretrain_imitation/mp3d_train_oracle_random_config.yaml: -------------------------------------------------------------------------------- 1 | ENVIRONMENT: 2 | MAX_EPISODE_STEPS: 501 3 | SIMULATOR: 4 | TYPE: "Sim-v1" 5 | AGENT_0: 6 | SENSORS: ['RGB_SENSOR', 'DEPTH_SENSOR', 'FINE_OCC_SENSOR', 'COARSE_OCC_SENSOR'] 7 | ACTION_SPACE_CONFIG: "v2" 8 | HABITAT_SIM_V0: 9 | GPU_DEVICE_ID: 0 10 | RGB_SENSOR: 11 | WIDTH: 84 12 | HEIGHT: 84 13 | DEPTH_SENSOR: 14 | WIDTH: 84 15 | HEIGHT: 84 16 | OCCUPANCY_MAPS: 17 | MAP_SCALE: 0.1 18 | MAP_SIZE: 800 19 | MAX_DEPTH: 3 20 | SMALL_MAP_RANGE: 20 21 | LARGE_MAP_RANGE: 100 22 | HEIGHT_LOWER: 0.2 23 | HEIGHT_UPPER: 1.5 24 | FINE_OCC_SENSOR: 25 | WIDTH: 84 26 | HEIGHT: 84 27 | COARSE_OCC_SENSOR: 28 | WIDTH: 84 29 | HEIGHT: 84 30 | OBJECT_ANNOTATIONS: 31 | IS_AVAILABLE: True 32 | PATH: 'environments/habitat/habitat-api/data/object_annotations/mp3d' 33 | TASK: 34 | TYPE: Pose-v0 35 | SENSORS: ['DELTA_SENSOR', 'ORACLE_ACTION_SENSOR', 'COLLISION_SENSOR'] 36 | ORACLE_ACTION_SENSOR: 37 | ORACLE_TYPE: 'random' 38 | MEASUREMENTS: ['AREA_COVERED', 'NOVELTY_REWARD', 'COVERAGE_NOVELTY_REWARD'] 39 | DATASET: 40 | TYPE: PoseEstimation-v1 41 | SPLIT: train 42 | SCENES_DIR: environments/habitat/habitat-api/data/scene_datasets 43 | DATA_PATH: environments/habitat/habitat-api/data/datasets/pose_estimation/mp3d/v1/{split}/{split}.json.gz 44 | -------------------------------------------------------------------------------- /configs/pretrain_imitation/ppo_pose_train_oracle_landmarks.yaml: -------------------------------------------------------------------------------- 1 | BASE_TASK_CONFIG_PATH: "configs/pretrain_imitation/mp3d_train_oracle_landmarks_config.yaml" 2 | TRAINER: 3 | TRAINER_NAME: "ppo" 4 | RL: 5 | PPO: 6 | # general options 7 | num_processes: 8 8 | task_config: "configs/pretrain_imitation/mp3d_train_oracle_landmarks_config.yaml" 9 | sensors: "RGB_SENSOR,DEPTH_SENSOR,FINE_OCC_SENSOR,COARSE_OCC_SENSOR" 10 | -------------------------------------------------------------------------------- /configs/pretrain_imitation/ppo_pose_train_oracle_objects.yaml: -------------------------------------------------------------------------------- 1 | BASE_TASK_CONFIG_PATH: "configs/pretrain_imitation/mp3d_train_oracle_objects_config.yaml" 2 | TRAINER: 3 | TRAINER_NAME: "ppo" 4 | RL: 5 | PPO: 6 | # general options 7 | num_processes: 8 8 | task_config: "configs/pretrain_imitation/mp3d_train_oracle_objects_config.yaml" 9 | sensors: "RGB_SENSOR,DEPTH_SENSOR,FINE_OCC_SENSOR,COARSE_OCC_SENSOR" 10 | -------------------------------------------------------------------------------- 
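The three imitation-pretraining task configs above are identical except for `TASK.ORACLE_ACTION_SENSOR.ORACLE_TYPE`: 'pose' for the landmarks oracle, 'object' for the objects oracle, and 'random' for the random oracle. The sketch below (illustrative only; assumes PyYAML and the repository root as the working directory) makes that concrete by diffing the flattened YAML files.

```
import yaml

def flatten(tree, prefix=""):
    # Flatten a nested dict into {dotted.key: value} pairs.
    flat = {}
    for key, value in tree.items():
        dotted = f"{prefix}.{key}" if prefix else str(key)
        if isinstance(value, dict):
            flat.update(flatten(value, dotted))
        else:
            flat[dotted] = value
    return flat

paths = {
    "landmarks": "configs/pretrain_imitation/mp3d_train_oracle_landmarks_config.yaml",
    "objects": "configs/pretrain_imitation/mp3d_train_oracle_objects_config.yaml",
    "random": "configs/pretrain_imitation/mp3d_train_oracle_random_config.yaml",
}
flat = {}
for name, path in paths.items():
    with open(path) as f:
        flat[name] = flatten(yaml.safe_load(f))

for key in sorted(set().union(*(cfg.keys() for cfg in flat.values()))):
    values = {name: cfg.get(key) for name, cfg in flat.items()}
    if len({str(v) for v in values.values()}) > 1:
        print(key, values)  # expected: only ...ORACLE_ACTION_SENSOR.ORACLE_TYPE
```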
/configs/pretrain_imitation/ppo_pose_train_oracle_random.yaml: -------------------------------------------------------------------------------- 1 | BASE_TASK_CONFIG_PATH: "configs/pretrain_imitation/mp3d_train_oracle_random_config.yaml" 2 | TRAINER: 3 | TRAINER_NAME: "ppo" 4 | RL: 5 | PPO: 6 | # general options 7 | num_processes: 8 8 | task_config: "configs/pretrain_imitation/mp3d_train_oracle_random_config.yaml" 9 | sensors: "RGB_SENSOR,DEPTH_SENSOR,FINE_OCC_SENSOR,COARSE_OCC_SENSOR" 10 | -------------------------------------------------------------------------------- /configs/pretrain_reconstruction/mp3d_train_config.yaml: -------------------------------------------------------------------------------- 1 | ENVIRONMENT: 2 | MAX_EPISODE_STEPS: 501 3 | SIMULATOR: 4 | TYPE: "Sim-v0" 5 | AGENT_0: 6 | SENSORS: ['RGB_SENSOR'] 7 | ACTION_SPACE_CONFIG: "v2" 8 | HABITAT_SIM_V0: 9 | GPU_DEVICE_ID: 0 10 | RGB_SENSOR: 11 | WIDTH: 84 12 | HEIGHT: 84 13 | TASK: 14 | TYPE: Pose-v0 15 | SENSORS: ['DELTA_SENSOR', 'COLLISION_SENSOR', 'POSE_RGB_SENSOR', 'POSE_REGRESS_SENSOR', 'POSE_MASK_SENSOR', 'ORACLE_ACTION_SENSOR'] 16 | POSE_RGB_SENSOR: 17 | NREF: 100 18 | POSE_REGRESS_SENSOR: 19 | NREF: 100 20 | POSE_MASK_SENSOR: 21 | NREF: 100 22 | ORACLE_ACTION_SENSOR: 23 | ORACLE_TYPE: 'random' 24 | MEASUREMENTS: [] 25 | DATASET: 26 | TYPE: PoseEstimation-v1 27 | SPLIT: train 28 | SCENES_DIR: environments/habitat/habitat-api/data/scene_datasets 29 | DATA_PATH: environments/habitat/habitat-api/data/datasets/reconstruction/mp3d/v1/{split}/{split}.json.gz 30 | -------------------------------------------------------------------------------- /configs/pretrain_reconstruction/mp3d_val_config.yaml: -------------------------------------------------------------------------------- 1 | ENVIRONMENT: 2 | MAX_EPISODE_STEPS: 501 3 | SIMULATOR: 4 | TYPE: "Sim-v0" 5 | AGENT_0: 6 | SENSORS: ['RGB_SENSOR'] 7 | ACTION_SPACE_CONFIG: "v2" 8 | HABITAT_SIM_V0: 9 | GPU_DEVICE_ID: 0 10 | RGB_SENSOR: 11 | WIDTH: 84 12 | HEIGHT: 84 13 | TASK: 14 | TYPE: Pose-v0 15 | SENSORS: ['DELTA_SENSOR', 'COLLISION_SENSOR', 'POSE_RGB_SENSOR', 'POSE_REGRESS_SENSOR', 'POSE_MASK_SENSOR', 'ORACLE_ACTION_SENSOR'] 16 | POSE_RGB_SENSOR: 17 | NREF: 100 18 | POSE_REGRESS_SENSOR: 19 | NREF: 100 20 | POSE_MASK_SENSOR: 21 | NREF: 100 22 | ORACLE_ACTION_SENSOR: 23 | ORACLE_TYPE: 'random' 24 | MEASUREMENTS: ['TOP_DOWN_MAP_POSE'] 25 | TOP_DOWN_MAP_POSE: 26 | TYPE: TopDownMapPose 27 | DATASET: 28 | TYPE: PoseEstimation-v1 29 | SPLIT: val 30 | SCENES_DIR: environments/habitat/habitat-api/data/scene_datasets 31 | DATA_PATH: environments/habitat/habitat-api/data/datasets/reconstruction/mp3d/v1/{split}/{split}.json.gz 32 | SHUFFLE_DATASET: False 33 | -------------------------------------------------------------------------------- /configs/pretrain_reconstruction/ppo_pose_train.yaml: -------------------------------------------------------------------------------- 1 | BASE_TASK_CONFIG_PATH: "configs/pretrain_reconstruction/mp3d_train_config.yaml" 2 | TRAINER: 3 | TRAINER_NAME: "ppo" 4 | RL: 5 | PPO: 6 | # general options 7 | num_processes: 8 8 | task_config: "configs/pretrain_reconstruction/mp3d_train_config.yaml" 9 | -------------------------------------------------------------------------------- /configs/pretrain_reconstruction/ppo_pose_val.yaml: -------------------------------------------------------------------------------- 1 | BASE_TASK_CONFIG_PATH: "configs/pretrain_reconstruction/mp3d_val_config.yaml" 2 | TRAINER: 3 | TRAINER_NAME: "ppo" 4 | RL: 5 | PPO: 6 | # general 
options 7 | num_processes: 1 8 | task_config: "configs/pretrain_reconstruction/mp3d_val_config.yaml" 9 | -------------------------------------------------------------------------------- /configs/reconstruction_exploration/mp3d_test_config.yaml: -------------------------------------------------------------------------------- 1 | ENVIRONMENT: 2 | MAX_EPISODE_STEPS: 1001 3 | SIMULATOR: 4 | TYPE: "Sim-v1" 5 | AGENT_0: 6 | SENSORS: ['RGB_SENSOR', 'DEPTH_SENSOR', 'FINE_OCC_SENSOR', 'COARSE_OCC_SENSOR'] 7 | ACTION_SPACE_CONFIG: "v2" 8 | HABITAT_SIM_V0: 9 | GPU_DEVICE_ID: 0 10 | RGB_SENSOR: 11 | WIDTH: 84 12 | HEIGHT: 84 13 | DEPTH_SENSOR: 14 | WIDTH: 84 15 | HEIGHT: 84 16 | OCCUPANCY_MAPS: 17 | MAP_SCALE: 0.1 18 | MAP_SIZE: 800 19 | MAX_DEPTH: 3 20 | SMALL_MAP_RANGE: 20 21 | LARGE_MAP_RANGE: 100 22 | HEIGHT_LOWER: 0.2 23 | HEIGHT_UPPER: 1.5 24 | FINE_OCC_SENSOR: 25 | WIDTH: 84 26 | HEIGHT: 84 27 | COARSE_OCC_SENSOR: 28 | WIDTH: 84 29 | HEIGHT: 84 30 | OBJECT_ANNOTATIONS: 31 | IS_AVAILABLE: True 32 | PATH: 'environments/habitat/habitat-api/data/object_annotations/mp3d' 33 | TASK: 34 | TYPE: Pose-v0 35 | SENSORS: ['DELTA_SENSOR', 'COLLISION_SENSOR', 'POSE_RGB_SENSOR', 'POSE_REGRESS_SENSOR', 'POSE_MASK_SENSOR', 'ORACLE_ACTION_SENSOR'] 36 | POSE_RGB_SENSOR: 37 | NREF: 100 38 | POSE_REGRESS_SENSOR: 39 | NREF: 100 40 | POSE_MASK_SENSOR: 41 | NREF: 100 42 | ORACLE_ACTION_SENSOR: 43 | ORACLE_TYPE: 'random' 44 | MEASUREMENTS: ['AREA_COVERED', 'TOP_DOWN_MAP_POSE', 'OPSR', 'OBJECTS_COVERED_GEOMETRIC'] 45 | TOP_DOWN_MAP_POSE: 46 | TYPE: TopDownMapPose 47 | DATASET: 48 | TYPE: PoseEstimation-v1 49 | SPLIT: test 50 | SCENES_DIR: environments/habitat/habitat-api/data/scene_datasets 51 | DATA_PATH: environments/habitat/habitat-api/data/datasets/reconstruction/mp3d/v1/{split}/{split}.json.gz 52 | SHUFFLE_DATASET: False 53 | -------------------------------------------------------------------------------- /configs/reconstruction_exploration/mp3d_train_config.yaml: -------------------------------------------------------------------------------- 1 | ENVIRONMENT: 2 | MAX_EPISODE_STEPS: 501 3 | SIMULATOR: 4 | TYPE: "Sim-v1" 5 | AGENT_0: 6 | SENSORS: ['RGB_SENSOR', 'DEPTH_SENSOR', 'FINE_OCC_SENSOR', 'COARSE_OCC_SENSOR'] 7 | ACTION_SPACE_CONFIG: "v2" 8 | HABITAT_SIM_V0: 9 | GPU_DEVICE_ID: 0 10 | RGB_SENSOR: 11 | WIDTH: 84 12 | HEIGHT: 84 13 | DEPTH_SENSOR: 14 | WIDTH: 84 15 | HEIGHT: 84 16 | OCCUPANCY_MAPS: 17 | MAP_SCALE: 0.1 18 | MAP_SIZE: 800 19 | MAX_DEPTH: 3 20 | SMALL_MAP_RANGE: 20 21 | LARGE_MAP_RANGE: 100 22 | HEIGHT_LOWER: 0.2 23 | HEIGHT_UPPER: 1.5 24 | FINE_OCC_SENSOR: 25 | WIDTH: 84 26 | HEIGHT: 84 27 | COARSE_OCC_SENSOR: 28 | WIDTH: 84 29 | HEIGHT: 84 30 | OBJECT_ANNOTATIONS: 31 | IS_AVAILABLE: True 32 | PATH: 'environments/habitat/habitat-api/data/object_annotations/mp3d' 33 | TASK: 34 | TYPE: Pose-v0 35 | SENSORS: ['DELTA_SENSOR', 'COLLISION_SENSOR', 'POSE_RGB_SENSOR', 'POSE_REGRESS_SENSOR', 'POSE_MASK_SENSOR', 'ORACLE_ACTION_SENSOR'] 36 | POSE_RGB_SENSOR: 37 | NREF: 100 38 | POSE_REGRESS_SENSOR: 39 | NREF: 100 40 | POSE_MASK_SENSOR: 41 | NREF: 100 42 | ORACLE_ACTION_SENSOR: 43 | ORACLE_TYPE: 'random' 44 | MEASUREMENTS: ['AREA_COVERED', 'INC_AREA_COVERED', 'OPSR'] 45 | DATASET: 46 | TYPE: PoseEstimation-v1 47 | SPLIT: train 48 | SCENES_DIR: environments/habitat/habitat-api/data/scene_datasets 49 | DATA_PATH: environments/habitat/habitat-api/data/datasets/reconstruction/mp3d/v1/{split}/{split}.json.gz 50 | -------------------------------------------------------------------------------- 
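One detail worth noting in the `DATASET` sections above: `DATA_PATH` contains a `{split}` placeholder, which Habitat-style dataset loaders typically resolve by formatting the path with the `SPLIT` value. The snippet below is illustrative only and simply shows which episode files the reconstruction-exploration configs are therefore expected to read, assuming the data layout from the Habitat setup instructions.

```
# Illustrative only: resolve the {split} template used by DATA_PATH above.
data_path = (
    "environments/habitat/habitat-api/data/datasets/"
    "reconstruction/mp3d/v1/{split}/{split}.json.gz"
)
for split in ("train", "val", "test"):
    # e.g. train -> .../reconstruction/mp3d/v1/train/train.json.gz
    print(f"{split:5s} -> {data_path.format(split=split)}")
```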
/configs/reconstruction_exploration/mp3d_val_config.yaml: -------------------------------------------------------------------------------- 1 | ENVIRONMENT: 2 | MAX_EPISODE_STEPS: 1001 3 | SIMULATOR: 4 | TYPE: "Sim-v1" 5 | AGENT_0: 6 | SENSORS: ['RGB_SENSOR', 'DEPTH_SENSOR', 'FINE_OCC_SENSOR', 'COARSE_OCC_SENSOR'] 7 | ACTION_SPACE_CONFIG: "v2" 8 | HABITAT_SIM_V0: 9 | GPU_DEVICE_ID: 0 10 | RGB_SENSOR: 11 | WIDTH: 84 12 | HEIGHT: 84 13 | DEPTH_SENSOR: 14 | WIDTH: 84 15 | HEIGHT: 84 16 | OCCUPANCY_MAPS: 17 | MAP_SCALE: 0.1 18 | MAP_SIZE: 800 19 | MAX_DEPTH: 3 20 | SMALL_MAP_RANGE: 20 21 | LARGE_MAP_RANGE: 100 22 | HEIGHT_LOWER: 0.2 23 | HEIGHT_UPPER: 1.5 24 | FINE_OCC_SENSOR: 25 | WIDTH: 84 26 | HEIGHT: 84 27 | COARSE_OCC_SENSOR: 28 | WIDTH: 84 29 | HEIGHT: 84 30 | OBJECT_ANNOTATIONS: 31 | IS_AVAILABLE: True 32 | PATH: 'environments/habitat/habitat-api/data/object_annotations/mp3d' 33 | TASK: 34 | TYPE: Pose-v0 35 | SENSORS: ['DELTA_SENSOR', 'COLLISION_SENSOR', 'POSE_RGB_SENSOR', 'POSE_REGRESS_SENSOR', 'POSE_MASK_SENSOR', 'ORACLE_ACTION_SENSOR'] 36 | POSE_RGB_SENSOR: 37 | NREF: 100 38 | POSE_REGRESS_SENSOR: 39 | NREF: 100 40 | POSE_MASK_SENSOR: 41 | NREF: 100 42 | ORACLE_ACTION_SENSOR: 43 | ORACLE_TYPE: 'random' 44 | MEASUREMENTS: ['AREA_COVERED', 'TOP_DOWN_MAP_POSE', 'OPSR', 'OBJECTS_COVERED_GEOMETRIC'] 45 | TOP_DOWN_MAP_POSE: 46 | TYPE: TopDownMapPose 47 | DATASET: 48 | TYPE: PoseEstimation-v1 49 | SPLIT: val 50 | SCENES_DIR: environments/habitat/habitat-api/data/scene_datasets 51 | DATA_PATH: environments/habitat/habitat-api/data/datasets/reconstruction/mp3d/v1/{split}/{split}.json.gz 52 | SHUFFLE_DATASET: False 53 | -------------------------------------------------------------------------------- /configs/reconstruction_exploration/ppo_pose_test.yaml: -------------------------------------------------------------------------------- 1 | BASE_TASK_CONFIG_PATH: "configs/reconstruction_exploration/mp3d_test_config.yaml" 2 | TRAINER: 3 | TRAINER_NAME: "ppo" 4 | RL: 5 | PPO: 6 | # general options 7 | num_processes: 1 8 | task_config: "configs/reconstruction_exploration/mp3d_test_config.yaml" 9 | sensors: "RGB_SENSOR,DEPTH_SENSOR,FINE_OCC_SENSOR,COARSE_OCC_SENSOR" 10 | -------------------------------------------------------------------------------- /configs/reconstruction_exploration/ppo_pose_train.yaml: -------------------------------------------------------------------------------- 1 | BASE_TASK_CONFIG_PATH: "configs/reconstruction_exploration/mp3d_train_config.yaml" 2 | TRAINER: 3 | TRAINER_NAME: "ppo" 4 | RL: 5 | PPO: 6 | # general options 7 | num_processes: 8 8 | task_config: "configs/reconstruction_exploration/mp3d_train_config.yaml" 9 | sensors: "RGB_SENSOR,DEPTH_SENSOR,FINE_OCC_SENSOR,COARSE_OCC_SENSOR" 10 | -------------------------------------------------------------------------------- /configs/reconstruction_exploration/ppo_pose_val.yaml: -------------------------------------------------------------------------------- 1 | BASE_TASK_CONFIG_PATH: "configs/reconstruction_exploration/mp3d_val_config.yaml" 2 | TRAINER: 3 | TRAINER_NAME: "ppo" 4 | RL: 5 | PPO: 6 | # general options 7 | num_processes: 1 8 | task_config: "configs/reconstruction_exploration/mp3d_val_config.yaml" 9 | sensors: "RGB_SENSOR,DEPTH_SENSOR,FINE_OCC_SENSOR,COARSE_OCC_SENSOR" 10 | -------------------------------------------------------------------------------- /environments/gym-avd/.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | 
__pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | wheels/ 23 | *.egg-info/ 24 | .installed.cfg 25 | *.egg 26 | MANIFEST 27 | 28 | # PyInstaller 29 | # Usually these files are written by a python script from a template 30 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 31 | *.manifest 32 | *.spec 33 | 34 | # Installer logs 35 | pip-log.txt 36 | pip-delete-this-directory.txt 37 | 38 | # Unit test / coverage reports 39 | htmlcov/ 40 | .tox/ 41 | .coverage 42 | .coverage.* 43 | .cache 44 | nosetests.xml 45 | coverage.xml 46 | *.cover 47 | .hypothesis/ 48 | .pytest_cache/ 49 | 50 | # Translations 51 | *.mo 52 | *.pot 53 | 54 | # Django stuff: 55 | *.log 56 | local_settings.py 57 | db.sqlite3 58 | 59 | # Flask stuff: 60 | instance/ 61 | .webassets-cache 62 | 63 | # Scrapy stuff: 64 | .scrapy 65 | 66 | # Sphinx documentation 67 | docs/_build/ 68 | 69 | # PyBuilder 70 | target/ 71 | 72 | # Jupyter Notebook 73 | .ipynb_checkpoints 74 | 75 | # pyenv 76 | .python-version 77 | 78 | # celery beat schedule file 79 | celerybeat-schedule 80 | 81 | # SageMath parsed files 82 | *.sage.py 83 | 84 | # Environments 85 | .env 86 | .venv 87 | env/ 88 | venv/ 89 | ENV/ 90 | env.bak/ 91 | venv.bak/ 92 | 93 | # Spyder project settings 94 | .spyderproject 95 | .spyproject 96 | 97 | # Rope project settings 98 | .ropeproject 99 | 100 | # mkdocs documentation 101 | /site 102 | 103 | # mypy 104 | .mypy_cache/ 105 | *.swp 106 | data 107 | -------------------------------------------------------------------------------- /environments/gym-avd/README.md: -------------------------------------------------------------------------------- 1 | # Active Vision Simulator 2 | This directory contains the code to an [OpenAI gym](https://gym.openai.com/)-based environment for simulating discrete motion on the [Active Vision Dataset](https://www.cs.unc.edu/~ammirato/active_vision_dataset_website/). 3 | 4 | ## Installation instructions 5 | 1. Install dependencies. 6 | 7 | ``` 8 | pip install -r requirements.txt 9 | export GYM_AVD_ROOT= 10 | ``` 11 | 2. Install `gym-avd`. 12 | 13 | ``` 14 | cd $GYM_AVD_ROOT 15 | python setup.py install 16 | ``` 17 | 3. Add the code root to `~/.bashrc`. 18 | 19 | ``` 20 | export PYTHONPATH=$GYM_AVD_ROOT:$PYTHONPATH 21 | ``` 22 | 4. Download data from the [AVD website](https://www.cs.unc.edu/~ammirato/active_vision_dataset_website/get_data.html). The camera calibration information can be obtained from the author of AVD. 23 | 24 | ``` 25 | tar -xvf ActiveVisionDataset_part1.tar 26 | tar -xvf ActiveVisionDataset_part2.tar 27 | tar -xvf ActiveVisionDataset_part3.tar 28 | tar -xvf ActiveVisionDataset_COLMAP_camera_params_part1-3.tar 29 | 30 | export AVD_DATASET_ROOT= 31 | ``` 32 | 33 | 5. Download additional processed data for simulation. 34 | 35 | ``` 36 | cd $GYM_AVD_ROOT/gym_avd 37 | mkdir data 38 | cd data 39 | wget https://dl.fbaipublicfiles.com/exploring-exploration/avd_extra_data.tar.gz -O data.tar.gz 40 | tar -xvf data.tar.gz 41 | rm data.tar.gz 42 | ``` 43 | 6. Set configuration paths for the simulator in `$GYM_AVD_ROOT/gym_avd/envs/config.py`. 44 | 45 | ``` 46 | GYM_AVD_ROOT= 47 | ROOT_DIR= 48 | ``` 49 | 7. 
Process dataset to extract images and connectivity: 50 | 51 | ``` 52 | cd $GYM_AVD_ROOT 53 | python preprocess_raw_data.py --root_dir $AVD_DATASET_ROOT 54 | ``` 55 | This will create the following files: 56 | 57 | ``` 58 | $AVD_DATASET_ROOT/processed_images_84x84.h5 59 | $AVD_DATASET_ROOT/processed_scenes_84x84.npy 60 | ``` 61 | 62 | ## Task demos 63 | This repository supports four tasks: 64 | 65 | - Exploration 66 | - Pose estimation 67 | - Reconstruction 68 | - PointNav 69 | 70 | Visual demos for each task are available. 71 | 72 | ``` 73 | cd $GYM_AVD_ROOT 74 | python gym_avd/demos/exploration_demo.py 75 | python gym_avd/demos/pose_estimation_demo.py 76 | python gym_avd/demos/reconstruction_demo.py 77 | python gym_avd/demos/pointnav_demo.py 78 | ``` 79 | -------------------------------------------------------------------------------- /environments/gym-avd/gym_avd/.gitignore: -------------------------------------------------------------------------------- 1 | data 2 | -------------------------------------------------------------------------------- /environments/gym-avd/gym_avd/__init__.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # Copyright (c) Facebook, Inc. and its affiliates. 3 | # All rights reserved. 4 | # This source code is licensed under the license found in the 5 | # LICENSE file in the root directory of this source tree. 6 | 7 | import gym_avd.envs 8 | -------------------------------------------------------------------------------- /environments/gym-avd/gym_avd/assets/maps_topdown_agent_sprite/100x100.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/facebookresearch/exploring_exploration/09d3f9b8703162fcc0974989e60f8cd5b47d4d39/environments/gym-avd/gym_avd/assets/maps_topdown_agent_sprite/100x100.png -------------------------------------------------------------------------------- /environments/gym-avd/gym_avd/demos/exploration_demo.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # Copyright (c) Facebook, Inc. and its affiliates. 3 | # All rights reserved. 4 | # This source code is licensed under the license found in the 5 | # LICENSE file in the root directory of this source tree. 6 | 7 | import cv2 8 | import gym 9 | import gym_avd 10 | import numpy as np 11 | from utils import * 12 | 13 | env = gym.make("avd-pose-landmarks-oracle-v0") 14 | obs = env.reset() 15 | topdown = env.generate_topdown_occupancy() 16 | rgb_im = proc_rgb(obs["im"]) 17 | fine_occ_im = proc_rgb(obs["fine_occupancy"]) 18 | coarse_occ_im = proc_rgb(obs["coarse_occupancy"]) 19 | topdown_im = proc_rgb(topdown) 20 | cv2.imshow( 21 | "Exploration demo", 22 | np.concatenate([rgb_im, fine_occ_im, coarse_occ_im, topdown_im], axis=1), 23 | ) 24 | cv2.waitKey(60) 25 | for i in range(1000): 26 | # oracle action is generated by sampling shortest paths between random points in the environment. 
27 | action = obs["oracle_action"][0] 28 | obs, _, done, info = env.step(action) 29 | if done: 30 | obs = env.reset() 31 | topdown = env.generate_topdown_occupancy() 32 | rgb_im = proc_rgb(obs["im"]) 33 | fine_occ_im = proc_rgb(obs["fine_occupancy"]) 34 | coarse_occ_im = proc_rgb(obs["coarse_occupancy"]) 35 | topdown_im = proc_rgb(topdown) 36 | 37 | metrics_to_print = { 38 | "Area covered (m^2)": info["seen_area"], 39 | "Objects covered": info["num_objects_visited"], 40 | "Landmarks covered": info["oracle_pose_success"], 41 | "Novelty": info["count_based_reward"], 42 | "Smooth coverage": info["coverage_novelty_reward"], 43 | } 44 | 45 | print("===============================================") 46 | for k, v in metrics_to_print.items(): 47 | print(f"{k:<25s}: {v:6.2f}") 48 | 49 | cv2.imshow( 50 | "Exploration demo", 51 | np.concatenate([rgb_im, fine_occ_im, coarse_occ_im, topdown_im], axis=1), 52 | ) 53 | cv2.waitKey(60) 54 | -------------------------------------------------------------------------------- /environments/gym-avd/gym_avd/demos/pointnav_demo.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # Copyright (c) Facebook, Inc. and its affiliates. 3 | # All rights reserved. 4 | # This source code is licensed under the license found in the 5 | # LICENSE file in the root directory of this source tree. 6 | 7 | import cv2 8 | import gym 9 | import gym_avd 10 | import numpy as np 11 | from utils import * 12 | 13 | WIDTH = 300 14 | HEIGHT = 300 15 | 16 | overall_image = np.zeros((HEIGHT * 2, WIDTH * 3, 3), dtype=np.uint8) 17 | 18 | T_exp = 50 19 | T_nav = 50 20 | 21 | env = gym.make("avd-nav-random-oracle-v0") 22 | env.seed(123) 23 | env.set_split("val") 24 | env.set_t_exp_and_nav(T_exp, T_nav) 25 | env.set_return_topdown_map() 26 | 27 | 28 | def process_inputs(rgb, depth, fine_occ, coarse_occ, topdown_map, target): 29 | obs_1 = np.concatenate([rgb, depth, topdown_map], axis=1) 30 | obs_2 = np.concatenate([fine_occ, coarse_occ, target], axis=1) 31 | return np.concatenate([obs_1, obs_2], axis=0) 32 | 33 | 34 | for i in range(10): 35 | obs = env.reset() 36 | topdown = env.generate_topdown_occupancy() 37 | rgb_im = proc_rgb(obs["im"]) 38 | fine_occ_im = proc_rgb(obs["fine_occupancy"]) 39 | coarse_occ_im = proc_rgb(obs["coarse_occupancy"]) 40 | topdown_im = proc_rgb(topdown) 41 | cv2.imshow( 42 | "PointNav: exploration phase", 43 | np.concatenate([rgb_im, fine_occ_im, coarse_occ_im, topdown_im], axis=1), 44 | ) 45 | cv2.waitKey(150) 46 | 47 | done = False 48 | for t in range(T_exp + T_nav): 49 | if t < T_exp: 50 | action = obs["oracle_action"][0].item() 51 | else: 52 | action = obs["sp_action"][0].item() 53 | 54 | obs, reward, done, info = env.step(action) 55 | if done or action == 3: 56 | cv2.destroyWindow("PointNav: navigation phase") 57 | break 58 | 59 | topdown = env.generate_topdown_occupancy() 60 | rgb_im = proc_rgb(obs["im"]) 61 | fine_occ_im = proc_rgb(obs["fine_occupancy"]) 62 | coarse_occ_im = proc_rgb(obs["coarse_occupancy"]) 63 | topdown_im = proc_rgb(topdown) 64 | if t < T_exp: 65 | cv2.imshow( 66 | "PointNav: exploration phase", 67 | np.concatenate( 68 | [rgb_im, fine_occ_im, coarse_occ_im, topdown_im], axis=1 69 | ), 70 | ) 71 | else: 72 | if t == T_exp: 73 | cv2.destroyWindow("PointNav: exploration phase") 74 | cv2.imshow( 75 | "PointNav: navigation phase", 76 | np.concatenate( 77 | [rgb_im, fine_occ_im, coarse_occ_im, topdown_im], axis=1 78 | ), 79 | ) 80 | 81 | cv2.waitKey(150) 82 | 
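The demo scripts above all follow the same interaction pattern: make an `avd-*` environment, reset it, and repeatedly feed back the oracle action from the observation dictionary. Below is a condensed, display-free variant of `exploration_demo.py` (no OpenCV windows) that can serve as a quick smoke test; it assumes gym-avd and its data have been set up as described in the README above.

```
import gym
import gym_avd  # registers the avd-* environments

env = gym.make("avd-pose-landmarks-oracle-v0")
obs = env.reset()
for _ in range(100):
    action = obs["oracle_action"][0]  # shortest-path oracle action
    obs, _, done, info = env.step(action)
    if done:
        obs = env.reset()

# Same metrics printed by exploration_demo.py:
print("Area covered (m^2):", info["seen_area"])
print("Objects covered   :", info["num_objects_visited"])
print("Landmarks covered :", info["oracle_pose_success"])
```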
-------------------------------------------------------------------------------- /environments/gym-avd/gym_avd/demos/pose_estimation_demo.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # Copyright (c) Facebook, Inc. and its affiliates. 3 | # All rights reserved. 4 | # This source code is licensed under the license found in the 5 | # LICENSE file in the root directory of this source tree. 6 | 7 | import cv2 8 | import gym 9 | import gym_avd 10 | import numpy as np 11 | from utils import * 12 | 13 | 14 | def create_reference_grid(refs_uint8): 15 | """ 16 | Inputs: 17 | refs_uint8 - (nRef, H, W, C) numpy array 18 | """ 19 | refs_uint8 = np.copy(refs_uint8) 20 | nRef, H, W, C = refs_uint8.shape 21 | 22 | nrow = int(math.sqrt(nRef)) 23 | 24 | ncol = nRef // nrow # (number of images per column) 25 | if nrow * ncol < nRef: 26 | ncol += 1 27 | final_grid = np.zeros((nrow * ncol, *refs_uint8.shape[1:]), dtype=np.uint8) 28 | font = cv2.FONT_HERSHEY_SIMPLEX 29 | 30 | final_grid[:nRef] = refs_uint8 31 | final_grid = final_grid.reshape( 32 | ncol, nrow, *final_grid.shape[1:] 33 | ) # (ncol, nrow, H, W, C) 34 | final_grid = final_grid.transpose(0, 2, 1, 3, 4) 35 | final_grid = final_grid.reshape(ncol * H, nrow * W, C) 36 | return final_grid 37 | 38 | 39 | WIDTH = 300 40 | HEIGHT = 300 41 | 42 | overall_image = np.zeros((HEIGHT * 2, WIDTH * 3, 3), dtype=np.uint8) 43 | 44 | env = gym.make("avd-pose-landmarks-oracle-v0") 45 | env.set_split("test") 46 | env.seed(123 + 12) 47 | env.plot_references_in_topdown = True 48 | nref = 10 49 | env.set_nref(nref) 50 | 51 | obs = env.reset() 52 | topdown = env.generate_topdown_occupancy() 53 | rgb_im = proc_rgb(obs["im"]) 54 | topdown_im = proc_rgb(topdown) 55 | ref_rgb = [proc_rgb(obs["pose_refs"][n]) for n in range(nref)] 56 | ref_rgb = cv2.resize(create_reference_grid(np.stack(ref_rgb, axis=0)), (HEIGHT, WIDTH)) 57 | 58 | overall_image = np.concatenate([rgb_im, topdown_im, ref_rgb], axis=1) 59 | 60 | cv2.imshow("Pose estimation demo", overall_image) 61 | cv2.waitKey(60) 62 | 63 | for i in range(10000): 64 | action = obs["oracle_action"][0] 65 | 66 | obs, _, done, info = env.step(action) 67 | 68 | if done: 69 | obs = env.reset() 70 | ref_rgb = [proc_rgb(obs["pose_refs"][n]) for n in range(nref)] 71 | ref_rgb = cv2.resize( 72 | create_reference_grid(np.stack(ref_rgb, axis=0)), (HEIGHT, WIDTH) 73 | ) 74 | 75 | topdown = env.generate_topdown_occupancy() 76 | rgb_im = proc_rgb(obs["im"]) 77 | topdown_im = proc_rgb(topdown) 78 | overall_image = np.concatenate([rgb_im, topdown_im, ref_rgb], axis=1) 79 | 80 | area = info["seen_area"] 81 | nlandmarks = info["oracle_pose_success"] 82 | nobjects = info["num_objects_visited"] 83 | 84 | print(f"Area: {area:5.2f} | OSR: {nlandmarks:5.2f} | Objects: {nobjects:5.2f}") 85 | cv2.imshow("Pose estimation demo", overall_image) 86 | cv2.waitKey(60) 87 | -------------------------------------------------------------------------------- /environments/gym-avd/gym_avd/demos/reconstruction_demo.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # Copyright (c) Facebook, Inc. and its affiliates. 3 | # All rights reserved. 4 | # This source code is licensed under the license found in the 5 | # LICENSE file in the root directory of this source tree. 
6 | 7 | import cv2 8 | import gym 9 | import gym_avd 10 | import numpy as np 11 | from utils import * 12 | 13 | 14 | def create_reference_grid(refs_uint8): 15 | """ 16 | Inputs: 17 | refs_uint8 - (nRef, H, W, C) numpy array 18 | """ 19 | refs_uint8 = np.copy(refs_uint8) 20 | nRef, H, W, C = refs_uint8.shape 21 | 22 | nrow = int(math.sqrt(nRef)) 23 | 24 | ncol = nRef // nrow # (number of images per column) 25 | if nrow * ncol < nRef: 26 | ncol += 1 27 | final_grid = np.zeros((nrow * ncol, *refs_uint8.shape[1:]), dtype=np.uint8) 28 | font = cv2.FONT_HERSHEY_SIMPLEX 29 | 30 | final_grid[:nRef] = refs_uint8 31 | final_grid = final_grid.reshape( 32 | ncol, nrow, *final_grid.shape[1:] 33 | ) # (ncol, nrow, H, W, C) 34 | final_grid = final_grid.transpose(0, 2, 1, 3, 4) 35 | final_grid = final_grid.reshape(ncol * H, nrow * W, C) 36 | return final_grid 37 | 38 | 39 | WIDTH = 300 40 | HEIGHT = 300 41 | 42 | overall_image = np.zeros((HEIGHT * 2, WIDTH * 3, 3), dtype=np.uint8) 43 | 44 | env = gym.make("avd-recon-v0") 45 | env.set_split("test") 46 | env.seed(123 + 12) 47 | env.plot_references_in_topdown = True 48 | nref = 50 49 | env.set_nref(nref) 50 | 51 | obs = env.reset() 52 | topdown = env.generate_topdown_occupancy() 53 | rgb_im = proc_rgb(obs["im"]) 54 | topdown_im = proc_rgb(topdown) 55 | ref_rgb = [proc_rgb(obs["pose_refs"][n]) for n in range(nref)] 56 | ref_rgb = cv2.resize(create_reference_grid(np.stack(ref_rgb, axis=0)), (HEIGHT, WIDTH)) 57 | 58 | overall_image = np.concatenate([rgb_im, topdown_im, ref_rgb], axis=1) 59 | 60 | cv2.imshow("Reconstruction demo", overall_image) 61 | cv2.waitKey(60) 62 | 63 | for i in range(10000): 64 | action = obs["oracle_action"][0] 65 | 66 | obs, _, done, info = env.step(action) 67 | 68 | if done: 69 | obs = env.reset() 70 | ref_rgb = [proc_rgb(obs["pose_refs"][n]) for n in range(nref)] 71 | ref_rgb = cv2.resize( 72 | create_reference_grid(np.stack(ref_rgb, axis=0)), (HEIGHT, WIDTH) 73 | ) 74 | 75 | topdown = env.generate_topdown_occupancy() 76 | rgb_im = proc_rgb(obs["im"]) 77 | topdown_im = proc_rgb(topdown) 78 | overall_image = np.concatenate([rgb_im, topdown_im, ref_rgb], axis=1) 79 | 80 | cv2.imshow("Reconstruction demo", overall_image) 81 | cv2.waitKey(60) 82 | -------------------------------------------------------------------------------- /environments/gym-avd/gym_avd/demos/utils.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # Copyright (c) Facebook, Inc. and its affiliates. 3 | # All rights reserved. 4 | # This source code is licensed under the license found in the 5 | # LICENSE file in the root directory of this source tree. 6 | 7 | import cv2 8 | import numpy as np 9 | import math  # needed by the demos' create_reference_grid helpers, which pull it in via `from utils import *` 10 | 11 | def proc_rgb(rgb): 12 | return cv2.resize(np.flip(rgb, axis=2), (300, 300)) 13 | 14 | 15 | def proc_depth(depth): 16 | depth = np.clip(depth / 1000.0, 0.0, 10.0) # Meters 17 | depth = depth * 255.0 / 10.0 # Intensities 18 | depth = np.repeat(depth, 3, axis=-1) 19 | return cv2.resize(depth.astype(np.uint8), (300, 300)) 20 | -------------------------------------------------------------------------------- /environments/gym-avd/gym_avd/envs/__init__.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # Copyright (c) Facebook, Inc. and its affiliates. 3 | # All rights reserved. 4 | # This source code is licensed under the license found in the 5 | # LICENSE file in the root directory of this source tree.
6 | 7 | from gym_avd.envs.avd_base_env import * 8 | from gym_avd.envs.avd_occ_base_env import * 9 | from gym_avd.envs.avd_pose_env import * 10 | from gym_avd.envs.avd_recon_env import * 11 | from gym_avd.envs.avd_nav_env import * 12 | -------------------------------------------------------------------------------- /environments/gym-avd/gym_avd/envs/avd_recon_env.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # Copyright (c) Facebook, Inc. and its affiliates. 3 | # All rights reserved. 4 | # This source code is licensed under the license found in the 5 | # LICENSE file in the root directory of this source tree. 6 | 7 | import gym 8 | import math 9 | import numpy as np 10 | 11 | from typing import Any, Dict, List, Optional, Tuple 12 | 13 | from gym import error, spaces, utils 14 | from gym.utils import seeding 15 | from gym_avd.envs.config import * 16 | from gym_avd.envs.utils import * 17 | from gym.envs.registration import register 18 | 19 | from gym_avd.envs.avd_pose_env import AVDPoseBaseEnv 20 | 21 | 22 | class AVDReconEnv(AVDPoseBaseEnv): 23 | r"""Implements an environment for the reconstruction task. It builds on top of 24 | the AVDPoseBaseEnv and samples reconstruction targets as locations on a uniform 25 | grid in the environment. 26 | """ 27 | 28 | def __init__(self, nRef: int = 50): 29 | super().__init__(nRef=nRef) 30 | 31 | def _initialize_environment_variables(self): 32 | r"""Additionally define reconstruction reference sampling details. 33 | """ 34 | super()._initialize_environment_variables() 35 | self.cluster_root_dir = CLUSTER_ROOT_DIR 36 | self.ref_sample_intervals = None 37 | 38 | def _sample_pose_refs(self): 39 | r"""Sample views from a uniform grid locations. 40 | """ 41 | min_x, min_z, max_x, max_z = self.get_environment_extents() 42 | all_nodes = self.data_conn[self.scene_idx]["nodes"] 43 | all_node_idxes = list(range(len(all_nodes))) 44 | all_nodes_positions = [ 45 | [node["world_pos"][2], node["world_pos"][0]] for node in all_nodes 46 | ] 47 | all_nodes_positions = np.array(all_nodes_positions) * self.scale 48 | # Sample nodes uniformly @ 1.5m distance from the environment. 49 | range_x = np.arange(min_x, max_x, 1500.0) 50 | range_z = np.arange(min_z, max_z, 1500.0) 51 | relevant_node_idxes = set() 52 | relevant_nodes = [] 53 | for x in range_x: 54 | for z in range_z: 55 | # Find closest node to this coordinate. 56 | zipped_data = zip(all_nodes, all_node_idxes, all_nodes_positions,) 57 | min_dist = math.inf 58 | min_dist_node = None 59 | min_dist_node_idx = None 60 | for node, node_idx, node_position in zipped_data: 61 | nx, nz = node_position[0], node_position[1] 62 | d = np.sqrt((x - nx) ** 2 + (z - nz) ** 2).item() 63 | if d < min_dist: 64 | min_dist = d 65 | min_dist_node = node 66 | min_dist_node_idx = node_idx 67 | if min_dist_node_idx not in relevant_node_idxes: 68 | relevant_nodes.append(min_dist_node) 69 | relevant_node_idxes.add(min_dist_node_idx) 70 | # Sample the reference images from the nodes. 71 | relevant_images = [] 72 | for node in relevant_nodes: 73 | for j in range(0, 12, 3): 74 | image_name = node["views"][j]["image_name"] 75 | relevant_images.append(image_name) 76 | self._pose_image_names = [] 77 | self._pose_refs = [] 78 | self._pose_refs_depth = [] 79 | self.ref_positions = [] 80 | self.ref_poses = [] 81 | self._pose_regress = [] 82 | for count, pose_image in enumerate(relevant_images): 83 | # Limit to self.nRef images. 
84 | if count >= self.nRef: 85 | break 86 | # Compute data for the pose references. 87 | ref_position = self._get_position(pose_image) 88 | ref_pose = self._get_pose(pose_image) 89 | pose_idx = self.images_to_idx[pose_image] 90 | pose_ref = self.scene_images[pose_idx] 91 | pose_ref_depth = self._process_depth(self.scene_depth[pose_idx]) 92 | pose_ref = pose_ref[np.newaxis, :, :, :] 93 | pose_ref_depth = pose_ref_depth[np.newaxis, :, :, :] 94 | # Compute reference pose relative to agent's starting pose. 95 | dx = ref_position[0] - self.start_position[0] 96 | dz = ref_position[2] - self.start_position[2] 97 | dr = math.sqrt(dx ** 2 + dz ** 2) 98 | dtheta = math.atan2(dz, dx) - self.start_pose 99 | dhead = ref_pose - self.start_pose 100 | delev = 0.0 101 | pose_regress = (dr, dtheta, dhead, delev) 102 | # Update the set of pose references. 103 | self._pose_image_names.append(pose_image) 104 | self._pose_refs.append(pose_ref) 105 | self._pose_refs_depth.append(pose_ref_depth) 106 | self.ref_positions.append(ref_position) 107 | self.ref_poses.append(ref_pose) 108 | self._pose_regress.append(pose_regress) 109 | 110 | self._pose_refs = np.concatenate(self._pose_refs, axis=0) 111 | self._pose_refs_depth = np.concatenate(self._pose_refs_depth, axis=0) 112 | self.ref_positions = np.array(self.ref_positions) 113 | self.ref_poses = np.array(self.ref_poses) 114 | self._pose_regress = np.array(self._pose_regress) 115 | self.oracle_pose_successes = np.zeros((self.nRef,)) 116 | self._valid_masks = np.ones((self._pose_refs.shape[0],)) 117 | # Pad the data with dummy data to account for missing references. 118 | if self._pose_refs.shape[0] < self.nRef: 119 | padding = self.nRef - self._pose_refs.shape[0] 120 | dummy_pose_image_names = ["" for _ in range(padding)] 121 | np_shape = (padding, *self._pose_refs.shape[1:]) 122 | dummy_pose_refs = np.zeros(np_shape, dtype=np.uint8) 123 | np_shape = (padding, *self._pose_refs_depth.shape[1:]) 124 | dummy_pose_refs_depth = np.zeros(np_shape, dtype=np.float32) 125 | dummy_ref_positions = np.zeros((padding, 3)) 126 | dummy_ref_poses = np.zeros((padding,)) 127 | dummy_pose_regress = np.zeros((padding, 4)) 128 | dummy_mask = np.zeros((padding,)) 129 | self._pose_image_names += dummy_pose_image_names 130 | self._pose_refs = np.concatenate( 131 | [self._pose_refs, dummy_pose_refs], axis=0, 132 | ) 133 | self._pose_refs_depth = np.concatenate( 134 | [self._pose_refs_depth, dummy_pose_refs_depth], axis=0, 135 | ) 136 | self.ref_positions = np.concatenate( 137 | [self.ref_positions, dummy_ref_positions], axis=0, 138 | ) 139 | self.ref_poses = np.concatenate([self.ref_poses, dummy_ref_poses], axis=0,) 140 | self._pose_regress = np.concatenate( 141 | [self._pose_regress, dummy_pose_regress], axis=0, 142 | ) 143 | self._valid_masks = np.concatenate([self._valid_masks, dummy_mask], axis=0,) 144 | 145 | def generate_topdown_occupancy(self) -> np.array: 146 | r"""Generates the top-down occupancy map of the environment. 147 | """ 148 | # Obtain the top-down images from the original environment. 149 | grid = super().generate_topdown_occupancy() 150 | # Draw the set of pose references. 
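# Every valid reference (entries with an empty image name are skipped) is
# rendered as an oriented agent marker at its position and heading.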
151 | min_x, min_z, max_x, max_z = self.get_environment_extents() 152 | grid_size = 20.0 153 | env_size = max(max_z - min_z, max_x - min_x, 8000.0) 154 | x_pad = (env_size - (max_x - min_x)) // 2 155 | z_pad = (env_size - (max_z - min_z)) // 2 156 | min_x = min_x - x_pad 157 | min_z = min_z - z_pad 158 | max_x = max_x + x_pad 159 | max_z = max_z + z_pad 160 | radius = max(grid.shape[0] // 50, 1) 161 | for pose_img in self._pose_image_names: 162 | if pose_img == "": 163 | continue 164 | curr_pos = self._get_position(pose_img) 165 | curr_pos = np.array([curr_pos[0], curr_pos[2]]) 166 | curr_pos = (curr_pos - np.array([min_x, min_z])) / grid_size 167 | curr_theta = self._get_pose(pose_img) 168 | grid = draw_agent(grid, curr_pos, curr_theta, (255, 0, 0), size=radius,) 169 | 170 | return grid 171 | 172 | 173 | register( 174 | id="avd-recon-v0", entry_point="gym_avd.envs:AVDReconEnv", 175 | ) 176 | -------------------------------------------------------------------------------- /environments/gym-avd/gym_avd/envs/config.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # Copyright (c) Facebook, Inc. and its affiliates. 3 | # All rights reserved. 4 | # This source code is licensed under the license found in the 5 | # LICENSE file in the root directory of this source tree. 6 | 7 | GYM_AVD_ROOT = "" 8 | ROOT_DIR = "" 9 | CLUSTER_ROOT_DIR = f"{GYM_AVD_ROOT}/gym_avd/data/avd_clusters" 10 | AREAS_FILE = f"{GYM_AVD_ROOT}/gym_avd/data/environment_areas.json" 11 | OBJ_COUNTS_FILE = f"{GYM_AVD_ROOT}/gym_avd/data/object_counts_per_env.json" 12 | OBJ_PROPS_FILE = "" 13 | VALID_INSTANCES_ROOT_DIR = f"{GYM_AVD_ROOT}/gym_avd/data/valid_instances_per_env" 14 | SIZE_CLASSIFICATION_PATH = f"{GYM_AVD_ROOT}/gym_avd/data/size_classification.json.gz" 15 | POINTNAV_TEST_EPISODES_PATH = f"{GYM_AVD_ROOT}/gym_avd/data/tdn_test_episodes.json" 16 | MAX_STEPS = 200 17 | -------------------------------------------------------------------------------- /environments/gym-avd/gym_avd/envs/utils.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # Copyright (c) Facebook, Inc. and its affiliates. 3 | # All rights reserved. 4 | # This source code is licensed under the license found in the 5 | # LICENSE file in the root directory of this source tree. 6 | 7 | import cv2 8 | import math 9 | import scipy.ndimage 10 | import numpy as np 11 | import networkx as nx 12 | 13 | 14 | def norm_angle(angle): 15 | return math.atan2(math.sin(angle), math.cos(angle)) 16 | 17 | 18 | def create_nav_graph(scan): 19 | """ 20 | scan - dictionary with keys as nodes, ... 
21 | 22 | nodes is a list with each node containing a list of neighbors 23 | """ 24 | G = nx.Graph() 25 | 26 | def distance(pos1, pos2): 27 | # Returns Euclidean distance in 3D space 28 | return np.linalg.norm(pos1 - pos2) 29 | 30 | for nodeix, node in enumerate(scan["nodes"]): 31 | for nbrix in node["neighbors"]: 32 | nbr = scan["nodes"][nbrix] 33 | node_pos = np.array(node["world_pos"]) * scan["scale"] 34 | nbr_pos = np.array(nbr["world_pos"]) * scan["scale"] 35 | G.add_edge(nodeix, nbrix, weight=distance(node_pos, nbr_pos)) 36 | 37 | return G 38 | 39 | 40 | def draw_border(img, color=(255, 0, 0)): 41 | cv2.rectangle(img, (0, 0), (img.shape[1] - 1, img.shape[0] - 1), color, 3) 42 | 43 | 44 | def draw_triangle(img, loc1, loc2, loc3, color=(0, 255, 0)): 45 | triangle_cnt = np.array([loc1, loc2, loc3]) 46 | cv2.drawContours(img, [triangle_cnt], 0, color, -1) 47 | 48 | 49 | def draw_agent(image, position, pose, color, size=5): 50 | loc1 = (int(position[0] - size), int(position[1] - size)) 51 | loc2 = (int(position[0]), int(position[1] + size)) 52 | loc3 = (int(position[0] + size), int(position[1] - size)) 53 | 54 | center = (int(position[0]), int(position[1])) 55 | loc4 = ( 56 | int(center[0] + 2 * size * math.cos(pose)), 57 | int(center[1] + 2 * size * math.sin(pose)), 58 | ) 59 | 60 | draw_triangle(image, loc1, loc2, loc3, color=color) 61 | image = cv2.line(image, center, loc4, (255, 255, 255), size // 2) 62 | return image 63 | 64 | 65 | def draw_agent_sprite(image, position, pose, sprite, size=5): 66 | # Rotate before resize 67 | rotated_sprite = scipy.ndimage.interpolation.rotate(sprite, -pose * 180 / np.pi) 68 | # Rescale because rotation may result in larger image than original, but 69 | # the agent sprite image should stay the same. 70 | initial_agent_size = sprite.shape[0] 71 | new_size = rotated_sprite.shape[0] 72 | 73 | # Rescale to a fixed size 74 | rotated_sprite = cv2.resize( 75 | rotated_sprite, 76 | ( 77 | int(3 * size * new_size / initial_agent_size), 78 | int(3 * size * new_size / initial_agent_size), 79 | ), 80 | ) 81 | 82 | # Add the rotated sprite to the image while ensuring boundary limits 83 | start_x = int(position[0]) - (rotated_sprite.shape[1] // 2) 84 | start_y = int(position[1]) - (rotated_sprite.shape[0] // 2) 85 | end_x = start_x + rotated_sprite.shape[1] - 1 86 | end_y = start_y + rotated_sprite.shape[0] - 1 87 | 88 | if start_x < 0: 89 | rotated_sprite = rotated_sprite[:, (-start_x):] 90 | start_x = 0 91 | elif end_x >= image.shape[1]: 92 | rotated_sprite = rotated_sprite[:, : (image.shape[1] - end_x - 1)] 93 | end_x = image.shape[1] - 1 94 | 95 | if start_y < 0: 96 | rotated_sprite = rotated_sprite[ 97 | (-start_y):, 98 | ] 99 | start_y = 0 100 | elif end_y >= image.shape[0]: 101 | rotated_sprite = rotated_sprite[ 102 | : (image.shape[0] - end_y - 1), 103 | ] 104 | end_y = image.shape[0] - 1 105 | 106 | alpha_mask = rotated_sprite[..., 2:3].astype(np.float32) / 255.0 107 | background = image[start_y : (end_y + 1), start_x : (end_x + 1)].astype(np.float32) 108 | foreground = rotated_sprite[..., :3].astype(np.float32) 109 | 110 | blended_sprite = cv2.add(foreground * alpha_mask, background * (1 - alpha_mask)) 111 | blended_sprite = blended_sprite.astype(np.uint8) 112 | image[start_y : (end_y + 1), start_x : (end_x + 1)] = blended_sprite 113 | 114 | return image 115 | -------------------------------------------------------------------------------- /environments/gym-avd/requirements.txt: 
-------------------------------------------------------------------------------- 1 | gym 2 | h5py 3 | imageio 4 | networkx 5 | numpy 6 | opencv-python 7 | Pillow 8 | scipy 9 | -------------------------------------------------------------------------------- /environments/gym-avd/setup.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # Copyright (c) Facebook, Inc. and its affiliates. 3 | # All rights reserved. 4 | # This source code is licensed under the license found in the 5 | # LICENSE file in the root directory of this source tree. 6 | 7 | from setuptools import setup 8 | 9 | setup(name="gym_avd", version="0.0.1", install_requires=["gym", "opencv-python"]) 10 | -------------------------------------------------------------------------------- /environments/habitat/.gitignore: -------------------------------------------------------------------------------- 1 | habitat-api 2 | habitat-sim 3 | -------------------------------------------------------------------------------- /environments/habitat/README.md: -------------------------------------------------------------------------------- 1 | # Habitat 2 | Our project uses a modified version of the original [habitat-lab](https://github.com/facebookresearch/habitat-lab) and [habitat-sim](https://github.com/facebookresearch/habitat-sim) repositories for simulating 3D motion in the Matterport3D dataset. 3 | 4 | ## Installing habitat-sim 5 | 1. Create a root directory for Habitat. 6 | 7 | ``` 8 | export HABITAT_ROOT= 9 | mkdir $HABITAT_ROOT 10 | cd $HABITAT_ROOT 11 | ``` 12 | 2. Clone `habitat-sim` and check out the specific version used for this code-base. 13 | 14 | ``` 15 | git clone git@github.com:facebookresearch/habitat-sim.git 16 | cd $HABITAT_ROOT/habitat-sim 17 | git checkout 15994e440560c1608b251a1c4059507d1cae801b 18 | ``` 19 | 20 | 3. Follow the installation instructions from `https://github.com/facebookresearch/habitat-sim` (at that specific commit). 21 | 22 | 4. Apply `habitat_sim.patch` to the `habitat-sim` repository. This incorporates some minor additions to the original simulator. 23 | 24 | ``` 25 | cd $HABITAT_ROOT 26 | cp habitat_sim.patch habitat-sim 27 | cd habitat-sim 28 | git apply habitat_sim.patch 29 | ``` 30 | 31 | ## Installing habitat-api 32 | 1. Clone `habitat-lab` and check out the specific version used for this code-base. 33 | 34 | ``` 35 | cd $HABITAT_ROOT 36 | git clone git@github.com:facebookresearch/habitat-lab.git habitat-api 37 | cd habitat-api 38 | git checkout 31318f81db05100099cfd308438d5930c3fb6cd2 39 | ``` 40 | 2. Follow the [installation instructions](https://github.com/facebookresearch/habitat-api). Download the Matterport3D scene dataset as instructed. 41 | 3. Apply `habitat_api.patch` to the `habitat-api` repository. This incorporates the necessary additions to the original API. 42 | 43 | ``` 44 | cd $HABITAT_ROOT 45 | cp habitat_api.patch habitat-api 46 | cd habitat-api 47 | patch -p0 < habitat_api.patch 48 | ``` 49 | 50 | 4. Download the task datasets. 51 | 52 | ``` 53 | mkdir -p $HABITAT_ROOT/habitat-api/data 54 | cd $HABITAT_ROOT/habitat-api/data 55 | wget -O task_datasets.tar.gz https://dl.fbaipublicfiles.com/exploring-exploration/mp3d_task_datasets.tar.gz 56 | tar -xvf task_datasets.tar.gz 57 | rm task_datasets.tar.gz 58 | ``` 59 | 5. 
Extract object annotations for MP3D: 60 | 61 | ``` 62 | cd $HABITAT_ROOT/habitat-api 63 | python data_generation_scripts/extract_object_annotations_per_env.py 64 | ``` 65 | 66 | ## Task demos 67 | This repository supports four tasks: 68 | 69 | - Exploration 70 | - Pose estimation 71 | - Reconstruction 72 | - PointNav 73 | 74 | Visual demos for each task are available. 75 | 76 | ``` 77 | python demos/exploration_demo.py 78 | python demos/pose_estimation_demo.py 79 | python demos/reconstruction_demo.py 80 | python demos/pointnav_demo.py 81 | ``` 82 | -------------------------------------------------------------------------------- /environments/habitat/habitat_sim.patch: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | # All rights reserved. 3 | # This source code is licensed under the license found in the 4 | # LICENSE file in the root directory of this source tree. 5 | 6 | diff --git a/habitat_sim/simulator.py b/habitat_sim/simulator.py 7 | index df7b2af..f7edca6 100644 8 | --- a/habitat_sim/simulator.py 9 | +++ b/habitat_sim/simulator.py 10 | @@ -151,6 +151,10 @@ class Simulator: 11 | observations[sensor_uuid] = sensor.get_observation() 12 | return observations 13 | 14 | + def get_specific_sensor_observations(self, sensor_uuid): 15 | + observations = self._sensors[sensor_uuid].get_observation() 16 | + return observations 17 | + 18 | def last_state(self): 19 | return self._last_state 20 | 21 | -------------------------------------------------------------------------------- /evaluate_pose_estimation.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # Copyright (c) Facebook, Inc. and its affiliates. 3 | # All rights reserved. 4 | # This source code is licensed under the license found in the 5 | # LICENSE file in the root directory of this source tree. 
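# Evaluates exploration policies on the pose-estimation task: builds the
# vectorized AVD or Habitat environments, loads the pretrained RetrievalNetwork,
# PairwisePosePredictor and ViewLocalizer head, optionally loads a trained
# exploration policy (not needed for heuristic actors such as random / oracle /
# forward / frontier), and writes the per-episode metrics returned by
# evaluate_pose() to statistics.json in the log directory.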
6 | 7 | import os 8 | import sys 9 | import math 10 | import json 11 | import torch 12 | import logging 13 | import numpy as np 14 | import torch.nn as nn 15 | 16 | from exploring_exploration.arguments import get_args 17 | from exploring_exploration.envs import ( 18 | make_vec_envs_avd, 19 | make_vec_envs_habitat, 20 | ) 21 | from exploring_exploration.models import RGBEncoder, MapRGBEncoder, Policy 22 | from exploring_exploration.models.pose_estimation import ( 23 | RetrievalNetwork, 24 | PairwisePosePredictor, 25 | ViewLocalizer, 26 | ) 27 | from exploring_exploration.utils.pose_estimation import ( 28 | get_pose_criterion, 29 | get_pose_label_shape, 30 | get_gaussian_kernel, 31 | ) 32 | from exploring_exploration.utils.eval import evaluate_pose 33 | 34 | args = get_args() 35 | 36 | torch.manual_seed(args.seed) 37 | if args.cuda: 38 | torch.cuda.manual_seed(args.seed) 39 | 40 | try: 41 | os.makedirs(args.log_dir) 42 | except OSError: 43 | pass 44 | 45 | eval_log_dir = os.path.join(args.log_dir, "monitor") 46 | 47 | try: 48 | os.makedirs(eval_log_dir) 49 | except OSError: 50 | pass 51 | 52 | 53 | def main(): 54 | torch.set_num_threads(1) 55 | device = torch.device("cuda:0" if args.cuda else "cpu") 56 | ndevices = torch.cuda.device_count() 57 | args.map_shape = (1, args.map_size, args.map_size) 58 | # Setup loggers 59 | logging.basicConfig(filename=f"{args.log_dir}/eval_log.txt", level=logging.DEBUG) 60 | logging.getLogger().addHandler(logging.StreamHandler(sys.stdout)) 61 | logging.getLogger().setLevel(logging.INFO) 62 | 63 | args.feat_shape_sim = (512,) 64 | args.feat_shape_pose = (512 * 9,) 65 | args.odometer_shape = (4,) # (delta_y, delta_x, delta_head, delta_elev) 66 | args.match_thresh = 0.95 67 | args.requires_policy = args.actor_type not in [ 68 | "random", 69 | "oracle", 70 | "forward", 71 | "forward-plus", 72 | "frontier", 73 | ] 74 | if "habitat" in args.env_name: 75 | if "CUDA_VISIBLE_DEVICES" in os.environ: 76 | devices = [ 77 | int(dev) for dev in os.environ["CUDA_VISIBLE_DEVICES"].split(",") 78 | ] 79 | # Devices need to be indexed between 0 to N-1 80 | devices = [dev for dev in range(len(devices))] 81 | else: 82 | devices = None 83 | eval_envs = make_vec_envs_habitat( 84 | args.habitat_config_file, 85 | device, 86 | devices, 87 | enable_odometry_noise=args.enable_odometry_noise, 88 | odometer_noise_scaling=args.odometer_noise_scaling, 89 | measure_noise_free_area=args.measure_noise_free_area, 90 | ) 91 | if args.actor_type == "frontier": 92 | large_map_range = 100.0 93 | H = eval_envs.observation_space.spaces["highres_coarse_occupancy"].shape[1] 94 | args.occ_map_scale = 0.1 * (2 * large_map_range + 1) / H 95 | else: 96 | eval_envs = make_vec_envs_avd( 97 | args.env_name, 98 | 123 + args.num_processes, 99 | args.num_processes, 100 | eval_log_dir, 101 | device, 102 | True, 103 | split=args.eval_split, 104 | nRef=args.num_pose_refs, 105 | set_return_topdown_map=True, 106 | ) 107 | if args.actor_type == "frontier": 108 | large_map_range = 100.0 109 | H = eval_envs.observation_space.spaces["highres_coarse_occupancy"].shape[0] 110 | args.occ_map_scale = 50.0 * (2 * large_map_range + 1) / H 111 | args.obs_shape = eval_envs.observation_space.spaces["im"].shape 112 | args.angles = torch.Tensor(np.radians(np.linspace(180, -150, 12))).to(device) 113 | args.bin_size = math.radians(31) 114 | 115 | # =================== Create models ==================== 116 | rnet = RetrievalNetwork() 117 | posenet = PairwisePosePredictor( 118 | use_classification=args.use_classification, 
num_classes=args.num_classes 119 | ) 120 | pose_head = ViewLocalizer(args.map_scale) 121 | if args.requires_policy: 122 | encoder = RGBEncoder() if args.encoder_type == "rgb" else MapRGBEncoder() 123 | action_config = ( 124 | { 125 | "nactions": eval_envs.action_space.n, 126 | "embedding_size": args.action_embedding_size, 127 | } 128 | if args.use_action_embedding 129 | else None 130 | ) 131 | collision_config = ( 132 | {"collision_dim": 2, "embedding_size": args.collision_embedding_size} 133 | if args.use_collision_embedding 134 | else None 135 | ) 136 | actor_critic = Policy( 137 | eval_envs.action_space, 138 | base_kwargs={ 139 | "feat_dim": args.feat_shape_sim[0], 140 | "recurrent": True, 141 | "hidden_size": args.feat_shape_sim[0], 142 | "action_config": action_config, 143 | "collision_config": collision_config, 144 | }, 145 | ) 146 | # =================== Load models ==================== 147 | rnet_state = torch.load(args.pretrained_rnet)["state_dict"] 148 | rnet.load_state_dict(rnet_state) 149 | posenet_state = torch.load(args.pretrained_posenet)["state_dict"] 150 | posenet.load_state_dict(posenet_state) 151 | rnet.to(device) 152 | posenet.to(device) 153 | pose_head.to(device) 154 | rnet.eval() 155 | posenet.eval() 156 | pose_head.eval() 157 | if args.requires_policy: 158 | encoder_state, actor_critic_state = torch.load(args.load_path)[:2] 159 | encoder.load_state_dict(encoder_state) 160 | actor_critic.load_state_dict(actor_critic_state) 161 | actor_critic.to(device) 162 | encoder.to(device) 163 | actor_critic.eval() 164 | encoder.eval() 165 | if args.use_multi_gpu: 166 | rnet.compare = nn.DataParallel(rnet.compare) 167 | rnet.feat_extract = nn.DataParallel(rnet.feat_extract) 168 | posenet.compare = nn.DataParallel(posenet.compare) 169 | posenet.feat_extract = nn.DataParallel(posenet.feat_extract) 170 | posenet.predict_depth = nn.DataParallel(posenet.predict_depth) 171 | posenet.predict_baseline = nn.DataParallel(posenet.predict_baseline) 172 | posenet.predict_baseline_sign = nn.DataParallel(posenet.predict_baseline_sign) 173 | 174 | # =================== Define pose criterion ==================== 175 | args.pose_loss_fn = get_pose_criterion() 176 | lab_shape = get_pose_label_shape() 177 | gaussian_kernel = get_gaussian_kernel( 178 | kernel_size=args.vote_kernel_size, sigma=0.5, channels=1 179 | ) 180 | 181 | eval_config = {} 182 | eval_config["num_steps"] = args.num_steps 183 | eval_config["num_processes"] = args.num_processes 184 | eval_config["obs_shape"] = args.obs_shape 185 | eval_config["feat_shape_sim"] = args.feat_shape_sim 186 | eval_config["feat_shape_pose"] = args.feat_shape_pose 187 | eval_config["odometer_shape"] = args.odometer_shape 188 | eval_config["lab_shape"] = lab_shape 189 | eval_config["map_shape"] = args.map_shape 190 | eval_config["map_scale"] = args.map_scale 191 | eval_config["angles"] = args.angles 192 | eval_config["bin_size"] = args.bin_size 193 | eval_config["gaussian_kernel"] = gaussian_kernel 194 | eval_config["match_thresh"] = args.match_thresh 195 | eval_config["pose_loss_fn"] = args.pose_loss_fn 196 | eval_config["num_eval_episodes"] = args.eval_episodes 197 | eval_config["num_pose_refs"] = args.num_pose_refs 198 | eval_config["median_filter_size"] = 3 199 | eval_config["vote_kernel_size"] = args.vote_kernel_size 200 | eval_config["env_name"] = args.env_name 201 | eval_config["actor_type"] = args.actor_type 202 | eval_config["pose_predictor_type"] = args.pose_predictor_type 203 | eval_config["encoder_type"] = args.encoder_type 204 | 
eval_config["ransac_n"] = args.ransac_n 205 | eval_config["ransac_niter"] = args.ransac_niter 206 | eval_config["ransac_batch"] = args.ransac_batch 207 | eval_config["use_action_embedding"] = args.use_action_embedding 208 | eval_config["use_collision_embedding"] = args.use_collision_embedding 209 | eval_config["vis_save_dir"] = os.path.join(args.log_dir, "visualizations") 210 | eval_config["final_topdown_save_path"] = os.path.join( 211 | args.log_dir, "top_down_maps.h5" 212 | ) 213 | eval_config["forward_action_id"] = 2 if "avd" in args.env_name else 0 214 | eval_config["turn_action_id"] = 0 if "avd" in args.env_name else 1 215 | eval_config["input_highres"] = args.input_highres 216 | if args.actor_type == "frontier": 217 | eval_config["occ_map_scale"] = args.occ_map_scale 218 | eval_config["frontier_dilate_occ"] = args.frontier_dilate_occ 219 | eval_config["max_time_per_target"] = args.max_time_per_target 220 | 221 | models = {} 222 | models["rnet"] = rnet 223 | models["posenet"] = posenet 224 | models["pose_head"] = pose_head 225 | if args.requires_policy: 226 | models["actor_critic"] = actor_critic 227 | models["encoder"] = encoder 228 | 229 | metrics, per_episode_metrics = evaluate_pose( 230 | models, 231 | eval_envs, 232 | eval_config, 233 | device, 234 | multi_step=True, 235 | interval_steps=args.interval_steps, 236 | visualize_policy=args.visualize_policy, 237 | visualize_size=args.visualize_size, 238 | visualize_batches=args.visualize_batches, 239 | visualize_n_per_batch=args.visualize_n_per_batch, 240 | ) 241 | 242 | json.dump( 243 | per_episode_metrics, open(os.path.join(args.log_dir, "statistics.json"), "w") 244 | ) 245 | 246 | 247 | if __name__ == "__main__": 248 | main() 249 | -------------------------------------------------------------------------------- /evaluate_reconstruction.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # Copyright (c) Facebook, Inc. and its affiliates. 3 | # All rights reserved. 4 | # This source code is licensed under the license found in the 5 | # LICENSE file in the root directory of this source tree. 
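# Evaluates exploration policies on the view-reconstruction task: loads the
# concept clusters (centroids and example images) from an h5 file, the
# transformer-based reconstruction decoder together with its pose encoder and
# feature network, optionally a trained exploration policy, and writes the
# per-episode metrics returned by evaluate_reconstruction() (computed with the
# multi-label classification loss) to statistics.json in the log directory.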
6 | 7 | import os 8 | import sys 9 | import json 10 | import h5py 11 | import torch 12 | import logging 13 | import numpy as np 14 | import torch.nn as nn 15 | 16 | from exploring_exploration.arguments import get_args 17 | from exploring_exploration.envs import ( 18 | make_vec_envs_avd, 19 | make_vec_envs_habitat, 20 | ) 21 | from exploring_exploration.models import RGBEncoder, MapRGBEncoder, Policy 22 | from exploring_exploration.utils.reconstruction_eval import evaluate_reconstruction 23 | from exploring_exploration.models.reconstruction import ( 24 | FeatureReconstructionModule, 25 | FeatureNetwork, 26 | PoseEncoder, 27 | ) 28 | from exploring_exploration.utils.reconstruction import rec_loss_fn_classify 29 | 30 | args = get_args() 31 | 32 | torch.manual_seed(args.seed) 33 | if args.cuda: 34 | torch.cuda.manual_seed(args.seed) 35 | 36 | try: 37 | os.makedirs(args.log_dir) 38 | except OSError: 39 | pass 40 | 41 | eval_log_dir = os.path.join(args.log_dir, "monitor") 42 | 43 | try: 44 | os.makedirs(eval_log_dir) 45 | except OSError: 46 | pass 47 | 48 | 49 | def main(): 50 | torch.set_num_threads(1) 51 | device = torch.device("cuda:0" if args.cuda else "cpu") 52 | ndevices = torch.cuda.device_count() 53 | # Setup loggers 54 | logging.basicConfig(filename=f"{args.log_dir}/eval_log.txt", level=logging.DEBUG) 55 | logging.getLogger().addHandler(logging.StreamHandler(sys.stdout)) 56 | logging.getLogger().setLevel(logging.INFO) 57 | 58 | args.feat_shape_sim = (512,) 59 | args.odometer_shape = (4,) # (delta_y, delta_x, delta_head, delta_elev) 60 | args.requires_policy = args.actor_type not in [ 61 | "random", 62 | "oracle", 63 | "forward", 64 | "forward-plus", 65 | "frontier", 66 | ] 67 | if "habitat" in args.env_name: 68 | if "CUDA_VISIBLE_DEVICES" in os.environ: 69 | devices = [ 70 | int(dev) for dev in os.environ["CUDA_VISIBLE_DEVICES"].split(",") 71 | ] 72 | # Devices need to be indexed between 0 to N-1 73 | devices = [dev for dev in range(len(devices))] 74 | else: 75 | devices = None 76 | eval_envs = make_vec_envs_habitat( 77 | args.habitat_config_file, device, devices, seed=args.seed 78 | ) 79 | if args.actor_type == "frontier": 80 | large_map_range = 100.0 81 | H = eval_envs.observation_space.spaces["highres_coarse_occupancy"].shape[1] 82 | args.occ_map_scale = 0.1 * (2 * large_map_range + 1) / H 83 | else: 84 | eval_envs = make_vec_envs_avd( 85 | args.env_name, 86 | args.seed + args.num_processes, 87 | args.num_processes, 88 | eval_log_dir, 89 | device, 90 | True, 91 | split=args.eval_split, 92 | nRef=args.num_pose_refs, 93 | set_return_topdown_map=True, 94 | ) 95 | if args.actor_type == "frontier": 96 | large_map_range = 100.0 97 | H = eval_envs.observation_space.spaces["highres_coarse_occupancy"].shape[0] 98 | args.occ_map_scale = 50.0 * (2 * large_map_range + 1) / H 99 | args.obs_shape = eval_envs.observation_space.spaces["im"].shape 100 | 101 | # =================== Load clusters ================= 102 | clusters_h5 = h5py.File(args.clusters_path, "r") 103 | cluster_centroids = torch.Tensor(np.array(clusters_h5["cluster_centroids"])).to( 104 | device 105 | ) 106 | args.nclusters = cluster_centroids.shape[0] 107 | clusters2images = {} 108 | for i in range(args.nclusters): 109 | cluster_images = np.array( 110 | clusters_h5[f"cluster_{i}/images"] 111 | ) # (K, C, H, W) torch Tensor 112 | cluster_images = np.ascontiguousarray(cluster_images.transpose(0, 2, 3, 1)) 113 | cluster_images = (cluster_images * 255.0).astype(np.uint8) 114 | clusters2images[i] = cluster_images # (K, H, W, C) 115 | 
clusters_h5.close() 116 | 117 | # =================== Create models ==================== 118 | decoder = FeatureReconstructionModule( 119 | args.nclusters, args.nclusters, nlayers=args.n_transformer_layers, 120 | ) 121 | feature_network = FeatureNetwork() 122 | feature_network = nn.DataParallel(feature_network, dim=0) 123 | pose_encoder = PoseEncoder() 124 | if args.use_multi_gpu: 125 | decoder = nn.DataParallel(decoder, dim=1) 126 | pose_encoder = nn.DataParallel(pose_encoder, dim=0) 127 | if args.requires_policy: 128 | encoder = RGBEncoder() if args.encoder_type == "rgb" else MapRGBEncoder() 129 | action_config = ( 130 | { 131 | "nactions": eval_envs.action_space.n, 132 | "embedding_size": args.action_embedding_size, 133 | } 134 | if args.use_action_embedding 135 | else None 136 | ) 137 | collision_config = ( 138 | {"collision_dim": 2, "embedding_size": args.collision_embedding_size} 139 | if args.use_collision_embedding 140 | else None 141 | ) 142 | actor_critic = Policy( 143 | eval_envs.action_space, 144 | base_kwargs={ 145 | "feat_dim": args.feat_shape_sim[0], 146 | "recurrent": True, 147 | "hidden_size": args.feat_shape_sim[0], 148 | "action_config": action_config, 149 | "collision_config": collision_config, 150 | }, 151 | ) 152 | 153 | # =================== Load models ==================== 154 | decoder_state, pose_encoder_state = torch.load(args.load_path_rec)[:2] 155 | decoder.load_state_dict(decoder_state) 156 | pose_encoder.load_state_dict(pose_encoder_state) 157 | decoder.to(device) 158 | feature_network.to(device) 159 | decoder.eval() 160 | feature_network.eval() 161 | pose_encoder.eval() 162 | pose_encoder.to(device) 163 | if args.requires_policy: 164 | encoder_state, actor_critic_state = torch.load(args.load_path)[:2] 165 | encoder.load_state_dict(encoder_state) 166 | actor_critic.load_state_dict(actor_critic_state) 167 | actor_critic.to(device) 168 | encoder.to(device) 169 | actor_critic.eval() 170 | encoder.eval() 171 | 172 | eval_config = {} 173 | eval_config["num_steps"] = args.num_steps 174 | eval_config["num_processes"] = args.num_processes 175 | eval_config["feat_shape_sim"] = args.feat_shape_sim 176 | eval_config["odometer_shape"] = args.odometer_shape 177 | eval_config["num_eval_episodes"] = args.eval_episodes 178 | eval_config["num_pose_refs"] = args.num_pose_refs 179 | eval_config["env_name"] = args.env_name 180 | eval_config["actor_type"] = args.actor_type 181 | eval_config["encoder_type"] = args.encoder_type 182 | eval_config["use_action_embedding"] = args.use_action_embedding 183 | eval_config["use_collision_embedding"] = args.use_collision_embedding 184 | eval_config["cluster_centroids"] = cluster_centroids 185 | eval_config["clusters2images"] = clusters2images 186 | eval_config["rec_loss_fn"] = rec_loss_fn_classify 187 | eval_config["vis_save_dir"] = os.path.join(args.log_dir, "visualizations") 188 | eval_config["forward_action_id"] = 2 if "avd" in args.env_name else 0 189 | eval_config["turn_action_id"] = 0 if "avd" in args.env_name else 1 190 | if args.actor_type == "frontier": 191 | eval_config["occ_map_scale"] = args.occ_map_scale 192 | eval_config["frontier_dilate_occ"] = args.frontier_dilate_occ 193 | eval_config["max_time_per_target"] = args.max_time_per_target 194 | 195 | models = {} 196 | models["decoder"] = decoder 197 | models["pose_encoder"] = pose_encoder 198 | models["feature_network"] = feature_network 199 | if args.requires_policy: 200 | models["actor_critic"] = actor_critic 201 | models["encoder"] = encoder 202 | 203 | metrics, 
per_episode_metrics = evaluate_reconstruction( 204 | models, 205 | eval_envs, 206 | eval_config, 207 | device, 208 | multi_step=True, 209 | interval_steps=args.interval_steps, 210 | visualize_policy=args.visualize_policy, 211 | ) 212 | 213 | json.dump( 214 | per_episode_metrics, open(os.path.join(args.log_dir, "statistics.json"), "w") 215 | ) 216 | 217 | 218 | if __name__ == "__main__": 219 | main() 220 | -------------------------------------------------------------------------------- /evaluate_visitation.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # Copyright (c) Facebook, Inc. and its affiliates. 3 | # All rights reserved. 4 | # This source code is licensed under the license found in the 5 | # LICENSE file in the root directory of this source tree. 6 | 7 | import os 8 | import sys 9 | import json 10 | import torch 11 | import logging 12 | 13 | from exploring_exploration.arguments import get_args 14 | from exploring_exploration.envs import ( 15 | make_vec_envs_avd, 16 | make_vec_envs_habitat, 17 | ) 18 | from exploring_exploration.models import RGBEncoder, MapRGBEncoder, Policy 19 | from exploring_exploration.utils.eval import evaluate_visitation 20 | 21 | args = get_args() 22 | 23 | torch.manual_seed(args.seed) 24 | if args.cuda: 25 | torch.cuda.manual_seed(args.seed) 26 | 27 | try: 28 | os.makedirs(args.log_dir) 29 | except OSError: 30 | pass 31 | 32 | eval_log_dir = os.path.join(args.log_dir, "monitor") 33 | 34 | try: 35 | os.makedirs(eval_log_dir) 36 | except OSError: 37 | pass 38 | 39 | 40 | def main(): 41 | torch.set_num_threads(1) 42 | device = torch.device("cuda:0" if args.cuda else "cpu") 43 | ndevices = torch.cuda.device_count() 44 | # Setup loggers 45 | logging.basicConfig(filename=f"{args.log_dir}/eval_log.txt", level=logging.DEBUG) 46 | logging.getLogger().addHandler(logging.StreamHandler(sys.stdout)) 47 | logging.getLogger().setLevel(logging.INFO) 48 | 49 | args.feat_shape_sim = (512,) 50 | args.feat_shape_pose = (512 * 9,) 51 | args.requires_policy = args.actor_type not in [ 52 | "random", 53 | "oracle", 54 | "forward", 55 | "forward-plus", 56 | "frontier", 57 | ] 58 | if "habitat" in args.env_name: 59 | if "CUDA_VISIBLE_DEVICES" in os.environ: 60 | devices = [ 61 | int(dev) for dev in os.environ["CUDA_VISIBLE_DEVICES"].split(",") 62 | ] 63 | # Devices need to be indexed between 0 to N-1 64 | devices = [dev for dev in range(len(devices))] 65 | else: 66 | devices = None 67 | eval_envs = make_vec_envs_habitat( 68 | args.habitat_config_file, 69 | device, 70 | devices, 71 | enable_odometry_noise=args.enable_odometry_noise, 72 | odometer_noise_scaling=args.odometer_noise_scaling, 73 | measure_noise_free_area=args.measure_noise_free_area, 74 | ) 75 | if args.actor_type == "frontier": 76 | large_map_range = 100.0 77 | H = eval_envs.observation_space.spaces["highres_coarse_occupancy"].shape[1] 78 | args.occ_map_scale = 0.1 * (2 * large_map_range + 1) / H 79 | else: 80 | eval_envs = make_vec_envs_avd( 81 | args.env_name, 82 | 123 + args.num_processes, 83 | args.num_processes, 84 | eval_log_dir, 85 | device, 86 | True, 87 | split=args.eval_split, 88 | nRef=args.num_pose_refs, 89 | set_return_topdown_map=True, 90 | ) 91 | if args.actor_type == "frontier": 92 | large_map_range = 100.0 93 | H = eval_envs.observation_space.spaces["highres_coarse_occupancy"].shape[0] 94 | args.occ_map_scale = 50.0 * (2 * large_map_range + 1) / H 95 | args.obs_shape = eval_envs.observation_space.spaces["im"].shape 96 | 97 | if 
args.requires_policy: 98 | # =================== Create models ==================== 99 | encoder = RGBEncoder() if args.encoder_type == "rgb" else MapRGBEncoder() 100 | action_config = ( 101 | { 102 | "nactions": eval_envs.action_space.n, 103 | "embedding_size": args.action_embedding_size, 104 | } 105 | if args.use_action_embedding 106 | else None 107 | ) 108 | collision_config = ( 109 | {"collision_dim": 2, "embedding_size": args.collision_embedding_size} 110 | if args.use_collision_embedding 111 | else None 112 | ) 113 | actor_critic = Policy( 114 | eval_envs.action_space, 115 | base_kwargs={ 116 | "feat_dim": args.feat_shape_sim[0], 117 | "recurrent": True, 118 | "hidden_size": args.feat_shape_sim[0], 119 | "action_config": action_config, 120 | "collision_config": collision_config, 121 | }, 122 | ) 123 | # =================== Load models ==================== 124 | encoder_state, actor_critic_state = torch.load(args.load_path)[:2] 125 | encoder.load_state_dict(encoder_state) 126 | actor_critic.load_state_dict(actor_critic_state) 127 | actor_critic.to(device) 128 | encoder.to(device) 129 | actor_critic.eval() 130 | encoder.eval() 131 | 132 | eval_config = {} 133 | eval_config["num_steps"] = args.num_steps 134 | eval_config["feat_shape_sim"] = args.feat_shape_sim 135 | eval_config["num_processes"] = args.num_processes 136 | eval_config["num_pose_refs"] = args.num_pose_refs 137 | eval_config["num_eval_episodes"] = args.eval_episodes 138 | eval_config["env_name"] = args.env_name 139 | eval_config["actor_type"] = args.actor_type 140 | eval_config["encoder_type"] = args.encoder_type 141 | eval_config["use_action_embedding"] = args.use_action_embedding 142 | eval_config["use_collision_embedding"] = args.use_collision_embedding 143 | eval_config["vis_save_dir"] = os.path.join(args.log_dir, "visualizations") 144 | eval_config["final_topdown_save_path"] = os.path.join( 145 | args.log_dir, "top_down_maps.h5" 146 | ) 147 | eval_config["forward_action_id"] = 2 if "avd" in args.env_name else 0 148 | eval_config["turn_action_id"] = 0 if "avd" in args.env_name else 1 149 | eval_config["input_highres"] = args.input_highres 150 | if args.actor_type == "frontier": 151 | eval_config["occ_map_scale"] = args.occ_map_scale 152 | eval_config["frontier_dilate_occ"] = args.frontier_dilate_occ 153 | eval_config["max_time_per_target"] = args.max_time_per_target 154 | 155 | models = {} 156 | if args.requires_policy: 157 | models["actor_critic"] = actor_critic 158 | models["encoder"] = encoder 159 | 160 | metrics, per_episode_metrics = evaluate_visitation( 161 | models, 162 | eval_envs, 163 | eval_config, 164 | device, 165 | multi_step=True, 166 | interval_steps=args.interval_steps, 167 | visualize_policy=args.visualize_policy, 168 | visualize_size=args.visualize_size, 169 | visualize_batches=args.visualize_batches, 170 | visualize_n_per_batch=args.visualize_n_per_batch, 171 | ) 172 | 173 | json.dump( 174 | per_episode_metrics, open(os.path.join(args.log_dir, "statistics.json"), "w") 175 | ) 176 | 177 | 178 | if __name__ == "__main__": 179 | main() 180 | -------------------------------------------------------------------------------- /exploring_exploration/.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 
19 | parts/ 20 | sdist/ 21 | var/ 22 | wheels/ 23 | *.egg-info/ 24 | .installed.cfg 25 | *.egg 26 | MANIFEST 27 | 28 | # PyInstaller 29 | # Usually these files are written by a python script from a template 30 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 31 | *.manifest 32 | *.spec 33 | 34 | # Installer logs 35 | pip-log.txt 36 | pip-delete-this-directory.txt 37 | 38 | # Unit test / coverage reports 39 | htmlcov/ 40 | .tox/ 41 | .coverage 42 | .coverage.* 43 | .cache 44 | nosetests.xml 45 | coverage.xml 46 | *.cover 47 | .hypothesis/ 48 | .pytest_cache/ 49 | 50 | # Translations 51 | *.mo 52 | *.pot 53 | 54 | # Django stuff: 55 | *.log 56 | local_settings.py 57 | db.sqlite3 58 | 59 | # Flask stuff: 60 | instance/ 61 | .webassets-cache 62 | 63 | # Scrapy stuff: 64 | .scrapy 65 | 66 | # Sphinx documentation 67 | docs/_build/ 68 | 69 | # PyBuilder 70 | target/ 71 | 72 | # Jupyter Notebook 73 | .ipynb_checkpoints 74 | 75 | # pyenv 76 | .python-version 77 | 78 | # celery beat schedule file 79 | celerybeat-schedule 80 | 81 | # SageMath parsed files 82 | *.sage.py 83 | 84 | # Environments 85 | .env 86 | .venv 87 | env/ 88 | venv/ 89 | ENV/ 90 | env.bak/ 91 | venv.bak/ 92 | 93 | # Spyder project settings 94 | .spyderproject 95 | .spyproject 96 | 97 | # Rope project settings 98 | .ropeproject 99 | 100 | # mkdocs documentation 101 | /site 102 | 103 | trained_models/ 104 | .fuse_hidden* 105 | 106 | # ctags 107 | tags 108 | 109 | *.swp 110 | logs 111 | *.mp4 112 | data/ 113 | custom_kernels/ 114 | enjoy_script.sh 115 | eval_scripts/ 116 | tests/ 117 | visualize_clusterings* 118 | *.sh 119 | *nfs* 120 | plotting_results 121 | policy_visualizations 122 | visualized_avd_clusters/ 123 | visualized_avd_clusters_v2/ 124 | *.yaml 125 | imagenet_* 126 | mp3d_tdn_visualizations/ 127 | sptm_vis_examples/ 128 | trained_models 129 | -------------------------------------------------------------------------------- /exploring_exploration/algo/__init__.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # Copyright (c) Facebook, Inc. and its affiliates. 3 | # All rights reserved. 4 | # This source code is licensed under the license found in the 5 | # LICENSE file in the root directory of this source tree. 6 | 7 | from .ppo import PPO 8 | from .imitation import Imitation 9 | from .supervised_reconstruction import SupervisedReconstruction 10 | -------------------------------------------------------------------------------- /exploring_exploration/algo/imitation.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # Copyright (c) Facebook, Inc. and its affiliates. 3 | # All rights reserved. 4 | # This source code is licensed under the license found in the 5 | # LICENSE file in the root directory of this source tree. 6 | 7 | import torch 8 | import torch.nn as nn 9 | import torch.nn.functional as F 10 | import torch.optim as optim 11 | from itertools import chain 12 | 13 | from exploring_exploration.utils.common import ( 14 | flatten_two, 15 | unflatten_two, 16 | ) 17 | 18 | 19 | def get_onehot_tensor(idxes, size): 20 | device = idxes.device 21 | bs = idxes.shape[0] 22 | oh = torch.zeros(bs, size).to(device).scatter_(1, idxes, 1) 23 | return oh 24 | 25 | 26 | class Imitation: 27 | """Algorithm to learn policy from expert trajectories via 28 | imitation learning. Incorporates inflection weighting from 29 | https://arxiv.org/pdf/1904.03461.pdf. 
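    As a rough illustration: for an episode of length T = 100 whose expert
    action sequence changes 20 times, the inflection factor is T / N = 5, so
    the loss terms at those 20 inflection time-steps are weighted 5x relative
    to the remaining steps (the factor is tracked with a moving average and
    clipped; see __init__ below).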
30 | """ 31 | 32 | def __init__(self, config): 33 | self.encoder = config["encoder"] 34 | self.actor_critic = config["actor_critic"] 35 | lr = config["lr"] 36 | eps = config["eps"] 37 | self.max_grad_norm = config["max_grad_norm"] 38 | self.nactions = config["nactions"] 39 | self.encoder_type = config["encoder_type"] 40 | self.use_action_embedding = config["use_action_embedding"] 41 | self.use_collision_embedding = config["use_collision_embedding"] 42 | self.use_inflection_weighting = config["use_inflection_weighting"] 43 | self.optimizer = optim.Adam( 44 | list( 45 | filter( 46 | lambda p: p.requires_grad, 47 | chain(self.encoder.parameters(), self.actor_critic.parameters()), 48 | ) 49 | ), 50 | lr=lr, 51 | eps=eps, 52 | ) 53 | if self.use_inflection_weighting: 54 | # inflection_factor = L / N where L = episode length and 55 | # N = # of inflection points in the episode. 56 | # The loss function will be biased towards inflection points in 57 | # the episode. Fewer the inflection points, larger the bias. 58 | # loss = inflection_factor * loss_inflection + 59 | # 1.0 * loss_non_inflection 60 | self.inflection_factor = 1.0 61 | # The inflection factor is updated during training by computing 62 | # a moving average estimate (with weighting inflection_beta). 63 | self.inflection_beta = 0.90 64 | # The inflection factor estimate for an episode is clipped to 65 | # this value to prevent explosion. 66 | self.trunc_factor_clipping = 10.0 67 | 68 | def update(self, rollouts): 69 | """Update the policy based on expert data in the rollouts. 70 | """ 71 | T, N = rollouts.actions.shape[:2] 72 | expert_actions = rollouts.actions # (T, N, 1) 73 | # Masks indicating when expert actions were *not* taken. This permits 74 | # a form of data augmentation where non-expert actions are taken to 75 | # accomodate distribution shifts b/w the expert and the learned policy. 76 | action_masks = rollouts.action_masks # (T, N, 1) 77 | hxs = rollouts.recurrent_hidden_states[0].unsqueeze(0) # (1, N, nfeats) 78 | masks = rollouts.masks[:-1] # (T, N, nfeats) 79 | # ============= Update inflection factor if applicable ================ 80 | if self.use_inflection_weighting: 81 | inflection_mask = self._get_inflection_mask(expert_actions) 82 | # Inverse frequency of inflection points. 83 | inflection_factor = T / (inflection_mask.sum(dim=0) + 1e-12) 84 | inflection_factor = torch.clamp( 85 | inflection_factor, 1.0, self.trunc_factor_clipping 86 | ) 87 | self._update_inflection_factor(inflection_factor.mean().item()) 88 | # ========================= Forward pass ============================== 89 | hxs = flatten_two(hxs) # (N, nfeats) 90 | masks = flatten_two(masks) # (T*N, nfeats) 91 | action_masks = flatten_two(action_masks).squeeze(1) # (T*N, ) 92 | policy_inputs = self._create_policy_inputs(rollouts) 93 | # (T*N, nactions) 94 | pred_action_log_probs = self.actor_critic.get_log_probs( 95 | policy_inputs, hxs, masks 96 | ) 97 | # ==================== Compute the prediction loss ==================== 98 | expert_actions = flatten_two(expert_actions).squeeze(1).long() # (T*N,) 99 | action_loss = F.nll_loss( 100 | pred_action_log_probs, expert_actions, reduction="none" 101 | ) # (T*N, ) 102 | # Weight the loss based on inflection points. 103 | if self.use_inflection_weighting: 104 | inflection_mask = flatten_two(inflection_mask).squeeze(1) # (T*N,) 105 | action_loss = action_loss * ( 106 | inflection_mask * self.inflection_factor + (1 - inflection_mask) * 1.0 107 | ) 108 | # Mask the losses for non-expert actions. 
109 | action_loss = (action_loss * action_masks).sum() / (action_masks.sum() + 1e-10) 110 | # ============================ Backward pass ========================== 111 | self.optimizer.zero_grad() 112 | action_loss.backward() 113 | nn.utils.clip_grad_norm_( 114 | chain(self.encoder.parameters(), self.actor_critic.parameters()), 115 | self.max_grad_norm, 116 | ) 117 | self.optimizer.step() 118 | 119 | losses = {} 120 | losses["action_loss"] = action_loss.item() 121 | return losses 122 | 123 | def _update_inflection_factor(self, inflection_factor): 124 | self.inflection_factor = ( 125 | self.inflection_factor * self.inflection_beta 126 | + inflection_factor * (1 - self.inflection_beta) 127 | ) 128 | 129 | def _create_policy_inputs(self, rollouts): 130 | """The policy inputs consist of features extract from the RGB and 131 | top-down occupancy maps, and learned encodings of the previous actions, 132 | and collision detections. 133 | """ 134 | obs_im = rollouts.obs_im[:-1] # (T, N, *obs_shape) 135 | encoder_inputs = [obs_im] 136 | if self.encoder_type == "rgb+map": 137 | encoder_inputs.append(rollouts.obs_sm[:-1]) # (T, N, *obs_shape) 138 | encoder_inputs.append(rollouts.obs_lm[:-1]) # (T, N, *obs_shape) 139 | encoder_inputs = [flatten_two(v) for v in encoder_inputs] 140 | obs_feats = self.encoder(*encoder_inputs) # (T*N, nfeats) 141 | policy_inputs = {"features": obs_feats} 142 | if self.use_action_embedding: 143 | prev_actions = torch.zeros_like(rollouts.actions) # (T, N, 1) 144 | prev_actions[1:] = rollouts.actions[:-1] 145 | prev_actions = flatten_two(prev_actions) # (T*N, 1) 146 | policy_inputs["actions"] = prev_actions.long() 147 | if self.use_collision_embedding: 148 | prev_collisions = flatten_two(rollouts.collisions[:-1]) # (T*N, 1) 149 | policy_inputs["collisions"] = prev_collisions.long() 150 | return policy_inputs 151 | 152 | def _get_inflection_mask(self, actions): 153 | """Given a sequence of actions, it predicts a mask highlighting 154 | the inflection points, i.e., points when the actions in the 155 | sequence change between t-1 and t. 156 | """ 157 | device = actions.device 158 | # actions - (T, N, 1) tensor 159 | prev_actions = actions[:-1] 160 | curr_actions = actions[1:] 161 | inflection_mask = (curr_actions != prev_actions).float() # (T-1, N, 1) 162 | # First action is never an inflection point 163 | inflection_mask = torch.cat( 164 | [torch.zeros(1, *actions.shape[1:]).to(device), inflection_mask], dim=0 165 | ) 166 | return inflection_mask 167 | -------------------------------------------------------------------------------- /exploring_exploration/algo/ppo.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # Copyright (c) Facebook, Inc. and its affiliates. 3 | # All rights reserved. 4 | # This source code is licensed under the license found in the 5 | # LICENSE file in the root directory of this source tree. 6 | 7 | import torch 8 | import torch.nn as nn 9 | import torch.nn.functional as F 10 | import torch.optim as optim 11 | 12 | from itertools import chain 13 | 14 | 15 | class PPO: 16 | """Algorithm to learn a policy via Proximal Policy Optimization: 17 | https://arxiv.org/abs/1707.06347 . 
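    Each update maximizes the clipped surrogate objective
    min(r_t * A_t, clip(r_t, 1 - eps, 1 + eps) * A_t), where r_t is the ratio
    of new to old action probabilities and A_t is the normalized advantage,
    combined with an (optionally clipped) value-function loss and an entropy
    bonus.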
18 | Large parts of the code were borrowed from Ilya Kostrikov's codebase: 19 | https://github.com/ikostrikov/pytorch-a2c-ppo-acktr-gail 20 | """ 21 | 22 | def __init__(self, config): 23 | self.encoder = config["encoder"] 24 | self.actor_critic = config["actor_critic"] 25 | lr = config["lr"] 26 | eps = config["eps"] 27 | self.clip_param = config["clip_param"] 28 | self.ppo_epoch = config["ppo_epoch"] 29 | self.encoder_type = config["encoder_type"] 30 | self.num_mini_batch = config["num_mini_batch"] 31 | self.entropy_coef = config["entropy_coef"] 32 | self.max_grad_norm = config["max_grad_norm"] 33 | self.nactions = config["nactions"] 34 | self.value_loss_coef = config["value_loss_coef"] 35 | self.use_clipped_value_loss = config["use_clipped_value_loss"] 36 | self.use_action_embedding = config["use_action_embedding"] 37 | self.use_collision_embedding = config["use_collision_embedding"] 38 | 39 | self.optimizer = optim.Adam( 40 | list( 41 | filter( 42 | lambda p: p.requires_grad, 43 | chain(self.encoder.parameters(), self.actor_critic.parameters()), 44 | ) 45 | ), 46 | lr=lr, 47 | eps=eps, 48 | ) 49 | 50 | def update(self, rollouts): 51 | advantages = rollouts.returns[:-1] - rollouts.value_preds[:-1] 52 | advantages = (advantages - advantages.mean()) / (advantages.std() + 1e-5) 53 | 54 | value_loss_epoch = 0 55 | action_loss_epoch = 0 56 | dist_entropy_epoch = 0 57 | 58 | for e in range(self.ppo_epoch): 59 | if self.actor_critic.is_recurrent: 60 | data_generator = rollouts.recurrent_generator( 61 | advantages, self.num_mini_batch 62 | ) 63 | else: 64 | data_generator = rollouts.feed_forward_generator( 65 | advantages, self.num_mini_batch 66 | ) 67 | 68 | for sample in data_generator: 69 | ( 70 | obs_im_batch, 71 | obs_sm_batch, 72 | obs_lm_batch, 73 | recurrent_hidden_states_batch, 74 | actions_batch, 75 | value_preds_batch, 76 | return_batch, 77 | masks_batch, 78 | collisions_batch, 79 | old_action_log_probs_batch, 80 | adv_targ, 81 | T, 82 | N, 83 | ) = sample 84 | 85 | # ======================= Forward pass ======================== 86 | encoder_inputs = [obs_im_batch] 87 | if self.encoder_type == "rgb+map": 88 | encoder_inputs += [obs_sm_batch, obs_lm_batch] 89 | obs_feats = self.encoder(*encoder_inputs) 90 | policy_inputs = {"features": obs_feats} 91 | prev_actions = torch.zeros_like(actions_batch.view(T, N, 1)) 92 | prev_actions[1:] = actions_batch.view(T, N, 1)[:-1] 93 | prev_actions = prev_actions.view(T * N, 1) 94 | prev_collisions = collisions_batch 95 | if self.use_action_embedding: 96 | policy_inputs["actions"] = prev_actions.long() 97 | if self.use_collision_embedding: 98 | policy_inputs["collisions"] = prev_collisions.long() 99 | # Reshape to do in a single forward pass for all steps 100 | policy_outputs = self.actor_critic.evaluate_actions( 101 | policy_inputs, 102 | recurrent_hidden_states_batch, 103 | masks_batch, 104 | actions_batch, 105 | ) 106 | values, action_log_probs, dist_entropy, _ = policy_outputs 107 | # ===================== Compute PPO losses ==================== 108 | # Clipped surrogate loss 109 | ratio = torch.exp(action_log_probs - old_action_log_probs_batch) 110 | surr1 = ratio * adv_targ 111 | surr2 = ( 112 | torch.clamp(ratio, 1.0 - self.clip_param, 1.0 + self.clip_param) 113 | * adv_targ 114 | ) 115 | action_loss = -torch.min(surr1, surr2).mean() 116 | # Value function loss 117 | if self.use_clipped_value_loss: 118 | value_pred_clipped = value_preds_batch + ( 119 | values - value_preds_batch 120 | ).clamp(-self.clip_param, self.clip_param) 121 | 
value_losses = (values - return_batch).pow(2) 122 | value_losses_clipped = (value_pred_clipped - return_batch).pow(2) 123 | value_loss = ( 124 | 0.5 * torch.max(value_losses, value_losses_clipped).mean() 125 | ) 126 | else: 127 | value_loss = 0.5 * F.mse_loss(return_batch, values) 128 | # ======================= Backward pass ======================= 129 | self.optimizer.zero_grad() 130 | ( 131 | value_loss * self.value_loss_coef 132 | + action_loss 133 | - dist_entropy * self.entropy_coef 134 | ).backward() 135 | nn.utils.clip_grad_norm_( 136 | chain(self.encoder.parameters(), self.actor_critic.parameters()), 137 | self.max_grad_norm, 138 | ) 139 | self.optimizer.step() 140 | # ===================== Update statistics ===================== 141 | value_loss_epoch += value_loss.item() 142 | action_loss_epoch += action_loss.item() 143 | dist_entropy_epoch += dist_entropy.item() 144 | 145 | num_updates = self.ppo_epoch * self.num_mini_batch 146 | value_loss_epoch /= num_updates 147 | action_loss_epoch /= num_updates 148 | dist_entropy_epoch /= num_updates 149 | 150 | losses = {} 151 | losses["value_loss"] = value_loss_epoch 152 | losses["action_loss"] = action_loss_epoch 153 | losses["dist_entropy"] = dist_entropy_epoch 154 | return losses 155 | -------------------------------------------------------------------------------- /exploring_exploration/algo/supervised_reconstruction.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # Copyright (c) Facebook, Inc. and its affiliates. 3 | # All rights reserved. 4 | # This source code is licensed under the license found in the 5 | # LICENSE file in the root directory of this source tree. 6 | 7 | import torch 8 | import torch.nn as nn 9 | import torch.optim as optim 10 | import itertools 11 | 12 | from exploring_exploration.utils.geometry import subtract_pose 13 | from exploring_exploration.utils.common import ( 14 | flatten_two, 15 | unflatten_two, 16 | unsq_exp, 17 | ) 18 | from einops import rearrange 19 | 20 | 21 | class SupervisedReconstruction: 22 | """Algorithm to learn a reconstruction task-head that reconstructs 23 | features at a new target location given features from existing locations. 24 | 25 | The default loss function is a multilabel loss. Existing features from 26 | training environments are clustered into K clusters. For each target 27 | location, the nearest J clusters are defined as positives and the 28 | remaining K-J clusters are defined as negatives. The model is required to 29 | identify these top-J clusters / "reconstruction" concepts. 30 | """ 31 | 32 | def __init__(self, config): 33 | self.decoder = config["decoder"] 34 | self.pose_encoder = config["pose_encoder"] 35 | lr = config["lr"] 36 | eps = config["eps"] 37 | self.max_grad_norm = config["max_grad_norm"] 38 | # The loss function is passed as an argument. The default loss is a 39 | # multi-label loss. 40 | self.rec_loss_fn = config["rec_loss_fn"] 41 | # The number of nearest neighbors to use as positives. 42 | self.rec_loss_fn_J = config["rec_loss_fn_J"] 43 | self.cluster_centroids = config["cluster_centroids"] 44 | # Make a prediction at regular intervals of this size. 
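# For example, with prediction_interval = 20 and an episode of length T = 100,
# the decoder is queried after observing 20, 40, 60, 80 and 100 steps, and the
# resulting losses are averaged in update().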
45 | self.prediction_interval = config["prediction_interval"] 46 | self.optimizer = optim.Adam( 47 | itertools.chain(self.decoder.parameters(), self.pose_encoder.parameters(),), 48 | lr=lr, 49 | eps=eps, 50 | ) 51 | 52 | def update(self, rollouts): 53 | T, N, nfeats = rollouts.obs_feats[:-1].shape 54 | nRef = rollouts.tgt_feats.shape[1] 55 | device = rollouts.obs_feats.device 56 | avg_loss = 0.0 57 | avg_loss_count = 0.0 58 | tgt_feats = rollouts.tgt_feats # (N, nRef, nfeats) 59 | tgt_masks = rollouts.tgt_masks.squeeze(2) # (N, nRef) 60 | obs_feats = unsq_exp(rollouts.obs_feats, nRef, dim=2) # (T+1, N, nRef, nfeats) 61 | obs_poses = unsq_exp( 62 | rollouts.obs_odometer[:, :, :3], nRef, dim=2 63 | ) # (T+1, N, nRef, 3) - (y, x, phi) 64 | tgt_poses = unsq_exp(rollouts.tgt_poses, T + 1, dim=0) # (T+1, N, nRef, 3) 65 | # Make a prediction after every prediction_interval steps, i.e., 66 | # the agent has seen self.prediction_interval*(i+1) observations. 67 | for i in range(0, T, self.prediction_interval): 68 | L = min(i + self.prediction_interval, T) 69 | # Estimate relative pose b/w targets and observations. 70 | obs_relpose = subtract_pose( 71 | rearrange(tgt_poses[:L], "l b n f -> (l b n) f"), 72 | rearrange(obs_poses[:L], "l b n f -> (l b n) f"), 73 | ) # (L*N*nRef, 3) --- (x, y, phi) 74 | # ========================= Forward pass ========================== 75 | # Encode the poses of the observations and targets. 76 | obs_relpose_enc = self.pose_encoder(obs_relpose) # (L*N*nRef, 16) 77 | obs_relpose_enc = obs_relpose_enc.view(L, N * nRef, -1) 78 | tgt_relpose_enc = torch.zeros(1, *obs_relpose_enc.shape[1:]).to(device) 79 | obs_feats_i = rearrange(obs_feats[:L], "l b n f -> l (b n) f") 80 | # These serve as inputs to an encoder-decoder transformer model. 81 | rec_inputs = { 82 | # encoder inputs 83 | "history_image_features": obs_feats_i, # (L, N*nRef, nfeats) 84 | "history_pose_features": obs_relpose_enc, # (L, N*nRef, 16) 85 | # decoder inputs 86 | "target_pose_features": tgt_relpose_enc, # (1, N*nRef, 16) 87 | } 88 | pred_logits = self.decoder(rec_inputs).squeeze(0) # (N*nRef, nclass) 89 | # =================== Compute reconstruction loss ================= 90 | loss = self.rec_loss_fn( 91 | pred_logits, # (N*nRef, nclass) 92 | flatten_two(tgt_feats), # (N*nRef, nfeats) 93 | self.cluster_centroids, 94 | K=self.rec_loss_fn_J, 95 | reduction="none", 96 | ).sum( 97 | dim=1 98 | ) # (N*nRef, ) 99 | loss = unflatten_two(loss, N, nRef) 100 | # Mask out invalid targets. 101 | loss = loss * tgt_masks 102 | loss = loss.mean() 103 | # ========================== Backward pass ======================== 104 | self.optimizer.zero_grad() 105 | loss.backward() 106 | nn.utils.clip_grad_norm_( 107 | itertools.chain( 108 | self.decoder.parameters(), self.pose_encoder.parameters(), 109 | ), 110 | self.max_grad_norm, 111 | ) 112 | self.optimizer.step() 113 | 114 | avg_loss += loss.item() 115 | avg_loss_count += 1.0 116 | 117 | avg_loss = avg_loss / avg_loss_count 118 | losses = {"rec_loss": avg_loss} 119 | return losses 120 | -------------------------------------------------------------------------------- /exploring_exploration/envs/__init__.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # Copyright (c) Facebook, Inc. and its affiliates. 3 | # All rights reserved. 4 | # This source code is licensed under the license found in the 5 | # LICENSE file in the root directory of this source tree. 
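# Factory functions for the vectorized environments used during training and
# evaluation: make_vec_envs_habitat wraps the Habitat / Matterport3D simulator,
# while make_vec_envs_avd wraps the gym-avd environments in a SubprocVecEnv
# with PyTorch-friendly dict observations.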
6 | 7 | from .habitat import make_vec_envs as make_vec_envs_habitat 8 | from .avd import make_vec_envs as make_vec_envs_avd 9 | -------------------------------------------------------------------------------- /exploring_exploration/envs/avd.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # Copyright (c) Facebook, Inc. and its affiliates. 3 | # All rights reserved. 4 | # This source code is licensed under the license found in the 5 | # LICENSE file in the root directory of this source tree. 6 | 7 | import os 8 | import torch 9 | 10 | import gym 11 | import gym_avd 12 | from gym.spaces.box import Box 13 | 14 | from baselines import bench 15 | from baselines.common.vec_env import VecEnvWrapper 16 | from baselines.common.vec_env.subproc_vec_env import SubprocVecEnv 17 | 18 | from einops import rearrange 19 | 20 | 21 | def make_env( 22 | env_id, 23 | seed, 24 | rank, 25 | log_dir, 26 | allow_early_resets, 27 | split="train", 28 | nRef=1, 29 | set_return_topdown_map=False, 30 | tdn_min_dist=2000.0, 31 | tdn_t_exp=200, 32 | tdn_t_nav=200, 33 | provide_collision_penalty=False, 34 | collision_penalty_factor=1e-1, 35 | n_landmarks=20, 36 | ): 37 | # Define a temporary function that creates an environment instance. 38 | def _thunk(): 39 | env = gym.make(env_id) 40 | env.set_split(split) 41 | env.set_nref(nRef) 42 | if set_return_topdown_map: 43 | env.set_return_topdown_map() 44 | env.set_nlandmarks(n_landmarks) 45 | if "tdn" in env_id: 46 | env.set_min_dist(tdn_min_dist) 47 | env.set_t_exp_and_nav(tdn_t_exp, tdn_t_nav) 48 | env.seed(seed + rank) 49 | obs_shape = env.observation_space.shape 50 | if log_dir is not None: 51 | env = bench.Monitor( 52 | env, 53 | os.path.join(log_dir, str(rank)), 54 | allow_early_resets=allow_early_resets, 55 | ) 56 | # If the input has shape (W,H,3), wrap for PyTorch convolutions 57 | obs_shape = env.observation_space.spaces["im"].shape 58 | if len(obs_shape) == 3 and obs_shape[2] in [1, 3]: 59 | env = TransposeImageDict(env) 60 | return env 61 | 62 | return _thunk 63 | 64 | 65 | def make_vec_envs( 66 | env_name, 67 | seed, 68 | num_processes, 69 | log_dir, 70 | device, 71 | allow_early_resets, 72 | num_frame_stack=None, 73 | nRef=1, 74 | n_landmarks=20, 75 | set_return_topdown_map=False, 76 | **kwargs 77 | ): 78 | envs = [ 79 | make_env( 80 | env_name, 81 | seed, 82 | i, 83 | log_dir, 84 | allow_early_resets, 85 | nRef=nRef, 86 | set_return_topdown_map=set_return_topdown_map, 87 | n_landmarks=n_landmarks, 88 | **kwargs 89 | ) 90 | for i in range(num_processes) 91 | ] 92 | 93 | envs = SubprocVecEnv(envs) 94 | envs = VecPyTorchDict(envs, device) 95 | 96 | return envs 97 | 98 | 99 | class TransposeImageDict(gym.ObservationWrapper): 100 | """Transpose the image data from (..., H, W, C) -> (..., C, H, W).""" 101 | 102 | keys_to_check = [ 103 | "im", 104 | "depth", 105 | "coarse_occupancy", 106 | "fine_occupancy", 107 | "highres_coarse_occupancy", 108 | "target_im", 109 | "pose_refs", 110 | "pose_refs_depth", 111 | "landmark_ims", 112 | ] 113 | 114 | def __init__(self, env=None): 115 | super().__init__(env) 116 | self.keys_to_transpose = [] 117 | for key in self.keys_to_check: 118 | if key in self.observation_space.spaces: 119 | self.keys_to_transpose.append(key) 120 | for key in self.keys_to_transpose: 121 | self.observation_space.spaces[key] = self._transpose_obs_space( 122 | self.observation_space.spaces[key] 123 | ) 124 | 125 | def _transpose_obs_space(self, obs_space): 126 | """Transposes the observation 
space from (... H W C) -> (... C H W).""" 127 | obs_shape = obs_space.shape 128 | assert len(obs_shape) in [3, 4] 129 | if len(obs_shape) == 4: 130 | new_obs_shape = [obs_shape[0], obs_shape[3], obs_shape[1], obs_shape[2]] 131 | else: 132 | new_obs_shape = [obs_shape[2], obs_shape[0], obs_shape[1]] 133 | dtype = obs_space.dtype 134 | low = obs_space.low.flat[0] 135 | high = obs_space.high.flat[0] 136 | return Box(low, high, new_obs_shape, dtype=dtype) 137 | 138 | def _transpose_obs(self, obs): 139 | """Transposes the observation from (... H W C) -> (... C H W) 140 | """ 141 | assert len(obs.shape) in [3, 4] 142 | if len(obs.shape) == 4: 143 | return rearrange(obs, "n h w c -> n c h w") 144 | else: 145 | return rearrange(obs, "h w c -> c h w") 146 | 147 | def observation(self, observation): 148 | for key in self.keys_to_transpose: 149 | if key in observation.keys(): 150 | observation[key] = self._transpose_obs(observation[key]) 151 | return observation 152 | 153 | 154 | class VecPyTorchDict(VecEnvWrapper): 155 | def __init__(self, venv, device): 156 | """Converts numpy arrays to torch sensors and load them to GPU.""" 157 | super(VecPyTorchDict, self).__init__(venv) 158 | self.device = device 159 | 160 | def reset(self): 161 | obs = self.venv.reset() 162 | obs = {key: torch.from_numpy(obs[key]).float().to(self.device) for key in obs} 163 | return obs 164 | 165 | def step_async(self, actions): 166 | actions = actions.squeeze(1).cpu().numpy() 167 | self.venv.step_async(actions) 168 | 169 | def step_wait(self): 170 | obs, reward, done, info = self.venv.step_wait() 171 | obs = {key: torch.from_numpy(obs[key]).float().to(self.device) for key in obs} 172 | reward = torch.from_numpy(reward).unsqueeze(dim=1).float() 173 | return obs, reward, done, info 174 | -------------------------------------------------------------------------------- /exploring_exploration/envs/habitat.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # Copyright (c) Facebook, Inc. and its affiliates. 3 | # All rights reserved. 4 | # This source code is licensed under the license found in the 5 | # LICENSE file in the root directory of this source tree. 
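# make_vec_envs() below composes four thin wrappers around the vectorized
# habitat-api environments, in this order:
#
#     construct_envs(config, env_class, devices)   # habitat_baselines vector env
#       -> BatchDataWrapper        # list of per-env obs dicts -> one batched dict of tensors
#       -> TransposeImageWrapper   # image observations (..., H, W, C) -> (..., C, H, W)
#       -> RenameKeysWrapper       # habitat-api sensor names -> this repo's naming convention
#       -> DeviceWrapper           # move every observation tensor to the given device
#
# so downstream training code sees the same observation interface as the AVD
# environments in envs/avd.py.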
6 | 7 | import torch 8 | 9 | from gym import spaces 10 | from gym.spaces import Box 11 | 12 | import habitat 13 | from habitat_baselines.common.utils import batch_obs 14 | from habitat_baselines.common.env_utils import construct_envs 15 | from habitat_baselines.common.environments import PoseRLEnv, ExpNavRLEnv 16 | from habitat_baselines.config.default_pose import get_config_pose 17 | from habitat_baselines.config.default_exp_nav import get_config_exp_nav 18 | 19 | from einops import rearrange 20 | 21 | 22 | def make_vec_envs( 23 | config, 24 | device, 25 | devices, 26 | seed=100, 27 | task_type="pose", 28 | enable_odometry_noise=None, 29 | odometer_noise_scaling=None, 30 | measure_noise_free_area=None, 31 | ): 32 | if task_type == "pose": 33 | config = get_config_pose(config, []) 34 | env_class = PoseRLEnv 35 | else: 36 | config = get_config_exp_nav(config, []) 37 | env_class = ExpNavRLEnv 38 | config.defrost() 39 | config.TASK_CONFIG.SEED = seed 40 | config.TASK_CONFIG.SIMULATOR.SEED = seed 41 | if enable_odometry_noise is not None: 42 | config.TASK_CONFIG.SIMULATOR.ENABLE_ODOMETRY_NOISE = enable_odometry_noise 43 | config.TASK_CONFIG.SIMULATOR.ODOMETER_NOISE_SCALING = odometer_noise_scaling 44 | if measure_noise_free_area is not None: 45 | config.TASK_CONFIG.SIMULATOR.OCCUPANCY_MAPS.MEASURE_NOISE_FREE_AREA = ( 46 | measure_noise_free_area 47 | ) 48 | config.freeze() 49 | envs = construct_envs(config, env_class, devices) 50 | envs = BatchDataWrapper(envs) 51 | envs = TransposeImageWrapper(envs) 52 | envs = RenameKeysWrapper(envs) 53 | envs = DeviceWrapper(envs, device) 54 | return envs 55 | 56 | 57 | class BatchDataWrapper: 58 | """Batches the observations received from habitat-api environments.""" 59 | 60 | def __init__(self, vec_envs): 61 | self._envs = vec_envs 62 | self.observation_space = vec_envs.observation_spaces[0] 63 | self.action_space = vec_envs.action_spaces[0] 64 | 65 | def reset(self): 66 | obs = self._envs.reset() 67 | obs = batch_obs(obs) 68 | return obs 69 | 70 | def step(self, actions): 71 | actions_list = [a[0].item() for a in actions] 72 | outputs = self._envs.step(actions_list) 73 | obs, rewards, done, info = [list(x) for x in zip(*outputs)] 74 | obs = batch_obs(obs) 75 | rewards = torch.tensor(rewards, dtype=torch.float) 76 | rewards = rewards.unsqueeze(1) 77 | 78 | return obs, rewards, done, info 79 | 80 | def close(self): 81 | self._envs.close() 82 | 83 | 84 | class TransposeImageWrapper: 85 | """Transpose the image data from (..., H, W, C) -> (..., C, H, W)""" 86 | 87 | keys_to_check = [ 88 | "rgb", 89 | "depth", 90 | "coarse_occupancy", 91 | "fine_occupancy", 92 | "highres_coarse_occupancy", 93 | "pose_estimation_rgb", 94 | "pose_estimation_depth", 95 | ] 96 | 97 | def __init__(self, vec_envs): 98 | self._envs = vec_envs 99 | self.observation_space = vec_envs.observation_space 100 | self.keys_to_transpose = [] 101 | for key in self.keys_to_check: 102 | if key in self.observation_space.spaces: 103 | self.keys_to_transpose.append(key) 104 | for key in self.keys_to_transpose: 105 | self.observation_space.spaces[key] = self._transpose_obs_space( 106 | self.observation_space.spaces[key] 107 | ) 108 | self.action_space = vec_envs.action_space 109 | 110 | def _transpose_obs_space(self, obs_space): 111 | """Transposes the observation space from (... H W C) -> (... 
C H W).""" 112 | obs_shape = obs_space.shape 113 | assert len(obs_shape) in [3, 4] 114 | if len(obs_shape) == 4: 115 | new_obs_shape = [obs_shape[0], obs_shape[3], obs_shape[1], obs_shape[2]] 116 | else: 117 | new_obs_shape = [obs_shape[2], obs_shape[0], obs_shape[1]] 118 | dtype = obs_space.dtype 119 | low = obs_space.low.flat[0] 120 | high = obs_space.high.flat[0] 121 | return Box(low, high, new_obs_shape, dtype=dtype) 122 | 123 | def _transpose_obs(self, obs): 124 | """Transposes the observation from (B ... H W C) -> (B ... C H W) 125 | """ 126 | assert len(obs.shape) in [4, 5] 127 | if len(obs.shape) == 5: 128 | return rearrange(obs, "b n h w c -> b n c h w") 129 | else: 130 | return rearrange(obs, "b h w c -> b c h w") 131 | 132 | def reset(self): 133 | obs = self._envs.reset() 134 | for k in self.keys_to_transpose: 135 | if k in obs.keys(): 136 | obs[k] = self._transpose_obs(obs[k]) 137 | return obs 138 | 139 | def step(self, actions): 140 | obs, reward, done, info = self._envs.step(actions) 141 | for k in self.keys_to_transpose: 142 | if k in obs.keys(): 143 | obs[k] = self._transpose_obs(obs[k]) 144 | return obs, reward, done, info 145 | 146 | def close(self): 147 | self._envs.close() 148 | 149 | 150 | class RenameKeysWrapper: 151 | """Renames keys from habitat-api convention to exploring_exploration 152 | convention. 153 | """ 154 | 155 | def __init__(self, vec_envs): 156 | self._envs = vec_envs 157 | self.mapping = { 158 | "rgb": "im", 159 | "depth": "depth", 160 | "coarse_occupancy": "coarse_occupancy", 161 | "fine_occupancy": "fine_occupancy", 162 | "delta_sensor": "delta", 163 | "pose_estimation_rgb": "pose_refs", 164 | "pose_estimation_depth": "pose_refs_depth", 165 | "pose_estimation_reg": "pose_regress", 166 | "pose_estimation_mask": "valid_masks", 167 | "oracle_action_sensor": "oracle_action", 168 | "collision_sensor": "collisions", 169 | "opsr": "oracle_pose_success", 170 | "area_covered": "seen_area", 171 | "inc_area_covered": "inc_area", 172 | "frac_area_covered": "frac_seen_area", 173 | "top_down_map_pose": "topdown_map", 174 | "novelty_reward": "count_based_reward", 175 | # Navigation specific ones 176 | "highres_coarse_occupancy": "highres_coarse_occupancy", 177 | "grid_goal_exp_nav": "target_grid_loc", 178 | "spl_exp_nav": "spl", 179 | "success_exp_nav": "success_rate", 180 | "nav_error_exp_nav": "nav_error", 181 | "top_down_map_exp_nav": "topdown_map", 182 | "local_top_down_sensor": "gt_highres_coarse_occupancy", 183 | } 184 | self.observation_space = spaces.Dict( 185 | { 186 | self.mapping[key]: val 187 | for key, val in vec_envs.observation_space.spaces.items() 188 | } 189 | ) 190 | self.action_space = vec_envs.action_space 191 | 192 | def reset(self): 193 | obs = self._envs.reset() 194 | obs_new = {} 195 | for key, val in obs.items(): 196 | obs_new[self.mapping[key]] = val 197 | return obs_new 198 | 199 | def step(self, actions): 200 | obs, reward, done, infos = self._envs.step(actions) 201 | obs_new = {} 202 | for key, val in obs.items(): 203 | obs_new[self.mapping[key]] = val 204 | 205 | infos_new = [] 206 | for info in infos: 207 | info_new = {} 208 | for key, val in info.items(): 209 | if key == "objects_covered_geometric": 210 | small = val["small_objects_visited"] 211 | medium = val["medium_objects_visited"] 212 | large = val["large_objects_visited"] 213 | categories = float(val["categories_visited"]) 214 | info_new["num_objects_visited"] = small + medium + large 215 | info_new["num_small_objects_visited"] = small 216 | 
info_new["num_medium_objects_visited"] = medium 217 | info_new["num_large_objects_visited"] = large 218 | info_new["num_categories_visited"] = categories 219 | elif key in self.mapping.keys(): 220 | info_new[self.mapping[key]] = val 221 | else: 222 | info_new[key] = val 223 | infos_new.append(info_new) 224 | 225 | return obs_new, reward, done, infos_new 226 | 227 | def close(self): 228 | self._envs.close() 229 | 230 | 231 | class DeviceWrapper: 232 | """Moves all observations to a torch device.""" 233 | 234 | def __init__(self, vec_envs, device): 235 | self._envs = vec_envs 236 | self.device = device 237 | 238 | self.observation_space = vec_envs.observation_space 239 | self.action_space = vec_envs.action_space 240 | 241 | def reset(self): 242 | obs = self._envs.reset() 243 | for key, val in obs.items(): 244 | obs[key] = val.to(self.device) 245 | 246 | return obs 247 | 248 | def step(self, actions): 249 | obs, reward, done, info = self._envs.step(actions) 250 | for key, val in obs.items(): 251 | obs[key] = val.to(self.device) 252 | 253 | return obs, reward, done, info 254 | 255 | def close(self): 256 | self._envs.close() 257 | -------------------------------------------------------------------------------- /exploring_exploration/models/.gitignore: -------------------------------------------------------------------------------- 1 | astar_pycpp 2 | -------------------------------------------------------------------------------- /exploring_exploration/models/__init__.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # Copyright (c) Facebook, Inc. and its affiliates. 3 | # All rights reserved. 4 | # This source code is licensed under the license found in the 5 | # LICENSE file in the root directory of this source tree. 6 | 7 | from .exploration import RGBEncoder, MapRGBEncoder, Policy 8 | -------------------------------------------------------------------------------- /exploring_exploration/models/curiosity.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # Copyright (c) Facebook, Inc. and its affiliates. 3 | # All rights reserved. 4 | # This source code is licensed under the license found in the 5 | # LICENSE file in the root directory of this source tree. 6 | 7 | import numpy as np 8 | import torch 9 | import torch.nn as nn 10 | import torchvision.models as models 11 | 12 | 13 | class ForwardDynamics(nn.Module): 14 | """Model that takes previous state, and action as inputs to predict 15 | the next state. 
16 | """ 17 | 18 | def __init__(self, n_actions): 19 | super().__init__() 20 | state_size = 512 21 | hidden_size = 256 22 | 23 | class ResidualBlock(nn.Module): 24 | def __init__(self): 25 | super().__init__() 26 | self.fc1 = nn.Sequential( 27 | nn.Linear(hidden_size + n_actions, hidden_size), 28 | nn.LeakyReLU(0.2, inplace=True), 29 | ) 30 | self.fc2 = nn.Sequential( 31 | nn.Linear(hidden_size + n_actions, hidden_size) 32 | ) 33 | 34 | def forward(self, feat, act): 35 | x = feat 36 | x = self.fc1(torch.cat([x, act], dim=1)) 37 | x = self.fc2(torch.cat([x, act], dim=1)) 38 | return feat + x 39 | 40 | self.pre_rb = nn.Sequential( 41 | nn.Linear(state_size + n_actions, hidden_size), 42 | nn.LeakyReLU(0.2, inplace=True), 43 | ) 44 | self.post_rb = nn.Linear(hidden_size, state_size) 45 | self.rb1 = ResidualBlock() 46 | self.rb2 = ResidualBlock() 47 | self.rb3 = ResidualBlock() 48 | self.rb4 = ResidualBlock() 49 | 50 | def forward(self, s, a): 51 | """ 52 | Inputs: 53 | s - (bs, state_size) 54 | a - (bs, n_actions) onehot encoding 55 | Outputs: 56 | sp - (bs, state_size) 57 | """ 58 | x = self.pre_rb(torch.cat([s, a], dim=1)) 59 | x = self.rb1(x, a) 60 | x = self.rb2(x, a) 61 | x = self.rb3(x, a) 62 | x = self.rb4(x, a) 63 | sp = self.post_rb(x) 64 | return sp 65 | 66 | 67 | class Phi(nn.Module): 68 | """A simple imagenet-pretrained encoder for state representation. 69 | """ 70 | 71 | def __init__(self): 72 | super().__init__() 73 | 74 | resnet = models.resnet18(pretrained=True) 75 | self.main = nn.Sequential( 76 | resnet.conv1, 77 | resnet.bn1, 78 | resnet.relu, 79 | resnet.maxpool, 80 | resnet.layer1, 81 | resnet.layer2, 82 | resnet.layer3, 83 | resnet.layer4, 84 | resnet.avgpool, 85 | ) 86 | 87 | def forward(self, x): 88 | """ 89 | Inputs: 90 | x - (bs, C, H, W) 91 | Outputs: 92 | feat - (bs, 512) 93 | """ 94 | feat = self.main(x).squeeze(3).squeeze(2) 95 | return feat 96 | 97 | 98 | # Maintains running statistics of the mean and standard deviation of 99 | # the episode returns. 
Used for reward normalization as done here: 100 | # https://arxiv.org/pdf/1808.04355.pdf 101 | class RunningMeanStd(object): 102 | # https://en.wikipedia.org/wiki/Algorithms_for_calculating_variance#Parallel_algorithm 103 | def __init__(self, epsilon=1e-4, shape=()): 104 | self.mean = np.zeros(shape, "float64") 105 | self.var = np.ones(shape, "float64") 106 | self.count = epsilon 107 | 108 | def update(self, x): 109 | batch_mean = np.mean(x, axis=0) 110 | batch_var = np.var(x, axis=0) 111 | batch_count = x.shape[0] 112 | self.update_from_moments(batch_mean, batch_var, batch_count) 113 | 114 | def update_from_moments(self, batch_mean, batch_var, batch_count): 115 | self.mean, self.var, self.count = update_mean_var_count_from_moments( 116 | self.mean, self.var, self.count, batch_mean, batch_var, batch_count 117 | ) 118 | 119 | 120 | def update_mean_var_count_from_moments( 121 | mean, var, count, batch_mean, batch_var, batch_count 122 | ): 123 | delta = batch_mean - mean 124 | tot_count = count + batch_count 125 | new_mean = mean + delta * batch_count / tot_count 126 | m_a = var * count 127 | m_b = batch_var * batch_count 128 | M2 = m_a + m_b + np.square(delta) * count * batch_count / tot_count 129 | new_var = M2 / tot_count 130 | new_count = tot_count 131 | return new_mean, new_var, new_count 132 | -------------------------------------------------------------------------------- /exploring_exploration/models/exploration.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # Copyright (c) Facebook, Inc. and its affiliates. 3 | # All rights reserved. 4 | # This source code is licensed under the license found in the 5 | # LICENSE file in the root directory of this source tree. 6 | 7 | import torch 8 | import torch.nn as nn 9 | import torchvision.models as models 10 | 11 | from exploring_exploration.utils.distributions import Categorical 12 | from exploring_exploration.utils.common import init 13 | 14 | 15 | class RGBEncoder(nn.Module): 16 | """Encoder that only takes RGB readings as inputs. 17 | """ 18 | 19 | def __init__(self, fix_cnn=True): 20 | super().__init__() 21 | self.rgb_resnet_model = models.resnet18(pretrained=True) 22 | resnet_models = [self.rgb_resnet_model] 23 | if fix_cnn: 24 | for model in resnet_models: 25 | for param in model.parameters(): 26 | param.requires_grad = False 27 | num_ftrs = self.rgb_resnet_model.fc.in_features 28 | num_in = 0 29 | self.rgb_resnet_model.avgpool = nn.AvgPool2d(3, stride=1) 30 | self.rgb_resnet_model.fc = nn.Linear(num_ftrs, 128) 31 | num_in += 128 32 | self.merge_fc = nn.Linear(num_in, 512) 33 | 34 | def forward(self, rgb): 35 | """ 36 | Inputs: 37 | rgb - (bs, C, H, W) 38 | Outputs: 39 | feat - (bs, 512) 40 | """ 41 | feat_rgb = self.rgb_resnet_model(rgb) 42 | feat = self.merge_fc(feat_rgb) 43 | return feat 44 | 45 | def get_feats(self, rgb): 46 | return self(rgb) 47 | 48 | 49 | class MapRGBEncoder(nn.Module): 50 | """Encoder that only takes RGB readings and egocentric occupancy 51 | maps as inputs. 
52 | """ 53 | 54 | def __init__(self, fix_cnn=True): 55 | super().__init__() 56 | self.rgb_resnet_model = models.resnet18(pretrained=True) 57 | self.large_map_resnet_model = models.resnet18(pretrained=True) 58 | self.small_map_resnet_model = models.resnet18(pretrained=True) 59 | resnet_models = [ 60 | self.rgb_resnet_model, 61 | self.large_map_resnet_model, 62 | self.small_map_resnet_model, 63 | ] 64 | if fix_cnn: 65 | for model in resnet_models: 66 | for param in model.parameters(): 67 | param.requires_grad = False 68 | num_ftrs = self.large_map_resnet_model.fc.in_features 69 | num_in = 0 70 | self.rgb_resnet_model.avgpool = nn.AvgPool2d(3, stride=1) 71 | self.rgb_resnet_model.fc = nn.Linear(num_ftrs, 128) 72 | num_in += 128 73 | self.large_map_resnet_model.avgpool = nn.AvgPool2d(3, stride=1) 74 | self.large_map_resnet_model.fc = nn.Linear(num_ftrs, 128) 75 | num_in += 128 76 | self.small_map_resnet_model.avgpool = nn.AvgPool2d(3, stride=1) 77 | self.small_map_resnet_model.fc = nn.Linear(num_ftrs, 128) 78 | num_in += 128 79 | self.merge_fc = nn.Linear(num_in, 512) 80 | 81 | def forward(self, rgb, small_maps, large_maps): 82 | """ 83 | Inputs: 84 | rgb - (bs, C, H, W) 85 | small_maps - (bs, C, H, W) 86 | large_maps - (bs, C, H, W) 87 | Outputs: 88 | feat - (bs, 512) 89 | """ 90 | feat_lm = self.large_map_resnet_model(large_maps) 91 | feat_sm = self.small_map_resnet_model(small_maps) 92 | feat_rgb = self.rgb_resnet_model(rgb) 93 | feat = self.merge_fc(torch.cat([feat_lm, feat_sm, feat_rgb], dim=1)) 94 | return feat 95 | 96 | def get_feats(self, rgb, small_maps, large_maps): 97 | return self(rgb, small_maps, large_maps) 98 | 99 | 100 | # Borrowed from https://github.com/ikostrikov/pytorch-a2c-ppo-acktr-gail 101 | class NNBase(nn.Module): 102 | def __init__(self, recurrent, recurrent_input_size, hidden_size): 103 | super(NNBase, self).__init__() 104 | self._hidden_size = hidden_size 105 | self._recurrent = recurrent 106 | if recurrent: 107 | self.gru = nn.GRUCell(recurrent_input_size, hidden_size) 108 | nn.init.orthogonal_(self.gru.weight_ih.data) 109 | nn.init.orthogonal_(self.gru.weight_hh.data) 110 | self.gru.bias_ih.data.fill_(0) 111 | self.gru.bias_hh.data.fill_(0) 112 | 113 | @property 114 | def is_recurrent(self): 115 | return self._recurrent 116 | 117 | @property 118 | def recurrent_hidden_state_size(self): 119 | if self._recurrent: 120 | return self._hidden_size 121 | return 1 122 | 123 | @property 124 | def output_size(self): 125 | return self._hidden_size 126 | 127 | def _forward_gru(self, x, hxs, masks): 128 | if x.size(0) == hxs.size(0): 129 | x = hxs = self.gru(x, hxs * masks) 130 | else: 131 | # x is a (T, N, -1) tensor that has been flatten to (T * N, -1) 132 | N = hxs.size(0) 133 | T = int(x.size(0) / N) 134 | # unflatten 135 | x = x.view(T, N, x.size(1)) 136 | # Same deal with masks 137 | masks = masks.view(T, N, 1) 138 | outputs = [] 139 | for i in range(T): 140 | hx = hxs = self.gru(x[i], hxs * masks[i]) 141 | outputs.append(hx) 142 | # assert len(outputs) == T 143 | # x is a (T, N, -1) tensor 144 | x = torch.stack(outputs, dim=0) 145 | # flatten 146 | x = x.view(T * N, -1) 147 | return x, hxs 148 | 149 | 150 | class MLPBase(NNBase): 151 | """An MLP recurrent-encoder for the policy inputs. It takes features, 152 | past actions, and collisions as inputs. 
153 | """ 154 | 155 | def __init__( 156 | self, 157 | feat_dim=512, 158 | recurrent=False, 159 | hidden_size=512, 160 | action_config=None, 161 | collision_config=None, 162 | ): 163 | super().__init__(recurrent, hidden_size, hidden_size) 164 | init_ = lambda m: init( 165 | m, 166 | nn.init.orthogonal_, 167 | lambda x: nn.init.constant_(x, 0), 168 | nn.init.calculate_gain("relu"), 169 | ) 170 | main_input_dim = feat_dim 171 | # Model to encode actions 172 | self.use_action_embedding = False 173 | if action_config is not None: 174 | nactions = action_config["nactions"] 175 | action_embedding_size = action_config["embedding_size"] 176 | self.action_encoder = nn.Sequential( 177 | nn.Embedding(nactions, action_embedding_size), 178 | nn.Linear(action_embedding_size, action_embedding_size), 179 | nn.ReLU(), 180 | nn.Linear(action_embedding_size, action_embedding_size), 181 | ) 182 | main_input_dim += action_embedding_size 183 | self.use_action_embedding = True 184 | # Model to encode collisions 185 | self.use_collision_embedding = False 186 | if collision_config is not None: 187 | collision_dim = collision_config["collision_dim"] 188 | collision_embedding_size = collision_config["embedding_size"] 189 | self.collision_encoder = nn.Sequential( 190 | nn.Embedding(collision_dim, collision_embedding_size), 191 | nn.Linear(collision_embedding_size, collision_embedding_size), 192 | nn.ReLU(), 193 | nn.Linear(collision_embedding_size, collision_embedding_size), 194 | ) 195 | main_input_dim += collision_embedding_size 196 | self.use_collision_embedding = True 197 | # Feature merging 198 | self.main = nn.Sequential( 199 | init_(nn.Linear(main_input_dim, hidden_size)), nn.ReLU() 200 | ) 201 | init_ = lambda m: init( 202 | m, nn.init.orthogonal_, lambda x: nn.init.constant_(x, 0) 203 | ) 204 | # Critic for policy learning 205 | self.critic_linear = init_(nn.Linear(hidden_size, 1)) 206 | self.train() 207 | 208 | def forward(self, inputs, rnn_hxs, masks): 209 | # Encode input features 210 | x = self._process_inputs(inputs) 211 | if self.is_recurrent: 212 | x, rnn_hxs = self._forward_gru(x, rnn_hxs, masks) 213 | return self.critic_linear(x), x, rnn_hxs 214 | 215 | def _process_inputs(self, inputs): 216 | """ 217 | inputs is a dictionary consisting of the following: 218 | { 219 | features: (bs, feat_dim) 220 | actions: (bs, 1) (optional) 221 | collisions: (bs, 1) one hot vector (optional) 222 | } 223 | """ 224 | input_values = [inputs["features"]] 225 | if self.use_action_embedding: 226 | act_feat = self.action_encoder(inputs["actions"].squeeze(1)) 227 | input_values.append(act_feat) 228 | if self.use_collision_embedding: 229 | coll_feat = self.collision_encoder(inputs["collisions"].squeeze(1)) 230 | input_values.append(coll_feat) 231 | input_values = torch.cat(input_values, dim=1) 232 | return self.main(input_values) 233 | 234 | 235 | # Borrowed from https://github.com/ikostrikov/pytorch-a2c-ppo-acktr-gail 236 | class Policy(nn.Module): 237 | def __init__(self, action_space, base_kwargs=None): 238 | super().__init__() 239 | if base_kwargs is None: 240 | base_kwargs = {} 241 | self.base = MLPBase(**base_kwargs) 242 | num_outputs = action_space.n 243 | self.dist = Categorical(self.base.output_size, num_outputs) 244 | 245 | @property 246 | def is_recurrent(self): 247 | return self.base.is_recurrent 248 | 249 | @property 250 | def recurrent_hidden_state_size(self): 251 | """Size of rnn_hx.""" 252 | return self.base.recurrent_hidden_state_size 253 | 254 | def forward(self, inputs, rnn_hxs, masks): 255 | raise 
NotImplementedError 256 | 257 | def act(self, inputs, rnn_hxs, masks, deterministic=False): 258 | value, actor_features, rnn_hxs = self.base(inputs, rnn_hxs, masks) 259 | dist = self.dist(actor_features) 260 | if deterministic: 261 | action = dist.mode() 262 | else: 263 | action = dist.sample() 264 | action_log_probs = dist.log_probs(action) 265 | dist_entropy = dist.entropy().mean() 266 | return value, action, action_log_probs, rnn_hxs 267 | 268 | def get_value(self, inputs, rnn_hxs, masks): 269 | value, _, _ = self.base(inputs, rnn_hxs, masks) 270 | return value 271 | 272 | def evaluate_actions(self, inputs, rnn_hxs, masks, action): 273 | value, actor_features, rnn_hxs = self.base(inputs, rnn_hxs, masks) 274 | dist = self.dist(actor_features) 275 | action_log_probs = dist.log_probs(action) 276 | dist_entropy = dist.entropy().mean() 277 | return value, action_log_probs, dist_entropy, rnn_hxs 278 | 279 | def get_log_probs(self, inputs, rnn_hxs, masks): 280 | value, actor_features, rnn_hxs = self.base(inputs, rnn_hxs, masks) 281 | dist = self.dist(actor_features) 282 | action_log_probs = (dist.probs + 1e-10).log() 283 | return action_log_probs 284 | -------------------------------------------------------------------------------- /exploring_exploration/models/frontier_agent.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # Copyright (c) Facebook, Inc. and its affiliates. 3 | # All rights reserved. 4 | # This source code is licensed under the license found in the 5 | # LICENSE file in the root directory of this source tree. 6 | 7 | import cv2 8 | import logging 9 | import numpy as np 10 | from exploring_exploration.models.navigation import ( 11 | AStarActorAVD, 12 | AStarActorHabitat, 13 | HierarchicalAStarActorHabitat, 14 | HierarchicalAStarActorHabitatV2, 15 | ) 16 | 17 | 18 | class FrontierAgent: 19 | def __init__( 20 | self, 21 | action_space, 22 | env_name, 23 | occ_map_scale, 24 | show_animation=False, 25 | seed=123, 26 | use_contour_sampling=True, 27 | dilate_occupancy=True, 28 | max_time_per_target=-1, 29 | ): 30 | self.map_size = None 31 | self.action_space = action_space 32 | self.show_animation = show_animation 33 | self.frontier_target = None 34 | self.occ_buffer = None 35 | self.seed = seed 36 | self._rng = random.Random(seed) 37 | self._time_elapsed_for_target = 0 38 | self._failure_count = 0 39 | self.use_contour_sampling = use_contour_sampling 40 | self.env_name = env_name 41 | 42 | if "avd" in env_name: 43 | self.actor = AStarActorAVD(action_space, show_animation=show_animation) 44 | self.max_time_per_target = ( 45 | 20 if max_time_per_target == -1 else max_time_per_target 46 | ) 47 | else: 48 | self.actor = HierarchicalAStarActorHabitatV2( 49 | action_space, occ_map_scale, show_animation=show_animation 50 | ) 51 | # Manually set dilate_occupancy flag 52 | if dilate_occupancy: 53 | self.actor.high_level_actor.dilate_occupancy = True 54 | self.actor.low_level_actor.dilate_occupancy = True 55 | self.max_time_per_target = ( 56 | 200 if max_time_per_target == -1 else max_time_per_target 57 | ) 58 | 59 | logging.info("========> FrontierAgent settings") 60 | logging.info(f"===> max_time_per_target : {self.max_time_per_target}") 61 | logging.info(f"===> dilate_occupancy : {dilate_occupancy}") 62 | 63 | def act(self, occ_map, prev_delta, collision_prev_step): 64 | if self.occ_buffer is None: 65 | self.map_size = occ_map.shape[0] 66 | self.occ_buffer = np.zeros((self.map_size, self.map_size), dtype=np.uint8) 67 | 68 | 
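        # In the loop below, action value 3 is the local planner's "done / cannot
        # plan" signal: the planner returns 3 once it has reached (or repeatedly
        # failed to plan a path to) the current frontier target. The loop therefore
        # keeps resampling frontier targets until a motion action is produced, and
        # falls back to a random turn/forward action if several consecutive targets
        # all come back with action 3.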
action = 3 69 | action_count = 0 70 | while action == 3: 71 | # If no target is selected or too much time was spent on a single target, pick a new target 72 | if ( 73 | self.frontier_target is None 74 | or self._time_elapsed_for_target >= self.max_time_per_target 75 | ): 76 | self.sample_frontier_target(occ_map) 77 | self.actor.reset() 78 | # If the hierarchical planner failed twice to generate a plan to the target, then sample a new target 79 | elif self._failure_count == 2: 80 | self.sample_frontier_target(occ_map) 81 | self.actor.reset() 82 | # If a valid target is available, then update the target coordinate based on the past motion. 83 | else: 84 | self.update_target(prev_delta) 85 | # If the agent has reached the target or the target is occupied, then sample a new target 86 | if self.has_reached_target() or np.all( 87 | occ_map[self.frontier_target[1], self.frontier_target[0]] 88 | == (0, 0, 255) 89 | ): 90 | self.sample_frontier_target(occ_map) 91 | self.actor.reset() 92 | # When the hierarchical actor has reached the target, resample the target 93 | elif action_count > 0 and action == 3: 94 | self.sample_frontier_target(occ_map) 95 | self.actor.reset() 96 | 97 | if self.show_animation: 98 | cv2.imshow("Occupancy", np.flip(occ_map, axis=2)) 99 | cv2.waitKey(20) 100 | 101 | action_count += 1 102 | 103 | # Prevents infinite loop when all frontier targets sampled return action=3 104 | if action_count > 3: 105 | logging.info("=====> Stuck in occupied region! ") 106 | return random.choice( 107 | [ 108 | self.action_space["left"], 109 | self.action_space["right"], 110 | self.action_space["forward"], 111 | ] 112 | ) 113 | 114 | if "avd" in self.env_name: 115 | action = self.actor.act( 116 | occ_map, self.frontier_target, collision_prev_step 117 | ) 118 | else: 119 | # This does not process the occupancy map. Process it. 
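            # (occ_map uses the colour coding documented in sample_frontier_target()
            #  below: (0, 0, 255) = occupied, (255, 255, 255) = unknown,
            #  (0, 255, 0) = free.)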
120 | action = self.actor.act( 121 | occ_map, self.frontier_target, prev_delta, collision_prev_step 122 | ) 123 | if self.actor.planning_failure_flag: 124 | self._failure_count += 1 125 | if self._failure_count == 2: 126 | action = 3 127 | 128 | self._time_elapsed_for_target += 1 129 | 130 | return action 131 | 132 | def sample_frontier_target(self, occ_map): 133 | """ 134 | Inputs: 135 | occ_map - occupancy map with the following color coding: 136 | (0, 0, 255) is occupied region 137 | (255, 255, 255) is unknown region 138 | (0, 255, 0) is free region 139 | """ 140 | self.occ_buffer.fill(0) 141 | self._time_elapsed_for_target = 0 142 | self._failure_count = 0 143 | 144 | unknown_mask = np.all(occ_map == (255, 255, 255), axis=-1).astype(np.uint8) 145 | free_mask = np.all(occ_map == (0, 255, 0), axis=-1).astype(np.uint8) 146 | 147 | unknown_mask_shiftup = np.pad( 148 | unknown_mask, ((0, 1), (0, 0)), mode="constant", constant_values=0 149 | )[1:, :] 150 | unknown_mask_shiftdown = np.pad( 151 | unknown_mask, ((1, 0), (0, 0)), mode="constant", constant_values=0 152 | )[:-1, :] 153 | unknown_mask_shiftleft = np.pad( 154 | unknown_mask, ((0, 0), (0, 1)), mode="constant", constant_values=0 155 | )[:, 1:] 156 | unknown_mask_shiftright = np.pad( 157 | unknown_mask, ((0, 0), (1, 0)), mode="constant", constant_values=0 158 | )[:, :-1] 159 | 160 | frontier_mask = ( 161 | (free_mask == unknown_mask_shiftup) 162 | | (free_mask == unknown_mask_shiftdown) 163 | | (free_mask == unknown_mask_shiftleft) 164 | | (free_mask == unknown_mask_shiftright) 165 | ) & (free_mask == 1) 166 | 167 | frontier_idxes = list(zip(*np.where(frontier_mask))) 168 | if len(frontier_idxes) > 0: 169 | if self.use_contour_sampling: 170 | frontier_img = frontier_mask.astype(np.uint8) * 255 171 | # Reduce size for efficiency 172 | scaling_factor = frontier_mask.shape[0] / 200.0 173 | frontier_img = cv2.resize( 174 | frontier_img, 175 | None, 176 | fx=1.0 / scaling_factor, 177 | fy=1.0 / scaling_factor, 178 | interpolation=cv2.INTER_NEAREST, 179 | ) 180 | # Add a single channel 181 | frontier_img = frontier_img[:, :, np.newaxis] 182 | contours, _ = cv2.findContours( 183 | frontier_img, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE 184 | ) 185 | if len(contours) == 0: 186 | tgt = self._rng.choice(frontier_idxes) # (y, x) 187 | else: 188 | contours_length = [len(contour) for contour in contours] 189 | contours = list(zip(contours, contours_length)) 190 | sorted_contours = sorted(contours, key=lambda x: x[1], reverse=True) 191 | 192 | contours = sorted_contours[:3] 193 | # Randomly pick one of the longest contours 194 | # To introduce some stochasticity in case the agent is stuck 195 | max_contour = self._rng.choice(contours)[0] 196 | # Pick a random sample from the longest contour 197 | tgt = self._rng.choice(max_contour)[ 198 | 0 199 | ] # Each point is [[x, y]] for some reason 200 | # Scale it back to original image size 201 | # Convert it to (y, x) convention as this will be reversed next 202 | tgt = (int(tgt[1] * scaling_factor), int(tgt[0] * scaling_factor)) 203 | else: 204 | tgt = self._rng.choice(frontier_idxes) # (y, x) 205 | 206 | self.frontier_target = ( 207 | np.clip(tgt[1], 1, self.map_size - 2).item(), 208 | np.clip(tgt[0], 1, self.map_size - 2).item(), 209 | ) # (x, y) 210 | else: 211 | self.frontier_target = (self.map_size // 2 + 4, self.map_size // 2 + 4) 212 | 213 | if self.show_animation: 214 | occ_map_copy = np.copy(occ_map) 215 | occ_map_copy = cv2.circle( 216 | occ_map_copy, self.frontier_target, 3, (255, 0, 0), -1 
217 | ) 218 | cv2.imshow("Occupancy map with target", np.flip(occ_map_copy, axis=2)) 219 | cv2.imshow("Frontier mask", frontier_mask.astype(np.uint8) * 255) 220 | cv2.waitKey(10) 221 | 222 | def has_reached_target(self): 223 | fx, fy = self.frontier_target 224 | cx, cy = self.map_size / 2, self.map_size / 2 225 | if math.sqrt((fx - cx) ** 2 + (fy - cy) ** 2) < 3.0: 226 | return True 227 | else: 228 | return False 229 | 230 | def update_target(self, prev_delta): 231 | """ 232 | Update the target to the new egocentric coordinate system. 233 | Inputs: 234 | prev_delta - (dx, dy, dtheta) motion in the previous position's 235 | frame of reference 236 | """ 237 | # Note: X - forward, Y - rightward in egocentric frame of references 238 | 239 | # Perform update in egocentric coordinate 240 | x, y = self._convert_to_egocentric(self.frontier_target) 241 | 242 | dx, dy, dt = prev_delta 243 | # Translate 244 | xp, yp = x - dx, y - dy 245 | # Rotate by -dt 246 | xp, yp = ( 247 | math.cos(-dt) * xp - math.sin(-dt) * yp, 248 | math.sin(-dt) * xp + math.cos(-dt) * yp, 249 | ) 250 | 251 | # Convert back to image coordinate 252 | xi, yi = self._convert_to_image((xp, yp)) 253 | xi = np.clip(xi, 1, self.map_size - 2) 254 | yi = np.clip(yi, 1, self.map_size - 2) 255 | 256 | self.frontier_target = (int(xi), int(yi)) 257 | 258 | def _convert_to_egocentric(self, coords): 259 | return (-coords[1] + self.map_size / 2, coords[0] - self.map_size / 2) 260 | 261 | def _convert_to_image(self, coords): 262 | # Forward - positive X, rightward - positive Y 263 | return (coords[1] + self.map_size / 2, -coords[0] + self.map_size / 2) 264 | 265 | def reset(self): 266 | self.frontier_target = None 267 | self._failure_count = 0 268 | self._time_elapsed_for_target = 0 269 | -------------------------------------------------------------------------------- /exploring_exploration/models/reconstruction.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # Copyright (c) Facebook, Inc. and its affiliates. 3 | # All rights reserved. 4 | # This source code is licensed under the license found in the 5 | # LICENSE file in the root directory of this source tree. 6 | 7 | import torch 8 | import torch.nn as nn 9 | import torch.nn.functional as F 10 | import torchvision.models as tmodels 11 | import torch.nn.modules.transformer as transformer 12 | 13 | 14 | class View(nn.Module): 15 | def __init__(self, *shape): 16 | # shape is a list 17 | super().__init__() 18 | self.shape = shape 19 | 20 | def forward(self, input): 21 | return input.view(*self.shape) 22 | 23 | 24 | class FeatureReconstructionModule(nn.Module): 25 | """An encoder-decoder model based on transformers for reconstructing 26 | concepts at a target location. 
27 | """ 28 | 29 | def __init__(self, nfeats, noutputs, nlayers=4): 30 | super().__init__() 31 | encoder_layer = transformer.TransformerEncoderLayer(nfeats + 16, 2, nfeats) 32 | decoder_layer = transformer.TransformerDecoderLayer(nfeats + 16, 2, nfeats) 33 | self.encoder = transformer.TransformerEncoder(encoder_layer, nlayers) 34 | self.decoder = transformer.TransformerDecoder(decoder_layer, nlayers) 35 | self.predict_outputs = nn.Linear(nfeats + 16, noutputs) 36 | 37 | def forward(self, x): 38 | """ 39 | Inputs: 40 | x - dictionary consisting of the following: 41 | { 42 | 'history_image_features': (T, N, nfeats) 43 | 'history_pose_features': (T, N, 16) 44 | 'target_pose_features': (1, N, 16) 45 | } 46 | Outputs: 47 | pred_outputs - (1, N, noutputs) 48 | """ 49 | target_pose_features = x["target_pose_features"][0] 50 | T, N, nfeats = x["history_image_features"].shape 51 | nRef = target_pose_features.shape[1] 52 | device = x["target_pose_features"].device 53 | # =================== Encode features and poses ======================= 54 | encoder_inputs = torch.cat( 55 | [x["history_image_features"], x["history_pose_features"]], dim=2 56 | ) # (T, N, nfeats+16) 57 | encoded_features = self.encoder(encoder_inputs) # (T, N, nfeats+16) 58 | # ================ Decode features for given poses ==================== 59 | decoder_pose_features = target_pose_features.unsqueeze(0) # (1, N, 16) 60 | # Initialize as zeros 61 | decoder_image_features = torch.zeros( 62 | *decoder_pose_features.shape[:2], nfeats 63 | ).to( 64 | device 65 | ) # (1, N, nfeats) 66 | decoder_inputs = torch.cat( 67 | [decoder_image_features, decoder_pose_features], dim=2 68 | ) # (1, N, nfeats+16) 69 | decoder_features = self.decoder( 70 | decoder_inputs, encoded_features 71 | ) # (1, N, nfeats+16) 72 | pred_outputs = self.predict_outputs(decoder_features).squeeze(0) 73 | return pred_outputs.unsqueeze(0) 74 | 75 | 76 | class FeatureNetwork(nn.Module): 77 | """Network to extract image features. 78 | """ 79 | 80 | def __init__(self): 81 | super().__init__() 82 | resnet = tmodels.resnet50(pretrained=True) 83 | self.net = nn.Sequential( 84 | resnet.conv1, 85 | resnet.bn1, 86 | resnet.relu, 87 | resnet.maxpool, 88 | resnet.layer1, 89 | resnet.layer2, 90 | resnet.layer3, 91 | resnet.layer4, 92 | resnet.avgpool, 93 | ) 94 | 95 | def forward(self, x): 96 | feat = self.net(x).squeeze(3).squeeze(2) 97 | feat = F.normalize(feat, p=2, dim=1) 98 | return feat 99 | 100 | 101 | class PoseEncoder(nn.Module): 102 | """Network to encode pose information. 103 | """ 104 | 105 | def __init__(self): 106 | super().__init__() 107 | self.main = nn.Sequential(nn.Linear(3, 16), nn.ReLU(), nn.Linear(16, 16),) 108 | 109 | def forward(self, x): 110 | return self.main(x) 111 | -------------------------------------------------------------------------------- /exploring_exploration/utils/common.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # Copyright (c) Facebook, Inc. and its affiliates. 3 | # All rights reserved. 4 | # This source code is licensed under the license found in the 5 | # LICENSE file in the root directory of this source tree. 6 | 7 | import math 8 | import torch 9 | import random 10 | import numpy as np 11 | import torch.nn.functional as F 12 | 13 | from einops import rearrange, asnumpy 14 | 15 | 16 | def flatten_two(x): 17 | """Flattens the first two dims. 
18 | """ 19 | return x.view(-1, *x.shape[2:]) 20 | 21 | 22 | def unflatten_two(x, sh1, sh2): 23 | """Unflattens the first two dims. 24 | """ 25 | return x.view(sh1, sh2, *x.shape[1:]) 26 | 27 | 28 | def unsq_exp(x, reps, dim=0): 29 | """Unsqueezes along dimension dim and repeats along the axis reps times. 30 | """ 31 | x_e = x.unsqueeze(dim) 32 | exp_args = [-1] * len(x_e.shape) 33 | exp_args[dim] = reps 34 | return x_e.expand(*exp_args).contiguous() 35 | 36 | 37 | approx_eq = lambda a, b, eps: torch.lt(torch.abs(a - b), eps) 38 | 39 | 40 | def norm_angle(x): 41 | """Normalizes an angle (scalar) between -pi to pi. 42 | """ 43 | if isinstance(x, np.ndarray): 44 | return np.arctan2(np.sin(x), np.cos(x)) 45 | elif isinstance(x, torch.Tensor): 46 | return torch.atan2(torch.sin(x), torch.cos(x)) 47 | else: 48 | return math.atan2(math.sin(x), math.cos(x)) 49 | 50 | 51 | def freeze_params(module): 52 | """Freezes all parameters of a module by setting requires_grad to False. 53 | """ 54 | for param in module.parameters(): 55 | param.requires_grad = False 56 | 57 | 58 | def unnormalize(data, mean, std): 59 | # data - (bs, H, W, C) 60 | data[:, :, :, 0] = data[:, :, :, 0] * std[0] + mean[0] 61 | data[:, :, :, 1] = data[:, :, :, 1] * std[1] + mean[1] 62 | data[:, :, :, 2] = data[:, :, :, 2] * std[2] + mean[2] 63 | return data 64 | 65 | 66 | def process_image(img): 67 | """Apply imagenet normalization to a batch of images. 68 | """ 69 | # img - (bs, C, H, W) 70 | mean = [0.485, 0.456, 0.406] 71 | std = [0.229, 0.224, 0.225] 72 | img_proc = img.float() / 255.0 73 | 74 | img_proc[:, 0] = (img_proc[:, 0] - mean[0]) / std[0] 75 | img_proc[:, 1] = (img_proc[:, 1] - mean[1]) / std[1] 76 | img_proc[:, 2] = (img_proc[:, 2] - mean[2]) / std[2] 77 | 78 | return img_proc 79 | 80 | 81 | def resize_image(img, shape=(84, 84), mode="bilinear"): 82 | """Resizes a batch of images. 83 | """ 84 | # img - (bs, C, H, W) FloatTensor 85 | out_img = F.interpolate(img, size=shape, mode=mode) 86 | return out_img 87 | 88 | 89 | def unprocess_image(img): 90 | """Undo imagenet normalization to a batch of images.""" 91 | # img - (bs, C, H, W) 92 | mean = [0.485, 0.456, 0.406] 93 | std = [0.229, 0.224, 0.225] 94 | img_unproc = np.copy(asnumpy(img)) 95 | img_unproc[:, 0] = img_unproc[:, 0] * std[0] + mean[0] 96 | img_unproc[:, 1] = img_unproc[:, 1] * std[1] + mean[1] 97 | img_unproc[:, 2] = img_unproc[:, 2] * std[2] + mean[2] 98 | 99 | img_unproc = np.clip(img_unproc, 0.0, 1.0) * 255.0 100 | img_unproc = img_unproc.astype(np.uint8) 101 | img_unproc = rearrange(img_unproc, "b c h w -> b h w c") 102 | 103 | return img_unproc 104 | 105 | 106 | # Weight initializations 107 | def init(module, weight_init, bias_init, gain=1): 108 | weight_init(module.weight.data, gain=gain) 109 | bias_init(module.bias.data) 110 | return module 111 | 112 | 113 | # https://github.com/openai/baselines/blob/master/baselines/common/tf_util.py#L87 114 | def init_normc_(weight, gain=1): 115 | weight.normal_(0, 1) 116 | weight *= gain / torch.sqrt(weight.pow(2).sum(1, keepdim=True)) 117 | 118 | 119 | def random_range(start, end, interval=1): 120 | """Returns a randomized range of numbers. 121 | """ 122 | vals = list(range(start, end, interval)) 123 | random.shuffle(vals) 124 | return vals 125 | -------------------------------------------------------------------------------- /exploring_exploration/utils/distributions.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # Copyright (c) Facebook, Inc. 
and its affiliates. 3 | # All rights reserved. 4 | # This source code is licensed under the license found in the 5 | # LICENSE file in the root directory of this source tree. 6 | 7 | import torch 8 | import torch.nn as nn 9 | 10 | from exploring_exploration.utils.common import init 11 | 12 | """ 13 | Modify standard PyTorch distributions so they are compatible with this code. 14 | """ 15 | 16 | FixedCategorical = torch.distributions.Categorical 17 | 18 | old_sample = FixedCategorical.sample 19 | FixedCategorical.sample = lambda self: old_sample(self).unsqueeze(-1) 20 | 21 | log_prob_cat = FixedCategorical.log_prob 22 | FixedCategorical.log_probs = lambda self, actions: log_prob_cat( 23 | self, actions.squeeze(-1) 24 | ).unsqueeze(-1) 25 | FixedCategorical.log_probs_all = lambda self: self.logits 26 | 27 | FixedCategorical.mode = lambda self: self.probs.argmax(dim=1, keepdim=True) 28 | 29 | FixedNormal = torch.distributions.Normal 30 | log_prob_normal = FixedNormal.log_prob 31 | FixedNormal.log_probs = lambda self, actions: log_prob_normal(self, actions).sum( 32 | -1, keepdim=True 33 | ) 34 | 35 | entropy = FixedNormal.entropy 36 | FixedNormal.entropy = lambda self: entropy(self).sum(-1) 37 | 38 | FixedNormal.mode = lambda self: self.mean 39 | 40 | 41 | class Categorical(nn.Module): 42 | def __init__(self, num_inputs, num_outputs): 43 | super(Categorical, self).__init__() 44 | 45 | init_ = lambda m: init( 46 | m, nn.init.orthogonal_, lambda x: nn.init.constant_(x, 0), gain=0.01 47 | ) 48 | 49 | self.linear = init_(nn.Linear(num_inputs, num_outputs)) 50 | 51 | def forward(self, x): 52 | x = self.linear(x) 53 | return FixedCategorical(logits=x) 54 | -------------------------------------------------------------------------------- /exploring_exploration/utils/geometry.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # Copyright (c) Facebook, Inc. and its affiliates. 3 | # All rights reserved. 4 | # This source code is licensed under the license found in the 5 | # LICENSE file in the root directory of this source tree. 
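# subtract_pose() and add_pose() below operate on (N, 3) tensors whose rows are
# (y, x, phi) world-frame poses, with angles in radians; both re-normalize the
# resulting angle to (-pi, pi] via atan2(sin, cos). subtract_pose() expresses
# poses in the frame of a reference pose, and add_pose() composes a relative
# pose back onto a reference pose. A quick round-trip check (illustrative, not
# part of the original source):
#
#     rel = subtract_pose(ref_pose, pose)           # (x, y, phi) in ref_pose's frame
#     back = add_pose(ref_pose, rel[:, [1, 0, 2]])  # add_pose defaults to (y, x, t) deltas
#     # `back` matches `pose` up to 2*pi wrapping of the angle.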
6 | 7 | import torch 8 | import numpy as np 9 | 10 | approx_eq = lambda a, b, eps: torch.lt(torch.abs(a - b), eps) 11 | 12 | 13 | def process_pose(pose): 14 | # pose - num_processes x 4 element Tensor with (r, theta, phi_head, phi_elev) - angles in radians 15 | # Output - num_processes x 3 torch tensor representing distance, cos and sin of relative theta 16 | pose_processed = torch.stack( 17 | (pose[:, 0], torch.cos(pose[:, 1]), torch.sin(pose[:, 1])), dim=1 18 | ).to(pose.device) 19 | return pose_processed 20 | 21 | 22 | def process_poseref(pose, map_shape, map_scale, angles, eps): 23 | # pose - batch_size x 3 - (r, theta, head) of the reference view 24 | r = pose[:, 0] 25 | t = pose[:, 1] 26 | x = r * torch.cos(t) 27 | y = r * torch.sin(t) 28 | mh, mw = map_shape[1:] 29 | # This convention comes from transform_to_map() in model_pose.py 30 | ref_on_map_x = torch.clamp(mw / 2 + x / map_scale, 0, mw - 1) 31 | ref_on_map_y = torch.clamp(mh / 2 + y / map_scale, 0, mh - 1) 32 | # Mapping heading angles to map locations 33 | ref_on_map_dir = torch.zeros(pose.shape[0]).to(pose.device) 34 | normalized_angles = torch.atan2(torch.sin(pose[:, 2]), torch.cos(pose[:, 2])) 35 | for i in range(angles.shape[0]): 36 | ref_on_map_dir[approx_eq(normalized_angles, angles[i].item(), eps)] = i 37 | return torch.stack([ref_on_map_x, ref_on_map_y, ref_on_map_dir], dim=1).long() 38 | 39 | 40 | def process_poseref_raw(pose, map_shape, map_scale, angles, eps): 41 | # pose - batch_size x 3 - (r, theta, head) of the reference view 42 | r = pose[:, 0] 43 | t = pose[:, 1] 44 | x = r * torch.cos(t) 45 | y = r * torch.sin(t) 46 | mh, mw = map_shape[1:] 47 | # This convention comes from transform_to_map() in model_pose.py 48 | ref_on_map_x = torch.clamp(mw / 2 + x / map_scale, 0, mw - 1) 49 | ref_on_map_y = torch.clamp(mh / 2 + y / map_scale, 0, mh - 1) 50 | normalized_angles = torch.atan2(torch.sin(pose[:, 2]), torch.cos(pose[:, 2])) 51 | return torch.stack([ref_on_map_x, ref_on_map_y, normalized_angles], dim=1) 52 | 53 | 54 | def position_loss_fn(pred, gt): 55 | """ 56 | pred - (bs, 3) ---> (r, cos_phi, sin_phi) 57 | gt - (bs, 3) ---> (r, cos_phi, sin_phi) 58 | pred won't be normalized, gt will be normalized cos, sin values 59 | """ 60 | pred_cossin = norm_cossin(pred[:, 1:]) 61 | gt_cossin = gt[:, 1:] 62 | pred_r = pred[:, 0] 63 | gt_r = gt[:, 0] 64 | pred_x = pred_r * pred_cossin[:, 0] 65 | pred_y = pred_r * pred_cossin[:, 1] 66 | gt_x = gt_r * gt_cossin[:, 0] 67 | gt_y = gt_r * gt_cossin[:, 1] 68 | loss = (pred_x - gt_x) ** 2 + (pred_y - gt_y) ** 2 69 | return loss 70 | 71 | 72 | def norm_cossin(input): 73 | """Convert unnormalized cos, sin predictions into [0, 1] range. 74 | """ 75 | # Normalize cos, sin predictions 76 | if isinstance(input, torch.Tensor): 77 | input = input / (torch.norm(input, dim=1).unsqueeze(1) + 1e-8) 78 | elif isinstance(input, np.ndarray): 79 | input = input / (np.linalg.norm(input, axis=1)[:, np.newaxis] + 1e-8) 80 | else: 81 | raise ValueError("Incorrect type for norm_cossin!") 82 | 83 | return input 84 | 85 | 86 | def process_odometer(poses): 87 | """Converts odometer readings in polar coordinates to xyt coordinates. 
88 | 89 | Inputs: 90 | pose - (bs, 4) Tensor with (r, theta, phi_head, phi_elev) 91 | - where angles are in radians 92 | Outputs: 93 | pose_processed - (bs, 4) Tensor with (y, x, phi_head, phi_elev) 94 | """ 95 | pose_processed = torch.stack( 96 | [ 97 | poses[:, 0] * torch.sin(poses[:, 1]), 98 | poses[:, 0] * torch.cos(poses[:, 1]), 99 | poses[:, 2], 100 | poses[:, 3], 101 | ], 102 | dim=1, 103 | ) 104 | return pose_processed 105 | 106 | 107 | def np_normalize(angles): 108 | return np.arctan2(np.sin(angles), np.cos(angles)) 109 | 110 | 111 | def xyt2polar(poses): 112 | """Converts poses from carteisan (xyt) to polar (rpt) coordinates. 113 | 114 | Inputs: 115 | poses - (bs, 3) Tensor --- (x, y, theta) 116 | Outputs: 117 | poses Tensor with (r, phi, theta) conventions 118 | """ 119 | return torch.stack( 120 | [ 121 | torch.norm(poses[:, :2], dim=1), # r 122 | torch.atan2(poses[:, 1], poses[:, 0]), # phi 123 | poses[:, 2], 124 | ], 125 | dim=1, 126 | ) 127 | 128 | 129 | def polar2xyt(poses): 130 | """Converts poses from polar (rpt) to cartesian (xyt) coordinates. 131 | 132 | Inputs: 133 | poses - (bs, 3) Tensor --- (r, phi, theta) 134 | Outputs: 135 | poses Tensor with (x, y, theta) conventions 136 | """ 137 | return torch.stack( 138 | [ 139 | poses[:, 0] * torch.cos(poses[:, 1]), # x 140 | poses[:, 0] * torch.sin(poses[:, 1]), # y 141 | poses[:, 2], 142 | ], 143 | dim=1, 144 | ) 145 | 146 | 147 | def compute_egocentric_coors(delta, prev_pos, scale): 148 | """ 149 | delta - (N, 4) --- (y, x, phi_head, phi_elev) 150 | prev_pos - (N, 4) --- (y, x, phi_head, phi_elev) 151 | """ 152 | dy, dx, dt = delta[:, 0], delta[:, 1], delta[:, 2] 153 | x, y, t = prev_pos[:, 0], prev_pos[:, 1], prev_pos[:, 2] 154 | dr = torch.sqrt(dx ** 2 + dy ** 2) 155 | dp = torch.atan2(dy, dx) - t 156 | dx_ego = dr * torch.cos(dp) / scale 157 | dy_ego = dr * torch.sin(dp) / scale 158 | dt_ego = dt 159 | 160 | return torch.stack([dx_ego, dy_ego, dt_ego], dim=1) 161 | 162 | 163 | def subtract_pose(pose_common, poses): 164 | """ 165 | Convert poses to frame-of-reference of pose_common. 166 | 167 | Inputs: 168 | pose_common - (N, 3) --- (y, x, phi) 169 | poses - (N, 3) --- (y, x, phi) 170 | 171 | Outputs: 172 | poses_n - (N, 3) --- (x, y, phi) in the new coordinate system 173 | """ 174 | 175 | x = poses[:, 1] 176 | y = poses[:, 0] 177 | phi = poses[:, 2] 178 | 179 | x_c = pose_common[:, 1] 180 | y_c = pose_common[:, 0] 181 | phi_c = pose_common[:, 2] 182 | 183 | # Polar coordinates in the new frame-of-reference 184 | r_n = torch.sqrt((x - x_c) ** 2 + (y - y_c) ** 2) 185 | theta_n = torch.atan2(y - y_c, x - x_c) - phi_c 186 | # Convert to cartesian coordinates 187 | x_n = r_n * torch.cos(theta_n) 188 | y_n = r_n * torch.sin(theta_n) 189 | phi_n = phi - phi_c 190 | # Normalize phi to lie between -pi to pi 191 | phi_n = torch.atan2(torch.sin(phi_n), torch.cos(phi_n)) 192 | 193 | poses_n = torch.stack([x_n, y_n, phi_n], dim=1) 194 | 195 | return poses_n 196 | 197 | 198 | def add_pose(pose_common, dposes, mode="yxt"): 199 | """ 200 | Convert dposes from frame-of-reference of pose_common to global pose. 
201 | 202 | Inputs: 203 | pose_common - (N, 3) 204 | dposes - (N, 3) 205 | 206 | Outputs: 207 | poses - (N, 3) 208 | """ 209 | 210 | assert mode in ["xyt", "yxt"] 211 | 212 | if mode == "yxt": 213 | dy, dx, dt = torch.unbind(dposes, dim=1) 214 | y_c, x_c, t_c = torch.unbind(pose_common, dim=1) 215 | else: 216 | dx, dy, dt = torch.unbind(dposes, dim=1) 217 | x_c, y_c, t_c = torch.unbind(pose_common, dim=1) 218 | 219 | dr = torch.sqrt(dx ** 2 + dy ** 2) 220 | dphi = torch.atan2(dy, dx) + t_c 221 | x = x_c + dr * torch.cos(dphi) 222 | y = y_c + dr * torch.sin(dphi) 223 | t = t_c + dt 224 | # Normalize angles to lie between -pi to pi 225 | t = torch.atan2(torch.sin(t), torch.cos(t)) 226 | 227 | if mode == "yxt": 228 | poses = torch.stack([y, x, t], dim=1) 229 | else: 230 | poses = torch.stack([x, y, t], dim=1) 231 | 232 | return poses 233 | -------------------------------------------------------------------------------- /exploring_exploration/utils/median_pooling.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # Copyright (c) Facebook, Inc. and its affiliates. 3 | # All rights reserved. 4 | # This source code is licensed under the license found in the 5 | # LICENSE file in the root directory of this source tree. 6 | 7 | import torch.nn as nn 8 | import torch.nn.functional as F 9 | from torch.nn.modules.utils import _pair, _quadruple 10 | 11 | # Obtained from https://gist.github.com/rwightman/f2d3849281624be7c0f11c85c87c1598 12 | class MedianPool2d(nn.Module): 13 | """ Median pool (usable as median filter when stride=1) module. 14 | 15 | Args: 16 | kernel_size: size of pooling kernel, int or 2-tuple 17 | stride: pool stride, int or 2-tuple 18 | padding: pool padding, int or 4-tuple (l, r, t, b) as in pytorch F.pad 19 | same: override padding and enforce same padding, boolean 20 | """ 21 | 22 | def __init__(self, kernel_size=3, stride=1, padding=0, same=False): 23 | super(MedianPool2d, self).__init__() 24 | self.k = _pair(kernel_size) 25 | self.stride = _pair(stride) 26 | self.padding = _quadruple(padding) # convert to l, r, t, b 27 | self.same = same 28 | 29 | def _padding(self, x): 30 | if self.same: 31 | ih, iw = x.size()[2:] 32 | if ih % self.stride[0] == 0: 33 | ph = max(self.k[0] - self.stride[0], 0) 34 | else: 35 | ph = max(self.k[0] - (ih % self.stride[0]), 0) 36 | if iw % self.stride[1] == 0: 37 | pw = max(self.k[1] - self.stride[1], 0) 38 | else: 39 | pw = max(self.k[1] - (iw % self.stride[1]), 0) 40 | pl = pw // 2 41 | pr = pw - pl 42 | pt = ph // 2 43 | pb = ph - pt 44 | padding = (pl, pr, pt, pb) 45 | else: 46 | padding = self.padding 47 | return padding 48 | 49 | def forward(self, x): 50 | # using existing pytorch functions and tensor ops so that we get autograd, 51 | # would likely be more efficient to implement from scratch at C/Cuda level 52 | x = F.pad(x, self._padding(x), mode="reflect") 53 | x = x.unfold(2, self.k[0], self.stride[0]).unfold(3, self.k[1], self.stride[1]) 54 | x = x.contiguous().view(x.size()[:4] + (-1,)).median(dim=-1)[0] 55 | return x 56 | 57 | 58 | class MedianPool1d(nn.Module): 59 | """ Median pool (usable as median filter when stride=1) module. 
60 | 61 | Args: 62 | kernel_size: size of pooling kernel, int 63 | stride: pool stride, int 64 | padding: pool padding, int 65 | same: override padding and enforce same padding, boolean 66 | """ 67 | 68 | def __init__(self, kernel_size=3, stride=1, padding=0, same=False): 69 | super(MedianPool1d, self).__init__() 70 | self.k = kernel_size 71 | self.stride = stride 72 | self.padding = _pair(padding) 73 | self.same = same 74 | 75 | def _padding(self, x): 76 | # x - (bs, C, L) 77 | if self.same: 78 | il = x.size()[2] 79 | if il % self.stride == 0: 80 | pl = max(self.k - self.stride, 0) 81 | else: 82 | pl = max(self.k - (il % self.stride), 0) 83 | pleft = pl // 2 84 | pright = pl - pleft 85 | padding = (pleft, pright) 86 | else: 87 | padding = self.padding 88 | return padding 89 | 90 | def forward(self, x): 91 | # using existing pytorch functions and tensor ops so that we get autograd, 92 | # would likely be more efficient to implement from scratch at C/Cuda level 93 | x = F.pad(x, self._padding(x), mode="reflect") 94 | x = x.unfold(2, self.k, self.stride) 95 | x = x.contiguous().median(dim=-1)[0] 96 | return x 97 | -------------------------------------------------------------------------------- /exploring_exploration/utils/metrics.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # Copyright (c) Facebook, Inc. and its affiliates. 3 | # All rights reserved. 4 | # This source code is licensed under the license found in the 5 | # LICENSE file in the root directory of this source tree. 6 | 7 | import math 8 | import torch 9 | import logging 10 | import numpy as np 11 | 12 | from exploring_exploration.utils.geometry import norm_cossin 13 | from exploring_exploration.utils.common import norm_angle 14 | 15 | 16 | def precision_at_k(pred_scores, gt_scores, k=5, gt_K=5): 17 | """ 18 | Measures the fraction of correctly retrieved classes among the top-k 19 | retrievals. 
20 | 21 | Inputs: 22 | pred_scores - (N, nclasses) logits 23 | gt_scores - (N, nclasses) similarity scores 24 | k - the top-k retrievals from pred_scores to consider 25 | gt_K - how many of the most similar classes in gt_scores 26 | should be considered as ground-truth 27 | 28 | Outputs: 29 | prec_scores - (N, ) precision@k scores per batch element 30 | """ 31 | device = pred_scores.device 32 | N, nclasses = pred_scores.shape 33 | 34 | relevant_idxes = (torch.topk(gt_scores, gt_K, dim=1).indices).cpu() # (N, gt_K) 35 | relevant_idxes_indicator = torch.zeros(N, nclasses) 36 | relevant_idxes_indicator.scatter_(1, relevant_idxes, 1.0) 37 | 38 | pred_idxes = (torch.topk(pred_scores, k, dim=1).indices).cpu() # (N, k) 39 | pred_idxes_indicator = torch.zeros(N, nclasses) 40 | pred_idxes_indicator.scatter_(1, pred_idxes, 1.0) 41 | 42 | intersection_indicator = ( 43 | relevant_idxes_indicator * pred_idxes_indicator 44 | ) # (N, nclasses) 45 | prec_scores = (intersection_indicator.sum(dim=1) / k).to(device) 46 | 47 | return prec_scores 48 | 49 | 50 | def s_metric(agent_pos, target_pos, thresh, stop_called): 51 | # Success rate 52 | if not stop_called: 53 | return 0.0 54 | 55 | dist = np.linalg.norm(np.array(agent_pos) - np.array(target_pos)).item() 56 | score = 0.0 if dist > thresh else 1.0 57 | return score 58 | 59 | 60 | def spl_metric( 61 | agent_pos, target_pos, thresh, path_length, shortest_path_length, stop_called 62 | ): 63 | # Success rate normalized by Path Length 64 | if not stop_called: 65 | return 0.0 66 | 67 | dist = np.linalg.norm(np.array(agent_pos) - np.array(target_pos)).item() 68 | score = ( 69 | 0.0 70 | if dist > thresh 71 | else shortest_path_length / (max(shortest_path_length, path_length) + 1e-7) 72 | ) 73 | return score 74 | 75 | 76 | def compute_pose_metrics( 77 | true_poses, pred_poses, true_pose_angles, pred_pose_angles, env_name 78 | ): 79 | """ 80 | Inputs: 81 | true_poses - array of ground truth poses 82 | pred_poses - array of predicted poses 83 | true_pose_angles - array of ground truth heading angles 84 | pred_pose_angles - array of predicted heading angles 85 | env_name - name of current environment 86 | 87 | Outputs 88 | metrics - a dictionary containing the different metrics measured 89 | """ 90 | metrics = {} 91 | heading_err = np.abs(norm_angle(pred_pose_angles - true_pose_angles)) 92 | avg_heading_err = math.degrees(heading_err.mean().item()) 93 | heading_err_per_episode = np.degrees(heading_err) 94 | 95 | # Compute angular error 96 | norm_gt_pose = torch.Tensor(true_poses[:, 1:]) 97 | norm_gt_angle = torch.atan2(norm_gt_pose[:, 1], norm_gt_pose[:, 0]) 98 | norm_pred_pose = norm_cossin(torch.Tensor(pred_poses[:, 1:])) 99 | norm_pred_angle = torch.atan2(norm_pred_pose[:, 1], norm_pred_pose[:, 0]) 100 | norm_ae = torch.abs(norm_angle(norm_pred_angle - norm_gt_angle)) 101 | norm_ae_avg = math.degrees(norm_ae.cpu().mean().item()) 102 | 103 | norm_ae_per_episode = np.degrees(norm_ae.cpu().numpy()) 104 | 105 | # Compute distance prediction error 106 | distance_err = np.sqrt(((true_poses[:, 0] - pred_poses[:, 0]) ** 2)) 107 | if "avd" not in env_name: 108 | distance_err = distance_err * 1000.0 # Convert to mm 109 | avg_distance_err = distance_err.mean() 110 | 111 | distance_err_per_episode = distance_err 112 | 113 | # Compute position error 114 | gt_r = torch.Tensor(true_poses[:, 0]) 115 | gt_x = gt_r * torch.cos(norm_gt_angle) 116 | gt_y = gt_r * torch.sin(norm_gt_angle) 117 | pred_r = torch.Tensor(pred_poses[:, 0]) 118 | pred_x = pred_r * torch.cos(norm_pred_angle) 
119 | pred_y = pred_r * torch.sin(norm_pred_angle) 120 | position_err = torch.sqrt((gt_x - pred_x) ** 2 + (gt_y - pred_y) ** 2) 121 | if "avd" not in env_name: 122 | position_err = position_err * 1000.0 # Convert to mm 123 | mean_position_err = position_err.mean().item() 124 | 125 | position_err_per_episode = position_err.cpu().numpy() 126 | 127 | # Compute position error, heading error as a function of difficulty 128 | difficulty_bins = list(range(500, 7000, 500)) 129 | position_errors_vs_diff = [] 130 | heading_errors_vs_diff = [] 131 | heading_err = torch.Tensor(heading_err) 132 | for i in range(len(difficulty_bins) - 1): 133 | dl, dh = difficulty_bins[i], difficulty_bins[i + 1] 134 | if "avd" not in env_name: 135 | diff_mask = (gt_r * 1000.0 < dh) & (gt_r * 1000.0 >= dl) 136 | else: 137 | diff_mask = (gt_r < dh) & (gt_r >= dl) 138 | position_error_curr = position_err[diff_mask] 139 | heading_error_curr = heading_err[diff_mask] 140 | if diff_mask.sum() == 0: 141 | position_errors_vs_diff.append(0) 142 | heading_errors_vs_diff.append(0) 143 | else: 144 | position_errors_vs_diff.append(position_error_curr.mean()) 145 | heading_errors_vs_diff.append(math.degrees(heading_error_curr.mean())) 146 | 147 | # Compute pose success rates at various thresholds 148 | success_thresholds = [250, 500, 750, 1000, 1250, 1500, 1750, 2000, 2250, 2500, 2750] 149 | success_rates = [ 150 | (position_err < sthresh).float().mean().item() for sthresh in success_thresholds 151 | ] 152 | 153 | logging.info("Success rates and thresholds:") 154 | logging.info( 155 | " | ".join(["{:6.0f}".format(sthresh) for sthresh in success_thresholds]) 156 | ) 157 | logging.info(" | ".join(["{:6.4f}".format(srate) for srate in success_rates])) 158 | 159 | logging.info("Position, heading errors at different difficulty levels:") 160 | logging.info(" || ".join(["{:6.2f}".format(dlevel) for dlevel in difficulty_bins])) 161 | logging.info( 162 | " || ".join(["{:6.2f}".format(perror) for perror in position_errors_vs_diff]) 163 | ) 164 | logging.info( 165 | " || ".join(["{:6.2f}".format(herror) for herror in heading_errors_vs_diff]) 166 | ) 167 | 168 | metrics["norm_ae"] = norm_ae_avg 169 | metrics["distance_err"] = avg_distance_err 170 | metrics["position_err"] = mean_position_err 171 | metrics["heading_err"] = avg_heading_err 172 | for thresh, rate in zip(success_thresholds, success_rates): 173 | metrics["success_rate @ {:.1f}".format(thresh)] = rate 174 | for level_0, level_1, err in zip( 175 | difficulty_bins[:-1], difficulty_bins[1:], position_errors_vs_diff 176 | ): 177 | metrics[ 178 | "position_err @ distances b/w {:.1f} to {:.1f}".format(level_0, level_1) 179 | ] = err 180 | 181 | for level_0, level_1, err in zip( 182 | difficulty_bins[:-1], difficulty_bins[1:], heading_errors_vs_diff 183 | ): 184 | metrics[ 185 | "heading_err @ distances b/w {:.1f} to {:.1f}".format(level_0, level_1) 186 | ] = err 187 | 188 | per_episode_metrics = { 189 | "heading_err": heading_err_per_episode, 190 | "norm_ae": norm_ae_per_episode, 191 | "position_err": position_err_per_episode, 192 | } 193 | return metrics, per_episode_metrics 194 | -------------------------------------------------------------------------------- /exploring_exploration/utils/reconstruction.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # Copyright (c) Facebook, Inc. and its affiliates. 3 | # All rights reserved. 
4 | # This source code is licensed under the license found in the 5 | # LICENSE file in the root directory of this source tree. 6 | 7 | import torch 8 | import torch.nn.functional as F 9 | 10 | from exploring_exploration.utils.common import ( 11 | unflatten_two, 12 | flatten_two, 13 | ) 14 | from exploring_exploration.utils.geometry import subtract_pose 15 | 16 | 17 | def multi_label_classification_loss(x, y, reduction="batchmean"): 18 | """ 19 | Multi-label classification loss - KL divergence between a uniform 20 | distribution over the GT classes and the predicted probabilities. 21 | Inputs: 22 | x - (bs, nclasses) predicted logits 23 | y - (bs, nclasses) with ones for the right classes and zeros 24 | for the wrong classes 25 | """ 26 | x_logprob = F.log_softmax(x, dim=1) 27 | y_prob = F.normalize( 28 | y.float(), p=1, dim=1 29 | ) # L1 normalization to convert to probabilities 30 | loss = F.kl_div(x_logprob, y_prob, reduction=reduction) 31 | return loss 32 | 33 | 34 | def rec_loss_fn_classify( 35 | x_logits, x_gt_feat, cluster_centroids, K=5, reduction="batchmean" 36 | ): 37 | """ 38 | Given the predicted logits and ground-truth reference feature, 39 | find the top-K NN cluster centroids to the ground-truth feature. 40 | Using the top-k clusters as the ground-truth, use a multi-label 41 | classification loss. 42 | 43 | NOTE - this assumes that x_gt_feat and cluster_centroids are unit vectors. 44 | 45 | Inputs: 46 | x_logits - (bs, nclusters) predicted logits 47 | x_gt_feat - (bs, nclusters) reference feature that consists of 48 | similarity scores between GT image and cluster centroids. 49 | cluster_centroids - (nclusters, feat_size) cluster centroids 50 | """ 51 | bs, nclasses = x_logits.shape 52 | nclusters, feat_size = cluster_centroids.shape 53 | device = x_logits.device 54 | 55 | # Compute cosine similarity between x_gt_feat and cluster_centroids 56 | cosine_sim = x_gt_feat 57 | 58 | # Sample top-K similar clusters 59 | topK_outputs = torch.topk(cosine_sim, K, dim=1) 60 | 61 | # Generate K-hot encoding 62 | k_hot_encoding = ( 63 | torch.zeros(bs, nclasses).to(device).scatter_(1, topK_outputs.indices, 1.0) 64 | ) 65 | 66 | loss = multi_label_classification_loss( 67 | x_logits, k_hot_encoding, reduction=reduction 68 | ) 69 | 70 | return loss 71 | 72 | 73 | def compute_reconstruction_rewards( 74 | obs_feats, 75 | obs_odometer, 76 | tgt_feats, 77 | tgt_poses, 78 | cluster_centroids_t, 79 | decoder, 80 | pose_encoder, 81 | ): 82 | """ 83 | Inputs: 84 | obs_feats - (T, N, nclusters) 85 | obs_odometer - (T, N, 3) --- (y, x, theta) 86 | tgt_feats - (N, nRef, nclusters) 87 | tgt_poses - (N, nRef, 3) --- (y, x, theta) 88 | cluster_centroids_t - (nclusters, feat_dim) 89 | decoder - decoder model 90 | pose_encoder - pose_encoder model 91 | 92 | Outputs: 93 | reward - (N, nRef) float values indicating how many 94 | GT clusters were successfully retrieved for 95 | each target. 
96 | """ 97 | T, N, nclusters = obs_feats.shape 98 | nRef = tgt_feats.shape[1] 99 | device = obs_feats.device 100 | 101 | obs_feats_exp = obs_feats.unsqueeze(2) 102 | obs_feats_exp = obs_feats_exp.expand( 103 | -1, -1, nRef, -1 104 | ).contiguous() # (T, N, nRef, nclusters) 105 | obs_odometer_exp = obs_odometer.unsqueeze(2) 106 | obs_odometer_exp = obs_odometer_exp.expand( 107 | -1, -1, nRef, -1 108 | ).contiguous() # (T, N, nRef, 3) 109 | tgt_poses_exp = ( 110 | tgt_poses.unsqueeze(0).expand(T, -1, -1, -1).contiguous() 111 | ) # (T, N, nRef, 3) 112 | 113 | # Compute relative poses 114 | obs_odometer_exp = obs_odometer_exp.view(T * N * nRef, 3) 115 | tgt_poses_exp = tgt_poses_exp.view(T * N * nRef, 3) 116 | obs_relpose = subtract_pose( 117 | obs_odometer_exp, tgt_poses_exp 118 | ) # (T*N*nRef, 3) --- (x, y, phi) 119 | 120 | # Compute pose encoding 121 | with torch.no_grad(): 122 | obs_relpose_enc = pose_encoder(obs_relpose) # (T*N*nRef, 16) 123 | obs_relpose_enc = obs_relpose_enc.view(T, N, nRef, -1) # (T, N, nRef, 16) 124 | tgt_relpose_enc = torch.zeros(1, *obs_relpose_enc.shape[1:]).to( 125 | device 126 | ) # (1, N, nRef, 16) 127 | 128 | # Compute reconstructions 129 | obs_feats_exp = obs_feats_exp.view(T, N * nRef, nclusters) 130 | obs_relpose_enc = obs_relpose_enc.view(T, N * nRef, -1) 131 | tgt_relpose_enc = tgt_relpose_enc.view(1, N * nRef, -1) 132 | 133 | rec_inputs = { 134 | "history_image_features": obs_feats_exp, 135 | "history_pose_features": obs_relpose_enc, 136 | "target_pose_features": tgt_relpose_enc, 137 | } 138 | 139 | with torch.no_grad(): 140 | pred_logits = decoder(rec_inputs) # (1, N*nRef, nclusters) 141 | pred_logits = pred_logits.squeeze(0) # (N*nRef, nclusters) 142 | pred_logits = unflatten_two(pred_logits, N, nRef) # (N, nRef, nclusters) 143 | 144 | # Compute GT classes 145 | tgt_feats_sim = tgt_feats # (N, nRef, nclusters) 146 | topk_gt = torch.topk(tgt_feats_sim, 5, dim=2) 147 | topk_gt_values = topk_gt.values # (N, nRef, nclusters) 148 | topk_gt_thresh = topk_gt_values.min(dim=2).values # (N, nRef) 149 | 150 | # ------------------ KL Div loss based reward -------------------- 151 | reward = -rec_loss_fn_classify( 152 | flatten_two(pred_logits), 153 | flatten_two(tgt_feats), 154 | cluster_centroids_t.t(), 155 | K=2, 156 | reduction="none", 157 | ).sum( 158 | dim=1 159 | ) # (N*nRef, ) 160 | reward = reward.view(N, nRef) 161 | 162 | return reward 163 | 164 | 165 | def masked_mean(values, masks, axis=None): 166 | return (values * masks).sum(axis=axis) / (masks.sum(axis=axis) + 1e-10) 167 | -------------------------------------------------------------------------------- /exploring_exploration/utils/visualization.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # Copyright (c) Facebook, Inc. and its affiliates. 3 | # All rights reserved. 4 | # This source code is licensed under the license found in the 5 | # LICENSE file in the root directory of this source tree. 
6 | 7 | import cv2 8 | import math 9 | import torch 10 | import numpy as np 11 | from torch.utils.tensorboard import SummaryWriter 12 | 13 | 14 | def torch_to_np(image): 15 | image = (image.cpu().numpy()).transpose(1, 2, 0) 16 | image = image.astype(np.uint8) 17 | image = np.flip(image, axis=2) 18 | return image 19 | 20 | 21 | def torch_to_np_depth(image, max_depth=10000.0): 22 | depth = (image.cpu().numpy())[0] 23 | depth = (np.clip(depth, 0, max_depth) / max_depth) * 255.0 24 | depth = depth.astype(np.uint8) 25 | depth = np.repeat(depth[..., np.newaxis], 3, axis=2) 26 | return depth 27 | 28 | 29 | class TensorboardWriter(SummaryWriter): 30 | def __init__(self, *args, **kwargs): 31 | super().__init__(*args, **kwargs) 32 | 33 | def add_video_from_np_images( 34 | self, video_name: str, step_idx: int, images: np.ndarray, fps: int = 10 35 | ) -> None: 36 | r"""Write video into tensorboard from images frames. 37 | Args: 38 | video_name: name of video string. 39 | step_idx: int of checkpoint index to be displayed. 40 | images: list of n frames. Each frame is a np.ndarray of shape. 41 | fps: frame per second for output video. 42 | Returns: 43 | None. 44 | """ 45 | # initial shape of np.ndarray list: N * (H, W, 3) 46 | frame_tensors = [torch.from_numpy(np_arr).unsqueeze(0) for np_arr in images] 47 | video_tensor = torch.cat(tuple(frame_tensors)) 48 | video_tensor = video_tensor.permute(0, 3, 1, 2).unsqueeze(0) 49 | # final shape of video tensor: (1, n, 3, H, W) 50 | self.add_video(video_name, video_tensor, fps=fps, global_step=step_idx) 51 | 52 | 53 | def write_video(frames, path, fps=10.0, video_format="MP4V"): 54 | fourcc = cv2.VideoWriter_fourcc(*video_format) 55 | shape = frames[0].shape[:2][::-1] # (WIDTH, HEIGHT) 56 | vidwriter = cv2.VideoWriter(path, fourcc, fps, shape) 57 | for frame in frames: 58 | vidwriter.write(frame[:, :, ::-1]) # Convert to BGR 59 | vidwriter.release() 60 | 61 | 62 | def create_reference_grid(refs_uint8): 63 | """ 64 | Inputs: 65 | refs_uint8 - (nRef, H, W, C) numpy array 66 | """ 67 | refs_uint8 = np.copy(refs_uint8) 68 | nRef, H, W, C = refs_uint8.shape 69 | 70 | nrow = int(math.sqrt(nRef)) 71 | 72 | ncol = nRef // nrow # (number of images per column) 73 | if nrow * ncol < nRef: 74 | ncol += 1 75 | final_grid = np.zeros((nrow * ncol, *refs_uint8.shape[1:]), dtype=np.uint8) 76 | font = cv2.FONT_HERSHEY_SIMPLEX 77 | 78 | final_grid[:nRef] = refs_uint8 79 | final_grid = final_grid.reshape( 80 | ncol, nrow, *final_grid.shape[1:] 81 | ) # (ncol, nrow, H, W, C) 82 | final_grid = final_grid.transpose(0, 2, 1, 3, 4) 83 | final_grid = final_grid.reshape(ncol * H, nrow * W, C) 84 | return final_grid 85 | 86 | 87 | def draw_border(images, color=(255, 0, 0), thickness=5): 88 | """Draw image border. 
89 | 90 | Inputs: 91 | images - (N, H, W, C) numpy array 92 | """ 93 | images[:, :thickness, :, 0] = color[0] 94 | images[:, :thickness, :, 1] = color[1] 95 | images[:, :thickness, :, 2] = color[2] 96 | 97 | images[:, -thickness:, :, 0] = color[0] 98 | images[:, -thickness:, :, 1] = color[1] 99 | images[:, -thickness:, :, 2] = color[2] 100 | 101 | images[:, :, :thickness, 0] = color[0] 102 | images[:, :, :thickness, 1] = color[1] 103 | images[:, :, :thickness, 2] = color[2] 104 | 105 | images[:, :, -thickness:, 0] = color[0] 106 | images[:, :, -thickness:, 1] = color[1] 107 | images[:, :, -thickness:, 2] = color[2] 108 | 109 | return images 110 | -------------------------------------------------------------------------------- /reconstruction_data_generation/avd/.gitignore: -------------------------------------------------------------------------------- 1 | uniform_samples 2 | imagenet_clusters 3 | -------------------------------------------------------------------------------- /reconstruction_data_generation/avd/gather_uniform_points.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # Copyright (c) Facebook, Inc. and its affiliates. 3 | # All rights reserved. 4 | # This source code is licensed under the license found in the 5 | # LICENSE file in the root directory of this source tree. 6 | 7 | import os 8 | import math 9 | import argparse 10 | import numpy as np 11 | import multiprocessing as mp 12 | 13 | from PIL import Image 14 | 15 | import gym 16 | import gym_avd 17 | 18 | 19 | def str2bool(v): 20 | return True if v.lower() in ["t", "true", "y", "yes"] else False 21 | 22 | 23 | parser = argparse.ArgumentParser() 24 | parser.add_argument("--save_directory", type=str, default="uniform_samples") 25 | parser.add_argument("--seed", type=int, default=123) 26 | parser.add_argument("--debug", type=str2bool, default=False) 27 | 28 | 29 | args = parser.parse_args() 30 | args.env_name = "avd-v1" 31 | 32 | 33 | def write_data(data_tuple): 34 | img, img_name = data_tuple 35 | img = Image.fromarray(img) 36 | img.save(img_name) 37 | 38 | 39 | def safe_mkdir(path): 40 | try: 41 | os.mkdir(path) 42 | except: 43 | pass 44 | 45 | 46 | safe_mkdir(args.save_directory) 47 | save_dir = args.save_directory 48 | 49 | pool = mp.Pool(32) 50 | 51 | env = gym.make(args.env_name) 52 | 53 | 54 | def gather_data(env, scenes, args): 55 | scene_images = [] 56 | for scene_idx in scenes: 57 | print("Gathering data for scene: {}".format(scene_idx)) 58 | _ = env.reset(scene_idx=scene_idx) 59 | min_x, min_z, max_x, max_z = env.get_environment_extents() 60 | 61 | # Sample nodes uniformly @ 2m 62 | all_nodes = env.data_conn[scene_idx]["nodes"] 63 | all_nodes_positions = [ 64 | [node["world_pos"][2], node["world_pos"][0]] for node in all_nodes 65 | ] 66 | all_nodes_positions = np.array(all_nodes_positions) * env.scale 67 | 68 | range_x = np.arange(min_x, max_x, 2000.0) 69 | range_z = np.arange(min_z, max_z, 2000.0) 70 | relevant_nodes = [] 71 | for x in range_x: 72 | for z in range_z: 73 | # Find closest node to this coordinate 74 | min_dist = math.inf 75 | min_dist_node = None 76 | for node, node_position in zip(all_nodes, all_nodes_positions): 77 | d = np.sqrt( 78 | (x - node_position[0]) ** 2 + (z - node_position[1]) 79 | ).item() 80 | if d < min_dist: 81 | min_dist = d 82 | min_dist_node = node 83 | relevant_nodes.append(min_dist_node) 84 | 85 | relevant_images = [] 86 | for node in relevant_nodes: 87 | for j in range(0, 12, 3): 88 | image_name = 
node["views"][j]["image_name"] 89 | relevant_images.append(env._get_img(image_name)) 90 | 91 | scene_images += relevant_images 92 | 93 | return scene_images 94 | 95 | 96 | for split in ["train", "val", "test"]: 97 | split_path = os.path.join(args.save_directory, split) 98 | safe_mkdir(split_path) 99 | split_scenes = getattr(env, "{}_scenes".format(split)) 100 | print("========= Gathering data for split: {} =========".format(split)) 101 | split_images = gather_data(env, split_scenes, args) 102 | 103 | img_tuples = [] 104 | for i, img in enumerate(split_images): 105 | path = os.path.join(split_path, f"image_{i:07d}.png") 106 | img_tuples.append((img, path)) 107 | 108 | _ = pool.map(write_data, img_tuples) 109 | -------------------------------------------------------------------------------- /reconstruction_data_generation/generate_imagenet_clusters.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # Copyright (c) Facebook, Inc. and its affiliates. 3 | # All rights reserved. 4 | # This source code is licensed under the license found in the 5 | # LICENSE file in the root directory of this source tree. 6 | 7 | import os 8 | import cv2 9 | import h5py 10 | import random 11 | import argparse 12 | import numpy as np 13 | import subprocess as sp 14 | 15 | import torch 16 | import torch.nn.functional as F 17 | import torchvision.transforms as transforms 18 | from torch.utils.data import Dataset, DataLoader 19 | 20 | from PIL import Image 21 | from tensorboardX import SummaryWriter 22 | from sklearn import metrics 23 | from sklearn.cluster import MiniBatchKMeans 24 | from exploring_exploration.models.reconstruction import FeatureNetwork 25 | 26 | 27 | class RGBDataset(Dataset): 28 | def __init__( 29 | self, dataset_root, seed=123, transform=None, image_size=256, truncate_count=-1, 30 | ): 31 | random.seed(seed) 32 | self.dataset_root = dataset_root 33 | images = ( 34 | sp.check_output(f"ls {dataset_root}", shell=True) 35 | .decode("utf-8") 36 | .split("\n")[:-1] 37 | ) 38 | ndata = len(images) 39 | if truncate_count > 0: 40 | ndata = min(ndata, truncate_count) 41 | 42 | self.image_size = image_size 43 | 44 | self.dataset = [os.path.join(dataset_root, image) for image in images] 45 | 46 | random.shuffle(self.dataset) 47 | self.dataset = self.dataset[:ndata] 48 | 49 | # Data transform 50 | self.transform = transform if transform is not None else lambda x: x 51 | 52 | self.nimgs = ndata 53 | 54 | def __getitem__(self, index): 55 | path = self.dataset[index] 56 | img = Image.open(path).convert("RGB") 57 | img = self.transform(img) 58 | 59 | return {"rgb": img}, {"rgb": path} 60 | 61 | def __len__(self): 62 | return self.nimgs 63 | 64 | 65 | def main(args): 66 | # Enable cuda by default 67 | args.cuda = True 68 | 69 | # Define transforms 70 | normalize = transforms.Normalize( 71 | mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225] 72 | ) 73 | imagenet_mean = [0.485, 0.456, 0.406] 74 | imagenet_std = [0.229, 0.224, 0.225] 75 | transform = transforms.Compose( 76 | [transforms.Resize(args.image_size), transforms.ToTensor(), normalize] 77 | ) 78 | 79 | # Create datasets 80 | datasets = { 81 | split: RGBDataset( 82 | os.path.join(args.dataset_root, split), 83 | seed=123, 84 | transform=transform, 85 | image_size=args.image_size, 86 | truncate_count=args.truncate_count, 87 | ) 88 | for split in ["train", "val", "test"] 89 | } 90 | 91 | # Create data loaders 92 | data_loaders = { 93 | split: DataLoader( 94 | dataset, batch_size=args.batch_size, 
shuffle=True, num_workers=16 95 | ) 96 | for split, dataset in datasets.items() 97 | } 98 | 99 | device = torch.device("cuda:0" if args.cuda else "cpu") 100 | 101 | # Create model 102 | net = FeatureNetwork() 103 | net.to(device) 104 | net.eval() 105 | 106 | # Generate image features for training images 107 | train_image_features = [] 108 | train_image_paths = [] 109 | 110 | for i, data in enumerate(data_loaders["train"], 0): 111 | 112 | # sample data 113 | inputs, input_paths = data 114 | inputs = {key: val.to(device) for key, val in inputs.items()} 115 | 116 | # Extract features 117 | with torch.no_grad(): 118 | feats = net(inputs["rgb"]) # (bs, 512) 119 | feats = feats.detach().cpu().numpy() 120 | train_image_features.append(feats) 121 | train_image_paths += input_paths["rgb"] 122 | 123 | train_image_features = np.concatenate(train_image_features, axis=0) 124 | 125 | # Generate image features for testing images 126 | test_image_features = [] 127 | test_image_paths = [] 128 | 129 | for i, data in enumerate(data_loaders["test"], 0): 130 | 131 | # sample data 132 | inputs, input_paths = data 133 | inputs = {key: val.to(device) for key, val in inputs.items()} 134 | 135 | # Extract features 136 | with torch.no_grad(): 137 | feats = net(inputs["rgb"]) # (bs, 512) 138 | feats = feats.detach().cpu().numpy() 139 | test_image_features.append(feats) 140 | test_image_paths += input_paths["rgb"] 141 | 142 | test_image_features = np.concatenate(test_image_features, axis=0) # (N, 512) 143 | 144 | # ================= Perform clustering ================== 145 | kmeans = MiniBatchKMeans( 146 | init="k-means++", 147 | n_clusters=args.num_clusters, 148 | batch_size=args.batch_size, 149 | n_init=10, 150 | max_no_improvement=20, 151 | verbose=0, 152 | ) 153 | save_h5_path = os.path.join( 154 | args.save_dir, f"clusters_{args.num_clusters:05d}_data.h5" 155 | ) 156 | if os.path.isfile(save_h5_path): 157 | print("========> Loading existing clusters!") 158 | h5file = h5py.File(os.path.join(save_h5_path), "r") 159 | train_cluster_centroids = np.array(h5file["cluster_centroids"]) 160 | kmeans.cluster_centers_ = train_cluster_centroids 161 | train_cluster_assignments = kmeans.predict(train_image_features) # (N, ) 162 | h5file.close() 163 | else: 164 | kmeans.fit(train_image_features) 165 | train_cluster_assignments = kmeans.predict(train_image_features) # (N, ) 166 | train_cluster_centroids = np.copy( 167 | kmeans.cluster_centers_ 168 | ) # (num_clusters, 512) 169 | 170 | # Create a dictionary of cluster -> images for visualization 171 | cluster2image = {} 172 | if args.visualize_clusters: 173 | log_dir = os.path.join( 174 | args.save_dir, f"train_clusters_#clusters{args.num_clusters:05d}" 175 | ) 176 | tbwriter = SummaryWriter(log_dir=log_dir) 177 | 178 | for i in range(args.num_clusters): 179 | valid_idxes = np.where(train_cluster_assignments == i)[0] 180 | valid_image_paths = [train_image_paths[j] for j in valid_idxes] 181 | # Shuffle and pick only upto 100 images per cluster 182 | random.shuffle(valid_image_paths) 183 | # Read the valid images 184 | valid_images = [] 185 | for path in valid_image_paths[:100]: 186 | img = cv2.resize( 187 | np.flip(cv2.imread(path), axis=2), (args.image_size, args.image_size), 188 | ) 189 | valid_images.append(img) 190 | valid_images = ( 191 | np.stack(valid_images, axis=0).astype(np.float32) / 255.0 192 | ) # (K, H, W, C) 193 | valid_images = torch.Tensor(valid_images).permute(0, 3, 1, 2).contiguous() 194 | cluster2image[i] = valid_images 195 | if args.visualize_clusters: 196 | 
# Write the train image clusters to tensorboard 197 | tbwriter.add_images(f"Cluster #{i:05d}", valid_images, 0) 198 | 199 | h5file = h5py.File( 200 | os.path.join(args.save_dir, f"clusters_{args.num_clusters:05d}_data.h5"), "a" 201 | ) 202 | 203 | if "cluster_centroids" not in h5file.keys(): 204 | h5file.create_dataset("cluster_centroids", data=train_cluster_centroids) 205 | for i in range(args.num_clusters): 206 | if f"cluster_{i}/images" not in h5file.keys(): 207 | h5file.create_dataset(f"cluster_{i}/images", data=cluster2image[i]) 208 | 209 | h5file.close() 210 | 211 | if args.visualize_clusters: 212 | # Dot product of test_image_features with train_cluster_centroids 213 | test_dot_centroids = np.matmul( 214 | test_image_features, train_cluster_centroids.T 215 | ) # (N, num_clusters) 216 | if args.normalize_embedding: 217 | test_dot_centroids = (test_dot_centroids + 1.0) / 2.0 218 | else: 219 | test_dot_centroids = F.softmax( 220 | torch.Tensor(test_dot_centroids), dim=1 221 | ).numpy() 222 | 223 | # Find the top-K matching centroids 224 | topk_matches = np.argpartition(test_dot_centroids, -5, axis=1)[:, -5:] # (N, 5) 225 | 226 | # Write the test nearest neighbors to tensorboard 227 | tbwriter = SummaryWriter( 228 | log_dir=os.path.join( 229 | args.save_dir, f"test_neighbors_#clusters{args.num_clusters:05d}" 230 | ) 231 | ) 232 | for i in range(100): 233 | test_image_path = test_image_paths[i] 234 | test_image = cv2.resize( 235 | cv2.imread(test_image_path), (args.image_size, args.image_size) 236 | ) 237 | test_image = np.flip(test_image, axis=2).astype(np.float32) / 255.0 238 | test_image = torch.Tensor(test_image).permute(2, 0, 1).contiguous() 239 | topk_clusters = topk_matches[i] 240 | # Pick some 4 images representative of a cluster 241 | topk_cluster_images = [] 242 | for k in topk_clusters: 243 | imgs = cluster2image[k][:4] # (4, C, H, W) 244 | if imgs.shape[0] == 0: 245 | continue 246 | elif imgs.shape[0] != 4: 247 | imgs_pad = torch.zeros(4 - imgs.shape[0], *imgs.shape[1:]) 248 | imgs = torch.cat([imgs, imgs_pad], dim=0) 249 | # Downsample by a factor of 2 250 | imgs = F.interpolate( 251 | imgs, scale_factor=0.5, mode="bilinear" 252 | ) # (4, C, H/2, W/2) 253 | # Reshape to form a grid 254 | imgs = imgs.permute(1, 0, 2, 3) # (C, 4, H/2, W/2) 255 | C, _, Hby2, Wby2 = imgs.shape 256 | imgs = ( 257 | imgs.view(C, 2, 2, Hby2, Wby2) 258 | .permute(0, 1, 3, 2, 4) 259 | .contiguous() 260 | .view(C, Hby2 * 2, Wby2 * 2) 261 | ) 262 | # Draw a red border 263 | imgs[0, :4, :] = 1.0 264 | imgs[1, :4, :] = 0.0 265 | imgs[2, :4, :] = 0.0 266 | imgs[0, -4:, :] = 1.0 267 | imgs[1, -4:, :] = 0.0 268 | imgs[2, -4:, :] = 0.0 269 | imgs[0, :, :4] = 1.0 270 | imgs[1, :, :4] = 0.0 271 | imgs[2, :, :4] = 0.0 272 | imgs[0, :, -4:] = 1.0 273 | imgs[1, :, -4:] = 0.0 274 | imgs[2, :, -4:] = 0.0 275 | topk_cluster_images.append(imgs) 276 | 277 | vis_img = torch.cat([test_image, *topk_cluster_images], dim=2) 278 | image_name = f"Test image #{i:04d}" 279 | for k in topk_clusters: 280 | score = test_dot_centroids[i, k].item() 281 | image_name += f"_{score:.3f}" 282 | tbwriter.add_image(image_name, vis_img, 0) 283 | 284 | 285 | def str2bool(v): 286 | return True if v.lower() in ["yes", "y", "true", "t"] else False 287 | 288 | 289 | if __name__ == "__main__": 290 | parser = argparse.ArgumentParser() 291 | parser.add_argument("--image-size", type=int, default=256) 292 | parser.add_argument("--dataset-root", type=str, default="dataset") 293 | parser.add_argument("--truncate-count", type=int, default=-1) 294 | 
parser.add_argument("--batch-size", type=int, default=128) 295 | parser.add_argument("--num-clusters", type=int, default=100) 296 | parser.add_argument("--save-dir", type=str, default="visualization_dir") 297 | parser.add_argument("--visualize-clusters", type=str2bool, default=True) 298 | parser.add_argument("--normalize-embedding", type=str2bool, default=True) 299 | 300 | args = parser.parse_args() 301 | 302 | main(args) 303 | -------------------------------------------------------------------------------- /reconstruction_data_generation/mp3d/.gitignore: -------------------------------------------------------------------------------- 1 | uniform_samples 2 | imagenet_clusters 3 | -------------------------------------------------------------------------------- /reconstruction_data_generation/mp3d/configs/pointnav_mp3d_test.yaml: -------------------------------------------------------------------------------- 1 | ENVIRONMENT: 2 | MAX_EPISODE_STEPS: 500 3 | SIMULATOR: 4 | AGENT_0: 5 | SENSORS: ['RGB_SENSOR', 'DEPTH_SENSOR'] 6 | HABITAT_SIM_V0: 7 | GPU_DEVICE_ID: 0 8 | RGB_SENSOR: 9 | WIDTH: 84 10 | HEIGHT: 84 11 | DEPTH_SENSOR: 12 | WIDTH: 84 13 | HEIGHT: 84 14 | TASK: 15 | TYPE: Nav-v0 16 | SUCCESS_DISTANCE: 0.2 17 | SENSORS: ['POINTGOAL_SENSOR'] 18 | POINTGOAL_SENSOR: 19 | TYPE: PointGoalSensor 20 | GOAL_FORMAT: POLAR 21 | MEASUREMENTS: ['SPL'] 22 | SPL: 23 | TYPE: SPL 24 | SUCCESS_DISTANCE: 0.2 25 | DATASET: 26 | TYPE: PointNav-v1 27 | SPLIT: test 28 | DATA_PATH: data/datasets/pointnav/mp3d/v1_unique/{split}/{split}.json.gz 29 | SCENES_DIR: data/scene_datasets 30 | SHUFFLE_DATASET: False 31 | -------------------------------------------------------------------------------- /reconstruction_data_generation/mp3d/configs/pointnav_mp3d_train.yaml: -------------------------------------------------------------------------------- 1 | ENVIRONMENT: 2 | MAX_EPISODE_STEPS: 500 3 | SIMULATOR: 4 | AGENT_0: 5 | SENSORS: ['RGB_SENSOR', 'DEPTH_SENSOR'] 6 | HABITAT_SIM_V0: 7 | GPU_DEVICE_ID: 0 8 | RGB_SENSOR: 9 | WIDTH: 84 10 | HEIGHT: 84 11 | DEPTH_SENSOR: 12 | WIDTH: 84 13 | HEIGHT: 84 14 | TASK: 15 | TYPE: Nav-v0 16 | SUCCESS_DISTANCE: 0.2 17 | SENSORS: ['POINTGOAL_SENSOR'] 18 | POINTGOAL_SENSOR: 19 | TYPE: PointGoalSensor 20 | GOAL_FORMAT: POLAR 21 | MEASUREMENTS: ['SPL'] 22 | SPL: 23 | TYPE: SPL 24 | SUCCESS_DISTANCE: 0.2 25 | DATASET: 26 | TYPE: PointNav-v1 27 | SPLIT: train 28 | DATA_PATH: data/datasets/pointnav/mp3d/v1_unique/{split}/{split}.json.gz 29 | SCENES_DIR: data/scene_datasets 30 | SHUFFLE_DATASET: False 31 | -------------------------------------------------------------------------------- /reconstruction_data_generation/mp3d/configs/pointnav_mp3d_val.yaml: -------------------------------------------------------------------------------- 1 | ENVIRONMENT: 2 | MAX_EPISODE_STEPS: 500 3 | SIMULATOR: 4 | AGENT_0: 5 | SENSORS: ['RGB_SENSOR', 'DEPTH_SENSOR'] 6 | HABITAT_SIM_V0: 7 | GPU_DEVICE_ID: 0 8 | RGB_SENSOR: 9 | WIDTH: 84 10 | HEIGHT: 84 11 | DEPTH_SENSOR: 12 | WIDTH: 84 13 | HEIGHT: 84 14 | TASK: 15 | TYPE: Nav-v0 16 | SUCCESS_DISTANCE: 0.2 17 | SENSORS: ['POINTGOAL_SENSOR'] 18 | POINTGOAL_SENSOR: 19 | TYPE: PointGoalSensor 20 | GOAL_FORMAT: POLAR 21 | MEASUREMENTS: ['SPL'] 22 | SPL: 23 | TYPE: SPL 24 | SUCCESS_DISTANCE: 0.2 25 | DATASET: 26 | TYPE: PointNav-v1 27 | SPLIT: val 28 | DATA_PATH: data/datasets/pointnav/mp3d/v1_unique/{split}/{split}.json.gz 29 | SCENES_DIR: data/scene_datasets 30 | SHUFFLE_DATASET: False 31 | 
-------------------------------------------------------------------------------- /reconstruction_data_generation/mp3d/extract_data_script.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # Copyright (c) Facebook, Inc. and its affiliates. 3 | # All rights reserved. 4 | # This source code is licensed under the license found in the 5 | # LICENSE file in the root directory of this source tree. 6 | 7 | habitat_root=$EXPLORING_EXPLORATION/environments/habitat/habitat-api 8 | 9 | mkdir uniform_samples 10 | for split in 'val' 'test' 'train' 11 | do 12 | python generate_uniform_points.py \ 13 | --config-path configs/pointnav_mp3d_${split}.yaml \ 14 | --habitat-root $habitat_root \ 15 | --save-dir uniform_samples/${split} 16 | done 17 | -------------------------------------------------------------------------------- /reconstruction_data_generation/mp3d/generate_uniform_points.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # Copyright (c) Facebook, Inc. and its affiliates. 3 | # All rights reserved. 4 | # This source code is licensed under the license found in the 5 | # LICENSE file in the root directory of this source tree. 6 | 7 | import os 8 | import json 9 | import habitat 10 | import argparse 11 | import numpy as np 12 | import multiprocessing as mp 13 | 14 | from PIL import Image 15 | 16 | 17 | def safe_mkdir(path): 18 | try: 19 | os.mkdir(path) 20 | except: 21 | pass 22 | 23 | 24 | def write_data(data_tuple): 25 | img, img_name = data_tuple 26 | img = Image.fromarray(img) 27 | img.save(img_name) 28 | 29 | 30 | def main(args): 31 | 32 | pool = mp.Pool(32) 33 | 34 | # ====================== Create environment ========================== 35 | config = habitat.get_config(config_paths=args.config_path) 36 | config.defrost() 37 | # Update path to SCENES_DIR, DATA_PATH 38 | config.DATASET.SCENES_DIR = os.path.join(args.habitat_root, "data/scene_datasets") 39 | config.DATASET.DATA_PATH = os.path.join( 40 | args.habitat_root, 41 | "data/datasets/pointnav/mp3d/v1_unique/{split}/{split}.json.gz", 42 | ) 43 | config.freeze() 44 | env = habitat.Env(config=config) 45 | 46 | # Assumes each episode is in a unique environment 47 | num_episodes = len(env._dataset.episodes) 48 | all_images = [] 49 | for epcount in range(num_episodes): 50 | env.reset() 51 | scene_id = env.current_episode.scene_id 52 | scene_name = scene_id.split("/")[-1] 53 | print("Gathering data for scene # {}: {}".format(epcount, scene_name)) 54 | 55 | min_x, min_z, max_x, max_z = env._sim.get_environment_extents() 56 | # Sample a uniform grid of points separated by 4m 57 | uniform_grid_x = np.arange(min_x, max_x, 4) 58 | uniform_grid_z = np.arange(min_z, max_z, 4) 59 | 60 | agent_y = env._sim.get_agent_state().position[1] 61 | 62 | scene_images = [] 63 | for x in uniform_grid_x: 64 | for z in uniform_grid_z: 65 | random_point = [x.item(), agent_y.item(), z.item()] 66 | if not env._sim.is_navigable(random_point): 67 | print(f"=======> Skipping point ({x}, {agent_y}, {z})") 68 | continue 69 | 70 | # Sample multiple viewing angles 71 | for heading in np.arange(-np.pi, np.pi, np.pi / 3): 72 | # This is clockwise rotation about the vertical upward axis 73 | rotation = [ 74 | 0, 75 | np.sin(heading / 2).item(), 76 | 0, 77 | np.cos(heading / 2).item(), 78 | ] 79 | 80 | obs = env._sim.get_observations_at(random_point, rotation) 81 | scene_images.append(obs["rgb"]) 82 | 83 | all_images += scene_images 84 | 85 | 
safe_mkdir(args.save_dir) 86 | img_tuples = [] 87 | for i, img in enumerate(all_images): 88 | img_path = os.path.join(args.save_dir, f"image_{i:07d}.png") 89 | img_tuples.append((img, img_path)) 90 | 91 | _ = pool.map(write_data, img_tuples) 92 | 93 | 94 | if __name__ == "__main__": 95 | 96 | parser = argparse.ArgumentParser() 97 | 98 | parser.add_argument("--seed", type=int, default=123) 99 | parser.add_argument("--config-path", type=str, default="config.yaml") 100 | parser.add_argument("--save-dir", type=str, default="data") 101 | parser.add_argument("--habitat-root", type=str, default="./") 102 | 103 | args = parser.parse_args() 104 | 105 | main(args) 106 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | einops 2 | gym 3 | h5py 4 | matplotlib 5 | opencv-python 6 | tensorboard 7 | tensorboardX 8 | scikit-learn 9 | --------------------------------------------------------------------------------
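A minimal usage sketch for the pose utilities in exploring_exploration/utils/geometry.py, assuming the package listed above is importable (installed or on PYTHONPATH). It relies only on subtract_pose and add_pose as shown in the listing, and illustrates that add_pose inverts subtract_pose once the (y, x, phi) vs. (x, y, phi) ordering conventions are accounted for; the tensor sizes, values, and tolerance are arbitrary choices for the demo.

import torch
from exploring_exploration.utils.geometry import subtract_pose, add_pose

N = 4
# subtract_pose expects both arguments in (y, x, phi) order, with phi in radians.
pose_common = torch.rand(N, 3) * 2.0 - 1.0
poses = torch.rand(N, 3) * 2.0 - 1.0

# Relative poses in the frame of pose_common, returned in (x, y, phi) order.
rel = subtract_pose(pose_common, poses)

# add_pose unbinds pose_common according to `mode`; since `rel` is (x, y, phi),
# reorder pose_common to (x, y, phi) and use mode="xyt".
pose_common_xyt = pose_common[:, [1, 0, 2]]
recovered_xyt = add_pose(pose_common_xyt, rel, mode="xyt")  # (x, y, phi)
recovered = recovered_xyt[:, [1, 0, 2]]  # back to (y, x, phi)

# Positions are recovered up to float error; angles up to wrapping into [-pi, pi].
assert torch.allclose(recovered, poses, atol=1e-5)

Because the sampled angles already lie in [-1, 1] radians, no wrapping occurs and the full pose round-trips exactly (up to float error). The same ordering convention appears in compute_reconstruction_rewards above, which feeds (y, x, theta) odometer poses into subtract_pose and consumes its (x, y, phi) output.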