├── .gitignore ├── .gitmodules ├── CODE_OF_CONDUCT.md ├── CONTRIBUTING.md ├── LICENSE ├── README.md ├── assets └── exploring_exploration.png ├── baselines.patch ├── configs ├── exploration │ ├── mp3d_test_config.yaml │ ├── mp3d_train_config.yaml │ ├── mp3d_val_config.yaml │ ├── ppo_pose_test.yaml │ ├── ppo_pose_train.yaml │ └── ppo_pose_val.yaml ├── pose_estimation │ ├── mp3d_test_config.yaml │ ├── mp3d_val_config.yaml │ ├── ppo_pose_test.yaml │ └── ppo_pose_val.yaml ├── pretrain_imitation │ ├── mp3d_train_oracle_landmarks_config.yaml │ ├── mp3d_train_oracle_objects_config.yaml │ ├── mp3d_train_oracle_random_config.yaml │ ├── ppo_pose_train_oracle_landmarks.yaml │ ├── ppo_pose_train_oracle_objects.yaml │ └── ppo_pose_train_oracle_random.yaml ├── pretrain_reconstruction │ ├── mp3d_train_config.yaml │ ├── mp3d_val_config.yaml │ ├── ppo_pose_train.yaml │ └── ppo_pose_val.yaml └── reconstruction_exploration │ ├── mp3d_test_config.yaml │ ├── mp3d_train_config.yaml │ ├── mp3d_val_config.yaml │ ├── ppo_pose_test.yaml │ ├── ppo_pose_train.yaml │ └── ppo_pose_val.yaml ├── environments ├── gym-avd │ ├── .gitignore │ ├── README.md │ ├── gym_avd │ │ ├── .gitignore │ │ ├── __init__.py │ │ ├── assets │ │ │ └── maps_topdown_agent_sprite │ │ │ │ └── 100x100.png │ │ ├── demos │ │ │ ├── exploration_demo.py │ │ │ ├── pointnav_demo.py │ │ │ ├── pose_estimation_demo.py │ │ │ ├── reconstruction_demo.py │ │ │ └── utils.py │ │ └── envs │ │ │ ├── __init__.py │ │ │ ├── avd_base_env.py │ │ │ ├── avd_nav_env.py │ │ │ ├── avd_occ_base_env.py │ │ │ ├── avd_pose_env.py │ │ │ ├── avd_recon_env.py │ │ │ ├── config.py │ │ │ └── utils.py │ ├── preprocess_raw_data.py │ ├── requirements.txt │ └── setup.py └── habitat │ ├── .gitignore │ ├── README.md │ ├── habitat_api.patch │ └── habitat_sim.patch ├── evaluate_pose_estimation.py ├── evaluate_reconstruction.py ├── evaluate_visitation.py ├── exploring_exploration ├── .gitignore ├── algo │ ├── __init__.py │ ├── imitation.py │ ├── ppo.py │ └── supervised_reconstruction.py ├── arguments.py ├── envs │ ├── __init__.py │ ├── avd.py │ └── habitat.py ├── models │ ├── .gitignore │ ├── __init__.py │ ├── curiosity.py │ ├── exploration.py │ ├── frontier_agent.py │ ├── navigation.py │ ├── pose_estimation.py │ └── reconstruction.py └── utils │ ├── common.py │ ├── distributions.py │ ├── eval.py │ ├── geometry.py │ ├── median_pooling.py │ ├── metrics.py │ ├── pose_estimation.py │ ├── reconstruction.py │ ├── reconstruction_eval.py │ ├── storage.py │ └── visualization.py ├── pretrain_imitation.py ├── pretrain_reconstruction.py ├── reconstruction_data_generation ├── avd │ ├── .gitignore │ └── gather_uniform_points.py ├── generate_imagenet_clusters.py └── mp3d │ ├── .gitignore │ ├── configs │ ├── pointnav_mp3d_test.yaml │ ├── pointnav_mp3d_train.yaml │ └── pointnav_mp3d_val.yaml │ ├── extract_data_script.sh │ └── generate_uniform_points.py ├── requirements.txt ├── train_curiosity_exploration.py ├── train_exploration.py └── train_reconstruction_exploration.py /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | wheels/ 23 | *.egg-info/ 24 | .installed.cfg 25 | *.egg 26 | MANIFEST 27 | 28 | # PyInstaller 29 | # Usually these files are written by a python 
script from a template 30 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 31 | *.manifest 32 | *.spec 33 | 34 | # Installer logs 35 | pip-log.txt 36 | pip-delete-this-directory.txt 37 | 38 | # Unit test / coverage reports 39 | htmlcov/ 40 | .tox/ 41 | .coverage 42 | .coverage.* 43 | .cache 44 | nosetests.xml 45 | coverage.xml 46 | *.cover 47 | .hypothesis/ 48 | .pytest_cache/ 49 | 50 | # Translations 51 | *.mo 52 | *.pot 53 | 54 | # Django stuff: 55 | *.log 56 | local_settings.py 57 | db.sqlite3 58 | 59 | # Flask stuff: 60 | instance/ 61 | .webassets-cache 62 | 63 | # Scrapy stuff: 64 | .scrapy 65 | 66 | # Sphinx documentation 67 | docs/_build/ 68 | 69 | # PyBuilder 70 | target/ 71 | 72 | # Jupyter Notebook 73 | .ipynb_checkpoints 74 | 75 | # pyenv 76 | .python-version 77 | 78 | # celery beat schedule file 79 | celerybeat-schedule 80 | 81 | # SageMath parsed files 82 | *.sage.py 83 | 84 | # Environments 85 | .env 86 | .venv 87 | env/ 88 | venv/ 89 | ENV/ 90 | env.bak/ 91 | venv.bak/ 92 | 93 | # Spyder project settings 94 | .spyderproject 95 | .spyproject 96 | 97 | # Rope project settings 98 | .ropeproject 99 | 100 | # mkdocs documentation 101 | /site 102 | 103 | trained_models/ 104 | .fuse_hidden* 105 | 106 | # ctags 107 | tags 108 | 109 | *.swp 110 | logs 111 | *.mp4 112 | data/ 113 | tests/ 114 | pretrained_models 115 | *_results/ 116 | scripts 117 | baselines 118 | -------------------------------------------------------------------------------- /.gitmodules: -------------------------------------------------------------------------------- 1 | [submodule "exploring_exploration/models/astar_pycpp"] 2 | path = exploring_exploration/models/astar_pycpp 3 | url = git@github.com:srama2512/astar_pycpp.git 4 | branch = master 5 | -------------------------------------------------------------------------------- /CODE_OF_CONDUCT.md: -------------------------------------------------------------------------------- 1 | # Code of Conduct 2 | 3 | Facebook has adopted a Code of Conduct that we expect project participants to adhere to. 4 | Please read the [full text](https://code.fb.com/codeofconduct/) 5 | so that you can understand what actions will and will not be tolerated. 6 | -------------------------------------------------------------------------------- /CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | # Contributing to exploring_exploration 2 | We want to make contributing to this project as easy and transparent as 3 | possible. 4 | 5 | ## Pull Requests 6 | We actively welcome your pull requests. 7 | 8 | 1. Fork the repo and create your branch from `master`. 9 | 2. If you've added code that should be tested, add tests. 10 | 3. If you've changed APIs, update the documentation. 11 | 4. Ensure the test suite passes. 12 | 5. Make sure your code lints. 13 | 6. If you haven't already, complete the Contributor License Agreement ("CLA"). 14 | 15 | ## Contributor License Agreement ("CLA") 16 | In order to accept your pull request, we need you to submit a CLA. You only need 17 | to do this once to work on any of Facebook's open source projects. 18 | 19 | Complete your CLA here: 20 | 21 | ## Issues 22 | We use GitHub issues to track public bugs. Please ensure your description is 23 | clear and has sufficient instructions to be able to reproduce the issue. 24 | 25 | Facebook has a [bounty program](https://www.facebook.com/whitehat/) for the safe 26 | disclosure of security bugs. 
In those cases, please go through the process 27 | outlined on that page and do not file a public issue. 28 | 29 | ## Coding Style 30 | * 80 character line length 31 | 32 | ## License 33 | By contributing to exploring_exploration, you agree that your contributions will be licensed 34 | under the LICENSE file in the root directory of this source tree. 35 | -------------------------------------------------------------------------------- /assets/exploring_exploration.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/facebookresearch/exploring_exploration/09d3f9b8703162fcc0974989e60f8cd5b47d4d39/assets/exploring_exploration.png -------------------------------------------------------------------------------- /baselines.patch: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | # All rights reserved. 3 | # This source code is licensed under the license found in the 4 | # LICENSE file in the root directory of this source tree. 5 | 6 | diff --git a/baselines/bench/monitor.py b/baselines/bench/monitor.py 7 | index 0db473a..f2d93ac 100644 8 | --- a/baselines/bench/monitor.py 9 | +++ b/baselines/bench/monitor.py 10 | @@ -76,6 +76,7 @@ class Monitor(Wrapper): 11 | self.total_steps += 1 12 | 13 | def close(self): 14 | + super().close() 15 | if self.f is not None: 16 | self.f.close() 17 | 18 | diff --git a/baselines/common/vec_env/dummy_vec_env.py b/baselines/common/vec_env/dummy_vec_env.py 19 | index 60db11d..387ea11 100644 20 | --- a/baselines/common/vec_env/dummy_vec_env.py 21 | +++ b/baselines/common/vec_env/dummy_vec_env.py 22 | @@ -61,6 +61,13 @@ class DummyVecEnv(VecEnv): 23 | self._save_obs(e, obs) 24 | return self._obs_from_buf() 25 | 26 | + def close_extras(self): 27 | + """ 28 | + Clean up the extra resources, beyond what's in this base class. 29 | + Only runs when not self.closed(). 
30 | + """ 31 | + self.envs[0].close() 32 | + 33 | def _save_obs(self, e, obs): 34 | for k in self.keys: 35 | if k is None: 36 | diff --git a/baselines/common/vec_env/subproc_vec_env.py b/baselines/common/vec_env/subproc_vec_env.py 37 | index 4dc4d2c..a1ec19c 100644 38 | --- a/baselines/common/vec_env/subproc_vec_env.py 39 | +++ b/baselines/common/vec_env/subproc_vec_env.py 40 | @@ -70,13 +70,29 @@ class SubprocVecEnv(VecEnv): 41 | results = [remote.recv() for remote in self.remotes] 42 | self.waiting = False 43 | obs, rews, dones, infos = zip(*results) 44 | - return np.stack(obs), np.stack(rews), np.stack(dones), infos 45 | + if isinstance(obs[0], dict): 46 | + obs_output = { 47 | + key: np.stack([obs_[key] for obs_ in obs]) 48 | + for key in obs[0].keys() 49 | + } 50 | + else: 51 | + obs_output = np.stack(obs) 52 | + return obs_output, np.stack(rews), np.stack(dones), infos 53 | 54 | def reset(self): 55 | self._assert_not_closed() 56 | for remote in self.remotes: 57 | remote.send(('reset', None)) 58 | - return np.stack([remote.recv() for remote in self.remotes]) 59 | + 60 | + obs = [remote.recv() for remote in self.remotes] 61 | + if isinstance(obs[0], dict): 62 | + obs_output = { 63 | + key: np.stack([obs_[key] for obs_ in obs]) 64 | + for key in obs[0].keys() 65 | + } 66 | + else: 67 | + obs_output = np.stack(obs) 68 | + return obs_output 69 | 70 | def close_extras(self): 71 | self.closed = True 72 | -------------------------------------------------------------------------------- /configs/exploration/mp3d_test_config.yaml: -------------------------------------------------------------------------------- 1 | ENVIRONMENT: 2 | MAX_EPISODE_STEPS: 1001 3 | SIMULATOR: 4 | TYPE: "Sim-v1" 5 | AGENT_0: 6 | SENSORS: ['RGB_SENSOR', 'DEPTH_SENSOR', 'FINE_OCC_SENSOR', 'COARSE_OCC_SENSOR'] 7 | ACTION_SPACE_CONFIG: "v2" 8 | HABITAT_SIM_V0: 9 | GPU_DEVICE_ID: 0 10 | RGB_SENSOR: 11 | WIDTH: 84 12 | HEIGHT: 84 13 | DEPTH_SENSOR: 14 | WIDTH: 84 15 | HEIGHT: 84 16 | OCCUPANCY_MAPS: 17 | MAP_SCALE: 0.1 18 | MAP_SIZE: 800 19 | MAX_DEPTH: 3 20 | SMALL_MAP_RANGE: 20 21 | LARGE_MAP_RANGE: 100 22 | HEIGHT_LOWER: 0.2 23 | HEIGHT_UPPER: 1.5 24 | FINE_OCC_SENSOR: 25 | WIDTH: 84 26 | HEIGHT: 84 27 | COARSE_OCC_SENSOR: 28 | WIDTH: 84 29 | HEIGHT: 84 30 | OBJECT_ANNOTATIONS: 31 | IS_AVAILABLE: True 32 | PATH: 'environments/habitat/habitat-api/data/object_annotations/mp3d' 33 | TASK: 34 | TYPE: Pose-v0 35 | SENSORS: ['DELTA_SENSOR', 'ORACLE_ACTION_SENSOR', 'COLLISION_SENSOR'] 36 | ORACLE_ACTION_SENSOR: 37 | ORACLE_TYPE: 'random' 38 | MEASUREMENTS: ['AREA_COVERED', 'TOP_DOWN_MAP_POSE', 'OPSR', 'OBJECTS_COVERED_GEOMETRIC'] 39 | TOP_DOWN_MAP_POSE: 40 | TYPE: TopDownMapPose 41 | FOG_OF_WAR: 42 | DRAW: True 43 | FOV: 90 44 | VISIBILITY_DIST: 4.0 45 | DATASET: 46 | TYPE: PoseEstimation-v1 47 | SPLIT: test 48 | SCENES_DIR: environments/habitat/habitat-api/data/scene_datasets 49 | DATA_PATH: environments/habitat/habitat-api/data/datasets/pose_estimation/mp3d/v1/{split}/{split}.json.gz 50 | SHUFFLE_DATASET: False 51 | -------------------------------------------------------------------------------- /configs/exploration/mp3d_train_config.yaml: -------------------------------------------------------------------------------- 1 | ENVIRONMENT: 2 | MAX_EPISODE_STEPS: 501 3 | SIMULATOR: 4 | TYPE: "Sim-v1" 5 | AGENT_0: 6 | SENSORS: ['RGB_SENSOR', 'DEPTH_SENSOR', 'FINE_OCC_SENSOR', 'COARSE_OCC_SENSOR'] 7 | ACTION_SPACE_CONFIG: "v2" 8 | HABITAT_SIM_V0: 9 | GPU_DEVICE_ID: 0 10 | RGB_SENSOR: 11 | WIDTH: 84 12 | HEIGHT: 84 13 | DEPTH_SENSOR: 
14 | WIDTH: 84 15 | HEIGHT: 84 16 | OCCUPANCY_MAPS: 17 | MAP_SCALE: 0.1 18 | MAP_SIZE: 800 19 | MAX_DEPTH: 3 20 | SMALL_MAP_RANGE: 20 21 | LARGE_MAP_RANGE: 100 22 | HEIGHT_LOWER: 0.2 23 | HEIGHT_UPPER: 1.5 24 | FINE_OCC_SENSOR: 25 | WIDTH: 84 26 | HEIGHT: 84 27 | COARSE_OCC_SENSOR: 28 | WIDTH: 84 29 | HEIGHT: 84 30 | OBJECT_ANNOTATIONS: 31 | IS_AVAILABLE: True 32 | PATH: 'environments/habitat/habitat-api/data/object_annotations/mp3d' 33 | TASK: 34 | TYPE: Pose-v0 35 | SENSORS: ['DELTA_SENSOR', 'ORACLE_ACTION_SENSOR', 'COLLISION_SENSOR'] 36 | ORACLE_ACTION_SENSOR: 37 | ORACLE_TYPE: 'random' 38 | MEASUREMENTS: ['AREA_COVERED', 'NOVELTY_REWARD', 'COVERAGE_NOVELTY_REWARD'] 39 | DATASET: 40 | TYPE: PoseEstimation-v1 41 | SPLIT: train 42 | SCENES_DIR: environments/habitat/habitat-api/data/scene_datasets 43 | DATA_PATH: environments/habitat/habitat-api/data/datasets/pose_estimation/mp3d/v1/{split}/{split}.json.gz 44 | -------------------------------------------------------------------------------- /configs/exploration/mp3d_val_config.yaml: -------------------------------------------------------------------------------- 1 | ENVIRONMENT: 2 | MAX_EPISODE_STEPS: 1001 3 | SIMULATOR: 4 | TYPE: "Sim-v1" 5 | AGENT_0: 6 | SENSORS: ['RGB_SENSOR', 'DEPTH_SENSOR', 'FINE_OCC_SENSOR', 'COARSE_OCC_SENSOR'] 7 | ACTION_SPACE_CONFIG: "v2" 8 | HABITAT_SIM_V0: 9 | GPU_DEVICE_ID: 0 10 | RGB_SENSOR: 11 | WIDTH: 84 12 | HEIGHT: 84 13 | DEPTH_SENSOR: 14 | WIDTH: 84 15 | HEIGHT: 84 16 | OCCUPANCY_MAPS: 17 | MAP_SCALE: 0.1 18 | MAP_SIZE: 800 19 | MAX_DEPTH: 3 20 | SMALL_MAP_RANGE: 20 21 | LARGE_MAP_RANGE: 100 22 | HEIGHT_LOWER: 0.2 23 | HEIGHT_UPPER: 1.5 24 | FINE_OCC_SENSOR: 25 | WIDTH: 84 26 | HEIGHT: 84 27 | COARSE_OCC_SENSOR: 28 | WIDTH: 84 29 | HEIGHT: 84 30 | OBJECT_ANNOTATIONS: 31 | IS_AVAILABLE: True 32 | PATH: 'environments/habitat/habitat-api/data/object_annotations/mp3d' 33 | TASK: 34 | TYPE: Pose-v0 35 | SENSORS: ['DELTA_SENSOR', 'ORACLE_ACTION_SENSOR', 'COLLISION_SENSOR'] 36 | ORACLE_ACTION_SENSOR: 37 | ORACLE_TYPE: 'random' 38 | MEASUREMENTS: ['AREA_COVERED', 'TOP_DOWN_MAP_POSE', 'OPSR', 'OBJECTS_COVERED_GEOMETRIC'] 39 | TOP_DOWN_MAP_POSE: 40 | TYPE: TopDownMapPose 41 | DATASET: 42 | TYPE: PoseEstimation-v1 43 | SPLIT: val 44 | SCENES_DIR: environments/habitat/habitat-api/data/scene_datasets 45 | DATA_PATH: environments/habitat/habitat-api/data/datasets/pose_estimation/mp3d/v1/{split}/{split}.json.gz 46 | SHUFFLE_DATASET: False 47 | -------------------------------------------------------------------------------- /configs/exploration/ppo_pose_test.yaml: -------------------------------------------------------------------------------- 1 | BASE_TASK_CONFIG_PATH: "configs/exploration/mp3d_test_config.yaml" 2 | TRAINER: 3 | TRAINER_NAME: "ppo" 4 | RL: 5 | PPO: 6 | # general options 7 | num_processes: 1 8 | task_config: "configs/exploration/mp3d_test_config.yaml" 9 | sensors: "RGB_SENSOR,DEPTH_SENSOR,FINE_OCC_SENSOR,COARSE_OCC_SENSOR" 10 | -------------------------------------------------------------------------------- /configs/exploration/ppo_pose_train.yaml: -------------------------------------------------------------------------------- 1 | BASE_TASK_CONFIG_PATH: "configs/exploration/mp3d_train_config.yaml" 2 | TRAINER: 3 | TRAINER_NAME: "ppo" 4 | RL: 5 | PPO: 6 | # general options 7 | num_processes: 8 8 | task_config: "configs/exploration/mp3d_train_config.yaml" 9 | sensors: "RGB_SENSOR,DEPTH_SENSOR,FINE_OCC_SENSOR,COARSE_OCC_SENSOR" 10 | 
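The `ppo_pose_*.yaml` files above are thin trainer wrappers: each points `BASE_TASK_CONFIG_PATH`/`task_config` at the matching `mp3d_*_config.yaml` task file and lists the sensors the policy consumes. Below is a minimal, illustrative sketch (not repository code) of how that pairing can be inspected; it assumes PyYAML is installed and the repository root is the working directory, and it does not reproduce the training scripts' own config loader.

```
import yaml

def check_config_pair(trainer_yaml_path):
    # Load the trainer wrapper and the task config it points to.
    with open(trainer_yaml_path) as f:
        trainer_cfg = yaml.safe_load(f)
    ppo_cfg = trainer_cfg["TRAINER"]["RL"]["PPO"]
    with open(ppo_cfg["task_config"]) as f:
        task_cfg = yaml.safe_load(f)
    # The sensors string in the wrapper should match the simulator's sensor list.
    trainer_sensors = set(ppo_cfg["sensors"].split(","))
    sim_sensors = set(task_cfg["SIMULATOR"]["AGENT_0"]["SENSORS"])
    print("num_processes:", ppo_cfg["num_processes"])
    print("sensors match simulator:", trainer_sensors == sim_sensors)

check_config_pair("configs/exploration/ppo_pose_train.yaml")
```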
-------------------------------------------------------------------------------- /configs/exploration/ppo_pose_val.yaml: -------------------------------------------------------------------------------- 1 | BASE_TASK_CONFIG_PATH: "configs/exploration/mp3d_val_config.yaml" 2 | TRAINER: 3 | TRAINER_NAME: "ppo" 4 | RL: 5 | PPO: 6 | # general options 7 | num_processes: 1 8 | task_config: "configs/exploration/mp3d_val_config.yaml" 9 | sensors: "RGB_SENSOR,DEPTH_SENSOR,FINE_OCC_SENSOR,COARSE_OCC_SENSOR" 10 | -------------------------------------------------------------------------------- /configs/pose_estimation/mp3d_test_config.yaml: -------------------------------------------------------------------------------- 1 | ENVIRONMENT: 2 | MAX_EPISODE_STEPS: 1001 3 | SIMULATOR: 4 | TYPE: "Sim-v1" 5 | AGENT_0: 6 | SENSORS: ['RGB_SENSOR', 'DEPTH_SENSOR', 'FINE_OCC_SENSOR', 'COARSE_OCC_SENSOR'] 7 | ACTION_SPACE_CONFIG: "v2" 8 | HABITAT_SIM_V0: 9 | GPU_DEVICE_ID: 0 10 | RGB_SENSOR: 11 | WIDTH: 84 12 | HEIGHT: 84 13 | DEPTH_SENSOR: 14 | WIDTH: 84 15 | HEIGHT: 84 16 | OCCUPANCY_MAPS: 17 | MAP_SCALE: 0.1 18 | MAP_SIZE: 800 19 | MAX_DEPTH: 3 20 | SMALL_MAP_RANGE: 20 21 | LARGE_MAP_RANGE: 100 22 | HEIGHT_LOWER: 0.2 23 | HEIGHT_UPPER: 1.5 24 | FINE_OCC_SENSOR: 25 | WIDTH: 84 26 | HEIGHT: 84 27 | COARSE_OCC_SENSOR: 28 | WIDTH: 84 29 | HEIGHT: 84 30 | OBJECT_ANNOTATIONS: 31 | IS_AVAILABLE: True 32 | PATH: 'environments/habitat/habitat-api/data/object_annotations/mp3d' 33 | TASK: 34 | TYPE: Pose-v0 35 | SENSORS: ['DELTA_SENSOR', 'ORACLE_ACTION_SENSOR', 'COLLISION_SENSOR', 'POSE_RGB_SENSOR', 'POSE_REGRESS_SENSOR'] 36 | POSE_RGB_SENSOR: 37 | NREF: 20 38 | POSE_REGRESS_SENSOR: 39 | NREF: 20 40 | ORACLE_ACTION_SENSOR: 41 | ORACLE_TYPE: 'random' 42 | MEASUREMENTS: ['AREA_COVERED', 'TOP_DOWN_MAP_POSE', 'OPSR', 'OBJECTS_COVERED_GEOMETRIC'] 43 | TOP_DOWN_MAP_POSE: 44 | TYPE: TopDownMapPose 45 | FOG_OF_WAR: 46 | DRAW: True 47 | FOV: 90 48 | VISIBILITY_DIST: 4.0 49 | DATASET: 50 | TYPE: PoseEstimation-v1 51 | SPLIT: test 52 | SCENES_DIR: environments/habitat/habitat-api/data/scene_datasets 53 | DATA_PATH: environments/habitat/habitat-api/data/datasets/pose_estimation/mp3d/v1/{split}/{split}.json.gz 54 | SHUFFLE_DATASET: False 55 | -------------------------------------------------------------------------------- /configs/pose_estimation/mp3d_val_config.yaml: -------------------------------------------------------------------------------- 1 | ENVIRONMENT: 2 | MAX_EPISODE_STEPS: 1001 3 | SIMULATOR: 4 | TYPE: "Sim-v1" 5 | AGENT_0: 6 | SENSORS: ['RGB_SENSOR', 'DEPTH_SENSOR', 'FINE_OCC_SENSOR', 'COARSE_OCC_SENSOR'] 7 | ACTION_SPACE_CONFIG: "v2" 8 | HABITAT_SIM_V0: 9 | GPU_DEVICE_ID: 0 10 | RGB_SENSOR: 11 | WIDTH: 84 12 | HEIGHT: 84 13 | DEPTH_SENSOR: 14 | WIDTH: 84 15 | HEIGHT: 84 16 | OCCUPANCY_MAPS: 17 | MAP_SCALE: 0.1 18 | MAP_SIZE: 800 19 | MAX_DEPTH: 3 20 | SMALL_MAP_RANGE: 20 21 | LARGE_MAP_RANGE: 100 22 | HEIGHT_LOWER: 0.2 23 | HEIGHT_UPPER: 1.5 24 | FINE_OCC_SENSOR: 25 | WIDTH: 84 26 | HEIGHT: 84 27 | COARSE_OCC_SENSOR: 28 | WIDTH: 84 29 | HEIGHT: 84 30 | OBJECT_ANNOTATIONS: 31 | IS_AVAILABLE: True 32 | PATH: 'environments/habitat/habitat-api/data/object_annotations/mp3d' 33 | TASK: 34 | TYPE: Pose-v0 35 | SENSORS: ['DELTA_SENSOR', 'ORACLE_ACTION_SENSOR', 'COLLISION_SENSOR', 'POSE_RGB_SENSOR', 'POSE_REGRESS_SENSOR'] 36 | POSE_RGB_SENSOR: 37 | NREF: 20 38 | POSE_REGRESS_SENSOR: 39 | NREF: 20 40 | ORACLE_ACTION_SENSOR: 41 | ORACLE_TYPE: 'random' 42 | MEASUREMENTS: ['AREA_COVERED', 'TOP_DOWN_MAP_POSE', 'OPSR', 
'OBJECTS_COVERED_GEOMETRIC'] 43 | TOP_DOWN_MAP_POSE: 44 | TYPE: TopDownMapPose 45 | DATASET: 46 | TYPE: PoseEstimation-v1 47 | SPLIT: val 48 | SCENES_DIR: environments/habitat/habitat-api/data/scene_datasets 49 | DATA_PATH: environments/habitat/habitat-api/data/datasets/pose_estimation/mp3d/v1/{split}/{split}.json.gz 50 | SHUFFLE_DATASET: False 51 | -------------------------------------------------------------------------------- /configs/pose_estimation/ppo_pose_test.yaml: -------------------------------------------------------------------------------- 1 | BASE_TASK_CONFIG_PATH: "configs/pose_estimation/mp3d_test_config.yaml" 2 | TRAINER: 3 | TRAINER_NAME: "ppo" 4 | RL: 5 | PPO: 6 | # general options 7 | num_processes: 1 8 | task_config: "configs/pose_estimation/mp3d_test_config.yaml" 9 | sensors: "RGB_SENSOR,DEPTH_SENSOR,FINE_OCC_SENSOR,COARSE_OCC_SENSOR" 10 | -------------------------------------------------------------------------------- /configs/pose_estimation/ppo_pose_val.yaml: -------------------------------------------------------------------------------- 1 | BASE_TASK_CONFIG_PATH: "configs/pose_estimation/mp3d_val_config.yaml" 2 | TRAINER: 3 | TRAINER_NAME: "ppo" 4 | RL: 5 | PPO: 6 | # general options 7 | num_processes: 1 8 | task_config: "configs/pose_estimation/mp3d_val_config.yaml" 9 | sensors: "RGB_SENSOR,DEPTH_SENSOR,FINE_OCC_SENSOR,COARSE_OCC_SENSOR" 10 | -------------------------------------------------------------------------------- /configs/pretrain_imitation/mp3d_train_oracle_landmarks_config.yaml: -------------------------------------------------------------------------------- 1 | ENVIRONMENT: 2 | MAX_EPISODE_STEPS: 501 3 | SIMULATOR: 4 | TYPE: "Sim-v1" 5 | AGENT_0: 6 | SENSORS: ['RGB_SENSOR', 'DEPTH_SENSOR', 'FINE_OCC_SENSOR', 'COARSE_OCC_SENSOR'] 7 | ACTION_SPACE_CONFIG: "v2" 8 | HABITAT_SIM_V0: 9 | GPU_DEVICE_ID: 0 10 | RGB_SENSOR: 11 | WIDTH: 84 12 | HEIGHT: 84 13 | DEPTH_SENSOR: 14 | WIDTH: 84 15 | HEIGHT: 84 16 | OCCUPANCY_MAPS: 17 | MAP_SCALE: 0.1 18 | MAP_SIZE: 800 19 | MAX_DEPTH: 3 20 | SMALL_MAP_RANGE: 20 21 | LARGE_MAP_RANGE: 100 22 | HEIGHT_LOWER: 0.2 23 | HEIGHT_UPPER: 1.5 24 | FINE_OCC_SENSOR: 25 | WIDTH: 84 26 | HEIGHT: 84 27 | COARSE_OCC_SENSOR: 28 | WIDTH: 84 29 | HEIGHT: 84 30 | OBJECT_ANNOTATIONS: 31 | IS_AVAILABLE: True 32 | PATH: 'environments/habitat/habitat-api/data/object_annotations/mp3d' 33 | TASK: 34 | TYPE: Pose-v0 35 | SENSORS: ['DELTA_SENSOR', 'ORACLE_ACTION_SENSOR', 'COLLISION_SENSOR'] 36 | ORACLE_ACTION_SENSOR: 37 | ORACLE_TYPE: 'pose' 38 | MEASUREMENTS: ['AREA_COVERED', 'NOVELTY_REWARD', 'COVERAGE_NOVELTY_REWARD'] 39 | DATASET: 40 | TYPE: PoseEstimation-v1 41 | SPLIT: train 42 | SCENES_DIR: environments/habitat/habitat-api/data/scene_datasets 43 | DATA_PATH: environments/habitat/habitat-api/data/datasets/pose_estimation/mp3d/v1/{split}/{split}.json.gz 44 | -------------------------------------------------------------------------------- /configs/pretrain_imitation/mp3d_train_oracle_objects_config.yaml: -------------------------------------------------------------------------------- 1 | ENVIRONMENT: 2 | MAX_EPISODE_STEPS: 501 3 | SIMULATOR: 4 | TYPE: "Sim-v1" 5 | AGENT_0: 6 | SENSORS: ['RGB_SENSOR', 'DEPTH_SENSOR', 'FINE_OCC_SENSOR', 'COARSE_OCC_SENSOR'] 7 | ACTION_SPACE_CONFIG: "v2" 8 | HABITAT_SIM_V0: 9 | GPU_DEVICE_ID: 0 10 | RGB_SENSOR: 11 | WIDTH: 84 12 | HEIGHT: 84 13 | DEPTH_SENSOR: 14 | WIDTH: 84 15 | HEIGHT: 84 16 | OCCUPANCY_MAPS: 17 | MAP_SCALE: 0.1 18 | MAP_SIZE: 800 19 | MAX_DEPTH: 3 20 | SMALL_MAP_RANGE: 20 21 | 
LARGE_MAP_RANGE: 100 22 | HEIGHT_LOWER: 0.2 23 | HEIGHT_UPPER: 1.5 24 | FINE_OCC_SENSOR: 25 | WIDTH: 84 26 | HEIGHT: 84 27 | COARSE_OCC_SENSOR: 28 | WIDTH: 84 29 | HEIGHT: 84 30 | OBJECT_ANNOTATIONS: 31 | IS_AVAILABLE: True 32 | PATH: 'environments/habitat/habitat-api/data/object_annotations/mp3d' 33 | TASK: 34 | TYPE: Pose-v0 35 | SENSORS: ['DELTA_SENSOR', 'ORACLE_ACTION_SENSOR', 'COLLISION_SENSOR'] 36 | ORACLE_ACTION_SENSOR: 37 | ORACLE_TYPE: 'object' 38 | MEASUREMENTS: ['AREA_COVERED', 'NOVELTY_REWARD', 'COVERAGE_NOVELTY_REWARD'] 39 | DATASET: 40 | TYPE: PoseEstimation-v1 41 | SPLIT: train 42 | SCENES_DIR: environments/habitat/habitat-api/data/scene_datasets 43 | DATA_PATH: environments/habitat/habitat-api/data/datasets/pose_estimation/mp3d/v1/{split}/{split}.json.gz 44 | -------------------------------------------------------------------------------- /configs/pretrain_imitation/mp3d_train_oracle_random_config.yaml: -------------------------------------------------------------------------------- 1 | ENVIRONMENT: 2 | MAX_EPISODE_STEPS: 501 3 | SIMULATOR: 4 | TYPE: "Sim-v1" 5 | AGENT_0: 6 | SENSORS: ['RGB_SENSOR', 'DEPTH_SENSOR', 'FINE_OCC_SENSOR', 'COARSE_OCC_SENSOR'] 7 | ACTION_SPACE_CONFIG: "v2" 8 | HABITAT_SIM_V0: 9 | GPU_DEVICE_ID: 0 10 | RGB_SENSOR: 11 | WIDTH: 84 12 | HEIGHT: 84 13 | DEPTH_SENSOR: 14 | WIDTH: 84 15 | HEIGHT: 84 16 | OCCUPANCY_MAPS: 17 | MAP_SCALE: 0.1 18 | MAP_SIZE: 800 19 | MAX_DEPTH: 3 20 | SMALL_MAP_RANGE: 20 21 | LARGE_MAP_RANGE: 100 22 | HEIGHT_LOWER: 0.2 23 | HEIGHT_UPPER: 1.5 24 | FINE_OCC_SENSOR: 25 | WIDTH: 84 26 | HEIGHT: 84 27 | COARSE_OCC_SENSOR: 28 | WIDTH: 84 29 | HEIGHT: 84 30 | OBJECT_ANNOTATIONS: 31 | IS_AVAILABLE: True 32 | PATH: 'environments/habitat/habitat-api/data/object_annotations/mp3d' 33 | TASK: 34 | TYPE: Pose-v0 35 | SENSORS: ['DELTA_SENSOR', 'ORACLE_ACTION_SENSOR', 'COLLISION_SENSOR'] 36 | ORACLE_ACTION_SENSOR: 37 | ORACLE_TYPE: 'random' 38 | MEASUREMENTS: ['AREA_COVERED', 'NOVELTY_REWARD', 'COVERAGE_NOVELTY_REWARD'] 39 | DATASET: 40 | TYPE: PoseEstimation-v1 41 | SPLIT: train 42 | SCENES_DIR: environments/habitat/habitat-api/data/scene_datasets 43 | DATA_PATH: environments/habitat/habitat-api/data/datasets/pose_estimation/mp3d/v1/{split}/{split}.json.gz 44 | -------------------------------------------------------------------------------- /configs/pretrain_imitation/ppo_pose_train_oracle_landmarks.yaml: -------------------------------------------------------------------------------- 1 | BASE_TASK_CONFIG_PATH: "configs/pretrain_imitation/mp3d_train_oracle_landmarks_config.yaml" 2 | TRAINER: 3 | TRAINER_NAME: "ppo" 4 | RL: 5 | PPO: 6 | # general options 7 | num_processes: 8 8 | task_config: "configs/pretrain_imitation/mp3d_train_oracle_landmarks_config.yaml" 9 | sensors: "RGB_SENSOR,DEPTH_SENSOR,FINE_OCC_SENSOR,COARSE_OCC_SENSOR" 10 | -------------------------------------------------------------------------------- /configs/pretrain_imitation/ppo_pose_train_oracle_objects.yaml: -------------------------------------------------------------------------------- 1 | BASE_TASK_CONFIG_PATH: "configs/pretrain_imitation/mp3d_train_oracle_objects_config.yaml" 2 | TRAINER: 3 | TRAINER_NAME: "ppo" 4 | RL: 5 | PPO: 6 | # general options 7 | num_processes: 8 8 | task_config: "configs/pretrain_imitation/mp3d_train_oracle_objects_config.yaml" 9 | sensors: "RGB_SENSOR,DEPTH_SENSOR,FINE_OCC_SENSOR,COARSE_OCC_SENSOR" 10 | -------------------------------------------------------------------------------- 
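The three imitation-pretraining task configs above are identical except for `TASK.ORACLE_ACTION_SENSOR.ORACLE_TYPE`: 'pose' for the landmarks oracle, 'object' for the objects oracle, and 'random' for the random oracle. The sketch below (illustrative only; assumes PyYAML and the repository root as the working directory) makes that concrete by diffing the flattened YAML files.

```
import yaml

def flatten(tree, prefix=""):
    # Flatten a nested dict into {dotted.key: value} pairs.
    flat = {}
    for key, value in tree.items():
        dotted = f"{prefix}.{key}" if prefix else str(key)
        if isinstance(value, dict):
            flat.update(flatten(value, dotted))
        else:
            flat[dotted] = value
    return flat

paths = {
    "landmarks": "configs/pretrain_imitation/mp3d_train_oracle_landmarks_config.yaml",
    "objects": "configs/pretrain_imitation/mp3d_train_oracle_objects_config.yaml",
    "random": "configs/pretrain_imitation/mp3d_train_oracle_random_config.yaml",
}
flat = {}
for name, path in paths.items():
    with open(path) as f:
        flat[name] = flatten(yaml.safe_load(f))

for key in sorted(set().union(*(cfg.keys() for cfg in flat.values()))):
    values = {name: cfg.get(key) for name, cfg in flat.items()}
    if len({str(v) for v in values.values()}) > 1:
        print(key, values)  # expected: only ...ORACLE_ACTION_SENSOR.ORACLE_TYPE
```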
/configs/pretrain_imitation/ppo_pose_train_oracle_random.yaml: -------------------------------------------------------------------------------- 1 | BASE_TASK_CONFIG_PATH: "configs/pretrain_imitation/mp3d_train_oracle_random_config.yaml" 2 | TRAINER: 3 | TRAINER_NAME: "ppo" 4 | RL: 5 | PPO: 6 | # general options 7 | num_processes: 8 8 | task_config: "configs/pretrain_imitation/mp3d_train_oracle_random_config.yaml" 9 | sensors: "RGB_SENSOR,DEPTH_SENSOR,FINE_OCC_SENSOR,COARSE_OCC_SENSOR" 10 | -------------------------------------------------------------------------------- /configs/pretrain_reconstruction/mp3d_train_config.yaml: -------------------------------------------------------------------------------- 1 | ENVIRONMENT: 2 | MAX_EPISODE_STEPS: 501 3 | SIMULATOR: 4 | TYPE: "Sim-v0" 5 | AGENT_0: 6 | SENSORS: ['RGB_SENSOR'] 7 | ACTION_SPACE_CONFIG: "v2" 8 | HABITAT_SIM_V0: 9 | GPU_DEVICE_ID: 0 10 | RGB_SENSOR: 11 | WIDTH: 84 12 | HEIGHT: 84 13 | TASK: 14 | TYPE: Pose-v0 15 | SENSORS: ['DELTA_SENSOR', 'COLLISION_SENSOR', 'POSE_RGB_SENSOR', 'POSE_REGRESS_SENSOR', 'POSE_MASK_SENSOR', 'ORACLE_ACTION_SENSOR'] 16 | POSE_RGB_SENSOR: 17 | NREF: 100 18 | POSE_REGRESS_SENSOR: 19 | NREF: 100 20 | POSE_MASK_SENSOR: 21 | NREF: 100 22 | ORACLE_ACTION_SENSOR: 23 | ORACLE_TYPE: 'random' 24 | MEASUREMENTS: [] 25 | DATASET: 26 | TYPE: PoseEstimation-v1 27 | SPLIT: train 28 | SCENES_DIR: environments/habitat/habitat-api/data/scene_datasets 29 | DATA_PATH: environments/habitat/habitat-api/data/datasets/reconstruction/mp3d/v1/{split}/{split}.json.gz 30 | -------------------------------------------------------------------------------- /configs/pretrain_reconstruction/mp3d_val_config.yaml: -------------------------------------------------------------------------------- 1 | ENVIRONMENT: 2 | MAX_EPISODE_STEPS: 501 3 | SIMULATOR: 4 | TYPE: "Sim-v0" 5 | AGENT_0: 6 | SENSORS: ['RGB_SENSOR'] 7 | ACTION_SPACE_CONFIG: "v2" 8 | HABITAT_SIM_V0: 9 | GPU_DEVICE_ID: 0 10 | RGB_SENSOR: 11 | WIDTH: 84 12 | HEIGHT: 84 13 | TASK: 14 | TYPE: Pose-v0 15 | SENSORS: ['DELTA_SENSOR', 'COLLISION_SENSOR', 'POSE_RGB_SENSOR', 'POSE_REGRESS_SENSOR', 'POSE_MASK_SENSOR', 'ORACLE_ACTION_SENSOR'] 16 | POSE_RGB_SENSOR: 17 | NREF: 100 18 | POSE_REGRESS_SENSOR: 19 | NREF: 100 20 | POSE_MASK_SENSOR: 21 | NREF: 100 22 | ORACLE_ACTION_SENSOR: 23 | ORACLE_TYPE: 'random' 24 | MEASUREMENTS: ['TOP_DOWN_MAP_POSE'] 25 | TOP_DOWN_MAP_POSE: 26 | TYPE: TopDownMapPose 27 | DATASET: 28 | TYPE: PoseEstimation-v1 29 | SPLIT: val 30 | SCENES_DIR: environments/habitat/habitat-api/data/scene_datasets 31 | DATA_PATH: environments/habitat/habitat-api/data/datasets/reconstruction/mp3d/v1/{split}/{split}.json.gz 32 | SHUFFLE_DATASET: False 33 | -------------------------------------------------------------------------------- /configs/pretrain_reconstruction/ppo_pose_train.yaml: -------------------------------------------------------------------------------- 1 | BASE_TASK_CONFIG_PATH: "configs/pretrain_reconstruction/mp3d_train_config.yaml" 2 | TRAINER: 3 | TRAINER_NAME: "ppo" 4 | RL: 5 | PPO: 6 | # general options 7 | num_processes: 8 8 | task_config: "configs/pretrain_reconstruction/mp3d_train_config.yaml" 9 | -------------------------------------------------------------------------------- /configs/pretrain_reconstruction/ppo_pose_val.yaml: -------------------------------------------------------------------------------- 1 | BASE_TASK_CONFIG_PATH: "configs/pretrain_reconstruction/mp3d_val_config.yaml" 2 | TRAINER: 3 | TRAINER_NAME: "ppo" 4 | RL: 5 | PPO: 6 | # general 
options 7 | num_processes: 1 8 | task_config: "configs/pretrain_reconstruction/mp3d_val_config.yaml" 9 | -------------------------------------------------------------------------------- /configs/reconstruction_exploration/mp3d_test_config.yaml: -------------------------------------------------------------------------------- 1 | ENVIRONMENT: 2 | MAX_EPISODE_STEPS: 1001 3 | SIMULATOR: 4 | TYPE: "Sim-v1" 5 | AGENT_0: 6 | SENSORS: ['RGB_SENSOR', 'DEPTH_SENSOR', 'FINE_OCC_SENSOR', 'COARSE_OCC_SENSOR'] 7 | ACTION_SPACE_CONFIG: "v2" 8 | HABITAT_SIM_V0: 9 | GPU_DEVICE_ID: 0 10 | RGB_SENSOR: 11 | WIDTH: 84 12 | HEIGHT: 84 13 | DEPTH_SENSOR: 14 | WIDTH: 84 15 | HEIGHT: 84 16 | OCCUPANCY_MAPS: 17 | MAP_SCALE: 0.1 18 | MAP_SIZE: 800 19 | MAX_DEPTH: 3 20 | SMALL_MAP_RANGE: 20 21 | LARGE_MAP_RANGE: 100 22 | HEIGHT_LOWER: 0.2 23 | HEIGHT_UPPER: 1.5 24 | FINE_OCC_SENSOR: 25 | WIDTH: 84 26 | HEIGHT: 84 27 | COARSE_OCC_SENSOR: 28 | WIDTH: 84 29 | HEIGHT: 84 30 | OBJECT_ANNOTATIONS: 31 | IS_AVAILABLE: True 32 | PATH: 'environments/habitat/habitat-api/data/object_annotations/mp3d' 33 | TASK: 34 | TYPE: Pose-v0 35 | SENSORS: ['DELTA_SENSOR', 'COLLISION_SENSOR', 'POSE_RGB_SENSOR', 'POSE_REGRESS_SENSOR', 'POSE_MASK_SENSOR', 'ORACLE_ACTION_SENSOR'] 36 | POSE_RGB_SENSOR: 37 | NREF: 100 38 | POSE_REGRESS_SENSOR: 39 | NREF: 100 40 | POSE_MASK_SENSOR: 41 | NREF: 100 42 | ORACLE_ACTION_SENSOR: 43 | ORACLE_TYPE: 'random' 44 | MEASUREMENTS: ['AREA_COVERED', 'TOP_DOWN_MAP_POSE', 'OPSR', 'OBJECTS_COVERED_GEOMETRIC'] 45 | TOP_DOWN_MAP_POSE: 46 | TYPE: TopDownMapPose 47 | DATASET: 48 | TYPE: PoseEstimation-v1 49 | SPLIT: test 50 | SCENES_DIR: environments/habitat/habitat-api/data/scene_datasets 51 | DATA_PATH: environments/habitat/habitat-api/data/datasets/reconstruction/mp3d/v1/{split}/{split}.json.gz 52 | SHUFFLE_DATASET: False 53 | -------------------------------------------------------------------------------- /configs/reconstruction_exploration/mp3d_train_config.yaml: -------------------------------------------------------------------------------- 1 | ENVIRONMENT: 2 | MAX_EPISODE_STEPS: 501 3 | SIMULATOR: 4 | TYPE: "Sim-v1" 5 | AGENT_0: 6 | SENSORS: ['RGB_SENSOR', 'DEPTH_SENSOR', 'FINE_OCC_SENSOR', 'COARSE_OCC_SENSOR'] 7 | ACTION_SPACE_CONFIG: "v2" 8 | HABITAT_SIM_V0: 9 | GPU_DEVICE_ID: 0 10 | RGB_SENSOR: 11 | WIDTH: 84 12 | HEIGHT: 84 13 | DEPTH_SENSOR: 14 | WIDTH: 84 15 | HEIGHT: 84 16 | OCCUPANCY_MAPS: 17 | MAP_SCALE: 0.1 18 | MAP_SIZE: 800 19 | MAX_DEPTH: 3 20 | SMALL_MAP_RANGE: 20 21 | LARGE_MAP_RANGE: 100 22 | HEIGHT_LOWER: 0.2 23 | HEIGHT_UPPER: 1.5 24 | FINE_OCC_SENSOR: 25 | WIDTH: 84 26 | HEIGHT: 84 27 | COARSE_OCC_SENSOR: 28 | WIDTH: 84 29 | HEIGHT: 84 30 | OBJECT_ANNOTATIONS: 31 | IS_AVAILABLE: True 32 | PATH: 'environments/habitat/habitat-api/data/object_annotations/mp3d' 33 | TASK: 34 | TYPE: Pose-v0 35 | SENSORS: ['DELTA_SENSOR', 'COLLISION_SENSOR', 'POSE_RGB_SENSOR', 'POSE_REGRESS_SENSOR', 'POSE_MASK_SENSOR', 'ORACLE_ACTION_SENSOR'] 36 | POSE_RGB_SENSOR: 37 | NREF: 100 38 | POSE_REGRESS_SENSOR: 39 | NREF: 100 40 | POSE_MASK_SENSOR: 41 | NREF: 100 42 | ORACLE_ACTION_SENSOR: 43 | ORACLE_TYPE: 'random' 44 | MEASUREMENTS: ['AREA_COVERED', 'INC_AREA_COVERED', 'OPSR'] 45 | DATASET: 46 | TYPE: PoseEstimation-v1 47 | SPLIT: train 48 | SCENES_DIR: environments/habitat/habitat-api/data/scene_datasets 49 | DATA_PATH: environments/habitat/habitat-api/data/datasets/reconstruction/mp3d/v1/{split}/{split}.json.gz 50 | -------------------------------------------------------------------------------- 
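One detail worth noting in the `DATASET` sections above: `DATA_PATH` contains a `{split}` placeholder, which Habitat-style dataset loaders typically resolve by formatting the path with the `SPLIT` value. The snippet below is illustrative only and simply shows which episode files the reconstruction-exploration configs are therefore expected to read, assuming the data layout from the Habitat setup instructions.

```
# Illustrative only: resolve the {split} template used by DATA_PATH above.
data_path = (
    "environments/habitat/habitat-api/data/datasets/"
    "reconstruction/mp3d/v1/{split}/{split}.json.gz"
)
for split in ("train", "val", "test"):
    # e.g. train -> .../reconstruction/mp3d/v1/train/train.json.gz
    print(f"{split:5s} -> {data_path.format(split=split)}")
```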
/configs/reconstruction_exploration/mp3d_val_config.yaml: -------------------------------------------------------------------------------- 1 | ENVIRONMENT: 2 | MAX_EPISODE_STEPS: 1001 3 | SIMULATOR: 4 | TYPE: "Sim-v1" 5 | AGENT_0: 6 | SENSORS: ['RGB_SENSOR', 'DEPTH_SENSOR', 'FINE_OCC_SENSOR', 'COARSE_OCC_SENSOR'] 7 | ACTION_SPACE_CONFIG: "v2" 8 | HABITAT_SIM_V0: 9 | GPU_DEVICE_ID: 0 10 | RGB_SENSOR: 11 | WIDTH: 84 12 | HEIGHT: 84 13 | DEPTH_SENSOR: 14 | WIDTH: 84 15 | HEIGHT: 84 16 | OCCUPANCY_MAPS: 17 | MAP_SCALE: 0.1 18 | MAP_SIZE: 800 19 | MAX_DEPTH: 3 20 | SMALL_MAP_RANGE: 20 21 | LARGE_MAP_RANGE: 100 22 | HEIGHT_LOWER: 0.2 23 | HEIGHT_UPPER: 1.5 24 | FINE_OCC_SENSOR: 25 | WIDTH: 84 26 | HEIGHT: 84 27 | COARSE_OCC_SENSOR: 28 | WIDTH: 84 29 | HEIGHT: 84 30 | OBJECT_ANNOTATIONS: 31 | IS_AVAILABLE: True 32 | PATH: 'environments/habitat/habitat-api/data/object_annotations/mp3d' 33 | TASK: 34 | TYPE: Pose-v0 35 | SENSORS: ['DELTA_SENSOR', 'COLLISION_SENSOR', 'POSE_RGB_SENSOR', 'POSE_REGRESS_SENSOR', 'POSE_MASK_SENSOR', 'ORACLE_ACTION_SENSOR'] 36 | POSE_RGB_SENSOR: 37 | NREF: 100 38 | POSE_REGRESS_SENSOR: 39 | NREF: 100 40 | POSE_MASK_SENSOR: 41 | NREF: 100 42 | ORACLE_ACTION_SENSOR: 43 | ORACLE_TYPE: 'random' 44 | MEASUREMENTS: ['AREA_COVERED', 'TOP_DOWN_MAP_POSE', 'OPSR', 'OBJECTS_COVERED_GEOMETRIC'] 45 | TOP_DOWN_MAP_POSE: 46 | TYPE: TopDownMapPose 47 | DATASET: 48 | TYPE: PoseEstimation-v1 49 | SPLIT: val 50 | SCENES_DIR: environments/habitat/habitat-api/data/scene_datasets 51 | DATA_PATH: environments/habitat/habitat-api/data/datasets/reconstruction/mp3d/v1/{split}/{split}.json.gz 52 | SHUFFLE_DATASET: False 53 | -------------------------------------------------------------------------------- /configs/reconstruction_exploration/ppo_pose_test.yaml: -------------------------------------------------------------------------------- 1 | BASE_TASK_CONFIG_PATH: "configs/reconstruction_exploration/mp3d_test_config.yaml" 2 | TRAINER: 3 | TRAINER_NAME: "ppo" 4 | RL: 5 | PPO: 6 | # general options 7 | num_processes: 1 8 | task_config: "configs/reconstruction_exploration/mp3d_test_config.yaml" 9 | sensors: "RGB_SENSOR,DEPTH_SENSOR,FINE_OCC_SENSOR,COARSE_OCC_SENSOR" 10 | -------------------------------------------------------------------------------- /configs/reconstruction_exploration/ppo_pose_train.yaml: -------------------------------------------------------------------------------- 1 | BASE_TASK_CONFIG_PATH: "configs/reconstruction_exploration/mp3d_train_config.yaml" 2 | TRAINER: 3 | TRAINER_NAME: "ppo" 4 | RL: 5 | PPO: 6 | # general options 7 | num_processes: 8 8 | task_config: "configs/reconstruction_exploration/mp3d_train_config.yaml" 9 | sensors: "RGB_SENSOR,DEPTH_SENSOR,FINE_OCC_SENSOR,COARSE_OCC_SENSOR" 10 | -------------------------------------------------------------------------------- /configs/reconstruction_exploration/ppo_pose_val.yaml: -------------------------------------------------------------------------------- 1 | BASE_TASK_CONFIG_PATH: "configs/reconstruction_exploration/mp3d_val_config.yaml" 2 | TRAINER: 3 | TRAINER_NAME: "ppo" 4 | RL: 5 | PPO: 6 | # general options 7 | num_processes: 1 8 | task_config: "configs/reconstruction_exploration/mp3d_val_config.yaml" 9 | sensors: "RGB_SENSOR,DEPTH_SENSOR,FINE_OCC_SENSOR,COARSE_OCC_SENSOR" 10 | -------------------------------------------------------------------------------- /environments/gym-avd/.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | 
__pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | wheels/ 23 | *.egg-info/ 24 | .installed.cfg 25 | *.egg 26 | MANIFEST 27 | 28 | # PyInstaller 29 | # Usually these files are written by a python script from a template 30 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 31 | *.manifest 32 | *.spec 33 | 34 | # Installer logs 35 | pip-log.txt 36 | pip-delete-this-directory.txt 37 | 38 | # Unit test / coverage reports 39 | htmlcov/ 40 | .tox/ 41 | .coverage 42 | .coverage.* 43 | .cache 44 | nosetests.xml 45 | coverage.xml 46 | *.cover 47 | .hypothesis/ 48 | .pytest_cache/ 49 | 50 | # Translations 51 | *.mo 52 | *.pot 53 | 54 | # Django stuff: 55 | *.log 56 | local_settings.py 57 | db.sqlite3 58 | 59 | # Flask stuff: 60 | instance/ 61 | .webassets-cache 62 | 63 | # Scrapy stuff: 64 | .scrapy 65 | 66 | # Sphinx documentation 67 | docs/_build/ 68 | 69 | # PyBuilder 70 | target/ 71 | 72 | # Jupyter Notebook 73 | .ipynb_checkpoints 74 | 75 | # pyenv 76 | .python-version 77 | 78 | # celery beat schedule file 79 | celerybeat-schedule 80 | 81 | # SageMath parsed files 82 | *.sage.py 83 | 84 | # Environments 85 | .env 86 | .venv 87 | env/ 88 | venv/ 89 | ENV/ 90 | env.bak/ 91 | venv.bak/ 92 | 93 | # Spyder project settings 94 | .spyderproject 95 | .spyproject 96 | 97 | # Rope project settings 98 | .ropeproject 99 | 100 | # mkdocs documentation 101 | /site 102 | 103 | # mypy 104 | .mypy_cache/ 105 | *.swp 106 | data 107 | -------------------------------------------------------------------------------- /environments/gym-avd/README.md: -------------------------------------------------------------------------------- 1 | # Active Vision Simulator 2 | This directory contains the code to an [OpenAI gym](https://gym.openai.com/)-based environment for simulating discrete motion on the [Active Vision Dataset](https://www.cs.unc.edu/~ammirato/active_vision_dataset_website/). 3 | 4 | ## Installation instructions 5 | 1. Install dependencies. 6 | 7 | ``` 8 | pip install -r requirements.txt 9 | export GYM_AVD_ROOT= 10 | ``` 11 | 2. Install `gym-avd`. 12 | 13 | ``` 14 | cd $GYM_AVD_ROOT 15 | python setup.py install 16 | ``` 17 | 3. Add the code root to `~/.bashrc`. 18 | 19 | ``` 20 | export PYTHONPATH=$GYM_AVD_ROOT:$PYTHONPATH 21 | ``` 22 | 4. Download data from the [AVD website](https://www.cs.unc.edu/~ammirato/active_vision_dataset_website/get_data.html). The camera calibration information can be obtained from the author of AVD. 23 | 24 | ``` 25 | tar -xvf ActiveVisionDataset_part1.tar 26 | tar -xvf ActiveVisionDataset_part2.tar 27 | tar -xvf ActiveVisionDataset_part3.tar 28 | tar -xvf ActiveVisionDataset_COLMAP_camera_params_part1-3.tar 29 | 30 | export AVD_DATASET_ROOT= 31 | ``` 32 | 33 | 5. Download additional processed data for simulation. 34 | 35 | ``` 36 | cd $GYM_AVD_ROOT/gym_avd 37 | mkdir data 38 | cd data 39 | wget https://dl.fbaipublicfiles.com/exploring-exploration/avd_extra_data.tar.gz -O data.tar.gz 40 | tar -xvf data.tar.gz 41 | rm data.tar.gz 42 | ``` 43 | 6. Set configuration paths for the simulator in `$GYM_AVD_ROOT/gym_avd/envs/config.py`. 44 | 45 | ``` 46 | GYM_AVD_ROOT= 47 | ROOT_DIR= 48 | ``` 49 | 7. 
Process dataset to extract images and connectivity: 50 | 51 | ``` 52 | cd $GYM_AVD_ROOT 53 | python preprocess_raw_data.py --root_dir $AVD_DATASET_ROOT 54 | ``` 55 | This will create the following files: 56 | 57 | ``` 58 | $AVD_DATASET_ROOT/processed_images_84x84.h5 59 | $AVD_DATASET_ROOT/processed_scenes_84x84.npy 60 | ``` 61 | 62 | ## Task demos 63 | This repository supports four tasks: 64 | 65 | - Exploration 66 | - Pose estimation 67 | - Reconstruction 68 | - PointNav 69 | 70 | Visual demos for each task are available. 71 | 72 | ``` 73 | cd $GYM_AVD_ROOT 74 | python gym_avd/demos/exploration_demo.py 75 | python gym_avd/demos/pose_estimation_demo.py 76 | python gym_avd/demos/reconstruction_demo.py 77 | python gym_avd/demos/pointnav_demo.py 78 | ``` 79 | -------------------------------------------------------------------------------- /environments/gym-avd/gym_avd/.gitignore: -------------------------------------------------------------------------------- 1 | data 2 | -------------------------------------------------------------------------------- /environments/gym-avd/gym_avd/__init__.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # Copyright (c) Facebook, Inc. and its affiliates. 3 | # All rights reserved. 4 | # This source code is licensed under the license found in the 5 | # LICENSE file in the root directory of this source tree. 6 | 7 | import gym_avd.envs 8 | -------------------------------------------------------------------------------- /environments/gym-avd/gym_avd/assets/maps_topdown_agent_sprite/100x100.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/facebookresearch/exploring_exploration/09d3f9b8703162fcc0974989e60f8cd5b47d4d39/environments/gym-avd/gym_avd/assets/maps_topdown_agent_sprite/100x100.png -------------------------------------------------------------------------------- /environments/gym-avd/gym_avd/demos/exploration_demo.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # Copyright (c) Facebook, Inc. and its affiliates. 3 | # All rights reserved. 4 | # This source code is licensed under the license found in the 5 | # LICENSE file in the root directory of this source tree. 6 | 7 | import cv2 8 | import gym 9 | import gym_avd 10 | import numpy as np 11 | from utils import * 12 | 13 | env = gym.make("avd-pose-landmarks-oracle-v0") 14 | obs = env.reset() 15 | topdown = env.generate_topdown_occupancy() 16 | rgb_im = proc_rgb(obs["im"]) 17 | fine_occ_im = proc_rgb(obs["fine_occupancy"]) 18 | coarse_occ_im = proc_rgb(obs["coarse_occupancy"]) 19 | topdown_im = proc_rgb(topdown) 20 | cv2.imshow( 21 | "Exploration demo", 22 | np.concatenate([rgb_im, fine_occ_im, coarse_occ_im, topdown_im], axis=1), 23 | ) 24 | cv2.waitKey(60) 25 | for i in range(1000): 26 | # oracle action is generated by sampling shortest paths between random points in the environment. 
27 | action = obs["oracle_action"][0] 28 | obs, _, done, info = env.step(action) 29 | if done: 30 | obs = env.reset() 31 | topdown = env.generate_topdown_occupancy() 32 | rgb_im = proc_rgb(obs["im"]) 33 | fine_occ_im = proc_rgb(obs["fine_occupancy"]) 34 | coarse_occ_im = proc_rgb(obs["coarse_occupancy"]) 35 | topdown_im = proc_rgb(topdown) 36 | 37 | metrics_to_print = { 38 | "Area covered (m^2)": info["seen_area"], 39 | "Objects covered": info["num_objects_visited"], 40 | "Landmarks covered": info["oracle_pose_success"], 41 | "Novelty": info["count_based_reward"], 42 | "Smooth coverage": info["coverage_novelty_reward"], 43 | } 44 | 45 | print("===============================================") 46 | for k, v in metrics_to_print.items(): 47 | print(f"{k:<25s}: {v:6.2f}") 48 | 49 | cv2.imshow( 50 | "Exploration demo", 51 | np.concatenate([rgb_im, fine_occ_im, coarse_occ_im, topdown_im], axis=1), 52 | ) 53 | cv2.waitKey(60) 54 | -------------------------------------------------------------------------------- /environments/gym-avd/gym_avd/demos/pointnav_demo.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # Copyright (c) Facebook, Inc. and its affiliates. 3 | # All rights reserved. 4 | # This source code is licensed under the license found in the 5 | # LICENSE file in the root directory of this source tree. 6 | 7 | import cv2 8 | import gym 9 | import gym_avd 10 | import numpy as np 11 | from utils import * 12 | 13 | WIDTH = 300 14 | HEIGHT = 300 15 | 16 | overall_image = np.zeros((HEIGHT * 2, WIDTH * 3, 3), dtype=np.uint8) 17 | 18 | T_exp = 50 19 | T_nav = 50 20 | 21 | env = gym.make("avd-nav-random-oracle-v0") 22 | env.seed(123) 23 | env.set_split("val") 24 | env.set_t_exp_and_nav(T_exp, T_nav) 25 | env.set_return_topdown_map() 26 | 27 | 28 | def process_inputs(rgb, depth, fine_occ, coarse_occ, topdown_map, target): 29 | obs_1 = np.concatenate([rgb, depth, topdown_map], axis=1) 30 | obs_2 = np.concatenate([fine_occ, coarse_occ, target], axis=1) 31 | return np.concatenate([obs_1, obs_2], axis=0) 32 | 33 | 34 | for i in range(10): 35 | obs = env.reset() 36 | topdown = env.generate_topdown_occupancy() 37 | rgb_im = proc_rgb(obs["im"]) 38 | fine_occ_im = proc_rgb(obs["fine_occupancy"]) 39 | coarse_occ_im = proc_rgb(obs["coarse_occupancy"]) 40 | topdown_im = proc_rgb(topdown) 41 | cv2.imshow( 42 | "PointNav: exploration phase", 43 | np.concatenate([rgb_im, fine_occ_im, coarse_occ_im, topdown_im], axis=1), 44 | ) 45 | cv2.waitKey(150) 46 | 47 | done = False 48 | for t in range(T_exp + T_nav): 49 | if t < T_exp: 50 | action = obs["oracle_action"][0].item() 51 | else: 52 | action = obs["sp_action"][0].item() 53 | 54 | obs, reward, done, info = env.step(action) 55 | if done or action == 3: 56 | cv2.destroyWindow("PointNav: navigation phase") 57 | break 58 | 59 | topdown = env.generate_topdown_occupancy() 60 | rgb_im = proc_rgb(obs["im"]) 61 | fine_occ_im = proc_rgb(obs["fine_occupancy"]) 62 | coarse_occ_im = proc_rgb(obs["coarse_occupancy"]) 63 | topdown_im = proc_rgb(topdown) 64 | if t < T_exp: 65 | cv2.imshow( 66 | "PointNav: exploration phase", 67 | np.concatenate( 68 | [rgb_im, fine_occ_im, coarse_occ_im, topdown_im], axis=1 69 | ), 70 | ) 71 | else: 72 | if t == T_exp: 73 | cv2.destroyWindow("PointNav: exploration phase") 74 | cv2.imshow( 75 | "PointNav: navigation phase", 76 | np.concatenate( 77 | [rgb_im, fine_occ_im, coarse_occ_im, topdown_im], axis=1 78 | ), 79 | ) 80 | 81 | cv2.waitKey(150) 82 | 
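The demo scripts above all follow the same interaction pattern: make an `avd-*` environment, reset it, and repeatedly feed back the oracle action from the observation dictionary. Below is a condensed, display-free variant of `exploration_demo.py` (no OpenCV windows) that can serve as a quick smoke test; it assumes gym-avd and its data have been set up as described in the README above.

```
import gym
import gym_avd  # registers the avd-* environments

env = gym.make("avd-pose-landmarks-oracle-v0")
obs = env.reset()
for _ in range(100):
    action = obs["oracle_action"][0]  # shortest-path oracle action
    obs, _, done, info = env.step(action)
    if done:
        obs = env.reset()

# Same metrics printed by exploration_demo.py:
print("Area covered (m^2):", info["seen_area"])
print("Objects covered   :", info["num_objects_visited"])
print("Landmarks covered :", info["oracle_pose_success"])
```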
-------------------------------------------------------------------------------- /environments/gym-avd/gym_avd/demos/pose_estimation_demo.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # Copyright (c) Facebook, Inc. and its affiliates. 3 | # All rights reserved. 4 | # This source code is licensed under the license found in the 5 | # LICENSE file in the root directory of this source tree. 6 | 7 | import cv2 8 | import gym 9 | import gym_avd 10 | import numpy as np 11 | from utils import * 12 | 13 | 14 | def create_reference_grid(refs_uint8): 15 | """ 16 | Inputs: 17 | refs_uint8 - (nRef, H, W, C) numpy array 18 | """ 19 | refs_uint8 = np.copy(refs_uint8) 20 | nRef, H, W, C = refs_uint8.shape 21 | 22 | nrow = int(math.sqrt(nRef)) 23 | 24 | ncol = nRef // nrow # (number of images per column) 25 | if nrow * ncol < nRef: 26 | ncol += 1 27 | final_grid = np.zeros((nrow * ncol, *refs_uint8.shape[1:]), dtype=np.uint8) 28 | font = cv2.FONT_HERSHEY_SIMPLEX 29 | 30 | final_grid[:nRef] = refs_uint8 31 | final_grid = final_grid.reshape( 32 | ncol, nrow, *final_grid.shape[1:] 33 | ) # (ncol, nrow, H, W, C) 34 | final_grid = final_grid.transpose(0, 2, 1, 3, 4) 35 | final_grid = final_grid.reshape(ncol * H, nrow * W, C) 36 | return final_grid 37 | 38 | 39 | WIDTH = 300 40 | HEIGHT = 300 41 | 42 | overall_image = np.zeros((HEIGHT * 2, WIDTH * 3, 3), dtype=np.uint8) 43 | 44 | env = gym.make("avd-pose-landmarks-oracle-v0") 45 | env.set_split("test") 46 | env.seed(123 + 12) 47 | env.plot_references_in_topdown = True 48 | nref = 10 49 | env.set_nref(nref) 50 | 51 | obs = env.reset() 52 | topdown = env.generate_topdown_occupancy() 53 | rgb_im = proc_rgb(obs["im"]) 54 | topdown_im = proc_rgb(topdown) 55 | ref_rgb = [proc_rgb(obs["pose_refs"][n]) for n in range(nref)] 56 | ref_rgb = cv2.resize(create_reference_grid(np.stack(ref_rgb, axis=0)), (HEIGHT, WIDTH)) 57 | 58 | overall_image = np.concatenate([rgb_im, topdown_im, ref_rgb], axis=1) 59 | 60 | cv2.imshow("Pose estimation demo", overall_image) 61 | cv2.waitKey(60) 62 | 63 | for i in range(10000): 64 | action = obs["oracle_action"][0] 65 | 66 | obs, _, done, info = env.step(action) 67 | 68 | if done: 69 | obs = env.reset() 70 | ref_rgb = [proc_rgb(obs["pose_refs"][n]) for n in range(nref)] 71 | ref_rgb = cv2.resize( 72 | create_reference_grid(np.stack(ref_rgb, axis=0)), (HEIGHT, WIDTH) 73 | ) 74 | 75 | topdown = env.generate_topdown_occupancy() 76 | rgb_im = proc_rgb(obs["im"]) 77 | topdown_im = proc_rgb(topdown) 78 | overall_image = np.concatenate([rgb_im, topdown_im, ref_rgb], axis=1) 79 | 80 | area = info["seen_area"] 81 | nlandmarks = info["oracle_pose_success"] 82 | nobjects = info["num_objects_visited"] 83 | 84 | print(f"Area: {area:5.2f} | OSR: {nlandmarks:5.2f} | Objects: {nobjects:5.2f}") 85 | cv2.imshow("Pose estimation demo", overall_image) 86 | cv2.waitKey(60) 87 | -------------------------------------------------------------------------------- /environments/gym-avd/gym_avd/demos/reconstruction_demo.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # Copyright (c) Facebook, Inc. and its affiliates. 3 | # All rights reserved. 4 | # This source code is licensed under the license found in the 5 | # LICENSE file in the root directory of this source tree. 
6 | 7 | import cv2 8 | import gym 9 | import gym_avd 10 | import numpy as np 11 | from utils import * 12 | 13 | 14 | def create_reference_grid(refs_uint8): 15 | """ 16 | Inputs: 17 | refs_uint8 - (nRef, H, W, C) numpy array 18 | """ 19 | refs_uint8 = np.copy(refs_uint8) 20 | nRef, H, W, C = refs_uint8.shape 21 | 22 | nrow = int(math.sqrt(nRef)) 23 | 24 | ncol = nRef // nrow # (number of images per column) 25 | if nrow * ncol < nRef: 26 | ncol += 1 27 | final_grid = np.zeros((nrow * ncol, *refs_uint8.shape[1:]), dtype=np.uint8) 28 | font = cv2.FONT_HERSHEY_SIMPLEX 29 | 30 | final_grid[:nRef] = refs_uint8 31 | final_grid = final_grid.reshape( 32 | ncol, nrow, *final_grid.shape[1:] 33 | ) # (ncol, nrow, H, W, C) 34 | final_grid = final_grid.transpose(0, 2, 1, 3, 4) 35 | final_grid = final_grid.reshape(ncol * H, nrow * W, C) 36 | return final_grid 37 | 38 | 39 | WIDTH = 300 40 | HEIGHT = 300 41 | 42 | overall_image = np.zeros((HEIGHT * 2, WIDTH * 3, 3), dtype=np.uint8) 43 | 44 | env = gym.make("avd-recon-v0") 45 | env.set_split("test") 46 | env.seed(123 + 12) 47 | env.plot_references_in_topdown = True 48 | nref = 50 49 | env.set_nref(nref) 50 | 51 | obs = env.reset() 52 | topdown = env.generate_topdown_occupancy() 53 | rgb_im = proc_rgb(obs["im"]) 54 | topdown_im = proc_rgb(topdown) 55 | ref_rgb = [proc_rgb(obs["pose_refs"][n]) for n in range(nref)] 56 | ref_rgb = cv2.resize(create_reference_grid(np.stack(ref_rgb, axis=0)), (HEIGHT, WIDTH)) 57 | 58 | overall_image = np.concatenate([rgb_im, topdown_im, ref_rgb], axis=1) 59 | 60 | cv2.imshow("Reconstruction demo", overall_image) 61 | cv2.waitKey(60) 62 | 63 | for i in range(10000): 64 | action = obs["oracle_action"][0] 65 | 66 | obs, _, done, info = env.step(action) 67 | 68 | if done: 69 | obs = env.reset() 70 | ref_rgb = [proc_rgb(obs["pose_refs"][n]) for n in range(nref)] 71 | ref_rgb = cv2.resize( 72 | create_reference_grid(np.stack(ref_rgb, axis=0)), (HEIGHT, WIDTH) 73 | ) 74 | 75 | topdown = env.generate_topdown_occupancy() 76 | rgb_im = proc_rgb(obs["im"]) 77 | topdown_im = proc_rgb(topdown) 78 | overall_image = np.concatenate([rgb_im, topdown_im, ref_rgb], axis=1) 79 | 80 | cv2.imshow("Reconstruction demo", overall_image) 81 | cv2.waitKey(60) 82 | -------------------------------------------------------------------------------- /environments/gym-avd/gym_avd/demos/utils.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # Copyright (c) Facebook, Inc. and its affiliates. 3 | # All rights reserved. 4 | # This source code is licensed under the license found in the 5 | # LICENSE file in the root directory of this source tree. 6 | 7 | import cv2 8 | import numpy as np 9 | import math  # needed by the demos' create_reference_grid helpers, which pull it in via `from utils import *` 10 | 11 | def proc_rgb(rgb): 12 | return cv2.resize(np.flip(rgb, axis=2), (300, 300)) 13 | 14 | 15 | def proc_depth(depth): 16 | depth = np.clip(depth / 1000.0, 0.0, 10.0) # Meters 17 | depth = depth * 255.0 / 10.0 # Intensities 18 | depth = np.repeat(depth, 3, axis=-1) 19 | return cv2.resize(depth.astype(np.uint8), (300, 300)) 20 | -------------------------------------------------------------------------------- /environments/gym-avd/gym_avd/envs/__init__.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # Copyright (c) Facebook, Inc. and its affiliates. 3 | # All rights reserved. 4 | # This source code is licensed under the license found in the 5 | # LICENSE file in the root directory of this source tree.
6 | 7 | from gym_avd.envs.avd_base_env import * 8 | from gym_avd.envs.avd_occ_base_env import * 9 | from gym_avd.envs.avd_pose_env import * 10 | from gym_avd.envs.avd_recon_env import * 11 | from gym_avd.envs.avd_nav_env import * 12 | -------------------------------------------------------------------------------- /environments/gym-avd/gym_avd/envs/avd_recon_env.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # Copyright (c) Facebook, Inc. and its affiliates. 3 | # All rights reserved. 4 | # This source code is licensed under the license found in the 5 | # LICENSE file in the root directory of this source tree. 6 | 7 | import gym 8 | import math 9 | import numpy as np 10 | 11 | from typing import Any, Dict, List, Optional, Tuple 12 | 13 | from gym import error, spaces, utils 14 | from gym.utils import seeding 15 | from gym_avd.envs.config import * 16 | from gym_avd.envs.utils import * 17 | from gym.envs.registration import register 18 | 19 | from gym_avd.envs.avd_pose_env import AVDPoseBaseEnv 20 | 21 | 22 | class AVDReconEnv(AVDPoseBaseEnv): 23 | r"""Implements an environment for the reconstruction task. It builds on top of 24 | the AVDPoseBaseEnv and samples reconstruction targets as locations on a uniform 25 | grid in the environment. 26 | """ 27 | 28 | def __init__(self, nRef: int = 50): 29 | super().__init__(nRef=nRef) 30 | 31 | def _initialize_environment_variables(self): 32 | r"""Additionally define reconstruction reference sampling details. 33 | """ 34 | super()._initialize_environment_variables() 35 | self.cluster_root_dir = CLUSTER_ROOT_DIR 36 | self.ref_sample_intervals = None 37 | 38 | def _sample_pose_refs(self): 39 | r"""Sample views from a uniform grid locations. 40 | """ 41 | min_x, min_z, max_x, max_z = self.get_environment_extents() 42 | all_nodes = self.data_conn[self.scene_idx]["nodes"] 43 | all_node_idxes = list(range(len(all_nodes))) 44 | all_nodes_positions = [ 45 | [node["world_pos"][2], node["world_pos"][0]] for node in all_nodes 46 | ] 47 | all_nodes_positions = np.array(all_nodes_positions) * self.scale 48 | # Sample nodes uniformly @ 1.5m distance from the environment. 49 | range_x = np.arange(min_x, max_x, 1500.0) 50 | range_z = np.arange(min_z, max_z, 1500.0) 51 | relevant_node_idxes = set() 52 | relevant_nodes = [] 53 | for x in range_x: 54 | for z in range_z: 55 | # Find closest node to this coordinate. 56 | zipped_data = zip(all_nodes, all_node_idxes, all_nodes_positions,) 57 | min_dist = math.inf 58 | min_dist_node = None 59 | min_dist_node_idx = None 60 | for node, node_idx, node_position in zipped_data: 61 | nx, nz = node_position[0], node_position[1] 62 | d = np.sqrt((x - nx) ** 2 + (z - nz) ** 2).item() 63 | if d < min_dist: 64 | min_dist = d 65 | min_dist_node = node 66 | min_dist_node_idx = node_idx 67 | if min_dist_node_idx not in relevant_node_idxes: 68 | relevant_nodes.append(min_dist_node) 69 | relevant_node_idxes.add(min_dist_node_idx) 70 | # Sample the reference images from the nodes. 71 | relevant_images = [] 72 | for node in relevant_nodes: 73 | for j in range(0, 12, 3): 74 | image_name = node["views"][j]["image_name"] 75 | relevant_images.append(image_name) 76 | self._pose_image_names = [] 77 | self._pose_refs = [] 78 | self._pose_refs_depth = [] 79 | self.ref_positions = [] 80 | self.ref_poses = [] 81 | self._pose_regress = [] 82 | for count, pose_image in enumerate(relevant_images): 83 | # Limit to self.nRef images. 
84 | if count >= self.nRef: 85 | break 86 | # Compute data for the pose references. 87 | ref_position = self._get_position(pose_image) 88 | ref_pose = self._get_pose(pose_image) 89 | pose_idx = self.images_to_idx[pose_image] 90 | pose_ref = self.scene_images[pose_idx] 91 | pose_ref_depth = self._process_depth(self.scene_depth[pose_idx]) 92 | pose_ref = pose_ref[np.newaxis, :, :, :] 93 | pose_ref_depth = pose_ref_depth[np.newaxis, :, :, :] 94 | # Compute reference pose relative to agent's starting pose. 95 | dx = ref_position[0] - self.start_position[0] 96 | dz = ref_position[2] - self.start_position[2] 97 | dr = math.sqrt(dx ** 2 + dz ** 2) 98 | dtheta = math.atan2(dz, dx) - self.start_pose 99 | dhead = ref_pose - self.start_pose 100 | delev = 0.0 101 | pose_regress = (dr, dtheta, dhead, delev) 102 | # Update the set of pose references. 103 | self._pose_image_names.append(pose_image) 104 | self._pose_refs.append(pose_ref) 105 | self._pose_refs_depth.append(pose_ref_depth) 106 | self.ref_positions.append(ref_position) 107 | self.ref_poses.append(ref_pose) 108 | self._pose_regress.append(pose_regress) 109 | 110 | self._pose_refs = np.concatenate(self._pose_refs, axis=0) 111 | self._pose_refs_depth = np.concatenate(self._pose_refs_depth, axis=0) 112 | self.ref_positions = np.array(self.ref_positions) 113 | self.ref_poses = np.array(self.ref_poses) 114 | self._pose_regress = np.array(self._pose_regress) 115 | self.oracle_pose_successes = np.zeros((self.nRef,)) 116 | self._valid_masks = np.ones((self._pose_refs.shape[0],)) 117 | # Pad the data with dummy data to account for missing references. 118 | if self._pose_refs.shape[0] < self.nRef: 119 | padding = self.nRef - self._pose_refs.shape[0] 120 | dummy_pose_image_names = ["" for _ in range(padding)] 121 | np_shape = (padding, *self._pose_refs.shape[1:]) 122 | dummy_pose_refs = np.zeros(np_shape, dtype=np.uint8) 123 | np_shape = (padding, *self._pose_refs_depth.shape[1:]) 124 | dummy_pose_refs_depth = np.zeros(np_shape, dtype=np.float32) 125 | dummy_ref_positions = np.zeros((padding, 3)) 126 | dummy_ref_poses = np.zeros((padding,)) 127 | dummy_pose_regress = np.zeros((padding, 4)) 128 | dummy_mask = np.zeros((padding,)) 129 | self._pose_image_names += dummy_pose_image_names 130 | self._pose_refs = np.concatenate( 131 | [self._pose_refs, dummy_pose_refs], axis=0, 132 | ) 133 | self._pose_refs_depth = np.concatenate( 134 | [self._pose_refs_depth, dummy_pose_refs_depth], axis=0, 135 | ) 136 | self.ref_positions = np.concatenate( 137 | [self.ref_positions, dummy_ref_positions], axis=0, 138 | ) 139 | self.ref_poses = np.concatenate([self.ref_poses, dummy_ref_poses], axis=0,) 140 | self._pose_regress = np.concatenate( 141 | [self._pose_regress, dummy_pose_regress], axis=0, 142 | ) 143 | self._valid_masks = np.concatenate([self._valid_masks, dummy_mask], axis=0,) 144 | 145 | def generate_topdown_occupancy(self) -> np.array: 146 | r"""Generates the top-down occupancy map of the environment. 147 | """ 148 | # Obtain the top-down images from the original environment. 149 | grid = super().generate_topdown_occupancy() 150 | # Draw the set of pose references. 
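# Every valid reference (entries with an empty image name are skipped) is
# rendered as an oriented agent marker at its position and heading.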
151 | min_x, min_z, max_x, max_z = self.get_environment_extents() 152 | grid_size = 20.0 153 | env_size = max(max_z - min_z, max_x - min_x, 8000.0) 154 | x_pad = (env_size - (max_x - min_x)) // 2 155 | z_pad = (env_size - (max_z - min_z)) // 2 156 | min_x = min_x - x_pad 157 | min_z = min_z - z_pad 158 | max_x = max_x + x_pad 159 | max_z = max_z + z_pad 160 | radius = max(grid.shape[0] // 50, 1) 161 | for pose_img in self._pose_image_names: 162 | if pose_img == "": 163 | continue 164 | curr_pos = self._get_position(pose_img) 165 | curr_pos = np.array([curr_pos[0], curr_pos[2]]) 166 | curr_pos = (curr_pos - np.array([min_x, min_z])) / grid_size 167 | curr_theta = self._get_pose(pose_img) 168 | grid = draw_agent(grid, curr_pos, curr_theta, (255, 0, 0), size=radius,) 169 | 170 | return grid 171 | 172 | 173 | register( 174 | id="avd-recon-v0", entry_point="gym_avd.envs:AVDReconEnv", 175 | ) 176 | -------------------------------------------------------------------------------- /environments/gym-avd/gym_avd/envs/config.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # Copyright (c) Facebook, Inc. and its affiliates. 3 | # All rights reserved. 4 | # This source code is licensed under the license found in the 5 | # LICENSE file in the root directory of this source tree. 6 | 7 | GYM_AVD_ROOT = "" 8 | ROOT_DIR = "" 9 | CLUSTER_ROOT_DIR = f"{GYM_AVD_ROOT}/gym_avd/data/avd_clusters" 10 | AREAS_FILE = f"{GYM_AVD_ROOT}/gym_avd/data/environment_areas.json" 11 | OBJ_COUNTS_FILE = f"{GYM_AVD_ROOT}/gym_avd/data/object_counts_per_env.json" 12 | OBJ_PROPS_FILE = "" 13 | VALID_INSTANCES_ROOT_DIR = f"{GYM_AVD_ROOT}/gym_avd/data/valid_instances_per_env" 14 | SIZE_CLASSIFICATION_PATH = f"{GYM_AVD_ROOT}/gym_avd/data/size_classification.json.gz" 15 | POINTNAV_TEST_EPISODES_PATH = f"{GYM_AVD_ROOT}/gym_avd/data/tdn_test_episodes.json" 16 | MAX_STEPS = 200 17 | -------------------------------------------------------------------------------- /environments/gym-avd/gym_avd/envs/utils.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # Copyright (c) Facebook, Inc. and its affiliates. 3 | # All rights reserved. 4 | # This source code is licensed under the license found in the 5 | # LICENSE file in the root directory of this source tree. 6 | 7 | import cv2 8 | import math 9 | import scipy.ndimage 10 | import numpy as np 11 | import networkx as nx 12 | 13 | 14 | def norm_angle(angle): 15 | return math.atan2(math.sin(angle), math.cos(angle)) 16 | 17 | 18 | def create_nav_graph(scan): 19 | """ 20 | scan - dictionary with keys as nodes, ... 
21 | 22 | nodes is a list with each node containing a list of neighbors 23 | """ 24 | G = nx.Graph() 25 | 26 | def distance(pos1, pos2): 27 | # Returns Euclidean distance in 3D space 28 | return np.linalg.norm(pos1 - pos2) 29 | 30 | for nodeix, node in enumerate(scan["nodes"]): 31 | for nbrix in node["neighbors"]: 32 | nbr = scan["nodes"][nbrix] 33 | node_pos = np.array(node["world_pos"]) * scan["scale"] 34 | nbr_pos = np.array(nbr["world_pos"]) * scan["scale"] 35 | G.add_edge(nodeix, nbrix, weight=distance(node_pos, nbr_pos)) 36 | 37 | return G 38 | 39 | 40 | def draw_border(img, color=(255, 0, 0)): 41 | cv2.rectangle(img, (0, 0), (img.shape[1] - 1, img.shape[0] - 1), color, 3) 42 | 43 | 44 | def draw_triangle(img, loc1, loc2, loc3, color=(0, 255, 0)): 45 | triangle_cnt = np.array([loc1, loc2, loc3]) 46 | cv2.drawContours(img, [triangle_cnt], 0, color, -1) 47 | 48 | 49 | def draw_agent(image, position, pose, color, size=5): 50 | loc1 = (int(position[0] - size), int(position[1] - size)) 51 | loc2 = (int(position[0]), int(position[1] + size)) 52 | loc3 = (int(position[0] + size), int(position[1] - size)) 53 | 54 | center = (int(position[0]), int(position[1])) 55 | loc4 = ( 56 | int(center[0] + 2 * size * math.cos(pose)), 57 | int(center[1] + 2 * size * math.sin(pose)), 58 | ) 59 | 60 | draw_triangle(image, loc1, loc2, loc3, color=color) 61 | image = cv2.line(image, center, loc4, (255, 255, 255), size // 2) 62 | return image 63 | 64 | 65 | def draw_agent_sprite(image, position, pose, sprite, size=5): 66 | # Rotate before resize 67 | rotated_sprite = scipy.ndimage.interpolation.rotate(sprite, -pose * 180 / np.pi) 68 | # Rescale because rotation may result in larger image than original, but 69 | # the agent sprite image should stay the same. 70 | initial_agent_size = sprite.shape[0] 71 | new_size = rotated_sprite.shape[0] 72 | 73 | # Rescale to a fixed size 74 | rotated_sprite = cv2.resize( 75 | rotated_sprite, 76 | ( 77 | int(3 * size * new_size / initial_agent_size), 78 | int(3 * size * new_size / initial_agent_size), 79 | ), 80 | ) 81 | 82 | # Add the rotated sprite to the image while ensuring boundary limits 83 | start_x = int(position[0]) - (rotated_sprite.shape[1] // 2) 84 | start_y = int(position[1]) - (rotated_sprite.shape[0] // 2) 85 | end_x = start_x + rotated_sprite.shape[1] - 1 86 | end_y = start_y + rotated_sprite.shape[0] - 1 87 | 88 | if start_x < 0: 89 | rotated_sprite = rotated_sprite[:, (-start_x):] 90 | start_x = 0 91 | elif end_x >= image.shape[1]: 92 | rotated_sprite = rotated_sprite[:, : (image.shape[1] - end_x - 1)] 93 | end_x = image.shape[1] - 1 94 | 95 | if start_y < 0: 96 | rotated_sprite = rotated_sprite[ 97 | (-start_y):, 98 | ] 99 | start_y = 0 100 | elif end_y >= image.shape[0]: 101 | rotated_sprite = rotated_sprite[ 102 | : (image.shape[0] - end_y - 1), 103 | ] 104 | end_y = image.shape[0] - 1 105 | 106 | alpha_mask = rotated_sprite[..., 2:3].astype(np.float32) / 255.0 107 | background = image[start_y : (end_y + 1), start_x : (end_x + 1)].astype(np.float32) 108 | foreground = rotated_sprite[..., :3].astype(np.float32) 109 | 110 | blended_sprite = cv2.add(foreground * alpha_mask, background * (1 - alpha_mask)) 111 | blended_sprite = blended_sprite.astype(np.uint8) 112 | image[start_y : (end_y + 1), start_x : (end_x + 1)] = blended_sprite 113 | 114 | return image 115 | -------------------------------------------------------------------------------- /environments/gym-avd/requirements.txt: 
-------------------------------------------------------------------------------- 1 | gym 2 | h5py 3 | imageio 4 | networkx 5 | numpy 6 | opencv-python 7 | Pillow 8 | scipy 9 | -------------------------------------------------------------------------------- /environments/gym-avd/setup.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # Copyright (c) Facebook, Inc. and its affiliates. 3 | # All rights reserved. 4 | # This source code is licensed under the license found in the 5 | # LICENSE file in the root directory of this source tree. 6 | 7 | from setuptools import setup 8 | 9 | setup(name="gym_avd", version="0.0.1", install_requires=["gym", "opencv-python"]) 10 | -------------------------------------------------------------------------------- /environments/habitat/.gitignore: -------------------------------------------------------------------------------- 1 | habitat-api 2 | habitat-sim 3 | -------------------------------------------------------------------------------- /environments/habitat/README.md: -------------------------------------------------------------------------------- 1 | # Habitat 2 | Our project uses a modified version of the original [habitat-lab](https://github.com/facebookresearch/habitat-lab) and [habitat-sim](https://github.com/facebookresearch/habitat-sim) repositories for simulating 3D motion in the Matterport3D dataset. 3 | 4 | ## Installing habitat-sim 5 | 1. Create a root directory for Habitat. 6 | 7 | ``` 8 | export HABITAT_ROOT= 9 | mkdir $HABITAT_ROOT 10 | cd $HABITAT_ROOT 11 | ``` 12 | 2. Clone `habitat-sim` and check out the specific version used for this code-base. 13 | 14 | ``` 15 | git clone git@github.com:facebookresearch/habitat-sim.git 16 | cd $HABITAT_ROOT/habitat-sim 17 | git checkout 15994e440560c1608b251a1c4059507d1cae801b 18 | ``` 19 | 20 | 3. Follow the installation instructions from `https://github.com/facebookresearch/habitat-sim` (at that specific commit). 21 | 22 | 4. Apply `habitat_sim.patch` to the `habitat-sim` repository. This incorporates some minor additions to the original simulator. 23 | 24 | ``` 25 | cd $HABITAT_ROOT 26 | cp habitat_sim.patch habitat-sim 27 | cd habitat-sim 28 | git apply habitat_sim.patch 29 | ``` 30 | 31 | ## Installing habitat-api 32 | 1. Clone `habitat-lab` and check out the specific version used for this code-base. 33 | 34 | ``` 35 | cd $HABITAT_ROOT 36 | git clone git@github.com:facebookresearch/habitat-lab.git habitat-api 37 | cd habitat-api 38 | git checkout 31318f81db05100099cfd308438d5930c3fb6cd2 39 | ``` 40 | 2. Follow the [installation instructions](https://github.com/facebookresearch/habitat-api). Download the Matterport3D scene dataset as instructed. 41 | 3. Apply `habitat_api.patch` to the `habitat-api` repository. This incorporates the necessary additions to the original API. 42 | 43 | ``` 44 | cd $HABITAT_ROOT 45 | cp habitat_api.patch habitat-api 46 | cd habitat-api 47 | patch -p0 < habitat_api.patch 48 | ``` 49 | 50 | 4. Download the task datasets. 51 | 52 | ``` 53 | mkdir -p $HABITAT_ROOT/habitat-api/data 54 | cd $HABITAT_ROOT/habitat-api/data 55 | wget -O task_datasets.tar.gz https://dl.fbaipublicfiles.com/exploring-exploration/mp3d_task_datasets.tar.gz 56 | tar -xvf task_datasets.tar.gz 57 | rm task_datasets.tar.gz 58 | ``` 59 | 5. 
Extract object annotations for MP3D: 60 | 61 | ``` 62 | cd $HABITAT_ROOT/habitat-api 63 | python data_generation_scripts/extract_object_annotations_per_env.py 64 | ``` 65 | 66 | ## Task demos 67 | This repository supports four tasks: 68 | 69 | - Exploration 70 | - Pose estimation 71 | - Reconstruction 72 | - PointNav 73 | 74 | Visual demos for each task are available. 75 | 76 | ``` 77 | python demos/exploration_demo.py 78 | python demos/pose_estimation_demo.py 79 | python demos/reconstruction_demo.py 80 | python demos/pointnav_demo.py 81 | ``` 82 | -------------------------------------------------------------------------------- /environments/habitat/habitat_sim.patch: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | # All rights reserved. 3 | # This source code is licensed under the license found in the 4 | # LICENSE file in the root directory of this source tree. 5 | 6 | diff --git a/habitat_sim/simulator.py b/habitat_sim/simulator.py 7 | index df7b2af..f7edca6 100644 8 | --- a/habitat_sim/simulator.py 9 | +++ b/habitat_sim/simulator.py 10 | @@ -151,6 +151,10 @@ class Simulator: 11 | observations[sensor_uuid] = sensor.get_observation() 12 | return observations 13 | 14 | + def get_specific_sensor_observations(self, sensor_uuid): 15 | + observations = self._sensors[sensor_uuid].get_observation() 16 | + return observations 17 | + 18 | def last_state(self): 19 | return self._last_state 20 | 21 | -------------------------------------------------------------------------------- /evaluate_pose_estimation.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # Copyright (c) Facebook, Inc. and its affiliates. 3 | # All rights reserved. 4 | # This source code is licensed under the license found in the 5 | # LICENSE file in the root directory of this source tree. 
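# Evaluates exploration policies on the pose-estimation task: builds the
# vectorized AVD or Habitat environments, loads the pretrained RetrievalNetwork,
# PairwisePosePredictor and ViewLocalizer head, optionally loads a trained
# exploration policy (not needed for heuristic actors such as random / oracle /
# forward / frontier), and writes the per-episode metrics returned by
# evaluate_pose() to statistics.json in the log directory.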
6 | 7 | import os 8 | import sys 9 | import math 10 | import json 11 | import torch 12 | import logging 13 | import numpy as np 14 | import torch.nn as nn 15 | 16 | from exploring_exploration.arguments import get_args 17 | from exploring_exploration.envs import ( 18 | make_vec_envs_avd, 19 | make_vec_envs_habitat, 20 | ) 21 | from exploring_exploration.models import RGBEncoder, MapRGBEncoder, Policy 22 | from exploring_exploration.models.pose_estimation import ( 23 | RetrievalNetwork, 24 | PairwisePosePredictor, 25 | ViewLocalizer, 26 | ) 27 | from exploring_exploration.utils.pose_estimation import ( 28 | get_pose_criterion, 29 | get_pose_label_shape, 30 | get_gaussian_kernel, 31 | ) 32 | from exploring_exploration.utils.eval import evaluate_pose 33 | 34 | args = get_args() 35 | 36 | torch.manual_seed(args.seed) 37 | if args.cuda: 38 | torch.cuda.manual_seed(args.seed) 39 | 40 | try: 41 | os.makedirs(args.log_dir) 42 | except OSError: 43 | pass 44 | 45 | eval_log_dir = os.path.join(args.log_dir, "monitor") 46 | 47 | try: 48 | os.makedirs(eval_log_dir) 49 | except OSError: 50 | pass 51 | 52 | 53 | def main(): 54 | torch.set_num_threads(1) 55 | device = torch.device("cuda:0" if args.cuda else "cpu") 56 | ndevices = torch.cuda.device_count() 57 | args.map_shape = (1, args.map_size, args.map_size) 58 | # Setup loggers 59 | logging.basicConfig(filename=f"{args.log_dir}/eval_log.txt", level=logging.DEBUG) 60 | logging.getLogger().addHandler(logging.StreamHandler(sys.stdout)) 61 | logging.getLogger().setLevel(logging.INFO) 62 | 63 | args.feat_shape_sim = (512,) 64 | args.feat_shape_pose = (512 * 9,) 65 | args.odometer_shape = (4,) # (delta_y, delta_x, delta_head, delta_elev) 66 | args.match_thresh = 0.95 67 | args.requires_policy = args.actor_type not in [ 68 | "random", 69 | "oracle", 70 | "forward", 71 | "forward-plus", 72 | "frontier", 73 | ] 74 | if "habitat" in args.env_name: 75 | if "CUDA_VISIBLE_DEVICES" in os.environ: 76 | devices = [ 77 | int(dev) for dev in os.environ["CUDA_VISIBLE_DEVICES"].split(",") 78 | ] 79 | # Devices need to be indexed between 0 to N-1 80 | devices = [dev for dev in range(len(devices))] 81 | else: 82 | devices = None 83 | eval_envs = make_vec_envs_habitat( 84 | args.habitat_config_file, 85 | device, 86 | devices, 87 | enable_odometry_noise=args.enable_odometry_noise, 88 | odometer_noise_scaling=args.odometer_noise_scaling, 89 | measure_noise_free_area=args.measure_noise_free_area, 90 | ) 91 | if args.actor_type == "frontier": 92 | large_map_range = 100.0 93 | H = eval_envs.observation_space.spaces["highres_coarse_occupancy"].shape[1] 94 | args.occ_map_scale = 0.1 * (2 * large_map_range + 1) / H 95 | else: 96 | eval_envs = make_vec_envs_avd( 97 | args.env_name, 98 | 123 + args.num_processes, 99 | args.num_processes, 100 | eval_log_dir, 101 | device, 102 | True, 103 | split=args.eval_split, 104 | nRef=args.num_pose_refs, 105 | set_return_topdown_map=True, 106 | ) 107 | if args.actor_type == "frontier": 108 | large_map_range = 100.0 109 | H = eval_envs.observation_space.spaces["highres_coarse_occupancy"].shape[0] 110 | args.occ_map_scale = 50.0 * (2 * large_map_range + 1) / H 111 | args.obs_shape = eval_envs.observation_space.spaces["im"].shape 112 | args.angles = torch.Tensor(np.radians(np.linspace(180, -150, 12))).to(device) 113 | args.bin_size = math.radians(31) 114 | 115 | # =================== Create models ==================== 116 | rnet = RetrievalNetwork() 117 | posenet = PairwisePosePredictor( 118 | use_classification=args.use_classification, 
num_classes=args.num_classes 119 | ) 120 | pose_head = ViewLocalizer(args.map_scale) 121 | if args.requires_policy: 122 | encoder = RGBEncoder() if args.encoder_type == "rgb" else MapRGBEncoder() 123 | action_config = ( 124 | { 125 | "nactions": eval_envs.action_space.n, 126 | "embedding_size": args.action_embedding_size, 127 | } 128 | if args.use_action_embedding 129 | else None 130 | ) 131 | collision_config = ( 132 | {"collision_dim": 2, "embedding_size": args.collision_embedding_size} 133 | if args.use_collision_embedding 134 | else None 135 | ) 136 | actor_critic = Policy( 137 | eval_envs.action_space, 138 | base_kwargs={ 139 | "feat_dim": args.feat_shape_sim[0], 140 | "recurrent": True, 141 | "hidden_size": args.feat_shape_sim[0], 142 | "action_config": action_config, 143 | "collision_config": collision_config, 144 | }, 145 | ) 146 | # =================== Load models ==================== 147 | rnet_state = torch.load(args.pretrained_rnet)["state_dict"] 148 | rnet.load_state_dict(rnet_state) 149 | posenet_state = torch.load(args.pretrained_posenet)["state_dict"] 150 | posenet.load_state_dict(posenet_state) 151 | rnet.to(device) 152 | posenet.to(device) 153 | pose_head.to(device) 154 | rnet.eval() 155 | posenet.eval() 156 | pose_head.eval() 157 | if args.requires_policy: 158 | encoder_state, actor_critic_state = torch.load(args.load_path)[:2] 159 | encoder.load_state_dict(encoder_state) 160 | actor_critic.load_state_dict(actor_critic_state) 161 | actor_critic.to(device) 162 | encoder.to(device) 163 | actor_critic.eval() 164 | encoder.eval() 165 | if args.use_multi_gpu: 166 | rnet.compare = nn.DataParallel(rnet.compare) 167 | rnet.feat_extract = nn.DataParallel(rnet.feat_extract) 168 | posenet.compare = nn.DataParallel(posenet.compare) 169 | posenet.feat_extract = nn.DataParallel(posenet.feat_extract) 170 | posenet.predict_depth = nn.DataParallel(posenet.predict_depth) 171 | posenet.predict_baseline = nn.DataParallel(posenet.predict_baseline) 172 | posenet.predict_baseline_sign = nn.DataParallel(posenet.predict_baseline_sign) 173 | 174 | # =================== Define pose criterion ==================== 175 | args.pose_loss_fn = get_pose_criterion() 176 | lab_shape = get_pose_label_shape() 177 | gaussian_kernel = get_gaussian_kernel( 178 | kernel_size=args.vote_kernel_size, sigma=0.5, channels=1 179 | ) 180 | 181 | eval_config = {} 182 | eval_config["num_steps"] = args.num_steps 183 | eval_config["num_processes"] = args.num_processes 184 | eval_config["obs_shape"] = args.obs_shape 185 | eval_config["feat_shape_sim"] = args.feat_shape_sim 186 | eval_config["feat_shape_pose"] = args.feat_shape_pose 187 | eval_config["odometer_shape"] = args.odometer_shape 188 | eval_config["lab_shape"] = lab_shape 189 | eval_config["map_shape"] = args.map_shape 190 | eval_config["map_scale"] = args.map_scale 191 | eval_config["angles"] = args.angles 192 | eval_config["bin_size"] = args.bin_size 193 | eval_config["gaussian_kernel"] = gaussian_kernel 194 | eval_config["match_thresh"] = args.match_thresh 195 | eval_config["pose_loss_fn"] = args.pose_loss_fn 196 | eval_config["num_eval_episodes"] = args.eval_episodes 197 | eval_config["num_pose_refs"] = args.num_pose_refs 198 | eval_config["median_filter_size"] = 3 199 | eval_config["vote_kernel_size"] = args.vote_kernel_size 200 | eval_config["env_name"] = args.env_name 201 | eval_config["actor_type"] = args.actor_type 202 | eval_config["pose_predictor_type"] = args.pose_predictor_type 203 | eval_config["encoder_type"] = args.encoder_type 204 | 
eval_config["ransac_n"] = args.ransac_n 205 | eval_config["ransac_niter"] = args.ransac_niter 206 | eval_config["ransac_batch"] = args.ransac_batch 207 | eval_config["use_action_embedding"] = args.use_action_embedding 208 | eval_config["use_collision_embedding"] = args.use_collision_embedding 209 | eval_config["vis_save_dir"] = os.path.join(args.log_dir, "visualizations") 210 | eval_config["final_topdown_save_path"] = os.path.join( 211 | args.log_dir, "top_down_maps.h5" 212 | ) 213 | eval_config["forward_action_id"] = 2 if "avd" in args.env_name else 0 214 | eval_config["turn_action_id"] = 0 if "avd" in args.env_name else 1 215 | eval_config["input_highres"] = args.input_highres 216 | if args.actor_type == "frontier": 217 | eval_config["occ_map_scale"] = args.occ_map_scale 218 | eval_config["frontier_dilate_occ"] = args.frontier_dilate_occ 219 | eval_config["max_time_per_target"] = args.max_time_per_target 220 | 221 | models = {} 222 | models["rnet"] = rnet 223 | models["posenet"] = posenet 224 | models["pose_head"] = pose_head 225 | if args.requires_policy: 226 | models["actor_critic"] = actor_critic 227 | models["encoder"] = encoder 228 | 229 | metrics, per_episode_metrics = evaluate_pose( 230 | models, 231 | eval_envs, 232 | eval_config, 233 | device, 234 | multi_step=True, 235 | interval_steps=args.interval_steps, 236 | visualize_policy=args.visualize_policy, 237 | visualize_size=args.visualize_size, 238 | visualize_batches=args.visualize_batches, 239 | visualize_n_per_batch=args.visualize_n_per_batch, 240 | ) 241 | 242 | json.dump( 243 | per_episode_metrics, open(os.path.join(args.log_dir, "statistics.json"), "w") 244 | ) 245 | 246 | 247 | if __name__ == "__main__": 248 | main() 249 | -------------------------------------------------------------------------------- /evaluate_reconstruction.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # Copyright (c) Facebook, Inc. and its affiliates. 3 | # All rights reserved. 4 | # This source code is licensed under the license found in the 5 | # LICENSE file in the root directory of this source tree. 
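# Evaluates exploration policies on the view-reconstruction task: loads the
# concept clusters (centroids and example images) from an h5 file, the
# transformer-based reconstruction decoder together with its pose encoder and
# feature network, optionally a trained exploration policy, and writes the
# per-episode metrics returned by evaluate_reconstruction() (computed with the
# multi-label classification loss) to statistics.json in the log directory.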
6 | 7 | import os 8 | import sys 9 | import json 10 | import h5py 11 | import torch 12 | import logging 13 | import numpy as np 14 | import torch.nn as nn 15 | 16 | from exploring_exploration.arguments import get_args 17 | from exploring_exploration.envs import ( 18 | make_vec_envs_avd, 19 | make_vec_envs_habitat, 20 | ) 21 | from exploring_exploration.models import RGBEncoder, MapRGBEncoder, Policy 22 | from exploring_exploration.utils.reconstruction_eval import evaluate_reconstruction 23 | from exploring_exploration.models.reconstruction import ( 24 | FeatureReconstructionModule, 25 | FeatureNetwork, 26 | PoseEncoder, 27 | ) 28 | from exploring_exploration.utils.reconstruction import rec_loss_fn_classify 29 | 30 | args = get_args() 31 | 32 | torch.manual_seed(args.seed) 33 | if args.cuda: 34 | torch.cuda.manual_seed(args.seed) 35 | 36 | try: 37 | os.makedirs(args.log_dir) 38 | except OSError: 39 | pass 40 | 41 | eval_log_dir = os.path.join(args.log_dir, "monitor") 42 | 43 | try: 44 | os.makedirs(eval_log_dir) 45 | except OSError: 46 | pass 47 | 48 | 49 | def main(): 50 | torch.set_num_threads(1) 51 | device = torch.device("cuda:0" if args.cuda else "cpu") 52 | ndevices = torch.cuda.device_count() 53 | # Setup loggers 54 | logging.basicConfig(filename=f"{args.log_dir}/eval_log.txt", level=logging.DEBUG) 55 | logging.getLogger().addHandler(logging.StreamHandler(sys.stdout)) 56 | logging.getLogger().setLevel(logging.INFO) 57 | 58 | args.feat_shape_sim = (512,) 59 | args.odometer_shape = (4,) # (delta_y, delta_x, delta_head, delta_elev) 60 | args.requires_policy = args.actor_type not in [ 61 | "random", 62 | "oracle", 63 | "forward", 64 | "forward-plus", 65 | "frontier", 66 | ] 67 | if "habitat" in args.env_name: 68 | if "CUDA_VISIBLE_DEVICES" in os.environ: 69 | devices = [ 70 | int(dev) for dev in os.environ["CUDA_VISIBLE_DEVICES"].split(",") 71 | ] 72 | # Devices need to be indexed between 0 to N-1 73 | devices = [dev for dev in range(len(devices))] 74 | else: 75 | devices = None 76 | eval_envs = make_vec_envs_habitat( 77 | args.habitat_config_file, device, devices, seed=args.seed 78 | ) 79 | if args.actor_type == "frontier": 80 | large_map_range = 100.0 81 | H = eval_envs.observation_space.spaces["highres_coarse_occupancy"].shape[1] 82 | args.occ_map_scale = 0.1 * (2 * large_map_range + 1) / H 83 | else: 84 | eval_envs = make_vec_envs_avd( 85 | args.env_name, 86 | args.seed + args.num_processes, 87 | args.num_processes, 88 | eval_log_dir, 89 | device, 90 | True, 91 | split=args.eval_split, 92 | nRef=args.num_pose_refs, 93 | set_return_topdown_map=True, 94 | ) 95 | if args.actor_type == "frontier": 96 | large_map_range = 100.0 97 | H = eval_envs.observation_space.spaces["highres_coarse_occupancy"].shape[0] 98 | args.occ_map_scale = 50.0 * (2 * large_map_range + 1) / H 99 | args.obs_shape = eval_envs.observation_space.spaces["im"].shape 100 | 101 | # =================== Load clusters ================= 102 | clusters_h5 = h5py.File(args.clusters_path, "r") 103 | cluster_centroids = torch.Tensor(np.array(clusters_h5["cluster_centroids"])).to( 104 | device 105 | ) 106 | args.nclusters = cluster_centroids.shape[0] 107 | clusters2images = {} 108 | for i in range(args.nclusters): 109 | cluster_images = np.array( 110 | clusters_h5[f"cluster_{i}/images"] 111 | ) # (K, C, H, W) torch Tensor 112 | cluster_images = np.ascontiguousarray(cluster_images.transpose(0, 2, 3, 1)) 113 | cluster_images = (cluster_images * 255.0).astype(np.uint8) 114 | clusters2images[i] = cluster_images # (K, H, W, C) 115 | 
clusters_h5.close() 116 | 117 | # =================== Create models ==================== 118 | decoder = FeatureReconstructionModule( 119 | args.nclusters, args.nclusters, nlayers=args.n_transformer_layers, 120 | ) 121 | feature_network = FeatureNetwork() 122 | feature_network = nn.DataParallel(feature_network, dim=0) 123 | pose_encoder = PoseEncoder() 124 | if args.use_multi_gpu: 125 | decoder = nn.DataParallel(decoder, dim=1) 126 | pose_encoder = nn.DataParallel(pose_encoder, dim=0) 127 | if args.requires_policy: 128 | encoder = RGBEncoder() if args.encoder_type == "rgb" else MapRGBEncoder() 129 | action_config = ( 130 | { 131 | "nactions": eval_envs.action_space.n, 132 | "embedding_size": args.action_embedding_size, 133 | } 134 | if args.use_action_embedding 135 | else None 136 | ) 137 | collision_config = ( 138 | {"collision_dim": 2, "embedding_size": args.collision_embedding_size} 139 | if args.use_collision_embedding 140 | else None 141 | ) 142 | actor_critic = Policy( 143 | eval_envs.action_space, 144 | base_kwargs={ 145 | "feat_dim": args.feat_shape_sim[0], 146 | "recurrent": True, 147 | "hidden_size": args.feat_shape_sim[0], 148 | "action_config": action_config, 149 | "collision_config": collision_config, 150 | }, 151 | ) 152 | 153 | # =================== Load models ==================== 154 | decoder_state, pose_encoder_state = torch.load(args.load_path_rec)[:2] 155 | decoder.load_state_dict(decoder_state) 156 | pose_encoder.load_state_dict(pose_encoder_state) 157 | decoder.to(device) 158 | feature_network.to(device) 159 | decoder.eval() 160 | feature_network.eval() 161 | pose_encoder.eval() 162 | pose_encoder.to(device) 163 | if args.requires_policy: 164 | encoder_state, actor_critic_state = torch.load(args.load_path)[:2] 165 | encoder.load_state_dict(encoder_state) 166 | actor_critic.load_state_dict(actor_critic_state) 167 | actor_critic.to(device) 168 | encoder.to(device) 169 | actor_critic.eval() 170 | encoder.eval() 171 | 172 | eval_config = {} 173 | eval_config["num_steps"] = args.num_steps 174 | eval_config["num_processes"] = args.num_processes 175 | eval_config["feat_shape_sim"] = args.feat_shape_sim 176 | eval_config["odometer_shape"] = args.odometer_shape 177 | eval_config["num_eval_episodes"] = args.eval_episodes 178 | eval_config["num_pose_refs"] = args.num_pose_refs 179 | eval_config["env_name"] = args.env_name 180 | eval_config["actor_type"] = args.actor_type 181 | eval_config["encoder_type"] = args.encoder_type 182 | eval_config["use_action_embedding"] = args.use_action_embedding 183 | eval_config["use_collision_embedding"] = args.use_collision_embedding 184 | eval_config["cluster_centroids"] = cluster_centroids 185 | eval_config["clusters2images"] = clusters2images 186 | eval_config["rec_loss_fn"] = rec_loss_fn_classify 187 | eval_config["vis_save_dir"] = os.path.join(args.log_dir, "visualizations") 188 | eval_config["forward_action_id"] = 2 if "avd" in args.env_name else 0 189 | eval_config["turn_action_id"] = 0 if "avd" in args.env_name else 1 190 | if args.actor_type == "frontier": 191 | eval_config["occ_map_scale"] = args.occ_map_scale 192 | eval_config["frontier_dilate_occ"] = args.frontier_dilate_occ 193 | eval_config["max_time_per_target"] = args.max_time_per_target 194 | 195 | models = {} 196 | models["decoder"] = decoder 197 | models["pose_encoder"] = pose_encoder 198 | models["feature_network"] = feature_network 199 | if args.requires_policy: 200 | models["actor_critic"] = actor_critic 201 | models["encoder"] = encoder 202 | 203 | metrics, 
per_episode_metrics = evaluate_reconstruction( 204 | models, 205 | eval_envs, 206 | eval_config, 207 | device, 208 | multi_step=True, 209 | interval_steps=args.interval_steps, 210 | visualize_policy=args.visualize_policy, 211 | ) 212 | 213 | json.dump( 214 | per_episode_metrics, open(os.path.join(args.log_dir, "statistics.json"), "w") 215 | ) 216 | 217 | 218 | if __name__ == "__main__": 219 | main() 220 | -------------------------------------------------------------------------------- /evaluate_visitation.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # Copyright (c) Facebook, Inc. and its affiliates. 3 | # All rights reserved. 4 | # This source code is licensed under the license found in the 5 | # LICENSE file in the root directory of this source tree. 6 | 7 | import os 8 | import sys 9 | import json 10 | import torch 11 | import logging 12 | 13 | from exploring_exploration.arguments import get_args 14 | from exploring_exploration.envs import ( 15 | make_vec_envs_avd, 16 | make_vec_envs_habitat, 17 | ) 18 | from exploring_exploration.models import RGBEncoder, MapRGBEncoder, Policy 19 | from exploring_exploration.utils.eval import evaluate_visitation 20 | 21 | args = get_args() 22 | 23 | torch.manual_seed(args.seed) 24 | if args.cuda: 25 | torch.cuda.manual_seed(args.seed) 26 | 27 | try: 28 | os.makedirs(args.log_dir) 29 | except OSError: 30 | pass 31 | 32 | eval_log_dir = os.path.join(args.log_dir, "monitor") 33 | 34 | try: 35 | os.makedirs(eval_log_dir) 36 | except OSError: 37 | pass 38 | 39 | 40 | def main(): 41 | torch.set_num_threads(1) 42 | device = torch.device("cuda:0" if args.cuda else "cpu") 43 | ndevices = torch.cuda.device_count() 44 | # Setup loggers 45 | logging.basicConfig(filename=f"{args.log_dir}/eval_log.txt", level=logging.DEBUG) 46 | logging.getLogger().addHandler(logging.StreamHandler(sys.stdout)) 47 | logging.getLogger().setLevel(logging.INFO) 48 | 49 | args.feat_shape_sim = (512,) 50 | args.feat_shape_pose = (512 * 9,) 51 | args.requires_policy = args.actor_type not in [ 52 | "random", 53 | "oracle", 54 | "forward", 55 | "forward-plus", 56 | "frontier", 57 | ] 58 | if "habitat" in args.env_name: 59 | if "CUDA_VISIBLE_DEVICES" in os.environ: 60 | devices = [ 61 | int(dev) for dev in os.environ["CUDA_VISIBLE_DEVICES"].split(",") 62 | ] 63 | # Devices need to be indexed between 0 to N-1 64 | devices = [dev for dev in range(len(devices))] 65 | else: 66 | devices = None 67 | eval_envs = make_vec_envs_habitat( 68 | args.habitat_config_file, 69 | device, 70 | devices, 71 | enable_odometry_noise=args.enable_odometry_noise, 72 | odometer_noise_scaling=args.odometer_noise_scaling, 73 | measure_noise_free_area=args.measure_noise_free_area, 74 | ) 75 | if args.actor_type == "frontier": 76 | large_map_range = 100.0 77 | H = eval_envs.observation_space.spaces["highres_coarse_occupancy"].shape[1] 78 | args.occ_map_scale = 0.1 * (2 * large_map_range + 1) / H 79 | else: 80 | eval_envs = make_vec_envs_avd( 81 | args.env_name, 82 | 123 + args.num_processes, 83 | args.num_processes, 84 | eval_log_dir, 85 | device, 86 | True, 87 | split=args.eval_split, 88 | nRef=args.num_pose_refs, 89 | set_return_topdown_map=True, 90 | ) 91 | if args.actor_type == "frontier": 92 | large_map_range = 100.0 93 | H = eval_envs.observation_space.spaces["highres_coarse_occupancy"].shape[0] 94 | args.occ_map_scale = 50.0 * (2 * large_map_range + 1) / H 95 | args.obs_shape = eval_envs.observation_space.spaces["im"].shape 96 | 97 | if 
args.requires_policy: 98 | # =================== Create models ==================== 99 | encoder = RGBEncoder() if args.encoder_type == "rgb" else MapRGBEncoder() 100 | action_config = ( 101 | { 102 | "nactions": eval_envs.action_space.n, 103 | "embedding_size": args.action_embedding_size, 104 | } 105 | if args.use_action_embedding 106 | else None 107 | ) 108 | collision_config = ( 109 | {"collision_dim": 2, "embedding_size": args.collision_embedding_size} 110 | if args.use_collision_embedding 111 | else None 112 | ) 113 | actor_critic = Policy( 114 | eval_envs.action_space, 115 | base_kwargs={ 116 | "feat_dim": args.feat_shape_sim[0], 117 | "recurrent": True, 118 | "hidden_size": args.feat_shape_sim[0], 119 | "action_config": action_config, 120 | "collision_config": collision_config, 121 | }, 122 | ) 123 | # =================== Load models ==================== 124 | encoder_state, actor_critic_state = torch.load(args.load_path)[:2] 125 | encoder.load_state_dict(encoder_state) 126 | actor_critic.load_state_dict(actor_critic_state) 127 | actor_critic.to(device) 128 | encoder.to(device) 129 | actor_critic.eval() 130 | encoder.eval() 131 | 132 | eval_config = {} 133 | eval_config["num_steps"] = args.num_steps 134 | eval_config["feat_shape_sim"] = args.feat_shape_sim 135 | eval_config["num_processes"] = args.num_processes 136 | eval_config["num_pose_refs"] = args.num_pose_refs 137 | eval_config["num_eval_episodes"] = args.eval_episodes 138 | eval_config["env_name"] = args.env_name 139 | eval_config["actor_type"] = args.actor_type 140 | eval_config["encoder_type"] = args.encoder_type 141 | eval_config["use_action_embedding"] = args.use_action_embedding 142 | eval_config["use_collision_embedding"] = args.use_collision_embedding 143 | eval_config["vis_save_dir"] = os.path.join(args.log_dir, "visualizations") 144 | eval_config["final_topdown_save_path"] = os.path.join( 145 | args.log_dir, "top_down_maps.h5" 146 | ) 147 | eval_config["forward_action_id"] = 2 if "avd" in args.env_name else 0 148 | eval_config["turn_action_id"] = 0 if "avd" in args.env_name else 1 149 | eval_config["input_highres"] = args.input_highres 150 | if args.actor_type == "frontier": 151 | eval_config["occ_map_scale"] = args.occ_map_scale 152 | eval_config["frontier_dilate_occ"] = args.frontier_dilate_occ 153 | eval_config["max_time_per_target"] = args.max_time_per_target 154 | 155 | models = {} 156 | if args.requires_policy: 157 | models["actor_critic"] = actor_critic 158 | models["encoder"] = encoder 159 | 160 | metrics, per_episode_metrics = evaluate_visitation( 161 | models, 162 | eval_envs, 163 | eval_config, 164 | device, 165 | multi_step=True, 166 | interval_steps=args.interval_steps, 167 | visualize_policy=args.visualize_policy, 168 | visualize_size=args.visualize_size, 169 | visualize_batches=args.visualize_batches, 170 | visualize_n_per_batch=args.visualize_n_per_batch, 171 | ) 172 | 173 | json.dump( 174 | per_episode_metrics, open(os.path.join(args.log_dir, "statistics.json"), "w") 175 | ) 176 | 177 | 178 | if __name__ == "__main__": 179 | main() 180 | -------------------------------------------------------------------------------- /exploring_exploration/.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 
19 | parts/ 20 | sdist/ 21 | var/ 22 | wheels/ 23 | *.egg-info/ 24 | .installed.cfg 25 | *.egg 26 | MANIFEST 27 | 28 | # PyInstaller 29 | # Usually these files are written by a python script from a template 30 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 31 | *.manifest 32 | *.spec 33 | 34 | # Installer logs 35 | pip-log.txt 36 | pip-delete-this-directory.txt 37 | 38 | # Unit test / coverage reports 39 | htmlcov/ 40 | .tox/ 41 | .coverage 42 | .coverage.* 43 | .cache 44 | nosetests.xml 45 | coverage.xml 46 | *.cover 47 | .hypothesis/ 48 | .pytest_cache/ 49 | 50 | # Translations 51 | *.mo 52 | *.pot 53 | 54 | # Django stuff: 55 | *.log 56 | local_settings.py 57 | db.sqlite3 58 | 59 | # Flask stuff: 60 | instance/ 61 | .webassets-cache 62 | 63 | # Scrapy stuff: 64 | .scrapy 65 | 66 | # Sphinx documentation 67 | docs/_build/ 68 | 69 | # PyBuilder 70 | target/ 71 | 72 | # Jupyter Notebook 73 | .ipynb_checkpoints 74 | 75 | # pyenv 76 | .python-version 77 | 78 | # celery beat schedule file 79 | celerybeat-schedule 80 | 81 | # SageMath parsed files 82 | *.sage.py 83 | 84 | # Environments 85 | .env 86 | .venv 87 | env/ 88 | venv/ 89 | ENV/ 90 | env.bak/ 91 | venv.bak/ 92 | 93 | # Spyder project settings 94 | .spyderproject 95 | .spyproject 96 | 97 | # Rope project settings 98 | .ropeproject 99 | 100 | # mkdocs documentation 101 | /site 102 | 103 | trained_models/ 104 | .fuse_hidden* 105 | 106 | # ctags 107 | tags 108 | 109 | *.swp 110 | logs 111 | *.mp4 112 | data/ 113 | custom_kernels/ 114 | enjoy_script.sh 115 | eval_scripts/ 116 | tests/ 117 | visualize_clusterings* 118 | *.sh 119 | *nfs* 120 | plotting_results 121 | policy_visualizations 122 | visualized_avd_clusters/ 123 | visualized_avd_clusters_v2/ 124 | *.yaml 125 | imagenet_* 126 | mp3d_tdn_visualizations/ 127 | sptm_vis_examples/ 128 | trained_models 129 | -------------------------------------------------------------------------------- /exploring_exploration/algo/__init__.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # Copyright (c) Facebook, Inc. and its affiliates. 3 | # All rights reserved. 4 | # This source code is licensed under the license found in the 5 | # LICENSE file in the root directory of this source tree. 6 | 7 | from .ppo import PPO 8 | from .imitation import Imitation 9 | from .supervised_reconstruction import SupervisedReconstruction 10 | -------------------------------------------------------------------------------- /exploring_exploration/algo/imitation.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # Copyright (c) Facebook, Inc. and its affiliates. 3 | # All rights reserved. 4 | # This source code is licensed under the license found in the 5 | # LICENSE file in the root directory of this source tree. 6 | 7 | import torch 8 | import torch.nn as nn 9 | import torch.nn.functional as F 10 | import torch.optim as optim 11 | from itertools import chain 12 | 13 | from exploring_exploration.utils.common import ( 14 | flatten_two, 15 | unflatten_two, 16 | ) 17 | 18 | 19 | def get_onehot_tensor(idxes, size): 20 | device = idxes.device 21 | bs = idxes.shape[0] 22 | oh = torch.zeros(bs, size).to(device).scatter_(1, idxes, 1) 23 | return oh 24 | 25 | 26 | class Imitation: 27 | """Algorithm to learn policy from expert trajectories via 28 | imitation learning. Incorporates inflection weighting from 29 | https://arxiv.org/pdf/1904.03461.pdf. 
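    As a rough illustration: for an episode of length T = 100 whose expert
    action sequence changes 20 times, the inflection factor is T / N = 5, so
    the loss terms at those 20 inflection time-steps are weighted 5x relative
    to the remaining steps (the factor is tracked with a moving average and
    clipped; see __init__ below).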
30 | """ 31 | 32 | def __init__(self, config): 33 | self.encoder = config["encoder"] 34 | self.actor_critic = config["actor_critic"] 35 | lr = config["lr"] 36 | eps = config["eps"] 37 | self.max_grad_norm = config["max_grad_norm"] 38 | self.nactions = config["nactions"] 39 | self.encoder_type = config["encoder_type"] 40 | self.use_action_embedding = config["use_action_embedding"] 41 | self.use_collision_embedding = config["use_collision_embedding"] 42 | self.use_inflection_weighting = config["use_inflection_weighting"] 43 | self.optimizer = optim.Adam( 44 | list( 45 | filter( 46 | lambda p: p.requires_grad, 47 | chain(self.encoder.parameters(), self.actor_critic.parameters()), 48 | ) 49 | ), 50 | lr=lr, 51 | eps=eps, 52 | ) 53 | if self.use_inflection_weighting: 54 | # inflection_factor = L / N where L = episode length and 55 | # N = # of inflection points in the episode. 56 | # The loss function will be biased towards inflection points in 57 | # the episode. Fewer the inflection points, larger the bias. 58 | # loss = inflection_factor * loss_inflection + 59 | # 1.0 * loss_non_inflection 60 | self.inflection_factor = 1.0 61 | # The inflection factor is updated during training by computing 62 | # a moving average estimate (with weighting inflection_beta). 63 | self.inflection_beta = 0.90 64 | # The inflection factor estimate for an episode is clipped to 65 | # this value to prevent explosion. 66 | self.trunc_factor_clipping = 10.0 67 | 68 | def update(self, rollouts): 69 | """Update the policy based on expert data in the rollouts. 70 | """ 71 | T, N = rollouts.actions.shape[:2] 72 | expert_actions = rollouts.actions # (T, N, 1) 73 | # Masks indicating when expert actions were *not* taken. This permits 74 | # a form of data augmentation where non-expert actions are taken to 75 | # accomodate distribution shifts b/w the expert and the learned policy. 76 | action_masks = rollouts.action_masks # (T, N, 1) 77 | hxs = rollouts.recurrent_hidden_states[0].unsqueeze(0) # (1, N, nfeats) 78 | masks = rollouts.masks[:-1] # (T, N, nfeats) 79 | # ============= Update inflection factor if applicable ================ 80 | if self.use_inflection_weighting: 81 | inflection_mask = self._get_inflection_mask(expert_actions) 82 | # Inverse frequency of inflection points. 83 | inflection_factor = T / (inflection_mask.sum(dim=0) + 1e-12) 84 | inflection_factor = torch.clamp( 85 | inflection_factor, 1.0, self.trunc_factor_clipping 86 | ) 87 | self._update_inflection_factor(inflection_factor.mean().item()) 88 | # ========================= Forward pass ============================== 89 | hxs = flatten_two(hxs) # (N, nfeats) 90 | masks = flatten_two(masks) # (T*N, nfeats) 91 | action_masks = flatten_two(action_masks).squeeze(1) # (T*N, ) 92 | policy_inputs = self._create_policy_inputs(rollouts) 93 | # (T*N, nactions) 94 | pred_action_log_probs = self.actor_critic.get_log_probs( 95 | policy_inputs, hxs, masks 96 | ) 97 | # ==================== Compute the prediction loss ==================== 98 | expert_actions = flatten_two(expert_actions).squeeze(1).long() # (T*N,) 99 | action_loss = F.nll_loss( 100 | pred_action_log_probs, expert_actions, reduction="none" 101 | ) # (T*N, ) 102 | # Weight the loss based on inflection points. 103 | if self.use_inflection_weighting: 104 | inflection_mask = flatten_two(inflection_mask).squeeze(1) # (T*N,) 105 | action_loss = action_loss * ( 106 | inflection_mask * self.inflection_factor + (1 - inflection_mask) * 1.0 107 | ) 108 | # Mask the losses for non-expert actions. 
109 | action_loss = (action_loss * action_masks).sum() / (action_masks.sum() + 1e-10) 110 | # ============================ Backward pass ========================== 111 | self.optimizer.zero_grad() 112 | action_loss.backward() 113 | nn.utils.clip_grad_norm_( 114 | chain(self.encoder.parameters(), self.actor_critic.parameters()), 115 | self.max_grad_norm, 116 | ) 117 | self.optimizer.step() 118 | 119 | losses = {} 120 | losses["action_loss"] = action_loss.item() 121 | return losses 122 | 123 | def _update_inflection_factor(self, inflection_factor): 124 | self.inflection_factor = ( 125 | self.inflection_factor * self.inflection_beta 126 | + inflection_factor * (1 - self.inflection_beta) 127 | ) 128 | 129 | def _create_policy_inputs(self, rollouts): 130 | """The policy inputs consist of features extract from the RGB and 131 | top-down occupancy maps, and learned encodings of the previous actions, 132 | and collision detections. 133 | """ 134 | obs_im = rollouts.obs_im[:-1] # (T, N, *obs_shape) 135 | encoder_inputs = [obs_im] 136 | if self.encoder_type == "rgb+map": 137 | encoder_inputs.append(rollouts.obs_sm[:-1]) # (T, N, *obs_shape) 138 | encoder_inputs.append(rollouts.obs_lm[:-1]) # (T, N, *obs_shape) 139 | encoder_inputs = [flatten_two(v) for v in encoder_inputs] 140 | obs_feats = self.encoder(*encoder_inputs) # (T*N, nfeats) 141 | policy_inputs = {"features": obs_feats} 142 | if self.use_action_embedding: 143 | prev_actions = torch.zeros_like(rollouts.actions) # (T, N, 1) 144 | prev_actions[1:] = rollouts.actions[:-1] 145 | prev_actions = flatten_two(prev_actions) # (T*N, 1) 146 | policy_inputs["actions"] = prev_actions.long() 147 | if self.use_collision_embedding: 148 | prev_collisions = flatten_two(rollouts.collisions[:-1]) # (T*N, 1) 149 | policy_inputs["collisions"] = prev_collisions.long() 150 | return policy_inputs 151 | 152 | def _get_inflection_mask(self, actions): 153 | """Given a sequence of actions, it predicts a mask highlighting 154 | the inflection points, i.e., points when the actions in the 155 | sequence change between t-1 and t. 156 | """ 157 | device = actions.device 158 | # actions - (T, N, 1) tensor 159 | prev_actions = actions[:-1] 160 | curr_actions = actions[1:] 161 | inflection_mask = (curr_actions != prev_actions).float() # (T-1, N, 1) 162 | # First action is never an inflection point 163 | inflection_mask = torch.cat( 164 | [torch.zeros(1, *actions.shape[1:]).to(device), inflection_mask], dim=0 165 | ) 166 | return inflection_mask 167 | -------------------------------------------------------------------------------- /exploring_exploration/algo/ppo.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # Copyright (c) Facebook, Inc. and its affiliates. 3 | # All rights reserved. 4 | # This source code is licensed under the license found in the 5 | # LICENSE file in the root directory of this source tree. 6 | 7 | import torch 8 | import torch.nn as nn 9 | import torch.nn.functional as F 10 | import torch.optim as optim 11 | 12 | from itertools import chain 13 | 14 | 15 | class PPO: 16 | """Algorithm to learn a policy via Proximal Policy Optimization: 17 | https://arxiv.org/abs/1707.06347 . 
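    Each update maximizes the clipped surrogate objective
    min(r_t * A_t, clip(r_t, 1 - eps, 1 + eps) * A_t), where r_t is the ratio
    of new to old action probabilities and A_t is the normalized advantage,
    combined with an (optionally clipped) value-function loss and an entropy
    bonus.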
18 | Large parts of the code were borrowed from Ilya Kostrikov's codebase: 19 | https://github.com/ikostrikov/pytorch-a2c-ppo-acktr-gail 20 | """ 21 | 22 | def __init__(self, config): 23 | self.encoder = config["encoder"] 24 | self.actor_critic = config["actor_critic"] 25 | lr = config["lr"] 26 | eps = config["eps"] 27 | self.clip_param = config["clip_param"] 28 | self.ppo_epoch = config["ppo_epoch"] 29 | self.encoder_type = config["encoder_type"] 30 | self.num_mini_batch = config["num_mini_batch"] 31 | self.entropy_coef = config["entropy_coef"] 32 | self.max_grad_norm = config["max_grad_norm"] 33 | self.nactions = config["nactions"] 34 | self.value_loss_coef = config["value_loss_coef"] 35 | self.use_clipped_value_loss = config["use_clipped_value_loss"] 36 | self.use_action_embedding = config["use_action_embedding"] 37 | self.use_collision_embedding = config["use_collision_embedding"] 38 | 39 | self.optimizer = optim.Adam( 40 | list( 41 | filter( 42 | lambda p: p.requires_grad, 43 | chain(self.encoder.parameters(), self.actor_critic.parameters()), 44 | ) 45 | ), 46 | lr=lr, 47 | eps=eps, 48 | ) 49 | 50 | def update(self, rollouts): 51 | advantages = rollouts.returns[:-1] - rollouts.value_preds[:-1] 52 | advantages = (advantages - advantages.mean()) / (advantages.std() + 1e-5) 53 | 54 | value_loss_epoch = 0 55 | action_loss_epoch = 0 56 | dist_entropy_epoch = 0 57 | 58 | for e in range(self.ppo_epoch): 59 | if self.actor_critic.is_recurrent: 60 | data_generator = rollouts.recurrent_generator( 61 | advantages, self.num_mini_batch 62 | ) 63 | else: 64 | data_generator = rollouts.feed_forward_generator( 65 | advantages, self.num_mini_batch 66 | ) 67 | 68 | for sample in data_generator: 69 | ( 70 | obs_im_batch, 71 | obs_sm_batch, 72 | obs_lm_batch, 73 | recurrent_hidden_states_batch, 74 | actions_batch, 75 | value_preds_batch, 76 | return_batch, 77 | masks_batch, 78 | collisions_batch, 79 | old_action_log_probs_batch, 80 | adv_targ, 81 | T, 82 | N, 83 | ) = sample 84 | 85 | # ======================= Forward pass ======================== 86 | encoder_inputs = [obs_im_batch] 87 | if self.encoder_type == "rgb+map": 88 | encoder_inputs += [obs_sm_batch, obs_lm_batch] 89 | obs_feats = self.encoder(*encoder_inputs) 90 | policy_inputs = {"features": obs_feats} 91 | prev_actions = torch.zeros_like(actions_batch.view(T, N, 1)) 92 | prev_actions[1:] = actions_batch.view(T, N, 1)[:-1] 93 | prev_actions = prev_actions.view(T * N, 1) 94 | prev_collisions = collisions_batch 95 | if self.use_action_embedding: 96 | policy_inputs["actions"] = prev_actions.long() 97 | if self.use_collision_embedding: 98 | policy_inputs["collisions"] = prev_collisions.long() 99 | # Reshape to do in a single forward pass for all steps 100 | policy_outputs = self.actor_critic.evaluate_actions( 101 | policy_inputs, 102 | recurrent_hidden_states_batch, 103 | masks_batch, 104 | actions_batch, 105 | ) 106 | values, action_log_probs, dist_entropy, _ = policy_outputs 107 | # ===================== Compute PPO losses ==================== 108 | # Clipped surrogate loss 109 | ratio = torch.exp(action_log_probs - old_action_log_probs_batch) 110 | surr1 = ratio * adv_targ 111 | surr2 = ( 112 | torch.clamp(ratio, 1.0 - self.clip_param, 1.0 + self.clip_param) 113 | * adv_targ 114 | ) 115 | action_loss = -torch.min(surr1, surr2).mean() 116 | # Value function loss 117 | if self.use_clipped_value_loss: 118 | value_pred_clipped = value_preds_batch + ( 119 | values - value_preds_batch 120 | ).clamp(-self.clip_param, self.clip_param) 121 | 
value_losses = (values - return_batch).pow(2) 122 | value_losses_clipped = (value_pred_clipped - return_batch).pow(2) 123 | value_loss = ( 124 | 0.5 * torch.max(value_losses, value_losses_clipped).mean() 125 | ) 126 | else: 127 | value_loss = 0.5 * F.mse_loss(return_batch, values) 128 | # ======================= Backward pass ======================= 129 | self.optimizer.zero_grad() 130 | ( 131 | value_loss * self.value_loss_coef 132 | + action_loss 133 | - dist_entropy * self.entropy_coef 134 | ).backward() 135 | nn.utils.clip_grad_norm_( 136 | chain(self.encoder.parameters(), self.actor_critic.parameters()), 137 | self.max_grad_norm, 138 | ) 139 | self.optimizer.step() 140 | # ===================== Update statistics ===================== 141 | value_loss_epoch += value_loss.item() 142 | action_loss_epoch += action_loss.item() 143 | dist_entropy_epoch += dist_entropy.item() 144 | 145 | num_updates = self.ppo_epoch * self.num_mini_batch 146 | value_loss_epoch /= num_updates 147 | action_loss_epoch /= num_updates 148 | dist_entropy_epoch /= num_updates 149 | 150 | losses = {} 151 | losses["value_loss"] = value_loss_epoch 152 | losses["action_loss"] = action_loss_epoch 153 | losses["dist_entropy"] = dist_entropy_epoch 154 | return losses 155 | -------------------------------------------------------------------------------- /exploring_exploration/algo/supervised_reconstruction.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # Copyright (c) Facebook, Inc. and its affiliates. 3 | # All rights reserved. 4 | # This source code is licensed under the license found in the 5 | # LICENSE file in the root directory of this source tree. 6 | 7 | import torch 8 | import torch.nn as nn 9 | import torch.optim as optim 10 | import itertools 11 | 12 | from exploring_exploration.utils.geometry import subtract_pose 13 | from exploring_exploration.utils.common import ( 14 | flatten_two, 15 | unflatten_two, 16 | unsq_exp, 17 | ) 18 | from einops import rearrange 19 | 20 | 21 | class SupervisedReconstruction: 22 | """Algorithm to learn a reconstruction task-head that reconstructs 23 | features at a new target location given features from existing locations. 24 | 25 | The default loss function is a multilabel loss. Existing features from 26 | training environments are clustered into K clusters. For each target 27 | location, the nearest J clusters are defined as positives and the 28 | remaining K-J clusters are defined as negatives. The model is required to 29 | identify these top-J clusters / "reconstruction" concepts. 30 | """ 31 | 32 | def __init__(self, config): 33 | self.decoder = config["decoder"] 34 | self.pose_encoder = config["pose_encoder"] 35 | lr = config["lr"] 36 | eps = config["eps"] 37 | self.max_grad_norm = config["max_grad_norm"] 38 | # The loss function is passed as an argument. The default loss is a 39 | # multi-label loss. 40 | self.rec_loss_fn = config["rec_loss_fn"] 41 | # The number of nearest neighbors to use as positives. 42 | self.rec_loss_fn_J = config["rec_loss_fn_J"] 43 | self.cluster_centroids = config["cluster_centroids"] 44 | # Make a prediction at regular intervals of this size. 
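# For example, with prediction_interval = 20 and an episode of length T = 100,
# the decoder is queried after observing 20, 40, 60, 80 and 100 steps, and the
# resulting losses are averaged in update().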
45 | self.prediction_interval = config["prediction_interval"] 46 | self.optimizer = optim.Adam( 47 | itertools.chain(self.decoder.parameters(), self.pose_encoder.parameters(),), 48 | lr=lr, 49 | eps=eps, 50 | ) 51 | 52 | def update(self, rollouts): 53 | T, N, nfeats = rollouts.obs_feats[:-1].shape 54 | nRef = rollouts.tgt_feats.shape[1] 55 | device = rollouts.obs_feats.device 56 | avg_loss = 0.0 57 | avg_loss_count = 0.0 58 | tgt_feats = rollouts.tgt_feats # (N, nRef, nfeats) 59 | tgt_masks = rollouts.tgt_masks.squeeze(2) # (N, nRef) 60 | obs_feats = unsq_exp(rollouts.obs_feats, nRef, dim=2) # (T+1, N, nRef, nfeats) 61 | obs_poses = unsq_exp( 62 | rollouts.obs_odometer[:, :, :3], nRef, dim=2 63 | ) # (T+1, N, nRef, 3) - (y, x, phi) 64 | tgt_poses = unsq_exp(rollouts.tgt_poses, T + 1, dim=0) # (T+1, N, nRef, 3) 65 | # Make a prediction after every prediction_interval steps, i.e., 66 | # the agent has seen self.prediction_interval*(i+1) observations. 67 | for i in range(0, T, self.prediction_interval): 68 | L = min(i + self.prediction_interval, T) 69 | # Estimate relative pose b/w targets and observations. 70 | obs_relpose = subtract_pose( 71 | rearrange(tgt_poses[:L], "l b n f -> (l b n) f"), 72 | rearrange(obs_poses[:L], "l b n f -> (l b n) f"), 73 | ) # (L*N*nRef, 3) --- (x, y, phi) 74 | # ========================= Forward pass ========================== 75 | # Encode the poses of the observations and targets. 76 | obs_relpose_enc = self.pose_encoder(obs_relpose) # (L*N*nRef, 16) 77 | obs_relpose_enc = obs_relpose_enc.view(L, N * nRef, -1) 78 | tgt_relpose_enc = torch.zeros(1, *obs_relpose_enc.shape[1:]).to(device) 79 | obs_feats_i = rearrange(obs_feats[:L], "l b n f -> l (b n) f") 80 | # These serve as inputs to an encoder-decoder transformer model. 81 | rec_inputs = { 82 | # encoder inputs 83 | "history_image_features": obs_feats_i, # (L, N*nRef, nfeats) 84 | "history_pose_features": obs_relpose_enc, # (L, N*nRef, 16) 85 | # decoder inputs 86 | "target_pose_features": tgt_relpose_enc, # (1, N*nRef, 16) 87 | } 88 | pred_logits = self.decoder(rec_inputs).squeeze(0) # (N*nRef, nclass) 89 | # =================== Compute reconstruction loss ================= 90 | loss = self.rec_loss_fn( 91 | pred_logits, # (N*nRef, nclass) 92 | flatten_two(tgt_feats), # (N*nRef, nfeats) 93 | self.cluster_centroids, 94 | K=self.rec_loss_fn_J, 95 | reduction="none", 96 | ).sum( 97 | dim=1 98 | ) # (N*nRef, ) 99 | loss = unflatten_two(loss, N, nRef) 100 | # Mask out invalid targets. 101 | loss = loss * tgt_masks 102 | loss = loss.mean() 103 | # ========================== Backward pass ======================== 104 | self.optimizer.zero_grad() 105 | loss.backward() 106 | nn.utils.clip_grad_norm_( 107 | itertools.chain( 108 | self.decoder.parameters(), self.pose_encoder.parameters(), 109 | ), 110 | self.max_grad_norm, 111 | ) 112 | self.optimizer.step() 113 | 114 | avg_loss += loss.item() 115 | avg_loss_count += 1.0 116 | 117 | avg_loss = avg_loss / avg_loss_count 118 | losses = {"rec_loss": avg_loss} 119 | return losses 120 | -------------------------------------------------------------------------------- /exploring_exploration/envs/__init__.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # Copyright (c) Facebook, Inc. and its affiliates. 3 | # All rights reserved. 4 | # This source code is licensed under the license found in the 5 | # LICENSE file in the root directory of this source tree. 
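# Factory functions for the vectorized environments used during training and
# evaluation: make_vec_envs_habitat wraps the Habitat / Matterport3D simulator,
# while make_vec_envs_avd wraps the gym-avd environments in a SubprocVecEnv
# with PyTorch-friendly dict observations.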
6 | 7 | from .habitat import make_vec_envs as make_vec_envs_habitat 8 | from .avd import make_vec_envs as make_vec_envs_avd 9 | -------------------------------------------------------------------------------- /exploring_exploration/envs/avd.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # Copyright (c) Facebook, Inc. and its affiliates. 3 | # All rights reserved. 4 | # This source code is licensed under the license found in the 5 | # LICENSE file in the root directory of this source tree. 6 | 7 | import os 8 | import torch 9 | 10 | import gym 11 | import gym_avd 12 | from gym.spaces.box import Box 13 | 14 | from baselines import bench 15 | from baselines.common.vec_env import VecEnvWrapper 16 | from baselines.common.vec_env.subproc_vec_env import SubprocVecEnv 17 | 18 | from einops import rearrange 19 | 20 | 21 | def make_env( 22 | env_id, 23 | seed, 24 | rank, 25 | log_dir, 26 | allow_early_resets, 27 | split="train", 28 | nRef=1, 29 | set_return_topdown_map=False, 30 | tdn_min_dist=2000.0, 31 | tdn_t_exp=200, 32 | tdn_t_nav=200, 33 | provide_collision_penalty=False, 34 | collision_penalty_factor=1e-1, 35 | n_landmarks=20, 36 | ): 37 | # Define a temporary function that creates an environment instance. 38 | def _thunk(): 39 | env = gym.make(env_id) 40 | env.set_split(split) 41 | env.set_nref(nRef) 42 | if set_return_topdown_map: 43 | env.set_return_topdown_map() 44 | env.set_nlandmarks(n_landmarks) 45 | if "tdn" in env_id: 46 | env.set_min_dist(tdn_min_dist) 47 | env.set_t_exp_and_nav(tdn_t_exp, tdn_t_nav) 48 | env.seed(seed + rank) 49 | obs_shape = env.observation_space.shape 50 | if log_dir is not None: 51 | env = bench.Monitor( 52 | env, 53 | os.path.join(log_dir, str(rank)), 54 | allow_early_resets=allow_early_resets, 55 | ) 56 | # If the input has shape (W,H,3), wrap for PyTorch convolutions 57 | obs_shape = env.observation_space.spaces["im"].shape 58 | if len(obs_shape) == 3 and obs_shape[2] in [1, 3]: 59 | env = TransposeImageDict(env) 60 | return env 61 | 62 | return _thunk 63 | 64 | 65 | def make_vec_envs( 66 | env_name, 67 | seed, 68 | num_processes, 69 | log_dir, 70 | device, 71 | allow_early_resets, 72 | num_frame_stack=None, 73 | nRef=1, 74 | n_landmarks=20, 75 | set_return_topdown_map=False, 76 | **kwargs 77 | ): 78 | envs = [ 79 | make_env( 80 | env_name, 81 | seed, 82 | i, 83 | log_dir, 84 | allow_early_resets, 85 | nRef=nRef, 86 | set_return_topdown_map=set_return_topdown_map, 87 | n_landmarks=n_landmarks, 88 | **kwargs 89 | ) 90 | for i in range(num_processes) 91 | ] 92 | 93 | envs = SubprocVecEnv(envs) 94 | envs = VecPyTorchDict(envs, device) 95 | 96 | return envs 97 | 98 | 99 | class TransposeImageDict(gym.ObservationWrapper): 100 | """Transpose the image data from (..., H, W, C) -> (..., C, H, W).""" 101 | 102 | keys_to_check = [ 103 | "im", 104 | "depth", 105 | "coarse_occupancy", 106 | "fine_occupancy", 107 | "highres_coarse_occupancy", 108 | "target_im", 109 | "pose_refs", 110 | "pose_refs_depth", 111 | "landmark_ims", 112 | ] 113 | 114 | def __init__(self, env=None): 115 | super().__init__(env) 116 | self.keys_to_transpose = [] 117 | for key in self.keys_to_check: 118 | if key in self.observation_space.spaces: 119 | self.keys_to_transpose.append(key) 120 | for key in self.keys_to_transpose: 121 | self.observation_space.spaces[key] = self._transpose_obs_space( 122 | self.observation_space.spaces[key] 123 | ) 124 | 125 | def _transpose_obs_space(self, obs_space): 126 | """Transposes the observation 
space from (... H W C) -> (... C H W).""" 127 | obs_shape = obs_space.shape 128 | assert len(obs_shape) in [3, 4] 129 | if len(obs_shape) == 4: 130 | new_obs_shape = [obs_shape[0], obs_shape[3], obs_shape[1], obs_shape[2]] 131 | else: 132 | new_obs_shape = [obs_shape[2], obs_shape[0], obs_shape[1]] 133 | dtype = obs_space.dtype 134 | low = obs_space.low.flat[0] 135 | high = obs_space.high.flat[0] 136 | return Box(low, high, new_obs_shape, dtype=dtype) 137 | 138 | def _transpose_obs(self, obs): 139 | """Transposes the observation from (... H W C) -> (... C H W) 140 | """ 141 | assert len(obs.shape) in [3, 4] 142 | if len(obs.shape) == 4: 143 | return rearrange(obs, "n h w c -> n c h w") 144 | else: 145 | return rearrange(obs, "h w c -> c h w") 146 | 147 | def observation(self, observation): 148 | for key in self.keys_to_transpose: 149 | if key in observation.keys(): 150 | observation[key] = self._transpose_obs(observation[key]) 151 | return observation 152 | 153 | 154 | class VecPyTorchDict(VecEnvWrapper): 155 | def __init__(self, venv, device): 156 | """Converts numpy arrays to torch sensors and load them to GPU.""" 157 | super(VecPyTorchDict, self).__init__(venv) 158 | self.device = device 159 | 160 | def reset(self): 161 | obs = self.venv.reset() 162 | obs = {key: torch.from_numpy(obs[key]).float().to(self.device) for key in obs} 163 | return obs 164 | 165 | def step_async(self, actions): 166 | actions = actions.squeeze(1).cpu().numpy() 167 | self.venv.step_async(actions) 168 | 169 | def step_wait(self): 170 | obs, reward, done, info = self.venv.step_wait() 171 | obs = {key: torch.from_numpy(obs[key]).float().to(self.device) for key in obs} 172 | reward = torch.from_numpy(reward).unsqueeze(dim=1).float() 173 | return obs, reward, done, info 174 | -------------------------------------------------------------------------------- /exploring_exploration/envs/habitat.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # Copyright (c) Facebook, Inc. and its affiliates. 3 | # All rights reserved. 4 | # This source code is licensed under the license found in the 5 | # LICENSE file in the root directory of this source tree. 
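# make_vec_envs() below composes four thin wrappers around the vectorized
# habitat-api environments, in this order:
#
#     construct_envs(config, env_class, devices)   # habitat_baselines vector env
#       -> BatchDataWrapper        # list of per-env obs dicts -> one batched dict of tensors
#       -> TransposeImageWrapper   # image observations (..., H, W, C) -> (..., C, H, W)
#       -> RenameKeysWrapper       # habitat-api sensor names -> this repo's naming convention
#       -> DeviceWrapper           # move every observation tensor to the given device
#
# so downstream training code sees the same observation interface as the AVD
# environments in envs/avd.py.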
6 | 7 | import torch 8 | 9 | from gym import spaces 10 | from gym.spaces import Box 11 | 12 | import habitat 13 | from habitat_baselines.common.utils import batch_obs 14 | from habitat_baselines.common.env_utils import construct_envs 15 | from habitat_baselines.common.environments import PoseRLEnv, ExpNavRLEnv 16 | from habitat_baselines.config.default_pose import get_config_pose 17 | from habitat_baselines.config.default_exp_nav import get_config_exp_nav 18 | 19 | from einops import rearrange 20 | 21 | 22 | def make_vec_envs( 23 | config, 24 | device, 25 | devices, 26 | seed=100, 27 | task_type="pose", 28 | enable_odometry_noise=None, 29 | odometer_noise_scaling=None, 30 | measure_noise_free_area=None, 31 | ): 32 | if task_type == "pose": 33 | config = get_config_pose(config, []) 34 | env_class = PoseRLEnv 35 | else: 36 | config = get_config_exp_nav(config, []) 37 | env_class = ExpNavRLEnv 38 | config.defrost() 39 | config.TASK_CONFIG.SEED = seed 40 | config.TASK_CONFIG.SIMULATOR.SEED = seed 41 | if enable_odometry_noise is not None: 42 | config.TASK_CONFIG.SIMULATOR.ENABLE_ODOMETRY_NOISE = enable_odometry_noise 43 | config.TASK_CONFIG.SIMULATOR.ODOMETER_NOISE_SCALING = odometer_noise_scaling 44 | if measure_noise_free_area is not None: 45 | config.TASK_CONFIG.SIMULATOR.OCCUPANCY_MAPS.MEASURE_NOISE_FREE_AREA = ( 46 | measure_noise_free_area 47 | ) 48 | config.freeze() 49 | envs = construct_envs(config, env_class, devices) 50 | envs = BatchDataWrapper(envs) 51 | envs = TransposeImageWrapper(envs) 52 | envs = RenameKeysWrapper(envs) 53 | envs = DeviceWrapper(envs, device) 54 | return envs 55 | 56 | 57 | class BatchDataWrapper: 58 | """Batches the observations received from habitat-api environments.""" 59 | 60 | def __init__(self, vec_envs): 61 | self._envs = vec_envs 62 | self.observation_space = vec_envs.observation_spaces[0] 63 | self.action_space = vec_envs.action_spaces[0] 64 | 65 | def reset(self): 66 | obs = self._envs.reset() 67 | obs = batch_obs(obs) 68 | return obs 69 | 70 | def step(self, actions): 71 | actions_list = [a[0].item() for a in actions] 72 | outputs = self._envs.step(actions_list) 73 | obs, rewards, done, info = [list(x) for x in zip(*outputs)] 74 | obs = batch_obs(obs) 75 | rewards = torch.tensor(rewards, dtype=torch.float) 76 | rewards = rewards.unsqueeze(1) 77 | 78 | return obs, rewards, done, info 79 | 80 | def close(self): 81 | self._envs.close() 82 | 83 | 84 | class TransposeImageWrapper: 85 | """Transpose the image data from (..., H, W, C) -> (..., C, H, W)""" 86 | 87 | keys_to_check = [ 88 | "rgb", 89 | "depth", 90 | "coarse_occupancy", 91 | "fine_occupancy", 92 | "highres_coarse_occupancy", 93 | "pose_estimation_rgb", 94 | "pose_estimation_depth", 95 | ] 96 | 97 | def __init__(self, vec_envs): 98 | self._envs = vec_envs 99 | self.observation_space = vec_envs.observation_space 100 | self.keys_to_transpose = [] 101 | for key in self.keys_to_check: 102 | if key in self.observation_space.spaces: 103 | self.keys_to_transpose.append(key) 104 | for key in self.keys_to_transpose: 105 | self.observation_space.spaces[key] = self._transpose_obs_space( 106 | self.observation_space.spaces[key] 107 | ) 108 | self.action_space = vec_envs.action_space 109 | 110 | def _transpose_obs_space(self, obs_space): 111 | """Transposes the observation space from (... H W C) -> (... 
C H W).""" 112 | obs_shape = obs_space.shape 113 | assert len(obs_shape) in [3, 4] 114 | if len(obs_shape) == 4: 115 | new_obs_shape = [obs_shape[0], obs_shape[3], obs_shape[1], obs_shape[2]] 116 | else: 117 | new_obs_shape = [obs_shape[2], obs_shape[0], obs_shape[1]] 118 | dtype = obs_space.dtype 119 | low = obs_space.low.flat[0] 120 | high = obs_space.high.flat[0] 121 | return Box(low, high, new_obs_shape, dtype=dtype) 122 | 123 | def _transpose_obs(self, obs): 124 | """Transposes the observation from (B ... H W C) -> (B ... C H W) 125 | """ 126 | assert len(obs.shape) in [4, 5] 127 | if len(obs.shape) == 5: 128 | return rearrange(obs, "b n h w c -> b n c h w") 129 | else: 130 | return rearrange(obs, "b h w c -> b c h w") 131 | 132 | def reset(self): 133 | obs = self._envs.reset() 134 | for k in self.keys_to_transpose: 135 | if k in obs.keys(): 136 | obs[k] = self._transpose_obs(obs[k]) 137 | return obs 138 | 139 | def step(self, actions): 140 | obs, reward, done, info = self._envs.step(actions) 141 | for k in self.keys_to_transpose: 142 | if k in obs.keys(): 143 | obs[k] = self._transpose_obs(obs[k]) 144 | return obs, reward, done, info 145 | 146 | def close(self): 147 | self._envs.close() 148 | 149 | 150 | class RenameKeysWrapper: 151 | """Renames keys from habitat-api convention to exploring_exploration 152 | convention. 153 | """ 154 | 155 | def __init__(self, vec_envs): 156 | self._envs = vec_envs 157 | self.mapping = { 158 | "rgb": "im", 159 | "depth": "depth", 160 | "coarse_occupancy": "coarse_occupancy", 161 | "fine_occupancy": "fine_occupancy", 162 | "delta_sensor": "delta", 163 | "pose_estimation_rgb": "pose_refs", 164 | "pose_estimation_depth": "pose_refs_depth", 165 | "pose_estimation_reg": "pose_regress", 166 | "pose_estimation_mask": "valid_masks", 167 | "oracle_action_sensor": "oracle_action", 168 | "collision_sensor": "collisions", 169 | "opsr": "oracle_pose_success", 170 | "area_covered": "seen_area", 171 | "inc_area_covered": "inc_area", 172 | "frac_area_covered": "frac_seen_area", 173 | "top_down_map_pose": "topdown_map", 174 | "novelty_reward": "count_based_reward", 175 | # Navigation specific ones 176 | "highres_coarse_occupancy": "highres_coarse_occupancy", 177 | "grid_goal_exp_nav": "target_grid_loc", 178 | "spl_exp_nav": "spl", 179 | "success_exp_nav": "success_rate", 180 | "nav_error_exp_nav": "nav_error", 181 | "top_down_map_exp_nav": "topdown_map", 182 | "local_top_down_sensor": "gt_highres_coarse_occupancy", 183 | } 184 | self.observation_space = spaces.Dict( 185 | { 186 | self.mapping[key]: val 187 | for key, val in vec_envs.observation_space.spaces.items() 188 | } 189 | ) 190 | self.action_space = vec_envs.action_space 191 | 192 | def reset(self): 193 | obs = self._envs.reset() 194 | obs_new = {} 195 | for key, val in obs.items(): 196 | obs_new[self.mapping[key]] = val 197 | return obs_new 198 | 199 | def step(self, actions): 200 | obs, reward, done, infos = self._envs.step(actions) 201 | obs_new = {} 202 | for key, val in obs.items(): 203 | obs_new[self.mapping[key]] = val 204 | 205 | infos_new = [] 206 | for info in infos: 207 | info_new = {} 208 | for key, val in info.items(): 209 | if key == "objects_covered_geometric": 210 | small = val["small_objects_visited"] 211 | medium = val["medium_objects_visited"] 212 | large = val["large_objects_visited"] 213 | categories = float(val["categories_visited"]) 214 | info_new["num_objects_visited"] = small + medium + large 215 | info_new["num_small_objects_visited"] = small 216 | 
info_new["num_medium_objects_visited"] = medium 217 | info_new["num_large_objects_visited"] = large 218 | info_new["num_categories_visited"] = categories 219 | elif key in self.mapping.keys(): 220 | info_new[self.mapping[key]] = val 221 | else: 222 | info_new[key] = val 223 | infos_new.append(info_new) 224 | 225 | return obs_new, reward, done, infos_new 226 | 227 | def close(self): 228 | self._envs.close() 229 | 230 | 231 | class DeviceWrapper: 232 | """Moves all observations to a torch device.""" 233 | 234 | def __init__(self, vec_envs, device): 235 | self._envs = vec_envs 236 | self.device = device 237 | 238 | self.observation_space = vec_envs.observation_space 239 | self.action_space = vec_envs.action_space 240 | 241 | def reset(self): 242 | obs = self._envs.reset() 243 | for key, val in obs.items(): 244 | obs[key] = val.to(self.device) 245 | 246 | return obs 247 | 248 | def step(self, actions): 249 | obs, reward, done, info = self._envs.step(actions) 250 | for key, val in obs.items(): 251 | obs[key] = val.to(self.device) 252 | 253 | return obs, reward, done, info 254 | 255 | def close(self): 256 | self._envs.close() 257 | -------------------------------------------------------------------------------- /exploring_exploration/models/.gitignore: -------------------------------------------------------------------------------- 1 | astar_pycpp 2 | -------------------------------------------------------------------------------- /exploring_exploration/models/__init__.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # Copyright (c) Facebook, Inc. and its affiliates. 3 | # All rights reserved. 4 | # This source code is licensed under the license found in the 5 | # LICENSE file in the root directory of this source tree. 6 | 7 | from .exploration import RGBEncoder, MapRGBEncoder, Policy 8 | -------------------------------------------------------------------------------- /exploring_exploration/models/curiosity.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # Copyright (c) Facebook, Inc. and its affiliates. 3 | # All rights reserved. 4 | # This source code is licensed under the license found in the 5 | # LICENSE file in the root directory of this source tree. 6 | 7 | import numpy as np 8 | import torch 9 | import torch.nn as nn 10 | import torchvision.models as models 11 | 12 | 13 | class ForwardDynamics(nn.Module): 14 | """Model that takes previous state, and action as inputs to predict 15 | the next state. 
16 | """ 17 | 18 | def __init__(self, n_actions): 19 | super().__init__() 20 | state_size = 512 21 | hidden_size = 256 22 | 23 | class ResidualBlock(nn.Module): 24 | def __init__(self): 25 | super().__init__() 26 | self.fc1 = nn.Sequential( 27 | nn.Linear(hidden_size + n_actions, hidden_size), 28 | nn.LeakyReLU(0.2, inplace=True), 29 | ) 30 | self.fc2 = nn.Sequential( 31 | nn.Linear(hidden_size + n_actions, hidden_size) 32 | ) 33 | 34 | def forward(self, feat, act): 35 | x = feat 36 | x = self.fc1(torch.cat([x, act], dim=1)) 37 | x = self.fc2(torch.cat([x, act], dim=1)) 38 | return feat + x 39 | 40 | self.pre_rb = nn.Sequential( 41 | nn.Linear(state_size + n_actions, hidden_size), 42 | nn.LeakyReLU(0.2, inplace=True), 43 | ) 44 | self.post_rb = nn.Linear(hidden_size, state_size) 45 | self.rb1 = ResidualBlock() 46 | self.rb2 = ResidualBlock() 47 | self.rb3 = ResidualBlock() 48 | self.rb4 = ResidualBlock() 49 | 50 | def forward(self, s, a): 51 | """ 52 | Inputs: 53 | s - (bs, state_size) 54 | a - (bs, n_actions) onehot encoding 55 | Outputs: 56 | sp - (bs, state_size) 57 | """ 58 | x = self.pre_rb(torch.cat([s, a], dim=1)) 59 | x = self.rb1(x, a) 60 | x = self.rb2(x, a) 61 | x = self.rb3(x, a) 62 | x = self.rb4(x, a) 63 | sp = self.post_rb(x) 64 | return sp 65 | 66 | 67 | class Phi(nn.Module): 68 | """A simple imagenet-pretrained encoder for state representation. 69 | """ 70 | 71 | def __init__(self): 72 | super().__init__() 73 | 74 | resnet = models.resnet18(pretrained=True) 75 | self.main = nn.Sequential( 76 | resnet.conv1, 77 | resnet.bn1, 78 | resnet.relu, 79 | resnet.maxpool, 80 | resnet.layer1, 81 | resnet.layer2, 82 | resnet.layer3, 83 | resnet.layer4, 84 | resnet.avgpool, 85 | ) 86 | 87 | def forward(self, x): 88 | """ 89 | Inputs: 90 | x - (bs, C, H, W) 91 | Outputs: 92 | feat - (bs, 512) 93 | """ 94 | feat = self.main(x).squeeze(3).squeeze(2) 95 | return feat 96 | 97 | 98 | # Maintains running statistics of the mean and standard deviation of 99 | # the episode returns. 
Used for reward normalization as done here: 100 | # https://arxiv.org/pdf/1808.04355.pdf 101 | class RunningMeanStd(object): 102 | # https://en.wikipedia.org/wiki/Algorithms_for_calculating_variance#Parallel_algorithm 103 | def __init__(self, epsilon=1e-4, shape=()): 104 | self.mean = np.zeros(shape, "float64") 105 | self.var = np.ones(shape, "float64") 106 | self.count = epsilon 107 | 108 | def update(self, x): 109 | batch_mean = np.mean(x, axis=0) 110 | batch_var = np.var(x, axis=0) 111 | batch_count = x.shape[0] 112 | self.update_from_moments(batch_mean, batch_var, batch_count) 113 | 114 | def update_from_moments(self, batch_mean, batch_var, batch_count): 115 | self.mean, self.var, self.count = update_mean_var_count_from_moments( 116 | self.mean, self.var, self.count, batch_mean, batch_var, batch_count 117 | ) 118 | 119 | 120 | def update_mean_var_count_from_moments( 121 | mean, var, count, batch_mean, batch_var, batch_count 122 | ): 123 | delta = batch_mean - mean 124 | tot_count = count + batch_count 125 | new_mean = mean + delta * batch_count / tot_count 126 | m_a = var * count 127 | m_b = batch_var * batch_count 128 | M2 = m_a + m_b + np.square(delta) * count * batch_count / tot_count 129 | new_var = M2 / tot_count 130 | new_count = tot_count 131 | return new_mean, new_var, new_count 132 | -------------------------------------------------------------------------------- /exploring_exploration/models/exploration.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # Copyright (c) Facebook, Inc. and its affiliates. 3 | # All rights reserved. 4 | # This source code is licensed under the license found in the 5 | # LICENSE file in the root directory of this source tree. 6 | 7 | import torch 8 | import torch.nn as nn 9 | import torchvision.models as models 10 | 11 | from exploring_exploration.utils.distributions import Categorical 12 | from exploring_exploration.utils.common import init 13 | 14 | 15 | class RGBEncoder(nn.Module): 16 | """Encoder that only takes RGB readings as inputs. 17 | """ 18 | 19 | def __init__(self, fix_cnn=True): 20 | super().__init__() 21 | self.rgb_resnet_model = models.resnet18(pretrained=True) 22 | resnet_models = [self.rgb_resnet_model] 23 | if fix_cnn: 24 | for model in resnet_models: 25 | for param in model.parameters(): 26 | param.requires_grad = False 27 | num_ftrs = self.rgb_resnet_model.fc.in_features 28 | num_in = 0 29 | self.rgb_resnet_model.avgpool = nn.AvgPool2d(3, stride=1) 30 | self.rgb_resnet_model.fc = nn.Linear(num_ftrs, 128) 31 | num_in += 128 32 | self.merge_fc = nn.Linear(num_in, 512) 33 | 34 | def forward(self, rgb): 35 | """ 36 | Inputs: 37 | rgb - (bs, C, H, W) 38 | Outputs: 39 | feat - (bs, 512) 40 | """ 41 | feat_rgb = self.rgb_resnet_model(rgb) 42 | feat = self.merge_fc(feat_rgb) 43 | return feat 44 | 45 | def get_feats(self, rgb): 46 | return self(rgb) 47 | 48 | 49 | class MapRGBEncoder(nn.Module): 50 | """Encoder that only takes RGB readings and egocentric occupancy 51 | maps as inputs. 
52 | """ 53 | 54 | def __init__(self, fix_cnn=True): 55 | super().__init__() 56 | self.rgb_resnet_model = models.resnet18(pretrained=True) 57 | self.large_map_resnet_model = models.resnet18(pretrained=True) 58 | self.small_map_resnet_model = models.resnet18(pretrained=True) 59 | resnet_models = [ 60 | self.rgb_resnet_model, 61 | self.large_map_resnet_model, 62 | self.small_map_resnet_model, 63 | ] 64 | if fix_cnn: 65 | for model in resnet_models: 66 | for param in model.parameters(): 67 | param.requires_grad = False 68 | num_ftrs = self.large_map_resnet_model.fc.in_features 69 | num_in = 0 70 | self.rgb_resnet_model.avgpool = nn.AvgPool2d(3, stride=1) 71 | self.rgb_resnet_model.fc = nn.Linear(num_ftrs, 128) 72 | num_in += 128 73 | self.large_map_resnet_model.avgpool = nn.AvgPool2d(3, stride=1) 74 | self.large_map_resnet_model.fc = nn.Linear(num_ftrs, 128) 75 | num_in += 128 76 | self.small_map_resnet_model.avgpool = nn.AvgPool2d(3, stride=1) 77 | self.small_map_resnet_model.fc = nn.Linear(num_ftrs, 128) 78 | num_in += 128 79 | self.merge_fc = nn.Linear(num_in, 512) 80 | 81 | def forward(self, rgb, small_maps, large_maps): 82 | """ 83 | Inputs: 84 | rgb - (bs, C, H, W) 85 | small_maps - (bs, C, H, W) 86 | large_maps - (bs, C, H, W) 87 | Outputs: 88 | feat - (bs, 512) 89 | """ 90 | feat_lm = self.large_map_resnet_model(large_maps) 91 | feat_sm = self.small_map_resnet_model(small_maps) 92 | feat_rgb = self.rgb_resnet_model(rgb) 93 | feat = self.merge_fc(torch.cat([feat_lm, feat_sm, feat_rgb], dim=1)) 94 | return feat 95 | 96 | def get_feats(self, rgb, small_maps, large_maps): 97 | return self(rgb, small_maps, large_maps) 98 | 99 | 100 | # Borrowed from https://github.com/ikostrikov/pytorch-a2c-ppo-acktr-gail 101 | class NNBase(nn.Module): 102 | def __init__(self, recurrent, recurrent_input_size, hidden_size): 103 | super(NNBase, self).__init__() 104 | self._hidden_size = hidden_size 105 | self._recurrent = recurrent 106 | if recurrent: 107 | self.gru = nn.GRUCell(recurrent_input_size, hidden_size) 108 | nn.init.orthogonal_(self.gru.weight_ih.data) 109 | nn.init.orthogonal_(self.gru.weight_hh.data) 110 | self.gru.bias_ih.data.fill_(0) 111 | self.gru.bias_hh.data.fill_(0) 112 | 113 | @property 114 | def is_recurrent(self): 115 | return self._recurrent 116 | 117 | @property 118 | def recurrent_hidden_state_size(self): 119 | if self._recurrent: 120 | return self._hidden_size 121 | return 1 122 | 123 | @property 124 | def output_size(self): 125 | return self._hidden_size 126 | 127 | def _forward_gru(self, x, hxs, masks): 128 | if x.size(0) == hxs.size(0): 129 | x = hxs = self.gru(x, hxs * masks) 130 | else: 131 | # x is a (T, N, -1) tensor that has been flatten to (T * N, -1) 132 | N = hxs.size(0) 133 | T = int(x.size(0) / N) 134 | # unflatten 135 | x = x.view(T, N, x.size(1)) 136 | # Same deal with masks 137 | masks = masks.view(T, N, 1) 138 | outputs = [] 139 | for i in range(T): 140 | hx = hxs = self.gru(x[i], hxs * masks[i]) 141 | outputs.append(hx) 142 | # assert len(outputs) == T 143 | # x is a (T, N, -1) tensor 144 | x = torch.stack(outputs, dim=0) 145 | # flatten 146 | x = x.view(T * N, -1) 147 | return x, hxs 148 | 149 | 150 | class MLPBase(NNBase): 151 | """An MLP recurrent-encoder for the policy inputs. It takes features, 152 | past actions, and collisions as inputs. 
153 | """ 154 | 155 | def __init__( 156 | self, 157 | feat_dim=512, 158 | recurrent=False, 159 | hidden_size=512, 160 | action_config=None, 161 | collision_config=None, 162 | ): 163 | super().__init__(recurrent, hidden_size, hidden_size) 164 | init_ = lambda m: init( 165 | m, 166 | nn.init.orthogonal_, 167 | lambda x: nn.init.constant_(x, 0), 168 | nn.init.calculate_gain("relu"), 169 | ) 170 | main_input_dim = feat_dim 171 | # Model to encode actions 172 | self.use_action_embedding = False 173 | if action_config is not None: 174 | nactions = action_config["nactions"] 175 | action_embedding_size = action_config["embedding_size"] 176 | self.action_encoder = nn.Sequential( 177 | nn.Embedding(nactions, action_embedding_size), 178 | nn.Linear(action_embedding_size, action_embedding_size), 179 | nn.ReLU(), 180 | nn.Linear(action_embedding_size, action_embedding_size), 181 | ) 182 | main_input_dim += action_embedding_size 183 | self.use_action_embedding = True 184 | # Model to encode collisions 185 | self.use_collision_embedding = False 186 | if collision_config is not None: 187 | collision_dim = collision_config["collision_dim"] 188 | collision_embedding_size = collision_config["embedding_size"] 189 | self.collision_encoder = nn.Sequential( 190 | nn.Embedding(collision_dim, collision_embedding_size), 191 | nn.Linear(collision_embedding_size, collision_embedding_size), 192 | nn.ReLU(), 193 | nn.Linear(collision_embedding_size, collision_embedding_size), 194 | ) 195 | main_input_dim += collision_embedding_size 196 | self.use_collision_embedding = True 197 | # Feature merging 198 | self.main = nn.Sequential( 199 | init_(nn.Linear(main_input_dim, hidden_size)), nn.ReLU() 200 | ) 201 | init_ = lambda m: init( 202 | m, nn.init.orthogonal_, lambda x: nn.init.constant_(x, 0) 203 | ) 204 | # Critic for policy learning 205 | self.critic_linear = init_(nn.Linear(hidden_size, 1)) 206 | self.train() 207 | 208 | def forward(self, inputs, rnn_hxs, masks): 209 | # Encode input features 210 | x = self._process_inputs(inputs) 211 | if self.is_recurrent: 212 | x, rnn_hxs = self._forward_gru(x, rnn_hxs, masks) 213 | return self.critic_linear(x), x, rnn_hxs 214 | 215 | def _process_inputs(self, inputs): 216 | """ 217 | inputs is a dictionary consisting of the following: 218 | { 219 | features: (bs, feat_dim) 220 | actions: (bs, 1) (optional) 221 | collisions: (bs, 1) one hot vector (optional) 222 | } 223 | """ 224 | input_values = [inputs["features"]] 225 | if self.use_action_embedding: 226 | act_feat = self.action_encoder(inputs["actions"].squeeze(1)) 227 | input_values.append(act_feat) 228 | if self.use_collision_embedding: 229 | coll_feat = self.collision_encoder(inputs["collisions"].squeeze(1)) 230 | input_values.append(coll_feat) 231 | input_values = torch.cat(input_values, dim=1) 232 | return self.main(input_values) 233 | 234 | 235 | # Borrowed from https://github.com/ikostrikov/pytorch-a2c-ppo-acktr-gail 236 | class Policy(nn.Module): 237 | def __init__(self, action_space, base_kwargs=None): 238 | super().__init__() 239 | if base_kwargs is None: 240 | base_kwargs = {} 241 | self.base = MLPBase(**base_kwargs) 242 | num_outputs = action_space.n 243 | self.dist = Categorical(self.base.output_size, num_outputs) 244 | 245 | @property 246 | def is_recurrent(self): 247 | return self.base.is_recurrent 248 | 249 | @property 250 | def recurrent_hidden_state_size(self): 251 | """Size of rnn_hx.""" 252 | return self.base.recurrent_hidden_state_size 253 | 254 | def forward(self, inputs, rnn_hxs, masks): 255 | raise 
NotImplementedError 256 | 257 | def act(self, inputs, rnn_hxs, masks, deterministic=False): 258 | value, actor_features, rnn_hxs = self.base(inputs, rnn_hxs, masks) 259 | dist = self.dist(actor_features) 260 | if deterministic: 261 | action = dist.mode() 262 | else: 263 | action = dist.sample() 264 | action_log_probs = dist.log_probs(action) 265 | dist_entropy = dist.entropy().mean() 266 | return value, action, action_log_probs, rnn_hxs 267 | 268 | def get_value(self, inputs, rnn_hxs, masks): 269 | value, _, _ = self.base(inputs, rnn_hxs, masks) 270 | return value 271 | 272 | def evaluate_actions(self, inputs, rnn_hxs, masks, action): 273 | value, actor_features, rnn_hxs = self.base(inputs, rnn_hxs, masks) 274 | dist = self.dist(actor_features) 275 | action_log_probs = dist.log_probs(action) 276 | dist_entropy = dist.entropy().mean() 277 | return value, action_log_probs, dist_entropy, rnn_hxs 278 | 279 | def get_log_probs(self, inputs, rnn_hxs, masks): 280 | value, actor_features, rnn_hxs = self.base(inputs, rnn_hxs, masks) 281 | dist = self.dist(actor_features) 282 | action_log_probs = (dist.probs + 1e-10).log() 283 | return action_log_probs 284 | -------------------------------------------------------------------------------- /exploring_exploration/models/frontier_agent.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # Copyright (c) Facebook, Inc. and its affiliates. 3 | # All rights reserved. 4 | # This source code is licensed under the license found in the 5 | # LICENSE file in the root directory of this source tree. 6 | 7 | import cv2 8 | import logging 9 | import numpy as np 10 | from exploring_exploration.models.navigation import ( 11 | AStarActorAVD, 12 | AStarActorHabitat, 13 | HierarchicalAStarActorHabitat, 14 | HierarchicalAStarActorHabitatV2, 15 | ) 16 | 17 | 18 | class FrontierAgent: 19 | def __init__( 20 | self, 21 | action_space, 22 | env_name, 23 | occ_map_scale, 24 | show_animation=False, 25 | seed=123, 26 | use_contour_sampling=True, 27 | dilate_occupancy=True, 28 | max_time_per_target=-1, 29 | ): 30 | self.map_size = None 31 | self.action_space = action_space 32 | self.show_animation = show_animation 33 | self.frontier_target = None 34 | self.occ_buffer = None 35 | self.seed = seed 36 | self._rng = random.Random(seed) 37 | self._time_elapsed_for_target = 0 38 | self._failure_count = 0 39 | self.use_contour_sampling = use_contour_sampling 40 | self.env_name = env_name 41 | 42 | if "avd" in env_name: 43 | self.actor = AStarActorAVD(action_space, show_animation=show_animation) 44 | self.max_time_per_target = ( 45 | 20 if max_time_per_target == -1 else max_time_per_target 46 | ) 47 | else: 48 | self.actor = HierarchicalAStarActorHabitatV2( 49 | action_space, occ_map_scale, show_animation=show_animation 50 | ) 51 | # Manually set dilate_occupancy flag 52 | if dilate_occupancy: 53 | self.actor.high_level_actor.dilate_occupancy = True 54 | self.actor.low_level_actor.dilate_occupancy = True 55 | self.max_time_per_target = ( 56 | 200 if max_time_per_target == -1 else max_time_per_target 57 | ) 58 | 59 | logging.info("========> FrontierAgent settings") 60 | logging.info(f"===> max_time_per_target : {self.max_time_per_target}") 61 | logging.info(f"===> dilate_occupancy : {dilate_occupancy}") 62 | 63 | def act(self, occ_map, prev_delta, collision_prev_step): 64 | if self.occ_buffer is None: 65 | self.map_size = occ_map.shape[0] 66 | self.occ_buffer = np.zeros((self.map_size, self.map_size), dtype=np.uint8) 67 | 68 | 
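        # In the loop below, action value 3 is the local planner's "done / cannot
        # plan" signal: the planner returns 3 once it has reached (or repeatedly
        # failed to plan a path to) the current frontier target. The loop therefore
        # keeps resampling frontier targets until a motion action is produced, and
        # falls back to a random turn/forward action if several consecutive targets
        # all come back with action 3.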
action = 3 69 | action_count = 0 70 | while action == 3: 71 | # If no target is selected or too much time was spent on a single target, pick a new target 72 | if ( 73 | self.frontier_target is None 74 | or self._time_elapsed_for_target >= self.max_time_per_target 75 | ): 76 | self.sample_frontier_target(occ_map) 77 | self.actor.reset() 78 | # If the hierarchical planner failed twice to generate a plan to the target, then sample a new target 79 | elif self._failure_count == 2: 80 | self.sample_frontier_target(occ_map) 81 | self.actor.reset() 82 | # If a valid target is available, then update the target coordinate based on the past motion. 83 | else: 84 | self.update_target(prev_delta) 85 | # If the agent has reached the target or the target is occupied, then sample a new target 86 | if self.has_reached_target() or np.all( 87 | occ_map[self.frontier_target[1], self.frontier_target[0]] 88 | == (0, 0, 255) 89 | ): 90 | self.sample_frontier_target(occ_map) 91 | self.actor.reset() 92 | # When the hierarchical actor has reached the target, resample the target 93 | elif action_count > 0 and action == 3: 94 | self.sample_frontier_target(occ_map) 95 | self.actor.reset() 96 | 97 | if self.show_animation: 98 | cv2.imshow("Occupancy", np.flip(occ_map, axis=2)) 99 | cv2.waitKey(20) 100 | 101 | action_count += 1 102 | 103 | # Prevents infinite loop when all frontier targets sampled return action=3 104 | if action_count > 3: 105 | logging.info("=====> Stuck in occupied region! ") 106 | return random.choice( 107 | [ 108 | self.action_space["left"], 109 | self.action_space["right"], 110 | self.action_space["forward"], 111 | ] 112 | ) 113 | 114 | if "avd" in self.env_name: 115 | action = self.actor.act( 116 | occ_map, self.frontier_target, collision_prev_step 117 | ) 118 | else: 119 | # This does not process the occupancy map. Process it. 
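            # (occ_map uses the colour coding documented in sample_frontier_target()
            #  below: (0, 0, 255) = occupied, (255, 255, 255) = unknown,
            #  (0, 255, 0) = free.)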
120 | action = self.actor.act( 121 | occ_map, self.frontier_target, prev_delta, collision_prev_step 122 | ) 123 | if self.actor.planning_failure_flag: 124 | self._failure_count += 1 125 | if self._failure_count == 2: 126 | action = 3 127 | 128 | self._time_elapsed_for_target += 1 129 | 130 | return action 131 | 132 | def sample_frontier_target(self, occ_map): 133 | """ 134 | Inputs: 135 | occ_map - occupancy map with the following color coding: 136 | (0, 0, 255) is occupied region 137 | (255, 255, 255) is unknown region 138 | (0, 255, 0) is free region 139 | """ 140 | self.occ_buffer.fill(0) 141 | self._time_elapsed_for_target = 0 142 | self._failure_count = 0 143 | 144 | unknown_mask = np.all(occ_map == (255, 255, 255), axis=-1).astype(np.uint8) 145 | free_mask = np.all(occ_map == (0, 255, 0), axis=-1).astype(np.uint8) 146 | 147 | unknown_mask_shiftup = np.pad( 148 | unknown_mask, ((0, 1), (0, 0)), mode="constant", constant_values=0 149 | )[1:, :] 150 | unknown_mask_shiftdown = np.pad( 151 | unknown_mask, ((1, 0), (0, 0)), mode="constant", constant_values=0 152 | )[:-1, :] 153 | unknown_mask_shiftleft = np.pad( 154 | unknown_mask, ((0, 0), (0, 1)), mode="constant", constant_values=0 155 | )[:, 1:] 156 | unknown_mask_shiftright = np.pad( 157 | unknown_mask, ((0, 0), (1, 0)), mode="constant", constant_values=0 158 | )[:, :-1] 159 | 160 | frontier_mask = ( 161 | (free_mask == unknown_mask_shiftup) 162 | | (free_mask == unknown_mask_shiftdown) 163 | | (free_mask == unknown_mask_shiftleft) 164 | | (free_mask == unknown_mask_shiftright) 165 | ) & (free_mask == 1) 166 | 167 | frontier_idxes = list(zip(*np.where(frontier_mask))) 168 | if len(frontier_idxes) > 0: 169 | if self.use_contour_sampling: 170 | frontier_img = frontier_mask.astype(np.uint8) * 255 171 | # Reduce size for efficiency 172 | scaling_factor = frontier_mask.shape[0] / 200.0 173 | frontier_img = cv2.resize( 174 | frontier_img, 175 | None, 176 | fx=1.0 / scaling_factor, 177 | fy=1.0 / scaling_factor, 178 | interpolation=cv2.INTER_NEAREST, 179 | ) 180 | # Add a single channel 181 | frontier_img = frontier_img[:, :, np.newaxis] 182 | contours, _ = cv2.findContours( 183 | frontier_img, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE 184 | ) 185 | if len(contours) == 0: 186 | tgt = self._rng.choice(frontier_idxes) # (y, x) 187 | else: 188 | contours_length = [len(contour) for contour in contours] 189 | contours = list(zip(contours, contours_length)) 190 | sorted_contours = sorted(contours, key=lambda x: x[1], reverse=True) 191 | 192 | contours = sorted_contours[:3] 193 | # Randomly pick one of the longest contours 194 | # To introduce some stochasticity in case the agent is stuck 195 | max_contour = self._rng.choice(contours)[0] 196 | # Pick a random sample from the longest contour 197 | tgt = self._rng.choice(max_contour)[ 198 | 0 199 | ] # Each point is [[x, y]] for some reason 200 | # Scale it back to original image size 201 | # Convert it to (y, x) convention as this will be reversed next 202 | tgt = (int(tgt[1] * scaling_factor), int(tgt[0] * scaling_factor)) 203 | else: 204 | tgt = self._rng.choice(frontier_idxes) # (y, x) 205 | 206 | self.frontier_target = ( 207 | np.clip(tgt[1], 1, self.map_size - 2).item(), 208 | np.clip(tgt[0], 1, self.map_size - 2).item(), 209 | ) # (x, y) 210 | else: 211 | self.frontier_target = (self.map_size // 2 + 4, self.map_size // 2 + 4) 212 | 213 | if self.show_animation: 214 | occ_map_copy = np.copy(occ_map) 215 | occ_map_copy = cv2.circle( 216 | occ_map_copy, self.frontier_target, 3, (255, 0, 0), -1 
217 | ) 218 | cv2.imshow("Occupancy map with target", np.flip(occ_map_copy, axis=2)) 219 | cv2.imshow("Frontier mask", frontier_mask.astype(np.uint8) * 255) 220 | cv2.waitKey(10) 221 | 222 | def has_reached_target(self): 223 | fx, fy = self.frontier_target 224 | cx, cy = self.map_size / 2, self.map_size / 2 225 | if math.sqrt((fx - cx) ** 2 + (fy - cy) ** 2) < 3.0: 226 | return True 227 | else: 228 | return False 229 | 230 | def update_target(self, prev_delta): 231 | """ 232 | Update the target to the new egocentric coordinate system. 233 | Inputs: 234 | prev_delta - (dx, dy, dtheta) motion in the previous position's 235 | frame of reference 236 | """ 237 | # Note: X - forward, Y - rightward in egocentric frame of references 238 | 239 | # Perform update in egocentric coordinate 240 | x, y = self._convert_to_egocentric(self.frontier_target) 241 | 242 | dx, dy, dt = prev_delta 243 | # Translate 244 | xp, yp = x - dx, y - dy 245 | # Rotate by -dt 246 | xp, yp = ( 247 | math.cos(-dt) * xp - math.sin(-dt) * yp, 248 | math.sin(-dt) * xp + math.cos(-dt) * yp, 249 | ) 250 | 251 | # Convert back to image coordinate 252 | xi, yi = self._convert_to_image((xp, yp)) 253 | xi = np.clip(xi, 1, self.map_size - 2) 254 | yi = np.clip(yi, 1, self.map_size - 2) 255 | 256 | self.frontier_target = (int(xi), int(yi)) 257 | 258 | def _convert_to_egocentric(self, coords): 259 | return (-coords[1] + self.map_size / 2, coords[0] - self.map_size / 2) 260 | 261 | def _convert_to_image(self, coords): 262 | # Forward - positive X, rightward - positive Y 263 | return (coords[1] + self.map_size / 2, -coords[0] + self.map_size / 2) 264 | 265 | def reset(self): 266 | self.frontier_target = None 267 | self._failure_count = 0 268 | self._time_elapsed_for_target = 0 269 | -------------------------------------------------------------------------------- /exploring_exploration/models/reconstruction.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # Copyright (c) Facebook, Inc. and its affiliates. 3 | # All rights reserved. 4 | # This source code is licensed under the license found in the 5 | # LICENSE file in the root directory of this source tree. 6 | 7 | import torch 8 | import torch.nn as nn 9 | import torch.nn.functional as F 10 | import torchvision.models as tmodels 11 | import torch.nn.modules.transformer as transformer 12 | 13 | 14 | class View(nn.Module): 15 | def __init__(self, *shape): 16 | # shape is a list 17 | super().__init__() 18 | self.shape = shape 19 | 20 | def forward(self, input): 21 | return input.view(*self.shape) 22 | 23 | 24 | class FeatureReconstructionModule(nn.Module): 25 | """An encoder-decoder model based on transformers for reconstructing 26 | concepts at a target location. 
27 | """ 28 | 29 | def __init__(self, nfeats, noutputs, nlayers=4): 30 | super().__init__() 31 | encoder_layer = transformer.TransformerEncoderLayer(nfeats + 16, 2, nfeats) 32 | decoder_layer = transformer.TransformerDecoderLayer(nfeats + 16, 2, nfeats) 33 | self.encoder = transformer.TransformerEncoder(encoder_layer, nlayers) 34 | self.decoder = transformer.TransformerDecoder(decoder_layer, nlayers) 35 | self.predict_outputs = nn.Linear(nfeats + 16, noutputs) 36 | 37 | def forward(self, x): 38 | """ 39 | Inputs: 40 | x - dictionary consisting of the following: 41 | { 42 | 'history_image_features': (T, N, nfeats) 43 | 'history_pose_features': (T, N, 16) 44 | 'target_pose_features': (1, N, 16) 45 | } 46 | Outputs: 47 | pred_outputs - (1, N, noutputs) 48 | """ 49 | target_pose_features = x["target_pose_features"][0] 50 | T, N, nfeats = x["history_image_features"].shape 51 | nRef = target_pose_features.shape[1] 52 | device = x["target_pose_features"].device 53 | # =================== Encode features and poses ======================= 54 | encoder_inputs = torch.cat( 55 | [x["history_image_features"], x["history_pose_features"]], dim=2 56 | ) # (T, N, nfeats+16) 57 | encoded_features = self.encoder(encoder_inputs) # (T, N, nfeats+16) 58 | # ================ Decode features for given poses ==================== 59 | decoder_pose_features = target_pose_features.unsqueeze(0) # (1, N, 16) 60 | # Initialize as zeros 61 | decoder_image_features = torch.zeros( 62 | *decoder_pose_features.shape[:2], nfeats 63 | ).to( 64 | device 65 | ) # (1, N, nfeats) 66 | decoder_inputs = torch.cat( 67 | [decoder_image_features, decoder_pose_features], dim=2 68 | ) # (1, N, nfeats+16) 69 | decoder_features = self.decoder( 70 | decoder_inputs, encoded_features 71 | ) # (1, N, nfeats+16) 72 | pred_outputs = self.predict_outputs(decoder_features).squeeze(0) 73 | return pred_outputs.unsqueeze(0) 74 | 75 | 76 | class FeatureNetwork(nn.Module): 77 | """Network to extract image features. 78 | """ 79 | 80 | def __init__(self): 81 | super().__init__() 82 | resnet = tmodels.resnet50(pretrained=True) 83 | self.net = nn.Sequential( 84 | resnet.conv1, 85 | resnet.bn1, 86 | resnet.relu, 87 | resnet.maxpool, 88 | resnet.layer1, 89 | resnet.layer2, 90 | resnet.layer3, 91 | resnet.layer4, 92 | resnet.avgpool, 93 | ) 94 | 95 | def forward(self, x): 96 | feat = self.net(x).squeeze(3).squeeze(2) 97 | feat = F.normalize(feat, p=2, dim=1) 98 | return feat 99 | 100 | 101 | class PoseEncoder(nn.Module): 102 | """Network to encode pose information. 103 | """ 104 | 105 | def __init__(self): 106 | super().__init__() 107 | self.main = nn.Sequential(nn.Linear(3, 16), nn.ReLU(), nn.Linear(16, 16),) 108 | 109 | def forward(self, x): 110 | return self.main(x) 111 | -------------------------------------------------------------------------------- /exploring_exploration/utils/common.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # Copyright (c) Facebook, Inc. and its affiliates. 3 | # All rights reserved. 4 | # This source code is licensed under the license found in the 5 | # LICENSE file in the root directory of this source tree. 6 | 7 | import math 8 | import torch 9 | import random 10 | import numpy as np 11 | import torch.nn.functional as F 12 | 13 | from einops import rearrange, asnumpy 14 | 15 | 16 | def flatten_two(x): 17 | """Flattens the first two dims. 
18 | """ 19 | return x.view(-1, *x.shape[2:]) 20 | 21 | 22 | def unflatten_two(x, sh1, sh2): 23 | """Unflattens the first two dims. 24 | """ 25 | return x.view(sh1, sh2, *x.shape[1:]) 26 | 27 | 28 | def unsq_exp(x, reps, dim=0): 29 | """Unsqueezes along dimension dim and repeats along the axis reps times. 30 | """ 31 | x_e = x.unsqueeze(dim) 32 | exp_args = [-1] * len(x_e.shape) 33 | exp_args[dim] = reps 34 | return x_e.expand(*exp_args).contiguous() 35 | 36 | 37 | approx_eq = lambda a, b, eps: torch.lt(torch.abs(a - b), eps) 38 | 39 | 40 | def norm_angle(x): 41 | """Normalizes an angle (scalar) between -pi to pi. 42 | """ 43 | if isinstance(x, np.ndarray): 44 | return np.arctan2(np.sin(x), np.cos(x)) 45 | elif isinstance(x, torch.Tensor): 46 | return torch.atan2(torch.sin(x), torch.cos(x)) 47 | else: 48 | return math.atan2(math.sin(x), math.cos(x)) 49 | 50 | 51 | def freeze_params(module): 52 | """Freezes all parameters of a module by setting requires_grad to False. 53 | """ 54 | for param in module.parameters(): 55 | param.requires_grad = False 56 | 57 | 58 | def unnormalize(data, mean, std): 59 | # data - (bs, H, W, C) 60 | data[:, :, :, 0] = data[:, :, :, 0] * std[0] + mean[0] 61 | data[:, :, :, 1] = data[:, :, :, 1] * std[1] + mean[1] 62 | data[:, :, :, 2] = data[:, :, :, 2] * std[2] + mean[2] 63 | return data 64 | 65 | 66 | def process_image(img): 67 | """Apply imagenet normalization to a batch of images. 68 | """ 69 | # img - (bs, C, H, W) 70 | mean = [0.485, 0.456, 0.406] 71 | std = [0.229, 0.224, 0.225] 72 | img_proc = img.float() / 255.0 73 | 74 | img_proc[:, 0] = (img_proc[:, 0] - mean[0]) / std[0] 75 | img_proc[:, 1] = (img_proc[:, 1] - mean[1]) / std[1] 76 | img_proc[:, 2] = (img_proc[:, 2] - mean[2]) / std[2] 77 | 78 | return img_proc 79 | 80 | 81 | def resize_image(img, shape=(84, 84), mode="bilinear"): 82 | """Resizes a batch of images. 83 | """ 84 | # img - (bs, C, H, W) FloatTensor 85 | out_img = F.interpolate(img, size=shape, mode=mode) 86 | return out_img 87 | 88 | 89 | def unprocess_image(img): 90 | """Undo imagenet normalization to a batch of images.""" 91 | # img - (bs, C, H, W) 92 | mean = [0.485, 0.456, 0.406] 93 | std = [0.229, 0.224, 0.225] 94 | img_unproc = np.copy(asnumpy(img)) 95 | img_unproc[:, 0] = img_unproc[:, 0] * std[0] + mean[0] 96 | img_unproc[:, 1] = img_unproc[:, 1] * std[1] + mean[1] 97 | img_unproc[:, 2] = img_unproc[:, 2] * std[2] + mean[2] 98 | 99 | img_unproc = np.clip(img_unproc, 0.0, 1.0) * 255.0 100 | img_unproc = img_unproc.astype(np.uint8) 101 | img_unproc = rearrange(img_unproc, "b c h w -> b h w c") 102 | 103 | return img_unproc 104 | 105 | 106 | # Weight initializations 107 | def init(module, weight_init, bias_init, gain=1): 108 | weight_init(module.weight.data, gain=gain) 109 | bias_init(module.bias.data) 110 | return module 111 | 112 | 113 | # https://github.com/openai/baselines/blob/master/baselines/common/tf_util.py#L87 114 | def init_normc_(weight, gain=1): 115 | weight.normal_(0, 1) 116 | weight *= gain / torch.sqrt(weight.pow(2).sum(1, keepdim=True)) 117 | 118 | 119 | def random_range(start, end, interval=1): 120 | """Returns a randomized range of numbers. 121 | """ 122 | vals = list(range(start, end, interval)) 123 | random.shuffle(vals) 124 | return vals 125 | -------------------------------------------------------------------------------- /exploring_exploration/utils/distributions.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # Copyright (c) Facebook, Inc. 
and its affiliates. 3 | # All rights reserved. 4 | # This source code is licensed under the license found in the 5 | # LICENSE file in the root directory of this source tree. 6 | 7 | import torch 8 | import torch.nn as nn 9 | 10 | from exploring_exploration.utils.common import init 11 | 12 | """ 13 | Modify standard PyTorch distributions so they are compatible with this code. 14 | """ 15 | 16 | FixedCategorical = torch.distributions.Categorical 17 | 18 | old_sample = FixedCategorical.sample 19 | FixedCategorical.sample = lambda self: old_sample(self).unsqueeze(-1) 20 | 21 | log_prob_cat = FixedCategorical.log_prob 22 | FixedCategorical.log_probs = lambda self, actions: log_prob_cat( 23 | self, actions.squeeze(-1) 24 | ).unsqueeze(-1) 25 | FixedCategorical.log_probs_all = lambda self: self.logits 26 | 27 | FixedCategorical.mode = lambda self: self.probs.argmax(dim=1, keepdim=True) 28 | 29 | FixedNormal = torch.distributions.Normal 30 | log_prob_normal = FixedNormal.log_prob 31 | FixedNormal.log_probs = lambda self, actions: log_prob_normal(self, actions).sum( 32 | -1, keepdim=True 33 | ) 34 | 35 | entropy = FixedNormal.entropy 36 | FixedNormal.entropy = lambda self: entropy(self).sum(-1) 37 | 38 | FixedNormal.mode = lambda self: self.mean 39 | 40 | 41 | class Categorical(nn.Module): 42 | def __init__(self, num_inputs, num_outputs): 43 | super(Categorical, self).__init__() 44 | 45 | init_ = lambda m: init( 46 | m, nn.init.orthogonal_, lambda x: nn.init.constant_(x, 0), gain=0.01 47 | ) 48 | 49 | self.linear = init_(nn.Linear(num_inputs, num_outputs)) 50 | 51 | def forward(self, x): 52 | x = self.linear(x) 53 | return FixedCategorical(logits=x) 54 | -------------------------------------------------------------------------------- /exploring_exploration/utils/geometry.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # Copyright (c) Facebook, Inc. and its affiliates. 3 | # All rights reserved. 4 | # This source code is licensed under the license found in the 5 | # LICENSE file in the root directory of this source tree. 
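# subtract_pose() and add_pose() below operate on (N, 3) tensors whose rows are
# (y, x, phi) world-frame poses, with angles in radians; both re-normalize the
# resulting angle to (-pi, pi] via atan2(sin, cos). subtract_pose() expresses
# poses in the frame of a reference pose, and add_pose() composes a relative
# pose back onto a reference pose. A quick round-trip check (illustrative, not
# part of the original source):
#
#     rel = subtract_pose(ref_pose, pose)           # (x, y, phi) in ref_pose's frame
#     back = add_pose(ref_pose, rel[:, [1, 0, 2]])  # add_pose defaults to (y, x, t) deltas
#     # `back` matches `pose` up to 2*pi wrapping of the angle.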
6 | 7 | import torch 8 | import numpy as np 9 | 10 | approx_eq = lambda a, b, eps: torch.lt(torch.abs(a - b), eps) 11 | 12 | 13 | def process_pose(pose): 14 | # pose - num_processes x 4 element Tensor with (r, theta, phi_head, phi_elev) - angles in radians 15 | # Output - num_processes x 3 torch tensor representing distance, cos and sin of relative theta 16 | pose_processed = torch.stack( 17 | (pose[:, 0], torch.cos(pose[:, 1]), torch.sin(pose[:, 1])), dim=1 18 | ).to(pose.device) 19 | return pose_processed 20 | 21 | 22 | def process_poseref(pose, map_shape, map_scale, angles, eps): 23 | # pose - batch_size x 3 - (r, theta, head) of the reference view 24 | r = pose[:, 0] 25 | t = pose[:, 1] 26 | x = r * torch.cos(t) 27 | y = r * torch.sin(t) 28 | mh, mw = map_shape[1:] 29 | # This convention comes from transform_to_map() in model_pose.py 30 | ref_on_map_x = torch.clamp(mw / 2 + x / map_scale, 0, mw - 1) 31 | ref_on_map_y = torch.clamp(mh / 2 + y / map_scale, 0, mh - 1) 32 | # Mapping heading angles to map locations 33 | ref_on_map_dir = torch.zeros(pose.shape[0]).to(pose.device) 34 | normalized_angles = torch.atan2(torch.sin(pose[:, 2]), torch.cos(pose[:, 2])) 35 | for i in range(angles.shape[0]): 36 | ref_on_map_dir[approx_eq(normalized_angles, angles[i].item(), eps)] = i 37 | return torch.stack([ref_on_map_x, ref_on_map_y, ref_on_map_dir], dim=1).long() 38 | 39 | 40 | def process_poseref_raw(pose, map_shape, map_scale, angles, eps): 41 | # pose - batch_size x 3 - (r, theta, head) of the reference view 42 | r = pose[:, 0] 43 | t = pose[:, 1] 44 | x = r * torch.cos(t) 45 | y = r * torch.sin(t) 46 | mh, mw = map_shape[1:] 47 | # This convention comes from transform_to_map() in model_pose.py 48 | ref_on_map_x = torch.clamp(mw / 2 + x / map_scale, 0, mw - 1) 49 | ref_on_map_y = torch.clamp(mh / 2 + y / map_scale, 0, mh - 1) 50 | normalized_angles = torch.atan2(torch.sin(pose[:, 2]), torch.cos(pose[:, 2])) 51 | return torch.stack([ref_on_map_x, ref_on_map_y, normalized_angles], dim=1) 52 | 53 | 54 | def position_loss_fn(pred, gt): 55 | """ 56 | pred - (bs, 3) ---> (r, cos_phi, sin_phi) 57 | gt - (bs, 3) ---> (r, cos_phi, sin_phi) 58 | pred won't be normalized, gt will be normalized cos, sin values 59 | """ 60 | pred_cossin = norm_cossin(pred[:, 1:]) 61 | gt_cossin = gt[:, 1:] 62 | pred_r = pred[:, 0] 63 | gt_r = gt[:, 0] 64 | pred_x = pred_r * pred_cossin[:, 0] 65 | pred_y = pred_r * pred_cossin[:, 1] 66 | gt_x = gt_r * gt_cossin[:, 0] 67 | gt_y = gt_r * gt_cossin[:, 1] 68 | loss = (pred_x - gt_x) ** 2 + (pred_y - gt_y) ** 2 69 | return loss 70 | 71 | 72 | def norm_cossin(input): 73 | """Convert unnormalized cos, sin predictions into [0, 1] range. 74 | """ 75 | # Normalize cos, sin predictions 76 | if isinstance(input, torch.Tensor): 77 | input = input / (torch.norm(input, dim=1).unsqueeze(1) + 1e-8) 78 | elif isinstance(input, np.ndarray): 79 | input = input / (np.linalg.norm(input, axis=1)[:, np.newaxis] + 1e-8) 80 | else: 81 | raise ValueError("Incorrect type for norm_cossin!") 82 | 83 | return input 84 | 85 | 86 | def process_odometer(poses): 87 | """Converts odometer readings in polar coordinates to xyt coordinates. 
88 | 89 | Inputs: 90 | pose - (bs, 4) Tensor with (r, theta, phi_head, phi_elev) 91 | - where angles are in radians 92 | Outputs: 93 | pose_processed - (bs, 4) Tensor with (y, x, phi_head, phi_elev) 94 | """ 95 | pose_processed = torch.stack( 96 | [ 97 | poses[:, 0] * torch.sin(poses[:, 1]), 98 | poses[:, 0] * torch.cos(poses[:, 1]), 99 | poses[:, 2], 100 | poses[:, 3], 101 | ], 102 | dim=1, 103 | ) 104 | return pose_processed 105 | 106 | 107 | def np_normalize(angles): 108 | return np.arctan2(np.sin(angles), np.cos(angles)) 109 | 110 | 111 | def xyt2polar(poses): 112 | """Converts poses from carteisan (xyt) to polar (rpt) coordinates. 113 | 114 | Inputs: 115 | poses - (bs, 3) Tensor --- (x, y, theta) 116 | Outputs: 117 | poses Tensor with (r, phi, theta) conventions 118 | """ 119 | return torch.stack( 120 | [ 121 | torch.norm(poses[:, :2], dim=1), # r 122 | torch.atan2(poses[:, 1], poses[:, 0]), # phi 123 | poses[:, 2], 124 | ], 125 | dim=1, 126 | ) 127 | 128 | 129 | def polar2xyt(poses): 130 | """Converts poses from polar (rpt) to cartesian (xyt) coordinates. 131 | 132 | Inputs: 133 | poses - (bs, 3) Tensor --- (r, phi, theta) 134 | Outputs: 135 | poses Tensor with (x, y, theta) conventions 136 | """ 137 | return torch.stack( 138 | [ 139 | poses[:, 0] * torch.cos(poses[:, 1]), # x 140 | poses[:, 0] * torch.sin(poses[:, 1]), # y 141 | poses[:, 2], 142 | ], 143 | dim=1, 144 | ) 145 | 146 | 147 | def compute_egocentric_coors(delta, prev_pos, scale): 148 | """ 149 | delta - (N, 4) --- (y, x, phi_head, phi_elev) 150 | prev_pos - (N, 4) --- (y, x, phi_head, phi_elev) 151 | """ 152 | dy, dx, dt = delta[:, 0], delta[:, 1], delta[:, 2] 153 | x, y, t = prev_pos[:, 0], prev_pos[:, 1], prev_pos[:, 2] 154 | dr = torch.sqrt(dx ** 2 + dy ** 2) 155 | dp = torch.atan2(dy, dx) - t 156 | dx_ego = dr * torch.cos(dp) / scale 157 | dy_ego = dr * torch.sin(dp) / scale 158 | dt_ego = dt 159 | 160 | return torch.stack([dx_ego, dy_ego, dt_ego], dim=1) 161 | 162 | 163 | def subtract_pose(pose_common, poses): 164 | """ 165 | Convert poses to frame-of-reference of pose_common. 166 | 167 | Inputs: 168 | pose_common - (N, 3) --- (y, x, phi) 169 | poses - (N, 3) --- (y, x, phi) 170 | 171 | Outputs: 172 | poses_n - (N, 3) --- (x, y, phi) in the new coordinate system 173 | """ 174 | 175 | x = poses[:, 1] 176 | y = poses[:, 0] 177 | phi = poses[:, 2] 178 | 179 | x_c = pose_common[:, 1] 180 | y_c = pose_common[:, 0] 181 | phi_c = pose_common[:, 2] 182 | 183 | # Polar coordinates in the new frame-of-reference 184 | r_n = torch.sqrt((x - x_c) ** 2 + (y - y_c) ** 2) 185 | theta_n = torch.atan2(y - y_c, x - x_c) - phi_c 186 | # Convert to cartesian coordinates 187 | x_n = r_n * torch.cos(theta_n) 188 | y_n = r_n * torch.sin(theta_n) 189 | phi_n = phi - phi_c 190 | # Normalize phi to lie between -pi to pi 191 | phi_n = torch.atan2(torch.sin(phi_n), torch.cos(phi_n)) 192 | 193 | poses_n = torch.stack([x_n, y_n, phi_n], dim=1) 194 | 195 | return poses_n 196 | 197 | 198 | def add_pose(pose_common, dposes, mode="yxt"): 199 | """ 200 | Convert dposes from frame-of-reference of pose_common to global pose. 
201 | 202 | Inputs: 203 | pose_common - (N, 3) 204 | dposes - (N, 3) 205 | 206 | Outputs: 207 | poses - (N, 3) 208 | """ 209 | 210 | assert mode in ["xyt", "yxt"] 211 | 212 | if mode == "yxt": 213 | dy, dx, dt = torch.unbind(dposes, dim=1) 214 | y_c, x_c, t_c = torch.unbind(pose_common, dim=1) 215 | else: 216 | dx, dy, dt = torch.unbind(dposes, dim=1) 217 | x_c, y_c, t_c = torch.unbind(pose_common, dim=1) 218 | 219 | dr = torch.sqrt(dx ** 2 + dy ** 2) 220 | dphi = torch.atan2(dy, dx) + t_c 221 | x = x_c + dr * torch.cos(dphi) 222 | y = y_c + dr * torch.sin(dphi) 223 | t = t_c + dt 224 | # Normalize angles to lie between -pi to pi 225 | t = torch.atan2(torch.sin(t), torch.cos(t)) 226 | 227 | if mode == "yxt": 228 | poses = torch.stack([y, x, t], dim=1) 229 | else: 230 | poses = torch.stack([x, y, t], dim=1) 231 | 232 | return poses 233 | -------------------------------------------------------------------------------- /exploring_exploration/utils/median_pooling.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # Copyright (c) Facebook, Inc. and its affiliates. 3 | # All rights reserved. 4 | # This source code is licensed under the license found in the 5 | # LICENSE file in the root directory of this source tree. 6 | 7 | import torch.nn as nn 8 | import torch.nn.functional as F 9 | from torch.nn.modules.utils import _pair, _quadruple 10 | 11 | # Obtained from https://gist.github.com/rwightman/f2d3849281624be7c0f11c85c87c1598 12 | class MedianPool2d(nn.Module): 13 | """ Median pool (usable as median filter when stride=1) module. 14 | 15 | Args: 16 | kernel_size: size of pooling kernel, int or 2-tuple 17 | stride: pool stride, int or 2-tuple 18 | padding: pool padding, int or 4-tuple (l, r, t, b) as in pytorch F.pad 19 | same: override padding and enforce same padding, boolean 20 | """ 21 | 22 | def __init__(self, kernel_size=3, stride=1, padding=0, same=False): 23 | super(MedianPool2d, self).__init__() 24 | self.k = _pair(kernel_size) 25 | self.stride = _pair(stride) 26 | self.padding = _quadruple(padding) # convert to l, r, t, b 27 | self.same = same 28 | 29 | def _padding(self, x): 30 | if self.same: 31 | ih, iw = x.size()[2:] 32 | if ih % self.stride[0] == 0: 33 | ph = max(self.k[0] - self.stride[0], 0) 34 | else: 35 | ph = max(self.k[0] - (ih % self.stride[0]), 0) 36 | if iw % self.stride[1] == 0: 37 | pw = max(self.k[1] - self.stride[1], 0) 38 | else: 39 | pw = max(self.k[1] - (iw % self.stride[1]), 0) 40 | pl = pw // 2 41 | pr = pw - pl 42 | pt = ph // 2 43 | pb = ph - pt 44 | padding = (pl, pr, pt, pb) 45 | else: 46 | padding = self.padding 47 | return padding 48 | 49 | def forward(self, x): 50 | # using existing pytorch functions and tensor ops so that we get autograd, 51 | # would likely be more efficient to implement from scratch at C/Cuda level 52 | x = F.pad(x, self._padding(x), mode="reflect") 53 | x = x.unfold(2, self.k[0], self.stride[0]).unfold(3, self.k[1], self.stride[1]) 54 | x = x.contiguous().view(x.size()[:4] + (-1,)).median(dim=-1)[0] 55 | return x 56 | 57 | 58 | class MedianPool1d(nn.Module): 59 | """ Median pool (usable as median filter when stride=1) module. 
60 | 61 | Args: 62 | kernel_size: size of pooling kernel, int 63 | stride: pool stride, int 64 | padding: pool padding, int 65 | same: override padding and enforce same padding, boolean 66 | """ 67 | 68 | def __init__(self, kernel_size=3, stride=1, padding=0, same=False): 69 | super(MedianPool1d, self).__init__() 70 | self.k = kernel_size 71 | self.stride = stride 72 | self.padding = _pair(padding) 73 | self.same = same 74 | 75 | def _padding(self, x): 76 | # x - (bs, C, L) 77 | if self.same: 78 | il = x.size()[2] 79 | if il % self.stride == 0: 80 | pl = max(self.k - self.stride, 0) 81 | else: 82 | pl = max(self.k - (il % self.stride), 0) 83 | pleft = pl // 2 84 | pright = pl - pleft 85 | padding = (pleft, pright) 86 | else: 87 | padding = self.padding 88 | return padding 89 | 90 | def forward(self, x): 91 | # using existing pytorch functions and tensor ops so that we get autograd, 92 | # would likely be more efficient to implement from scratch at C/Cuda level 93 | x = F.pad(x, self._padding(x), mode="reflect") 94 | x = x.unfold(2, self.k, self.stride) 95 | x = x.contiguous().median(dim=-1)[0] 96 | return x 97 | -------------------------------------------------------------------------------- /exploring_exploration/utils/metrics.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # Copyright (c) Facebook, Inc. and its affiliates. 3 | # All rights reserved. 4 | # This source code is licensed under the license found in the 5 | # LICENSE file in the root directory of this source tree. 6 | 7 | import math 8 | import torch 9 | import logging 10 | import numpy as np 11 | 12 | from exploring_exploration.utils.geometry import norm_cossin 13 | from exploring_exploration.utils.common import norm_angle 14 | 15 | 16 | def precision_at_k(pred_scores, gt_scores, k=5, gt_K=5): 17 | """ 18 | Measures the fraction of correctly retrieved classes among the top-k 19 | retrievals. 
20 | 21 | Inputs: 22 | pred_scores - (N, nclasses) logits 23 | gt_scores - (N, nclasses) similarity scores 24 | k - the top-k retrievals from pred_scores to consider 25 | gt_K - how many of the most similar classes in gt_scores 26 | should be considered as ground-truth 27 | 28 | Outputs: 29 | prec_scores - (N, ) precision@k scores per batch element 30 | """ 31 | device = pred_scores.device 32 | N, nclasses = pred_scores.shape 33 | 34 | relevant_idxes = (torch.topk(gt_scores, gt_K, dim=1).indices).cpu() # (N, gt_K) 35 | relevant_idxes_indicator = torch.zeros(N, nclasses) 36 | relevant_idxes_indicator.scatter_(1, relevant_idxes, 1.0) 37 | 38 | pred_idxes = (torch.topk(pred_scores, k, dim=1).indices).cpu() # (N, k) 39 | pred_idxes_indicator = torch.zeros(N, nclasses) 40 | pred_idxes_indicator.scatter_(1, pred_idxes, 1.0) 41 | 42 | intersection_indicator = ( 43 | relevant_idxes_indicator * pred_idxes_indicator 44 | ) # (N, nclasses) 45 | prec_scores = (intersection_indicator.sum(dim=1) / k).to(device) 46 | 47 | return prec_scores 48 | 49 | 50 | def s_metric(agent_pos, target_pos, thresh, stop_called): 51 | # Success rate 52 | if not stop_called: 53 | return 0.0 54 | 55 | dist = np.linalg.norm(np.array(agent_pos) - np.array(target_pos)).item() 56 | score = 0.0 if dist > thresh else 1.0 57 | return score 58 | 59 | 60 | def spl_metric( 61 | agent_pos, target_pos, thresh, path_length, shortest_path_length, stop_called 62 | ): 63 | # Success rate normalized by Path Length 64 | if not stop_called: 65 | return 0.0 66 | 67 | dist = np.linalg.norm(np.array(agent_pos) - np.array(target_pos)).item() 68 | score = ( 69 | 0.0 70 | if dist > thresh 71 | else shortest_path_length / (max(shortest_path_length, path_length) + 1e-7) 72 | ) 73 | return score 74 | 75 | 76 | def compute_pose_metrics( 77 | true_poses, pred_poses, true_pose_angles, pred_pose_angles, env_name 78 | ): 79 | """ 80 | Inputs: 81 | true_poses - array of ground truth poses 82 | pred_poses - array of predicted poses 83 | true_pose_angles - array of ground truth heading angles 84 | pred_pose_angles - array of predicted heading angles 85 | env_name - name of current environment 86 | 87 | Outputs 88 | metrics - a dictionary containing the different metrics measured 89 | """ 90 | metrics = {} 91 | heading_err = np.abs(norm_angle(pred_pose_angles - true_pose_angles)) 92 | avg_heading_err = math.degrees(heading_err.mean().item()) 93 | heading_err_per_episode = np.degrees(heading_err) 94 | 95 | # Compute angular error 96 | norm_gt_pose = torch.Tensor(true_poses[:, 1:]) 97 | norm_gt_angle = torch.atan2(norm_gt_pose[:, 1], norm_gt_pose[:, 0]) 98 | norm_pred_pose = norm_cossin(torch.Tensor(pred_poses[:, 1:])) 99 | norm_pred_angle = torch.atan2(norm_pred_pose[:, 1], norm_pred_pose[:, 0]) 100 | norm_ae = torch.abs(norm_angle(norm_pred_angle - norm_gt_angle)) 101 | norm_ae_avg = math.degrees(norm_ae.cpu().mean().item()) 102 | 103 | norm_ae_per_episode = np.degrees(norm_ae.cpu().numpy()) 104 | 105 | # Compute distance prediction error 106 | distance_err = np.sqrt(((true_poses[:, 0] - pred_poses[:, 0]) ** 2)) 107 | if "avd" not in env_name: 108 | distance_err = distance_err * 1000.0 # Convert to mm 109 | avg_distance_err = distance_err.mean() 110 | 111 | distance_err_per_episode = distance_err 112 | 113 | # Compute position error 114 | gt_r = torch.Tensor(true_poses[:, 0]) 115 | gt_x = gt_r * torch.cos(norm_gt_angle) 116 | gt_y = gt_r * torch.sin(norm_gt_angle) 117 | pred_r = torch.Tensor(pred_poses[:, 0]) 118 | pred_x = pred_r * torch.cos(norm_pred_angle) 
119 | pred_y = pred_r * torch.sin(norm_pred_angle) 120 | position_err = torch.sqrt((gt_x - pred_x) ** 2 + (gt_y - pred_y) ** 2) 121 | if "avd" not in env_name: 122 | position_err = position_err * 1000.0 # Convert to mm 123 | mean_position_err = position_err.mean().item() 124 | 125 | position_err_per_episode = position_err.cpu().numpy() 126 | 127 | # Compute position error, heading error as a function of difficulty 128 | difficulty_bins = list(range(500, 7000, 500)) 129 | position_errors_vs_diff = [] 130 | heading_errors_vs_diff = [] 131 | heading_err = torch.Tensor(heading_err) 132 | for i in range(len(difficulty_bins) - 1): 133 | dl, dh = difficulty_bins[i], difficulty_bins[i + 1] 134 | if "avd" not in env_name: 135 | diff_mask = (gt_r * 1000.0 < dh) & (gt_r * 1000.0 >= dl) 136 | else: 137 | diff_mask = (gt_r < dh) & (gt_r >= dl) 138 | position_error_curr = position_err[diff_mask] 139 | heading_error_curr = heading_err[diff_mask] 140 | if diff_mask.sum() == 0: 141 | position_errors_vs_diff.append(0) 142 | heading_errors_vs_diff.append(0) 143 | else: 144 | position_errors_vs_diff.append(position_error_curr.mean()) 145 | heading_errors_vs_diff.append(math.degrees(heading_error_curr.mean())) 146 | 147 | # Compute pose success rates at various thresholds 148 | success_thresholds = [250, 500, 750, 1000, 1250, 1500, 1750, 2000, 2250, 2500, 2750] 149 | success_rates = [ 150 | (position_err < sthresh).float().mean().item() for sthresh in success_thresholds 151 | ] 152 | 153 | logging.info("Success rates and thresholds:") 154 | logging.info( 155 | " | ".join(["{:6.0f}".format(sthresh) for sthresh in success_thresholds]) 156 | ) 157 | logging.info(" | ".join(["{:6.4f}".format(srate) for srate in success_rates])) 158 | 159 | logging.info("Position, heading errors at different difficulty levels:") 160 | logging.info(" || ".join(["{:6.2f}".format(dlevel) for dlevel in difficulty_bins])) 161 | logging.info( 162 | " || ".join(["{:6.2f}".format(perror) for perror in position_errors_vs_diff]) 163 | ) 164 | logging.info( 165 | " || ".join(["{:6.2f}".format(herror) for herror in heading_errors_vs_diff]) 166 | ) 167 | 168 | metrics["norm_ae"] = norm_ae_avg 169 | metrics["distance_err"] = avg_distance_err 170 | metrics["position_err"] = mean_position_err 171 | metrics["heading_err"] = avg_heading_err 172 | for thresh, rate in zip(success_thresholds, success_rates): 173 | metrics["success_rate @ {:.1f}".format(thresh)] = rate 174 | for level_0, level_1, err in zip( 175 | difficulty_bins[:-1], difficulty_bins[1:], position_errors_vs_diff 176 | ): 177 | metrics[ 178 | "position_err @ distances b/w {:.1f} to {:.1f}".format(level_0, level_1) 179 | ] = err 180 | 181 | for level_0, level_1, err in zip( 182 | difficulty_bins[:-1], difficulty_bins[1:], heading_errors_vs_diff 183 | ): 184 | metrics[ 185 | "heading_err @ distances b/w {:.1f} to {:.1f}".format(level_0, level_1) 186 | ] = err 187 | 188 | per_episode_metrics = { 189 | "heading_err": heading_err_per_episode, 190 | "norm_ae": norm_ae_per_episode, 191 | "position_err": position_err_per_episode, 192 | } 193 | return metrics, per_episode_metrics 194 | -------------------------------------------------------------------------------- /exploring_exploration/utils/reconstruction.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # Copyright (c) Facebook, Inc. and its affiliates. 3 | # All rights reserved. 
4 | # This source code is licensed under the license found in the 5 | # LICENSE file in the root directory of this source tree. 6 | 7 | import torch 8 | import torch.nn.functional as F 9 | 10 | from exploring_exploration.utils.common import ( 11 | unflatten_two, 12 | flatten_two, 13 | ) 14 | from exploring_exploration.utils.geometry import subtract_pose 15 | 16 | 17 | def multi_label_classification_loss(x, y, reduction="batchmean"): 18 | """ 19 | Multi-label classification loss - KL divergence between a uniform 20 | distribution over the GT classes and the predicted probabilities. 21 | Inputs: 22 | x - (bs, nclasses) predicted logits 23 | y - (bs, nclasses) with ones for the right classes and zeros 24 | for the wrong classes 25 | """ 26 | x_logprob = F.log_softmax(x, dim=1) 27 | y_prob = F.normalize( 28 | y.float(), p=1, dim=1 29 | ) # L1 normalization to convert to probabilities 30 | loss = F.kl_div(x_logprob, y_prob, reduction=reduction) 31 | return loss 32 | 33 | 34 | def rec_loss_fn_classify( 35 | x_logits, x_gt_feat, cluster_centroids, K=5, reduction="batchmean" 36 | ): 37 | """ 38 | Given the predicted logits and ground-truth reference feature, 39 | find the top-K NN cluster centroids to the ground-truth feature. 40 | Using the top-k clusters as the ground-truth, use a multi-label 41 | classification loss. 42 | 43 | NOTE - this assumes that x_gt_feat and cluster_centroids are unit vectors. 44 | 45 | Inputs: 46 | x_logits - (bs, nclusters) predicted logits 47 | x_gt_feat - (bs, nclusters) reference feature that consists of 48 | similarity scores between GT image and cluster centroids. 49 | cluster_centroids - (nclusters, feat_size) cluster centroids 50 | """ 51 | bs, nclasses = x_logits.shape 52 | nclusters, feat_size = cluster_centroids.shape 53 | device = x_logits.device 54 | 55 | # Compute cosine similarity between x_gt_feat and cluster_centroids 56 | cosine_sim = x_gt_feat 57 | 58 | # Sample top-K similar clusters 59 | topK_outputs = torch.topk(cosine_sim, K, dim=1) 60 | 61 | # Generate K-hot encoding 62 | k_hot_encoding = ( 63 | torch.zeros(bs, nclasses).to(device).scatter_(1, topK_outputs.indices, 1.0) 64 | ) 65 | 66 | loss = multi_label_classification_loss( 67 | x_logits, k_hot_encoding, reduction=reduction 68 | ) 69 | 70 | return loss 71 | 72 | 73 | def compute_reconstruction_rewards( 74 | obs_feats, 75 | obs_odometer, 76 | tgt_feats, 77 | tgt_poses, 78 | cluster_centroids_t, 79 | decoder, 80 | pose_encoder, 81 | ): 82 | """ 83 | Inputs: 84 | obs_feats - (T, N, nclusters) 85 | obs_odometer - (T, N, 3) --- (y, x, theta) 86 | tgt_feats - (N, nRef, nclusters) 87 | tgt_poses - (N, nRef, 3) --- (y, x, theta) 88 | cluster_centroids_t - (nclusters, feat_dim) 89 | decoder - decoder model 90 | pose_encoder - pose_encoder model 91 | 92 | Outputs: 93 | reward - (N, nRef) float values indicating how many 94 | GT clusters were successfully retrieved for 95 | each target. 
96 | """ 97 | T, N, nclusters = obs_feats.shape 98 | nRef = tgt_feats.shape[1] 99 | device = obs_feats.device 100 | 101 | obs_feats_exp = obs_feats.unsqueeze(2) 102 | obs_feats_exp = obs_feats_exp.expand( 103 | -1, -1, nRef, -1 104 | ).contiguous() # (T, N, nRef, nclusters) 105 | obs_odometer_exp = obs_odometer.unsqueeze(2) 106 | obs_odometer_exp = obs_odometer_exp.expand( 107 | -1, -1, nRef, -1 108 | ).contiguous() # (T, N, nRef, 3) 109 | tgt_poses_exp = ( 110 | tgt_poses.unsqueeze(0).expand(T, -1, -1, -1).contiguous() 111 | ) # (T, N, nRef, 3) 112 | 113 | # Compute relative poses 114 | obs_odometer_exp = obs_odometer_exp.view(T * N * nRef, 3) 115 | tgt_poses_exp = tgt_poses_exp.view(T * N * nRef, 3) 116 | obs_relpose = subtract_pose( 117 | obs_odometer_exp, tgt_poses_exp 118 | ) # (T*N*nRef, 3) --- (x, y, phi) 119 | 120 | # Compute pose encoding 121 | with torch.no_grad(): 122 | obs_relpose_enc = pose_encoder(obs_relpose) # (T*N*nRef, 16) 123 | obs_relpose_enc = obs_relpose_enc.view(T, N, nRef, -1) # (T, N, nRef, 16) 124 | tgt_relpose_enc = torch.zeros(1, *obs_relpose_enc.shape[1:]).to( 125 | device 126 | ) # (1, N, nRef, 16) 127 | 128 | # Compute reconstructions 129 | obs_feats_exp = obs_feats_exp.view(T, N * nRef, nclusters) 130 | obs_relpose_enc = obs_relpose_enc.view(T, N * nRef, -1) 131 | tgt_relpose_enc = tgt_relpose_enc.view(1, N * nRef, -1) 132 | 133 | rec_inputs = { 134 | "history_image_features": obs_feats_exp, 135 | "history_pose_features": obs_relpose_enc, 136 | "target_pose_features": tgt_relpose_enc, 137 | } 138 | 139 | with torch.no_grad(): 140 | pred_logits = decoder(rec_inputs) # (1, N*nRef, nclusters) 141 | pred_logits = pred_logits.squeeze(0) # (N*nRef, nclusters) 142 | pred_logits = unflatten_two(pred_logits, N, nRef) # (N, nRef, nclusters) 143 | 144 | # Compute GT classes 145 | tgt_feats_sim = tgt_feats # (N, nRef, nclusters) 146 | topk_gt = torch.topk(tgt_feats_sim, 5, dim=2) 147 | topk_gt_values = topk_gt.values # (N, nRef, nclusters) 148 | topk_gt_thresh = topk_gt_values.min(dim=2).values # (N, nRef) 149 | 150 | # ------------------ KL Div loss based reward -------------------- 151 | reward = -rec_loss_fn_classify( 152 | flatten_two(pred_logits), 153 | flatten_two(tgt_feats), 154 | cluster_centroids_t.t(), 155 | K=2, 156 | reduction="none", 157 | ).sum( 158 | dim=1 159 | ) # (N*nRef, ) 160 | reward = reward.view(N, nRef) 161 | 162 | return reward 163 | 164 | 165 | def masked_mean(values, masks, axis=None): 166 | return (values * masks).sum(axis=axis) / (masks.sum(axis=axis) + 1e-10) 167 | -------------------------------------------------------------------------------- /exploring_exploration/utils/visualization.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # Copyright (c) Facebook, Inc. and its affiliates. 3 | # All rights reserved. 4 | # This source code is licensed under the license found in the 5 | # LICENSE file in the root directory of this source tree. 
6 | 7 | import cv2 8 | import math 9 | import torch 10 | import numpy as np 11 | from torch.utils.tensorboard import SummaryWriter 12 | 13 | 14 | def torch_to_np(image): 15 | image = (image.cpu().numpy()).transpose(1, 2, 0) 16 | image = image.astype(np.uint8) 17 | image = np.flip(image, axis=2) 18 | return image 19 | 20 | 21 | def torch_to_np_depth(image, max_depth=10000.0): 22 | depth = (image.cpu().numpy())[0] 23 | depth = (np.clip(depth, 0, max_depth) / max_depth) * 255.0 24 | depth = depth.astype(np.uint8) 25 | depth = np.repeat(depth[..., np.newaxis], 3, axis=2) 26 | return depth 27 | 28 | 29 | class TensorboardWriter(SummaryWriter): 30 | def __init__(self, *args, **kwargs): 31 | super().__init__(*args, **kwargs) 32 | 33 | def add_video_from_np_images( 34 | self, video_name: str, step_idx: int, images: np.ndarray, fps: int = 10 35 | ) -> None: 36 | r"""Write video into tensorboard from images frames. 37 | Args: 38 | video_name: name of video string. 39 | step_idx: int of checkpoint index to be displayed. 40 | images: list of n frames. Each frame is a np.ndarray of shape. 41 | fps: frame per second for output video. 42 | Returns: 43 | None. 44 | """ 45 | # initial shape of np.ndarray list: N * (H, W, 3) 46 | frame_tensors = [torch.from_numpy(np_arr).unsqueeze(0) for np_arr in images] 47 | video_tensor = torch.cat(tuple(frame_tensors)) 48 | video_tensor = video_tensor.permute(0, 3, 1, 2).unsqueeze(0) 49 | # final shape of video tensor: (1, n, 3, H, W) 50 | self.add_video(video_name, video_tensor, fps=fps, global_step=step_idx) 51 | 52 | 53 | def write_video(frames, path, fps=10.0, video_format="MP4V"): 54 | fourcc = cv2.VideoWriter_fourcc(*video_format) 55 | shape = frames[0].shape[:2][::-1] # (WIDTH, HEIGHT) 56 | vidwriter = cv2.VideoWriter(path, fourcc, fps, shape) 57 | for frame in frames: 58 | vidwriter.write(frame[:, :, ::-1]) # Convert to BGR 59 | vidwriter.release() 60 | 61 | 62 | def create_reference_grid(refs_uint8): 63 | """ 64 | Inputs: 65 | refs_uint8 - (nRef, H, W, C) numpy array 66 | """ 67 | refs_uint8 = np.copy(refs_uint8) 68 | nRef, H, W, C = refs_uint8.shape 69 | 70 | nrow = int(math.sqrt(nRef)) 71 | 72 | ncol = nRef // nrow # (number of images per column) 73 | if nrow * ncol < nRef: 74 | ncol += 1 75 | final_grid = np.zeros((nrow * ncol, *refs_uint8.shape[1:]), dtype=np.uint8) 76 | font = cv2.FONT_HERSHEY_SIMPLEX 77 | 78 | final_grid[:nRef] = refs_uint8 79 | final_grid = final_grid.reshape( 80 | ncol, nrow, *final_grid.shape[1:] 81 | ) # (ncol, nrow, H, W, C) 82 | final_grid = final_grid.transpose(0, 2, 1, 3, 4) 83 | final_grid = final_grid.reshape(ncol * H, nrow * W, C) 84 | return final_grid 85 | 86 | 87 | def draw_border(images, color=(255, 0, 0), thickness=5): 88 | """Draw image border. 
89 | 90 | Inputs: 91 | images - (N, H, W, C) numpy array 92 | """ 93 | images[:, :thickness, :, 0] = color[0] 94 | images[:, :thickness, :, 1] = color[1] 95 | images[:, :thickness, :, 2] = color[2] 96 | 97 | images[:, -thickness:, :, 0] = color[0] 98 | images[:, -thickness:, :, 1] = color[1] 99 | images[:, -thickness:, :, 2] = color[2] 100 | 101 | images[:, :, :thickness, 0] = color[0] 102 | images[:, :, :thickness, 1] = color[1] 103 | images[:, :, :thickness, 2] = color[2] 104 | 105 | images[:, :, -thickness:, 0] = color[0] 106 | images[:, :, -thickness:, 1] = color[1] 107 | images[:, :, -thickness:, 2] = color[2] 108 | 109 | return images 110 | -------------------------------------------------------------------------------- /reconstruction_data_generation/avd/.gitignore: -------------------------------------------------------------------------------- 1 | uniform_samples 2 | imagenet_clusters 3 | -------------------------------------------------------------------------------- /reconstruction_data_generation/avd/gather_uniform_points.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # Copyright (c) Facebook, Inc. and its affiliates. 3 | # All rights reserved. 4 | # This source code is licensed under the license found in the 5 | # LICENSE file in the root directory of this source tree. 6 | 7 | import os 8 | import math 9 | import argparse 10 | import numpy as np 11 | import multiprocessing as mp 12 | 13 | from PIL import Image 14 | 15 | import gym 16 | import gym_avd 17 | 18 | 19 | def str2bool(v): 20 | return True if v.lower() in ["t", "true", "y", "yes"] else False 21 | 22 | 23 | parser = argparse.ArgumentParser() 24 | parser.add_argument("--save_directory", type=str, default="uniform_samples") 25 | parser.add_argument("--seed", type=int, default=123) 26 | parser.add_argument("--debug", type=str2bool, default=False) 27 | 28 | 29 | args = parser.parse_args() 30 | args.env_name = "avd-v1" 31 | 32 | 33 | def write_data(data_tuple): 34 | img, img_name = data_tuple 35 | img = Image.fromarray(img) 36 | img.save(img_name) 37 | 38 | 39 | def safe_mkdir(path): 40 | try: 41 | os.mkdir(path) 42 | except: 43 | pass 44 | 45 | 46 | safe_mkdir(args.save_directory) 47 | save_dir = args.save_directory 48 | 49 | pool = mp.Pool(32) 50 | 51 | env = gym.make(args.env_name) 52 | 53 | 54 | def gather_data(env, scenes, args): 55 | scene_images = [] 56 | for scene_idx in scenes: 57 | print("Gathering data for scene: {}".format(scene_idx)) 58 | _ = env.reset(scene_idx=scene_idx) 59 | min_x, min_z, max_x, max_z = env.get_environment_extents() 60 | 61 | # Sample nodes uniformly @ 2m 62 | all_nodes = env.data_conn[scene_idx]["nodes"] 63 | all_nodes_positions = [ 64 | [node["world_pos"][2], node["world_pos"][0]] for node in all_nodes 65 | ] 66 | all_nodes_positions = np.array(all_nodes_positions) * env.scale 67 | 68 | range_x = np.arange(min_x, max_x, 2000.0) 69 | range_z = np.arange(min_z, max_z, 2000.0) 70 | relevant_nodes = [] 71 | for x in range_x: 72 | for z in range_z: 73 | # Find closest node to this coordinate 74 | min_dist = math.inf 75 | min_dist_node = None 76 | for node, node_position in zip(all_nodes, all_nodes_positions): 77 | d = np.sqrt( 78 | (x - node_position[0]) ** 2 + (z - node_position[1]) 79 | ).item() 80 | if d < min_dist: 81 | min_dist = d 82 | min_dist_node = node 83 | relevant_nodes.append(min_dist_node) 84 | 85 | relevant_images = [] 86 | for node in relevant_nodes: 87 | for j in range(0, 12, 3): 88 | image_name = 
node["views"][j]["image_name"] 89 | relevant_images.append(env._get_img(image_name)) 90 | 91 | scene_images += relevant_images 92 | 93 | return scene_images 94 | 95 | 96 | for split in ["train", "val", "test"]: 97 | split_path = os.path.join(args.save_directory, split) 98 | safe_mkdir(split_path) 99 | split_scenes = getattr(env, "{}_scenes".format(split)) 100 | print("========= Gathering data for split: {} =========".format(split)) 101 | split_images = gather_data(env, split_scenes, args) 102 | 103 | img_tuples = [] 104 | for i, img in enumerate(split_images): 105 | path = os.path.join(split_path, f"image_{i:07d}.png") 106 | img_tuples.append((img, path)) 107 | 108 | _ = pool.map(write_data, img_tuples) 109 | -------------------------------------------------------------------------------- /reconstruction_data_generation/generate_imagenet_clusters.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # Copyright (c) Facebook, Inc. and its affiliates. 3 | # All rights reserved. 4 | # This source code is licensed under the license found in the 5 | # LICENSE file in the root directory of this source tree. 6 | 7 | import os 8 | import cv2 9 | import h5py 10 | import random 11 | import argparse 12 | import numpy as np 13 | import subprocess as sp 14 | 15 | import torch 16 | import torch.nn.functional as F 17 | import torchvision.transforms as transforms 18 | from torch.utils.data import Dataset, DataLoader 19 | 20 | from PIL import Image 21 | from tensorboardX import SummaryWriter 22 | from sklearn import metrics 23 | from sklearn.cluster import MiniBatchKMeans 24 | from exploring_exploration.models.reconstruction import FeatureNetwork 25 | 26 | 27 | class RGBDataset(Dataset): 28 | def __init__( 29 | self, dataset_root, seed=123, transform=None, image_size=256, truncate_count=-1, 30 | ): 31 | random.seed(seed) 32 | self.dataset_root = dataset_root 33 | images = ( 34 | sp.check_output(f"ls {dataset_root}", shell=True) 35 | .decode("utf-8") 36 | .split("\n")[:-1] 37 | ) 38 | ndata = len(images) 39 | if truncate_count > 0: 40 | ndata = min(ndata, truncate_count) 41 | 42 | self.image_size = image_size 43 | 44 | self.dataset = [os.path.join(dataset_root, image) for image in images] 45 | 46 | random.shuffle(self.dataset) 47 | self.dataset = self.dataset[:ndata] 48 | 49 | # Data transform 50 | self.transform = transform if transform is not None else lambda x: x 51 | 52 | self.nimgs = ndata 53 | 54 | def __getitem__(self, index): 55 | path = self.dataset[index] 56 | img = Image.open(path).convert("RGB") 57 | img = self.transform(img) 58 | 59 | return {"rgb": img}, {"rgb": path} 60 | 61 | def __len__(self): 62 | return self.nimgs 63 | 64 | 65 | def main(args): 66 | # Enable cuda by default 67 | args.cuda = True 68 | 69 | # Define transforms 70 | normalize = transforms.Normalize( 71 | mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225] 72 | ) 73 | imagenet_mean = [0.485, 0.456, 0.406] 74 | imagenet_std = [0.229, 0.224, 0.225] 75 | transform = transforms.Compose( 76 | [transforms.Resize(args.image_size), transforms.ToTensor(), normalize] 77 | ) 78 | 79 | # Create datasets 80 | datasets = { 81 | split: RGBDataset( 82 | os.path.join(args.dataset_root, split), 83 | seed=123, 84 | transform=transform, 85 | image_size=args.image_size, 86 | truncate_count=args.truncate_count, 87 | ) 88 | for split in ["train", "val", "test"] 89 | } 90 | 91 | # Create data loaders 92 | data_loaders = { 93 | split: DataLoader( 94 | dataset, batch_size=args.batch_size, 
shuffle=True, num_workers=16 95 | ) 96 | for split, dataset in datasets.items() 97 | } 98 | 99 | device = torch.device("cuda:0" if args.cuda else "cpu") 100 | 101 | # Create model 102 | net = FeatureNetwork() 103 | net.to(device) 104 | net.eval() 105 | 106 | # Generate image features for training images 107 | train_image_features = [] 108 | train_image_paths = [] 109 | 110 | for i, data in enumerate(data_loaders["train"], 0): 111 | 112 | # sample data 113 | inputs, input_paths = data 114 | inputs = {key: val.to(device) for key, val in inputs.items()} 115 | 116 | # Extract features 117 | with torch.no_grad(): 118 | feats = net(inputs["rgb"]) # (bs, 512) 119 | feats = feats.detach().cpu().numpy() 120 | train_image_features.append(feats) 121 | train_image_paths += input_paths["rgb"] 122 | 123 | train_image_features = np.concatenate(train_image_features, axis=0) 124 | 125 | # Generate image features for testing images 126 | test_image_features = [] 127 | test_image_paths = [] 128 | 129 | for i, data in enumerate(data_loaders["test"], 0): 130 | 131 | # sample data 132 | inputs, input_paths = data 133 | inputs = {key: val.to(device) for key, val in inputs.items()} 134 | 135 | # Extract features 136 | with torch.no_grad(): 137 | feats = net(inputs["rgb"]) # (bs, 512) 138 | feats = feats.detach().cpu().numpy() 139 | test_image_features.append(feats) 140 | test_image_paths += input_paths["rgb"] 141 | 142 | test_image_features = np.concatenate(test_image_features, axis=0) # (N, 512) 143 | 144 | # ================= Perform clustering ================== 145 | kmeans = MiniBatchKMeans( 146 | init="k-means++", 147 | n_clusters=args.num_clusters, 148 | batch_size=args.batch_size, 149 | n_init=10, 150 | max_no_improvement=20, 151 | verbose=0, 152 | ) 153 | save_h5_path = os.path.join( 154 | args.save_dir, f"clusters_{args.num_clusters:05d}_data.h5" 155 | ) 156 | if os.path.isfile(save_h5_path): 157 | print("========> Loading existing clusters!") 158 | h5file = h5py.File(os.path.join(save_h5_path), "r") 159 | train_cluster_centroids = np.array(h5file["cluster_centroids"]) 160 | kmeans.cluster_centers_ = train_cluster_centroids 161 | train_cluster_assignments = kmeans.predict(train_image_features) # (N, ) 162 | h5file.close() 163 | else: 164 | kmeans.fit(train_image_features) 165 | train_cluster_assignments = kmeans.predict(train_image_features) # (N, ) 166 | train_cluster_centroids = np.copy( 167 | kmeans.cluster_centers_ 168 | ) # (num_clusters, 512) 169 | 170 | # Create a dictionary of cluster -> images for visualization 171 | cluster2image = {} 172 | if args.visualize_clusters: 173 | log_dir = os.path.join( 174 | args.save_dir, f"train_clusters_#clusters{args.num_clusters:05d}" 175 | ) 176 | tbwriter = SummaryWriter(log_dir=log_dir) 177 | 178 | for i in range(args.num_clusters): 179 | valid_idxes = np.where(train_cluster_assignments == i)[0] 180 | valid_image_paths = [train_image_paths[j] for j in valid_idxes] 181 | # Shuffle and pick only upto 100 images per cluster 182 | random.shuffle(valid_image_paths) 183 | # Read the valid images 184 | valid_images = [] 185 | for path in valid_image_paths[:100]: 186 | img = cv2.resize( 187 | np.flip(cv2.imread(path), axis=2), (args.image_size, args.image_size), 188 | ) 189 | valid_images.append(img) 190 | valid_images = ( 191 | np.stack(valid_images, axis=0).astype(np.float32) / 255.0 192 | ) # (K, H, W, C) 193 | valid_images = torch.Tensor(valid_images).permute(0, 3, 1, 2).contiguous() 194 | cluster2image[i] = valid_images 195 | if args.visualize_clusters: 196 | 
# Write the train image clusters to tensorboard 197 | tbwriter.add_images(f"Cluster #{i:05d}", valid_images, 0) 198 | 199 | h5file = h5py.File( 200 | os.path.join(args.save_dir, f"clusters_{args.num_clusters:05d}_data.h5"), "a" 201 | ) 202 | 203 | if "cluster_centroids" not in h5file.keys(): 204 | h5file.create_dataset("cluster_centroids", data=train_cluster_centroids) 205 | for i in range(args.num_clusters): 206 | if f"cluster_{i}/images" not in h5file.keys(): 207 | h5file.create_dataset(f"cluster_{i}/images", data=cluster2image[i]) 208 | 209 | h5file.close() 210 | 211 | if args.visualize_clusters: 212 | # Dot product of test_image_features with train_cluster_centroids 213 | test_dot_centroids = np.matmul( 214 | test_image_features, train_cluster_centroids.T 215 | ) # (N, num_clusters) 216 | if args.normalize_embedding: 217 | test_dot_centroids = (test_dot_centroids + 1.0) / 2.0 218 | else: 219 | test_dot_centroids = F.softmax( 220 | torch.Tensor(test_dot_centroids), dim=1 221 | ).numpy() 222 | 223 | # Find the top-K matching centroids 224 | topk_matches = np.argpartition(test_dot_centroids, -5, axis=1)[:, -5:] # (N, 5) 225 | 226 | # Write the test nearest neighbors to tensorboard 227 | tbwriter = SummaryWriter( 228 | log_dir=os.path.join( 229 | args.save_dir, f"test_neighbors_#clusters{args.num_clusters:05d}" 230 | ) 231 | ) 232 | for i in range(100): 233 | test_image_path = test_image_paths[i] 234 | test_image = cv2.resize( 235 | cv2.imread(test_image_path), (args.image_size, args.image_size) 236 | ) 237 | test_image = np.flip(test_image, axis=2).astype(np.float32) / 255.0 238 | test_image = torch.Tensor(test_image).permute(2, 0, 1).contiguous() 239 | topk_clusters = topk_matches[i] 240 | # Pick some 4 images representative of a cluster 241 | topk_cluster_images = [] 242 | for k in topk_clusters: 243 | imgs = cluster2image[k][:4] # (4, C, H, W) 244 | if imgs.shape[0] == 0: 245 | continue 246 | elif imgs.shape[0] != 4: 247 | imgs_pad = torch.zeros(4 - imgs.shape[0], *imgs.shape[1:]) 248 | imgs = torch.cat([imgs, imgs_pad], dim=0) 249 | # Downsample by a factor of 2 250 | imgs = F.interpolate( 251 | imgs, scale_factor=0.5, mode="bilinear" 252 | ) # (4, C, H/2, W/2) 253 | # Reshape to form a grid 254 | imgs = imgs.permute(1, 0, 2, 3) # (C, 4, H/2, W/2) 255 | C, _, Hby2, Wby2 = imgs.shape 256 | imgs = ( 257 | imgs.view(C, 2, 2, Hby2, Wby2) 258 | .permute(0, 1, 3, 2, 4) 259 | .contiguous() 260 | .view(C, Hby2 * 2, Wby2 * 2) 261 | ) 262 | # Draw a red border 263 | imgs[0, :4, :] = 1.0 264 | imgs[1, :4, :] = 0.0 265 | imgs[2, :4, :] = 0.0 266 | imgs[0, -4:, :] = 1.0 267 | imgs[1, -4:, :] = 0.0 268 | imgs[2, -4:, :] = 0.0 269 | imgs[0, :, :4] = 1.0 270 | imgs[1, :, :4] = 0.0 271 | imgs[2, :, :4] = 0.0 272 | imgs[0, :, -4:] = 1.0 273 | imgs[1, :, -4:] = 0.0 274 | imgs[2, :, -4:] = 0.0 275 | topk_cluster_images.append(imgs) 276 | 277 | vis_img = torch.cat([test_image, *topk_cluster_images], dim=2) 278 | image_name = f"Test image #{i:04d}" 279 | for k in topk_clusters: 280 | score = test_dot_centroids[i, k].item() 281 | image_name += f"_{score:.3f}" 282 | tbwriter.add_image(image_name, vis_img, 0) 283 | 284 | 285 | def str2bool(v): 286 | return True if v.lower() in ["yes", "y", "true", "t"] else False 287 | 288 | 289 | if __name__ == "__main__": 290 | parser = argparse.ArgumentParser() 291 | parser.add_argument("--image-size", type=int, default=256) 292 | parser.add_argument("--dataset-root", type=str, default="dataset") 293 | parser.add_argument("--truncate-count", type=int, default=-1) 294 | 
parser.add_argument("--batch-size", type=int, default=128) 295 | parser.add_argument("--num-clusters", type=int, default=100) 296 | parser.add_argument("--save-dir", type=str, default="visualization_dir") 297 | parser.add_argument("--visualize-clusters", type=str2bool, default=True) 298 | parser.add_argument("--normalize-embedding", type=str2bool, default=True) 299 | 300 | args = parser.parse_args() 301 | 302 | main(args) 303 | -------------------------------------------------------------------------------- /reconstruction_data_generation/mp3d/.gitignore: -------------------------------------------------------------------------------- 1 | uniform_samples 2 | imagenet_clusters 3 | -------------------------------------------------------------------------------- /reconstruction_data_generation/mp3d/configs/pointnav_mp3d_test.yaml: -------------------------------------------------------------------------------- 1 | ENVIRONMENT: 2 | MAX_EPISODE_STEPS: 500 3 | SIMULATOR: 4 | AGENT_0: 5 | SENSORS: ['RGB_SENSOR', 'DEPTH_SENSOR'] 6 | HABITAT_SIM_V0: 7 | GPU_DEVICE_ID: 0 8 | RGB_SENSOR: 9 | WIDTH: 84 10 | HEIGHT: 84 11 | DEPTH_SENSOR: 12 | WIDTH: 84 13 | HEIGHT: 84 14 | TASK: 15 | TYPE: Nav-v0 16 | SUCCESS_DISTANCE: 0.2 17 | SENSORS: ['POINTGOAL_SENSOR'] 18 | POINTGOAL_SENSOR: 19 | TYPE: PointGoalSensor 20 | GOAL_FORMAT: POLAR 21 | MEASUREMENTS: ['SPL'] 22 | SPL: 23 | TYPE: SPL 24 | SUCCESS_DISTANCE: 0.2 25 | DATASET: 26 | TYPE: PointNav-v1 27 | SPLIT: test 28 | DATA_PATH: data/datasets/pointnav/mp3d/v1_unique/{split}/{split}.json.gz 29 | SCENES_DIR: data/scene_datasets 30 | SHUFFLE_DATASET: False 31 | -------------------------------------------------------------------------------- /reconstruction_data_generation/mp3d/configs/pointnav_mp3d_train.yaml: -------------------------------------------------------------------------------- 1 | ENVIRONMENT: 2 | MAX_EPISODE_STEPS: 500 3 | SIMULATOR: 4 | AGENT_0: 5 | SENSORS: ['RGB_SENSOR', 'DEPTH_SENSOR'] 6 | HABITAT_SIM_V0: 7 | GPU_DEVICE_ID: 0 8 | RGB_SENSOR: 9 | WIDTH: 84 10 | HEIGHT: 84 11 | DEPTH_SENSOR: 12 | WIDTH: 84 13 | HEIGHT: 84 14 | TASK: 15 | TYPE: Nav-v0 16 | SUCCESS_DISTANCE: 0.2 17 | SENSORS: ['POINTGOAL_SENSOR'] 18 | POINTGOAL_SENSOR: 19 | TYPE: PointGoalSensor 20 | GOAL_FORMAT: POLAR 21 | MEASUREMENTS: ['SPL'] 22 | SPL: 23 | TYPE: SPL 24 | SUCCESS_DISTANCE: 0.2 25 | DATASET: 26 | TYPE: PointNav-v1 27 | SPLIT: train 28 | DATA_PATH: data/datasets/pointnav/mp3d/v1_unique/{split}/{split}.json.gz 29 | SCENES_DIR: data/scene_datasets 30 | SHUFFLE_DATASET: False 31 | -------------------------------------------------------------------------------- /reconstruction_data_generation/mp3d/configs/pointnav_mp3d_val.yaml: -------------------------------------------------------------------------------- 1 | ENVIRONMENT: 2 | MAX_EPISODE_STEPS: 500 3 | SIMULATOR: 4 | AGENT_0: 5 | SENSORS: ['RGB_SENSOR', 'DEPTH_SENSOR'] 6 | HABITAT_SIM_V0: 7 | GPU_DEVICE_ID: 0 8 | RGB_SENSOR: 9 | WIDTH: 84 10 | HEIGHT: 84 11 | DEPTH_SENSOR: 12 | WIDTH: 84 13 | HEIGHT: 84 14 | TASK: 15 | TYPE: Nav-v0 16 | SUCCESS_DISTANCE: 0.2 17 | SENSORS: ['POINTGOAL_SENSOR'] 18 | POINTGOAL_SENSOR: 19 | TYPE: PointGoalSensor 20 | GOAL_FORMAT: POLAR 21 | MEASUREMENTS: ['SPL'] 22 | SPL: 23 | TYPE: SPL 24 | SUCCESS_DISTANCE: 0.2 25 | DATASET: 26 | TYPE: PointNav-v1 27 | SPLIT: val 28 | DATA_PATH: data/datasets/pointnav/mp3d/v1_unique/{split}/{split}.json.gz 29 | SCENES_DIR: data/scene_datasets 30 | SHUFFLE_DATASET: False 31 | 
-------------------------------------------------------------------------------- /reconstruction_data_generation/mp3d/extract_data_script.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # Copyright (c) Facebook, Inc. and its affiliates. 3 | # All rights reserved. 4 | # This source code is licensed under the license found in the 5 | # LICENSE file in the root directory of this source tree. 6 | 7 | habitat_root=$EXPLORING_EXPLORATION/environments/habitat/habitat-api 8 | 9 | mkdir uniform_samples 10 | for split in 'val' 'test' 'train' 11 | do 12 | python generate_uniform_points.py \ 13 | --config-path configs/pointnav_mp3d_${split}.yaml \ 14 | --habitat-root $habitat_root \ 15 | --save-dir uniform_samples/${split} 16 | done 17 | -------------------------------------------------------------------------------- /reconstruction_data_generation/mp3d/generate_uniform_points.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # Copyright (c) Facebook, Inc. and its affiliates. 3 | # All rights reserved. 4 | # This source code is licensed under the license found in the 5 | # LICENSE file in the root directory of this source tree. 6 | 7 | import os 8 | import json 9 | import habitat 10 | import argparse 11 | import numpy as np 12 | import multiprocessing as mp 13 | 14 | from PIL import Image 15 | 16 | 17 | def safe_mkdir(path): 18 | try: 19 | os.mkdir(path) 20 | except: 21 | pass 22 | 23 | 24 | def write_data(data_tuple): 25 | img, img_name = data_tuple 26 | img = Image.fromarray(img) 27 | img.save(img_name) 28 | 29 | 30 | def main(args): 31 | 32 | pool = mp.Pool(32) 33 | 34 | # ====================== Create environment ========================== 35 | config = habitat.get_config(config_paths=args.config_path) 36 | config.defrost() 37 | # Update path to SCENES_DIR, DATA_PATH 38 | config.DATASET.SCENES_DIR = os.path.join(args.habitat_root, "data/scene_datasets") 39 | config.DATASET.DATA_PATH = os.path.join( 40 | args.habitat_root, 41 | "data/datasets/pointnav/mp3d/v1_unique/{split}/{split}.json.gz", 42 | ) 43 | config.freeze() 44 | env = habitat.Env(config=config) 45 | 46 | # Assumes each episode is in a unique environment 47 | num_episodes = len(env._dataset.episodes) 48 | all_images = [] 49 | for epcount in range(num_episodes): 50 | env.reset() 51 | scene_id = env.current_episode.scene_id 52 | scene_name = scene_id.split("/")[-1] 53 | print("Gathering data for scene # {}: {}".format(epcount, scene_name)) 54 | 55 | min_x, min_z, max_x, max_z = env._sim.get_environment_extents() 56 | # Sample a uniform grid of points separated by 4m 57 | uniform_grid_x = np.arange(min_x, max_x, 4) 58 | uniform_grid_z = np.arange(min_z, max_z, 4) 59 | 60 | agent_y = env._sim.get_agent_state().position[1] 61 | 62 | scene_images = [] 63 | for x in uniform_grid_x: 64 | for z in uniform_grid_z: 65 | random_point = [x.item(), agent_y.item(), z.item()] 66 | if not env._sim.is_navigable(random_point): 67 | print(f"=======> Skipping point ({x}, {agent_y}, {z})") 68 | continue 69 | 70 | # Sample multiple viewing angles 71 | for heading in np.arange(-np.pi, np.pi, np.pi / 3): 72 | # This is clockwise rotation about the vertical upward axis 73 | rotation = [ 74 | 0, 75 | np.sin(heading / 2).item(), 76 | 0, 77 | np.cos(heading / 2).item(), 78 | ] 79 | 80 | obs = env._sim.get_observations_at(random_point, rotation) 81 | scene_images.append(obs["rgb"]) 82 | 83 | all_images += scene_images 84 | 85 | 
safe_mkdir(args.save_dir) 86 | img_tuples = [] 87 | for i, img in enumerate(all_images): 88 | img_path = os.path.join(args.save_dir, f"image_{i:07d}.png") 89 | img_tuples.append((img, img_path)) 90 | 91 | _ = pool.map(write_data, img_tuples) 92 | 93 | 94 | if __name__ == "__main__": 95 | 96 | parser = argparse.ArgumentParser() 97 | 98 | parser.add_argument("--seed", type=int, default=123) 99 | parser.add_argument("--config-path", type=str, default="config.yaml") 100 | parser.add_argument("--save-dir", type=str, default="data") 101 | parser.add_argument("--habitat-root", type=str, default="./") 102 | 103 | args = parser.parse_args() 104 | 105 | main(args) 106 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | einops 2 | gym 3 | h5py 4 | matplotlib 5 | opencv-python 6 | tensorboard 7 | tensorboardX 8 | scikit-learn 9 | --------------------------------------------------------------------------------
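A minimal usage sketch for the pose utilities in exploring_exploration/utils/geometry.py, assuming the package listed above is importable (installed or on PYTHONPATH). It relies only on subtract_pose and add_pose as shown in the listing, and illustrates that add_pose inverts subtract_pose once the (y, x, phi) vs. (x, y, phi) ordering conventions are accounted for; the tensor sizes, values, and tolerance are arbitrary choices for the demo.

import torch
from exploring_exploration.utils.geometry import subtract_pose, add_pose

N = 4
# subtract_pose expects both arguments in (y, x, phi) order, with phi in radians.
pose_common = torch.rand(N, 3) * 2.0 - 1.0
poses = torch.rand(N, 3) * 2.0 - 1.0

# Relative poses in the frame of pose_common, returned in (x, y, phi) order.
rel = subtract_pose(pose_common, poses)

# add_pose unbinds pose_common according to `mode`; since `rel` is (x, y, phi),
# reorder pose_common to (x, y, phi) and use mode="xyt".
pose_common_xyt = pose_common[:, [1, 0, 2]]
recovered_xyt = add_pose(pose_common_xyt, rel, mode="xyt")  # (x, y, phi)
recovered = recovered_xyt[:, [1, 0, 2]]  # back to (y, x, phi)

# Positions are recovered up to float error; angles up to wrapping into [-pi, pi].
assert torch.allclose(recovered, poses, atol=1e-5)

Because the sampled angles already lie in [-1, 1] radians, no wrapping occurs and the full pose round-trips exactly (up to float error). The same ordering convention appears in compute_reconstruction_rewards above, which feeds (y, x, theta) odometer poses into subtract_pose and consumes its (x, y, phi) output.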