├── .gitignore ├── .gitmodules ├── LICENSE ├── README.md ├── assets ├── find_shoes.png ├── framework.png ├── spatial_reason.png └── user_sim.png ├── demos ├── demo.jpg ├── demo_gradcam_output.jpg ├── demo_groundedsam_output.jpg ├── demo_lseg_output.jpg ├── play_chatgpt_api.py ├── play_gradcam.py ├── play_groundingSAM.py ├── play_habitat_teleop.py ├── play_interactive_gradio.py ├── play_interactive_terminal.py └── play_lseg.py ├── orion ├── __init__.py ├── abstract │ ├── __init__.py │ ├── agent.py │ ├── interaction_history.py │ ├── interfaces.py │ ├── memory.py │ ├── perception.py │ ├── pose.py │ └── usersim.py ├── agent_env │ ├── chatgpt_control_base.py │ ├── chatgpt_control_cow.py │ ├── chatgpt_control_orion.py │ ├── chatgpt_control_vlmap.py │ ├── fbe.py │ ├── habitat │ │ ├── base.py │ │ ├── holonomic_actions.py │ │ └── utils.py │ ├── hybrid_search.py │ └── teleop.py ├── chatgpt │ ├── __init__.py │ ├── api.py │ └── prompts │ │ ├── __init__.py │ │ ├── agent_functions.py │ │ ├── agent_prompts.py │ │ ├── baseline_cow_prompt.py │ │ ├── baseline_vlmap_prompt.py │ │ ├── usersim_prompts_correction.py │ │ ├── usersim_prompts_description.py │ │ ├── usersim_prompts_instruction.py │ │ ├── usersim_prompts_landmark.py │ │ ├── usersim_prompts_mix.py │ │ └── usersim_prompts_none.py ├── config │ ├── __init__.py │ ├── chatgpt_config.py │ ├── my_config.py │ └── my_objectnav_hm3d.yaml ├── gradio_init_img.jpg ├── map │ ├── __init__.py │ ├── map.py │ ├── map_build │ │ └── build_voxel.py │ ├── map_search │ │ ├── search_base.py │ │ └── search_voxel.py │ ├── occupancy.py │ ├── voxel.py │ └── voxel_sparse.py ├── memory │ └── neural_memory2d.py ├── navigation │ ├── fmm_planner.py │ ├── frontier_based_exploration.py │ ├── shortest_path_follower_wrapper.py │ └── waypoint_planner.py ├── perception │ ├── __init__.py │ ├── detector │ │ ├── __init__.py │ │ ├── clipgradcam.py │ │ └── groundingSAM.py │ └── extractor │ │ ├── __init__.py │ │ ├── clipbase.py │ │ ├── concept_fusion_extractor.py │ │ └── lseg_extractor.py ├── user_simulator │ ├── __init__.py │ ├── base.py │ ├── chatgpt_based_sim.py │ ├── goals │ │ ├── 4ok3usBNeis │ │ │ ├── final.json │ │ │ └── objects.json │ │ ├── LT9Jq6dN3Ea │ │ │ ├── final.json │ │ │ └── objects.json │ │ ├── MHPLjHsuG27 │ │ │ ├── final.json │ │ │ └── objects.json │ │ ├── QaLdnwvtxbs │ │ │ ├── final.json │ │ │ └── objects.json │ │ ├── TEEsavR23oF │ │ │ ├── final.json │ │ │ └── objects.json │ │ ├── cvZr5TUy5C5 │ │ │ ├── final.json │ │ │ └── objects.json │ │ ├── h1zeeAwLh9Z │ │ │ ├── final.json │ │ │ └── objects.json │ │ ├── mL8ThkuaVTM │ │ │ ├── final.json │ │ │ └── objects.json │ │ ├── qyAac8rV8Zk │ │ │ ├── final.json │ │ │ └── objects.json │ │ └── y9hTuugGdiq │ │ │ ├── final.json │ │ │ └── objects.json │ ├── rule_based_sim.py │ ├── topograph.py │ └── user_goal.py └── utils │ ├── __init__.py │ ├── clip_score_utils.py │ ├── file_load.py │ ├── geometry.py │ ├── gradio_interface.py │ └── visulization.py ├── requirements.txt ├── scripts ├── build_vlmap.py ├── collect_scene_fbe.py ├── create_video.py ├── user_agent_talk_cf.py ├── user_agent_talk_cow.py ├── user_agent_talk_orion.py └── user_agent_talk_vlmap.py ├── setup.py └── tests ├── test_fmm_planner.py ├── test_gradio_helloworld.py ├── test_point_planner.py └── test_vlmap_planner.py /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 
| build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | wheels/ 23 | share/python-wheels/ 24 | *.egg-info/ 25 | .installed.cfg 26 | *.egg 27 | MANIFEST 28 | 29 | # PyInstaller 30 | # Usually these files are written by a python script from a template 31 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 32 | *.manifest 33 | *.spec 34 | 35 | # Installer logs 36 | pip-log.txt 37 | pip-delete-this-directory.txt 38 | 39 | # Unit test / coverage reports 40 | htmlcov/ 41 | .tox/ 42 | .nox/ 43 | .coverage 44 | .coverage.* 45 | .cache 46 | nosetests.xml 47 | coverage.xml 48 | *.cover 49 | *.py,cover 50 | .hypothesis/ 51 | .pytest_cache/ 52 | cover/ 53 | 54 | # Translations 55 | *.mo 56 | *.pot 57 | 58 | # Django stuff: 59 | *.log 60 | local_settings.py 61 | db.sqlite3 62 | db.sqlite3-journal 63 | 64 | # Flask stuff: 65 | instance/ 66 | .webassets-cache 67 | 68 | # Scrapy stuff: 69 | .scrapy 70 | 71 | # Sphinx documentation 72 | docs/_build/ 73 | 74 | # PyBuilder 75 | .pybuilder/ 76 | target/ 77 | 78 | # Jupyter Notebook 79 | .ipynb_checkpoints 80 | 81 | # IPython 82 | profile_default/ 83 | ipython_config.py 84 | 85 | # pyenv 86 | # For a library or package, you might want to ignore these files since the code is 87 | # intended to run in multiple environments; otherwise, check them in: 88 | # .python-version 89 | 90 | # pipenv 91 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 92 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 93 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 94 | # install all needed dependencies. 95 | #Pipfile.lock 96 | 97 | # poetry 98 | # Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control. 99 | # This is especially recommended for binary packages to ensure reproducibility, and is more 100 | # commonly ignored for libraries. 101 | # https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control 102 | #poetry.lock 103 | 104 | # pdm 105 | # Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control. 106 | #pdm.lock 107 | # pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it 108 | # in version control. 109 | # https://pdm.fming.dev/#use-with-ide 110 | .pdm.toml 111 | 112 | # PEP 582; used by e.g. 
github.com/David-OConnor/pyflow and github.com/pdm-project/pdm 113 | __pypackages__/ 114 | 115 | # Celery stuff 116 | celerybeat-schedule 117 | celerybeat.pid 118 | 119 | # SageMath parsed files 120 | *.sage.py 121 | 122 | # Environments 123 | .env 124 | .venv 125 | env/ 126 | venv/ 127 | ENV/ 128 | env.bak/ 129 | venv.bak/ 130 | 131 | # Spyder project settings 132 | .spyderproject 133 | .spyproject 134 | 135 | # Rope project settings 136 | .ropeproject 137 | 138 | # mkdocs documentation 139 | /site 140 | 141 | # mypy 142 | .mypy_cache/ 143 | .dmypy.json 144 | dmypy.json 145 | 146 | # Pyre type checker 147 | .pyre/ 148 | 149 | # pytype static type analyzer 150 | .pytype/ 151 | 152 | # Cython debug symbols 153 | cython_debug/ 154 | 155 | # PyCharm 156 | # JetBrains specific template is maintained in a separate JetBrains.gitignore that can 157 | # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore 158 | # and can be added to the global gitignore or merged into this file. For a more nuclear 159 | # option (not recommended) you can uncomment the following to ignore the entire idea folder. 160 | #.idea/ 161 | 162 | data 163 | */chatgpt-config.py 164 | logs 165 | images 166 | sandbox 167 | 168 | .vscode/ 169 | -------------------------------------------------------------------------------- /.gitmodules: -------------------------------------------------------------------------------- 1 | [submodule "third_party/habitat-lab"] 2 | path = third_party/habitat-lab 3 | url = https://github.com/facebookresearch/habitat-lab.git 4 | [submodule "third_party/Grounded-Segment-Anything"] 5 | path = third_party/Grounded-Segment-Anything 6 | url = https://github.com/IDEA-Research/Grounded-Segment-Anything.git 7 | [submodule "orion/perception/detector/gradcam"] 8 | path = orion/perception/detector/gradcam 9 | url = https://github.com/hila-chefer/Transformer-MM-Explainability.git 10 | [submodule "orion/perception/extractor/lseg_module"] 11 | path = orion/perception/extractor/lseg_module 12 | url = https://github.com/YinpeiDai/lseg-module.git 13 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2021 Intelligent Systems Lab Org 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 
22 | -------------------------------------------------------------------------------- /assets/find_shoes.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sled-group/navchat/afb21a4c9cb13f671534be5460e3c9e5867e5512/assets/find_shoes.png -------------------------------------------------------------------------------- /assets/framework.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sled-group/navchat/afb21a4c9cb13f671534be5460e3c9e5867e5512/assets/framework.png -------------------------------------------------------------------------------- /assets/spatial_reason.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sled-group/navchat/afb21a4c9cb13f671534be5460e3c9e5867e5512/assets/spatial_reason.png -------------------------------------------------------------------------------- /assets/user_sim.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sled-group/navchat/afb21a4c9cb13f671534be5460e3c9e5867e5512/assets/user_sim.png -------------------------------------------------------------------------------- /demos/demo.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sled-group/navchat/afb21a4c9cb13f671534be5460e3c9e5867e5512/demos/demo.jpg -------------------------------------------------------------------------------- /demos/demo_gradcam_output.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sled-group/navchat/afb21a4c9cb13f671534be5460e3c9e5867e5512/demos/demo_gradcam_output.jpg -------------------------------------------------------------------------------- /demos/demo_groundedsam_output.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sled-group/navchat/afb21a4c9cb13f671534be5460e3c9e5867e5512/demos/demo_groundedsam_output.jpg -------------------------------------------------------------------------------- /demos/demo_lseg_output.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sled-group/navchat/afb21a4c9cb13f671534be5460e3c9e5867e5512/demos/demo_lseg_output.jpg -------------------------------------------------------------------------------- /demos/play_chatgpt_api.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | 3 | from orion.chatgpt.api import ChatAPI 4 | from orion.config.chatgpt_config import ( 5 | AzureGPT4Config, 6 | AzureGPT35Config, 7 | OpenAIGPT4Config, 8 | OpenAIGPT35Config, 9 | ) 10 | 11 | parser = argparse.ArgumentParser() 12 | parser.add_argument("--api-type", choices=["openai", "azure"], default="azure") 13 | parser.add_argument("--model-type", choices=["gpt35", "gpt4"], default="gpt4") 14 | parser.add_argument("--stream", action="store_true", default=False) 15 | 16 | args = parser.parse_args() 17 | 18 | if args.api_type == "openai": 19 | if args.model_type == "gpt35": 20 | chat_api = ChatAPI(config=OpenAIGPT35Config()) 21 | elif args.model_type == "gpt4": 22 | chat_api = ChatAPI(config=OpenAIGPT4Config()) 23 | else: 24 | raise ValueError("model_type can only be ['gpt35', 'gpt4']") 25 | elif args.api_type == "azure": 26 | if args.model_type == "gpt35": 27 | 
chat_api = ChatAPI(config=AzureGPT35Config()) 28 | elif args.model_type == "gpt4": 29 | chat_api = ChatAPI(config=AzureGPT4Config()) 30 | else: 31 | raise ValueError("model_type can only be ['gpt35', 'gpt4']") 32 | 33 | while True: 34 | utter = input("\nUser>>>") 35 | chat_api.add_user_message(utter) 36 | if args.stream: 37 | response = "" 38 | gen = chat_api.get_system_response_stream() 39 | print("Response>>>", end="") 40 | for chuck in gen: 41 | response += chuck 42 | print(chuck, end="") 43 | print() 44 | else: 45 | response = chat_api.get_system_response() 46 | print("Response>>>", response) 47 | chat_api.add_assistant_message(response) 48 | -------------------------------------------------------------------------------- /demos/play_gradcam.py: -------------------------------------------------------------------------------- 1 | import cv2 2 | from orion.perception.detector.clipgradcam import CLIPGradCAM 3 | 4 | 5 | from orion.utils.file_load import load_image 6 | 7 | input_image = load_image("demos/demo.jpg") 8 | 9 | clipgradcam = CLIPGradCAM() 10 | returnpt = clipgradcam.predict(input_image, "dog") 11 | 12 | # plot the centroid into the image 13 | import matplotlib.pyplot as plt 14 | 15 | plt.imshow(input_image) 16 | plt.scatter(returnpt[0], returnpt[1], c="r", s=100) 17 | plt.savefig( 18 | "demos/demo_gradcam_output.jpg", 19 | bbox_inches="tight", 20 | dpi=300, 21 | pad_inches=0.0, 22 | ) 23 | -------------------------------------------------------------------------------- /demos/play_groundingSAM.py: -------------------------------------------------------------------------------- 1 | import cv2 2 | from orion.abstract.interfaces import TextQuery 3 | from orion.perception.detector.groundingSAM import GroundingSAM, show_mask, show_box 4 | 5 | import matplotlib.pyplot as plt 6 | 7 | from orion.utils.file_load import load_image 8 | 9 | input_image = load_image("demos/demo.jpg") 10 | 11 | text_prompt = TextQuery(prompt="dog on the grass", target="dog") 12 | 13 | 14 | plt.figure(figsize=(10, 10)) 15 | plt.imshow(input_image) 16 | 17 | groundsam = GroundingSAM() 18 | 19 | mmboxes = groundsam.predict(input_image, text_prompt) 20 | for mask in mmboxes.masks: 21 | show_mask(mask, plt.gca(), random_color=True) 22 | for box, label in zip(mmboxes.bboxes, mmboxes.texts): 23 | show_box(box, plt.gca(), label) 24 | 25 | plt.axis("off") 26 | plt.savefig( 27 | "demos/demo_groundedsam_output.jpg", 28 | bbox_inches="tight", 29 | dpi=300, 30 | pad_inches=0.0, 31 | ) 32 | -------------------------------------------------------------------------------- /demos/play_habitat_teleop.py: -------------------------------------------------------------------------------- 1 | from orion.agent_env.teleop import TeleOpAgentEnv 2 | 3 | 4 | # pick a scene from orion.config.my_config.SCENE_ID_FLOOR_SET 5 | game = TeleOpAgentEnv( 6 | scene_ids=["MHPLjHsuG27"], 7 | floor_set=(-2, 2), 8 | display_shortside=256, 9 | save_dir_name="teleop", 10 | auto_record=False, 11 | display_setting="rgb+topdownmap", 12 | use_gt_pose=True, 13 | load_existing_occumap=True, 14 | ) 15 | game.run() 16 | -------------------------------------------------------------------------------- /demos/play_interactive_gradio.py: -------------------------------------------------------------------------------- 1 | from multiprocessing import Process, Queue 2 | 3 | from orion.utils.gradio_interface import ( 4 | GradioInterface, 5 | GradioDemoChatGPTControlORION, 6 | ) 7 | from orion.config.chatgpt_config import AzureGPT4Config 8 | 9 | 10 | def 
run_gradio(image_queue, user_message_queue, bot_message_queue): 11 | gradio_interface = GradioInterface( 12 | image_queue=image_queue, 13 | user_message_queue=user_message_queue, 14 | bot_message_queue=bot_message_queue, 15 | ) 16 | gradio_interface.run() 17 | 18 | 19 | def main(): 20 | user_message_queue = Queue() 21 | bot_message_queue = Queue() 22 | image_queue = Queue() 23 | 24 | p = Process( 25 | target=run_gradio, args=(image_queue, user_message_queue, bot_message_queue) 26 | ) 27 | p.start() 28 | 29 | game = GradioDemoChatGPTControlORION( 30 | image_queue=image_queue, 31 | user_message_queue=user_message_queue, 32 | bot_message_queue=bot_message_queue, 33 | chatgpt_config=AzureGPT4Config(), 34 | dump_dir="dump_dir", 35 | use_stream=True, 36 | record_interaction=False, 37 | use_memory=True, 38 | use_vlmap=True, 39 | fast_explore=True, 40 | display_shortside=480, 41 | save_dir_name="predict", 42 | scene_ids=["4ok3usBNeis"], 43 | floor_set=(-1, 1), 44 | auto_record=False, 45 | display_setting="rgb+topdownmap", 46 | display_horizontally=False, 47 | headless=True, 48 | use_gt_pose=True, 49 | load_existing_occumap=True, 50 | save_new_occumap=False, 51 | ) 52 | 53 | game.run() 54 | 55 | p.join() 56 | 57 | 58 | if __name__ == "__main__": 59 | main() 60 | -------------------------------------------------------------------------------- /demos/play_interactive_terminal.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | from orion.agent_env.chatgpt_control_orion import ChatGPTControlORION 3 | from orion.agent_env.chatgpt_control_cow import ChatGPTControlCoW 4 | from orion.agent_env.chatgpt_control_vlmap import ChatGPTControlVLMap 5 | from orion.agent_env.hybrid_search import HybridSearchAgentEnv 6 | from orion.config.chatgpt_config import ( 7 | AzureGPT35Config, 8 | AzureGPT4Config, 9 | OpenAIGPT35Config, 10 | OpenAIGPT4Config, 11 | ) 12 | 13 | 14 | if __name__ == "__main__": 15 | parser = argparse.ArgumentParser() 16 | parser.add_argument("--api-type", choices=["openai", "azure"], default="azure") 17 | parser.add_argument("--model-type", choices=["gpt35", "gpt4"], default="gpt4") 18 | parser.add_argument("--stream", action="store_true", default=False) 19 | parser.add_argument("--use_memory", type=bool, default=True) 20 | parser.add_argument("--use_vlmap", type=bool, default=True) 21 | parser.add_argument("--fast_explore", action="store_true", default=False) 22 | parser.add_argument("--scene_id", type=str, default="4ok3usBNeis") 23 | parser.add_argument("--floor_set", type=int, nargs=2, default=(-1, 1)) 24 | parser.add_argument("--use_chatgpt", type=bool, default=True) 25 | parser.add_argument( 26 | "--method-type", type=str, default="orion", choices=["orion", "vlmap", "cow"] 27 | ) 28 | parser.add_argument( 29 | "--vlmap_dir", 30 | type=str, 31 | default="lseg_vlmap", 32 | choices=["lseg_vlmap", "conceptfusion_vlmap"], 33 | ) 34 | parser.add_argument("--dump_dir", type=str, default="dump") 35 | parser.add_argument("--record_interaction", type=bool, default=False) 36 | 37 | args = parser.parse_args() 38 | 39 | if args.api_type == "openai": 40 | if args.model_type == "gpt35": 41 | chatgpt_config = OpenAIGPT35Config() 42 | elif args.model_type == "gpt4": 43 | chatgpt_config = OpenAIGPT4Config() # type: ignore 44 | else: 45 | raise ValueError("model_type can only be ['gpt35', 'gpt4']") 46 | elif args.api_type == "azure": 47 | if args.model_type == "gpt35": 48 | chatgpt_config = AzureGPT35Config() 49 | elif args.model_type == "gpt4": 50 | 
chatgpt_config = AzureGPT4Config() 51 | else: 52 | raise ValueError("model_type can only be ['gpt35', 'gpt4']") 53 | 54 | if args.method_type == "orion": 55 | is_vlmap_baseline = False 56 | is_cow_baseline = False 57 | ChatGPTControl = ChatGPTControlORION 58 | dump_dir = args.dump_dir + "/orion" 59 | elif args.method_type == "vlmap": 60 | is_vlmap_baseline = True 61 | is_cow_baseline = False 62 | ChatGPTControl = ChatGPTControlVLMap 63 | dump_dir = args.dump_dir + "/vlmap" 64 | elif args.method_type == "cow": 65 | is_vlmap_baseline = False 66 | is_cow_baseline = True 67 | ChatGPTControl = ChatGPTControlCoW 68 | dump_dir = args.dump_dir + "/cow" 69 | else: 70 | raise ValueError("method_type can only be ['orion', 'vlmap', 'cow']") 71 | 72 | if args.use_chatgpt: 73 | # talk in natural language in the cmd line, e.g. "go to the shelf" 74 | game = ChatGPTControl( 75 | use_stream=args.stream, 76 | use_memory=args.use_memory, 77 | use_vlmap=args.use_vlmap, 78 | fast_explore=args.fast_explore, 79 | display_shortside=256, 80 | save_dir_name="predict", 81 | auto_record=False, 82 | display_setting="rgb+occumap+topdownmap", 83 | headless=False, 84 | use_gt_pose=True, 85 | load_existing_occumap=True, 86 | save_new_occumap=False, 87 | scene_ids=[args.scene_id], 88 | floor_set=args.floor_set, 89 | chatgpt_config=chatgpt_config, 90 | vlmap_dir=args.vlmap_dir, 91 | is_vlmap_baseline=is_vlmap_baseline, 92 | is_cow_baseline=is_cow_baseline, 93 | dump_dir=dump_dir, 94 | record_interaction=args.record_interaction, 95 | ) 96 | 97 | else: 98 | # talk with restricted inputs 99 | # the input string is (phrase|noun), e.g. "red apple|apple" 100 | # or just input a noun, e.g. "apple" to the cmd line 101 | game = HybridSearchAgentEnv( 102 | use_memory=args.use_memory, 103 | use_vlmap=args.use_vlmap, 104 | fast_explore=args.fast_explore, 105 | scene_ids=[args.scene_id], 106 | floor_set=args.floor_set, 107 | display_shortside=256, 108 | save_dir_name="predict", 109 | auto_record=False, 110 | display_setting="rgb+occumap+topdownmap", 111 | use_gt_pose=True, 112 | load_existing_occumap=True, 113 | save_new_occumap=False, 114 | vlmap_dir=args.vlmap_dir, 115 | is_vlmap_baseline=is_vlmap_baseline, 116 | is_cow_baseline=is_cow_baseline, 117 | ) 118 | 119 | game.run() 120 | -------------------------------------------------------------------------------- /demos/play_lseg.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import cv2 3 | 4 | from orion.utils.visulization import plot_pixel_feature_match 5 | from orion.utils.file_load import load_image 6 | from orion.config.my_config import LsegConfig 7 | from orion.perception.extractor.clipbase import CLIPBase 8 | from orion.abstract.interfaces import TextQueries 9 | from orion.config.my_config import CLIPConfig_vitB32_openai, VLMAP_QUERY_LIST_BASE 10 | from orion.perception.extractor.lseg_extractor import LSegExtractor 11 | 12 | # # Test LSegExtractor 13 | lseg_extractor = LSegExtractor(cfg=LsegConfig()) 14 | 15 | 16 | input_image = load_image("demos/demo.jpg") 17 | # NB: has to change size to 480x640 !!!! 
18 | # Otherwise, the Lseg will not work 19 | input_image = cv2.resize(input_image, (640, 480)) 20 | 21 | clip_extractor = CLIPBase(CLIPConfig_vitB32_openai(device="cpu")) 22 | 23 | text_list = VLMAP_QUERY_LIST_BASE + ["dog", "cat", "grass", "tree"] 24 | text_feat = clip_extractor.encode_text(TextQueries(text_list)) 25 | text_feat = text_feat.cpu().numpy() 26 | print("text feature size: ", text_feat.shape) 27 | 28 | with torch.no_grad(): 29 | pixel_feat = lseg_extractor.predict(input_image).cpu().numpy() 30 | print("img feature size:", pixel_feat.shape) 31 | 32 | plot_pixel_feature_match( 33 | pixel_feat, text_feat, text_list, save_path="demos/demo_lseg_output.jpg" 34 | ) 35 | -------------------------------------------------------------------------------- /orion/__init__.py: -------------------------------------------------------------------------------- 1 | import logging 2 | from datetime import datetime 3 | 4 | logger = logging.getLogger(__name__) 5 | 6 | logging.basicConfig( 7 | # filename=f"logs/{datetime.now().strftime('%Y-%m-%d_%H-%M-%S')}.log", 8 | format="[%(asctime)s %(levelname)s] %(message)s", 9 | level=logging.INFO, 10 | datefmt="%Y-%m-%d %H:%M:%S", 11 | ) 12 | -------------------------------------------------------------------------------- /orion/abstract/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sled-group/navchat/afb21a4c9cb13f671534be5460e3c9e5867e5512/orion/abstract/__init__.py -------------------------------------------------------------------------------- /orion/abstract/agent.py: -------------------------------------------------------------------------------- 1 | from abc import ABC, abstractmethod 2 | 3 | from attr import define 4 | 5 | from .pose import Agent2DPose, Agent3DPose 6 | 7 | 8 | @define 9 | class AgentState: 10 | pose_2d: Agent2DPose 11 | pose_3d: Agent3DPose 12 | camera_height: float = 0.88 # in meter 13 | 14 | 15 | class AgentEnv(ABC): 16 | """ 17 | Base agent that can interact with the environment 18 | """ 19 | 20 | @abstractmethod 21 | def _prepare_env(self, env_config, *args, **kwargs): 22 | """ 23 | 1. set env config. 24 | 2. initialize env that can interact with agent. 25 | 3. get ground truth semantics 26 | """ 27 | 28 | @abstractmethod 29 | def _prepare_occupancy_map(self, *args, **kwargs): 30 | """ 31 | Here we use frontier-based exploration policy. 32 | """ 33 | 34 | @abstractmethod 35 | def _prepare_low_level_planner(self, *args, **kwargs): 36 | """ 37 | low-level PointNav 38 | """ 39 | 40 | @abstractmethod 41 | def _prepare_agent_state(self, *args, **kwargs): 42 | """ 43 | pose, inventory, etc. 
44 | """ 45 | 46 | @abstractmethod 47 | def _observation_wrapper(self, *args, **kwargs): 48 | pass 49 | 50 | @abstractmethod 51 | def reset(self): 52 | pass 53 | 54 | def _prepare_perception(self, *args, **kwargs): 55 | """grounding SAM""" 56 | 57 | def _prepare_vlmap(self, *args, **kwargs): 58 | """vlmap is running in the background""" 59 | 60 | def _prepare_memory(self, *args, **kwargs): 61 | """ 62 | Memory where the agent can store experiences 63 | """ 64 | -------------------------------------------------------------------------------- /orion/abstract/interaction_history.py: -------------------------------------------------------------------------------- 1 | import os 2 | import pickle 3 | from copy import deepcopy 4 | from typing import List, Optional, Union 5 | 6 | import cv2 7 | import numpy as np 8 | from attr import define 9 | 10 | from orion import logger 11 | from orion.abstract.agent import AgentState 12 | from orion.abstract.interfaces import Observations 13 | from orion.abstract.perception import MaskedBBOX 14 | from orion.abstract.pose import Agent2DPose, Agent3DPose 15 | 16 | 17 | @define 18 | class UsrMsg: 19 | # usr utterance 20 | text: str 21 | 22 | 23 | @define 24 | class SucMsg: 25 | # success message 26 | reward: float 27 | 28 | 29 | @define 30 | class GPTMsg: 31 | # chatgpt response 32 | content: str 33 | 34 | 35 | @define 36 | class FuncMsg: 37 | # function return message 38 | content: str 39 | 40 | 41 | @define 42 | class BotMsg: 43 | # robot response 44 | text: str 45 | 46 | 47 | @define 48 | class PointAct: 49 | # waypoint 50 | pt2d: Agent2DPose 51 | pt3d: Agent3DPose # pose by simulator [x, y, z] 52 | 53 | 54 | @define 55 | class StepAct: 56 | action: Optional[str] # low-level action 57 | next_obs: Observations 58 | next_state: AgentState 59 | 60 | def compression(self): 61 | self.next_obs = None 62 | # # depth consumes too much memory 63 | # self.next_obs.semantic = None 64 | # self.next_obs.depth = (self.next_obs.depth * 1000).astype(np.uint16) 65 | # if self.next_obs.info is not None and "collisions" in self.next_obs.info and self.next_obs.info["collisions"]["is_collision"]: 66 | # self.next_obs.info = {"collisions": self.next_obs.info["collisions"]} 67 | # else: 68 | # self.next_obs.info = None 69 | 70 | 71 | @define 72 | class DetAct: 73 | mmbox: MaskedBBOX 74 | 75 | 76 | JointType = Union[UsrMsg, GPTMsg, BotMsg, PointAct, StepAct, DetAct] 77 | 78 | 79 | class InteractionHistory: 80 | def __init__(self, record=False): 81 | self.record = record 82 | self.interactions: List[JointType] = [] 83 | 84 | def append(self, item: JointType): 85 | if not self.record: 86 | return 87 | _item = deepcopy(item) 88 | 89 | if isinstance(_item, StepAct): 90 | _item.compression() 91 | 92 | self.interactions.append(_item) 93 | 94 | def __len__(self): 95 | return len(self.interactions) 96 | 97 | def __getitem__(self, idx): 98 | return self.interactions[idx] 99 | 100 | def __iter__(self): 101 | return iter(self.interactions) 102 | 103 | def save(self, save_dir): 104 | if not os.path.exists(save_dir): 105 | os.makedirs(save_dir) 106 | save_path = os.path.join(save_dir, "interaction_history.pkl") 107 | logger.info(f"save interaction history to {save_path}") 108 | with open(save_path, "wb") as f: 109 | pickle.dump(self.interactions, f) 110 | -------------------------------------------------------------------------------- /orion/abstract/interfaces.py: -------------------------------------------------------------------------------- 1 | from typing import Any, Dict, List, Optional 2 | 3 |
import attr 4 | import numpy as np 5 | 6 | 7 | @attr.s(auto_attribs=True) 8 | class Observations: 9 | """Sensor observations.""" 10 | 11 | # (camera_height, camera_width, 3) in [0, 255] 12 | rgb: np.ndarray 13 | # (camera_height, camera_width, 1) in meters, not normalized! 14 | depth: Optional[np.ndarray] = None 15 | # (camera_height, camera_width, 1) in [0, num_sem_categories - 1] 16 | semantic: Optional[np.ndarray] = None 17 | info: Optional[Dict[str, Any]] = None # additional information 18 | rel_cam_pose: Optional[np.ndarray] = None # relative to the first camera frame 19 | compass: Optional[np.ndarray] = None # in radians 20 | gps: Optional[np.ndarray] = None # in meters 21 | 22 | 23 | @attr.s(auto_attribs=True) 24 | class TextQuery: 25 | """A text prompt to query vlmap or perception model.""" 26 | 27 | prompt: str # description of the target object, 28 | # like "a yellow chair near the bed" 29 | target: Optional[str] = None # a noun word for the target object 30 | # like 'chair', 'bedroom' 31 | 32 | def __attrs_post_init__(self): 33 | if self.target is None: 34 | self.target = self.prompt 35 | 36 | def __str__(self): 37 | return f"(target: {self.target}, prompt: {self.prompt})" 38 | 39 | 40 | @attr.s(auto_attribs=True) 41 | class TextQueries: 42 | """A list of text prompts to query vlmap or perception model.""" 43 | 44 | prompts: List[str] 45 | targets: Optional[List[str]] = None 46 | 47 | def __attrs_post_init__(self): 48 | if self.targets is None: 49 | self.targets = self.prompts 50 | 51 | def to_str(self, prompt=True) -> str: 52 | """Return a string representation of prompts or targets.""" 53 | items = self.prompts if prompt else self.targets 54 | return " . ".join(items) if items else "" 55 | 56 | def __getitem__(self, idx): 57 | return self.prompts[idx], self.targets[idx] 58 | 59 | def __len__(self): 60 | return len(self.prompts) 61 | 62 | def __iter__(self): 63 | return iter(zip(self.prompts, self.targets)) 64 | -------------------------------------------------------------------------------- /orion/abstract/memory.py: -------------------------------------------------------------------------------- 1 | from abc import ABC, abstractmethod 2 | from typing import Any 3 | 4 | from attr import define 5 | 6 | 7 | @define 8 | class EpisodicMemory(ABC): 9 | """ 10 | Base memory that can store experiences 11 | """ 12 | 13 | positive_memory: Any 14 | negative_memory: Any 15 | 16 | @abstractmethod 17 | def add(self, *args, **kwargs): 18 | pass 19 | 20 | @abstractmethod 21 | def delete(self, *args, **kwargs): 22 | pass 23 | 24 | @abstractmethod 25 | def update(self, *args, **kwargs): 26 | pass 27 | 28 | @abstractmethod 29 | def save(self, *args, **kwargs): 30 | pass 31 | 32 | @abstractmethod 33 | def load(self, *args, **kwargs): 34 | pass 35 | 36 | @abstractmethod 37 | def retrieve(self, *args, **kwargs): 38 | pass 39 | -------------------------------------------------------------------------------- /orion/abstract/perception.py: -------------------------------------------------------------------------------- 1 | from abc import ABC, abstractmethod 2 | from dataclasses import dataclass 3 | from typing import Any, List, Optional, Union 4 | 5 | import numpy as np 6 | import torch 7 | 8 | from orion import logger 9 | from orion.abstract.interfaces import TextQuery 10 | 11 | 12 | class PerceptionModule(ABC): 13 | @abstractmethod 14 | def predict( 15 | self, rgb: np.ndarray, txt: TextQuery 16 | ) -> Union["MaskedBBOX", float, np.ndarray, torch.Tensor]: 17 | """ 18 | single image prediction 19 |
""" 20 | 21 | 22 | class DetectionModule(PerceptionModule): 23 | """img -> bboxes""" 24 | 25 | @abstractmethod 26 | def predict(self, rgb: np.ndarray, txt: TextQuery) -> "MaskedBBOX": 27 | pass 28 | 29 | 30 | class ExtractorModule(PerceptionModule): 31 | """img -> feature vector""" 32 | 33 | def __init__(self, *args, **kwargs): 34 | super().__init__(*args, **kwargs) 35 | self.feat_dim = None 36 | 37 | @abstractmethod 38 | def predict( 39 | self, rgb: np.ndarray, txt: Optional[TextQuery] = None 40 | ) -> Union[np.ndarray, torch.Tensor]: 41 | pass 42 | 43 | 44 | @dataclass 45 | class MaskedBBOX: 46 | flag: bool 47 | bboxes: List[Any] # (x1, y1, x2, y2) 48 | texts: List[str] 49 | masks: List[np.ndarray] 50 | 51 | def __bool__(self): 52 | return self.flag 53 | 54 | def __len__(self): 55 | return len(self.bboxes) 56 | 57 | def __iter__(self): 58 | return iter(zip(self.bboxes, self.texts, self.masks)) 59 | 60 | def __getitem__(self, idx): 61 | return self.bboxes[idx], self.texts[idx], self.masks[idx] 62 | 63 | @classmethod 64 | def from_tuple_list(cls, flag, tuple_list): 65 | if len(tuple_list) == 0: 66 | return cls(flag, [], [], []) 67 | tuple_list = sorted( 68 | tuple_list, key=lambda x: cls._bbox_area(x[0]), reverse=True 69 | ) 70 | return cls(flag, *zip(*tuple_list)) 71 | 72 | @staticmethod 73 | def _bbox_area(bbox): 74 | if bbox[2] < bbox[0] or bbox[3] < bbox[1]: 75 | return 0 76 | return (bbox[2] - bbox[0]) * (bbox[3] - bbox[1]) 77 | 78 | def de_duplication(self): 79 | if len(self) in [0, 1]: 80 | return self 81 | 82 | mask_idx = [] 83 | for i in range(len(self)): 84 | for j in range(i + 1, len(self)): 85 | bbox_i = self.bboxes[i] 86 | bbox_j = self.bboxes[j] 87 | bbox_j_area = self._bbox_area(bbox_j) 88 | intersect_box = [ 89 | max(bbox_i[0], bbox_j[0]), 90 | max(bbox_i[1], bbox_j[1]), 91 | min(bbox_i[2], bbox_j[2]), 92 | min(bbox_i[3], bbox_j[3]), 93 | ] 94 | intersect_area = self._bbox_area(intersect_box) 95 | 96 | mask_i: np.ndarray = self.masks[i].squeeze() 97 | mask_j: np.ndarray = self.masks[j].squeeze() 98 | 99 | intersect_mask = np.logical_and(mask_i, mask_j) 100 | 101 | if ( 102 | intersect_area / bbox_j_area > 0.8 103 | or np.sum(intersect_mask) / np.sum(mask_j) > 0.9 104 | ): 105 | logger.info( 106 | f"[MBBOX] de_duplication: " f"{self.bboxes[i]}, {self.bboxes[j]}" 107 | ) 108 | mask_idx.append(j) 109 | 110 | tuple_list = [] 111 | logger.info(f"[MBBOX] before: {len(self)}") 112 | for i in range(len(self)): 113 | if i not in mask_idx: 114 | tuple_list.append((self.bboxes[i], self.texts[i], self.masks[i])) 115 | logger.info(f"[MBBOX] after: {len(tuple_list)}") 116 | return MaskedBBOX.from_tuple_list(self.flag, tuple_list) 117 | -------------------------------------------------------------------------------- /orion/abstract/usersim.py: -------------------------------------------------------------------------------- 1 | from abc import ABC, abstractmethod 2 | 3 | 4 | class UserSimulator(ABC): 5 | @abstractmethod 6 | def reset(self, *args, **kwargs): 7 | pass 8 | 9 | @abstractmethod 10 | def step(self, *args, **kwargs): 11 | pass 12 | 13 | @abstractmethod 14 | def evaluate(self, *args, **kwargs): 15 | pass 16 | -------------------------------------------------------------------------------- /orion/agent_env/fbe.py: -------------------------------------------------------------------------------- 1 | """ 2 | Frontier Based Exploration 3 | This is used to explore the environment, collect rgbd data, and build the vlmap.
4 | """ 5 | 6 | import numpy as np 7 | 8 | from orion import logger 9 | from orion.abstract.pose import Agent2DPose, FrontierWayPoint 10 | from orion.agent_env.habitat.base import HabitatAgentEnv 11 | from orion.agent_env.habitat.utils import try_action_import_v2 12 | from orion.utils import visulization as vis 13 | 14 | MOVE_FORWARD, _, TURN_LEFT, TURN_RIGHT, STOP = try_action_import_v2() 15 | 16 | 17 | class FBEAgentEnv(HabitatAgentEnv): 18 | def __init__(self, fast_explore: bool, *args, **kwargs): 19 | super().__init__(*args, **kwargs) 20 | self.fast_explore = fast_explore 21 | 22 | def move_and_spin(self, spin_angle=360, move_ahead=0): 23 | for _ in range(move_ahead): 24 | self.step(MOVE_FORWARD) 25 | if spin_angle > 0: 26 | for _ in range(spin_angle // self.config.SIMULATOR.TURN_ANGLE): 27 | self.step(TURN_RIGHT) 28 | else: 29 | for _ in range(-spin_angle // self.config.SIMULATOR.TURN_ANGLE): 30 | self.step(TURN_LEFT) 31 | 32 | def loop(self): 33 | self.fbe.reset(reset_floor=True, init_spin=True) 34 | while self.step_count < 500: 35 | if self.fbe.mode == self.fbe.InitSpinMode: 36 | self.move_and_spin(360) 37 | self.fbe._init_check_large_room() 38 | self.fbe.mode = self.fbe.ExploreMode 39 | self.fbe.set_explore_strategy(self.fast_explore) 40 | 41 | elif self.fbe.mode == self.fbe.ExploreMode: 42 | self.follower.set_traversible_map(self.fbe.traversable_map) 43 | navigable_mask = self.follower.get_navigable_mask() 44 | if not self.fbe.fast_explore: 45 | # go to viewpt, look around, then go to goalpt, look around 46 | next_plan: FrontierWayPoint = self.fbe.plan( 47 | navigable_mask, with_viewpoint=True 48 | ) 49 | else: 50 | # go to goalpt directly, update at every serveral steps 51 | next_plan: FrontierWayPoint = self.fbe.plan( 52 | navigable_mask, with_viewpoint=False 53 | ) 54 | if next_plan is None: 55 | logger.info("all goal finished===") 56 | self.fbe.reset(reset_floor=True, init_spin=True) 57 | break 58 | else: 59 | if not self.fbe.fast_explore: 60 | viewpt: Agent2DPose = next_plan.viewpt 61 | if viewpt is not None: 62 | logger.info(f"=== move to view point first {viewpt}===") 63 | 64 | best_action = self._get_next_action(viewpt) 65 | if best_action == 0: 66 | logger.info( 67 | "cannot change position with the follower===" 68 | ) 69 | while best_action != 0: 70 | self.step(best_action) 71 | best_action = self._get_next_action(viewpt) 72 | self.move_and_spin(-90) 73 | self.move_and_spin(180) 74 | 75 | goalpt: Agent2DPose = next_plan.goalpt 76 | if goalpt is not None: 77 | logger.info(f"=== move to goal point {goalpt}===") 78 | best_action = self._get_next_action( 79 | goalpt, new_goal_dist=10 80 | ) 81 | if best_action == 0: 82 | logger.info( 83 | "cannot change position with the follower===" 84 | ) 85 | while best_action != 0: 86 | self.step(best_action) 87 | best_action = self._get_next_action( 88 | goalpt, new_goal_dist=10 89 | ) 90 | self.move_and_spin(-90) 91 | self.move_and_spin(180) 92 | 93 | else: 94 | goalpt: Agent2DPose = next_plan.goalpt 95 | if goalpt is None: 96 | continue 97 | logger.info(f"=== move to goal point {goalpt}===") 98 | best_action = self._get_next_action(goalpt, new_goal_dist=10) 99 | count = self.fbe.fbecfg.fast_explore_forwardcount 100 | if best_action == 0: 101 | logger.info("cannot change position with the follower===") 102 | while best_action != 0 and count > 0: 103 | self.step(best_action) 104 | if best_action == MOVE_FORWARD: 105 | count -= 1 106 | best_action = self._get_next_action( 107 | goalpt, new_goal_dist=10 108 | ) 109 | 110 | if ( 111 | 
self.fbe.l2(goalpt, self._grdpose) 112 | < self.fbe.fbecfg.dist_large_thres 113 | ): 114 | self.move_and_spin(-90) 115 | self.move_and_spin(180) 116 | 117 | 118 | if __name__ == "__main__": 119 | from orion.config.my_config import SCENE_ID_FLOOR_SET 120 | 121 | game = FBEAgentEnv( 122 | total_round=1, 123 | scene_ids=["4ok3usBNeis"], 124 | floor_set=(-1, 1), 125 | fast_explore=True, 126 | display_shortside=256, 127 | save_dir_name="predict", 128 | auto_record=True, 129 | record_dir="recordings_prelim_fbe", 130 | display_setting="rgb+occumap+topdownmap", 131 | use_gt_pose=False, 132 | load_existing_occumap=True, 133 | save_new_occumap=False, 134 | ) 135 | game.run() 136 | -------------------------------------------------------------------------------- /orion/agent_env/habitat/holonomic_actions.py: -------------------------------------------------------------------------------- 1 | import cv2 2 | import habitat 3 | import habitat_sim 4 | from habitat.core.registry import registry 5 | from habitat.core.simulator import ActionSpaceConfiguration 6 | from habitat.sims.habitat_simulator.actions import ( 7 | HabitatSimActions, 8 | HabitatSimV1ActionSpaceConfiguration, 9 | ) 10 | from habitat.tasks.nav.nav import SimulatorTaskAction 11 | 12 | 13 | @habitat.registry.register_action_space_configuration(name="Holonomic") 14 | class HolonomicMovement(HabitatSimV1ActionSpaceConfiguration): 15 | def get(self): 16 | config = super().get() 17 | config[HabitatSimActions.MOVE_BACKWARD] = habitat_sim.ActionSpec( 18 | "move_backward", 19 | habitat_sim.ActuationSpec(amount=self.config.FORWARD_STEP_SIZE), 20 | ) 21 | return config 22 | 23 | 24 | @habitat.registry.register_task_action 25 | class MoveBackwardAction(SimulatorTaskAction): 26 | name = "MOVE_BACKWARD" 27 | 28 | def _get_uuid(self, *args, **kwargs) -> str: 29 | return "move_backward" 30 | 31 | def step(self, *args, **kwargs): 32 | r"""Update ``_metric``, this method is called from ``Env`` on each 33 | ``step``. 
34 | """ 35 | return self._sim.step(HabitatSimActions.MOVE_BACKWARD) 36 | 37 | 38 | HabitatSimActions.extend_action_space("MOVE_BACKWARD") 39 | -------------------------------------------------------------------------------- /orion/agent_env/habitat/utils.py: -------------------------------------------------------------------------------- 1 | import habitat 2 | from habitat.core.utils import try_cv2_import 3 | from habitat.utils.visualizations import maps 4 | from habitat.utils.visualizations.utils import append_text_to_image, images_to_video 5 | 6 | cv2 = try_cv2_import() 7 | 8 | 9 | def transform_rgb_bgr(image): 10 | return image[:, :, [2, 1, 0]] 11 | 12 | 13 | def quiet(): 14 | import os 15 | 16 | os.environ["MAGNUM_LOG"] = "quiet" 17 | os.environ["HABITAT_SIM_LOG"] = "quiet" 18 | 19 | 20 | def update_fov(config, fov=90): 21 | config.defrost() 22 | config.SIMULATOR.DEPTH_SENSOR.HFOV = fov 23 | config.SIMULATOR.RGB_SENSOR.HFOV = fov 24 | config.SIMULATOR.SEMANTIC_SENSOR.HFOV = fov 25 | config.freeze() 26 | 27 | 28 | def update_scene(config, split="val", scene_ids=["4ok3usBNeis"]): 29 | config.defrost() 30 | if split is not None: 31 | config.DATASET.SPLIT = split 32 | if scene_ids is not None: 33 | config.DATASET.CONTENT_SCENES = scene_ids 34 | config.freeze() 35 | 36 | 37 | def update_holonomic_action(config): 38 | config.defrost() 39 | config.TASK.ACTIONS.MOVE_BACKWARD = habitat.config.Config() 40 | config.TASK.ACTIONS.MOVE_BACKWARD.TYPE = "MoveBackwardAction" 41 | config.SIMULATOR.ACTION_SPACE_CONFIG = "Holonomic" 42 | config.freeze() 43 | 44 | 45 | def add_top_down_map_and_collision(config): 46 | config.defrost() 47 | config.TASK.MEASUREMENTS.append("TOP_DOWN_MAP") 48 | config.TASK.MEASUREMENTS.append("COLLISIONS") 49 | config.freeze() 50 | 51 | 52 | def try_action_import(): 53 | try: 54 | from habitat.sims.habitat_simulator.actions import HabitatSimActions 55 | 56 | MOVE_FORWARD = HabitatSimActions.MOVE_FORWARD 57 | TURN_LEFT = HabitatSimActions.TURN_LEFT 58 | TURN_RIGHT = HabitatSimActions.TURN_RIGHT 59 | STOP = HabitatSimActions.STOP 60 | except: 61 | MOVE_FORWARD = 0 62 | TURN_LEFT = 1 63 | TURN_RIGHT = 2 64 | STOP = 3 65 | return MOVE_FORWARD, TURN_LEFT, TURN_RIGHT, STOP 66 | 67 | 68 | def try_action_import_v2(): 69 | try: 70 | from habitat.sims.habitat_simulator.actions import HabitatSimActions 71 | 72 | MOVE_FORWARD = HabitatSimActions.MOVE_FORWARD 73 | MOVE_BACKWARD = "MOVE_BACKWARD" 74 | TURN_LEFT = HabitatSimActions.TURN_LEFT 75 | TURN_RIGHT = HabitatSimActions.TURN_RIGHT 76 | # LOOK_UP = HabitatSimActions.LOOK_UP 77 | # LOOK_DOWN = HabitatSimActions.LOOK_DOWN 78 | STOP = HabitatSimActions.STOP 79 | return MOVE_FORWARD, MOVE_BACKWARD, TURN_LEFT, TURN_RIGHT, STOP 80 | except: 81 | raise NotImplementedError 82 | -------------------------------------------------------------------------------- /orion/agent_env/teleop.py: -------------------------------------------------------------------------------- 1 | import cv2 2 | 3 | from orion.agent_env.habitat.base import HabitatAgentEnv 4 | from orion.agent_env.habitat.utils import try_action_import_v2 5 | 6 | FORWARD_KEY = "w" 7 | BACKWARD_KEY = "s" 8 | LEFT_KEY = "a" 9 | RIGHT_KEY = "d" 10 | FINISH = "p" 11 | 12 | MOVE_FORWARD, MOVE_BACKWARD, TURN_LEFT, TURN_RIGHT, STOP = try_action_import_v2() 13 | 14 | 15 | class TeleOpAgentEnv(HabitatAgentEnv): 16 | def loop(self): 17 | while True: 18 | keystroke = cv2.waitKey(0) 19 | if keystroke == ord(FORWARD_KEY): 20 | action = MOVE_FORWARD 21 | elif keystroke == ord(BACKWARD_KEY): 22 | action 
= MOVE_BACKWARD 23 | elif keystroke == ord(LEFT_KEY): 24 | action = TURN_LEFT 25 | elif keystroke == ord(RIGHT_KEY): 26 | action = TURN_RIGHT 27 | elif keystroke == ord(FINISH): 28 | action = STOP 29 | else: 30 | print("INVALID KEY") 31 | action = None 32 | if action is not None: 33 | self.step(action) 34 | -------------------------------------------------------------------------------- /orion/chatgpt/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sled-group/navchat/afb21a4c9cb13f671534be5460e3c9e5867e5512/orion/chatgpt/__init__.py -------------------------------------------------------------------------------- /orion/chatgpt/api.py: -------------------------------------------------------------------------------- 1 | import re 2 | from typing import Dict, List, Union 3 | 4 | from openai import AzureOpenAI, OpenAI 5 | 6 | from orion import logger 7 | from orion.config.chatgpt_config import AzureConfig, OpenAIConfig 8 | 9 | 10 | class ChatAPI: 11 | def __init__( 12 | self, 13 | config: Union[OpenAIConfig, AzureConfig], 14 | ): 15 | self.messages: List[Dict[str, str]] = [] 16 | self.history: List[Dict[str, str]] = [] 17 | 18 | if isinstance(config, AzureConfig): 19 | self.client = AzureOpenAI( 20 | api_key=config.api_key, 21 | api_version=config.api_version, 22 | azure_endpoint=config.azure_endpoint, 23 | ) 24 | elif isinstance(config, OpenAIConfig): 25 | self.client = OpenAI(api_key=config.api_key) 26 | else: 27 | print(type(config)) 28 | raise ValueError("api_type can only be ['azure', 'openai']") 29 | 30 | self.model = config.model 31 | self.max_limit = min(config.limit - 2000, 32000) 32 | self.price = config.price 33 | self.usage_tokens = 0 34 | self.cost = 0 # USD money cost 35 | 36 | def add_function_message(self, content: str): 37 | if not re.search(r"^Function Return:", content): 38 | content = "Function Return:\n" + content 39 | self.messages.append({"role": "user", "content": content}) 40 | self.history.append(self.messages[-1]) 41 | 42 | def add_user_message(self, content: str): 43 | if not re.search(r"^User Utterance:", content): 44 | content = "User Utterance: " + content 45 | self.messages.append({"role": "user", "content": content}) 46 | self.history.append(self.messages[-1]) 47 | 48 | def add_assistant_message(self, content: str): 49 | self.messages.append({"role": "assistant", "content": content}) 50 | self.history.append(self.messages[-1]) 51 | 52 | def get_system_response(self) -> str: 53 | try: 54 | self.do_truncation = False 55 | 56 | response = self.client.chat.completions.create( 57 | model=self.model, messages=self.messages 58 | ) 59 | response_message = response.choices[0].message 60 | 61 | usage_tokens = response.usage.total_tokens 62 | self.cost += usage_tokens * self.price / 1000 63 | logger.info( 64 | f"[ChatGPT] current model {self.model}, usage_tokens: {usage_tokens}, " 65 | f"cost: ${self.cost:.5f}, price: ${self.price:.5f}" 66 | ) 67 | if usage_tokens > self.max_limit: 68 | logger.info( 69 | f"[ChatGPT] truncate the conversation to avoid token usage limit, save money" 70 | ) 71 | self.truncate() 72 | 73 | return response_message.content 74 | except Exception as e: 75 | logger.warning(f"[ChatGPT] Error: {e}") 76 | return "Sorry, I am not able to respond to that." 
77 | 78 | def get_system_response_stream(self): 79 | response = self.client.chat.completions.create( 80 | model=self.model, messages=self.messages, stream=True 81 | ) 82 | for chuck in response: 83 | if len(chuck.choices) > 0 and chuck.choices[0].finish_reason != "stop": 84 | if chuck.choices[0].delta.content is None: 85 | continue 86 | yield chuck.choices[0].delta.content 87 | 88 | # stream mode does not support token usage check, give a rough estimation 89 | usage_tokens = int(sum([len(item["content"]) for item in self.message]) / 3.5) 90 | self.usage_tokens = usage_tokens 91 | self.cost += usage_tokens * self.price / 1000 92 | logger.info( 93 | f"[ChatGPT] current model {self.model}, usage_tokens approximation: {usage_tokens}," 94 | f" cost: ${self.cost:.2f}, price: ${self.price:.2f}" 95 | ) 96 | 97 | if usage_tokens > self.max_limit: 98 | logger.info( 99 | f"[ChatGPT] truncate the conversation to avoid token usage limit" 100 | ) 101 | self.truncate() 102 | 103 | @property 104 | def message(self): 105 | return self.messages 106 | 107 | @message.setter 108 | def message(self, message): 109 | """ 110 | Usually at the dialog beginning 111 | {"role": "system", "content": system_prompt}, 112 | {"role": "user", "content": user_message_first_turn}, 113 | {"role": "assistant", "content": assistant_message_first_turn}, 114 | """ 115 | self.init_length = len(message) 116 | self.messages = message 117 | self.history.extend(self.messages) 118 | 119 | def truncate(self, percentage: int = 3): 120 | self.do_truncation = True 121 | usr_idx = [ 122 | idx 123 | for idx in range(len(self.messages)) 124 | if self.messages[idx]["role"] == "user" 125 | ] 126 | middle_idx = usr_idx[len(usr_idx) // percentage] 127 | logger.info( 128 | f"\033[33m [ChatGPT] truncate the conversation at index: {middle_idx} from {usr_idx} \033[m" 129 | ) 130 | self.messages = self.messages[: self.init_length] + self.messages[middle_idx:] 131 | 132 | def clear(self): 133 | """end the conversation""" 134 | self.messages = [] 135 | self.history = [] 136 | 137 | def clear_ctx(self): 138 | """clear the context""" 139 | usr_idx = [ 140 | idx 141 | for idx in range(len(self.messages)) 142 | if self.messages[idx]["role"] == "user" 143 | ] 144 | if len(usr_idx) == 0: 145 | return 146 | elif len(usr_idx) <= 3: 147 | last_idx = usr_idx[0] 148 | else: 149 | last_idx = usr_idx[-3] # keep the last 4 user messages 150 | logger.info( 151 | f"\033[33m [ChatGPT] clear context, user message remove at index: {last_idx} from {usr_idx} \033[m" 152 | ) 153 | self.messages = self.messages[: self.init_length] + self.messages[last_idx:] 154 | -------------------------------------------------------------------------------- /orion/chatgpt/prompts/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sled-group/navchat/afb21a4c9cb13f671534be5460e3c9e5867e5512/orion/chatgpt/prompts/__init__.py -------------------------------------------------------------------------------- /orion/chatgpt/prompts/baseline_cow_prompt.py: -------------------------------------------------------------------------------- 1 | SYSTEM_PROMPT = """You are controlling a robot to navigate to target objects according to the user's instructions. 2 | 3 | Your goals are: 4 | 1. Try to understand the user utterance, decide which API to call. 5 | 2. Use the return messages of the API to infer what to do next. You can continue to call APIs to control the robot or talk with the user.
6 | 7 | All commands you can use are below: 8 | 9 | There are 5 APIs you can call: 10 | [ 11 | { 12 | "name": "dialog", 13 | "description": "talk to the user, usually the last function called for one turn", 14 | "parameters": { 15 | "type": "object", 16 | "properties": { 17 | "content": { 18 | "type": "string", 19 | "description": "dialog content", 20 | } 21 | }, 22 | }, 23 | "required": ["content"], 24 | }, 25 | { 26 | "name": "search_object", 27 | "description": "use the frontier-based exploration to search the object, return possible detected results. Issuing this command again will continue the search", 28 | "parameters": { 29 | "type": "object", 30 | "properties": { 31 | "target": { 32 | "type": "string", 33 | "description": "the target object you want to detect", 34 | } 35 | }, 36 | }, 37 | "required": ["target"], 38 | }, 39 | { 40 | "name": "rotate", 41 | "description": "rotate the robot left or right", 42 | "parameters": { 43 | "type": "object", 44 | "properties": { 45 | "angle": { 46 | "type": "number", 47 | "description": "the angle in degrees to rotate the robot, > 0 for right, < 0 for left, should be in [-180, 180]", 48 | } 49 | }, 50 | }, 51 | "required": ["angle"], 52 | }, 53 | { 54 | "name": "move", 55 | "description": "issue the command to move the robot in the environment forward or backward", 56 | "parameters": { 57 | "type": "object", 58 | "properties": { 59 | "distance": { 60 | "type": "number", 61 | "description": "the total units to move the robot, > 0 for forward, < 0 for backward", 62 | } 63 | }, 64 | }, 65 | "required": ["distance"], 66 | }, 67 | { 68 | "name": "goto_points", 69 | "description": "move the robot to specific points", 70 | "parameters": { 71 | "type": "object", 72 | "properties": { 73 | "points": { 74 | "type": "array", 75 | "description": "The list of points to go. Each point is a polar coordinate (distance, angle) tuple with respect to the current robot position.", 76 | } 77 | }, 78 | }, 79 | "required": ["points"], 80 | }, 81 | ] 82 | 83 | ------------- 84 | 85 | Important Notes: 86 | 1. When the user gives a new goal, you should use `search_object` to find it; if that fails, continue to use `search_object` to find the next possible object. 87 | 2. If the user corrects your detection, you should remember what the object_id actually is, so that next time you will not make the same mistake. 88 | 89 | Examples: 90 | 91 | User Utterance: You need to find the laundry machine in the room, we bought it from Walmart in 2020. It is on your left side, approximately 100 units away. Can you move towards it? 92 | 93 | { 94 | "Thought": "Now I should search the laundry machine", 95 | "Command": {"name": "search_object", "args": {"target": "laundry machine"}} 96 | } 97 | 98 | 99 | Function Return: 100 | Found possible object. 101 | 102 | { 103 | "Thought": "I found the laundry machine, I should confirm with user", 104 | "Command": {"name": "dialog", "args": {"content": "I found the laundry machine, is it correct?"}} 105 | } 106 | 107 | User Utterance: No, it's a freezer. 108 | 109 | { 110 | "Thought": "I should search again", 111 | "Command": {"name": "search_object", "args": {"target": "laundry machine"}} 112 | } 113 | 114 | Function Return: Already reached laundry_machine_2.
115 | 116 | { 117 | "Thought": "I can just confirm with user, then go to the next possible laundry machine", 118 | "Command": {"name": "dialog", "args": {"content": "I found the laundry machine, is it correct?"}} 119 | } 120 | 121 | User Utterance: No, the laundry machine is on your right side, approximately 100 units away. Can you move towards it? 122 | 123 | { 124 | "Thought": "I should go to the next possible laundry machine, right can be 0-180, I'll set it to 90", 125 | "Command": {"name": "goto_points", "args": {"points": [[100, 90]]}} 126 | } 127 | 128 | Function Return: Already reached specified points. 129 | 130 | { 131 | "Thought": "I should confirm with user", 132 | "Command": {"name": "dialog", "args": {"content": "I found the laundry machine, is it correct?"}} 133 | } 134 | 135 | User Utterance: Yes, it's correct. 136 | 137 | """ 138 | 139 | SYSTEM_PROMPT_FUNCALL = SYSTEM_PROMPT 140 | -------------------------------------------------------------------------------- /orion/chatgpt/prompts/usersim_prompts_description.py: -------------------------------------------------------------------------------- 1 | SYSTEM_PROMPT = """Given multiple selected object goals, you are supposed to talk to a robot to reach them one by one by providing different types of user feedback to guide the navigation. 2 | 3 | Each turn, you will be given the below two message sources to generate natural language instructions. 4 | 5 | 1. The utterance from the robot. 6 | 2. Function messages from the system, returned as a dictionary, including: 7 | { 8 | "is_current_goal_reached": bool, # whether the robot reached the current goal. if reached, the next goal will be given. 9 | "is_max_trial_reached": bool, # whether the robot reached the maximum trial number for the current goal. if reached maximum, the next goal will be given. 10 | "current_goal/next_goal": { # the current goal or the next goal. if the current goal or maximum trials reached, current goal is empty, next goal will be given. 11 | "object_id": str, # unique id of the object 12 | "object_name": str, 13 | "room_name": str, # which room the goal is located in. This is used for Landmark User Feedback or general feedback of the object goal. 14 | "description": str, # descriptive visual information of the current object goal, split by '|'. This is used for Description User Feedback. 15 | "explaination": str, # the explaination from the dictionary. This is used for Description User Feedback, to help the robot understand the object goal better. 16 | "num_trial": int, # total number of trials for the current goal by the robot. Maximum number is 5. 17 | "num_round": int # total number of rounds for all objects. You will ask the robot to find each object one by one for several rounds. 18 | } 19 | } 20 | 21 | Note: 22 | 1. You can only convey the object name to the robot, not the object id. 23 | 2. Do not give all the description and explaination at once! You can give them one by one for each turn during the robot trials. Also, please add more language variation; make it varied for different turns. 24 | 3. Be sure to adhere to the function messages provided by the system, but add more language variation. Do not simply copy the information. 25 | 4. If is_current_goal_reached=true, this means the robot has already reached the goal. You can tell the robot "you already reached the goal xxx. Let's look for the next goal..." 26 | 5. Do not add too much information in one turn; the description and explaination should be given one by one for each turn.
27 | 28 | You should only respond in a JSON format dict as described below: 29 | { 30 | "Thought": "think about the current goal, the mistakes the robot may make, the possible feedback you can provide, how to decribe the goal more variant, etc.", 31 | "Response": "Your response to the robot." 32 | } 33 | Make sure the generated string can be parsed by `json.loads`. 34 | 35 | Example: 36 | 37 | Robot Utterance: Hello, what should I do? 38 | Function Message: 39 | { 40 | "current_goal": {}, 41 | "is_current_goal_reached": false, 42 | "is_max_trial_reached": false, 43 | "next_goal": { 44 | "object_id": "recliner_0", 45 | "object_name": "recliner", 46 | "room_name": "living room", 47 | "description": "massage and heat for elderly", 48 | "explaination": "a lying chair that can be adjusted to a reclining position", 49 | "num_trial": 0, 50 | "num_round": 1 51 | } 52 | } 53 | 54 | 55 | { 56 | "Thought": "Current goal is a recliner, I can tell the robot the description of the object.", 57 | "Response": "Can you find a recliner for me? It's a lying chair that can be adjusted to a reclining position." 58 | } 59 | 60 | 61 | Robot Utterance: is it correct? 62 | Function Message: 63 | { 64 | "current_goal": { 65 | "object_id": "recliner_0", 66 | "object_name": "recliner", 67 | "room_name": "living room", 68 | "description": "massage and heat for elderly", 69 | "explaination": "a lying chair that can be adjusted to a reclining position", 70 | "num_trial": 0, 71 | "num_round": 1 72 | }, 73 | "is_current_goal_reached": false, 74 | "is_max_trial_reached": false, 75 | "next_goal": {} 76 | } 77 | 78 | { 79 | "Thought": "is_current_goal_reached is false, so the robot not reach the goal, I can tell the robot the spatial information of the goal object.", 80 | "Response": "No. it's not the recliner I'm looking for. The recliner I'm looking for is looks like a lying chair, it's for massage and heat for elderly" 81 | } 82 | 83 | Robot Utterance: I found 2 possible couches, one is 12 units away at -33 degrees, and the other is 30 units away at 24 degrees. Is either of them the couch you're looking for? 84 | Function Message: 85 | { 86 | "current_goal": {}, 87 | "is_current_goal_reached": true, 88 | "is_max_trial_reached": false, 89 | "next_goal": { 90 | "object_id": "tv_0", 91 | "object_name": "tv", 92 | "room_name": "bedroom", 93 | "description": "LED TV", 94 | "explaination": "a television for living show" 95 | "num_trial": 0, 96 | "num_round": 1 97 | } 98 | } 99 | 100 | { 101 | "Thought": "The robot found two possible couches, the goal is reached, I can tell the robot the next goal.", 102 | "Response": "Yes. You found the couch I'm looking for. Now, find the TV in the bedroom, it's a LED TV for living show." 103 | } 104 | 105 | 106 | Robot Utterance: I'm sorry but I cannot verify the origin of the wardrobe. Is the wardrobe I detected the one you're looking for? 107 | Function Message: 108 | { 109 | "current_goal": { 110 | "object_id": "wardrobe_3", 111 | "object_name": "wardrobe", 112 | "room_name": "bedroom", 113 | "description": "a shelf near the bed", 114 | "explaination": "a large, tall cupboard for storing clothes", 115 | "num_trial": 2, 116 | "num_round": 1 117 | }, 118 | "is_current_goal_reached": false, 119 | "is_max_trial_reached": false, 120 | "next_goal": {} 121 | } 122 | 123 | { 124 | "Thought": "The robot detect is wrong since current goal is not reached, I can tell the robot more information about the goal object.", 125 | "Response": "No, you are wrong. 
The wardrobe I'm looking for is a large, tall cupboard for storing clothes, and it's near the bed." 126 | } 127 | 128 | Robot Utterance: I found the microwave, is it correct? 129 | Function Message: 130 | { 131 | "current_goal": {}, 132 | "is_current_goal_reached": false, 133 | "is_max_trial_reached": true, 134 | "next_goal": { 135 | "object_id": "kitchen counter_0", 136 | "object_name": "kitchen counter", 137 | "room_name": "kitchen", 138 | "description": "", 139 | "explaination": "a flat surface in a kitchen for preparing food", 140 | "num_trial": 0, 141 | "num_round": 1 142 | } 143 | } 144 | 145 | { 146 | "Thought": "The goal is not reached, but the maximum trial is reached, I can tell the robot the next goal.", 147 | "Response": "Your detection is wrong. Now, find the kitchen counter in the kitchen, it's a flat surface for preparing food." 148 | } 149 | 150 | ---------- 151 | Let's start 152 | 153 | """ 154 | -------------------------------------------------------------------------------- /orion/chatgpt/prompts/usersim_prompts_none.py: -------------------------------------------------------------------------------- 1 | SYSTEM_PROMPT = """Given multiple selected object goals, you are supposed to talk to a robot to reach them one by one. 2 | 3 | Each turn, you will be given below two messages sources to generate natural language instructions. 4 | 5 | 1. The utterance from the robot. 6 | 2. Function messages from the system. return as a dictionary, including: 7 | { 8 | "is_current_goal_reached": bool, # whether the robot reached the current goal. if reached, the next goal will be given. 9 | "is_max_trial_reached": bool, # whether the robot reached the maximum trial number for the current goal. if reached maximum, the next goal will be given. 10 | "current_goal/next_goal": { # the current goal or the next goal. if the current goal or maximum trials reached, current goal is empty, next goal will be given. 11 | "object_id": str, # unique id of the object 12 | "object_name": str, 13 | "room_name": str, # which room name the goal is located. This is used for Landmark User Feedback or general feedback of the object goal. 14 | "num_trial": int, # total number of trials for the current goal by the robot. Maximum number is 5. 15 | "num_round": int # total number of rounds for all objects. You will ask the robot for find each object one by one for several rounds. 16 | } 17 | } 18 | 19 | 20 | You should only respond in a JSON format dict as described below: 21 | { 22 | "Thought": "think about the current goal, whether the robot reach it etc.", 23 | "Response": "Your response to the robot." 24 | } 25 | Make sure the generated string can be parsed by `json.loads`. 26 | 27 | Example: 28 | 29 | Robot Utterance: Hello, what should I do? 30 | Function Message: 31 | { 32 | "current_goal": {}, 33 | "next_goal": { 34 | "object_id": "rack_0", 35 | "object_name": "rack", 36 | "room_name": "living room", 37 | "num_trial": 0, 38 | "num_round": 0 39 | } 40 | } 41 | 42 | { 43 | "Thought": "set a goal for the robot to find a rack", 44 | "Response": "Can you find a rack? it's located in the living room." 45 | } 46 | 47 | 48 | Robot Utterance: is it correct? 
49 | Function Message: 50 | { 51 | "current_goal": { 52 | "object_id": "rack_0", 53 | "object_name": "rack", 54 | "room_name": "living room", 55 | "num_trial": 3, 56 | "num_round": 1 57 | }, 58 | "is_current_goal_reached": false, 59 | "is_max_trial_reached": false, 60 | "next_goal": {} 61 | } 62 | 63 | { 64 | "Thought": "The robot asks whether it's correct, is_current_goal_reached = False, so the robot didn't reach the goal" 65 | "Response": "No, this is not the rack I'm looking for. Keep searching." 66 | } 67 | 68 | Robot Utterance: I found 2 possible couches, one is 12 units away at -33 degrees, and the other is 30 units away at 24 degrees. Is either of them the couch you're looking for? 69 | Function Message: 70 | { 71 | "current_goal": {}, 72 | "is_current_goal_reached": true, 73 | "is_max_trial_reached": false, 74 | "next_goal": { 75 | "object_id": "tv_0", 76 | "object_name": "tv", 77 | "room_name": "bedroom", 78 | "num_trial": 0, 79 | "num_round": 1 80 | } 81 | } 82 | 83 | 84 | { 85 | "Thought": "The robot found two possible couches, is_current_goal_reached is true, so it should be correct.", 86 | "Response": "Yes. Now, find the tv in the bedroom." 87 | } 88 | 89 | 90 | Robot Utterance: I found the microwave, is it correct? 91 | Function Message: 92 | { 93 | "current_goal": {}, 94 | "is_current_goal_reached": false, 95 | "is_max_trial_reached": true, 96 | "next_goal": { 97 | "object_id": "kitchen counter_0", 98 | "object_name": "kitchen counter", 99 | "num_trial": 0, 100 | "num_round": 1 101 | } 102 | } 103 | 104 | { 105 | "Thought": "The goal is not reached, and the maximum trial is reached, so the robot didn't find the goal object", 106 | "Response": "No, it's not the microwave I'm looking for. You've reached the maximum number of trials. Now, find the kitchen counter." 107 | } 108 | 109 | Robot Utterance: I found another bed with a red pillow, is it correct? 110 | Function Message: 111 | { 112 | "current_goal": { 113 | "object_id": "bed_1", 114 | "object_name": "bed", 115 | "room_name": "Alice's bedroom", 116 | "num_trial": 3, 117 | "num_round": 1 118 | }, 119 | "is_current_goal_reached": false, 120 | "is_max_trial_reached": false, 121 | "next_goal": {} 122 | } 123 | 124 | { 125 | "Thought": "the robot may found a bed, but not the goal bed, since is_current_goal_reached = False", 126 | "Response": "No, it's not the bed I'm looking for. The bed I'm looking for is in Alice's bedroom" 127 | } 128 | 129 | ---------- 130 | 131 | If is_current_goal_reached = True, that means the robot already reaches the goal, otherwise not. Do not provide other information beyond what is provided in the function message. 
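As a minimal illustration of the response contract above (the function name below is an assumption for this sketch, not taken from this repository), a reply could be validated like this:

import json

def parse_sim_reply(raw: str) -> str:
    # Hypothetical check: the reply must be a JSON object parseable by
    # json.loads and must carry both "Thought" and "Response" keys.
    data = json.loads(raw)
    if not isinstance(data, dict) or "Thought" not in data or "Response" not in data:
        raise ValueError("simulator reply does not follow the required JSON format")
    return data["Response"]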
132 | 133 | Let's start 134 | 135 | """ 136 | -------------------------------------------------------------------------------- /orion/config/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sled-group/navchat/afb21a4c9cb13f671534be5460e3c9e5867e5512/orion/config/__init__.py -------------------------------------------------------------------------------- /orion/config/chatgpt_config.py: -------------------------------------------------------------------------------- 1 | class ChatGPTConfig: 2 | pass 3 | 4 | 5 | ####################### OpenAI ####################### 6 | class OpenAIConfig(ChatGPTConfig): 7 | api_type: str = "openai" 8 | api_key: str = "" 9 | model: str 10 | limit: int 11 | price: float 12 | 13 | 14 | class OpenAIGPT35Config(OpenAIConfig): 15 | model: str = "gpt-3.5-turbo-0125" 16 | limit: int = 16000 17 | price: float = 0.0005 18 | 19 | 20 | class OpenAIGPT4Config(OpenAIConfig): 21 | model: str = "gpt-4-turbo-preview" 22 | limit: int = 128000 23 | price: float = 0.01 24 | 25 | 26 | ####################### Azure ####################### 27 | 28 | 29 | class AzureConfig(ChatGPTConfig): 30 | api_type: str = "azure" 31 | api_key: str = "" 32 | api_version: str = "2023-12-01-preview" 33 | azure_endpoint: str = "" 34 | model: str 35 | limit: int 36 | price: float 37 | 38 | 39 | class AzureGPT35Config(AzureConfig): 40 | model: str = "gpt-35-turbo-16k-0613" 41 | limit: int = 16000 42 | price: float = 0.0005 43 | 44 | 45 | class AzureGPT4Config(AzureConfig): 46 | model: str = "gpt-4-0125-preview" 47 | limit: int = 128000 48 | price: float = 0.01 49 | -------------------------------------------------------------------------------- /orion/config/my_objectnav_hm3d.yaml: -------------------------------------------------------------------------------- 1 | ENVIRONMENT: 2 | MAX_EPISODE_STEPS: 40000 3 | 4 | SIMULATOR: 5 | TURN_ANGLE: 15 6 | TILT_ANGLE: 15 7 | ACTION_SPACE_CONFIG: "v1" 8 | AGENT_0: 9 | SENSORS: ['RGB_SENSOR', 'DEPTH_SENSOR', "SEMANTIC_SENSOR"] 10 | HEIGHT: 0.88 11 | RADIUS: 0.18 12 | HABITAT_SIM_V0: 13 | GPU_DEVICE_ID: 0 14 | ALLOW_SLIDING: False 15 | SEMANTIC_SENSOR: 16 | WIDTH: 640 17 | HEIGHT: 480 18 | HFOV: 90 19 | POSITION: [0, 0.88, 0] 20 | RGB_SENSOR: 21 | WIDTH: 640 22 | HEIGHT: 480 23 | HFOV: 90 24 | POSITION: [0, 0.88, 0] 25 | DEPTH_SENSOR: 26 | WIDTH: 640 27 | HEIGHT: 480 28 | HFOV: 90 29 | MIN_DEPTH: 0.1 30 | MAX_DEPTH: 10.0 31 | POSITION: [0, 0.88, 0] 32 | TASK: 33 | TYPE: ObjectNav-v1 34 | POSSIBLE_ACTIONS: ["STOP", "MOVE_FORWARD", "TURN_LEFT", "TURN_RIGHT", "LOOK_UP", "LOOK_DOWN", "MOVE_BACKWARD"] 35 | 36 | SENSORS: ['OBJECTGOAL_SENSOR', 'COMPASS_SENSOR', 'GPS_SENSOR'] 37 | GOAL_SENSOR_UUID: objectgoal 38 | MEASUREMENTS: ['DISTANCE_TO_GOAL', 'SUCCESS', 'SPL', 'SOFT_SPL'] 39 | 40 | DISTANCE_TO_GOAL: 41 | DISTANCE_TO: VIEW_POINTS 42 | SUCCESS: 43 | SUCCESS_DISTANCE: 0.1 44 | 45 | DATASET: 46 | TYPE: ObjectNav-v1 47 | SPLIT: val 48 | DATA_PATH: "data/datasets/objectnav_hm3d_v2/{split}/{split}.json.gz" 49 | SCENES_DIR: "data/scene_datasets/" 50 | -------------------------------------------------------------------------------- /orion/gradio_init_img.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sled-group/navchat/afb21a4c9cb13f671534be5460e3c9e5867e5512/orion/gradio_init_img.jpg -------------------------------------------------------------------------------- /orion/map/__init__.py: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/sled-group/navchat/afb21a4c9cb13f671534be5460e3c9e5867e5512/orion/map/__init__.py -------------------------------------------------------------------------------- /orion/map/map.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | from orion.config.my_config import MapConfig 4 | 5 | 6 | class Mapping: 7 | def __init__(self, mapcfg: MapConfig): 8 | self.mapcfg = mapcfg 9 | self.num_grid = mapcfg.num_grid 10 | self.cell_size = mapcfg.cell_size 11 | self.min_depth = mapcfg.min_depth 12 | self.max_depth = mapcfg.max_depth 13 | self.ceiling_height_wrt_camera = mapcfg.ceiling_height_wrt_camera 14 | self.camera_height = mapcfg.camera_height 15 | self.agent_height_tolerance = mapcfg.agent_height_tolerance 16 | self.num_vxl_height = mapcfg.num_vxl_height 17 | self.downsample_factor = mapcfg.downsample_factor 18 | 19 | def update(self, *args, **kwargs): 20 | return NotImplementedError 21 | 22 | def get_point_cloud_from_depth(self, depth: np.ndarray, cam_insc_inv: np.ndarray): 23 | """ 24 | Return 3xN array in camera frame, X right, Y down, Z into the screen 25 | """ 26 | if len(depth.shape) == 3: 27 | depth = depth.squeeze() 28 | 29 | h, w = depth.shape 30 | 31 | y, x = np.meshgrid(np.arange(h), np.arange(w), indexing="ij") 32 | 33 | x = x.reshape((1, -1))[:, :] 34 | y = y.reshape((1, -1))[:, :] 35 | z = depth.reshape((1, -1)) # [1, h*w] 36 | 37 | p_2d = np.vstack([x, y, np.ones_like(x)]) 38 | pc = cam_insc_inv @ p_2d 39 | pc = pc * z # 40 | mask = (pc[2, :] > self.min_depth) * ( 41 | pc[2, :] < self.max_depth * 0.99 42 | ) # avoid non-deteced points 43 | return pc, mask 44 | -------------------------------------------------------------------------------- /orion/map/map_build/build_voxel.py: -------------------------------------------------------------------------------- 1 | import os 2 | import numpy as np 3 | 4 | from orion import logger 5 | from orion.utils.geometry import CoordinateTransform 6 | from orion.config.my_config import * 7 | from orion.utils import file_load as load_utils 8 | from orion.map.voxel import VoxelMapping 9 | from orion.map.voxel_sparse import VoxelMappingSparse 10 | from orion.abstract.perception import ExtractorModule 11 | from orion.perception.extractor.lseg_extractor import LSegExtractor 12 | from orion.utils.geometry import PinholeCameraModel as pinhole 13 | from orion.abstract.interfaces import Observations 14 | 15 | 16 | class OfflineDataLoader: 17 | def __init__(self, data_dir, mapcfg: MapConfig = MapConfig()): 18 | rgb_dir = os.path.join(data_dir, "rgb") 19 | depth_dir = os.path.join(data_dir, "depth") 20 | pose_dir = os.path.join(data_dir, "pose") 21 | semantic_dir = os.path.join(data_dir, "semantic") 22 | 23 | rgb_list = sorted( 24 | os.listdir(rgb_dir), key=lambda x: int(x.split("_")[-1].split(".")[0]) 25 | ) 26 | depth_list = sorted( 27 | os.listdir(depth_dir), key=lambda x: int(x.split("_")[-1].split(".")[0]) 28 | ) 29 | pose_list = sorted( 30 | os.listdir(pose_dir), key=lambda x: int(x.split("_")[-1].split(".")[0]) 31 | ) 32 | semantic_list = sorted( 33 | os.listdir(semantic_dir), key=lambda x: int(x.split("_")[-1].split(".")[0]) 34 | ) 35 | 36 | rgb_list = [os.path.join(rgb_dir, x) for x in rgb_list] 37 | depth_list = [os.path.join(depth_dir, x) for x in depth_list] 38 | pose_list = [os.path.join(pose_dir, x) for x in pose_list] 39 | semantic_list = [os.path.join(semantic_dir, x) for 
x in semantic_list] 40 | 41 | self._data = list(zip(rgb_list, depth_list, semantic_list, pose_list)) 42 | 43 | self.obj2cls_dic, _ = load_utils.load_obj2cls_dict( 44 | os.path.join(data_dir, "obj2cls_dict.txt") 45 | ) 46 | self.mapcfg = mapcfg 47 | 48 | def __getitem__(self, idx): 49 | rgb_path, depth_path, semantic_path, pose_path = self._data[idx] 50 | rgb = load_utils.load_image(rgb_path) 51 | depth = load_utils.load_depth(depth_path) 52 | semantic = load_utils.load_semantic(semantic_path, self.obj2cls_dic) 53 | simpose = load_utils.load_pose(pose_path) 54 | if idx == 0: 55 | self.transform_fn = CoordinateTransform( 56 | num_grd=self.mapcfg.num_grid, 57 | cell_size=self.mapcfg.cell_size, 58 | init_agtpose=simpose, 59 | ) 60 | cam_pose = self.transform_fn.get_relative_campose(simpose) 61 | return Observations( 62 | rgb=rgb, # [h, w, 3] uint8 63 | depth=depth, # [h, w] float32 64 | semantic=semantic, # [h, w] int 65 | rel_cam_pose=cam_pose, # [4, 4] 66 | ) 67 | 68 | def __len__(self): 69 | return len(self._data) 70 | 71 | 72 | class VoxelMapBuilder: 73 | """This is working in another process""" 74 | 75 | def __init__( 76 | self, 77 | save_dir, 78 | mapcfg: MapConfig = MapConfig(), 79 | extractor_type: str = "lseg", 80 | extractor: ExtractorModule = LSegExtractor(), 81 | accelerate_mapping=True, 82 | use_sparse_build=True, 83 | ) -> None: 84 | self.mapcfg = mapcfg 85 | self.extractor_type = extractor_type 86 | self.extractor = extractor 87 | if not use_sparse_build: 88 | self.vxlmap = VoxelMapping( 89 | self.mapcfg, self.extractor, accelerate_mapping=accelerate_mapping 90 | ) 91 | else: 92 | self.vxlmap = VoxelMappingSparse( 93 | self.mapcfg, self.extractor, accelerate_mapping=accelerate_mapping 94 | ) 95 | self.cam_insc = pinhole.get_camera_intrinsic_matrix( 96 | mapcfg.screen_h, mapcfg.screen_w, mapcfg.fov 97 | ) 98 | self.cam_insc_inv = np.linalg.inv(self.cam_insc) 99 | 100 | self.save_dir = save_dir 101 | self.use_sparse_build = use_sparse_build 102 | 103 | def build(self, obs: Observations): 104 | rgb = obs.rgb 105 | depth = obs.depth 106 | semantic = obs.semantic 107 | camera_pose = obs.rel_cam_pose 108 | feats = self.vxlmap.get_feature(rgb) # torch.Tensor cuda 109 | self.vxlmap.update(feats, depth, rgb, semantic, camera_pose, self.cam_insc_inv) 110 | 111 | def _return_result(self): 112 | """return vlmap results for temporaray planning""" 113 | if self.use_sparse_build: 114 | return ValueError("Not supported yet") 115 | featmap = self.vxlmap.featmap 116 | vxlcnt = np.expand_dims(self.vxlmap.vxl_count, axis=-1) 117 | 118 | indices = np.transpose(np.nonzero(np.any(vxlcnt, axis=-1))) 119 | feat_values = featmap[tuple(indices.T)] 120 | 121 | return {"indices": indices, "feat_values": feat_values} 122 | 123 | def _save(self): 124 | if not self.use_sparse_build: 125 | # This can save 100 times of storage space 126 | assert len(self.vxlmap.featmap.shape) == 4 127 | assert len(self.vxlmap.rgbmap.shape) == 4 128 | assert len(self.vxlmap.gtmap.shape) == 3 129 | assert len(self.vxlmap.vxl_count.shape) == 3 130 | 131 | featmap = self.vxlmap.featmap 132 | rgbmap = self.vxlmap.rgbmap 133 | vxlcnt = np.expand_dims(self.vxlmap.vxl_count, axis=-1) 134 | gtmap = np.expand_dims(self.vxlmap.gtmap, axis=-1) 135 | 136 | indices = np.transpose(np.nonzero(np.any(vxlcnt, axis=-1))) 137 | 138 | feat_values = featmap[tuple(indices.T)] 139 | count_values = vxlcnt[tuple(indices.T)].squeeze() 140 | rgb_values = rgbmap[tuple(indices.T)] 141 | gt_values = gtmap[tuple(indices.T)].squeeze() 142 | else: 143 | indices 
= self.vxlmap.indices 144 | feat_values = self.vxlmap.feat_values 145 | count_values = self.vxlmap.vxl_count 146 | rgb_values = self.vxlmap.rgb_values 147 | gt_values = self.vxlmap.gt_values 148 | 149 | if not os.path.exists(self.save_dir): 150 | os.makedirs(self.save_dir) 151 | else: 152 | logger.warning(f"{self.save_dir} already exists.") 153 | input("Press Enter to continue...") 154 | # Save the indices and values 155 | save_path = os.path.join(self.save_dir, "sparse_vxl_map.npz") 156 | np.savez( 157 | save_path, 158 | indices=indices, # [N, 3] 159 | count_values=count_values, # [N,] 160 | feat_values=feat_values, # [N, feat_dim=512] 161 | rgb_values=rgb_values, # [N, 3] 162 | gt_values=gt_values, # [N,] 163 | ) 164 | logger.info(f"{save_path} is saved. (sparse map)") 165 | 166 | @staticmethod 167 | def _save_npy(save_path, array): 168 | with open(save_path, "wb") as f: 169 | np.save(f, array) 170 | logger.info(f"{save_path} is saved.") 171 | -------------------------------------------------------------------------------- /orion/map/map_search/search_base.py: -------------------------------------------------------------------------------- 1 | from typing import List, Optional 2 | 3 | import numpy as np 4 | 5 | from orion import logger 6 | from orion.config.my_config import MapConfig 7 | 8 | 9 | class MapSearch: 10 | def __init__( 11 | self, 12 | load_sparse_map_path: str, 13 | map_index_bound: Optional[List[int]] = None, 14 | mapcfg: MapConfig = MapConfig(), 15 | ): 16 | self.mapcfg = mapcfg 17 | 18 | self._load_sparse_map(load_sparse_map_path) 19 | self.update_index(map_index_bound) 20 | 21 | def _load_sparse_map(self, load_path): 22 | # Load the indices and values 23 | sparse_map = np.load(load_path) 24 | self.indices = sparse_map["indices"] 25 | self.feat_values = sparse_map["feat_values"] 26 | self.vxl_count = sparse_map["count_values"] 27 | self.rgb_values = sparse_map["rgb_values"] 28 | self.gt_values = sparse_map["gt_values"] 29 | self._3dshape = ( 30 | self.mapcfg.num_grid, 31 | self.mapcfg.num_grid, 32 | self.mapcfg.num_vxl_height, 33 | ) 34 | logger.info(f"load_path: {load_path}") 35 | 36 | def update_index(self, map_index_bound: Optional[List[int]] = None): 37 | "save search time when using this to crop the vxlmap" 38 | z_indices, x_indices, y_indices = ( 39 | self.indices[:, 0], 40 | self.indices[:, 1], 41 | self.indices[:, 2], 42 | ) 43 | no_map_mask = np.zeros(shape=(self.mapcfg.num_grid, self.mapcfg.num_grid)) 44 | no_map_mask[z_indices, x_indices] = 1 45 | self.no_map_mask = np.logical_not(no_map_mask) 46 | 47 | if map_index_bound is not None: 48 | self.xmin, self.xmax, self.zmin, self.zmax = map_index_bound 49 | else: 50 | self.xmin = np.min(x_indices) 51 | self.xmax = np.max(x_indices) 52 | self.zmin = np.min(z_indices) 53 | self.zmax = np.max(z_indices) 54 | self.ymin = np.min(y_indices) 55 | self.ymax = np.max(y_indices) 56 | 57 | # logger.info( 58 | # f"map_index_bound: {self.xmin}, {self.xmax}, {self.zmin}, {self.zmax}" 59 | # ) 60 | self.no_map_mask_crop = self.no_map_mask[ 61 | self.zmin : self.zmax + 1, self.xmin : self.xmax + 1 62 | ] 63 | 64 | @staticmethod 65 | def get_BEV_map(indices, values, map_shape): 66 | assert indices.shape[-1] == 3 67 | assert indices.shape[0] == values.shape[0] 68 | assert values.shape[-1] == map_shape[-1] 69 | assert len(map_shape) == 4 70 | 71 | rev_indices = indices[ 72 | ::-1 73 | ] # reverse to get the largest y value for each (z, x) easily 74 | rev_z, rev_x, rev_y = rev_indices[:, 0], rev_indices[:, 1], rev_indices[:, 2] 75 | 
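        # The reverse + np.unique trick below keeps, for each (z, x) column, the entry
        # that appears last in `indices`; when the indices are stored in ascending
        # (z, x, y) order (as np.nonzero produces), that entry is the top-most voxel.
        # `paired_y` then maps the position in the reversed array back into `values`.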
76 | # Create a unique identifier for each (z, x) pair 77 | rev_unique_zx, rev_paired_y = np.unique( 78 | np.column_stack((rev_z, rev_x)), axis=0, return_index=True 79 | ) 80 | 81 | # Find the maximum 'y' value for each unique (z, x) pair 82 | max_y_values = rev_y[rev_paired_y] 83 | 84 | bev_indices_sparse = np.column_stack( 85 | (rev_unique_zx, max_y_values) 86 | ) # [z, x, top_y] 87 | 88 | paired_y = len(indices) - rev_paired_y - 1 89 | 90 | bev_feat_sparse = values[paired_y] # [z, x, feat_dim] 91 | bev_map = np.zeros( 92 | shape=(map_shape[0], map_shape[1], map_shape[3]) 93 | ) # [num_z, num_x, feat_dim] 94 | bev_map[bev_indices_sparse[:, 0], bev_indices_sparse[:, 1], :] = bev_feat_sparse 95 | 96 | return bev_map 97 | -------------------------------------------------------------------------------- /orion/map/occupancy.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | from orion.config.my_config import MapConfig 4 | from orion.map.map import Mapping 5 | from orion.utils.geometry import PinholeCameraModel as pinhole 6 | 7 | 8 | class OccupancyMapping(Mapping): 9 | # mainly for frontier based exploration 10 | # also provide explored area 11 | UNKNOWN = 0 12 | FREE = 1 13 | OCCUPIED = 2 14 | WALL = 3 15 | FRONTIER = 4 16 | UNKNOWN_FREE = 5 17 | 18 | def __init__(self, mapcfg: MapConfig): 19 | super().__init__(mapcfg=mapcfg) 20 | self.map = np.zeros((self.num_grid, self.num_grid), dtype=np.uint8) 21 | 22 | def save(self, path): 23 | np.save(path, self.map) 24 | 25 | def load(self, path): 26 | self.map = np.load(path) 27 | 28 | def reset_floor(self): 29 | self.map[self.map == OccupancyMapping.FREE] = OccupancyMapping.UNKNOWN 30 | self.map[self.map == OccupancyMapping.FRONTIER] = OccupancyMapping.UNKNOWN 31 | 32 | @staticmethod 33 | def color(value, rgba=False): 34 | c = None 35 | if value == OccupancyMapping.UNKNOWN: 36 | c = [0, 0, 0] 37 | elif value == OccupancyMapping.FREE: 38 | c = [255, 255, 255] 39 | elif value == OccupancyMapping.OCCUPIED: 40 | c = [255, 0, 0] 41 | elif value == OccupancyMapping.WALL: 42 | c = [0, 255, 0] 43 | elif value == OccupancyMapping.FRONTIER: 44 | c = [0, 0, 255] 45 | else: 46 | raise ValueError("Not supported enum") 47 | if rgba: 48 | c.append(255) 49 | return c 50 | 51 | def update( 52 | self, 53 | depth: np.ndarray, 54 | relative_campose: np.ndarray, 55 | cam_insc_inv: np.ndarray, 56 | is_rotate: bool = False, 57 | camera_height_change: float = 0.0, 58 | ): 59 | # depth: [h, w], one image each time 60 | 61 | cam_pts, mask = self.get_point_cloud_from_depth(depth, cam_insc_inv) 62 | self.cam_pts = cam_pts 63 | 64 | not_ceiling_mask = cam_pts[1, :] > -self.ceiling_height_wrt_camera 65 | 66 | composite_mask = np.logical_and(mask, not_ceiling_mask) 67 | 68 | cam_pts = cam_pts[:, composite_mask] 69 | 70 | # here the wld frame is the first camera frame 71 | # x right, y down, z forward 72 | wld_pts = pinhole.cam2wld(cam_pts, cam_pose=relative_campose) 73 | wld_pts[1, :] -= camera_height_change 74 | 75 | # This is a simple method to get floor mask. It can not handle slope floor. 76 | # The best way should be using semantic segmentation, like LSeg. 77 | # However we need a trade-off between speed and accuracy. 
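        # Geometry reminder: the world frame here is the first camera frame (x right,
        # y down, z forward), so floor points lie roughly `camera_height` below the
        # camera, i.e. at y ~= +camera_height; the +/- agent_height_tolerance band
        # absorbs depth noise and small pose errors.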
78 | floor_mask = np.logical_and( 79 | wld_pts[1, :] > self.camera_height - self.agent_height_tolerance, 80 | wld_pts[1, :] < self.camera_height + self.agent_height_tolerance, 81 | ) 82 | 83 | wall_mask = wld_pts[1, :] < -self.ceiling_height_wrt_camera + 0.2 84 | 85 | grd_xs = np.round(wld_pts[0, :] / self.cell_size + self.num_grid // 2).astype( 86 | np.int32 87 | ) 88 | grd_zs = np.round(self.num_grid // 2 - wld_pts[2, :] / self.cell_size).astype( 89 | np.int32 90 | ) 91 | 92 | # make sure the last occupaied area will not be free anymore. 93 | # This can avoid more collision 94 | last_nofree_mask = np.logical_or( 95 | self.map[grd_zs, grd_xs] == OccupancyMapping.OCCUPIED, 96 | self.map[grd_zs, grd_xs] == OccupancyMapping.WALL, 97 | ) 98 | last_wall_mask = self.map[grd_zs, grd_xs] == OccupancyMapping.WALL 99 | last_map = self.map.copy() 100 | 101 | free_mask = np.logical_and(~last_nofree_mask, floor_mask) 102 | 103 | self.map[grd_zs, grd_xs] = OccupancyMapping.OCCUPIED 104 | # free_mask = floor_mask 105 | 106 | self.map[grd_zs[free_mask], grd_xs[free_mask]] = OccupancyMapping.FREE 107 | self.map[grd_zs[wall_mask], grd_xs[wall_mask]] = OccupancyMapping.WALL 108 | self.map[grd_zs[last_wall_mask], grd_xs[last_wall_mask]] = OccupancyMapping.WALL 109 | 110 | # delete the possible wall area 111 | floor_mask = np.logical_and(floor_mask, np.logical_not(wall_mask)) 112 | 113 | # calculate eight neighbors 114 | dx = np.array([-1, 0, 1, -1, 1, -1, 0, 1]).reshape(-1, 1) 115 | dz = np.array([-1, -1, -1, 0, 0, 1, 1, 1]).reshape(-1, 1) 116 | 117 | floor_grds = np.stack([grd_xs[floor_mask], grd_zs[floor_mask]]) 118 | unique_floor_grds = np.unique(floor_grds, axis=1) 119 | x_grds_floor, z_grds_floor = unique_floor_grds 120 | neighbor_indices_x = x_grds_floor + dx 121 | neighbor_indices_z = z_grds_floor + dz 122 | 123 | frontier_mask = ( 124 | np.sum( 125 | self.map[neighbor_indices_z, neighbor_indices_x] 126 | == OccupancyMapping.UNKNOWN, 127 | axis=0, 128 | ) 129 | > 1 130 | ) 131 | no_frontier_mask = np.logical_or( 132 | np.sum( 133 | self.map[neighbor_indices_z, neighbor_indices_x] 134 | == OccupancyMapping.WALL, 135 | axis=0, 136 | ) 137 | > 1, 138 | np.sum( 139 | self.map[neighbor_indices_z, neighbor_indices_x] 140 | == OccupancyMapping.OCCUPIED, 141 | axis=0, 142 | ) 143 | > 3, 144 | ) 145 | 146 | frontier_mask = np.logical_and(frontier_mask, np.logical_not(no_frontier_mask)) 147 | self.map[ 148 | z_grds_floor[frontier_mask], x_grds_floor[frontier_mask] 149 | ] = OccupancyMapping.FRONTIER 150 | 151 | # # Post-process 152 | # # This is because the camera height can not be accurate using predicted pose. 153 | # # So there could be some critical points where large free space turn to occupancy suddenly 154 | # # We will avoid this 155 | # increment_occu_mask = np.logical_and( 156 | # self.map == OccupancyMapping.OCCUPIED, 157 | # np.logical_not(last_map == OccupancyMapping.OCCUPIED), 158 | # ) 159 | # floor2occu_mask = np.logical_and( 160 | # increment_occu_mask, last_map == OccupancyMapping.FREE 161 | # ) 162 | # if np.sum(floor2occu_mask) > 50 and is_rotate: # reset 163 | # self.map = last_map.copy() 164 | -------------------------------------------------------------------------------- /orion/map/voxel.py: -------------------------------------------------------------------------------- 1 | """ 2 | Voxel for VLmap. Adapted from VLmap repo. https://github.com/vlmaps/vlmaps. 
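Dense variant: keeps full (num_grid, num_grid, num_vxl_height) arrays holding running-mean visual-language features from the configured extractor, last-written RGB and ground-truth labels, and per-voxel hit counts.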
3 | """ 4 | 5 | import numpy as np 6 | import torch 7 | 8 | from orion import logger 9 | from orion.abstract.perception import ExtractorModule 10 | from orion.config.my_config import MapConfig 11 | from orion.map.map import Mapping 12 | from orion.utils.geometry import PinholeCameraModel as pinhole 13 | 14 | 15 | class VoxelMapping(Mapping): 16 | def __init__( 17 | self, mapcfg: MapConfig, extractor: ExtractorModule, accelerate_mapping=True 18 | ): 19 | super().__init__(mapcfg=mapcfg) 20 | self.feat_dim = extractor.feat_dim # feature dim =1 means gt labels 21 | self.extractor = extractor 22 | 23 | self.featmap = np.zeros( 24 | (self.num_grid, self.num_grid, self.num_vxl_height, self.feat_dim), 25 | dtype=np.float32, 26 | ) 27 | self.vxl_count = np.zeros( 28 | (self.num_grid, self.num_grid, self.num_vxl_height), dtype=np.int32 29 | ) # zxy 30 | 31 | self.rgbmap = np.zeros( 32 | (self.num_grid, self.num_grid, self.num_vxl_height, 3), dtype=np.uint8 33 | ) 34 | self.gtmap = np.zeros( 35 | (self.num_grid, self.num_grid, self.num_vxl_height), dtype=np.int16 36 | ) 37 | 38 | self.accelerate_mapping = accelerate_mapping 39 | 40 | def get_feature(self, rgb: np.ndarray): 41 | return self.extractor.predict(rgb) 42 | 43 | def update( 44 | self, 45 | feats: torch.Tensor, 46 | depth: np.ndarray, 47 | rgb: np.ndarray, 48 | semantic: np.ndarray, 49 | relative_campose: np.ndarray, 50 | cam_insc_inv: np.ndarray, 51 | ): 52 | # depth: [h, w], one image each time 53 | # feature: [h, w, feat_dim] 54 | 55 | cam_pts, mask = self.get_point_cloud_from_depth(depth, cam_insc_inv) 56 | not_ceiling_mask = cam_pts[1, :] > -self.ceiling_height_wrt_camera 57 | composite_mask = np.logical_and(mask, not_ceiling_mask) 58 | cam_pts = cam_pts[:, composite_mask] 59 | 60 | rgb = rgb.reshape(-1, 3) 61 | rgb = rgb[composite_mask, :] 62 | 63 | gt_semantic = semantic.reshape(-1) 64 | gt_semantic = gt_semantic[composite_mask] 65 | 66 | feats = feats.reshape(-1, self.feat_dim) 67 | composite_mask = torch.from_numpy(composite_mask) 68 | composite_mask = composite_mask.to(feats.device) # use gpu to accelerate 69 | feats = feats[composite_mask, :] 70 | feats = feats.cpu().numpy() 71 | composite_mask = None 72 | 73 | # downsample 74 | cam_pts = cam_pts[:, :: self.downsample_factor] 75 | rgb = rgb[:: self.downsample_factor, :] 76 | gt_semantic = gt_semantic[:: self.downsample_factor] 77 | feats = feats[:: self.downsample_factor, :] 78 | 79 | wld_pts = pinhole.cam2wld(cam_pts, cam_pose=relative_campose) 80 | vxl_zs = np.round(self.num_grid // 2 - wld_pts[2, :] / self.cell_size).astype( 81 | np.int32 82 | ) 83 | vxl_xs = np.round(wld_pts[0, :] / self.cell_size + self.num_grid // 2).astype( 84 | np.int32 85 | ) 86 | vxl_ys = np.maximum( 87 | np.round((self.camera_height - wld_pts[1, :]) / self.cell_size).astype( 88 | np.int32 89 | ), 90 | 0, 91 | ) 92 | 93 | if self.accelerate_mapping: 94 | # get unique voxel indices and the index 95 | vxl_zxys, vxl_indices = np.unique( 96 | np.stack([vxl_zs, vxl_xs, vxl_ys], axis=1), axis=0, return_index=True 97 | ) 98 | else: 99 | vxl_zxys = np.stack([vxl_zs, vxl_xs, vxl_ys], axis=1) 100 | vxl_indices = np.arange(vxl_zxys.shape[0]) 101 | 102 | for vxl_zxy, vxl_ind in zip(vxl_zxys, vxl_indices): 103 | self.is_in_grid(vxl_zxy) 104 | self.featmap[vxl_zxy[0], vxl_zxy[1], vxl_zxy[2]] = ( 105 | self.featmap[vxl_zxy[0], vxl_zxy[1], vxl_zxy[2]] 106 | * self.vxl_count[vxl_zxy[0], vxl_zxy[1], vxl_zxy[2]] 107 | + feats[vxl_ind] 108 | ) / (self.vxl_count[vxl_zxy[0], vxl_zxy[1], vxl_zxy[2]] + 1) 109 | 
self.vxl_count[vxl_zxy[0], vxl_zxy[1], vxl_zxy[2]] += 1 110 | 111 | self.rgbmap[vxl_zxy[0], vxl_zxy[1], vxl_zxy[2]] = rgb[vxl_ind] 112 | self.gtmap[vxl_zxy[0], vxl_zxy[1], vxl_zxy[2]] = gt_semantic[vxl_ind] 113 | 114 | def is_in_grid(self, vxl_zxy): 115 | if vxl_zxy[0] < 0 or vxl_zxy[0] >= self.num_grid: 116 | logger.warning(f"vxl_zxy[0] out of range: {vxl_zxy[0]}") 117 | vxl_zxy[0] = np.clip(vxl_zxy[0], 0, self.num_grid - 1) 118 | if vxl_zxy[1] < 0 or vxl_zxy[1] >= self.num_grid: 119 | logger.warning(f"vxl_zxy[1] out of range: {vxl_zxy[1]}") 120 | vxl_zxy[1] = np.clip(vxl_zxy[1], 0, self.num_grid - 1) 121 | if vxl_zxy[2] < 0 or vxl_zxy[2] >= self.num_vxl_height: 122 | logger.warning(f"vxl_zxy[2] out of range: {vxl_zxy[2]}") 123 | vxl_zxy[2] = np.clip(vxl_zxy[2], 0, self.num_vxl_height - 1) 124 | -------------------------------------------------------------------------------- /orion/map/voxel_sparse.py: -------------------------------------------------------------------------------- 1 | """ 2 | Voxel for VLmap 3 | """ 4 | 5 | import numpy as np 6 | import torch 7 | 8 | from orion import logger 9 | from orion.abstract.perception import ExtractorModule 10 | from orion.config.my_config import MapConfig 11 | from orion.map.map import Mapping 12 | from orion.utils.geometry import PinholeCameraModel as pinhole 13 | 14 | 15 | class VoxelMappingSparse(Mapping): 16 | def __init__( 17 | self, mapcfg: MapConfig, extractor: ExtractorModule, accelerate_mapping=True 18 | ): 19 | super().__init__(mapcfg=mapcfg) 20 | self.feat_dim = extractor.feat_dim # feature dim =1 means gt labels 21 | self.extractor = extractor 22 | 23 | self.feat_values = np.empty(shape=(0, self.feat_dim), dtype=np.float32) 24 | self.indices = np.empty(shape=(0, 3), dtype=np.int16) 25 | self.count_values = np.empty(shape=(0), dtype=np.int32) 26 | self.rgb_values = np.empty(shape=(0, 3), dtype=np.uint8) 27 | self.gt_values = np.empty(shape=(0), dtype=np.int8) 28 | 29 | self.accelerate_mapping = accelerate_mapping 30 | 31 | def get_feature(self, rgb: np.ndarray): 32 | return self.extractor.predict(rgb) 33 | 34 | def update( 35 | self, 36 | feats: torch.Tensor, 37 | depth: np.ndarray, 38 | rgb: np.ndarray, 39 | semantic: np.ndarray, 40 | relative_campose: np.ndarray, 41 | cam_insc_inv: np.ndarray, 42 | ): 43 | # depth: [h, w], one image each time 44 | # feature: [h, w, feat_dim] 45 | 46 | cam_pts, mask = self.get_point_cloud_from_depth(depth, cam_insc_inv) 47 | not_ceiling_mask = cam_pts[1, :] > -self.ceiling_height_wrt_camera 48 | composite_mask = np.logical_and(mask, not_ceiling_mask) 49 | cam_pts = cam_pts[:, composite_mask] 50 | 51 | rgb = rgb.reshape(-1, 3) 52 | rgb = rgb[composite_mask, :] 53 | 54 | gt_semantic = semantic.reshape(-1) 55 | gt_semantic = gt_semantic[composite_mask] 56 | 57 | feats = feats.reshape(-1, self.feat_dim) 58 | composite_mask = torch.from_numpy(composite_mask) 59 | composite_mask = composite_mask.to(feats.device) # use gpu to accelerate 60 | feats = feats[composite_mask, :] 61 | feats = feats.cpu().numpy() 62 | composite_mask = None 63 | 64 | # downsample 65 | cam_pts = cam_pts[:, :: self.downsample_factor] 66 | rgb = rgb[:: self.downsample_factor, :] 67 | gt_semantic = gt_semantic[:: self.downsample_factor] 68 | feats = feats[:: self.downsample_factor, :] 69 | 70 | wld_pts = pinhole.cam2wld(cam_pts, cam_pose=relative_campose) 71 | vxl_zs = np.round(self.num_grid // 2 - wld_pts[2, :] / self.cell_size).astype( 72 | np.int32 73 | ) 74 | vxl_xs = np.round(wld_pts[0, :] / self.cell_size + self.num_grid // 
2).astype( 75 | np.int32 76 | ) 77 | vxl_ys = np.maximum( 78 | np.round((self.camera_height - wld_pts[1, :]) / self.cell_size).astype( 79 | np.int32 80 | ), 81 | 0, 82 | ) 83 | 84 | if self.accelerate_mapping: 85 | # get unique voxel indices and the index 86 | vxl_zxys, vxl_indices = np.unique( 87 | np.stack([vxl_zs, vxl_xs, vxl_ys], axis=1), axis=0, return_index=True 88 | ) 89 | else: 90 | vxl_zxys = np.stack([vxl_zs, vxl_xs, vxl_ys], axis=1) 91 | vxl_indices = np.arange(vxl_zxys.shape[0]) 92 | 93 | for vxl_zxy, vxl_ind in zip(vxl_zxys, vxl_indices): 94 | self.is_in_grid(vxl_zxy) 95 | 96 | if len(self.indices) == 0: 97 | self.indices = np.vstack([self.indices, vxl_zxy]) 98 | self.feat_values = np.vstack([self.feat_values, feats[vxl_ind]]) 99 | self.count_values = np.append(self.count_values, 1) 100 | self.rgb_values = np.vstack([self.rgb_values, rgb[vxl_ind]]) 101 | self.gt_values = np.append(self.gt_values, gt_semantic[vxl_ind]) 102 | else: 103 | idx = np.where((self.indices == vxl_zxy).all(axis=1))[0] 104 | if len(idx) > 0: # already in the indices 105 | row_index = idx[0] 106 | self.feat_values[row_index] = ( 107 | self.feat_values[row_index] * self.count_values[row_index] 108 | + feats[vxl_ind] 109 | ) / (self.count_values[row_index] + 1) 110 | self.count_values[row_index] += 1 111 | self.rgb_values[row_index] = rgb[vxl_ind] 112 | self.gt_values[row_index] = gt_semantic[vxl_ind] 113 | else: 114 | self.indices = np.vstack([self.indices, vxl_zxy]) 115 | self.feat_values = np.vstack([self.feat_values, feats[vxl_ind]]) 116 | self.count_values = np.append(self.count_values, 1) 117 | self.rgb_values = np.vstack([self.rgb_values, rgb[vxl_ind]]) 118 | self.gt_values = np.append(self.gt_values, gt_semantic[vxl_ind]) 119 | 120 | def is_in_grid(self, vxl_zxy): 121 | if vxl_zxy[0] < 0 or vxl_zxy[0] >= self.num_grid: 122 | logger.warning(f"vxl_zxy[0] out of range: {vxl_zxy[0]}") 123 | vxl_zxy[0] = np.clip(vxl_zxy[0], 0, self.num_grid - 1) 124 | if vxl_zxy[1] < 0 or vxl_zxy[1] >= self.num_grid: 125 | logger.warning(f"vxl_zxy[1] out of range: {vxl_zxy[1]}") 126 | vxl_zxy[1] = np.clip(vxl_zxy[1], 0, self.num_grid - 1) 127 | if vxl_zxy[2] < 0 or vxl_zxy[2] >= self.num_vxl_height: 128 | logger.warning(f"vxl_zxy[2] out of range: {vxl_zxy[2]}") 129 | vxl_zxy[2] = np.clip(vxl_zxy[2], 0, self.num_vxl_height - 1) 130 | -------------------------------------------------------------------------------- /orion/navigation/shortest_path_follower_wrapper.py: -------------------------------------------------------------------------------- 1 | """ 2 | Interface for habitat ShortestPathFollower and My ShortestPathFollower 3 | """ 4 | 5 | 6 | import cv2 7 | import numpy as np 8 | 9 | from orion import logger 10 | from orion.abstract.pose import Agent2DPose 11 | from orion.navigation.fmm_planner import FMMPlanner 12 | from orion.navigation.waypoint_planner import PointPlanner 13 | from orion.utils import visulization as vis 14 | from orion.utils.geometry import CoordinateTransform 15 | 16 | 17 | class ShortestPathFollowerBase: 18 | def is_navigable(self, pose: Agent2DPose): 19 | raise NotImplementedError 20 | 21 | def get_next_action(self, *args, **kwargs): 22 | raise NotImplementedError 23 | 24 | def set_traversible_map(self, traversible: np.ndarray): 25 | raise NotImplementedError 26 | 27 | def get_navigable_mask(self): 28 | raise NotImplementedError 29 | 30 | def revise_pose(self, pose: Agent2DPose): 31 | # since the map is constantly changing, we need to revise the pose 32 | if self.is_navigable(pose): 33 | return 
pose 34 | else: 35 | logger.info("[PathFollower] Pose is not navigable, revise it auto") 36 | navi_mask = self.get_navigable_mask() 37 | navi_mask = vis.get_largest_connected_area(navi_mask) 38 | 39 | new_pose = PointPlanner.plan_reachable_point( 40 | cen_x=pose.x, 41 | cen_z=pose.z, 42 | navigable_mask=navi_mask, 43 | max_radius=5, 44 | ) 45 | if new_pose is None: 46 | logger.info( 47 | "[PathFollower] Can not find reachable nearby point, return original pose" 48 | ) 49 | return pose 50 | else: 51 | return Agent2DPose(new_pose.x, new_pose.z, pose.t) 52 | 53 | 54 | class MyFollower(ShortestPathFollowerBase): 55 | def __init__( 56 | self, 57 | num_rots: int = 360 // 15, 58 | step_size: int = int(0.25 / 0.05), 59 | goal_radius: int = int(0.3 / 0.05), 60 | wheel_radius: int = int(0.2 / 0.05), 61 | ): 62 | self.follower = FMMPlanner(num_rots, step_size, goal_radius, wheel_radius) 63 | 64 | def set_traversible_map(self, traversible: np.ndarray): 65 | self.follower.set_traversible_map(traversible) 66 | 67 | def is_navigable(self, pose: Agent2DPose): 68 | return self.follower.is_navigable(pose) 69 | 70 | def get_navigable_mask(self): 71 | return self.follower.original_traversible 72 | 73 | def get_next_action( 74 | self, 75 | start: Agent2DPose, 76 | goal: Agent2DPose, 77 | pre_collision_dict=None, 78 | goal_dist=None, 79 | ): 80 | _ = self.follower.get_action( 81 | start, goal, pre_collision_dict=pre_collision_dict, goal_dist=goal_dist 82 | ) 83 | return _[0] 84 | 85 | 86 | class HabitatFollower(ShortestPathFollowerBase): 87 | def __init__( 88 | self, 89 | env_sim, 90 | navigatable_mask: np.ndarray, 91 | transform_fn: CoordinateTransform, 92 | goal_radius: float = 0.3, 93 | return_one_hot: bool = False, 94 | ): 95 | self.env_sim = env_sim 96 | from habitat.tasks.nav.shortest_path_follower import ( 97 | ShortestPathFollower as HabitatShortestPathFollower, 98 | ) 99 | 100 | self.follower = HabitatShortestPathFollower( 101 | env_sim, goal_radius=goal_radius, return_one_hot=return_one_hot 102 | ) 103 | self.navigable_mask = navigatable_mask 104 | self.transform_fn = transform_fn 105 | 106 | def set_traversible_map(self, traversible: np.ndarray): 107 | pass 108 | 109 | def is_navigable(self, pose: Agent2DPose): 110 | pos = self.transform_fn.grd2agt_pos(pose) 111 | return self.env_sim.is_navigable(pos) 112 | 113 | def get_navigable_mask(self): 114 | return self.navigable_mask 115 | 116 | def get_next_action(self, start: Agent2DPose, goal: Agent2DPose, *args, **kwargs): 117 | goal_pos = self.transform_fn.grd2agt_pos(goal) 118 | return self.follower.get_next_action(goal_pos) 119 | -------------------------------------------------------------------------------- /orion/perception/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sled-group/navchat/afb21a4c9cb13f671534be5460e3c9e5867e5512/orion/perception/__init__.py -------------------------------------------------------------------------------- /orion/perception/detector/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sled-group/navchat/afb21a4c9cb13f671534be5460e3c9e5867e5512/orion/perception/detector/__init__.py -------------------------------------------------------------------------------- /orion/perception/detector/clipgradcam.py: -------------------------------------------------------------------------------- 1 | import cv2 2 | import numpy as np 3 | import torch 4 | from PIL import Image 5 | 
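# Note: this imports the CLIP copy vendored under orion/perception/detector/gradcam
# (likely pulled in as a git submodule, given the repository's .gitmodules); that copy
# exposes the per-block `attn_probs` used by `interpret` below, which the stock CLIP
# package does not retain.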
6 | import orion.perception.detector.gradcam.CLIP.clip as clip 7 | 8 | 9 | class CLIPGradCAM: 10 | def __init__(self): 11 | self.device = "cuda" if torch.cuda.is_available() else "cpu" 12 | self.model, self.preprocess = clip.load( 13 | "ViT-B/32", device=self.device, jit=False 14 | ) 15 | 16 | def interpret(self, image, texts, start_layer=-1): 17 | batch_size = texts.shape[0] 18 | images = image.repeat(batch_size, 1, 1, 1) 19 | logits_per_image, logits_per_text = self.model(images, texts) 20 | probs = logits_per_image.softmax(dim=-1).detach().cpu().numpy() 21 | index = [i for i in range(batch_size)] 22 | one_hot = np.zeros( 23 | (logits_per_image.shape[0], logits_per_image.shape[1]), dtype=np.float32 24 | ) 25 | one_hot[torch.arange(logits_per_image.shape[0]), index] = 1 26 | one_hot = torch.from_numpy(one_hot).requires_grad_(True) 27 | one_hot = torch.sum(one_hot.cuda() * logits_per_image) 28 | self.model.zero_grad() 29 | 30 | image_attn_blocks = list( 31 | dict(self.model.visual.transformer.resblocks.named_children()).values() 32 | ) 33 | 34 | if start_layer == -1: 35 | # calculate index of last layer 36 | start_layer = len(image_attn_blocks) - 1 37 | 38 | num_tokens = image_attn_blocks[0].attn_probs.shape[-1] 39 | R = torch.eye( 40 | num_tokens, num_tokens, dtype=image_attn_blocks[0].attn_probs.dtype 41 | ).to(self.device) 42 | R = R.unsqueeze(0).expand(batch_size, num_tokens, num_tokens) 43 | for i, blk in enumerate(image_attn_blocks): 44 | if i < start_layer: 45 | continue 46 | grad = torch.autograd.grad(one_hot, [blk.attn_probs], retain_graph=True)[ 47 | 0 48 | ].detach() 49 | cam = blk.attn_probs.detach() 50 | cam = cam.reshape(-1, cam.shape[-1], cam.shape[-1]) 51 | grad = grad.reshape(-1, grad.shape[-1], grad.shape[-1]) 52 | cam = grad * cam 53 | cam = cam.reshape(batch_size, -1, cam.shape[-1], cam.shape[-1]) 54 | cam = cam.clamp(min=0).mean(dim=1) 55 | R = R + torch.bmm(cam, R) 56 | image_relevance = R[:, 0, 1:] 57 | 58 | dim = int(image_relevance.numel() ** 0.5) 59 | image_relevance = image_relevance.reshape(1, 1, dim, dim) 60 | image_relevance = torch.nn.functional.interpolate( 61 | image_relevance, size=224, mode="bilinear" 62 | ) 63 | image_relevance = image_relevance.reshape(224, 224).cuda().data.cpu().numpy() 64 | image_relevance = (image_relevance - image_relevance.min()) / ( 65 | image_relevance.max() - image_relevance.min() 66 | ) 67 | 68 | return self.find_centroid(image_relevance) 69 | 70 | def find_centroid(self, object_mask): 71 | us, vs = np.where(object_mask > 0.99) 72 | if len(us) == 0: 73 | return None 74 | 75 | mean_u = np.mean(us) 76 | mean_v = np.mean(vs) 77 | index = np.argmin((us - mean_u) ** 2 + (vs - mean_v) ** 2, axis=None) 78 | y, x = us[index], vs[index] 79 | return (x, y) # (7,215) 80 | 81 | def predict(self, rgb: np.ndarray, txt: str): 82 | img = self.preprocess(Image.fromarray(rgb)).unsqueeze(0).to(self.device) 83 | texts = [txt] 84 | text = clip.tokenize(texts).to(self.device) 85 | pt = self.interpret(image=img, texts=text) 86 | if pt is None: 87 | return None 88 | else: 89 | # resize back to original image size 90 | x, y = pt 91 | x = int(pt[0] * rgb.shape[1] / 224) 92 | x = max(min(x, rgb.shape[1] - 1), 0) 93 | y = int(pt[1] * rgb.shape[0] / 224) 94 | y = max(min(y, rgb.shape[0] - 1), 0) 95 | return (x, y) 96 | -------------------------------------------------------------------------------- /orion/perception/detector/groundingSAM.py: -------------------------------------------------------------------------------- 1 | import json 2 | import os 3 | 
import warnings 4 | 5 | # Grounding DINO 6 | import groundingdino.datasets.transforms as T 7 | import matplotlib.pyplot as plt 8 | import numpy as np 9 | import torch 10 | from groundingdino.models import build_model 11 | from groundingdino.util import box_ops 12 | from groundingdino.util.slconfig import SLConfig 13 | from groundingdino.util.utils import clean_state_dict, get_phrases_from_posmap 14 | from PIL import Image 15 | # segment anything 16 | from segment_anything import SamPredictor, build_sam 17 | 18 | from orion import logger 19 | from orion.abstract.interfaces import Observations, TextQuery 20 | from orion.abstract.perception import DetectionModule, MaskedBBOX 21 | from orion.config.my_config import GroundingDINOConfig 22 | 23 | warnings.filterwarnings("ignore") 24 | 25 | 26 | def load_image(image): 27 | if isinstance(image, str) and os.path.exists(image): 28 | image_path = image 29 | image_pil = Image.open(image_path).convert("RGB") # load image 30 | else: 31 | image_pil = Image.fromarray(image) 32 | 33 | transform = T.Compose( 34 | [ 35 | T.RandomResize([800], max_size=1333), 36 | T.ToTensor(), 37 | T.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]), 38 | ] 39 | ) 40 | image, _ = transform(image_pil, None) # 3, h, w 41 | return image_pil, image 42 | 43 | 44 | def load_model(model_config_path, model_checkpoint_path, device): 45 | args = SLConfig.fromfile(model_config_path) 46 | args.device = device 47 | model = build_model(args) 48 | checkpoint = torch.load(model_checkpoint_path, map_location="cpu") 49 | load_res = model.load_state_dict( 50 | clean_state_dict(checkpoint["model"]), strict=False 51 | ) 52 | _ = model.eval() 53 | return model 54 | 55 | 56 | def get_grounding_output( 57 | model, image, caption, box_threshold, text_threshold, with_logits=True, device="cpu" 58 | ): 59 | caption = caption.lower() 60 | caption = caption.strip() 61 | if not caption.endswith("."): 62 | caption = caption + "." 
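    # GroundingDINO expects a lowercase caption terminated with '.'; the model call below
    # returns token-level logits for each query box, which are first filtered against
    # box_threshold and then decoded into phrases using text_threshold.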
63 | model = model.to(device) 64 | image = image.to(device) 65 | with torch.no_grad(): 66 | outputs = model(image[None], captions=[caption]) 67 | logits = outputs["pred_logits"].cpu().sigmoid()[0] # (nq, 256) 68 | boxes = outputs["pred_boxes"].cpu()[0] # (nq, 4) 69 | logits.shape[0] 70 | 71 | # filter output 72 | logits_filt = logits.clone() 73 | boxes_filt = boxes.clone() 74 | filt_mask = logits_filt.max(dim=1)[0] > box_threshold 75 | logits_filt = logits_filt[filt_mask] # num_filt, 256 76 | boxes_filt = boxes_filt[filt_mask] # num_filt, 4 77 | logits_filt.shape[0] 78 | 79 | # get phrase 80 | tokenlizer = model.tokenizer 81 | tokenized = tokenlizer(caption) 82 | # build pred 83 | pred_phrases = [] 84 | for logit, box in zip(logits_filt, boxes_filt): 85 | pred_phrase = get_phrases_from_posmap( 86 | logit > text_threshold, tokenized, tokenlizer 87 | ) 88 | if with_logits: 89 | pred_phrases.append(pred_phrase + f"({str(logit.max().item())[:4]})") 90 | else: 91 | pred_phrases.append(pred_phrase) 92 | 93 | return boxes_filt, pred_phrases 94 | 95 | 96 | def show_mask(mask, ax, random_color=False): 97 | if random_color: 98 | color = np.concatenate([np.random.random(3), np.array([0.6])], axis=0) 99 | else: 100 | color = np.array([30 / 255, 144 / 255, 255 / 255, 0.6]) 101 | h, w = mask.shape[-2:] 102 | mask_image = mask.reshape(h, w, 1) * color.reshape(1, 1, -1) 103 | ax.imshow(mask_image) 104 | 105 | 106 | def show_box(box, ax, label): 107 | x0, y0 = box[0], box[1] 108 | w, h = box[2] - box[0], box[3] - box[1] 109 | ax.add_patch( 110 | plt.Rectangle((x0, y0), w, h, edgecolor="green", facecolor=(0, 0, 0, 0), lw=2) 111 | ) 112 | ax.text(x0, y0, label) 113 | 114 | 115 | def save_mask_data(output_dir, mask_list, box_list, label_list): 116 | value = 0 # 0 for background 117 | 118 | mask_img = torch.zeros(mask_list.shape[-2:]) 119 | for idx, mask in enumerate(mask_list): 120 | mask_img[mask.cpu().numpy()[0] == True] = value + idx + 1 121 | plt.figure(figsize=(10, 10)) 122 | plt.imshow(mask_img.numpy()) 123 | plt.axis("off") 124 | plt.savefig( 125 | os.path.join(output_dir, "mask.jpg"), 126 | bbox_inches="tight", 127 | dpi=300, 128 | pad_inches=0.0, 129 | ) 130 | 131 | json_data = [{"value": value, "label": "background"}] 132 | for label, box in zip(label_list, box_list): 133 | value += 1 134 | name, logit = label.split("(") 135 | logit = logit[:-1] # the last is ')' 136 | json_data.append( 137 | { 138 | "value": value, 139 | "label": name, 140 | "logit": float(logit), 141 | "box": box.numpy().tolist(), 142 | } 143 | ) 144 | with open(os.path.join(output_dir, "mask.json"), "w") as f: 145 | json.dump(json_data, f) 146 | 147 | 148 | class GroundingSAM(DetectionModule): 149 | def __init__(self, cfg=GroundingDINOConfig()): 150 | self.cfg = cfg 151 | 152 | # load model 153 | self.model = load_model( 154 | self.cfg.config_file, self.cfg.grounded_checkpoint, device=self.cfg.device 155 | ) 156 | 157 | # initialize SAM 158 | self.predictor = SamPredictor( 159 | build_sam(checkpoint=self.cfg.sam_checkpoint).to(self.cfg.device) 160 | ) 161 | 162 | def predict(self, rgb: np.ndarray, txt: TextQuery) -> MaskedBBOX: 163 | """text prompt should be a sentence or multiple words separated by 164 | ' . '. 
tgt_object should be a single noun word 165 | """ 166 | # load image 167 | 168 | txt_prompt: str = txt.prompt.lower() if txt.prompt else "" 169 | txt_object: str = txt.target.lower() if txt.target else "" 170 | 171 | image_pil, image = load_image(rgb) 172 | 173 | # run grounding dino model 174 | boxes_filt, pred_phrases = get_grounding_output( 175 | self.model, 176 | image, 177 | txt_prompt, 178 | self.cfg.box_threshold, 179 | self.cfg.text_threshold, 180 | device=self.cfg.device, 181 | ) 182 | 183 | if all(txt_object not in p for p in pred_phrases): 184 | return MaskedBBOX(False, [], [], []) 185 | 186 | self.predictor.set_image(rgb) 187 | size = image_pil.size 188 | H, W = size[1], size[0] 189 | for i in range(boxes_filt.size(0)): 190 | boxes_filt[i] = boxes_filt[i] * torch.Tensor([W, H, W, H]) 191 | boxes_filt[i][:2] -= boxes_filt[i][2:] / 2 192 | boxes_filt[i][2:] += boxes_filt[i][:2] 193 | 194 | boxes_filt = boxes_filt.cpu() 195 | 196 | transformed_boxes = self.predictor.transform.apply_boxes_torch( 197 | boxes_filt, rgb.shape[:2] 198 | ).to(self.cfg.device) 199 | 200 | masks, _, _ = self.predictor.predict_torch( 201 | point_coords=None, 202 | point_labels=None, 203 | boxes=transformed_boxes.to(self.cfg.device), 204 | multimask_output=False, 205 | ) 206 | 207 | bboxes = boxes_filt.numpy().astype(np.int32) 208 | texts = pred_phrases 209 | masks = masks.cpu().numpy() 210 | 211 | tuple_list = [] 212 | for bbox, text, mask in zip(bboxes, texts, masks): 213 | if txt_object in text: 214 | tuple_list.append((bbox, text, mask)) 215 | 216 | return MaskedBBOX.from_tuple_list(True, tuple_list) 217 | -------------------------------------------------------------------------------- /orion/perception/extractor/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sled-group/navchat/afb21a4c9cb13f671534be5460e3c9e5867e5512/orion/perception/extractor/__init__.py -------------------------------------------------------------------------------- /orion/perception/extractor/clipbase.py: -------------------------------------------------------------------------------- 1 | from typing import List, Union 2 | 3 | import numpy as np 4 | import open_clip 5 | import torch 6 | from PIL import Image 7 | 8 | from orion.abstract.interfaces import TextQueries 9 | from orion.config.my_config import CLIPConfig 10 | 11 | 12 | class CLIPBase: 13 | """use clip text encoder to get vector representation of text, 14 | other vision backbone to get pixel-wise vector representation of image, 15 | two vectors should are in the same semantic space 16 | """ 17 | 18 | def __init__(self, cfg: CLIPConfig): 19 | self.device = cfg.device 20 | self.clip_version = cfg.clip_version 21 | self.openclip_pretained = cfg.openclip_pretained 22 | self.feat_dim = {"ViT-B-32": 512, "ViT-B-16": 512, "ViT-L-14": 768}[ 23 | cfg.clip_version 24 | ] 25 | ( 26 | self.clip_model, 27 | _, 28 | self.clip_preprocess, 29 | ) = open_clip.create_model_and_transforms( 30 | self.clip_version, pretrained=self.openclip_pretained 31 | ) 32 | self.tokenizer = open_clip.get_tokenizer(self.clip_version) 33 | self.clip_model.to(self.device) 34 | self.cfg = cfg 35 | 36 | def encode_text(self, txts: Union[List[str], TextQueries]) -> torch.Tensor: 37 | if isinstance(txts, list): 38 | txts = TextQueries(txts) 39 | tok = self.tokenizer(txts.prompts).to(self.device) 40 | with torch.no_grad(), torch.cuda.amp.autocast(): 41 | text_features = self.clip_model.encode_text(tok) 42 | text_features /= 
text_features.norm(dim=-1, keepdim=True) 43 | return text_features 44 | 45 | def encode_image(self, image: Union[np.ndarray, Image.Image, str]) -> torch.Tensor: 46 | # single image -> single vec 47 | if isinstance(image, np.ndarray): 48 | assert len(image.shape) == 3, "image should be [h, w, c]" 49 | image = Image.fromarray(image) 50 | elif isinstance(image, str): 51 | image = Image.open(image) 52 | 53 | image = self.clip_preprocess(image).unsqueeze(0).to(self.device) 54 | with torch.no_grad(), torch.cuda.amp.autocast(): 55 | image_features = self.clip_model.encode_image(image) 56 | image_features /= image_features.norm(dim=-1, keepdim=True) 57 | return image_features 58 | 59 | def score(self, image_feat: torch.Tensor, text_feat: torch.Tensor) -> np.ndarray: 60 | return (100.0 * image_feat @ text_feat.T).softmax(dim=-1).cpu().numpy() 61 | 62 | def predict(self, rgb: np.ndarray, txts: TextQueries): 63 | image_feat = self.encode_image(rgb) 64 | text_feat = self.encode_text(txts) 65 | text_probs = self.score(image_feat, text_feat) 66 | return text_probs 67 | -------------------------------------------------------------------------------- /orion/perception/extractor/concept_fusion_extractor.py: -------------------------------------------------------------------------------- 1 | """ 2 | Adapted from ConceptFusion repo. https://github.com/concept-fusion/concept-fusion 3 | """ 4 | 5 | 6 | from typing import Union 7 | 8 | import numpy as np 9 | import torch 10 | from segment_anything import SamAutomaticMaskGenerator, sam_model_registry 11 | 12 | from orion.abstract.perception import PerceptionModule 13 | from orion.config.my_config import (CLIPConfig_vitL14_datacomp, 14 | ConceptFusionConfig) 15 | from orion.perception.extractor.clipbase import CLIPBase 16 | 17 | 18 | class ConceptFusionExtractor(PerceptionModule): 19 | def __init__( 20 | self, cfg: ConceptFusionConfig = ConceptFusionConfig(), height=480, width=640 21 | ): 22 | self.height = height 23 | self.width = width 24 | self.sam = sam_model_registry[cfg.sam_model_type](checkpoint=cfg.sam_ckpt_path) 25 | self.sam.to(device=cfg.device) 26 | self.mask_generator = SamAutomaticMaskGenerator( 27 | model=self.sam, 28 | points_per_side=8, 29 | pred_iou_thresh=0.92, 30 | crop_n_layers=1, 31 | crop_n_points_downscale_factor=2, 32 | ) 33 | 34 | self.clip_model = CLIPBase(CLIPConfig_vitL14_datacomp()) 35 | self.device = cfg.device 36 | self.feat_dim = self.clip_model.feat_dim 37 | 38 | @torch.no_grad() 39 | def predict(self, rgb: Union[np.ndarray, torch.Tensor]) -> torch.Tensor: 40 | # Extracting SAM masks.. 41 | masks = self.mask_generator.generate(rgb) # around 4s in cuda for 480x640 42 | 43 | with torch.cuda.amp.autocast(): 44 | # Extracting global CLIP features 45 | global_feat = self.clip_model.encode_image(rgb) 46 | global_feat /= global_feat.norm(dim=-1, keepdim=True) # (1, h, w, feat_dim) 47 | global_feat = torch.nn.functional.normalize(global_feat, dim=-1) 48 | feat_dim = global_feat.shape[-1] 49 | cosine_similarity = torch.nn.CosineSimilarity(dim=-1) 50 | 51 | feat_per_roi = [] 52 | roi_nonzero_inds = [] 53 | similarity_scores = [] 54 | for maskidx in range(len(masks)): 55 | try: 56 | _x, _y, _w, _h = tuple(masks[maskidx]["bbox"]) # xywh bounding box 57 | seg = masks[maskidx]["segmentation"] 58 | nonzero_inds = torch.argwhere( 59 | torch.from_numpy(masks[maskidx]["segmentation"]) 60 | ) 61 | # Note: Image is (H, W, 3). 
In SAM output, y coords are along height, x along width 62 | img_roi = rgb[_y : _y + _h, _x : _x + _w, :] 63 | roifeat = self.clip_model.encode_image(img_roi) 64 | roifeat = torch.nn.functional.normalize(roifeat, dim=-1) 65 | except: 66 | roifeat = global_feat.clone().detach() 67 | feat_per_roi.append(roifeat) 68 | roi_nonzero_inds.append(nonzero_inds) 69 | _sim = cosine_similarity(global_feat, roifeat) 70 | similarity_scores.append(_sim) 71 | 72 | similarity_scores = torch.cat(similarity_scores) 73 | softmax_scores = torch.nn.functional.softmax(similarity_scores, dim=0) 74 | outfeat = torch.zeros(self.height, self.width, feat_dim, dtype=torch.half) 75 | for maskidx in range(len(masks)): 76 | _weighted_feat = ( 77 | softmax_scores[maskidx] * global_feat 78 | + (1 - softmax_scores[maskidx]) * feat_per_roi[maskidx] 79 | ) 80 | _weighted_feat = torch.nn.functional.normalize(_weighted_feat, dim=-1) 81 | outfeat[ 82 | roi_nonzero_inds[maskidx][:, 0], roi_nonzero_inds[maskidx][:, 1] 83 | ] += (_weighted_feat[0].detach().cpu().half()) 84 | outfeat[ 85 | roi_nonzero_inds[maskidx][:, 0], roi_nonzero_inds[maskidx][:, 1] 86 | ] = torch.nn.functional.normalize( 87 | outfeat[ 88 | roi_nonzero_inds[maskidx][:, 0], roi_nonzero_inds[maskidx][:, 1] 89 | ].float(), 90 | dim=-1, 91 | ).half() 92 | 93 | outfeat = outfeat.unsqueeze(0).float() 94 | outfeat = torch.nn.functional.normalize(outfeat, dim=-1) 95 | outfeat = outfeat[0].half() # --> H, W, feat_dim 96 | return outfeat.cpu() 97 | -------------------------------------------------------------------------------- /orion/perception/extractor/lseg_extractor.py: -------------------------------------------------------------------------------- 1 | """ 2 | Adapted from VLmap repo. https://github.com/vlmaps/vlmaps. 3 | """ 4 | 5 | from typing import Union 6 | 7 | import clip 8 | import numpy as np 9 | import torch 10 | 11 | from orion.abstract.interfaces import TextQueries 12 | from orion.abstract.perception import PerceptionModule 13 | from orion.config.my_config import LsegConfig 14 | from orion.perception.extractor.lseg_module import LSegEncDecNet 15 | 16 | 17 | class LSegExtractor(PerceptionModule): 18 | def __init__(self, cfg: LsegConfig = LsegConfig()): 19 | model = LSegEncDecNet( 20 | arch_option=0, block_depth=0, activation="lrelu", visualize=False 21 | ) 22 | 23 | model_state_dict = model.state_dict() 24 | pretrained_state_dict = torch.load(cfg.ckpt_path) 25 | pretrained_state_dict = { 26 | k.lstrip("net."): v for k, v in pretrained_state_dict["state_dict"].items() 27 | } 28 | model_state_dict.update(pretrained_state_dict) 29 | model.load_state_dict(pretrained_state_dict) 30 | 31 | model.eval() 32 | model = model.to(cfg.device) 33 | self.model = model 34 | 35 | self.feat_dim = self.model.out_c 36 | self.device = cfg.device 37 | self.cfg = cfg 38 | 39 | @torch.no_grad() 40 | def predict(self, rgb: Union[np.ndarray, torch.Tensor]) -> torch.Tensor: 41 | if isinstance(rgb, np.ndarray): 42 | rgb = np.expand_dims(rgb, axis=0) 43 | else: 44 | rgb = torch.unsqueeze(rgb, dim=0) 45 | outputs = self.model.encode(rgb) 46 | return outputs[0].permute(1, 2, 0) # [H, W, D] 47 | 48 | @torch.no_grad() 49 | def encode_text(self, text_list: TextQueries) -> torch.Tensor: 50 | if isinstance(text_list, list): 51 | text_list = TextQueries(prompts=text_list) 52 | text = clip.tokenize(text_list.prompts).to(self.device) 53 | text_features = self.model.clip_pretrained.encode_text(text) 54 | text_features /= text_features.norm(dim=-1, keepdim=True) 55 | text_features = 
text_features.float() 56 | return text_features 57 | -------------------------------------------------------------------------------- /orion/user_simulator/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sled-group/navchat/afb21a4c9cb13f671534be5460e3c9e5867e5512/orion/user_simulator/__init__.py -------------------------------------------------------------------------------- /orion/user_simulator/goals/4ok3usBNeis/final.json: -------------------------------------------------------------------------------- 1 | { 2 | "room_info": { 3 | "bedroom": "Alice|Bob", 4 | "bathroom": "shared", 5 | "living room": "shared" 6 | }, 7 | "bed_0": { 8 | "base": "{\"center\": [279, 316], \"mass\": 1422.5}", 9 | "nearby_obj": "nightstand_0", 10 | "object_desc": "with grey dotted white sheet|has a pillow", 11 | "attr": "bought from IKEA", 12 | "room_id": "bedroom_1", 13 | "explain": "a furniture for sleeping with sheets and mattress.", 14 | "type": "big" 15 | }, 16 | "nightstand_0": { 17 | "base": "{\"center\": [251, 313], \"mass\": 111.5}", 18 | "nearby_obj": "bed_0", 19 | "object_desc": "a brown side table with a drawer and open shelf", 20 | "attr": "bought from IKEA", 21 | "room_id": "bedroom_1", 22 | "explain": "a table beside the bed", 23 | "type": "small" 24 | }, 25 | "cabinet_0": { 26 | "base": "{\"center\": [196, 269], \"mass\": 27.5}", 27 | "nearby_obj": "couch_0", 28 | "object_desc": "a small short cabinet at the room corner", 29 | "attr": null, 30 | "room_id": "bedroom_2", 31 | "explain": "a cupboard usually near sofa", 32 | "type": "small" 33 | }, 34 | "couch_0": { 35 | "base": "{\"center\": [216, 286], \"mass\": 618.0}", 36 | "nearby_obj": "cabinet_0", 37 | "object_desc": "red long grid-patterned sofa", 38 | "attr": null, 39 | "room_id": "bedroom_2", 40 | "explain": "a sofa with long upholstered seat and cushions", 41 | "type": "big" 42 | }, 43 | "toilet_0": { 44 | "base": "{\"center\": [270, 188], \"mass\": 53.0}", 45 | "nearby_obj": "washbasin_0", 46 | "object_desc": "small white toilet with a toilet paper on it", 47 | "attr": null, 48 | "room_id": "bathroom_1", 49 | "explain": "a plumbing fixture on the floor for human waste disposal", 50 | "type": "small" 51 | }, 52 | "towel_0": { 53 | "base": "{\"center\": [270, 188], \"mass\": 3.5}", 54 | "nearby_obj": "", 55 | "object_desc": "blue towel hanging on the bar", 56 | "attr": null, 57 | "room_id": "bathroom_1", 58 | "explain": "a piece of cloth used for drying things", 59 | "type": "small" 60 | }, 61 | "washbasin_0": { 62 | "base": "{\"center\": [279, 196], \"mass\": 124.5}", 63 | "nearby_obj": "toilet_0", 64 | "object_desc": "white ceramic sink upon the counter", 65 | "attr": null, 66 | "room_id": "bathroom_1", 67 | "explain": "sink or basin to wash hands", 68 | "type": "small" 69 | }, 70 | "chair_0": { 71 | "base": "{\"center\": [373, 223], \"mass\": 126.5}", 72 | "nearby_obj": "bench_0|rack_0", 73 | "object_desc": "red wooden chair", 74 | "attr": "bought from Maiden Home", 75 | "room_id": "living room_1", 76 | "explain": "a furniture for one person to sit on", 77 | "type": "small" 78 | }, 79 | "bench_0": { 80 | "base": "{\"center\": [388, 202], \"mass\": 216.0}", 81 | "nearby_obj": "shoe_0|chair_0", 82 | "object_desc": "yellow mahogany bench", 83 | "attr": "bought from West Elm", 84 | "room_id": "living room_1", 85 | "explain": "a long wooden seat for several people", 86 | "type": "big" 87 | }, 88 | "freezer_0": { 89 | "base": "{\"center\": [287, 139], \"mass\": 
227.0}", 90 | "nearby_obj": "dresser_0|refrigerator_0|shelf_0", 91 | "object_desc": "", 92 | "attr": "Amazon Special", 93 | "room_id": "living room_1", 94 | "explain": "a container shorten than fridge to store frozen food", 95 | "type": "big" 96 | }, 97 | "refrigerator_0": { 98 | "base": "{\"center\": [305, 119], \"mass\": 78.5}", 99 | "nearby_obj": "freezer_0", 100 | "object_desc": "white tall standing fridge", 101 | "attr": null, 102 | "room_id": "living room_1", 103 | "explain": "fridge to store food and drinks", 104 | "type": "big" 105 | }, 106 | "dresser_0": { 107 | "base": "{\"center\": [301, 170], \"mass\": 65.0}", 108 | "nearby_obj": "freezer_0|shelf_0", 109 | "object_desc": "with multiple drawers", 110 | "attr": "", 111 | "room_id": "living room_1", 112 | "explain": "a large cabinet putting dresses", 113 | "type": "big" 114 | }, 115 | "shelf_0": { 116 | "base": "{\"center\": [271, 154], \"mass\": 119.0}", 117 | "nearby_obj": "freezer_0", 118 | "object_desc": "board fixed on the wall", 119 | "attr": null, 120 | "room_id": "living room_1", 121 | "explain": "a flat length of wood attached to the wall", 122 | "type": "ambiguous" 123 | }, 124 | "rack_0": { 125 | "base": "{\"center\": [368, 238], \"mass\": 75.0}", 126 | "nearby_obj": "chair_0", 127 | "object_desc": "large tall shelf|hold audio players", 128 | "attr": null, 129 | "room_id": "living room_1", 130 | "explain": "a shelf with bars to hold things", 131 | "type": "ambiguous" 132 | }, 133 | "rack_1": { 134 | "base": "{\"center\": [376, 159], \"mass\": 65.5}", 135 | "nearby_obj": "shoe_0", 136 | "object_desc": "the shelf store detergents|near the shoes", 137 | "attr": null, 138 | "room_id": "living room_1", 139 | "explain": "a shelf with bars to hold things", 140 | "type": "ambiguous" 141 | }, 142 | "speaker_0": { 143 | "base": "{\"center\": [368, 238], \"mass\": 15.0}", 144 | "nearby_obj": "rack_0", 145 | "object_desc": "audio speaker on the shelf", 146 | "attr": null, 147 | "room_id": "living room_1", 148 | "explain": "black device to display sound", 149 | "type": "small" 150 | }, 151 | "shoe_0": { 152 | "base": "{\"center\": [384, 170], \"mass\": 110.5}", 153 | "nearby_obj": "rack_1", 154 | "object_desc": "shoes on the floor", 155 | "attr": null, 156 | "room_id": "living room_1", 157 | "explain": "a covering for the foot", 158 | "type": "small" 159 | }, 160 | "laundry machine_0": { 161 | "base": "{\"center\": [236, 213], \"mass\": 228.0}", 162 | "nearby_obj": "", 163 | "object_desc": "locate in lanudry room", 164 | "attr": "bought from Walmart at 2020", 165 | "room_id": null, 166 | "explain": "washing machine", 167 | "type": "big" 168 | } 169 | } -------------------------------------------------------------------------------- /orion/user_simulator/goals/LT9Jq6dN3Ea/final.json: -------------------------------------------------------------------------------- 1 | { 2 | "room_info": { 3 | "bathroom": "shared", 4 | "kitchen": "shared", 5 | "living room": "shared", 6 | "reading room": "Alice" 7 | }, 8 | "table_0": { 9 | "base": "{\"center\": [288, 242], \"mass\": 600.5}", 10 | "nearby_obj": "couch_0|chair_0", 11 | "object_desc": "", 12 | "attr": "special designed by IKEA", 13 | "room_id": "living room_1", 14 | "explain": "a flat surface supported by legs", 15 | "type": "ambiguous", 16 | "same_goal": "table_0|table_1|table_2|table_3|table_4" 17 | }, 18 | "couch_0": { 19 | "base": "{\"center\": [247, 253], \"mass\": 642.0}", 20 | "nearby_obj": "table_0|chair_0", 21 | "object_desc": "near a tall landing lamp|facing the fireplace and 
tv", 22 | "attr": "bought from Amazon", 23 | "room_id": null, 24 | "explain": "a sofa with long upholstered seat for multiple people", 25 | "type": "big", 26 | "same_goal": "couch_0|couch_1|couch_2" 27 | }, 28 | "tv_0": { 29 | "base": "{\"center\": [279, 202], \"mass\": 75.0}", 30 | "nearby_obj": "fireplace_0", 31 | "object_desc": "", 32 | "attr": null, 33 | "room_id": "living room_1", 34 | "explain": "a black monitor for television broadcasts.", 35 | "type": "big", 36 | "same_goal": "tv_0" 37 | }, 38 | "fireplace_0": { 39 | "base": "{\"center\": [277, 194], \"mass\": 108.5}", 40 | "nearby_obj": "tv_0", 41 | "object_desc": "", 42 | "attr": null, 43 | "room_id": "living room_1", 44 | "explain": "a structure with fire for heating", 45 | "type": "small", 46 | "same_goal": "fireplace_0" 47 | }, 48 | "stool_0": { 49 | "base": "{\"center\": [313, 384], \"mass\": 68.5}", 50 | "nearby_obj": "", 51 | "object_desc": "high-leg stools", 52 | "attr": null, 53 | "room_id": "kitchen_1", 54 | "explain": "a small backless seat for seating", 55 | "type": "small", 56 | "same_goal": "stool_0|stool_1" 57 | }, 58 | "dining table_0": { 59 | "base": "{\"center\": [294, 342], \"mass\": 260.5}", 60 | "nearby_obj": "dining chair_0", 61 | "object_desc": "with white flower bunch on it", 62 | "attr": "elegant classic design", 63 | "room_id": "kitchen_1", 64 | "explain": "table for dining", 65 | "type": "ambiguous", 66 | "same_goal": "dining table_0|dining table_1" 67 | }, 68 | "bathroom cabinet_0": { 69 | "base": "{\"center\": [382, 258], \"mass\": 43.5}", 70 | "nearby_obj": "toilet_0", 71 | "object_desc": "blue storage unit for towels", 72 | "attr": null, 73 | "room_id": "bathroom_1", 74 | "explain": "a storage wardrobe for bathroom essentials", 75 | "type": "small", 76 | "same_goal": "bathroom cabinet_0" 77 | }, 78 | "toilet_0": { 79 | "base": "{\"center\": [384, 270], \"mass\": 98.5}", 80 | "nearby_obj": "bathroom cabinet_0", 81 | "object_desc": "", 82 | "attr": null, 83 | "room_id": "bathroom_1", 84 | "explain": "a plumbing fixture on the floor for human waste disposal", 85 | "type": "small", 86 | "same_goal": "toilet_0" 87 | }, 88 | "desk_0": { 89 | "base": "{\"center\": [384, 198], \"mass\": 661.5}", 90 | "nearby_obj": "rack_0|chair_3|desk chair_0", 91 | "object_desc": "large yellow desk for home reading|with books and teapot on it", 92 | "attr": "", 93 | "room_id": "reading room_1", 94 | "explain": "a table used for working or writing with storage drawers.", 95 | "type": "big", 96 | "same_goal": "desk_0" 97 | }, 98 | "rack_0": { 99 | "base": "{\"center\": [357, 205], \"mass\": 219.5}", 100 | "nearby_obj": "desk_0|desk chair_0", 101 | "object_desc": "a shelf placed many artifacts" , 102 | "attr": null, 103 | "room_id": "reading room_1", 104 | "explain": "a structure used for holding or displaying items.", 105 | "type": "ambiguous", 106 | "same_goal": "rack_0" 107 | }, 108 | "rack_1": { 109 | "base": "{\"center\": [449, 396], \"mass\": 96.5}", 110 | "nearby_obj": "", 111 | "object_desc": "white open shelf for clothes", 112 | "attr": "bought from Walmart", 113 | "room_id": null, 114 | "explain": "a structure used for holding or displaying items.", 115 | "type": "ambiguous", 116 | "same_goal": "rack_1" 117 | } 118 | } -------------------------------------------------------------------------------- /orion/user_simulator/goals/MHPLjHsuG27/final.json: -------------------------------------------------------------------------------- 1 | { 2 | "room_info": { 3 | "bathroom": "shared", 4 | "kitchen": "shared", 5 | "living 
room": "shared" 6 | }, 7 | "chair_0": { 8 | "base": "{\"center\": [412, 288], \"mass\": 242.0}", 9 | "nearby_obj": "lamp_0|table_0", 10 | "object_desc": "around chairs and lamp", 11 | "attr": "bought from Castlery 20 years ago", 12 | "room_id": "living room_1", 13 | "explain": "a furniture seat with backrest and legs for one person", 14 | "type": "big", 15 | "same_goal": "chair_0|chair_1|chair_2" 16 | }, 17 | "lamp_0": { 18 | "base": "{\"center\": [443, 306], \"mass\": 43.5}", 19 | "nearby_obj": "chair_0|table_0", 20 | "object_desc": "lighting the dining area", 21 | "attr": "BrentWood white lamp", 22 | "room_id": "living room_1", 23 | "explain": "a light source with shade", 24 | "type": "small", 25 | "same_goal": "lamp_0|lamp_1|lamp_2" 26 | }, 27 | "tv_0": { 28 | "base": "{\"center\": [430, 206], \"mass\": 71.5}", 29 | "nearby_obj": "side table_0|recliner_0", 30 | "object_desc": "hanging on the wall", 31 | "attr": null, 32 | "room_id": "living room_1", 33 | "explain": "a black monitor for television broadcasts.", 34 | "type": "small", 35 | "same_goal": "tv_0" 36 | }, 37 | "lounge chair_0": { 38 | "base": "{\"center\": [449, 234], \"mass\": 335.5}", 39 | "nearby_obj": "side table_1|side table_0", 40 | "object_desc": "mesh weaved C-shape|near the window", 41 | "attr": "bought from Walmart", 42 | "room_id": "living room_1", 43 | "explain": "a chair designed for relaxation or lounging.", 44 | "type": "big", 45 | "same_goal": "lounge chair_0" 46 | }, 47 | "coffee table_0": { 48 | "base": "{\"center\": [406, 233], \"mass\": 376.5}", 49 | "nearby_obj": "lounge chair_0|side table_0|recliner_0|l-shaped sofa_0", 50 | "object_desc": "surrounded by a sofa|low to the ground", 51 | "attr": null, 52 | "room_id": "living room_1", 53 | "explain": "a low table placed around sofa", 54 | "type": "big", 55 | "same_goal": "coffee table_0" 56 | }, 57 | "recliner_0": { 58 | "base": "{\"center\": [417, 205], \"mass\": 271.5}", 59 | "nearby_obj": "coffee table_0|l-shaped sofa_0|side table_0", 60 | "object_desc": "recliner chair with cusions", 61 | "attr": "bought from Maiden Home this year", 62 | "room_id": "living room_1", 63 | "explain": "a lying chair that can be adjusted to a reclining position", 64 | "type": "big", 65 | "same_goal": "recliner_0" 66 | }, 67 | "kitchen cabinet_0": { 68 | "base": "{\"center\": [327, 348], \"mass\": 261.0}", 69 | "nearby_obj": "kitchen counter_0", 70 | "object_desc": "", 71 | "attr": "bought from Pottery", 72 | "room_id": "kitchen_1", 73 | "explain": "a storage unit in a kitchen", 74 | "type": "big", 75 | "same_goal": "kitchen cabinet_0|kitchen shelf_0" 76 | }, 77 | "toilet_0": { 78 | "base": "{\"center\": [356, 222], \"mass\": 55.5}", 79 | "nearby_obj": "bathroom counter_0", 80 | "object_desc": "", 81 | "attr": "bought from Wayfair", 82 | "room_id": "bathroom_1", 83 | "explain": "a plumbing fixture on the floor for human waste disposal", 84 | "type": "small", 85 | "same_goal": "toilet_0" 86 | }, 87 | "bathroom counter_0": { 88 | "base": "{\"center\": [354, 217], \"mass\": 15.0}", 89 | "nearby_obj": "toilet_0", 90 | "object_desc": "a shelf decorated with a mirror", 91 | "attr": null, 92 | "room_id": "bathroom_1", 93 | "explain": "a place to put toothbrush and toothpaste", 94 | "type": "small", 95 | "same_goal": "bathroom counter_0" 96 | }, 97 | "clock_0": { 98 | "base": "{\"center\": [375, 327], \"mass\": 20.5}", 99 | "nearby_obj": "", 100 | "object_desc": "stored in the room corner", 101 | "attr": "old-fashioned clock", 102 | "room_id": null, 103 | "explain": "a round device that 
shows the time", 104 | "type": "small", 105 | "same_goal": "clock_0" 106 | } 107 | } -------------------------------------------------------------------------------- /orion/user_simulator/goals/QaLdnwvtxbs/final.json: -------------------------------------------------------------------------------- 1 | { 2 | "room_info": { 3 | "bedroom": "shared", 4 | "bathroom": "Alice|Bob", 5 | "living room": "shared" 6 | }, 7 | "chair_5": { 8 | "base": "{\"center\": [379, 236], \"mass\": 30.5}", 9 | "nearby_obj": "table_4", 10 | "object_desc": "for haircut and makeup|in dressing room", 11 | "attr": null, 12 | "room_id": "bedroom_1", 13 | "explain": "a furniture seat with backrest and legs for one person", 14 | "type": "ambiguous", 15 | "same_goal": "chair_5" 16 | }, 17 | "table_4": { 18 | "base": "{\"center\": [387, 234], \"mass\": 132.5}", 19 | "nearby_obj": "chair_5", 20 | "object_desc": "mirror on the table|with small light bulbs", 21 | "attr": "bought from Pottery Barn", 22 | "room_id": "bedroom_1", 23 | "explain": "a flat surface supported by legs", 24 | "type": "ambiguous", 25 | "same_goal": "table_4" 26 | }, 27 | "bed_0": { 28 | "base": "{\"center\": [410, 297], \"mass\": 1907.0}", 29 | "nearby_obj": "chair_2|sofa_1", 30 | "object_desc": "with bed tables on both sides", 31 | "attr": "bought from IKEA at 2019", 32 | "room_id": "bedroom_1", 33 | "explain": "a furniture for sleeping with sheets and mattress.", 34 | "type": "big", 35 | "same_goal": "bed_0" 36 | }, 37 | "chair_2": { 38 | "base": "{\"center\": [384, 338], \"mass\": 90.5}", 39 | "nearby_obj": "bed_0|tv_1|sofa_1|telephone_0|desk_0", 40 | "object_desc": "yellow bedroom chair", 41 | "attr": "bought from CB2 at 1980", 42 | "room_id": "bedroom_1", 43 | "explain": "a furniture seat with backrest and legs for one person", 44 | "type": "ambiguous", 45 | "same_goal": "chair_2" 46 | }, 47 | "desk_0": { 48 | "base": "{\"center\": [369, 329], \"mass\": 491.0}", 49 | "nearby_obj": "sofa_1|chair_2|telephone_0|tv_1", 50 | "object_desc": "long white ceramic platform", 51 | "attr": "bought from Wayfair 10 years ago", 52 | "room_id": "bedroom_1", 53 | "explain": "a table used for working or writing with storage drawers.", 54 | "type": "big", 55 | "same_goal": "desk_0" 56 | }, 57 | "telephone_0": { 58 | "base": "{\"center\": [374, 349], \"mass\": 20.5}", 59 | "nearby_obj": "tv_1|chair_2|desk_0", 60 | "object_desc": "fixed landline dialing|black color", 61 | "attr": null, 62 | "room_id": "bedroom_1", 63 | "explain": "a device on the table for long-distance voice communication", 64 | "type": "small", 65 | "same_goal": "telephone_0" 66 | }, 67 | "chair_0": { 68 | "base": "{\"center\": [333, 379], \"mass\": 207.5}", 69 | "nearby_obj": "table_0", 70 | "object_desc": "multiple chairs for dining", 71 | "attr": null, 72 | "room_id": "living room_1", 73 | "explain": "a furniture seat with backrest and legs for one person", 74 | "type": "ambiguous", 75 | "same_goal": "chair_0|chair_1|chair_3|chair_4" 76 | }, 77 | "table_0": { 78 | "base": "{\"center\": [327, 369], \"mass\": 707.5}", 79 | "nearby_obj": "chair_0", 80 | "object_desc": "dining table with dining chairs", 81 | "attr": null, 82 | "room_id": "living room_1", 83 | "explain": "a flat surface supported by legs", 84 | "type": "ambiguous", 85 | "same_goal": "table_0" 86 | }, 87 | "table_1": { 88 | "base": "{\"center\": [247, 409], \"mass\": 495.5}", 89 | "nearby_obj": "tv_0|sofa_0", 90 | "object_desc": "near a tv stand|low height", 91 | "attr": "bought from Amazon", 92 | "room_id": "living room_1", 93 | 
"explain": "a flat surface supported by legs", 94 | "type": "ambiguous", 95 | "same_goal": "table_1|table_2|desk_1" 96 | }, 97 | "toilet_0": { 98 | "base": "{\"center\": [215, 284], \"mass\": 113.5}", 99 | "nearby_obj": "washbasin counter_0", 100 | "object_desc": "", 101 | "attr": null, 102 | "room_id": "bathroom_1", 103 | "explain": "a plumbing fixture on the floor for human waste disposal", 104 | "type": "big", 105 | "same_goal": "toilet_0" 106 | }, 107 | "bathtub_0": { 108 | "base": "{\"center\": [298, 252], \"mass\": 567.0}", 109 | "nearby_obj": "", 110 | "object_desc": "small ellipse shape", 111 | "attr": null, 112 | "room_id": "bathroom_2", 113 | "explain": "a container for bathing.", 114 | "type": "big", 115 | "same_goal": "bathtub_0" 116 | } 117 | } -------------------------------------------------------------------------------- /orion/user_simulator/goals/TEEsavR23oF/final.json: -------------------------------------------------------------------------------- 1 | { 2 | "room_info": { 3 | "bedroom": "shared", 4 | "bathroom": "shared", 5 | "living room": "shared" 6 | }, 7 | "chair_0": { 8 | "base": "{\"center\": [284, 300], \"mass\": 134.5}", 9 | "nearby_obj": "plant_0|printer_0|couch_1|computer desk_0", 10 | "object_desc": "mid-back computer desk chair", 11 | "attr": "Amazon basics classic puresoft", 12 | "room_id": "living room_1", 13 | "explain": "a chair paired with computer desk", 14 | "type": "ambiguous", 15 | "same_goal": "chair_0" 16 | }, 17 | "chair_1": { 18 | "base": "{\"center\": [276, 337], \"mass\": 124.0}", 19 | "nearby_obj": "printer_0|computer desk_0", 20 | "object_desc": "portable camping chair|colorful low-height", 21 | "attr": null, 22 | "room_id": "living room_1", 23 | "explain": "small chair for children use", 24 | "type": "ambiguous", 25 | "same_goal": "chair_1" 26 | }, 27 | "table_0": { 28 | "base": "{\"center\": [345, 279], \"mass\": 67.0}", 29 | "nearby_obj": "couch_1", 30 | "object_desc": "small stool table|putting newspaper", 31 | "attr": null, 32 | "room_id": "living room_1", 33 | "explain": "table stool", 34 | "type": "small", 35 | "same_goal": "table_0" 36 | }, 37 | "newspaper_0": { 38 | "base": "{\"center\": [339, 278], \"mass\": 34.5}", 39 | "nearby_obj": "table_0", 40 | "object_desc": "newspaper on the table|New York Times paper", 41 | "attr": null, 42 | "room_id": "living room_1", 43 | "explain": "paper for reading", 44 | "type": "small", 45 | "same_goal": "newspaper_0" 46 | }, 47 | "couch_0": { 48 | "base": "{\"center\": [348, 317], \"mass\": 1161.5}", 49 | "nearby_obj": "table_0|bicycle_0|wardrobe_2", 50 | "object_desc": "has plush toys", 51 | "attr": "bought from Walmart at 2020", 52 | "room_id": "living room_1", 53 | "explain": "a sofa with long upholstered seat for multiple people", 54 | "type": "ambiguous", 55 | "same_goal": "couch_0" 56 | }, 57 | "couch_1": { 58 | "base": "{\"center\": [302, 281], \"mass\": 417.5}", 59 | "nearby_obj": "computer desk_0|table_0|chair_0|plant_0", 60 | "object_desc": "covered by blanket and bag", 61 | "attr": "bought from IKEA", 62 | "room_id": "living room_1", 63 | "explain": "a sofa with long upholstered seat for multiple people", 64 | "type": "ambiguous", 65 | "same_goal": "couch_1|couch_2" 66 | }, 67 | "bicycle_0": { 68 | "base": "{\"center\": [370, 326], \"mass\": 473.5}", 69 | "nearby_obj": "couch_0", 70 | "object_desc": "indoor cycling bike", 71 | "attr": "bought from Home Cardio Gym", 72 | "room_id": "living room_1", 73 | "explain": "an exercise bike", 74 | "type": "big", 75 | "same_goal": "bicycle_0" 76 | 
}, 77 | "computer desk_0": { 78 | "base": "{\"center\": [275, 306], \"mass\": 178.5}", 79 | "nearby_obj": "couch_1|plant_0|chair_0|printer_0", 80 | "object_desc": "", 81 | "attr": "bought from Pottery Barn", 82 | "room_id": "living room_1", 83 | "explain": "table for putting the devices like computer", 84 | "type": "big", 85 | "same_goal": "computer desk_0" 86 | }, 87 | "printer_0": { 88 | "base": "{\"center\": [271, 315], \"mass\": 54.0}", 89 | "nearby_obj": "chair_0|computer desk_0", 90 | "object_desc": "printer on the table", 91 | "attr": null, 92 | "room_id": "living room_1", 93 | "explain": "a device that produces document copies", 94 | "type": "small", 95 | "same_goal": "printer_0" 96 | }, 97 | "plant_0": { 98 | "base": "{\"center\": [279, 276], \"mass\": 101.5}", 99 | "nearby_obj": "chair_0|computer desk_0|couch_1", 100 | "object_desc": "green plant to decorate the room", 101 | "attr": null, 102 | "room_id": "living room_1", 103 | "explain": "a plant with green leaves and roots for decoration", 104 | "type": "big", 105 | "same_goal": "plant_0" 106 | }, 107 | "wardrobe_0": { 108 | "base": "{\"center\": [380, 406], \"mass\": 185.0}", 109 | "nearby_obj": "tv_0", 110 | "object_desc": "rustic brown|face to bed", 111 | "attr": "Superjare TV stand", 112 | "room_id": "bedroom_1", 113 | "explain": "a table to put TV on", 114 | "type": "ambiguous", 115 | "same_goal": "wardrobe_0" 116 | }, 117 | "wardrobe_1": { 118 | "base": "{\"center\": [302, 347], \"mass\": 144.5}", 119 | "nearby_obj": "wardrobe_2|chair_1", 120 | "object_desc": "open shelf wardrobe|putting books and box", 121 | "attr": "bought from Castlery", 122 | "room_id": "living room_1", 123 | "explain": "a large cabinet for storing clothes and other items.", 124 | "type": "ambiguous", 125 | "same_goal": "wardrobe_1" 126 | }, 127 | "wardrobe_2": { 128 | "base": "{\"center\": [327, 343], \"mass\": 83.5}", 129 | "nearby_obj": "wardrobe_1|couch_0", 130 | "object_desc": "has mirror", 131 | "attr": "Elite 2-door", 132 | "room_id": "living room_1", 133 | "explain": "cabient with door for putting clothes", 134 | "type": "ambiguous", 135 | "same_goal": "wardrobe_2" 136 | }, 137 | "wardrobe_3": { 138 | "base": "{\"center\": [399, 370], \"mass\": 50.0}", 139 | "nearby_obj": "bed_0", 140 | "object_desc": "putting clothes", 141 | "attr": "bought from MoMA Design Store", 142 | "room_id": "bedroom_1", 143 | "explain": "a large cabinet for storing clothes and other items.", 144 | "type": "ambiguous", 145 | "same_goal": "wardrobe_3" 146 | }, 147 | "bed_0": { 148 | "base": "{\"center\": [419, 409], \"mass\": 851.0}", 149 | "nearby_obj": "nightstand_0|tv_0", 150 | "object_desc": "has a frame and mattress", 151 | "attr": "bought from Amazon", 152 | "room_id": "bedroom_1", 153 | "explain": "a furniture for sleeping with sheets and mattress.", 154 | "type": "big", 155 | "same_goal": "bed_0" 156 | }, 157 | "tv_0": { 158 | "base": "{\"center\": [373, 407], \"mass\": 94.5}", 159 | "nearby_obj": "wardrobe_0", 160 | "object_desc": "", 161 | "attr": null, 162 | "room_id": "bedroom_1", 163 | "explain": "a black monitor for television broadcasts.", 164 | "type": "big", 165 | "same_goal": "tv_0" 166 | }, 167 | "nightstand_0": { 168 | "base": "{\"center\": [439, 437], \"mass\": 125.5}", 169 | "nearby_obj": "bed_0", 170 | "object_desc": "made of mahaogany", 171 | "attr": "bought from CB2", 172 | "room_id": "bedroom_1", 173 | "explain": "a table near the bed", 174 | "type": "small", 175 | "same_goal": "nightstand_0|nightstand_1" 176 | }, 177 | "toilet_0": { 178 | 
"base": "{\"center\": [441, 301], \"mass\": 15.5}", 179 | "nearby_obj": "cabinet_0", 180 | "object_desc": "like white bowl", 181 | "attr": null, 182 | "room_id": "bathroom_1", 183 | "explain": "a plumbing fixture on the floor for human waste disposal", 184 | "type": "small", 185 | "same_goal": "toilet_0" 186 | }, 187 | "cabinet_0": { 188 | "base": "{\"center\": [444, 329], \"mass\": 255.5}", 189 | "nearby_obj": "toilet_0", 190 | "object_desc": "for washroom use", 191 | "attr": "", 192 | "room_id": "bathroom_1", 193 | "explain": "a table supports the washbasin", 194 | "type": "big", 195 | "same_goal": "cabinet_0" 196 | } 197 | } -------------------------------------------------------------------------------- /orion/user_simulator/goals/cvZr5TUy5C5/final.json: -------------------------------------------------------------------------------- 1 | { 2 | "room_info": { 3 | "bathroom": "shared", 4 | "kitchen": "David", 5 | "living room": "shared", 6 | "office room": "Alice", 7 | "dining room": "shared" 8 | }, 9 | "armchair_0": { 10 | "base": "{\"center\": [238, 172], \"mass\": 360.5}", 11 | "nearby_obj": "", 12 | "object_desc": "made of real fur", 13 | "attr": null, 14 | "room_id": "dining room_1", 15 | "explain": "a chair with armrests", 16 | "type": "big", 17 | "same_goal": "armchair_0|armchair_1" 18 | }, 19 | "kitchen shelf_0": { 20 | "base": "{\"center\": [383, 147], \"mass\": 343.0}", 21 | "nearby_obj": "microwave_0", 22 | "object_desc": "made of wood", 23 | "attr": null, 24 | "room_id": "kitchen_1", 25 | "explain": "a surface holds dishes, utensils in kitchen", 26 | "type": "big", 27 | "same_goal": "kitchen shelf_0|kitchen shelf_1|kitchen shelf_2|kitchen shelf_3|kitchen shelf_4" 28 | }, 29 | "oven_0": { 30 | "base": "{\"center\": [360, 213], \"mass\": 54.0}", 31 | "nearby_obj": "refrigerator_0", 32 | "object_desc": "", 33 | "attr": null, 34 | "room_id": "kitchen_1", 35 | "explain": "a kitchen appliance used for baking and roasting", 36 | "type": "small", 37 | "same_goal": "oven_0" 38 | }, 39 | "bookshelf_0": { 40 | "base": "{\"center\": [341, 301], \"mass\": 255.0}", 41 | "nearby_obj": "computer_0|computer desk_0", 42 | "object_desc": "sink cabinet_0", 43 | "attr": "Alice's bookshelf", 44 | "room_id": "office room_1", 45 | "explain": "horizontal shelves for storing books", 46 | "type": "big", 47 | "same_goal": "bookshelf_0" 48 | }, 49 | "printer_0": { 50 | "base": "{\"center\": [294, 342], \"mass\": 83.5}", 51 | "nearby_obj": "computer_0|computer desk_0", 52 | "object_desc": "", 53 | "attr": null, 54 | "room_id": "office room_1", 55 | "explain": "a device that produces document copies", 56 | "type": "small", 57 | "same_goal": "printer_0" 58 | }, 59 | "computer desk_0": { 60 | "base": "{\"center\": [307, 312], \"mass\": 414.0}", 61 | "nearby_obj": "bookshelf_0|printer_0|computer_0", 62 | "object_desc": "cabinet_1", 63 | "attr": "bought from CB2", 64 | "room_id": "office room_1", 65 | "explain": "a desk for holding computer", 66 | "type": "ambiguous", 67 | "same_goal": "computer desk_0|computer chair_0" 68 | }, 69 | "computer_0": { 70 | "base": "{\"center\": [307, 316], \"mass\": 134.0}", 71 | "nearby_obj": "bookshelf_0|printer_0|computer desk_0", 72 | "object_desc": "", 73 | "attr": null, 74 | "room_id": "office room_1", 75 | "explain": "an electronic device with monitor", 76 | "type": "big", 77 | "same_goal": "computer_0" 78 | }, 79 | "table_0": { 80 | "base": "{\"center\": [478, 315], \"mass\": 837.0}", 81 | "nearby_obj": "", 82 | "object_desc": "", 83 | "attr": "bought from Maiden Home 
this year", 84 | "room_id": "living room_1", 85 | "explain": "a flat surface supported by legs", 86 | "type": "ambiguous", 87 | "same_goal": "table_2|table_0" 88 | }, 89 | "clock_0": { 90 | "base": "{\"center\": [425, 319], \"mass\": 25.5}", 91 | "nearby_obj": "", 92 | "object_desc": "hanging on the wall", 93 | "attr": null, 94 | "room_id": "living room_1", 95 | "explain": "an cicle instrument to display time", 96 | "type": "small", 97 | "same_goal": "clock_0" 98 | }, 99 | "vase_0": { 100 | "base": "{\"center\": [490, 358], \"mass\": 62.5}", 101 | "nearby_obj": "", 102 | "object_desc": "flower vase", 103 | "attr": null, 104 | "room_id": "living room_1", 105 | "explain": "a decorative container for flowers", 106 | "type": "small", 107 | "same_goal": "vase_0|vase_1|flower vase_0" 108 | }, 109 | "fireplace_0": { 110 | "base": "{\"center\": [541, 256], \"mass\": 561.5}", 111 | "nearby_obj": "plant_0|circular sofa_0", 112 | "object_desc": "", 113 | "attr": "bought from Pottery Barn at 1990", 114 | "room_id": "living room_1", 115 | "explain": "a structure with fire for heating", 116 | "type": "big", 117 | "same_goal": "fireplace_0" 118 | } 119 | } -------------------------------------------------------------------------------- /orion/user_simulator/goals/h1zeeAwLh9Z/final.json: -------------------------------------------------------------------------------- 1 | { 2 | "room_info": { 3 | "bedroom": "Alice|Bob", 4 | "bathroom": "Alice|Bob", 5 | "living room": "shared" 6 | }, 7 | "bed_0": { 8 | "base": "{\"center\": [316, 270], \"mass\": 1167.5}", 9 | "nearby_obj": "bedside lamp_0|nightstand_0|armchair_0", 10 | "object_desc": "covered with white thin sheet|face the stone wall", 11 | "attr": null, 12 | "room_id": "bedroom_1", 13 | "explain": "a furniture for sleeping with sheets and mattress.", 14 | "type": "ambiguous", 15 | "same_goal": "bed_0" 16 | }, 17 | "nightstand_0": { 18 | "base": "{\"center\": [305, 238], \"mass\": 146.0}", 19 | "nearby_obj": "bed_0", 20 | "object_desc": "yellow table with three drawers", 21 | "attr": null, 22 | "room_id": "bedroom_1", 23 | "explain": "a small table beside a bed", 24 | "type": "small", 25 | "same_goal": "nightstand_0|nightstand_1" 26 | }, 27 | "bedside lamp_0": { 28 | "base": "{\"center\": [283, 281], \"mass\": 76.5}", 29 | "nearby_obj": "armchair_0|bed_0", 30 | "object_desc": "", 31 | "attr": "modern style", 32 | "room_id": "bedroom_1", 33 | "explain": "a lamp placed on a side table or nightstand next to a bed.", 34 | "type": "ambiguous", 35 | "same_goal": "bedside lamp_0" 36 | }, 37 | "armchair_0": { 38 | "base": "{\"center\": [284, 308], \"mass\": 87.0}", 39 | "nearby_obj": "bedside lamp_0|nightstand_0", 40 | "object_desc": "white cushion with wooden legs|face the bed", 41 | "attr": null, 42 | "room_id": "bedroom_1", 43 | "explain": "a chair with armrests", 44 | "type": "small", 45 | "same_goal": "armchair_0" 46 | }, 47 | "bed_1": { 48 | "base": "{\"center\": [283, 483], \"mass\": 1014.5}", 49 | "nearby_obj": "bedside lamp_1", 50 | "object_desc": "bed in the attic", 51 | "attr": null, 52 | "room_id": "bedroom_2", 53 | "explain": "a furniture for sleeping with sheets and mattress.", 54 | "type": "ambiguous", 55 | "same_goal": "bed_1" 56 | }, 57 | "bedside lamp_1": { 58 | "base": "{\"center\": [270, 455], \"mass\": 26.5}", 59 | "nearby_obj": "bed_1", 60 | "object_desc": "", 61 | "attr": "old fashioned", 62 | "room_id": "bedroom_2", 63 | "explain": "a lamp placed on a side table or nightstand next to a bed.", 64 | "type": "ambiguous", 65 | "same_goal": 
"bedside lamp_1" 66 | }, 67 | "couch_0": { 68 | "base": "{\"center\": [461, 303], \"mass\": 687.0}", 69 | "nearby_obj": "side table_0|table lamp_0", 70 | "object_desc": "", 71 | "attr": "bought from IKEA", 72 | "room_id": "living room_1", 73 | "explain": "a sofa with long upholstered seat for multiple people", 74 | "type": "big", 75 | "same_goal": "couch_0" 76 | }, 77 | "side table_0": { 78 | "base": "{\"center\": [457, 262], \"mass\": 36.5}", 79 | "nearby_obj": "couch_0", 80 | "object_desc": "", 81 | "attr": "furinno 3-Tier", 82 | "room_id": "living room_1", 83 | "explain": "a small table placed beside a sofa or chair", 84 | "type": "small", 85 | "same_goal": "side table_0" 86 | }, 87 | "table lamp_0": { 88 | "base": "{\"center\": [458, 325], \"mass\": 27.5}", 89 | "nearby_obj": "couch_0", 90 | "object_desc": "with dark light", 91 | "attr": "Amber Brown mission style", 92 | "room_id": "living room_1", 93 | "explain": "a lamp placed on a table", 94 | "type": "ambiguous", 95 | "same_goal": "table lamp_0" 96 | } 97 | } -------------------------------------------------------------------------------- /orion/user_simulator/goals/mL8ThkuaVTM/final.json: -------------------------------------------------------------------------------- 1 | { 2 | "room_info": { 3 | "kitchen": "shared", 4 | "living room": "shared" 5 | }, 6 | "chair_0": { 7 | "base": "{\"center\": [319, 206], \"mass\": 134.5}", 8 | "nearby_obj": "kitchen cabinet_0|refrigerator_0|table_0", 9 | "object_desc": "a set of 4 chairs", 10 | "attr": "bought from Castlery 20 years ago", 11 | "room_id": "living room_1", 12 | "explain": "a furniture seat with backrest and legs for one person", 13 | "type": "big", 14 | "same_goal": "chair_0|chair_1|chair_2|chair_3" 15 | }, 16 | "table_0": { 17 | "base": "{\"center\": [330, 211], \"mass\": 311.5}", 18 | "nearby_obj": "chair_0", 19 | "object_desc": "dining table surrounded with Castlery chairs", 20 | "attr": "bought from Walmart", 21 | "room_id": "living room_1", 22 | "explain": "a flat surface supported by legs", 23 | "type": "ambiguous", 24 | "same_goal": "table_0|table_1" 25 | }, 26 | "couch_0": { 27 | "base": "{\"center\": [300, 170], \"mass\": 1145.5}", 28 | "nearby_obj": "led tv_0|chair_0|coffee table_0", 29 | "object_desc": "l-shaped long couch", 30 | "attr": null, 31 | "room_id": "living room_1", 32 | "explain": "a sofa with long upholstered seat for multiple people", 33 | "type": "big", 34 | "same_goal": "couch_0" 35 | }, 36 | "led tv_0": { 37 | "base": "{\"center\": [264, 175], \"mass\": 38.0}", 38 | "nearby_obj": "couch_0|coffee table_0", 39 | "object_desc": "Samsung television", 40 | "attr": null, 41 | "room_id": null, 42 | "explain": "a light emitting diode television", 43 | "type": "big", 44 | "same_goal": "led tv_0" 45 | }, 46 | "fireplace_0": { 47 | "base": "{\"center\": [261, 209], \"mass\": 172.0}", 48 | "nearby_obj": "coffee table_0|led tv_0", 49 | "object_desc": "has a long steel pipe|beneath a clock", 50 | "attr": "bought from Pottery Barn 5 uears ago", 51 | "room_id": "living room_1", 52 | "explain": "a structure with fire for heating", 53 | "type": "big", 54 | "same_goal": "fireplace_0|firewood holder_0" 55 | }, 56 | "coffee table_0": { 57 | "base": "{\"center\": [279, 176], \"mass\": 244.5}", 58 | "nearby_obj": "fireplace_0|led tv_0|couch_0", 59 | "object_desc": "low-lying table|decorated with a green plant", 60 | "attr": "Maiden Home", 61 | "room_id": "living room_1", 62 | "explain": "a low table placed around sofa", 63 | "type": "ambiguous", 64 | "same_goal": "coffee 
table_0|coffee table_1" 65 | }, 66 | "kitchen cabinet_0": { 67 | "base": "{\"center\": [319, 265], \"mass\": 955.0}", 68 | "nearby_obj": "table_0|chair_0|refrigerator_0|oven_0", 69 | "object_desc": "", 70 | "attr": "bought from IKEA", 71 | "room_id": "kitchen_1", 72 | "explain": "a storage unit in a kitchen", 73 | "type": "big", 74 | "same_goal": "kitchen cabinet_0" 75 | }, 76 | "oven_0": { 77 | "base": "{\"center\": [305, 271], \"mass\": 46.0}", 78 | "nearby_obj": "refrigerator_0|kitchen cabinet_0", 79 | "object_desc": "inserted in the white cabinet", 80 | "attr": null, 81 | "room_id": "kitchen_1", 82 | "explain": "a kitchen appliance used for baking and roasting", 83 | "type": "small", 84 | "same_goal": "oven_0" 85 | }, 86 | "refrigerator_0": { 87 | "base": "{\"center\": [305, 240], \"mass\": 56.5}", 88 | "nearby_obj": "chair_0|table_0|kitchen cabinet_0", 89 | "object_desc": "smooth steel surface", 90 | "attr": "Galanz Mount freezer", 91 | "room_id": "kitchen_1", 92 | "explain": "an appliance to preserve food at low temperature", 93 | "type": "small", 94 | "same_goal": "refrigerator_0" 95 | } 96 | } -------------------------------------------------------------------------------- /orion/user_simulator/goals/qyAac8rV8Zk/final.json: -------------------------------------------------------------------------------- 1 | { 2 | "room_info": { 3 | "bathroom": "shared", 4 | "kitchen": "shared", 5 | "living room": "shared" 6 | }, 7 | "chair_1": { 8 | "base": "{\"center\": [445, 370], \"mass\": 153.5}", 9 | "nearby_obj": "table_0|computer_0|printer_0|computer desk_0", 10 | "object_desc": "electronic gaming chair", 11 | "attr": null, 12 | "room_id": "living room_1", 13 | "explain": "a furniture seat with backrest and legs for one person", 14 | "type": "small", 15 | "same_goal": "chair_1" 16 | }, 17 | "printer_0": { 18 | "base": "{\"center\": [451, 356], \"mass\": 76.5}", 19 | "nearby_obj": "chair_1", 20 | "object_desc": "black printer on a pile of books", 21 | "attr": null, 22 | "room_id": "living room_1", 23 | "explain": "a device that produces document copies", 24 | "type": "small", 25 | "same_goal": "printer_0" 26 | }, 27 | "computer_0": { 28 | "base": "{\"center\": [433, 364], \"mass\": 51.5}", 29 | "nearby_obj": "chair_1|computer desk_0", 30 | "object_desc": "has monitor", 31 | "attr": null, 32 | "room_id": "living room_1", 33 | "explain": "an electronic device with monitor", 34 | "type": "small", 35 | "same_goal": "computer_0" 36 | }, 37 | "kitchen cabinet_0": { 38 | "base": "{\"center\": [344, 351], \"mass\": 142.0}", 39 | "nearby_obj": "", 40 | "object_desc": "made of blue oak", 41 | "attr": null, 42 | "room_id": "kitchen_1", 43 | "explain": "a storage unit in a kitchen", 44 | "type": "big", 45 | "same_goal": "kitchen cabinet_1|kitchen cabinet_2|kitchen cabinet_0" 46 | }, 47 | "microwave_0": { 48 | "base": "{\"center\": [305, 323], \"mass\": 50.5}", 49 | "nearby_obj": "table_1", 50 | "object_desc": "", 51 | "attr": null, 52 | "room_id": "kitchen_1", 53 | "explain": "a kitchen appliance used for heating food quickly.", 54 | "type": "small", 55 | "same_goal": "microwave_0" 56 | }, 57 | "trashcan_1": { 58 | "base": "{\"center\": [304, 288], \"mass\": 32.5}", 59 | "nearby_obj": "table_1", 60 | "object_desc": "high tech automatic", 61 | "attr": null, 62 | "room_id": "kitchen_1", 63 | "explain": "a low container for waste materials", 64 | "type": "small", 65 | "same_goal": "trashcan_1" 66 | }, 67 | "refrigerator_0": { 68 | "base": "{\"center\": [362, 322], \"mass\": 65.5}", 69 | "nearby_obj": 
"table_1", 70 | "object_desc": "stainless steel electric fridge", 71 | "attr": null, 72 | "room_id": "kitchen_1", 73 | "explain": "an appliance to preserve food at low temperature", 74 | "type": "small", 75 | "same_goal": "refrigerator_0" 76 | }, 77 | "chair_0": { 78 | "base": "{\"center\": [434, 292], \"mass\": 603.5}", 79 | "nearby_obj": "table_4", 80 | "object_desc": "", 81 | "attr": "Maiden Home chair", 82 | "room_id": null, 83 | "explain": "a furniture seat with backrest and legs for one person", 84 | "type": "big", 85 | "same_goal": "chair_0|chair_2" 86 | }, 87 | "toilet_0": { 88 | "base": "{\"center\": [518, 316], \"mass\": 85.5}", 89 | "nearby_obj": "", 90 | "object_desc": "", 91 | "attr": null, 92 | "room_id": "bathroom_1", 93 | "explain": "a plumbing fixture on the floor for human waste disposal", 94 | "type": "small", 95 | "same_goal": "toilet_0" 96 | } 97 | } -------------------------------------------------------------------------------- /orion/user_simulator/goals/y9hTuugGdiq/final.json: -------------------------------------------------------------------------------- 1 | { 2 | "room_info": { 3 | "bedroom": "Alice|Bob|Tony", 4 | "bathroom": "Bob|shared", 5 | "kitchen": "shared", 6 | "living room": "shared" 7 | }, 8 | "sofa_0": { 9 | "base": "{\"center\": [228, 393], \"mass\": 932.5}", 10 | "nearby_obj": "coffee table_0", 11 | "object_desc": "l-shaped sofa with grey cushions", 12 | "attr": "bought from Walmart", 13 | "room_id": "living room_1", 14 | "explain": "a couch with long upholstered seat for multiple people", 15 | "type": "big", 16 | "same_goal": "sofa_0" 17 | }, 18 | "bed_1": { 19 | "base": "{\"center\": [438, 297], \"mass\": 1206.5}", 20 | "nearby_obj": "lamp_1|nightstand_3", 21 | "object_desc": "a bed with red pillow", 22 | "attr": null, 23 | "room_id": "bedroom_1", 24 | "explain": "a furniture for sleeping with sheets and mattress.", 25 | "type": "ambiguous", 26 | "same_goal": "bed_1" 27 | }, 28 | "lamp_1": { 29 | "base": "{\"center\": [449, 268], \"mass\": 70.0}", 30 | "nearby_obj": "bed_1|nightstand_3", 31 | "object_desc": "light lamp with square fabric shade", 32 | "attr": null, 33 | "room_id": "bedroom_1", 34 | "explain": "a light source with shade", 35 | "type": "ambiguous", 36 | "same_goal": "lamp_1|lamp_2" 37 | }, 38 | "nightstand_3": { 39 | "base": "{\"center\": [445, 265], \"mass\": 34.5}", 40 | "nearby_obj": "bed_1|lamp_1", 41 | "object_desc": "red wood material", 42 | "attr": "bought from IKEA", 43 | "room_id": "bedroom_1", 44 | "explain": "a table near the bed", 45 | "type": "ambiguous", 46 | "same_goal": "nightstand_3|nightstand_0" 47 | }, 48 | "bed_0": { 49 | "base": "{\"center\": [518, 251], \"mass\": 1924.5}", 50 | "nearby_obj": "lamp_0|nightstand_1", 51 | "object_desc": "a bed with a black stool to step on", 52 | "attr": null, 53 | "room_id": "bedroom_2", 54 | "explain": "a furniture for sleeping with sheets and mattress.", 55 | "type": "ambiguous", 56 | "same_goal": "bed_0" 57 | }, 58 | "lamp_0": { 59 | "base": "{\"center\": [524, 219], \"mass\": 75.5}", 60 | "nearby_obj": "bed_0|nightstand_1", 61 | "object_desc": "", 62 | "attr": "ROOTRO touch bedside table lamp", 63 | "room_id": "bedroom_2", 64 | "explain": "the light put beside the bed", 65 | "type": "ambiguous", 66 | "same_goal": "lamp_0|lamp_3" 67 | }, 68 | "nightstand_1": { 69 | "base": "{\"center\": [519, 217], \"mass\": 47.5}", 70 | "nearby_obj": "bed_0|lamp_0", 71 | "object_desc": "brown table with drawers", 72 | "attr": null, 73 | "room_id": "bedroom_2", 74 | "explain": "a table near the 
bed", 75 | "type": "ambiguous", 76 | "same_goal": "nightstand_1|nightstand_2" 77 | }, 78 | "bed_2": { 79 | "base": "{\"center\": [336, 320], \"mass\": 694.0}", 80 | "nearby_obj": "", 81 | "object_desc": "colorful sheets|across the laundry room|with white frames to protect the baby", 82 | "attr": null, 83 | "room_id": "bedroom_3", 84 | "explain": "a furniture for sleeping with sheets and mattress.", 85 | "type": "ambiguous", 86 | "same_goal": "bed_2" 87 | }, 88 | "microwave_0": { 89 | "base": "{\"center\": [314, 257], \"mass\": 136.0}", 90 | "nearby_obj": "kitchen cabinet_0|kitchen counter_0", 91 | "object_desc": "holding on the shelf", 92 | "attr": null, 93 | "room_id": "kitchen_1", 94 | "explain": "a kitchen appliance used for heating food quickly.", 95 | "type": "small", 96 | "same_goal": "microwave_0" 97 | }, 98 | "kitchen counter_0": { 99 | "base": "{\"center\": [265, 284], \"mass\": 1407.0}", 100 | "nearby_obj": "refrigerator_0|kitchen cabinet_0|microwave_0", 101 | "object_desc": "large fireproof countertop", 102 | "attr": "bought from IKEA", 103 | "room_id": "kitchen_1", 104 | "explain": "a flat surface for food preparation", 105 | "type": "small", 106 | "same_goal": "kitchen counter_0" 107 | } 108 | } -------------------------------------------------------------------------------- /orion/user_simulator/rule_based_sim.py: -------------------------------------------------------------------------------- 1 | """ 2 | This file is deprecated. 3 | """ 4 | 5 | import re 6 | from typing import List, Tuple 7 | import numpy as np 8 | from orion.abstract.pose import Agent2DPose 9 | 10 | 11 | from orion.config.my_config import * 12 | from orion.user_simulator.topograph import Instance 13 | from orion.config.chatgpt_config import * 14 | from orion.user_simulator.base import UserSimulatorBase 15 | 16 | import random 17 | 18 | random.seed(1) 19 | 20 | 21 | class RuleUserSimulator(UserSimulatorBase): 22 | def generate_hint(self, instance: Instance) -> str: 23 | # from semantic map. neighbor objects in view. largest object in circle 15. 
randomly 24 | 25 | dist, angle, is_in_view = self.rel_pose(instance, self.agtpose) 26 | 27 | # if far, return object hint 28 | if dist > 100 or not is_in_view: 29 | # return f"the {instance.name} is {dist} meters away from you" 30 | nearby_objs = self.topo_graph.get_sorted_neighbors(instance.id) 31 | if len(nearby_objs) > 0: 32 | nearby_obj = nearby_objs.pop(0) 33 | nearby_obj_name = re.sub(r"_\d+$", "", nearby_obj) 34 | if nearby_obj_name == instance.name: 35 | return f"the {instance.name} is near to another {nearby_obj_name}" 36 | else: 37 | return f"the {instance.name} is near to a {nearby_obj_name}" 38 | # if close, return postion hint 39 | else: 40 | if -30 < angle <= 30: 41 | return f"the {instance.name} is in front of you around {dist} units" 42 | elif 30 < angle <= 60: 43 | return f"the {instance.name} is in front of you and at your right side around {dist} units" 44 | elif 60 < angle <= 120: 45 | return f"the {instance.name} is at your right side around {dist} units" 46 | elif 120 < angle <= 150: 47 | return f"the {instance.name} is behind you and at your right side around {dist} units" 48 | elif 150 < angle <= 180 or -180 <= angle <= -150: 49 | return f"the {instance.name} is behind you around {dist} units" 50 | elif -150 < angle <= -120: 51 | return f"the {instance.name} is behind you and at your left side around {dist} units" 52 | elif -120 < angle <= -60: 53 | return f"the {instance.name} is at your left side around {dist} units" 54 | elif -60 < angle <= -30: 55 | return f"the {instance.name} is in front of you and at your left side around {dist} units" 56 | 57 | def step( 58 | self, 59 | agent_response: str, 60 | semantic_img: np.ndarray, 61 | agtpose: Agent2DPose, 62 | step_count: int, 63 | ) -> Tuple[bool, str]: 64 | self.agtpose = agtpose 65 | 66 | goal_reached = self._eval_with_semantic_img( 67 | self.goal_gen.current_goal, agtpose, semantic_img 68 | ) 69 | nearest_tuple = self._get_egoview_info_for_goal( 70 | self.goal_gen.current_goal, agtpose 71 | ) 72 | 73 | maxtry_reached, task_finished = self.goal_gen.step( 74 | goal_reached, steps=step_count - self.last_step_count 75 | ) 76 | self.last_step_count = step_count 77 | 78 | if task_finished: 79 | return True, "That's all for today. Thank you for your help." 80 | 81 | return_str = "" 82 | if step_count > 0: 83 | if goal_reached: 84 | return_str += f"Yes that's correct. " 85 | else: 86 | return_str += f"No, you're wrong. " 87 | 88 | goal = self.goal_gen.current_goal 89 | ins = self.topo_graph.instance_dict[goal.goal] 90 | if maxtry_reached or goal_reached or step_count == 0: 91 | return_str += f"Now I want you to find the {ins.name} " 92 | if ins.nearby_obj: 93 | near_obj_str = ", ".join( 94 | [re.sub(r"_\d+$", "", obj) for obj in ins.nearby_obj] 95 | ) 96 | return_str += f"which near to the {near_obj_str}. " 97 | 98 | else: 99 | hints = self.generate_hint(ins) 100 | if hints is not None: 101 | return_str += f" Hints: {hints}. " 102 | else: 103 | return_str += f" Please find the {ins.name}. 
" 104 | 105 | return task_finished, return_str 106 | 107 | 108 | if __name__ == "__main__": 109 | for scene, floor in SCENE_ID_FLOOR_SET: 110 | usr_sim = RuleUserSimulator(scene, floor, max_round=2, category=2) 111 | for k, v in usr_sim.topo_graph.instance_dict.items(): 112 | print(k, v) 113 | input() 114 | for ii in usr_sim.goal_gen.goals: 115 | print(ii) 116 | ctt = 0 117 | task_finished = False 118 | while True: 119 | user_input = input("user: ") 120 | if user_input == "next" or task_finished: 121 | break 122 | if (ctt + 1) % 3 == 0: 123 | # mock 124 | g = usr_sim.goal_gen.current_goal 125 | ins = usr_sim.topo_graph.instance_dict[g.goal] 126 | x, z = ins.center 127 | agtpose = Agent2DPose(x, z, 0) 128 | cls_id = usr_sim.name_dic[ins.name][0] 129 | semantic_img = np.ones((100, 100)) * cls_id 130 | else: 131 | semantic_img = np.zeros((100, 100)) 132 | agtpose = Agent2DPose(0, 0, 0) 133 | 134 | task_finished, response = usr_sim.step( 135 | user_input, semantic_img, agtpose, ctt 136 | ) 137 | print("bot: ", response) 138 | ctt += 1 139 | 140 | input("press enter to continue") 141 | -------------------------------------------------------------------------------- /orion/utils/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sled-group/navchat/afb21a4c9cb13f671534be5460e3c9e5867e5512/orion/utils/__init__.py -------------------------------------------------------------------------------- /orion/utils/clip_score_utils.py: -------------------------------------------------------------------------------- 1 | """given the recognition probability, find out the true postive images""" 2 | 3 | import numpy as np 4 | 5 | from orion import logger 6 | 7 | np.set_printoptions(precision=3, suppress=True, linewidth=200) 8 | 9 | 10 | class CLIPScorer: 11 | def __init__(self): 12 | self.prob_list = [] 13 | self.masktime = 0 14 | self.found_goal = False 15 | 16 | def reset(self): 17 | self.prob_list = [] 18 | self.masktime = 0 19 | 20 | def add_prob(self, prob): 21 | self.prob_list.append(prob) 22 | 23 | def is_goal_found(self, prob): 24 | if len(self.prob_list) > 0: 25 | mean_prob = np.mean(self.prob_list) 26 | else: 27 | mean_prob = 0 28 | self.add_prob(prob) 29 | if mean_prob > 0.9: 30 | theshold = 0.99 31 | elif mean_prob > 0.8: 32 | theshold = 0.95 33 | elif mean_prob > 0.6: 34 | theshold = 0.9 35 | else: 36 | theshold = 0.8 37 | 38 | # logger.info( 39 | # f"\t mean prob {mean_prob:2f}, theshold {theshold}, prob {prob:3f}, {self.prob_list[-3:]}, {np.mean(self.prob_list[-3:])}" 40 | # ) 41 | if ( 42 | prob > theshold 43 | and len(self.prob_list) > 3 44 | and np.mean(self.prob_list[-3:]) > theshold 45 | and self.masktime == 0 46 | ): 47 | logger.info("\033[31m [CLIP] detect a pulse\033[m") 48 | self.found_goal = True 49 | return True 50 | elif ( 51 | len(self.prob_list) > 1 52 | and prob - max(self.prob_list[-2], mean_prob) > 0.5 53 | and self.masktime == 0 54 | ): 55 | logger.info("\033[31m [CLIP] detect a pulse\033[m") 56 | self.found_goal = True 57 | return True 58 | elif ( 59 | len(self.prob_list) > 3 60 | and self.prob_list[-1] - max(self.prob_list[-3], mean_prob) > 0.5 61 | and self.prob_list[-2] - max(self.prob_list[-3], mean_prob) > 0.4 62 | and self.masktime == 0 63 | ): 64 | logger.info("\033[31m [CLIP] detect a pulse\033[m") 65 | self.found_goal = True 66 | return True 67 | else: 68 | self.masktime = max(0, self.masktime - 1) 69 | self.found_goal = False 70 | return False 71 | 72 | def set_masktime(self, masktime=5): 73 
| logger.info(f" CLIP set masktime to {masktime}") 74 | self.masktime = masktime 75 | -------------------------------------------------------------------------------- /orion/utils/file_load.py: -------------------------------------------------------------------------------- 1 | ### file loading ### 2 | import cv2 3 | import numpy as np 4 | 5 | from orion.abstract.pose import Agent3DPose 6 | 7 | 8 | def get_floor_set_str(floor_set): 9 | a, b = floor_set 10 | if a < 0: 11 | a = "B{}".format(-a) 12 | else: 13 | a = "U{}".format(a) 14 | if b < 0: 15 | b = "B{}".format(-b) 16 | else: 17 | b = "U{}".format(b) 18 | return "{}_{}".format(a, b) 19 | 20 | 21 | def load_image(rgb_path): 22 | rgb = cv2.imread(rgb_path) 23 | rgb = cv2.cvtColor(rgb, cv2.COLOR_BGR2RGB) 24 | return rgb 25 | 26 | 27 | def load_depth(depth_filepath): 28 | with open(depth_filepath, "rb") as f: 29 | depth = np.load(f) 30 | if len(depth.shape) == 3: 31 | depth = depth.squeeze() 32 | if depth.dtype == np.uint16: 33 | depth = depth.astype(np.float32) / 1000.0 34 | return depth 35 | 36 | 37 | def load_semantic(semantic_filepath, obj2cls_dic): 38 | with open(semantic_filepath, "rb") as f: 39 | semantic = np.load(f) 40 | if len(semantic.shape) == 3: 41 | semantic = semantic.squeeze() 42 | semantic = np.asarray(semantic).astype(np.int32) 43 | semantic = cvt_sem_id_2_cls_id(semantic, obj2cls_dic) 44 | return semantic 45 | 46 | 47 | def cvt_sem_id_2_cls_id(semantic: np.ndarray, obj2cls: dict): 48 | h, w = semantic.shape 49 | semantic = semantic.flatten() 50 | u, inv = np.unique(semantic, return_inverse=True) 51 | return np.array([obj2cls[x][0] for x in u])[inv].reshape((h, w)) 52 | 53 | 54 | def load_obj2cls_dict(filepath): 55 | obj2cls_dic = {} 56 | label_dic = {} 57 | with open(filepath, "r") as f: 58 | for line in f: 59 | line = line.strip() 60 | if not line: 61 | continue 62 | row = line.split(":") 63 | obj_id = int(row[0]) 64 | cls_id = int(row[1].split(",")[0].strip()) 65 | cls_name = row[1].split(",")[1].strip() 66 | obj2cls_dic[obj_id] = (cls_id, cls_name) 67 | label_dic[cls_id] = cls_name 68 | label_dic = dict(sorted(label_dic.items(), key=lambda x: x[0])) 69 | return obj2cls_dic, label_dic 70 | 71 | 72 | def load_pose(pose_filepath): 73 | with open(pose_filepath, "r") as f: 74 | line = f.readline() 75 | return Agent3DPose.from_str(line) 76 | -------------------------------------------------------------------------------- /orion/utils/gradio_interface.py: -------------------------------------------------------------------------------- 1 | from multiprocessing import Queue 2 | from typing import List 3 | import gradio as gr 4 | import cv2 5 | from orion.agent_env.chatgpt_control_base import ChatGPTControlBase 6 | from orion.agent_env.chatgpt_control_orion import ChatGPTControlORION 7 | from orion.agent_env.chatgpt_control_cow import ChatGPTControlCoW 8 | from orion.agent_env.chatgpt_control_vlmap import ChatGPTControlVLMap 9 | 10 | 11 | END_SENT = "" 12 | END_TURN = "" 13 | 14 | 15 | class GradioInterface: 16 | def __init__( 17 | self, 18 | image_queue: Queue, 19 | user_message_queue: Queue, 20 | bot_message_queue: Queue, 21 | ): 22 | self.last_image = cv2.imread("orion/gradio_init_img.jpg") 23 | self.image_queue = image_queue 24 | self.user_message_queue = user_message_queue 25 | self.bot_message_queue = bot_message_queue 26 | 27 | def get_img(self): 28 | if self.image_queue.empty(): 29 | return self.last_image 30 | else: 31 | self.last_image = self.image_queue.get() 32 | return self.last_image 33 | 34 | def 
process_user_message(self, user_message, history):
35 | self.user_message_queue.put(user_message)
36 | return "", history + [[user_message, None]]
37 |
38 | def process_bot_message(self, chat_history: List):
39 | chat_history.append([None, ""])
40 | bot_message_chunk: str = self.bot_message_queue.get()
41 |
42 | while bot_message_chunk != END_TURN:
43 | if bot_message_chunk == END_SENT:
44 | chat_history.append([None, ""])
45 | else:
46 | if "Command" in bot_message_chunk:
47 | bot_message_chunk = bot_message_chunk.replace("Command", "Action")
48 | chat_history[-1][1] += bot_message_chunk
49 | yield chat_history
50 |
51 | bot_message_chunk = self.bot_message_queue.get()
52 |
53 | def run(self):
54 | with gr.Blocks(theme=gr.themes.Default(text_size="lg")) as gradio_demo:
55 | with gr.Column():
56 | with gr.Box():
57 | gr.Markdown("## 🔥Navigation ChatBot Demo🚀")
58 | with gr.Row():
59 | with gr.Column(scale=1):
60 | plot = gr.Image(self.last_image)
61 | with gr.Column(scale=2.5):
62 | chatbot = gr.Chatbot()
63 | chatbot.style(height=600)
64 | msg = gr.Textbox()
65 | msg.submit(
66 | self.process_user_message,
67 | [msg, chatbot],
68 | [msg, chatbot],
69 | show_progress=True,
70 | ).then(self.process_bot_message, chatbot, chatbot)
71 | gradio_demo.load(self.get_img, None, plot, every=0.01)
72 |
73 | gradio_demo.queue().launch(
74 | server_name="127.0.0.1", server_port=7877, share=True
75 | )
76 |
77 | class GradioDemoChatGPTControlORION(ChatGPTControlORION):
78 | def __init__(
79 | self, image_queue: Queue, user_message_queue: Queue, bot_message_queue: Queue,
80 | *args, **kwargs
81 | ):
82 | super().__init__(*args, **kwargs)
83 | self.image_queue = image_queue
84 | self.user_message_queue = user_message_queue
85 | self.bot_message_queue = bot_message_queue
86 |
87 | def display(self, *args, **kwargs):
88 | super().display(*args, **kwargs)
89 | # downscale the display image by half before sending it to gradio
90 | self.display_image = cv2.resize(
91 | self.display_image, (self.display_image.shape[1] // 2, self.display_image.shape[0] // 2)
92 | )
93 | self.image_queue.put(cv2.cvtColor(self.display_image, cv2.COLOR_BGR2RGB))
94 |
95 | def _get_user_input(self):
96 | user_input = self.user_message_queue.get()
97 | return user_input
98 |
99 | def _send_funcall_msg(self, msg):
100 | super()._send_funcall_msg(msg)
101 | self.bot_message_queue.put(
102 | "**API results**: *" + msg.replace("\n", "
") + "*" 103 | ) 104 | self.bot_message_queue.put(END_SENT) 105 | 106 | def _get_chatgpt_response(self): 107 | if self.use_stream: 108 | response = "" 109 | for chunk in self.chatgpt.get_system_response_stream(): 110 | self.bot_message_queue.put(chunk.replace("\n", "
")) 111 | response += chunk 112 | else: 113 | response = self.chatgpt.get_system_response() 114 | self.bot_message_queue.put(response.replace("\n", "
")) 115 | self.bot_message_queue.put(END_SENT) 116 | return response 117 | 118 | def _post_process(self, command): 119 | super()._post_process(command) 120 | final_response = command["args"]["content"] 121 | self.bot_message_queue.put("**" + final_response + "**") 122 | self.bot_message_queue.put(END_TURN) -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | opencv-python>=4.7.0.72 2 | transformers==4.30.2 3 | scipy==1.7.3 4 | pycocotools==2.0.7 5 | matplotlib 6 | git+https://github.com/zhanghang1989/PyTorch-Encoding/ 7 | pytorch-lightning>=1.9.5 8 | imageio 9 | ftfy==6.1.1 10 | regex 11 | tqdm 12 | git+https://github.com/openai/CLIP.git 13 | altair==5.0.0 14 | streamlit 15 | timm 16 | tensorboardX==2.6.2.2 17 | test-tube 18 | wandb 19 | open_clip_torch>=2.20.0 20 | openai==1.12.0 21 | scikit-fmm>=2022.3.26 22 | scikit-image>=0.19.3 23 | scikit-learn>=1.0.2 24 | httpx==0.24.0 25 | aiofiles==23.1.0 26 | fastapi==0.88.0 27 | Pillow==9.5.0 28 | requests==2.28.2 29 | requests-oauthlib==1.3.1 30 | Jinja2==3.1.2 31 | ffmpy==0.3.0 32 | urllib3==1.26.15 33 | gradio==3.29.0 -------------------------------------------------------------------------------- /scripts/build_vlmap.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import os 3 | import time 4 | 5 | import torch 6 | from orion.map.map_build.build_voxel import VoxelMapBuilder, OfflineDataLoader 7 | from orion.config.my_config import MapConfig, SCENE_ID_FLOOR_SET 8 | from orion.utils.file_load import get_floor_set_str 9 | from tqdm import tqdm 10 | 11 | from orion.config.my_config import * 12 | from orion.utils import visulization as vis 13 | from orion.map.map_search.search_voxel import VLMapSearch 14 | from orion.perception.extractor.concept_fusion_extractor import ConceptFusionExtractor 15 | from orion.perception.extractor.lseg_extractor import LSegExtractor 16 | 17 | 18 | def build_vlmap_one_scene(root_dir, feature_type): 19 | map_builder = VoxelMapBuilder( 20 | save_dir=os.path.join(root_dir, f"{feature_type}_vlmap"), 21 | extractor_type = "lseg" if feature_type == "lseg" else "conceptfusion", 22 | extractor = LSegExtractor() if feature_type == "lseg" else ConceptFusionExtractor(), 23 | accelerate_mapping=True 24 | ) 25 | 26 | dataloader = OfflineDataLoader( 27 | data_dir=os.path.join(root_dir, "recordings"), 28 | mapcfg=MapConfig(), 29 | ) 30 | 31 | for idx in tqdm(range(len(dataloader))): 32 | obs = dataloader[idx] 33 | map_builder.build(obs) 34 | 35 | map_builder._save() 36 | 37 | # just make sure realease GPU memory 38 | del map_builder.extractor 39 | del map_builder.vxlmap 40 | del map_builder 41 | 42 | 43 | def draw_vlmap_one_scene(root_dir, feature_type): 44 | # Draw topdown rgb 45 | map_querier = VLMapSearch( 46 | load_sparse_map_path=os.path.join( 47 | root_dir, f"{feature_type}_vlmap", "sparse_vxl_map.npz") 48 | ) 49 | mapshape = map_querier._3dshape 50 | mapshape = (mapshape[0], mapshape[1], mapshape[2], 3) 51 | topdown_rgb = map_querier.get_BEV_map( 52 | indices=map_querier.indices, 53 | values=map_querier.rgb_values, 54 | map_shape=mapshape 55 | ) 56 | vis.plot_uint8img_with_plt( 57 | topdown_rgb, 58 | "topdown_rgb", 59 | crop=True, 60 | save=True, 61 | save_path=os.path.join( 62 | root_dir, f"{feature_type}_vlmap", "topdown_rgb.png"), 63 | ) 64 | 65 | # Test query list 66 | predict_map, query_labels = 
map_querier.query(VLMAP_QUERY_LIST_COMMON) 67 | nomap_mask_crop = map_querier.no_map_mask_crop 68 | predict_map_crop = predict_map[ 69 | map_querier.zmin : map_querier.zmax + 1, 70 | map_querier.xmin : map_querier.xmax + 1 71 | ] 72 | vis.plot_BEV_semantic_map( 73 | predict_map_crop, 74 | nomap_mask_crop, 75 | labels=query_labels, 76 | save=True, 77 | save_path=os.path.join( 78 | root_dir, f"{feature_type}_vlmap", "topdown_vlmap.png"), 79 | ) 80 | 81 | del map_querier 82 | 83 | 84 | 85 | def main(scene_id, floor, feature_type): 86 | data_dir = f"data/experiments/predict_{scene_id}_{get_floor_set_str(floor)}" 87 | build_vlmap_one_scene(data_dir, feature_type) 88 | torch.cuda.empty_cache() 89 | time.sleep(5) 90 | 91 | draw_vlmap_one_scene(data_dir, feature_type) 92 | torch.cuda.empty_cache() 93 | time.sleep(5) 94 | 95 | 96 | if __name__ == "__main__": 97 | argparser = argparse.ArgumentParser() 98 | argparser.add_argument( 99 | "--scene_id", 100 | type=str, 101 | default="4ok3usBNeis", 102 | help="scene id, either 'all' or a specific scene id in SCENE_ID_FLOOR_SET", 103 | ) 104 | argparser.add_argument( 105 | "--feature_type", 106 | choices=["lseg", "conceptfusion"], 107 | default="lseg", 108 | help="feature type, either 'lseg' or 'conceptfusion'", 109 | ) 110 | args = argparser.parse_args() 111 | 112 | scene_dic = {item[0]: item for item in SCENE_ID_FLOOR_SET} 113 | if args.scene_id == "all": 114 | for scene_id, floor in SCENE_ID_FLOOR_SET: 115 | main(scene_id, floor, args.feature_type) 116 | else: 117 | assert args.scene_id in scene_dic 118 | scene_id, floor = scene_dic[args.scene_id] 119 | main(scene_id, floor, args.feature_type) -------------------------------------------------------------------------------- /scripts/collect_scene_fbe.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import time 3 | from orion.agent_env.fbe import FBEAgentEnv 4 | from orion.config.my_config import SCENE_ID_FLOOR_SET 5 | 6 | 7 | def collect_data_one_scene(scene_id, floor): 8 | game = FBEAgentEnv( 9 | scene_ids=[scene_id], 10 | floor_set=floor, 11 | fast_explore=False, 12 | display_shortside=256, 13 | save_dir_name="predict", 14 | auto_record=True, 15 | display_setting="rgb+occumap+topdownmap", 16 | headless=True, 17 | use_gt_pose=True, 18 | load_existing_occumap=False, 19 | save_new_occumap=True, 20 | ) 21 | game.run() 22 | 23 | 24 | if __name__ == "__main__": 25 | argparser = argparse.ArgumentParser() 26 | argparser.add_argument( 27 | "--scene_id", 28 | type=str, 29 | default="4ok3usBNeis", 30 | help="scene id, either 'all' or a specific scene id in SCENE_ID_FLOOR_SET", 31 | ) 32 | args = argparser.parse_args() 33 | 34 | scene_dic = {item[0]: item for item in SCENE_ID_FLOOR_SET} 35 | if args.scene_id == "all": 36 | for scene_id, floor in SCENE_ID_FLOOR_SET: 37 | collect_data_one_scene(scene_id, floor) 38 | time.sleep(5) 39 | else: 40 | assert args.scene_id in scene_dic 41 | collect_data_one_scene(scene_dic[args.scene_id][0], scene_dic[args.scene_id][1]) 42 | -------------------------------------------------------------------------------- /scripts/create_video.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import cv2 3 | 4 | import sys 5 | import os 6 | import imageio 7 | import numpy as np 8 | import cv2 9 | import tqdm 10 | 11 | from habitat_sim.utils.common import d3_40_colors_rgb 12 | 13 | 14 | has_gpu = True # @param {type: "boolean"} 15 | codec = "h264" 16 | if has_gpu: 17 | 
codec = "h264_nvenc" 18 | 19 | 20 | def load_depth(depth_filepath): 21 | with open(depth_filepath, "rb") as f: 22 | depth = np.load(f) 23 | return depth 24 | 25 | 26 | def get_fast_video_writer(video_file: str, fps: int = 60): 27 | if ( 28 | "google.colab" in sys.modules 29 | and os.path.splitext(video_file)[-1] == ".mp4" 30 | and os.environ.get("IMAGEIO_FFMPEG_EXE") == "/usr/bin/ffmpeg" 31 | ): 32 | # USE GPU Accelerated Hardware Encoding 33 | writer = imageio.get_writer( 34 | video_file, 35 | fps=fps, 36 | codec=codec, 37 | mode="I", 38 | bitrate="1000k", 39 | format="FFMPEG", 40 | ffmpeg_log_level="info", 41 | quality=10, 42 | output_params=["-minrate", "500k", "-maxrate", "5000k"], 43 | ) 44 | else: 45 | # Use software encoding 46 | writer = imageio.get_writer(video_file, fps=fps) 47 | return writer 48 | 49 | 50 | def create_video(data_dir: str, fps: int = 30): 51 | rgb_dir = os.path.join(data_dir, "rgb") 52 | rgb_list = sorted( 53 | os.listdir(rgb_dir), key=lambda x: int(x.split("_")[-1].split(".")[0]) 54 | ) 55 | rgb_list = [os.path.join(rgb_dir, x) for x in rgb_list] 56 | 57 | depth_dir = os.path.join(data_dir, "depth") 58 | depth_list = sorted( 59 | os.listdir(depth_dir), key=lambda x: int(x.split("_")[-1].split(".")[0]) 60 | ) 61 | depth_list = [os.path.join(depth_dir, x) for x in depth_list] 62 | 63 | semantic_dir = os.path.join(data_dir, "semantic") 64 | semantic_list = sorted( 65 | os.listdir(semantic_dir), key=lambda x: int(x.split("_")[-1].split(".")[0]) 66 | ) 67 | semantic_list = [os.path.join(semantic_dir, x) for x in semantic_list] 68 | 69 | assert len(rgb_list) == len(depth_list) == len(semantic_list) 70 | 71 | output_path = os.path.join(data_dir, "recording_video.mp4") 72 | out_writer = get_fast_video_writer(output_path, fps=fps) 73 | 74 | pbar = tqdm.tqdm(total=len(rgb_list), position=0, leave=True) 75 | for i, (rgb_path, depth_path, semantic_path) in enumerate( 76 | list(zip(rgb_list, depth_list, semantic_list)) 77 | ): 78 | bgr = cv2.imread(rgb_path) 79 | rgb = cv2.cvtColor(bgr, cv2.COLOR_BGR2RGB) 80 | 81 | depth = np.load(open(depth_path, "rb")) 82 | if depth.dtype == np.uint16: 83 | depth = depth.astype(np.float32) / 1000 84 | depth_vis = (depth / 10 * 255).astype(np.uint8) 85 | depth_color = cv2.applyColorMap(depth_vis, cv2.COLORMAP_JET) 86 | semantic = np.load(open(semantic_path, "rb")) 87 | semantic_color = d3_40_colors_rgb[semantic.squeeze() % 40] 88 | output_im = np.concatenate((rgb, depth_color, semantic_color), axis=1) 89 | out_writer.append_data(output_im) 90 | pbar.update(1) 91 | out_writer.close() 92 | 93 | 94 | if __name__ == "__main__": 95 | create_video("data/experiments/fbetest_4ok3usBNeis_B1_U1/recordings") 96 | -------------------------------------------------------------------------------- /scripts/user_agent_talk_cow.py: -------------------------------------------------------------------------------- 1 | import json 2 | import os 3 | import re 4 | import time 5 | from orion import logger 6 | from orion.config.chatgpt_config import * 7 | from orion.agent_env.chatgpt_control_cow import ChatGPTControlCoW 8 | from orion.user_simulator.chatgpt_based_sim import ( 9 | ChatGPTUserSimulator, 10 | CountourMaskPrediction, 11 | ) 12 | from orion.abstract.interaction_history import SucMsg 13 | 14 | 15 | class ChatGPTControlAndUserSim(ChatGPTControlCoW): 16 | def __init__(self, max_trial, max_round, category, chatgpt_usrsim_config, clear_gptctx=False, *args, **kwargs): 17 | super().__init__(*args, **kwargs) 18 | 19 | self.usr_sim = ChatGPTUserSimulator( 20 | 
chatgpt_usrsim_config=chatgpt_usrsim_config, 21 | scene_id=kwargs["scene_ids"][0], 22 | floor_plan=kwargs["floor_set"], 23 | max_trial=max_trial, 24 | max_round=max_round, 25 | category=category, 26 | is_cow_baseline=self.is_cow_baseline, 27 | is_vlamp_baseline=self.is_vlmap_baseline, 28 | ) 29 | logger.info("User Simulator Initialized.") 30 | logger.info(f"Max Trial: {max_trial}, Max Round: {max_round}, Cat: {category}") 31 | for k, v in self.usr_sim.topo_graph.instance_dict.items(): 32 | logger.info(f"Instance {k}: {v}") 33 | 34 | logger.info("User Goal init") 35 | for g in self.usr_sim.goal_gen.goals: 36 | logger.info(g) 37 | 38 | self.clear_gptctx = clear_gptctx 39 | assert self.is_vlmap_baseline is False 40 | assert self.is_cow_baseline is True 41 | 42 | suffix = f"cow_t{max_trial}r{max_round}_{category}" 43 | if self.use_memory: 44 | suffix += "_mem" 45 | else: 46 | suffix += "_nomem" 47 | if self.use_vlmap: 48 | suffix += "_vmp" 49 | else: 50 | suffix += "_novmp" 51 | if self.use_explore: 52 | suffix += "_exp" 53 | else: 54 | suffix += "_noexp" 55 | if self.clear_gptctx: 56 | suffix += "_noctx" # clear every new round 57 | else: 58 | suffix += "_ctx" 59 | 60 | logger.info(f"Dump dir suffix: {suffix}") 61 | self.dump_dir = os.path.join(self.save_dir, f"dump_{suffix}") 62 | if not os.path.exists(self.dump_dir): 63 | os.makedirs(self.dump_dir) 64 | else: 65 | logger.warning(f"Dump dir {self.dump_dir} already exists!") 66 | input("Press Enter to continue...") 67 | self.is_first_turn = True 68 | 69 | def _get_user_input(self): 70 | logger.info("\nGenreate User Utterance with GPT Simulator...") 71 | is_first_turn = self.is_first_turn 72 | if self.is_first_turn: 73 | agtresponse = "Hello, what should I do?" 74 | self.is_first_turn = False 75 | else: 76 | agtresponse = self.agent_response 77 | 78 | agt_predict = CountourMaskPrediction( 79 | predict_contours=self.predict_contours, 80 | predict_masks=self.predict_masks, 81 | ) 82 | 83 | task_finished, is_new_goal, is_new_round, goal_succ, instruction = self.usr_sim.step( 84 | agent_response=agtresponse, 85 | agtpose=self.agent_state.pose_2d, 86 | semantic_img=self.observations.semantic, 87 | agt_predict=agt_predict, 88 | step_count=self.step_count, 89 | first_turn=is_first_turn, 90 | ) 91 | self.task_finished = task_finished 92 | logger.info(f"[User Simulator] {instruction}") 93 | self.interaction_history.append( 94 | SucMsg(reward=goal_succ) 95 | ) 96 | if is_new_round: 97 | self.usr_sim.goal_gen._is_new_round = False 98 | last_round = self.usr_sim.goal_gen.last_round -1 99 | logger.info(f"Start new Round! 
From round {last_round} to {last_round+1}") 100 | if self.use_memory: 101 | self.object_memory.save(self.dump_dir, suffix=f"_round{last_round}") 102 | 103 | gpt_context = self.chatgpt.messages 104 | gpt_context_path = os.path.join(self.dump_dir, f"gptctx_round{last_round}.json") 105 | json.dump(gpt_context, open(gpt_context_path, "w")) 106 | 107 | if self.clear_gptctx: 108 | self.chatgpt.clear_ctx() 109 | 110 | if is_new_goal: 111 | # save early 112 | eval_result_path = os.path.join(self.dump_dir, "result.json") 113 | self.usr_sim.goal_gen.save(eval_result_path) 114 | conversation_path = os.path.join(self.dump_dir, "dialog.json") 115 | json.dump(self.record_conversations, open(conversation_path, "w")) 116 | 117 | # save money 118 | if re.search(r"(gpt-4|gpt4)", self.chatgpt.model): 119 | self.chatgpt.clear_ctx() 120 | 121 | 122 | return instruction 123 | 124 | def save(self): 125 | super().save() 126 | if not os.path.exists(self.dump_dir): 127 | os.makedirs(self.dump_dir, exist_ok=True) 128 | self.interaction_history.save(self.dump_dir) 129 | if self.use_memory: 130 | self.object_memory.save(self.dump_dir, suffix="_final") 131 | eval_result_path = os.path.join(self.dump_dir, "result.json") 132 | results = self.usr_sim.goal_gen.save(eval_result_path) 133 | logger.info(f"Dumped user simulator result to {eval_result_path}") 134 | conversation_path = os.path.join(self.dump_dir, "dialog.json") 135 | json.dump(self.record_conversations, open(conversation_path, "w")) 136 | logger.info(f"Dumped conversation to {conversation_path}") 137 | 138 | try: 139 | self.usr_sim.eval(results) 140 | except: 141 | pass 142 | 143 | 144 | 145 | 146 | if __name__ == "__main__": 147 | 148 | import argparse 149 | 150 | parser = argparse.ArgumentParser() 151 | parser.add_argument("--scene_id", type=str, default="4ok3usBNeis") 152 | parser.add_argument("--floor_b", type=int, default=-1) 153 | parser.add_argument("--floor_u", type=int, default=1) 154 | parser.add_argument("--category", type=str, default="mixed", choices=["landmark", "instruction", "description", "correction", "mixed", "none"]) 155 | args = parser.parse_args() 156 | 157 | max_trial=5 158 | max_round=1 159 | category=args.category 160 | use_memory=False 161 | use_vlmap=False 162 | use_explore=True 163 | clear_gptctx=False 164 | 165 | chatgpt_config=AzureGPT4Config() 166 | chatgpt_usrsim_config=AzureGPT35Config() 167 | 168 | game = ChatGPTControlAndUserSim( 169 | max_trial=max_trial, 170 | max_round=max_round, 171 | category=category, 172 | chatgpt_config=chatgpt_config, 173 | chatgpt_usrsim_config=chatgpt_usrsim_config, 174 | use_memory=use_memory, 175 | use_vlmap=use_vlmap, 176 | use_explore=use_explore, 177 | clear_gptctx=clear_gptctx, 178 | is_vlmap_baseline=False, 179 | is_cow_baseline=True, 180 | record_interaction=False, 181 | use_stream=False, 182 | fast_explore=True, 183 | scene_ids=[args.scene_id], 184 | floor_set=(args.floor_b, args.floor_u), 185 | display_shortside=256, 186 | save_dir_name="predict", 187 | auto_record=False, 188 | display_setting="rgb+occumap+topdownmap", 189 | headless=True, 190 | use_gt_pose=True, 191 | load_existing_occumap=True, 192 | save_new_occumap=False, 193 | ) 194 | 195 | game.run() 196 | -------------------------------------------------------------------------------- /scripts/user_agent_talk_orion.py: -------------------------------------------------------------------------------- 1 | import json 2 | import os 3 | import re 4 | import time 5 | from orion import logger 6 | from orion.agent_env.chatgpt_control_orion 
import ChatGPTControlORION 7 | from orion.user_simulator.chatgpt_based_sim import ( 8 | ChatGPTUserSimulator, 9 | CountourMaskPrediction, 10 | ) 11 | from orion.abstract.interaction_history import SucMsg 12 | from orion.config.chatgpt_config import * 13 | 14 | 15 | class ChatGPTControlAndUserSim(ChatGPTControlORION): 16 | def __init__(self, max_trial, max_round, category, chatgpt_usrsim_config, clear_gptctx=False, *args, **kwargs): 17 | super().__init__(*args, **kwargs) 18 | 19 | self.usr_sim = ChatGPTUserSimulator( 20 | chatgpt_usrsim_config=chatgpt_usrsim_config, 21 | scene_id=kwargs["scene_ids"][0], 22 | floor_plan=kwargs["floor_set"], 23 | max_trial=max_trial, 24 | max_round=max_round, 25 | category=category, 26 | is_cow_baseline=self.is_cow_baseline, 27 | is_vlamp_baseline=self.is_vlmap_baseline, 28 | ) 29 | logger.info("User Simulator Initialized.") 30 | logger.info(f"Max Trial: {max_trial}, Max Round: {max_round}, Cat: {category}") 31 | for k, v in self.usr_sim.topo_graph.instance_dict.items(): 32 | logger.info(f"Instance {k}: {v}") 33 | 34 | logger.info("User Goal init") 35 | for g in self.usr_sim.goal_gen.goals: 36 | logger.info(g) 37 | 38 | self.clear_gptctx = clear_gptctx 39 | assert self.is_vlmap_baseline is False 40 | assert self.is_cow_baseline is False 41 | 42 | suffix = f"orion_t{max_trial}r{max_round}_{category}" 43 | if self.use_memory: 44 | suffix += "_mem" 45 | else: 46 | suffix += "_nomem" 47 | if self.use_vlmap: 48 | suffix += "_vmp" 49 | else: 50 | suffix += "_novmp" 51 | if self.use_explore: 52 | suffix += "_exp" 53 | else: 54 | suffix += "_noexp" 55 | if self.clear_gptctx: 56 | suffix += "_noctx" # clear every new round 57 | else: 58 | suffix += "_ctx" 59 | 60 | logger.info(f"Dump dir suffix: {suffix}") 61 | self.dump_dir = os.path.join(self.save_dir, f"dump_{suffix}") 62 | if not os.path.exists(self.dump_dir): 63 | os.makedirs(self.dump_dir) 64 | else: 65 | logger.warning(f"Dump dir {self.dump_dir} already exists!") 66 | input("Press Enter to continue...") 67 | self.is_first_turn = True 68 | 69 | def _get_user_input(self): 70 | logger.info("\nGenreate User Utterance with GPT Simulator...") 71 | is_first_turn = self.is_first_turn 72 | if self.is_first_turn: 73 | agtresponse = "Hello, what should I do?" 74 | self.is_first_turn = False 75 | else: 76 | agtresponse = self.agent_response 77 | 78 | agt_predict = CountourMaskPrediction( 79 | predict_contours=self.predict_contours, 80 | predict_masks=self.predict_masks, 81 | ) 82 | 83 | task_finished, is_new_goal, is_new_round, goal_succ, instruction = self.usr_sim.step( 84 | agent_response=agtresponse, 85 | agtpose=self.agent_state.pose_2d, 86 | semantic_img=self.observations.semantic, 87 | agt_predict=agt_predict, 88 | step_count=self.step_count, 89 | first_turn=is_first_turn, 90 | ) 91 | self.task_finished = task_finished 92 | logger.info(f"[User Simulator] {instruction}") 93 | self.interaction_history.append( 94 | SucMsg(reward=goal_succ) 95 | ) 96 | if is_new_round: 97 | self.usr_sim.goal_gen._is_new_round = False 98 | last_round = self.usr_sim.goal_gen.last_round -1 99 | logger.info(f"Start new Round! 
From round {last_round} to {last_round+1}") 100 | if self.use_memory: 101 | self.object_memory.save(self.dump_dir, suffix=f"_round{last_round}") 102 | 103 | gpt_context = self.chatgpt.messages 104 | gpt_context_path = os.path.join(self.dump_dir, f"gptctx_round{last_round}.json") 105 | json.dump(gpt_context, open(gpt_context_path, "w")) 106 | 107 | if self.clear_gptctx: 108 | self.chatgpt.clear_ctx() 109 | 110 | if is_new_goal: 111 | # save early 112 | eval_result_path = os.path.join(self.dump_dir, "result.json") 113 | self.usr_sim.goal_gen.save(eval_result_path) 114 | conversation_path = os.path.join(self.dump_dir, "dialog.json") 115 | json.dump(self.record_conversations, open(conversation_path, "w")) 116 | 117 | # save money 118 | if re.search(r"(gpt-4|gpt4)", self.chatgpt.model): 119 | self.chatgpt.clear_ctx() 120 | 121 | 122 | return instruction 123 | 124 | def save(self): 125 | super().save() 126 | if not os.path.exists(self.dump_dir): 127 | os.makedirs(self.dump_dir, exist_ok=True) 128 | self.interaction_history.save(self.dump_dir) 129 | if self.use_memory: 130 | self.object_memory.save(self.dump_dir, suffix="_final") 131 | eval_result_path = os.path.join(self.dump_dir, "result.json") 132 | results = self.usr_sim.goal_gen.save(eval_result_path) 133 | logger.info(f"Dumped user simulator result to {eval_result_path}") 134 | conversation_path = os.path.join(self.dump_dir, "dialog.json") 135 | json.dump(self.record_conversations, open(conversation_path, "w")) 136 | logger.info(f"Dumped conversation to {conversation_path}") 137 | 138 | try: 139 | self.usr_sim.eval(results) 140 | except: 141 | pass 142 | 143 | 144 | if __name__ == "__main__": 145 | 146 | import argparse 147 | 148 | parser = argparse.ArgumentParser() 149 | parser.add_argument("--scene_id", type=str, default="4ok3usBNeis") 150 | parser.add_argument("--floor_b", type=int, default=-1) 151 | parser.add_argument("--floor_u", type=int, default=1) 152 | parser.add_argument("--category", type=str, default="mixed", choices=["landmark", "instruction", "description", "correction", "mixed", "none"]) 153 | args = parser.parse_args() 154 | 155 | max_trial=5 156 | max_round=1 157 | category=args.category 158 | use_memory=True 159 | use_vlmap=True 160 | use_explore=True 161 | clear_gptctx=False 162 | 163 | chatgpt_config=AzureGPT4Config() 164 | chatgpt_usrsim_config=AzureGPT35Config() 165 | 166 | game = ChatGPTControlAndUserSim( 167 | max_trial=max_trial, 168 | max_round=max_round, 169 | category=category, 170 | chatgpt_config=chatgpt_config, 171 | chatgpt_usrsim_config=chatgpt_usrsim_config, 172 | use_memory=use_memory, 173 | use_vlmap=use_vlmap, 174 | use_explore=use_explore, 175 | clear_gptctx=clear_gptctx, 176 | record_interaction=True, 177 | use_stream=False, 178 | fast_explore=True, 179 | scene_ids=[args.scene_id], 180 | floor_set=(args.floor_b, args.floor_u), 181 | display_shortside=256, 182 | save_dir_name="predict", 183 | auto_record=False, 184 | display_setting="rgb+occumap+topdownmap", 185 | headless=True, 186 | use_gt_pose=True, 187 | load_existing_occumap=True, 188 | save_new_occumap=False, 189 | ) 190 | 191 | game.run() 192 | -------------------------------------------------------------------------------- /scripts/user_agent_talk_vlmap.py: -------------------------------------------------------------------------------- 1 | import json 2 | import os 3 | import re 4 | import time 5 | from orion import logger 6 | from orion.agent_env.chatgpt_control_vlmap import ChatGPTControlVLMap 7 | from orion.user_simulator.chatgpt_based_sim 
import ( 8 | ChatGPTUserSimulator, 9 | CountourMaskPrediction, 10 | ) 11 | from orion.abstract.interaction_history import SucMsg 12 | from orion.config.chatgpt_config import * 13 | 14 | 15 | class ChatGPTControlAndUserSim(ChatGPTControlVLMap): 16 | def __init__(self, max_trial, max_round, category, chatgpt_usrsim_config, clear_gptctx=False, *args, **kwargs): 17 | super().__init__(*args, **kwargs) 18 | 19 | self.usr_sim = ChatGPTUserSimulator( 20 | chatgpt_usrsim_config=chatgpt_usrsim_config, 21 | scene_id=kwargs["scene_ids"][0], 22 | floor_plan=kwargs["floor_set"], 23 | max_trial=max_trial, 24 | max_round=max_round, 25 | category=category, 26 | is_cow_baseline=self.is_cow_baseline, 27 | is_vlamp_baseline=self.is_vlmap_baseline, 28 | ) 29 | logger.info("User Simulator Initialized.") 30 | logger.info(f"Max Trial: {max_trial}, Max Round: {max_round}, Cat: {category}") 31 | for k, v in self.usr_sim.topo_graph.instance_dict.items(): 32 | logger.info(f"Instance {k}: {v}") 33 | 34 | logger.info("User Goal init") 35 | for g in self.usr_sim.goal_gen.goals: 36 | logger.info(g) 37 | 38 | self.clear_gptctx = clear_gptctx 39 | assert self.is_vlmap_baseline is True 40 | assert self.is_cow_baseline is False 41 | 42 | suffix = f"vlmap_t{max_trial}r{max_round}_{category}" 43 | if self.use_memory: 44 | suffix += "_mem" 45 | else: 46 | suffix += "_nomem" 47 | if self.use_vlmap: 48 | suffix += "_vmp" 49 | else: 50 | suffix += "_novmp" 51 | if self.use_explore: 52 | suffix += "_exp" 53 | else: 54 | suffix += "_noexp" 55 | if self.clear_gptctx: 56 | suffix += "_noctx" # clear every new round 57 | else: 58 | suffix += "_ctx" 59 | 60 | logger.info(f"Dump dir suffix: {suffix}") 61 | self.dump_dir = os.path.join(self.save_dir, f"dump_{suffix}") 62 | if not os.path.exists(self.dump_dir): 63 | os.makedirs(self.dump_dir) 64 | else: 65 | logger.warning(f"Dump dir {self.dump_dir} already exists!") 66 | input("Press Enter to continue...") 67 | self.is_first_turn = True 68 | 69 | def _get_user_input(self): 70 | logger.info("\nGenreate User Utterance with GPT Simulator...") 71 | is_first_turn = self.is_first_turn 72 | if self.is_first_turn: 73 | agtresponse = "Hello, what should I do?" 74 | self.is_first_turn = False 75 | else: 76 | agtresponse = self.agent_response 77 | 78 | agt_predict = CountourMaskPrediction( 79 | predict_contours=self.predict_contours, 80 | predict_masks=self.predict_masks, 81 | ) 82 | 83 | task_finished, is_new_goal, is_new_round, goal_succ, instruction = self.usr_sim.step( 84 | agent_response=agtresponse, 85 | agtpose=self.agent_state.pose_2d, 86 | semantic_img=self.observations.semantic, 87 | agt_predict=agt_predict, 88 | step_count=self.step_count, 89 | first_turn=is_first_turn, 90 | ) 91 | self.task_finished = task_finished 92 | logger.info(f"[User Simulator] {instruction}") 93 | self.interaction_history.append( 94 | SucMsg(reward=goal_succ) 95 | ) 96 | if is_new_round: 97 | self.usr_sim.goal_gen._is_new_round = False 98 | last_round = self.usr_sim.goal_gen.last_round -1 99 | logger.info(f"Start new Round! 
From round {last_round} to {last_round+1}") 100 | if self.use_memory: 101 | self.object_memory.save(self.dump_dir, suffix=f"_round{last_round}") 102 | 103 | gpt_context = self.chatgpt.messages 104 | gpt_context_path = os.path.join(self.dump_dir, f"gptctx_round{last_round}.json") 105 | json.dump(gpt_context, open(gpt_context_path, "w")) 106 | 107 | if self.clear_gptctx: 108 | self.chatgpt.clear_ctx() 109 | 110 | if is_new_goal: 111 | # save early 112 | eval_result_path = os.path.join(self.dump_dir, "result.json") 113 | self.usr_sim.goal_gen.save(eval_result_path) 114 | conversation_path = os.path.join(self.dump_dir, "dialog.json") 115 | json.dump(self.record_conversations, open(conversation_path, "w")) 116 | 117 | # save money 118 | if re.search(r"(gpt-4|gpt4)", self.chatgpt.model): 119 | self.chatgpt.clear_ctx() 120 | 121 | 122 | return instruction 123 | 124 | def save(self): 125 | super().save() 126 | if not os.path.exists(self.dump_dir): 127 | os.makedirs(self.dump_dir, exist_ok=True) 128 | self.interaction_history.save(self.dump_dir) 129 | if self.use_memory: 130 | self.object_memory.save(self.dump_dir, suffix="_final") 131 | eval_result_path = os.path.join(self.dump_dir, "result.json") 132 | results = self.usr_sim.goal_gen.save(eval_result_path) 133 | logger.info(f"Dumped user simulator result to {eval_result_path}") 134 | conversation_path = os.path.join(self.dump_dir, "dialog.json") 135 | json.dump(self.record_conversations, open(conversation_path, "w")) 136 | logger.info(f"Dumped conversation to {conversation_path}") 137 | 138 | try: 139 | self.usr_sim.eval(results) 140 | except: 141 | pass 142 | 143 | 144 | if __name__ == "__main__": 145 | 146 | import argparse 147 | 148 | parser = argparse.ArgumentParser() 149 | parser.add_argument("--scene_id", type=str, default="4ok3usBNeis") 150 | parser.add_argument("--floor_b", type=int, default=-1) 151 | parser.add_argument("--floor_u", type=int, default=1) 152 | parser.add_argument("--category", type=str, default="mixed", choices=["landmark", "instruction", "description", "correction", "mixed", "none"]) 153 | args = parser.parse_args() 154 | 155 | max_trial=5 156 | max_round=1 157 | category=args.category 158 | use_memory=False 159 | use_vlmap=True 160 | use_explore=False 161 | clear_gptctx=False 162 | 163 | chatgpt_config=AzureGPT4Config() 164 | chatgpt_usrsim_config=AzureGPT35Config() 165 | 166 | game = ChatGPTControlAndUserSim( 167 | max_trial=max_trial, 168 | max_round=max_round, 169 | category=category, 170 | chatgpt_config=chatgpt_config, 171 | chatgpt_usrsim_config=chatgpt_usrsim_config, 172 | use_memory=use_memory, 173 | use_vlmap=use_vlmap, 174 | use_explore=use_explore, 175 | clear_gptctx=clear_gptctx, 176 | is_vlmap_baseline=True, 177 | vlmap_dir="lseg_vlmap", 178 | is_cow_baseline=False, 179 | record_interaction=False, 180 | use_stream=False, 181 | fast_explore=True, 182 | scene_ids=[args.scene_id], 183 | floor_set=(args.floor_b, args.floor_u), 184 | display_shortside=256, 185 | save_dir_name="predict", 186 | auto_record=False, 187 | display_setting="rgb+occumap+topdownmap", 188 | headless=True, 189 | use_gt_pose=True, 190 | load_existing_occumap=True, 191 | save_new_occumap=False, 192 | ) 193 | 194 | game.run() 195 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | from setuptools import find_packages, setup 2 | 3 | setup( 4 | name="navchat", 5 | author="Umich SLED Lab", 6 | packages=find_packages(), 7 
| ) 8 | -------------------------------------------------------------------------------- /tests/test_fmm_planner.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | import numpy as np 4 | 5 | from orion.abstract.pose import Agent2DPose 6 | from orion.navigation.fmm_planner import INV_ACTION_DICT, FMMPlanner 7 | 8 | data_dir = "data/experiments/predict_4ok3usBNeis_B1_U1" 9 | 10 | occumap_mask = np.load(os.path.join(data_dir, "occupancy_map.npy")) 11 | im = occumap_mask == 1 # floor 12 | 13 | planner = FMMPlanner() 14 | planner.set_traversible_map(im) 15 | 16 | y, x = np.where(planner.traversible_map) 17 | 18 | while True: 19 | goal_ind = np.random.choice(y.size) 20 | start_ind = np.random.choice(y.size) 21 | 22 | goal = Agent2DPose(x[goal_ind], y[goal_ind], 0) 23 | start = Agent2DPose(x[start_ind], y[start_ind], -np.pi / 2) 24 | 25 | print(f"start: {start}, goal: {goal}") 26 | reachable, states, a_list = planner.plan(start, goal, plot=True) 27 | # red square is the start 28 | # blue cross is the goal 29 | # red line is the planned path 30 | print(reachable, [INV_ACTION_DICT[a] for a in a_list]) 31 | -------------------------------------------------------------------------------- /tests/test_gradio_helloworld.py: -------------------------------------------------------------------------------- 1 | import random 2 | 3 | import gradio as gr 4 | 5 | with gr.Blocks() as demo: 6 | chatbot = gr.Chatbot() 7 | msg = gr.Textbox() 8 | 9 | def respond(message, chat_history): 10 | bot_message = random.choice(["How are you?", "I love you", "I'm very hungry"]) 11 | chat_history.append((message, bot_message)) 12 | return "", chat_history 13 | 14 | msg.submit(respond, [msg, chatbot], [msg, chatbot]) 15 | 16 | if __name__ == "__main__": 17 | demo.launch(share=True, server_port=7860, debug=True) 18 | -------------------------------------------------------------------------------- /tests/test_point_planner.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | import cv2 4 | import numpy as np 5 | from matplotlib import pyplot as plt 6 | 7 | from orion.map.occupancy import OccupancyMapping 8 | from orion.navigation.waypoint_planner import PointPlanner 9 | 10 | data_dir = "data/experiments/predict_4ok3usBNeis_B1_U1" 11 | 12 | 13 | occupancy_map = np.load(os.path.join(data_dir, "occupancy_map.npy")) 14 | navigation_mask = np.load(os.path.join(data_dir, "gt_navigable_mask.npy")) 15 | 16 | 17 | y, x = np.where(navigation_mask == 1) 18 | ymin, ymax = y.min(), y.max() 19 | xmin, xmax = x.min(), x.max() 20 | 21 | occupancy_map_crop = occupancy_map[ymin : ymax + 1, xmin : xmax + 1] 22 | navigation_mask_crop = navigation_mask[ymin : ymax + 1, xmin : xmax + 1] 23 | 24 | src = (102, 176) 25 | tgt = (46, 164) 26 | pts, reached = PointPlanner.line_search( 27 | src[0], 28 | src[1], 29 | tgt[0], 30 | tgt[1], 31 | occupancy_map_crop == OccupancyMapping.WALL, 32 | stop_at_wall=True, 33 | ) 34 | print("Can reach the tgt pt? 
", reached) 35 | print("pts along the line:", pts) 36 | pts = np.array(pts) 37 | navigation_mask_crop_color = np.stack( 38 | [navigation_mask_crop, navigation_mask_crop, navigation_mask_crop], axis=-1 39 | ) 40 | navigation_mask_crop_color = navigation_mask_crop_color.astype(np.uint8) 41 | navigation_mask_crop_color = 122 + navigation_mask_crop_color * 122 42 | navigation_mask_crop_color[occupancy_map_crop == OccupancyMapping.WALL] = [0, 0, 0] 43 | 44 | cv2.circle(navigation_mask_crop_color, (src[0], src[1]), 2, (0, 255, 0), -1) # green 45 | cv2.circle(navigation_mask_crop_color, (tgt[0], tgt[1]), 2, (0, 0, 255), -1) # blue 46 | cv2.polylines(navigation_mask_crop_color, [pts], False, (255, 0, 0), 1) # red 47 | plt.imshow(navigation_mask_crop_color) 48 | plt.show() 49 | -------------------------------------------------------------------------------- /tests/test_vlmap_planner.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | import numpy as np 4 | 5 | from orion.config.my_config import * 6 | from orion.map.map_search.search_voxel import VLMapSearch 7 | from orion.map.occupancy import OccupancyMapping 8 | 9 | data_dir = "data/experiments/predict_4ok3usBNeis_B1_U1" 10 | 11 | map_querier = VLMapSearch( 12 | load_sparse_map_path=os.path.join(data_dir, "lseg_vlmap/sparse_vxl_map.npz"), 13 | ) 14 | 15 | occu_map = np.load(os.path.join(data_dir, "occupancy_map.npy")) 16 | navigatable_mask = np.load(os.path.join(data_dir, "gt_navigable_mask.npy")) 17 | 18 | navigatable_mask_crop = navigatable_mask[ 19 | map_querier.zmin : map_querier.zmax + 1, map_querier.xmin : map_querier.xmax + 1 20 | ] 21 | wall_mask = occu_map == OccupancyMapping.WALL 22 | wall_mask_crop = wall_mask[ 23 | map_querier.zmin : map_querier.zmax + 1, map_querier.xmin : map_querier.xmax + 1 24 | ] 25 | 26 | 27 | predict_map, query_labels = map_querier.query(["fridge"]) 28 | predict_map_crop = predict_map[ 29 | map_querier.zmin : map_querier.zmax + 1, map_querier.xmin : map_querier.xmax + 1 30 | ] 31 | 32 | for tgt_name in query_labels: 33 | if tgt_name in ["other", "floor", "wall"]: 34 | continue 35 | map_querier.plan( 36 | tgt_name, 37 | query_labels, 38 | predict_map_crop, 39 | navigatable_mask_crop, 40 | wall_mask_crop, 41 | show=True, 42 | ) 43 | # red dot is the ceter of the target object 44 | # yellow dot is the viewpoint 45 | --------------------------------------------------------------------------------