├── .gitignore ├── .gitmodules ├── LICENSE ├── README.md ├── assets ├── find_shoes.png ├── framework.png ├── spatial_reason.png └── user_sim.png ├── demos ├── demo.jpg ├── demo_gradcam_output.jpg ├── demo_groundedsam_output.jpg ├── demo_lseg_output.jpg ├── play_chatgpt_api.py ├── play_gradcam.py ├── play_groundingSAM.py ├── play_habitat_teleop.py ├── play_interactive_gradio.py ├── play_interactive_terminal.py └── play_lseg.py ├── orion ├── __init__.py ├── abstract │ ├── __init__.py │ ├── agent.py │ ├── interaction_history.py │ ├── interfaces.py │ ├── memory.py │ ├── perception.py │ ├── pose.py │ └── usersim.py ├── agent_env │ ├── chatgpt_control_base.py │ ├── chatgpt_control_cow.py │ ├── chatgpt_control_orion.py │ ├── chatgpt_control_vlmap.py │ ├── fbe.py │ ├── habitat │ │ ├── base.py │ │ ├── holonomic_actions.py │ │ └── utils.py │ ├── hybrid_search.py │ └── teleop.py ├── chatgpt │ ├── __init__.py │ ├── api.py │ └── prompts │ │ ├── __init__.py │ │ ├── agent_functions.py │ │ ├── agent_prompts.py │ │ ├── baseline_cow_prompt.py │ │ ├── baseline_vlmap_prompt.py │ │ ├── usersim_prompts_correction.py │ │ ├── usersim_prompts_description.py │ │ ├── usersim_prompts_instruction.py │ │ ├── usersim_prompts_landmark.py │ │ ├── usersim_prompts_mix.py │ │ └── usersim_prompts_none.py ├── config │ ├── __init__.py │ ├── chatgpt_config.py │ ├── my_config.py │ └── my_objectnav_hm3d.yaml ├── gradio_init_img.jpg ├── map │ ├── __init__.py │ ├── map.py │ ├── map_build │ │ └── build_voxel.py │ ├── map_search │ │ ├── search_base.py │ │ └── search_voxel.py │ ├── occupancy.py │ ├── voxel.py │ └── voxel_sparse.py ├── memory │ └── neural_memory2d.py ├── navigation │ ├── fmm_planner.py │ ├── frontier_based_exploration.py │ ├── shortest_path_follower_wrapper.py │ └── waypoint_planner.py ├── perception │ ├── __init__.py │ ├── detector │ │ ├── __init__.py │ │ ├── clipgradcam.py │ │ └── groundingSAM.py │ └── extractor │ │ ├── __init__.py │ │ ├── clipbase.py │ │ ├── concept_fusion_extractor.py │ │ └── lseg_extractor.py ├── user_simulator │ ├── __init__.py │ ├── base.py │ ├── chatgpt_based_sim.py │ ├── goals │ │ ├── 4ok3usBNeis │ │ │ ├── final.json │ │ │ └── objects.json │ │ ├── LT9Jq6dN3Ea │ │ │ ├── final.json │ │ │ └── objects.json │ │ ├── MHPLjHsuG27 │ │ │ ├── final.json │ │ │ └── objects.json │ │ ├── QaLdnwvtxbs │ │ │ ├── final.json │ │ │ └── objects.json │ │ ├── TEEsavR23oF │ │ │ ├── final.json │ │ │ └── objects.json │ │ ├── cvZr5TUy5C5 │ │ │ ├── final.json │ │ │ └── objects.json │ │ ├── h1zeeAwLh9Z │ │ │ ├── final.json │ │ │ └── objects.json │ │ ├── mL8ThkuaVTM │ │ │ ├── final.json │ │ │ └── objects.json │ │ ├── qyAac8rV8Zk │ │ │ ├── final.json │ │ │ └── objects.json │ │ └── y9hTuugGdiq │ │ │ ├── final.json │ │ │ └── objects.json │ ├── rule_based_sim.py │ ├── topograph.py │ └── user_goal.py └── utils │ ├── __init__.py │ ├── clip_score_utils.py │ ├── file_load.py │ ├── geometry.py │ ├── gradio_interface.py │ └── visulization.py ├── requirements.txt ├── scripts ├── build_vlmap.py ├── collect_scene_fbe.py ├── create_video.py ├── user_agent_talk_cf.py ├── user_agent_talk_cow.py ├── user_agent_talk_orion.py └── user_agent_talk_vlmap.py ├── setup.py └── tests ├── test_fmm_planner.py ├── test_gradio_helloworld.py ├── test_point_planner.py └── test_vlmap_planner.py /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 
| build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | wheels/ 23 | share/python-wheels/ 24 | *.egg-info/ 25 | .installed.cfg 26 | *.egg 27 | MANIFEST 28 | 29 | # PyInstaller 30 | # Usually these files are written by a python script from a template 31 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 32 | *.manifest 33 | *.spec 34 | 35 | # Installer logs 36 | pip-log.txt 37 | pip-delete-this-directory.txt 38 | 39 | # Unit test / coverage reports 40 | htmlcov/ 41 | .tox/ 42 | .nox/ 43 | .coverage 44 | .coverage.* 45 | .cache 46 | nosetests.xml 47 | coverage.xml 48 | *.cover 49 | *.py,cover 50 | .hypothesis/ 51 | .pytest_cache/ 52 | cover/ 53 | 54 | # Translations 55 | *.mo 56 | *.pot 57 | 58 | # Django stuff: 59 | *.log 60 | local_settings.py 61 | db.sqlite3 62 | db.sqlite3-journal 63 | 64 | # Flask stuff: 65 | instance/ 66 | .webassets-cache 67 | 68 | # Scrapy stuff: 69 | .scrapy 70 | 71 | # Sphinx documentation 72 | docs/_build/ 73 | 74 | # PyBuilder 75 | .pybuilder/ 76 | target/ 77 | 78 | # Jupyter Notebook 79 | .ipynb_checkpoints 80 | 81 | # IPython 82 | profile_default/ 83 | ipython_config.py 84 | 85 | # pyenv 86 | # For a library or package, you might want to ignore these files since the code is 87 | # intended to run in multiple environments; otherwise, check them in: 88 | # .python-version 89 | 90 | # pipenv 91 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 92 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 93 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 94 | # install all needed dependencies. 95 | #Pipfile.lock 96 | 97 | # poetry 98 | # Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control. 99 | # This is especially recommended for binary packages to ensure reproducibility, and is more 100 | # commonly ignored for libraries. 101 | # https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control 102 | #poetry.lock 103 | 104 | # pdm 105 | # Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control. 106 | #pdm.lock 107 | # pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it 108 | # in version control. 109 | # https://pdm.fming.dev/#use-with-ide 110 | .pdm.toml 111 | 112 | # PEP 582; used by e.g. 
github.com/David-OConnor/pyflow and github.com/pdm-project/pdm 113 | __pypackages__/ 114 | 115 | # Celery stuff 116 | celerybeat-schedule 117 | celerybeat.pid 118 | 119 | # SageMath parsed files 120 | *.sage.py 121 | 122 | # Environments 123 | .env 124 | .venv 125 | env/ 126 | venv/ 127 | ENV/ 128 | env.bak/ 129 | venv.bak/ 130 | 131 | # Spyder project settings 132 | .spyderproject 133 | .spyproject 134 | 135 | # Rope project settings 136 | .ropeproject 137 | 138 | # mkdocs documentation 139 | /site 140 | 141 | # mypy 142 | .mypy_cache/ 143 | .dmypy.json 144 | dmypy.json 145 | 146 | # Pyre type checker 147 | .pyre/ 148 | 149 | # pytype static type analyzer 150 | .pytype/ 151 | 152 | # Cython debug symbols 153 | cython_debug/ 154 | 155 | # PyCharm 156 | # JetBrains specific template is maintained in a separate JetBrains.gitignore that can 157 | # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore 158 | # and can be added to the global gitignore or merged into this file. For a more nuclear 159 | # option (not recommended) you can uncomment the following to ignore the entire idea folder. 160 | #.idea/ 161 | 162 | data 163 | */chatgpt-config.py 164 | logs 165 | images 166 | sandbox 167 | 168 | .vscode/ 169 | -------------------------------------------------------------------------------- /.gitmodules: -------------------------------------------------------------------------------- 1 | [submodule "third_party/habitat-lab"] 2 | path = third_party/habitat-lab 3 | url = https://github.com/facebookresearch/habitat-lab.git 4 | [submodule "third_party/Grounded-Segment-Anything"] 5 | path = third_party/Grounded-Segment-Anything 6 | url = https://github.com/IDEA-Research/Grounded-Segment-Anything.git 7 | [submodule "orion/perception/detector/gradcam"] 8 | path = orion/perception/detector/gradcam 9 | url = https://github.com/hila-chefer/Transformer-MM-Explainability.git 10 | [submodule "orion/perception/extractor/lseg_module"] 11 | path = orion/perception/extractor/lseg_module 12 | url = https://github.com/YinpeiDai/lseg-module.git 13 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2021 Intelligent Systems Lab Org 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 
22 | -------------------------------------------------------------------------------- /assets/find_shoes.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sled-group/navchat/afb21a4c9cb13f671534be5460e3c9e5867e5512/assets/find_shoes.png -------------------------------------------------------------------------------- /assets/framework.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sled-group/navchat/afb21a4c9cb13f671534be5460e3c9e5867e5512/assets/framework.png -------------------------------------------------------------------------------- /assets/spatial_reason.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sled-group/navchat/afb21a4c9cb13f671534be5460e3c9e5867e5512/assets/spatial_reason.png -------------------------------------------------------------------------------- /assets/user_sim.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sled-group/navchat/afb21a4c9cb13f671534be5460e3c9e5867e5512/assets/user_sim.png -------------------------------------------------------------------------------- /demos/demo.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sled-group/navchat/afb21a4c9cb13f671534be5460e3c9e5867e5512/demos/demo.jpg -------------------------------------------------------------------------------- /demos/demo_gradcam_output.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sled-group/navchat/afb21a4c9cb13f671534be5460e3c9e5867e5512/demos/demo_gradcam_output.jpg -------------------------------------------------------------------------------- /demos/demo_groundedsam_output.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sled-group/navchat/afb21a4c9cb13f671534be5460e3c9e5867e5512/demos/demo_groundedsam_output.jpg -------------------------------------------------------------------------------- /demos/demo_lseg_output.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sled-group/navchat/afb21a4c9cb13f671534be5460e3c9e5867e5512/demos/demo_lseg_output.jpg -------------------------------------------------------------------------------- /demos/play_chatgpt_api.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | 3 | from orion.chatgpt.api import ChatAPI 4 | from orion.config.chatgpt_config import ( 5 | AzureGPT4Config, 6 | AzureGPT35Config, 7 | OpenAIGPT4Config, 8 | OpenAIGPT35Config, 9 | ) 10 | 11 | parser = argparse.ArgumentParser() 12 | parser.add_argument("--api-type", choices=["openai", "azure"], default="azure") 13 | parser.add_argument("--model-type", choices=["gpt35", "gpt4"], default="gpt4") 14 | parser.add_argument("--stream", action="store_true", default=False) 15 | 16 | args = parser.parse_args() 17 | 18 | if args.api_type == "openai": 19 | if args.model_type == "gpt35": 20 | chat_api = ChatAPI(config=OpenAIGPT35Config()) 21 | elif args.model_type == "gpt4": 22 | chat_api = ChatAPI(config=OpenAIGPT4Config()) 23 | else: 24 | raise ValueError("model_type can only be ['gpt35', 'gpt4']") 25 | elif args.api_type == "azure": 26 | if args.model_type == "gpt35": 27 | 
chat_api = ChatAPI(config=AzureGPT35Config()) 28 | elif args.model_type == "gpt4": 29 | chat_api = ChatAPI(config=AzureGPT4Config()) 30 | else: 31 | raise ValueError("model_type can only be ['gpt35', 'gpt4']") 32 | 33 | while True: 34 | utter = input("\nUser>>>") 35 | chat_api.add_user_message(utter) 36 | if args.stream: 37 | response = "" 38 | gen = chat_api.get_system_response_stream() 39 | print("Response>>>", end="") 40 | for chuck in gen: 41 | response += chuck 42 | print(chuck, end="") 43 | print() 44 | else: 45 | response = chat_api.get_system_response() 46 | print("Response>>>", response) 47 | chat_api.add_assistant_message(response) 48 | -------------------------------------------------------------------------------- /demos/play_gradcam.py: -------------------------------------------------------------------------------- 1 | import cv2 2 | from orion.perception.detector.clipgradcam import CLIPGradCAM 3 | 4 | 5 | from orion.utils.file_load import load_image 6 | 7 | input_image = load_image("demos/demo.jpg") 8 | 9 | clipgradcam = CLIPGradCAM() 10 | returnpt = clipgradcam.predict(input_image, "dog") 11 | 12 | # plot the centroid into the image 13 | import matplotlib.pyplot as plt 14 | 15 | plt.imshow(input_image) 16 | plt.scatter(returnpt[0], returnpt[1], c="r", s=100) 17 | plt.savefig( 18 | "demos/demo_gradcam_output.jpg", 19 | bbox_inches="tight", 20 | dpi=300, 21 | pad_inches=0.0, 22 | ) 23 | -------------------------------------------------------------------------------- /demos/play_groundingSAM.py: -------------------------------------------------------------------------------- 1 | import cv2 2 | from orion.abstract.interfaces import TextQuery 3 | from orion.perception.detector.groundingSAM import GroundingSAM, show_mask, show_box 4 | 5 | import matplotlib.pyplot as plt 6 | 7 | from orion.utils.file_load import load_image 8 | 9 | input_image = load_image("demos/demo.jpg") 10 | 11 | text_prompt = TextQuery(prompt="dog on the grass", target="dog") 12 | 13 | 14 | plt.figure(figsize=(10, 10)) 15 | plt.imshow(input_image) 16 | 17 | groundsam = GroundingSAM() 18 | 19 | mmboxes = groundsam.predict(input_image, text_prompt) 20 | for mask in mmboxes.masks: 21 | show_mask(mask, plt.gca(), random_color=True) 22 | for box, label in zip(mmboxes.bboxes, mmboxes.texts): 23 | show_box(box, plt.gca(), label) 24 | 25 | plt.axis("off") 26 | plt.savefig( 27 | "demos/demo_groundedsam_output.jpg", 28 | bbox_inches="tight", 29 | dpi=300, 30 | pad_inches=0.0, 31 | ) 32 | -------------------------------------------------------------------------------- /demos/play_habitat_teleop.py: -------------------------------------------------------------------------------- 1 | from orion.agent_env.teleop import TeleOpAgentEnv 2 | 3 | 4 | # pick a scene from orion.config.my_config.SCENE_ID_FLOOR_SET 5 | game = TeleOpAgentEnv( 6 | scene_ids=["MHPLjHsuG27"], 7 | floor_set=(-2, 2), 8 | display_shortside=256, 9 | save_dir_name="teleop", 10 | auto_record=False, 11 | display_setting="rgb+topdownmap", 12 | use_gt_pose=True, 13 | load_existing_occumap=True, 14 | ) 15 | game.run() 16 | -------------------------------------------------------------------------------- /demos/play_interactive_gradio.py: -------------------------------------------------------------------------------- 1 | from multiprocessing import Process, Queue 2 | 3 | from orion.utils.gradio_interface import ( 4 | GradioInterface, 5 | GradioDemoChatGPTControlORION, 6 | ) 7 | from orion.config.chatgpt_config import AzureGPT4Config 8 | 9 | 10 | def 
run_gradio(image_queue, user_message_queue, bot_message_queue): 11 | gradio_interface = GradioInterface( 12 | image_queue=image_queue, 13 | user_message_queue=user_message_queue, 14 | bot_message_queue=bot_message_queue, 15 | ) 16 | gradio_interface.run() 17 | 18 | 19 | def main(): 20 | user_message_queue = Queue() 21 | bot_message_queue = Queue() 22 | image_queue = Queue() 23 | 24 | p = Process( 25 | target=run_gradio, args=(image_queue, user_message_queue, bot_message_queue) 26 | ) 27 | p.start() 28 | 29 | game = GradioDemoChatGPTControlORION( 30 | image_queue=image_queue, 31 | user_message_queue=user_message_queue, 32 | bot_message_queue=bot_message_queue, 33 | chatgpt_config=AzureGPT4Config(), 34 | dump_dir="dump_dir", 35 | use_stream=True, 36 | record_interaction=False, 37 | use_memory=True, 38 | use_vlmap=True, 39 | fast_explore=True, 40 | display_shortside=480, 41 | save_dir_name="predict", 42 | scene_ids=["4ok3usBNeis"], 43 | floor_set=(-1, 1), 44 | auto_record=False, 45 | display_setting="rgb+topdownmap", 46 | display_horizontally=False, 47 | headless=True, 48 | use_gt_pose=True, 49 | load_existing_occumap=True, 50 | save_new_occumap=False, 51 | ) 52 | 53 | game.run() 54 | 55 | p.join() 56 | 57 | 58 | if __name__ == "__main__": 59 | main() 60 | -------------------------------------------------------------------------------- /demos/play_interactive_terminal.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | from orion.agent_env.chatgpt_control_orion import ChatGPTControlORION 3 | from orion.agent_env.chatgpt_control_cow import ChatGPTControlCoW 4 | from orion.agent_env.chatgpt_control_vlmap import ChatGPTControlVLMap 5 | from orion.agent_env.hybrid_search import HybridSearchAgentEnv 6 | from orion.config.chatgpt_config import ( 7 | AzureGPT35Config, 8 | AzureGPT4Config, 9 | OpenAIGPT35Config, 10 | OpenAIGPT4Config, 11 | ) 12 | 13 | 14 | if __name__ == "__main__": 15 | parser = argparse.ArgumentParser() 16 | parser.add_argument("--api-type", choices=["openai", "azure"], default="azure") 17 | parser.add_argument("--model-type", choices=["gpt35", "gpt4"], default="gpt4") 18 | parser.add_argument("--stream", action="store_true", default=False) 19 | parser.add_argument("--use_memory", type=bool, default=True) 20 | parser.add_argument("--use_vlmap", type=bool, default=True) 21 | parser.add_argument("--fast_explore", action="store_true", default=False) 22 | parser.add_argument("--scene_id", type=str, default="4ok3usBNeis") 23 | parser.add_argument("--floor_set", type=int, nargs=2, default=(-1, 1)) 24 | parser.add_argument("--use_chatgpt", type=bool, default=True) 25 | parser.add_argument( 26 | "--method-type", type=str, default="orion", choices=["orion", "vlmap", "cow"] 27 | ) 28 | parser.add_argument( 29 | "--vlmap_dir", 30 | type=str, 31 | default="lseg_vlmap", 32 | choices=["lseg_vlmap", "conceptfusion_vlmap"], 33 | ) 34 | parser.add_argument("--dump_dir", type=str, default="dump") 35 | parser.add_argument("--record_interaction", type=bool, default=False) 36 | 37 | args = parser.parse_args() 38 | 39 | if args.api_type == "openai": 40 | if args.model_type == "gpt35": 41 | chatgpt_config = OpenAIGPT35Config() 42 | elif args.model_type == "gpt4": 43 | chatgpt_config = OpenAIGPT4Config() # type: ignore 44 | else: 45 | raise ValueError("model_type can only be ['gpt35', 'gpt4']") 46 | elif args.api_type == "azure": 47 | if args.model_type == "gpt35": 48 | chatgpt_config = AzureGPT35Config() 49 | elif args.model_type == "gpt4": 50 | 
chatgpt_config = AzureGPT4Config() 51 | else: 52 | raise ValueError("model_type can only be ['gpt35', 'gpt4']") 53 | 54 | if args.method_type == "orion": 55 | is_vlmap_baseline = False 56 | is_cow_baseline = False 57 | ChatGPTControl = ChatGPTControlORION 58 | dump_dir = args.dump_dir + "/orion" 59 | elif args.method_type == "vlmap": 60 | is_vlmap_baseline = True 61 | is_cow_baseline = False 62 | ChatGPTControl = ChatGPTControlVLMap 63 | dump_dir = args.dump_dir + "/vlmap" 64 | elif args.method_type == "cow": 65 | is_vlmap_baseline = False 66 | is_cow_baseline = True 67 | ChatGPTControl = ChatGPTControlCoW 68 | dump_dir = args.dump_dir + "/cow" 69 | else: 70 | raise ValueError("method_type can only be ['orion', 'vlmap', 'cow']") 71 | 72 | if args.use_chatgpt: 73 | # talk in natural language in the cmd line, e.g. "go to the shelf" 74 | game = ChatGPTControl( 75 | use_stream=args.stream, 76 | use_memory=args.use_memory, 77 | use_vlmap=args.use_vlmap, 78 | fast_explore=args.fast_explore, 79 | display_shortside=256, 80 | save_dir_name="predict", 81 | auto_record=False, 82 | display_setting="rgb+occumap+topdownmap", 83 | headless=False, 84 | use_gt_pose=True, 85 | load_existing_occumap=True, 86 | save_new_occumap=False, 87 | scene_ids=[args.scene_id], 88 | floor_set=args.floor_set, 89 | chatgpt_config=chatgpt_config, 90 | vlmap_dir=args.vlmap_dir, 91 | is_vlmap_baseline=is_vlmap_baseline, 92 | is_cow_baseline=is_cow_baseline, 93 | dump_dir=dump_dir, 94 | record_interaction=args.record_interaction, 95 | ) 96 | 97 | else: 98 | # talk with restricted inputs 99 | # the input string is (phrase|noun), e.g. "red apple|apple" 100 | # or just input a noun, e.g. "apple" to the cmd line 101 | game = HybridSearchAgentEnv( 102 | use_memory=args.use_memory, 103 | use_vlmap=args.use_vlmap, 104 | fast_explore=args.fast_explore, 105 | scene_ids=[args.scene_id], 106 | floor_set=args.floor_set, 107 | display_shortside=256, 108 | save_dir_name="predict", 109 | auto_record=False, 110 | display_setting="rgb+occumap+topdownmap", 111 | use_gt_pose=True, 112 | load_existing_occumap=True, 113 | save_new_occumap=False, 114 | vlmap_dir=args.vlmap_dir, 115 | is_vlmap_baseline=is_vlmap_baseline, 116 | is_cow_baseline=is_cow_baseline, 117 | ) 118 | 119 | game.run() 120 | -------------------------------------------------------------------------------- /demos/play_lseg.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import cv2 3 | 4 | from orion.utils.visulization import plot_pixel_feature_match 5 | from orion.utils.file_load import load_image 6 | from orion.config.my_config import LsegConfig 7 | from orion.perception.extractor.clipbase import CLIPBase 8 | from orion.abstract.interfaces import TextQueries 9 | from orion.config.my_config import CLIPConfig_vitB32_openai, VLMAP_QUERY_LIST_BASE 10 | from orion.perception.extractor.lseg_extractor import LSegExtractor 11 | 12 | # # Test LSegExtractor 13 | lseg_extractor = LSegExtractor(cfg=LsegConfig()) 14 | 15 | 16 | input_image = load_image("demos/demo.jpg") 17 | # NB: has to change size to 480x640 !!!! 
18 | # Otherwise, the Lseg will not work 19 | input_image = cv2.resize(input_image, (640, 480)) 20 | 21 | clip_extractor = CLIPBase(CLIPConfig_vitB32_openai(device="cpu")) 22 | 23 | text_list = VLMAP_QUERY_LIST_BASE + ["dog", "cat", "grass", "tree"] 24 | text_feat = clip_extractor.encode_text(TextQueries(text_list)) 25 | text_feat = text_feat.cpu().numpy() 26 | print("text feature size: ", text_feat.shape) 27 | 28 | with torch.no_grad(): 29 | pixel_feat = lseg_extractor.predict(input_image).cpu().numpy() 30 | print("img feature size:", pixel_feat.shape) 31 | 32 | plot_pixel_feature_match( 33 | pixel_feat, text_feat, text_list, save_path="demos/demo_lseg_output.jpg" 34 | ) 35 | -------------------------------------------------------------------------------- /orion/__init__.py: -------------------------------------------------------------------------------- 1 | import logging 2 | from datetime import datetime 3 | 4 | logger = logging.getLogger(__name__) 5 | 6 | logging.basicConfig( 7 | # filename=f"logs/{datetime.now().strftime('%Y-%m-%d_%H-%M-%S')}.log", 8 | format="[%(asctime)s %(levelname)s] %(message)s", 9 | level=logging.INFO, 10 | datefmt="%Y-%m-%d %H:%M:%S", 11 | ) 12 | -------------------------------------------------------------------------------- /orion/abstract/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sled-group/navchat/afb21a4c9cb13f671534be5460e3c9e5867e5512/orion/abstract/__init__.py -------------------------------------------------------------------------------- /orion/abstract/agent.py: -------------------------------------------------------------------------------- 1 | from abc import ABC, abstractmethod 2 | 3 | from attr import define 4 | 5 | from .pose import Agent2DPose, Agent3DPose 6 | 7 | 8 | @define 9 | class AgentState: 10 | pose_2d: Agent2DPose 11 | pose_3d: Agent3DPose 12 | camera_height: float = 0.88 # in meter 13 | 14 | 15 | class AgentEnv(ABC): 16 | """ 17 | Base agent that can interact with the environment 18 | """ 19 | 20 | @abstractmethod 21 | def _prepare_env(self, env_config, *args, **kwargs): 22 | """ 23 | 1. set env config. 24 | 2. initialize env that can interact with agent. 25 | 3. get ground truth semantics 26 | """ 27 | 28 | @abstractmethod 29 | def _prepare_occupancy_map(self, *args, **kwargs): 30 | """ 31 | Here we use frontier-based exploration policy. 32 | """ 33 | 34 | @abstractmethod 35 | def _prepare_low_level_planner(self, *args, **kwargs): 36 | """ 37 | low-level PointNav 38 | """ 39 | 40 | @abstractmethod 41 | def _prepare_agent_state(self, *args, **kwargs): 42 | """ 43 | pose, inventory, etc. 
44 | """ 45 | 46 | @abstractmethod 47 | def _observation_wrapper(self, *args, **kwargs): 48 | pass 49 | 50 | @abstractmethod 51 | def reset(self): 52 | pass 53 | 54 | def _prepare_perception(self, *args, **kwargs): 55 | """grounding SAM""" 56 | 57 | def _prepare_vlmap(self, *args, **kwargs): 58 | """vlmap is running in the background""" 59 | 60 | def _prepare_memory(self, *args, **kwargs): 61 | """ 62 | Memory where the agent can store experiences 63 | """ 64 | -------------------------------------------------------------------------------- /orion/abstract/interaction_history.py: -------------------------------------------------------------------------------- 1 | import os 2 | import pickle 3 | from copy import deepcopy 4 | from typing import List, Optional, Union 5 | 6 | import cv2 7 | import numpy as np 8 | from attr import define 9 | 10 | from orion import logger 11 | from orion.abstract.agent import AgentState 12 | from orion.abstract.interfaces import Observations 13 | from orion.abstract.perception import MaskedBBOX 14 | from orion.abstract.pose import Agent2DPose, Agent3DPose 15 | 16 | 17 | @define 18 | class UsrMsg: 19 | # usr utterance 20 | text: str 21 | 22 | 23 | @define 24 | class SucMsg: 25 | # success message 26 | reward: float 27 | 28 | 29 | @define 30 | class GPTMsg: 31 | # chatgpt response 32 | content: str 33 | 34 | 35 | @define 36 | class FuncMsg: 37 | # function return message 38 | content: str 39 | 40 | 41 | @define 42 | class BotMsg: 43 | # robot response 44 | text: str 45 | 46 | 47 | @define 48 | class PointAct: 49 | # waypoint 50 | pt2d: Agent2DPose 51 | pt3d: Agent3DPose # pose by simulator [x, y, z] 52 | 53 | 54 | @define 55 | class StepAct: 56 | action: Optional[str] # low-level action 57 | next_obs: Observations 58 | next_state: AgentState 59 | 60 | def compression(self): 61 | self.next_obs = None 62 | # # depth consumes too much memory 63 | # self.next_obs.semantic = None 64 | # self.next_obs.depth = (self.next_obs.depth * 1000).astype(np.uint16) 65 | # if self.next_obs.info is not None and "collisions" in self.next_obs.info and self.next_obs.info["collisions"]["is_collision"]: 66 | # self.next_obs.info = {"collisions": self.next_obs.info["collisions"]} 67 | # else: 68 | # self.next_obs.info = None 69 | 70 | 71 | @define 72 | class DetAct: 73 | mmbox: MaskedBBOX 74 | 75 | 76 | JointType = Union[UsrMsg, GPTMsg, BotMsg, PointAct, StepAct, DetAct] 77 | 78 | 79 | class InteractionHistory: 80 | def __init__(self, record=False): 81 | self.record = record 82 | self.interactions: List[JointType] = [] 83 | 84 | def append(self, item: JointType): 85 | if not self.record: 86 | return 87 | _item = deepcopy(item) 88 | 89 | if isinstance(_item, StepAct): 90 | _item.compression() 91 | 92 | self.interactions.append(_item) 93 | 94 | def __len__(self): 95 | return len(self.interactions) 96 | 97 | def __getitem__(self, idx): 98 | return self.interactions[idx] 99 | 100 | def __iter__(self): 101 | return iter(self.interactions) 102 | 103 | def save(self, save_dir): 104 | if not os.path.exists(save_dir): 105 | os.makedirs(save_dir) 106 | save_path = os.path.join(save_dir, "interaction_history.pkl") 107 | logger.info(f"save interaction history to {save_path}") 108 | with open(save_path, "wb") as f: 109 | pickle.dump(self.interactions, f) 110 | -------------------------------------------------------------------------------- /orion/abstract/interfaces.py: -------------------------------------------------------------------------------- 1 | from typing import Any, Dict, List, Optional 2 | 3 |
import attr 4 | import numpy as np 5 | 6 | 7 | @attr.s(auto_attribs=True) 8 | class Observations: 9 | """Sensor observations.""" 10 | 11 | # (camera_height, camera_width, 3) in [0, 255] 12 | rgb: np.ndarray 13 | # (camera_height, camera_width, 1) in meters, not normalized! 14 | depth: Optional[np.ndarray] = None 15 | # (camera_height, camera_width, 1) in [0, num_sem_categories - 1] 16 | semantic: Optional[np.ndarray] = None 17 | info: Optional[Dict[str, Any]] = None # additional information 18 | rel_cam_pose: Optional[np.ndarray] = None # relative to the first camera frame 19 | compass: Optional[np.ndarray] = None # in radians 20 | gps: Optional[np.ndarray] = None # in meters 21 | 22 | 23 | @attr.s(auto_attribs=True) 24 | class TextQuery: 25 | """A text prompt to query vlmap or perception model.""" 26 | 27 | prompt: str # description of the target object, 28 | # like "a yellow chair near the bed" 29 | target: Optional[str] = None # a noun word for the target object 30 | # like 'chair', 'bedroom' 31 | 32 | def __attrs_post_init__(self): 33 | if self.target is None: 34 | self.target = self.prompt 35 | 36 | def __str__(self): 37 | return f"(target: {self.target}, prompt: {self.prompt})" 38 | 39 | 40 | @attr.s(auto_attribs=True) 41 | class TextQueries: 42 | """A list of text prompts to query vlmap or perception model.""" 43 | 44 | prompts: List[str] 45 | targets: Optional[List[str]] = None 46 | 47 | def __attrs_post_init__(self): 48 | if self.targets is None: 49 | self.targets = self.prompts 50 | 51 | def to_str(self, prompt=True) -> str: 52 | """Return a string representation of prompts or targets.""" 53 | items = self.prompts if prompt else self.targets 54 | return " . ".join(items) if items else "" 55 | 56 | def __getitem__(self, idx): 57 | return self.prompts[idx], self.targets[idx] 58 | 59 | def __len__(self): 60 | return len(self.prompts) 61 | 62 | def __iter__(self): 63 | return iter(zip(self.prompts, self.targets)) 64 | -------------------------------------------------------------------------------- /orion/abstract/memory.py: -------------------------------------------------------------------------------- 1 | from abc import ABC, abstractmethod 2 | from typing import Any 3 | 4 | from attr import define 5 | 6 | 7 | @define 8 | class EpisodicMemory(ABC): 9 | """ 10 | Base memory that can store experiences 11 | """ 12 | 13 | positive_memory: Any 14 | negative_memory: Any 15 | 16 | @abstractmethod 17 | def add(self, *args, **kwargs): 18 | pass 19 | 20 | @abstractmethod 21 | def delete(self, *args, **kwargs): 22 | pass 23 | 24 | @abstractmethod 25 | def update(self, *args, **kwargs): 26 | pass 27 | 28 | @abstractmethod 29 | def save(self, *args, **kwargs): 30 | pass 31 | 32 | @abstractmethod 33 | def load(self, *args, **kwargs): 34 | pass 35 | 36 | @abstractmethod 37 | def retrieve(self, *args, **kwargs): 38 | pass 39 | -------------------------------------------------------------------------------- /orion/abstract/perception.py: -------------------------------------------------------------------------------- 1 | from abc import ABC, abstractmethod 2 | from dataclasses import dataclass 3 | from typing import Any, List, Optional, Union 4 | 5 | import numpy as np 6 | import torch 7 | 8 | from orion import logger 9 | from orion.abstract.interfaces import TextQuery 10 | 11 | 12 | class PerceptionModule(ABC): 13 | @abstractmethod 14 | def predict( 15 | self, rgb: np.ndarray, txt: TextQuery 16 | ) -> Union["MaskedBBOX", float, np.ndarray, torch.Tensor]: 17 | """ 18 | single image prediction 19 |
""" 20 | 21 | 22 | class DetectionModule(PerceptionModule): 23 | """img -> bboxes""" 24 | 25 | @abstractmethod 26 | def predict(self, rgb: np.ndarray, txt: TextQuery) -> "MaskedBBOX": 27 | pass 28 | 29 | 30 | class ExtractorModule(PerceptionModule): 31 | """img -> feature vector""" 32 | 33 | def __init__(self, *args, **kwargs): 34 | super().__init__(*args, **kwargs) 35 | self.feat_dim = None 36 | 37 | @abstractmethod 38 | def predict( 39 | self, rgb: np.ndarray, txt: Optional[TextQuery] = None 40 | ) -> Union[np.ndarray, torch.Tensor]: 41 | pass 42 | 43 | 44 | @dataclass 45 | class MaskedBBOX: 46 | flag: bool 47 | bboxes: List[Any] # (x1, y1, x2, y2) 48 | texts: List[str] 49 | masks: List[np.ndarray] 50 | 51 | def __bool__(self): 52 | return self.flag 53 | 54 | def __len__(self): 55 | return len(self.bboxes) 56 | 57 | def __iter__(self): 58 | return iter(zip(self.bboxes, self.texts, self.masks)) 59 | 60 | def __getitem__(self, idx): 61 | return self.bboxes[idx], self.texts[idx], self.masks[idx] 62 | 63 | @classmethod 64 | def from_tuple_list(cls, flag, tuple_list): 65 | if len(tuple_list) == 0: 66 | return cls(flag, [], [], []) 67 | tuple_list = sorted( 68 | tuple_list, key=lambda x: cls._bbox_area(x[0]), reverse=True 69 | ) 70 | return cls(flag, *zip(*tuple_list)) 71 | 72 | @staticmethod 73 | def _bbox_area(bbox): 74 | if bbox[2] < bbox[0] or bbox[3] < bbox[1]: 75 | return 0 76 | return (bbox[2] - bbox[0]) * (bbox[3] - bbox[1]) 77 | 78 | def de_duplication(self): 79 | if len(self) in [0, 1]: 80 | return self 81 | 82 | mask_idx = [] 83 | for i in range(len(self)): 84 | for j in range(i + 1, len(self)): 85 | bbox_i = self.bboxes[i] 86 | bbox_j = self.bboxes[j] 87 | bbox_j_area = self._bbox_area(bbox_j) 88 | intersect_box = [ 89 | max(bbox_i[0], bbox_j[0]), 90 | max(bbox_i[1], bbox_j[1]), 91 | min(bbox_i[2], bbox_j[2]), 92 | min(bbox_i[3], bbox_j[3]), 93 | ] 94 | intersect_area = self._bbox_area(intersect_box) 95 | 96 | mask_i: np.ndarray = self.masks[i].squeeze() 97 | mask_j: np.ndarray = self.masks[j].squeeze() 98 | 99 | intersect_mask = np.logical_and(mask_i, mask_j) 100 | 101 | if ( 102 | intersect_area / bbox_j_area > 0.8 103 | or np.sum(intersect_mask) / np.sum(mask_j) > 0.9 104 | ): 105 | logger.info( 106 | f"[MBBOX] de_duplication: " f"{self.bboxes[i]}, {self.bboxes[j]}" 107 | ) 108 | mask_idx.append(j) 109 | 110 | tuple_list = [] 111 | logger.info(f"[MBBOX] before: {len(self)}") 112 | for i in range(len(self)): 113 | if i not in mask_idx: 114 | tuple_list.append((self.bboxes[i], self.texts[i], self.masks[i])) 115 | logger.info(f"[MBBOX] after: {len(tuple_list)}") 116 | return MaskedBBOX.from_tuple_list(self.flag, tuple_list) 117 | -------------------------------------------------------------------------------- /orion/abstract/usersim.py: -------------------------------------------------------------------------------- 1 | from abc import ABC, abstractmethod 2 | 3 | 4 | class UserSimulator(ABC): 5 | @abstractmethod 6 | def reset(self, *args, **kwargs): 7 | pass 8 | 9 | @abstractmethod 10 | def step(self, *args, **kwargs): 11 | pass 12 | 13 | @abstractmethod 14 | def evaluate(self, *args, **kwargs): 15 | pass 16 | -------------------------------------------------------------------------------- /orion/agent_env/fbe.py: -------------------------------------------------------------------------------- 1 | """ 2 | Frontier Based Exploration 3 | This is used to explore the environment, collect rgbd data, and build the vlmap.
4 | """ 5 | 6 | import numpy as np 7 | 8 | from orion import logger 9 | from orion.abstract.pose import Agent2DPose, FrontierWayPoint 10 | from orion.agent_env.habitat.base import HabitatAgentEnv 11 | from orion.agent_env.habitat.utils import try_action_import_v2 12 | from orion.utils import visulization as vis 13 | 14 | MOVE_FORWARD, _, TURN_LEFT, TURN_RIGHT, STOP = try_action_import_v2() 15 | 16 | 17 | class FBEAgentEnv(HabitatAgentEnv): 18 | def __init__(self, fast_explore: bool, *args, **kwargs): 19 | super().__init__(*args, **kwargs) 20 | self.fast_explore = fast_explore 21 | 22 | def move_and_spin(self, spin_angle=360, move_ahead=0): 23 | for _ in range(move_ahead): 24 | self.step(MOVE_FORWARD) 25 | if spin_angle > 0: 26 | for _ in range(spin_angle // self.config.SIMULATOR.TURN_ANGLE): 27 | self.step(TURN_RIGHT) 28 | else: 29 | for _ in range(-spin_angle // self.config.SIMULATOR.TURN_ANGLE): 30 | self.step(TURN_LEFT) 31 | 32 | def loop(self): 33 | self.fbe.reset(reset_floor=True, init_spin=True) 34 | while self.step_count < 500: 35 | if self.fbe.mode == self.fbe.InitSpinMode: 36 | self.move_and_spin(360) 37 | self.fbe._init_check_large_room() 38 | self.fbe.mode = self.fbe.ExploreMode 39 | self.fbe.set_explore_strategy(self.fast_explore) 40 | 41 | elif self.fbe.mode == self.fbe.ExploreMode: 42 | self.follower.set_traversible_map(self.fbe.traversable_map) 43 | navigable_mask = self.follower.get_navigable_mask() 44 | if not self.fbe.fast_explore: 45 | # go to viewpt, look around, then go to goalpt, look around 46 | next_plan: FrontierWayPoint = self.fbe.plan( 47 | navigable_mask, with_viewpoint=True 48 | ) 49 | else: 50 | # go to goalpt directly, update at every serveral steps 51 | next_plan: FrontierWayPoint = self.fbe.plan( 52 | navigable_mask, with_viewpoint=False 53 | ) 54 | if next_plan is None: 55 | logger.info("all goal finished===") 56 | self.fbe.reset(reset_floor=True, init_spin=True) 57 | break 58 | else: 59 | if not self.fbe.fast_explore: 60 | viewpt: Agent2DPose = next_plan.viewpt 61 | if viewpt is not None: 62 | logger.info(f"=== move to view point first {viewpt}===") 63 | 64 | best_action = self._get_next_action(viewpt) 65 | if best_action == 0: 66 | logger.info( 67 | "cannot change position with the follower===" 68 | ) 69 | while best_action != 0: 70 | self.step(best_action) 71 | best_action = self._get_next_action(viewpt) 72 | self.move_and_spin(-90) 73 | self.move_and_spin(180) 74 | 75 | goalpt: Agent2DPose = next_plan.goalpt 76 | if goalpt is not None: 77 | logger.info(f"=== move to goal point {goalpt}===") 78 | best_action = self._get_next_action( 79 | goalpt, new_goal_dist=10 80 | ) 81 | if best_action == 0: 82 | logger.info( 83 | "cannot change position with the follower===" 84 | ) 85 | while best_action != 0: 86 | self.step(best_action) 87 | best_action = self._get_next_action( 88 | goalpt, new_goal_dist=10 89 | ) 90 | self.move_and_spin(-90) 91 | self.move_and_spin(180) 92 | 93 | else: 94 | goalpt: Agent2DPose = next_plan.goalpt 95 | if goalpt is None: 96 | continue 97 | logger.info(f"=== move to goal point {goalpt}===") 98 | best_action = self._get_next_action(goalpt, new_goal_dist=10) 99 | count = self.fbe.fbecfg.fast_explore_forwardcount 100 | if best_action == 0: 101 | logger.info("cannot change position with the follower===") 102 | while best_action != 0 and count > 0: 103 | self.step(best_action) 104 | if best_action == MOVE_FORWARD: 105 | count -= 1 106 | best_action = self._get_next_action( 107 | goalpt, new_goal_dist=10 108 | ) 109 | 110 | if ( 111 | 
self.fbe.l2(goalpt, self._grdpose) 112 | < self.fbe.fbecfg.dist_large_thres 113 | ): 114 | self.move_and_spin(-90) 115 | self.move_and_spin(180) 116 | 117 | 118 | if __name__ == "__main__": 119 | from orion.config.my_config import SCENE_ID_FLOOR_SET 120 | 121 | game = FBEAgentEnv( 122 | total_round=1, 123 | scene_ids=["4ok3usBNeis"], 124 | floor_set=(-1, 1), 125 | fast_explore=True, 126 | display_shortside=256, 127 | save_dir_name="predict", 128 | auto_record=True, 129 | record_dir="recordings_prelim_fbe", 130 | display_setting="rgb+occumap+topdownmap", 131 | use_gt_pose=False, 132 | load_existing_occumap=True, 133 | save_new_occumap=False, 134 | ) 135 | game.run() 136 | -------------------------------------------------------------------------------- /orion/agent_env/habitat/holonomic_actions.py: -------------------------------------------------------------------------------- 1 | import cv2 2 | import habitat 3 | import habitat_sim 4 | from habitat.core.registry import registry 5 | from habitat.core.simulator import ActionSpaceConfiguration 6 | from habitat.sims.habitat_simulator.actions import ( 7 | HabitatSimActions, 8 | HabitatSimV1ActionSpaceConfiguration, 9 | ) 10 | from habitat.tasks.nav.nav import SimulatorTaskAction 11 | 12 | 13 | @habitat.registry.register_action_space_configuration(name="Holonomic") 14 | class HolonomicMovement(HabitatSimV1ActionSpaceConfiguration): 15 | def get(self): 16 | config = super().get() 17 | config[HabitatSimActions.MOVE_BACKWARD] = habitat_sim.ActionSpec( 18 | "move_backward", 19 | habitat_sim.ActuationSpec(amount=self.config.FORWARD_STEP_SIZE), 20 | ) 21 | return config 22 | 23 | 24 | @habitat.registry.register_task_action 25 | class MoveBackwardAction(SimulatorTaskAction): 26 | name = "MOVE_BACKWARD" 27 | 28 | def _get_uuid(self, *args, **kwargs) -> str: 29 | return "move_backward" 30 | 31 | def step(self, *args, **kwargs): 32 | r"""Update ``_metric``, this method is called from ``Env`` on each 33 | ``step``. 
34 | """ 35 | return self._sim.step(HabitatSimActions.MOVE_BACKWARD) 36 | 37 | 38 | HabitatSimActions.extend_action_space("MOVE_BACKWARD") 39 | -------------------------------------------------------------------------------- /orion/agent_env/habitat/utils.py: -------------------------------------------------------------------------------- 1 | import habitat 2 | from habitat.core.utils import try_cv2_import 3 | from habitat.utils.visualizations import maps 4 | from habitat.utils.visualizations.utils import append_text_to_image, images_to_video 5 | 6 | cv2 = try_cv2_import() 7 | 8 | 9 | def transform_rgb_bgr(image): 10 | return image[:, :, [2, 1, 0]] 11 | 12 | 13 | def quiet(): 14 | import os 15 | 16 | os.environ["MAGNUM_LOG"] = "quiet" 17 | os.environ["HABITAT_SIM_LOG"] = "quiet" 18 | 19 | 20 | def update_fov(config, fov=90): 21 | config.defrost() 22 | config.SIMULATOR.DEPTH_SENSOR.HFOV = fov 23 | config.SIMULATOR.RGB_SENSOR.HFOV = fov 24 | config.SIMULATOR.SEMANTIC_SENSOR.HFOV = fov 25 | config.freeze() 26 | 27 | 28 | def update_scene(config, split="val", scene_ids=["4ok3usBNeis"]): 29 | config.defrost() 30 | if split is not None: 31 | config.DATASET.SPLIT = split 32 | if scene_ids is not None: 33 | config.DATASET.CONTENT_SCENES = scene_ids 34 | config.freeze() 35 | 36 | 37 | def update_holonomic_action(config): 38 | config.defrost() 39 | config.TASK.ACTIONS.MOVE_BACKWARD = habitat.config.Config() 40 | config.TASK.ACTIONS.MOVE_BACKWARD.TYPE = "MoveBackwardAction" 41 | config.SIMULATOR.ACTION_SPACE_CONFIG = "Holonomic" 42 | config.freeze() 43 | 44 | 45 | def add_top_down_map_and_collision(config): 46 | config.defrost() 47 | config.TASK.MEASUREMENTS.append("TOP_DOWN_MAP") 48 | config.TASK.MEASUREMENTS.append("COLLISIONS") 49 | config.freeze() 50 | 51 | 52 | def try_action_import(): 53 | try: 54 | from habitat.sims.habitat_simulator.actions import HabitatSimActions 55 | 56 | MOVE_FORWARD = HabitatSimActions.MOVE_FORWARD 57 | TURN_LEFT = HabitatSimActions.TURN_LEFT 58 | TURN_RIGHT = HabitatSimActions.TURN_RIGHT 59 | STOP = HabitatSimActions.STOP 60 | except: 61 | MOVE_FORWARD = 0 62 | TURN_LEFT = 1 63 | TURN_RIGHT = 2 64 | STOP = 3 65 | return MOVE_FORWARD, TURN_LEFT, TURN_RIGHT, STOP 66 | 67 | 68 | def try_action_import_v2(): 69 | try: 70 | from habitat.sims.habitat_simulator.actions import HabitatSimActions 71 | 72 | MOVE_FORWARD = HabitatSimActions.MOVE_FORWARD 73 | MOVE_BACKWARD = "MOVE_BACKWARD" 74 | TURN_LEFT = HabitatSimActions.TURN_LEFT 75 | TURN_RIGHT = HabitatSimActions.TURN_RIGHT 76 | # LOOK_UP = HabitatSimActions.LOOK_UP 77 | # LOOK_DOWN = HabitatSimActions.LOOK_DOWN 78 | STOP = HabitatSimActions.STOP 79 | return MOVE_FORWARD, MOVE_BACKWARD, TURN_LEFT, TURN_RIGHT, STOP 80 | except: 81 | raise NotImplementedError 82 | -------------------------------------------------------------------------------- /orion/agent_env/teleop.py: -------------------------------------------------------------------------------- 1 | import cv2 2 | 3 | from orion.agent_env.habitat.base import HabitatAgentEnv 4 | from orion.agent_env.habitat.utils import try_action_import_v2 5 | 6 | FORWARD_KEY = "w" 7 | BACKWARD_KEY = "s" 8 | LEFT_KEY = "a" 9 | RIGHT_KEY = "d" 10 | FINISH = "p" 11 | 12 | MOVE_FORWARD, MOVE_BACKWARD, TURN_LEFT, TURN_RIGHT, STOP = try_action_import_v2() 13 | 14 | 15 | class TeleOpAgentEnv(HabitatAgentEnv): 16 | def loop(self): 17 | while True: 18 | keystroke = cv2.waitKey(0) 19 | if keystroke == ord(FORWARD_KEY): 20 | action = MOVE_FORWARD 21 | elif keystroke == ord(BACKWARD_KEY): 22 | action 
= MOVE_BACKWARD 23 | elif keystroke == ord(LEFT_KEY): 24 | action = TURN_LEFT 25 | elif keystroke == ord(RIGHT_KEY): 26 | action = TURN_RIGHT 27 | elif keystroke == ord(FINISH): 28 | action = STOP 29 | else: 30 | print("INVALID KEY") 31 | action = None 32 | if action is not None: 33 | self.step(action) 34 | -------------------------------------------------------------------------------- /orion/chatgpt/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sled-group/navchat/afb21a4c9cb13f671534be5460e3c9e5867e5512/orion/chatgpt/__init__.py -------------------------------------------------------------------------------- /orion/chatgpt/api.py: -------------------------------------------------------------------------------- 1 | import re 2 | from typing import Dict, List, Union 3 | 4 | from openai import AzureOpenAI, OpenAI 5 | 6 | from orion import logger 7 | from orion.config.chatgpt_config import AzureConfig, OpenAIConfig 8 | 9 | 10 | class ChatAPI: 11 | def __init__( 12 | self, 13 | config: Union[OpenAIConfig, AzureConfig], 14 | ): 15 | self.messages: List[Dict[str, str]] = [] 16 | self.history: List[Dict[str, str]] = [] 17 | 18 | if isinstance(config, AzureConfig): 19 | self.client = AzureOpenAI( 20 | api_key=config.api_key, 21 | api_version=config.api_version, 22 | azure_endpoint=config.azure_endpoint, 23 | ) 24 | elif isinstance(config, OpenAIConfig): 25 | self.client = OpenAI(api_key=config.api_key) 26 | else: 27 | print(type(config)) 28 | raise ValueError("api_type can only be ['azure', 'openai']") 29 | 30 | self.model = config.model 31 | self.max_limit = min(config.limit - 2000, 32000) 32 | self.price = config.price 33 | self.usage_tokens = 0 34 | self.cost = 0 # USD money cost 35 | 36 | def add_function_message(self, content: str): 37 | if not re.search(r"^Function Return:", content): 38 | content = "Function Return:\n" + content 39 | self.messages.append({"role": "user", "content": content}) 40 | self.history.append(self.messages[-1]) 41 | 42 | def add_user_message(self, content: str): 43 | if not re.search(r"^User Utterance:", content): 44 | content = "User Utterance: " + content 45 | self.messages.append({"role": "user", "content": content}) 46 | self.history.append(self.messages[-1]) 47 | 48 | def add_assistant_message(self, content: str): 49 | self.messages.append({"role": "assistant", "content": content}) 50 | self.history.append(self.messages[-1]) 51 | 52 | def get_system_response(self) -> str: 53 | try: 54 | self.do_truncation = False 55 | 56 | response = self.client.chat.completions.create( 57 | model=self.model, messages=self.messages 58 | ) 59 | response_message = response.choices[0].message 60 | 61 | usage_tokens = response.usage.total_tokens 62 | self.cost += usage_tokens * self.price / 1000 63 | logger.info( 64 | f"[ChatGPT] current model {self.model}, usage_tokens: {usage_tokens}, " 65 | f"cost: ${self.cost:.5f}, price: ${self.price:.5f}" 66 | ) 67 | if usage_tokens > self.max_limit: 68 | logger.info( 69 | f"[ChatGPT] truncate the conversation to avoid token usage limit, save money" 70 | ) 71 | self.truncate() 72 | 73 | return response_message.content 74 | except Exception as e: 75 | logger.warning(f"[ChatGPT] Error: {e}") 76 | return "Sorry, I am not able to respond to that." 
77 | 78 | def get_system_response_stream(self): 79 | response = self.client.chat.completions.create( 80 | model=self.model, messages=self.messages, stream=True 81 | ) 82 | for chuck in response: 83 | if len(chuck.choices) > 0 and chuck.choices[0].finish_reason != "stop": 84 | if chuck.choices[0].delta.content is None: 85 | continue 86 | yield chuck.choices[0].delta.content 87 | 88 | # stream mode does not support token usage check, give a rough estimation 89 | usage_tokens = int(sum([len(item["content"]) for item in self.message]) / 3.5) 90 | self.usage_tokens = usage_tokens 91 | self.cost += usage_tokens * self.price / 1000 92 | logger.info( 93 | f"[ChatGPT] current model {self.model}, usage_tokens approximation: {usage_tokens}," 94 | f" cost: ${self.cost:.2f}, price: ${self.price:.2f}" 95 | ) 96 | 97 | if usage_tokens > self.max_limit: 98 | logger.info( 99 | f"[ChatGPT] truncate the conversation to avoid token usage limit" 100 | ) 101 | self.truncate() 102 | 103 | @property 104 | def message(self): 105 | return self.messages 106 | 107 | @message.setter 108 | def message(self, message): 109 | """ 110 | Usually at the dialog beginning 111 | {"role": "system", "content": system_prompt}, 112 | {"role": "user", "content": user_message_first_turn}, 113 | {"role": "assistant", "content": assistant_message_first_turn}, 114 | """ 115 | self.init_length = len(message) 116 | self.messages = message 117 | self.history.extend(self.messages) 118 | 119 | def truncate(self, percentage: int = 3): 120 | self.do_truncation = True 121 | usr_idx = [ 122 | idx 123 | for idx in range(len(self.messages)) 124 | if self.messages[idx]["role"] == "user" 125 | ] 126 | middle_idx = usr_idx[len(usr_idx) // percentage] 127 | logger.info( 128 | f"\033[33m [ChatGPT] truncate the conversation at index: {middle_idx} from {usr_idx} \033[m" 129 | ) 130 | self.messages = self.messages[: self.init_length] + self.messages[middle_idx:] 131 | 132 | def clear(self): 133 | """end the conversation""" 134 | self.messages = [] 135 | self.history = [] 136 | 137 | def clear_ctx(self): 138 | """clear the context""" 139 | usr_idx = [ 140 | idx 141 | for idx in range(len(self.messages)) 142 | if self.messages[idx]["role"] == "user" 143 | ] 144 | if len(usr_idx) == 0: 145 | return 146 | elif len(usr_idx) <= 3: 147 | last_idx = usr_idx[0] 148 | else: 149 | last_idx = usr_idx[-3] # keep the last 4 user messages 150 | logger.info( 151 | f"\033[33m [ChatGPT] clear context, user message remove at index: {last_idx} from {usr_idx} \033[m" 152 | ) 153 | self.messages = self.messages[: self.init_length] + self.messages[last_idx:] 154 | -------------------------------------------------------------------------------- /orion/chatgpt/prompts/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sled-group/navchat/afb21a4c9cb13f671534be5460e3c9e5867e5512/orion/chatgpt/prompts/__init__.py -------------------------------------------------------------------------------- /orion/chatgpt/prompts/baseline_cow_prompt.py: -------------------------------------------------------------------------------- 1 | SYSTEM_PROMPT = """You are controlling a robot to navigate to target objects according to the user's instructions. 2 | 3 | Your goals are: 4 | 1. Try to understand the user utterance, decide which API to call. 5 | 2. Use the return messages of the API to infer what to do next. You can continue to call APIs to control the robot or talk with the user.
6 | 7 | All commands you can use are below: 8 | 9 | There are 5 APIs you can call: 10 | [ 11 | { 12 | "name": "dialog", 13 | "description": "talk to the user, usually the last function called for one turn", 14 | "parameters": { 15 | "type": "object", 16 | "properties": { 17 | "content": { 18 | "type": "string", 19 | "description": "dialog content", 20 | } 21 | }, 22 | }, 23 | "required": ["content"], 24 | }, 25 | { 26 | "name": "search_object", 27 | "description": "use the frontier-based exploration to search the object, return possible detected results. Issuing this command again will continue the search", 28 | "parameters": { 29 | "type": "object", 30 | "properties": { 31 | "target": { 32 | "type": "string", 33 | "description": "the target object you want to detect", 34 | } 35 | }, 36 | }, 37 | "required": ["target"], 38 | }, 39 | { 40 | "name": "rotate", 41 | "description": "rotate the robot left or right", 42 | "parameters": { 43 | "type": "object", 44 | "properties": { 45 | "angle": { 46 | "type": "number", 47 | "description": "the angle in degrees to rotate the robot, > 0 for right, < 0 for left, should be in [-180, 180]", 48 | } 49 | }, 50 | }, 51 | "required": ["angle"], 52 | }, 53 | { 54 | "name": "move", 55 | "description": "issue the command to move the robot in the environment forward or backward", 56 | "parameters": { 57 | "type": "object", 58 | "properties": { 59 | "distance": { 60 | "type": "number", 61 | "description": "the total units to move the robot, > 0 for forward, < 0 for backward", 62 | } 63 | }, 64 | }, 65 | "required": ["distance"], 66 | }, 67 | { 68 | "name": "goto_points", 69 | "description": "move the robot to specific points", 70 | "parameters": { 71 | "type": "object", 72 | "properties": { 73 | "points": { 74 | "type": "array", 75 | "description": "The list of points to go. Each point is a polar coordinate (distance, angle) tuple with respect to the current robot position.", 76 | } 77 | }, 78 | }, 79 | "required": ["points"], 80 | }, 81 | ] 82 | 83 | ------------- 84 | 85 | Important Notes: 86 | 1. When the user gives a new goal, you should use `search_object` to find it; if that fails, continue to use `search_object` to find the next possible object. 87 | 2. If the user corrects your detection, you should remember what the object_id actually is, so that next time you will not make the same mistake. 88 | 89 | Examples: 90 | 91 | User Utterance: You need to find the laundry machine in the room, we bought it from Walmart in 2020. It is on your left side, approximately 100 units away. Can you move towards it? 92 | 93 | { 94 | "Thought": "Now I should search the laundry machine", 95 | "Command": {"name": "search_object", "args": {"target": "laundry machine"}} 96 | } 97 | 98 | 99 | Function Return: 100 | Found possible object. 101 | 102 | { 103 | "Thought": "I found the laundry machine, I should confirm with user", 104 | "Command": {"name": "dialog", "args": {"content": "I found the laundry machine, is it correct?"}} 105 | } 106 | 107 | User Utterance: No, it's a freezer. 108 | 109 | { 110 | "Thought": "I should search again", 111 | "Command": {"name": "search_object", "args": {"target": "laundry machine"}} 112 | } 113 | 114 | Function Return: Already reached laundry_machine_2.
115 | 116 | { 117 | "Thought": "I can just confirm with user, then go to the next possible laundry machine", 118 | "Command": {"name": "dialog", "args": {"content": "I found the laundry machine, is it correct?"}} 119 | } 120 | 121 | User Utterance: No, the laundry machine is on your right side, approximately 100 units away. Can you move towards it? 122 | 123 | { 124 | "Thought": "I should go to the next possible laundry machine, right can be 0-180, I'll set it to 90", 125 | "Command": {"name": "goto_points", "args": {"points": [[100, 90]]}} 126 | } 127 | 128 | Function Return: Already reached specified points. 129 | 130 | { 131 | "Thought": "I should confirm with user", 132 | "Command": {"name": "dialog", "args": {"content": "I found the laundry machine, is it correct?"}} 133 | } 134 | 135 | User Utterance: Yes, it's correct. 136 | 137 | """ 138 | 139 | SYSTEM_PROMPT_FUNCALL = SYSTEM_PROMPT 140 | -------------------------------------------------------------------------------- /orion/chatgpt/prompts/usersim_prompts_description.py: -------------------------------------------------------------------------------- 1 | SYSTEM_PROMPT = """Given multiple selected object goals, you are supposed to talk to a robot to reach them one by one by providing different types of user feedback to guide the navigation. 2 | 3 | Each turn, you will be given the below two message sources to generate natural language instructions. 4 | 5 | 1. The utterance from the robot. 6 | 2. Function messages from the system, returned as a dictionary, including: 7 | { 8 | "is_current_goal_reached": bool, # whether the robot reached the current goal. if reached, the next goal will be given. 9 | "is_max_trial_reached": bool, # whether the robot reached the maximum trial number for the current goal. if reached maximum, the next goal will be given. 10 | "current_goal/next_goal": { # the current goal or the next goal. if the current goal or maximum trials reached, current goal is empty, next goal will be given. 11 | "object_id": str, # unique id of the object 12 | "object_name": str, 13 | "room_name": str, # which room the goal is located in. This is used for Landmark User Feedback or general feedback of the object goal. 14 | "description": str, # descriptive visual information of the current object goal, split by '|'. This is used for Description User Feedback. 15 | "explaination": str, # the explaination from the dictionary. This is used for Description User Feedback, to help the robot understand the object goal better. 16 | "num_trial": int, # total number of trials for the current goal by the robot. Maximum number is 5. 17 | "num_round": int # total number of rounds for all objects. You will ask the robot to find each object one by one for several rounds. 18 | } 19 | } 20 | 21 | Note: 22 | 1. You can only convey the object name to the robot, not the object id. 23 | 2. Do not give all the description and explaination at once! You can give them one by one for each turn during the robot trials. Also, please add more language variation; make it varied for different turns. 24 | 3. Be sure to adhere to the function messages provided by the system, but add more language variation. Do not simply copy the information. 25 | 4. If is_current_goal_reached=true, this means the robot has already reached the goal. You can tell the robot "you already reached the goal xxx. Let's look for the next goal..." 26 | 5. Do not add too much information in one turn; the description and explaination should be given one by one for each turn.
27 | 28 | You should only respond in a JSON format dict as described below: 29 | { 30 | "Thought": "think about the current goal, the mistakes the robot may make, the possible feedback you can provide, how to decribe the goal more variant, etc.", 31 | "Response": "Your response to the robot." 32 | } 33 | Make sure the generated string can be parsed by `json.loads`. 34 | 35 | Example: 36 | 37 | Robot Utterance: Hello, what should I do? 38 | Function Message: 39 | { 40 | "current_goal": {}, 41 | "is_current_goal_reached": false, 42 | "is_max_trial_reached": false, 43 | "next_goal": { 44 | "object_id": "recliner_0", 45 | "object_name": "recliner", 46 | "room_name": "living room", 47 | "description": "massage and heat for elderly", 48 | "explaination": "a lying chair that can be adjusted to a reclining position", 49 | "num_trial": 0, 50 | "num_round": 1 51 | } 52 | } 53 | 54 | 55 | { 56 | "Thought": "Current goal is a recliner, I can tell the robot the description of the object.", 57 | "Response": "Can you find a recliner for me? It's a lying chair that can be adjusted to a reclining position." 58 | } 59 | 60 | 61 | Robot Utterance: is it correct? 62 | Function Message: 63 | { 64 | "current_goal": { 65 | "object_id": "recliner_0", 66 | "object_name": "recliner", 67 | "room_name": "living room", 68 | "description": "massage and heat for elderly", 69 | "explaination": "a lying chair that can be adjusted to a reclining position", 70 | "num_trial": 0, 71 | "num_round": 1 72 | }, 73 | "is_current_goal_reached": false, 74 | "is_max_trial_reached": false, 75 | "next_goal": {} 76 | } 77 | 78 | { 79 | "Thought": "is_current_goal_reached is false, so the robot not reach the goal, I can tell the robot the spatial information of the goal object.", 80 | "Response": "No. it's not the recliner I'm looking for. The recliner I'm looking for is looks like a lying chair, it's for massage and heat for elderly" 81 | } 82 | 83 | Robot Utterance: I found 2 possible couches, one is 12 units away at -33 degrees, and the other is 30 units away at 24 degrees. Is either of them the couch you're looking for? 84 | Function Message: 85 | { 86 | "current_goal": {}, 87 | "is_current_goal_reached": true, 88 | "is_max_trial_reached": false, 89 | "next_goal": { 90 | "object_id": "tv_0", 91 | "object_name": "tv", 92 | "room_name": "bedroom", 93 | "description": "LED TV", 94 | "explaination": "a television for living show" 95 | "num_trial": 0, 96 | "num_round": 1 97 | } 98 | } 99 | 100 | { 101 | "Thought": "The robot found two possible couches, the goal is reached, I can tell the robot the next goal.", 102 | "Response": "Yes. You found the couch I'm looking for. Now, find the TV in the bedroom, it's a LED TV for living show." 103 | } 104 | 105 | 106 | Robot Utterance: I'm sorry but I cannot verify the origin of the wardrobe. Is the wardrobe I detected the one you're looking for? 107 | Function Message: 108 | { 109 | "current_goal": { 110 | "object_id": "wardrobe_3", 111 | "object_name": "wardrobe", 112 | "room_name": "bedroom", 113 | "description": "a shelf near the bed", 114 | "explaination": "a large, tall cupboard for storing clothes", 115 | "num_trial": 2, 116 | "num_round": 1 117 | }, 118 | "is_current_goal_reached": false, 119 | "is_max_trial_reached": false, 120 | "next_goal": {} 121 | } 122 | 123 | { 124 | "Thought": "The robot detect is wrong since current goal is not reached, I can tell the robot more information about the goal object.", 125 | "Response": "No, you are wrong. 
The wardrobe I'm looking for is a large, tall cupboard for storing clothes, and it's near the bed." 126 | } 127 | 128 | Robot Utterance: I found the microwave, is it correct? 129 | Function Message: 130 | { 131 | "current_goal": {}, 132 | "is_current_goal_reached": false, 133 | "is_max_trial_reached": true, 134 | "next_goal": { 135 | "object_id": "kitchen counter_0", 136 | "object_name": "kitchen counter", 137 | "room_name": "kitchen", 138 | "description": "", 139 | "explaination": "a flat surface in a kitchen for preparing food", 140 | "num_trial": 0, 141 | "num_round": 1 142 | } 143 | } 144 | 145 | { 146 | "Thought": "The goal is not reached, but the maximum trial is reached, I can tell the robot the next goal.", 147 | "Response": "Your detection is wrong. Now, find the kitchen counter in the kitchen, it's a flat surface for preparing food." 148 | } 149 | 150 | ---------- 151 | Let's start 152 | 153 | """ 154 | -------------------------------------------------------------------------------- /orion/chatgpt/prompts/usersim_prompts_none.py: -------------------------------------------------------------------------------- 1 | SYSTEM_PROMPT = """Given multiple selected object goals, you are supposed to talk to a robot to reach them one by one. 2 | 3 | Each turn, you will be given below two messages sources to generate natural language instructions. 4 | 5 | 1. The utterance from the robot. 6 | 2. Function messages from the system. return as a dictionary, including: 7 | { 8 | "is_current_goal_reached": bool, # whether the robot reached the current goal. if reached, the next goal will be given. 9 | "is_max_trial_reached": bool, # whether the robot reached the maximum trial number for the current goal. if reached maximum, the next goal will be given. 10 | "current_goal/next_goal": { # the current goal or the next goal. if the current goal or maximum trials reached, current goal is empty, next goal will be given. 11 | "object_id": str, # unique id of the object 12 | "object_name": str, 13 | "room_name": str, # which room name the goal is located. This is used for Landmark User Feedback or general feedback of the object goal. 14 | "num_trial": int, # total number of trials for the current goal by the robot. Maximum number is 5. 15 | "num_round": int # total number of rounds for all objects. You will ask the robot for find each object one by one for several rounds. 16 | } 17 | } 18 | 19 | 20 | You should only respond in a JSON format dict as described below: 21 | { 22 | "Thought": "think about the current goal, whether the robot reach it etc.", 23 | "Response": "Your response to the robot." 24 | } 25 | Make sure the generated string can be parsed by `json.loads`. 26 | 27 | Example: 28 | 29 | Robot Utterance: Hello, what should I do? 30 | Function Message: 31 | { 32 | "current_goal": {}, 33 | "next_goal": { 34 | "object_id": "rack_0", 35 | "object_name": "rack", 36 | "room_name": "living room", 37 | "num_trial": 0, 38 | "num_round": 0 39 | } 40 | } 41 | 42 | { 43 | "Thought": "set a goal for the robot to find a rack", 44 | "Response": "Can you find a rack? it's located in the living room." 45 | } 46 | 47 | 48 | Robot Utterance: is it correct? 
49 | Function Message: 50 | { 51 | "current_goal": { 52 | "object_id": "rack_0", 53 | "object_name": "rack", 54 | "room_name": "living room", 55 | "num_trial": 3, 56 | "num_round": 1 57 | }, 58 | "is_current_goal_reached": false, 59 | "is_max_trial_reached": false, 60 | "next_goal": {} 61 | } 62 | 63 | { 64 | "Thought": "The robot asks whether it's correct, is_current_goal_reached = False, so the robot didn't reach the goal" 65 | "Response": "No, this is not the rack I'm looking for. Keep searching." 66 | } 67 | 68 | Robot Utterance: I found 2 possible couches, one is 12 units away at -33 degrees, and the other is 30 units away at 24 degrees. Is either of them the couch you're looking for? 69 | Function Message: 70 | { 71 | "current_goal": {}, 72 | "is_current_goal_reached": true, 73 | "is_max_trial_reached": false, 74 | "next_goal": { 75 | "object_id": "tv_0", 76 | "object_name": "tv", 77 | "room_name": "bedroom", 78 | "num_trial": 0, 79 | "num_round": 1 80 | } 81 | } 82 | 83 | 84 | { 85 | "Thought": "The robot found two possible couches, is_current_goal_reached is true, so it should be correct.", 86 | "Response": "Yes. Now, find the tv in the bedroom." 87 | } 88 | 89 | 90 | Robot Utterance: I found the microwave, is it correct? 91 | Function Message: 92 | { 93 | "current_goal": {}, 94 | "is_current_goal_reached": false, 95 | "is_max_trial_reached": true, 96 | "next_goal": { 97 | "object_id": "kitchen counter_0", 98 | "object_name": "kitchen counter", 99 | "num_trial": 0, 100 | "num_round": 1 101 | } 102 | } 103 | 104 | { 105 | "Thought": "The goal is not reached, and the maximum trial is reached, so the robot didn't find the goal object", 106 | "Response": "No, it's not the microwave I'm looking for. You've reached the maximum number of trials. Now, find the kitchen counter." 107 | } 108 | 109 | Robot Utterance: I found another bed with a red pillow, is it correct? 110 | Function Message: 111 | { 112 | "current_goal": { 113 | "object_id": "bed_1", 114 | "object_name": "bed", 115 | "room_name": "Alice's bedroom", 116 | "num_trial": 3, 117 | "num_round": 1 118 | }, 119 | "is_current_goal_reached": false, 120 | "is_max_trial_reached": false, 121 | "next_goal": {} 122 | } 123 | 124 | { 125 | "Thought": "the robot may found a bed, but not the goal bed, since is_current_goal_reached = False", 126 | "Response": "No, it's not the bed I'm looking for. The bed I'm looking for is in Alice's bedroom" 127 | } 128 | 129 | ---------- 130 | 131 | If is_current_goal_reached = True, that means the robot already reaches the goal, otherwise not. Do not provide other information beyond what is provided in the function message. 
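As a minimal illustration of the response contract above (the function name below is an assumption for this sketch, not taken from this repository), a reply could be validated like this:

import json

def parse_sim_reply(raw: str) -> str:
    # Hypothetical check: the reply must be a JSON object parseable by
    # json.loads and must carry both "Thought" and "Response" keys.
    data = json.loads(raw)
    if not isinstance(data, dict) or "Thought" not in data or "Response" not in data:
        raise ValueError("simulator reply does not follow the required JSON format")
    return data["Response"]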
132 | 133 | Let's start 134 | 135 | """ 136 | -------------------------------------------------------------------------------- /orion/config/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sled-group/navchat/afb21a4c9cb13f671534be5460e3c9e5867e5512/orion/config/__init__.py -------------------------------------------------------------------------------- /orion/config/chatgpt_config.py: -------------------------------------------------------------------------------- 1 | class ChatGPTConfig: 2 | pass 3 | 4 | 5 | ####################### OpenAI ####################### 6 | class OpenAIConfig(ChatGPTConfig): 7 | api_type: str = "openai" 8 | api_key: str = "" 9 | model: str 10 | limit: int 11 | price: float 12 | 13 | 14 | class OpenAIGPT35Config(OpenAIConfig): 15 | model: str = "gpt-3.5-turbo-0125" 16 | limit: int = 16000 17 | price: float = 0.0005 18 | 19 | 20 | class OpenAIGPT4Config(OpenAIConfig): 21 | model: str = "gpt-4-turbo-preview" 22 | limit: int = 128000 23 | price: float = 0.01 24 | 25 | 26 | ####################### Azure ####################### 27 | 28 | 29 | class AzureConfig(ChatGPTConfig): 30 | api_type: str = "azure" 31 | api_key: str = "" 32 | api_version: str = "2023-12-01-preview" 33 | azure_endpoint: str = "" 34 | model: str 35 | limit: int 36 | price: float 37 | 38 | 39 | class AzureGPT35Config(AzureConfig): 40 | model: str = "gpt-35-turbo-16k-0613" 41 | limit: int = 16000 42 | price: float = 0.0005 43 | 44 | 45 | class AzureGPT4Config(AzureConfig): 46 | model: str = "gpt-4-0125-preview" 47 | limit: int = 128000 48 | price: float = 0.01 49 | -------------------------------------------------------------------------------- /orion/config/my_objectnav_hm3d.yaml: -------------------------------------------------------------------------------- 1 | ENVIRONMENT: 2 | MAX_EPISODE_STEPS: 40000 3 | 4 | SIMULATOR: 5 | TURN_ANGLE: 15 6 | TILT_ANGLE: 15 7 | ACTION_SPACE_CONFIG: "v1" 8 | AGENT_0: 9 | SENSORS: ['RGB_SENSOR', 'DEPTH_SENSOR', "SEMANTIC_SENSOR"] 10 | HEIGHT: 0.88 11 | RADIUS: 0.18 12 | HABITAT_SIM_V0: 13 | GPU_DEVICE_ID: 0 14 | ALLOW_SLIDING: False 15 | SEMANTIC_SENSOR: 16 | WIDTH: 640 17 | HEIGHT: 480 18 | HFOV: 90 19 | POSITION: [0, 0.88, 0] 20 | RGB_SENSOR: 21 | WIDTH: 640 22 | HEIGHT: 480 23 | HFOV: 90 24 | POSITION: [0, 0.88, 0] 25 | DEPTH_SENSOR: 26 | WIDTH: 640 27 | HEIGHT: 480 28 | HFOV: 90 29 | MIN_DEPTH: 0.1 30 | MAX_DEPTH: 10.0 31 | POSITION: [0, 0.88, 0] 32 | TASK: 33 | TYPE: ObjectNav-v1 34 | POSSIBLE_ACTIONS: ["STOP", "MOVE_FORWARD", "TURN_LEFT", "TURN_RIGHT", "LOOK_UP", "LOOK_DOWN", "MOVE_BACKWARD"] 35 | 36 | SENSORS: ['OBJECTGOAL_SENSOR', 'COMPASS_SENSOR', 'GPS_SENSOR'] 37 | GOAL_SENSOR_UUID: objectgoal 38 | MEASUREMENTS: ['DISTANCE_TO_GOAL', 'SUCCESS', 'SPL', 'SOFT_SPL'] 39 | 40 | DISTANCE_TO_GOAL: 41 | DISTANCE_TO: VIEW_POINTS 42 | SUCCESS: 43 | SUCCESS_DISTANCE: 0.1 44 | 45 | DATASET: 46 | TYPE: ObjectNav-v1 47 | SPLIT: val 48 | DATA_PATH: "data/datasets/objectnav_hm3d_v2/{split}/{split}.json.gz" 49 | SCENES_DIR: "data/scene_datasets/" 50 | -------------------------------------------------------------------------------- /orion/gradio_init_img.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sled-group/navchat/afb21a4c9cb13f671534be5460e3c9e5867e5512/orion/gradio_init_img.jpg -------------------------------------------------------------------------------- /orion/map/__init__.py: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/sled-group/navchat/afb21a4c9cb13f671534be5460e3c9e5867e5512/orion/map/__init__.py -------------------------------------------------------------------------------- /orion/map/map.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | from orion.config.my_config import MapConfig 4 | 5 | 6 | class Mapping: 7 | def __init__(self, mapcfg: MapConfig): 8 | self.mapcfg = mapcfg 9 | self.num_grid = mapcfg.num_grid 10 | self.cell_size = mapcfg.cell_size 11 | self.min_depth = mapcfg.min_depth 12 | self.max_depth = mapcfg.max_depth 13 | self.ceiling_height_wrt_camera = mapcfg.ceiling_height_wrt_camera 14 | self.camera_height = mapcfg.camera_height 15 | self.agent_height_tolerance = mapcfg.agent_height_tolerance 16 | self.num_vxl_height = mapcfg.num_vxl_height 17 | self.downsample_factor = mapcfg.downsample_factor 18 | 19 | def update(self, *args, **kwargs): 20 | return NotImplementedError 21 | 22 | def get_point_cloud_from_depth(self, depth: np.ndarray, cam_insc_inv: np.ndarray): 23 | """ 24 | Return 3xN array in camera frame, X right, Y down, Z into the screen 25 | """ 26 | if len(depth.shape) == 3: 27 | depth = depth.squeeze() 28 | 29 | h, w = depth.shape 30 | 31 | y, x = np.meshgrid(np.arange(h), np.arange(w), indexing="ij") 32 | 33 | x = x.reshape((1, -1))[:, :] 34 | y = y.reshape((1, -1))[:, :] 35 | z = depth.reshape((1, -1)) # [1, h*w] 36 | 37 | p_2d = np.vstack([x, y, np.ones_like(x)]) 38 | pc = cam_insc_inv @ p_2d 39 | pc = pc * z # 40 | mask = (pc[2, :] > self.min_depth) * ( 41 | pc[2, :] < self.max_depth * 0.99 42 | ) # avoid non-deteced points 43 | return pc, mask 44 | -------------------------------------------------------------------------------- /orion/map/map_build/build_voxel.py: -------------------------------------------------------------------------------- 1 | import os 2 | import numpy as np 3 | 4 | from orion import logger 5 | from orion.utils.geometry import CoordinateTransform 6 | from orion.config.my_config import * 7 | from orion.utils import file_load as load_utils 8 | from orion.map.voxel import VoxelMapping 9 | from orion.map.voxel_sparse import VoxelMappingSparse 10 | from orion.abstract.perception import ExtractorModule 11 | from orion.perception.extractor.lseg_extractor import LSegExtractor 12 | from orion.utils.geometry import PinholeCameraModel as pinhole 13 | from orion.abstract.interfaces import Observations 14 | 15 | 16 | class OfflineDataLoader: 17 | def __init__(self, data_dir, mapcfg: MapConfig = MapConfig()): 18 | rgb_dir = os.path.join(data_dir, "rgb") 19 | depth_dir = os.path.join(data_dir, "depth") 20 | pose_dir = os.path.join(data_dir, "pose") 21 | semantic_dir = os.path.join(data_dir, "semantic") 22 | 23 | rgb_list = sorted( 24 | os.listdir(rgb_dir), key=lambda x: int(x.split("_")[-1].split(".")[0]) 25 | ) 26 | depth_list = sorted( 27 | os.listdir(depth_dir), key=lambda x: int(x.split("_")[-1].split(".")[0]) 28 | ) 29 | pose_list = sorted( 30 | os.listdir(pose_dir), key=lambda x: int(x.split("_")[-1].split(".")[0]) 31 | ) 32 | semantic_list = sorted( 33 | os.listdir(semantic_dir), key=lambda x: int(x.split("_")[-1].split(".")[0]) 34 | ) 35 | 36 | rgb_list = [os.path.join(rgb_dir, x) for x in rgb_list] 37 | depth_list = [os.path.join(depth_dir, x) for x in depth_list] 38 | pose_list = [os.path.join(pose_dir, x) for x in pose_list] 39 | semantic_list = [os.path.join(semantic_dir, x) for 
x in semantic_list] 40 | 41 | self._data = list(zip(rgb_list, depth_list, semantic_list, pose_list)) 42 | 43 | self.obj2cls_dic, _ = load_utils.load_obj2cls_dict( 44 | os.path.join(data_dir, "obj2cls_dict.txt") 45 | ) 46 | self.mapcfg = mapcfg 47 | 48 | def __getitem__(self, idx): 49 | rgb_path, depth_path, semantic_path, pose_path = self._data[idx] 50 | rgb = load_utils.load_image(rgb_path) 51 | depth = load_utils.load_depth(depth_path) 52 | semantic = load_utils.load_semantic(semantic_path, self.obj2cls_dic) 53 | simpose = load_utils.load_pose(pose_path) 54 | if idx == 0: 55 | self.transform_fn = CoordinateTransform( 56 | num_grd=self.mapcfg.num_grid, 57 | cell_size=self.mapcfg.cell_size, 58 | init_agtpose=simpose, 59 | ) 60 | cam_pose = self.transform_fn.get_relative_campose(simpose) 61 | return Observations( 62 | rgb=rgb, # [h, w, 3] uint8 63 | depth=depth, # [h, w] float32 64 | semantic=semantic, # [h, w] int 65 | rel_cam_pose=cam_pose, # [4, 4] 66 | ) 67 | 68 | def __len__(self): 69 | return len(self._data) 70 | 71 | 72 | class VoxelMapBuilder: 73 | """This is working in another process""" 74 | 75 | def __init__( 76 | self, 77 | save_dir, 78 | mapcfg: MapConfig = MapConfig(), 79 | extractor_type: str = "lseg", 80 | extractor: ExtractorModule = LSegExtractor(), 81 | accelerate_mapping=True, 82 | use_sparse_build=True, 83 | ) -> None: 84 | self.mapcfg = mapcfg 85 | self.extractor_type = extractor_type 86 | self.extractor = extractor 87 | if not use_sparse_build: 88 | self.vxlmap = VoxelMapping( 89 | self.mapcfg, self.extractor, accelerate_mapping=accelerate_mapping 90 | ) 91 | else: 92 | self.vxlmap = VoxelMappingSparse( 93 | self.mapcfg, self.extractor, accelerate_mapping=accelerate_mapping 94 | ) 95 | self.cam_insc = pinhole.get_camera_intrinsic_matrix( 96 | mapcfg.screen_h, mapcfg.screen_w, mapcfg.fov 97 | ) 98 | self.cam_insc_inv = np.linalg.inv(self.cam_insc) 99 | 100 | self.save_dir = save_dir 101 | self.use_sparse_build = use_sparse_build 102 | 103 | def build(self, obs: Observations): 104 | rgb = obs.rgb 105 | depth = obs.depth 106 | semantic = obs.semantic 107 | camera_pose = obs.rel_cam_pose 108 | feats = self.vxlmap.get_feature(rgb) # torch.Tensor cuda 109 | self.vxlmap.update(feats, depth, rgb, semantic, camera_pose, self.cam_insc_inv) 110 | 111 | def _return_result(self): 112 | """return vlmap results for temporaray planning""" 113 | if self.use_sparse_build: 114 | return ValueError("Not supported yet") 115 | featmap = self.vxlmap.featmap 116 | vxlcnt = np.expand_dims(self.vxlmap.vxl_count, axis=-1) 117 | 118 | indices = np.transpose(np.nonzero(np.any(vxlcnt, axis=-1))) 119 | feat_values = featmap[tuple(indices.T)] 120 | 121 | return {"indices": indices, "feat_values": feat_values} 122 | 123 | def _save(self): 124 | if not self.use_sparse_build: 125 | # This can save 100 times of storage space 126 | assert len(self.vxlmap.featmap.shape) == 4 127 | assert len(self.vxlmap.rgbmap.shape) == 4 128 | assert len(self.vxlmap.gtmap.shape) == 3 129 | assert len(self.vxlmap.vxl_count.shape) == 3 130 | 131 | featmap = self.vxlmap.featmap 132 | rgbmap = self.vxlmap.rgbmap 133 | vxlcnt = np.expand_dims(self.vxlmap.vxl_count, axis=-1) 134 | gtmap = np.expand_dims(self.vxlmap.gtmap, axis=-1) 135 | 136 | indices = np.transpose(np.nonzero(np.any(vxlcnt, axis=-1))) 137 | 138 | feat_values = featmap[tuple(indices.T)] 139 | count_values = vxlcnt[tuple(indices.T)].squeeze() 140 | rgb_values = rgbmap[tuple(indices.T)] 141 | gt_values = gtmap[tuple(indices.T)].squeeze() 142 | else: 143 | indices 
= self.vxlmap.indices 144 | feat_values = self.vxlmap.feat_values 145 | count_values = self.vxlmap.vxl_count 146 | rgb_values = self.vxlmap.rgb_values 147 | gt_values = self.vxlmap.gt_values 148 | 149 | if not os.path.exists(self.save_dir): 150 | os.makedirs(self.save_dir) 151 | else: 152 | logger.warning(f"{self.save_dir} already exists.") 153 | input("Press Enter to continue...") 154 | # Save the indices and values 155 | save_path = os.path.join(self.save_dir, "sparse_vxl_map.npz") 156 | np.savez( 157 | save_path, 158 | indices=indices, # [N, 3] 159 | count_values=count_values, # [N,] 160 | feat_values=feat_values, # [N, feat_dim=512] 161 | rgb_values=rgb_values, # [N, 3] 162 | gt_values=gt_values, # [N,] 163 | ) 164 | logger.info(f"{save_path} is saved. (sparse map)") 165 | 166 | @staticmethod 167 | def _save_npy(save_path, array): 168 | with open(save_path, "wb") as f: 169 | np.save(f, array) 170 | logger.info(f"{save_path} is saved.") 171 | -------------------------------------------------------------------------------- /orion/map/map_search/search_base.py: -------------------------------------------------------------------------------- 1 | from typing import List, Optional 2 | 3 | import numpy as np 4 | 5 | from orion import logger 6 | from orion.config.my_config import MapConfig 7 | 8 | 9 | class MapSearch: 10 | def __init__( 11 | self, 12 | load_sparse_map_path: str, 13 | map_index_bound: Optional[List[int]] = None, 14 | mapcfg: MapConfig = MapConfig(), 15 | ): 16 | self.mapcfg = mapcfg 17 | 18 | self._load_sparse_map(load_sparse_map_path) 19 | self.update_index(map_index_bound) 20 | 21 | def _load_sparse_map(self, load_path): 22 | # Load the indices and values 23 | sparse_map = np.load(load_path) 24 | self.indices = sparse_map["indices"] 25 | self.feat_values = sparse_map["feat_values"] 26 | self.vxl_count = sparse_map["count_values"] 27 | self.rgb_values = sparse_map["rgb_values"] 28 | self.gt_values = sparse_map["gt_values"] 29 | self._3dshape = ( 30 | self.mapcfg.num_grid, 31 | self.mapcfg.num_grid, 32 | self.mapcfg.num_vxl_height, 33 | ) 34 | logger.info(f"load_path: {load_path}") 35 | 36 | def update_index(self, map_index_bound: Optional[List[int]] = None): 37 | "save search time when using this to crop the vxlmap" 38 | z_indices, x_indices, y_indices = ( 39 | self.indices[:, 0], 40 | self.indices[:, 1], 41 | self.indices[:, 2], 42 | ) 43 | no_map_mask = np.zeros(shape=(self.mapcfg.num_grid, self.mapcfg.num_grid)) 44 | no_map_mask[z_indices, x_indices] = 1 45 | self.no_map_mask = np.logical_not(no_map_mask) 46 | 47 | if map_index_bound is not None: 48 | self.xmin, self.xmax, self.zmin, self.zmax = map_index_bound 49 | else: 50 | self.xmin = np.min(x_indices) 51 | self.xmax = np.max(x_indices) 52 | self.zmin = np.min(z_indices) 53 | self.zmax = np.max(z_indices) 54 | self.ymin = np.min(y_indices) 55 | self.ymax = np.max(y_indices) 56 | 57 | # logger.info( 58 | # f"map_index_bound: {self.xmin}, {self.xmax}, {self.zmin}, {self.zmax}" 59 | # ) 60 | self.no_map_mask_crop = self.no_map_mask[ 61 | self.zmin : self.zmax + 1, self.xmin : self.xmax + 1 62 | ] 63 | 64 | @staticmethod 65 | def get_BEV_map(indices, values, map_shape): 66 | assert indices.shape[-1] == 3 67 | assert indices.shape[0] == values.shape[0] 68 | assert values.shape[-1] == map_shape[-1] 69 | assert len(map_shape) == 4 70 | 71 | rev_indices = indices[ 72 | ::-1 73 | ] # reverse to get the largest y value for each (z, x) easily 74 | rev_z, rev_x, rev_y = rev_indices[:, 0], rev_indices[:, 1], rev_indices[:, 2] 75 | 
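        # The reverse + np.unique trick below keeps, for each (z, x) column, the entry
        # that appears last in `indices`; when the indices are stored in ascending
        # (z, x, y) order (as np.nonzero produces), that entry is the top-most voxel.
        # `paired_y` then maps the position in the reversed array back into `values`.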
76 | # Create a unique identifier for each (z, x) pair 77 | rev_unique_zx, rev_paired_y = np.unique( 78 | np.column_stack((rev_z, rev_x)), axis=0, return_index=True 79 | ) 80 | 81 | # Find the maximum 'y' value for each unique (z, x) pair 82 | max_y_values = rev_y[rev_paired_y] 83 | 84 | bev_indices_sparse = np.column_stack( 85 | (rev_unique_zx, max_y_values) 86 | ) # [z, x, top_y] 87 | 88 | paired_y = len(indices) - rev_paired_y - 1 89 | 90 | bev_feat_sparse = values[paired_y] # [z, x, feat_dim] 91 | bev_map = np.zeros( 92 | shape=(map_shape[0], map_shape[1], map_shape[3]) 93 | ) # [num_z, num_x, feat_dim] 94 | bev_map[bev_indices_sparse[:, 0], bev_indices_sparse[:, 1], :] = bev_feat_sparse 95 | 96 | return bev_map 97 | -------------------------------------------------------------------------------- /orion/map/occupancy.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | from orion.config.my_config import MapConfig 4 | from orion.map.map import Mapping 5 | from orion.utils.geometry import PinholeCameraModel as pinhole 6 | 7 | 8 | class OccupancyMapping(Mapping): 9 | # mainly for frontier based exploration 10 | # also provide explored area 11 | UNKNOWN = 0 12 | FREE = 1 13 | OCCUPIED = 2 14 | WALL = 3 15 | FRONTIER = 4 16 | UNKNOWN_FREE = 5 17 | 18 | def __init__(self, mapcfg: MapConfig): 19 | super().__init__(mapcfg=mapcfg) 20 | self.map = np.zeros((self.num_grid, self.num_grid), dtype=np.uint8) 21 | 22 | def save(self, path): 23 | np.save(path, self.map) 24 | 25 | def load(self, path): 26 | self.map = np.load(path) 27 | 28 | def reset_floor(self): 29 | self.map[self.map == OccupancyMapping.FREE] = OccupancyMapping.UNKNOWN 30 | self.map[self.map == OccupancyMapping.FRONTIER] = OccupancyMapping.UNKNOWN 31 | 32 | @staticmethod 33 | def color(value, rgba=False): 34 | c = None 35 | if value == OccupancyMapping.UNKNOWN: 36 | c = [0, 0, 0] 37 | elif value == OccupancyMapping.FREE: 38 | c = [255, 255, 255] 39 | elif value == OccupancyMapping.OCCUPIED: 40 | c = [255, 0, 0] 41 | elif value == OccupancyMapping.WALL: 42 | c = [0, 255, 0] 43 | elif value == OccupancyMapping.FRONTIER: 44 | c = [0, 0, 255] 45 | else: 46 | raise ValueError("Not supported enum") 47 | if rgba: 48 | c.append(255) 49 | return c 50 | 51 | def update( 52 | self, 53 | depth: np.ndarray, 54 | relative_campose: np.ndarray, 55 | cam_insc_inv: np.ndarray, 56 | is_rotate: bool = False, 57 | camera_height_change: float = 0.0, 58 | ): 59 | # depth: [h, w], one image each time 60 | 61 | cam_pts, mask = self.get_point_cloud_from_depth(depth, cam_insc_inv) 62 | self.cam_pts = cam_pts 63 | 64 | not_ceiling_mask = cam_pts[1, :] > -self.ceiling_height_wrt_camera 65 | 66 | composite_mask = np.logical_and(mask, not_ceiling_mask) 67 | 68 | cam_pts = cam_pts[:, composite_mask] 69 | 70 | # here the wld frame is the first camera frame 71 | # x right, y down, z forward 72 | wld_pts = pinhole.cam2wld(cam_pts, cam_pose=relative_campose) 73 | wld_pts[1, :] -= camera_height_change 74 | 75 | # This is a simple method to get floor mask. It can not handle slope floor. 76 | # The best way should be using semantic segmentation, like LSeg. 77 | # However we need a trade-off between speed and accuracy. 
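        # Geometry reminder: the world frame here is the first camera frame (x right,
        # y down, z forward), so floor points lie roughly `camera_height` below the
        # camera, i.e. at y ~= +camera_height; the +/- agent_height_tolerance band
        # absorbs depth noise and small pose errors.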
78 | floor_mask = np.logical_and( 79 | wld_pts[1, :] > self.camera_height - self.agent_height_tolerance, 80 | wld_pts[1, :] < self.camera_height + self.agent_height_tolerance, 81 | ) 82 | 83 | wall_mask = wld_pts[1, :] < -self.ceiling_height_wrt_camera + 0.2 84 | 85 | grd_xs = np.round(wld_pts[0, :] / self.cell_size + self.num_grid // 2).astype( 86 | np.int32 87 | ) 88 | grd_zs = np.round(self.num_grid // 2 - wld_pts[2, :] / self.cell_size).astype( 89 | np.int32 90 | ) 91 | 92 | # make sure the last occupaied area will not be free anymore. 93 | # This can avoid more collision 94 | last_nofree_mask = np.logical_or( 95 | self.map[grd_zs, grd_xs] == OccupancyMapping.OCCUPIED, 96 | self.map[grd_zs, grd_xs] == OccupancyMapping.WALL, 97 | ) 98 | last_wall_mask = self.map[grd_zs, grd_xs] == OccupancyMapping.WALL 99 | last_map = self.map.copy() 100 | 101 | free_mask = np.logical_and(~last_nofree_mask, floor_mask) 102 | 103 | self.map[grd_zs, grd_xs] = OccupancyMapping.OCCUPIED 104 | # free_mask = floor_mask 105 | 106 | self.map[grd_zs[free_mask], grd_xs[free_mask]] = OccupancyMapping.FREE 107 | self.map[grd_zs[wall_mask], grd_xs[wall_mask]] = OccupancyMapping.WALL 108 | self.map[grd_zs[last_wall_mask], grd_xs[last_wall_mask]] = OccupancyMapping.WALL 109 | 110 | # delete the possible wall area 111 | floor_mask = np.logical_and(floor_mask, np.logical_not(wall_mask)) 112 | 113 | # calculate eight neighbors 114 | dx = np.array([-1, 0, 1, -1, 1, -1, 0, 1]).reshape(-1, 1) 115 | dz = np.array([-1, -1, -1, 0, 0, 1, 1, 1]).reshape(-1, 1) 116 | 117 | floor_grds = np.stack([grd_xs[floor_mask], grd_zs[floor_mask]]) 118 | unique_floor_grds = np.unique(floor_grds, axis=1) 119 | x_grds_floor, z_grds_floor = unique_floor_grds 120 | neighbor_indices_x = x_grds_floor + dx 121 | neighbor_indices_z = z_grds_floor + dz 122 | 123 | frontier_mask = ( 124 | np.sum( 125 | self.map[neighbor_indices_z, neighbor_indices_x] 126 | == OccupancyMapping.UNKNOWN, 127 | axis=0, 128 | ) 129 | > 1 130 | ) 131 | no_frontier_mask = np.logical_or( 132 | np.sum( 133 | self.map[neighbor_indices_z, neighbor_indices_x] 134 | == OccupancyMapping.WALL, 135 | axis=0, 136 | ) 137 | > 1, 138 | np.sum( 139 | self.map[neighbor_indices_z, neighbor_indices_x] 140 | == OccupancyMapping.OCCUPIED, 141 | axis=0, 142 | ) 143 | > 3, 144 | ) 145 | 146 | frontier_mask = np.logical_and(frontier_mask, np.logical_not(no_frontier_mask)) 147 | self.map[ 148 | z_grds_floor[frontier_mask], x_grds_floor[frontier_mask] 149 | ] = OccupancyMapping.FRONTIER 150 | 151 | # # Post-process 152 | # # This is because the camera height can not be accurate using predicted pose. 153 | # # So there could be some critical points where large free space turn to occupancy suddenly 154 | # # We will avoid this 155 | # increment_occu_mask = np.logical_and( 156 | # self.map == OccupancyMapping.OCCUPIED, 157 | # np.logical_not(last_map == OccupancyMapping.OCCUPIED), 158 | # ) 159 | # floor2occu_mask = np.logical_and( 160 | # increment_occu_mask, last_map == OccupancyMapping.FREE 161 | # ) 162 | # if np.sum(floor2occu_mask) > 50 and is_rotate: # reset 163 | # self.map = last_map.copy() 164 | -------------------------------------------------------------------------------- /orion/map/voxel.py: -------------------------------------------------------------------------------- 1 | """ 2 | Voxel for VLmap. Adapted from VLmap repo. https://github.com/vlmaps/vlmaps. 
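Dense variant: keeps full (num_grid, num_grid, num_vxl_height) arrays holding running-mean visual-language features from the configured extractor, last-written RGB and ground-truth labels, and per-voxel hit counts.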
3 | """ 4 | 5 | import numpy as np 6 | import torch 7 | 8 | from orion import logger 9 | from orion.abstract.perception import ExtractorModule 10 | from orion.config.my_config import MapConfig 11 | from orion.map.map import Mapping 12 | from orion.utils.geometry import PinholeCameraModel as pinhole 13 | 14 | 15 | class VoxelMapping(Mapping): 16 | def __init__( 17 | self, mapcfg: MapConfig, extractor: ExtractorModule, accelerate_mapping=True 18 | ): 19 | super().__init__(mapcfg=mapcfg) 20 | self.feat_dim = extractor.feat_dim # feature dim =1 means gt labels 21 | self.extractor = extractor 22 | 23 | self.featmap = np.zeros( 24 | (self.num_grid, self.num_grid, self.num_vxl_height, self.feat_dim), 25 | dtype=np.float32, 26 | ) 27 | self.vxl_count = np.zeros( 28 | (self.num_grid, self.num_grid, self.num_vxl_height), dtype=np.int32 29 | ) # zxy 30 | 31 | self.rgbmap = np.zeros( 32 | (self.num_grid, self.num_grid, self.num_vxl_height, 3), dtype=np.uint8 33 | ) 34 | self.gtmap = np.zeros( 35 | (self.num_grid, self.num_grid, self.num_vxl_height), dtype=np.int16 36 | ) 37 | 38 | self.accelerate_mapping = accelerate_mapping 39 | 40 | def get_feature(self, rgb: np.ndarray): 41 | return self.extractor.predict(rgb) 42 | 43 | def update( 44 | self, 45 | feats: torch.Tensor, 46 | depth: np.ndarray, 47 | rgb: np.ndarray, 48 | semantic: np.ndarray, 49 | relative_campose: np.ndarray, 50 | cam_insc_inv: np.ndarray, 51 | ): 52 | # depth: [h, w], one image each time 53 | # feature: [h, w, feat_dim] 54 | 55 | cam_pts, mask = self.get_point_cloud_from_depth(depth, cam_insc_inv) 56 | not_ceiling_mask = cam_pts[1, :] > -self.ceiling_height_wrt_camera 57 | composite_mask = np.logical_and(mask, not_ceiling_mask) 58 | cam_pts = cam_pts[:, composite_mask] 59 | 60 | rgb = rgb.reshape(-1, 3) 61 | rgb = rgb[composite_mask, :] 62 | 63 | gt_semantic = semantic.reshape(-1) 64 | gt_semantic = gt_semantic[composite_mask] 65 | 66 | feats = feats.reshape(-1, self.feat_dim) 67 | composite_mask = torch.from_numpy(composite_mask) 68 | composite_mask = composite_mask.to(feats.device) # use gpu to accelerate 69 | feats = feats[composite_mask, :] 70 | feats = feats.cpu().numpy() 71 | composite_mask = None 72 | 73 | # downsample 74 | cam_pts = cam_pts[:, :: self.downsample_factor] 75 | rgb = rgb[:: self.downsample_factor, :] 76 | gt_semantic = gt_semantic[:: self.downsample_factor] 77 | feats = feats[:: self.downsample_factor, :] 78 | 79 | wld_pts = pinhole.cam2wld(cam_pts, cam_pose=relative_campose) 80 | vxl_zs = np.round(self.num_grid // 2 - wld_pts[2, :] / self.cell_size).astype( 81 | np.int32 82 | ) 83 | vxl_xs = np.round(wld_pts[0, :] / self.cell_size + self.num_grid // 2).astype( 84 | np.int32 85 | ) 86 | vxl_ys = np.maximum( 87 | np.round((self.camera_height - wld_pts[1, :]) / self.cell_size).astype( 88 | np.int32 89 | ), 90 | 0, 91 | ) 92 | 93 | if self.accelerate_mapping: 94 | # get unique voxel indices and the index 95 | vxl_zxys, vxl_indices = np.unique( 96 | np.stack([vxl_zs, vxl_xs, vxl_ys], axis=1), axis=0, return_index=True 97 | ) 98 | else: 99 | vxl_zxys = np.stack([vxl_zs, vxl_xs, vxl_ys], axis=1) 100 | vxl_indices = np.arange(vxl_zxys.shape[0]) 101 | 102 | for vxl_zxy, vxl_ind in zip(vxl_zxys, vxl_indices): 103 | self.is_in_grid(vxl_zxy) 104 | self.featmap[vxl_zxy[0], vxl_zxy[1], vxl_zxy[2]] = ( 105 | self.featmap[vxl_zxy[0], vxl_zxy[1], vxl_zxy[2]] 106 | * self.vxl_count[vxl_zxy[0], vxl_zxy[1], vxl_zxy[2]] 107 | + feats[vxl_ind] 108 | ) / (self.vxl_count[vxl_zxy[0], vxl_zxy[1], vxl_zxy[2]] + 1) 109 | 
self.vxl_count[vxl_zxy[0], vxl_zxy[1], vxl_zxy[2]] += 1 110 | 111 | self.rgbmap[vxl_zxy[0], vxl_zxy[1], vxl_zxy[2]] = rgb[vxl_ind] 112 | self.gtmap[vxl_zxy[0], vxl_zxy[1], vxl_zxy[2]] = gt_semantic[vxl_ind] 113 | 114 | def is_in_grid(self, vxl_zxy): 115 | if vxl_zxy[0] < 0 or vxl_zxy[0] >= self.num_grid: 116 | logger.warning(f"vxl_zxy[0] out of range: {vxl_zxy[0]}") 117 | vxl_zxy[0] = np.clip(vxl_zxy[0], 0, self.num_grid - 1) 118 | if vxl_zxy[1] < 0 or vxl_zxy[1] >= self.num_grid: 119 | logger.warning(f"vxl_zxy[1] out of range: {vxl_zxy[1]}") 120 | vxl_zxy[1] = np.clip(vxl_zxy[1], 0, self.num_grid - 1) 121 | if vxl_zxy[2] < 0 or vxl_zxy[2] >= self.num_vxl_height: 122 | logger.warning(f"vxl_zxy[2] out of range: {vxl_zxy[2]}") 123 | vxl_zxy[2] = np.clip(vxl_zxy[2], 0, self.num_vxl_height - 1) 124 | -------------------------------------------------------------------------------- /orion/map/voxel_sparse.py: -------------------------------------------------------------------------------- 1 | """ 2 | Voxel for VLmap 3 | """ 4 | 5 | import numpy as np 6 | import torch 7 | 8 | from orion import logger 9 | from orion.abstract.perception import ExtractorModule 10 | from orion.config.my_config import MapConfig 11 | from orion.map.map import Mapping 12 | from orion.utils.geometry import PinholeCameraModel as pinhole 13 | 14 | 15 | class VoxelMappingSparse(Mapping): 16 | def __init__( 17 | self, mapcfg: MapConfig, extractor: ExtractorModule, accelerate_mapping=True 18 | ): 19 | super().__init__(mapcfg=mapcfg) 20 | self.feat_dim = extractor.feat_dim # feature dim =1 means gt labels 21 | self.extractor = extractor 22 | 23 | self.feat_values = np.empty(shape=(0, self.feat_dim), dtype=np.float32) 24 | self.indices = np.empty(shape=(0, 3), dtype=np.int16) 25 | self.count_values = np.empty(shape=(0), dtype=np.int32) 26 | self.rgb_values = np.empty(shape=(0, 3), dtype=np.uint8) 27 | self.gt_values = np.empty(shape=(0), dtype=np.int8) 28 | 29 | self.accelerate_mapping = accelerate_mapping 30 | 31 | def get_feature(self, rgb: np.ndarray): 32 | return self.extractor.predict(rgb) 33 | 34 | def update( 35 | self, 36 | feats: torch.Tensor, 37 | depth: np.ndarray, 38 | rgb: np.ndarray, 39 | semantic: np.ndarray, 40 | relative_campose: np.ndarray, 41 | cam_insc_inv: np.ndarray, 42 | ): 43 | # depth: [h, w], one image each time 44 | # feature: [h, w, feat_dim] 45 | 46 | cam_pts, mask = self.get_point_cloud_from_depth(depth, cam_insc_inv) 47 | not_ceiling_mask = cam_pts[1, :] > -self.ceiling_height_wrt_camera 48 | composite_mask = np.logical_and(mask, not_ceiling_mask) 49 | cam_pts = cam_pts[:, composite_mask] 50 | 51 | rgb = rgb.reshape(-1, 3) 52 | rgb = rgb[composite_mask, :] 53 | 54 | gt_semantic = semantic.reshape(-1) 55 | gt_semantic = gt_semantic[composite_mask] 56 | 57 | feats = feats.reshape(-1, self.feat_dim) 58 | composite_mask = torch.from_numpy(composite_mask) 59 | composite_mask = composite_mask.to(feats.device) # use gpu to accelerate 60 | feats = feats[composite_mask, :] 61 | feats = feats.cpu().numpy() 62 | composite_mask = None 63 | 64 | # downsample 65 | cam_pts = cam_pts[:, :: self.downsample_factor] 66 | rgb = rgb[:: self.downsample_factor, :] 67 | gt_semantic = gt_semantic[:: self.downsample_factor] 68 | feats = feats[:: self.downsample_factor, :] 69 | 70 | wld_pts = pinhole.cam2wld(cam_pts, cam_pose=relative_campose) 71 | vxl_zs = np.round(self.num_grid // 2 - wld_pts[2, :] / self.cell_size).astype( 72 | np.int32 73 | ) 74 | vxl_xs = np.round(wld_pts[0, :] / self.cell_size + self.num_grid // 
2).astype( 75 | np.int32 76 | ) 77 | vxl_ys = np.maximum( 78 | np.round((self.camera_height - wld_pts[1, :]) / self.cell_size).astype( 79 | np.int32 80 | ), 81 | 0, 82 | ) 83 | 84 | if self.accelerate_mapping: 85 | # get unique voxel indices and the index 86 | vxl_zxys, vxl_indices = np.unique( 87 | np.stack([vxl_zs, vxl_xs, vxl_ys], axis=1), axis=0, return_index=True 88 | ) 89 | else: 90 | vxl_zxys = np.stack([vxl_zs, vxl_xs, vxl_ys], axis=1) 91 | vxl_indices = np.arange(vxl_zxys.shape[0]) 92 | 93 | for vxl_zxy, vxl_ind in zip(vxl_zxys, vxl_indices): 94 | self.is_in_grid(vxl_zxy) 95 | 96 | if len(self.indices) == 0: 97 | self.indices = np.vstack([self.indices, vxl_zxy]) 98 | self.feat_values = np.vstack([self.feat_values, feats[vxl_ind]]) 99 | self.count_values = np.append(self.count_values, 1) 100 | self.rgb_values = np.vstack([self.rgb_values, rgb[vxl_ind]]) 101 | self.gt_values = np.append(self.gt_values, gt_semantic[vxl_ind]) 102 | else: 103 | idx = np.where((self.indices == vxl_zxy).all(axis=1))[0] 104 | if len(idx) > 0: # already in the indices 105 | row_index = idx[0] 106 | self.feat_values[row_index] = ( 107 | self.feat_values[row_index] * self.count_values[row_index] 108 | + feats[vxl_ind] 109 | ) / (self.count_values[row_index] + 1) 110 | self.count_values[row_index] += 1 111 | self.rgb_values[row_index] = rgb[vxl_ind] 112 | self.gt_values[row_index] = gt_semantic[vxl_ind] 113 | else: 114 | self.indices = np.vstack([self.indices, vxl_zxy]) 115 | self.feat_values = np.vstack([self.feat_values, feats[vxl_ind]]) 116 | self.count_values = np.append(self.count_values, 1) 117 | self.rgb_values = np.vstack([self.rgb_values, rgb[vxl_ind]]) 118 | self.gt_values = np.append(self.gt_values, gt_semantic[vxl_ind]) 119 | 120 | def is_in_grid(self, vxl_zxy): 121 | if vxl_zxy[0] < 0 or vxl_zxy[0] >= self.num_grid: 122 | logger.warning(f"vxl_zxy[0] out of range: {vxl_zxy[0]}") 123 | vxl_zxy[0] = np.clip(vxl_zxy[0], 0, self.num_grid - 1) 124 | if vxl_zxy[1] < 0 or vxl_zxy[1] >= self.num_grid: 125 | logger.warning(f"vxl_zxy[1] out of range: {vxl_zxy[1]}") 126 | vxl_zxy[1] = np.clip(vxl_zxy[1], 0, self.num_grid - 1) 127 | if vxl_zxy[2] < 0 or vxl_zxy[2] >= self.num_vxl_height: 128 | logger.warning(f"vxl_zxy[2] out of range: {vxl_zxy[2]}") 129 | vxl_zxy[2] = np.clip(vxl_zxy[2], 0, self.num_vxl_height - 1) 130 | -------------------------------------------------------------------------------- /orion/navigation/shortest_path_follower_wrapper.py: -------------------------------------------------------------------------------- 1 | """ 2 | Interface for habitat ShortestPathFollower and My ShortestPathFollower 3 | """ 4 | 5 | 6 | import cv2 7 | import numpy as np 8 | 9 | from orion import logger 10 | from orion.abstract.pose import Agent2DPose 11 | from orion.navigation.fmm_planner import FMMPlanner 12 | from orion.navigation.waypoint_planner import PointPlanner 13 | from orion.utils import visulization as vis 14 | from orion.utils.geometry import CoordinateTransform 15 | 16 | 17 | class ShortestPathFollowerBase: 18 | def is_navigable(self, pose: Agent2DPose): 19 | raise NotImplementedError 20 | 21 | def get_next_action(self, *args, **kwargs): 22 | raise NotImplementedError 23 | 24 | def set_traversible_map(self, traversible: np.ndarray): 25 | raise NotImplementedError 26 | 27 | def get_navigable_mask(self): 28 | raise NotImplementedError 29 | 30 | def revise_pose(self, pose: Agent2DPose): 31 | # since the map is constantly changing, we need to revise the pose 32 | if self.is_navigable(pose): 33 | return 
pose 34 | else: 35 | logger.info("[PathFollower] Pose is not navigable, revise it auto") 36 | navi_mask = self.get_navigable_mask() 37 | navi_mask = vis.get_largest_connected_area(navi_mask) 38 | 39 | new_pose = PointPlanner.plan_reachable_point( 40 | cen_x=pose.x, 41 | cen_z=pose.z, 42 | navigable_mask=navi_mask, 43 | max_radius=5, 44 | ) 45 | if new_pose is None: 46 | logger.info( 47 | "[PathFollower] Can not find reachable nearby point, return original pose" 48 | ) 49 | return pose 50 | else: 51 | return Agent2DPose(new_pose.x, new_pose.z, pose.t) 52 | 53 | 54 | class MyFollower(ShortestPathFollowerBase): 55 | def __init__( 56 | self, 57 | num_rots: int = 360 // 15, 58 | step_size: int = int(0.25 / 0.05), 59 | goal_radius: int = int(0.3 / 0.05), 60 | wheel_radius: int = int(0.2 / 0.05), 61 | ): 62 | self.follower = FMMPlanner(num_rots, step_size, goal_radius, wheel_radius) 63 | 64 | def set_traversible_map(self, traversible: np.ndarray): 65 | self.follower.set_traversible_map(traversible) 66 | 67 | def is_navigable(self, pose: Agent2DPose): 68 | return self.follower.is_navigable(pose) 69 | 70 | def get_navigable_mask(self): 71 | return self.follower.original_traversible 72 | 73 | def get_next_action( 74 | self, 75 | start: Agent2DPose, 76 | goal: Agent2DPose, 77 | pre_collision_dict=None, 78 | goal_dist=None, 79 | ): 80 | _ = self.follower.get_action( 81 | start, goal, pre_collision_dict=pre_collision_dict, goal_dist=goal_dist 82 | ) 83 | return _[0] 84 | 85 | 86 | class HabitatFollower(ShortestPathFollowerBase): 87 | def __init__( 88 | self, 89 | env_sim, 90 | navigatable_mask: np.ndarray, 91 | transform_fn: CoordinateTransform, 92 | goal_radius: float = 0.3, 93 | return_one_hot: bool = False, 94 | ): 95 | self.env_sim = env_sim 96 | from habitat.tasks.nav.shortest_path_follower import ( 97 | ShortestPathFollower as HabitatShortestPathFollower, 98 | ) 99 | 100 | self.follower = HabitatShortestPathFollower( 101 | env_sim, goal_radius=goal_radius, return_one_hot=return_one_hot 102 | ) 103 | self.navigable_mask = navigatable_mask 104 | self.transform_fn = transform_fn 105 | 106 | def set_traversible_map(self, traversible: np.ndarray): 107 | pass 108 | 109 | def is_navigable(self, pose: Agent2DPose): 110 | pos = self.transform_fn.grd2agt_pos(pose) 111 | return self.env_sim.is_navigable(pos) 112 | 113 | def get_navigable_mask(self): 114 | return self.navigable_mask 115 | 116 | def get_next_action(self, start: Agent2DPose, goal: Agent2DPose, *args, **kwargs): 117 | goal_pos = self.transform_fn.grd2agt_pos(goal) 118 | return self.follower.get_next_action(goal_pos) 119 | -------------------------------------------------------------------------------- /orion/perception/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sled-group/navchat/afb21a4c9cb13f671534be5460e3c9e5867e5512/orion/perception/__init__.py -------------------------------------------------------------------------------- /orion/perception/detector/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sled-group/navchat/afb21a4c9cb13f671534be5460e3c9e5867e5512/orion/perception/detector/__init__.py -------------------------------------------------------------------------------- /orion/perception/detector/clipgradcam.py: -------------------------------------------------------------------------------- 1 | import cv2 2 | import numpy as np 3 | import torch 4 | from PIL import Image 5 | 
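# Note: this imports the CLIP copy vendored under orion/perception/detector/gradcam
# (likely pulled in as a git submodule, given the repository's .gitmodules); that copy
# exposes the per-block `attn_probs` used by `interpret` below, which the stock CLIP
# package does not retain.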
6 | import orion.perception.detector.gradcam.CLIP.clip as clip 7 | 8 | 9 | class CLIPGradCAM: 10 | def __init__(self): 11 | self.device = "cuda" if torch.cuda.is_available() else "cpu" 12 | self.model, self.preprocess = clip.load( 13 | "ViT-B/32", device=self.device, jit=False 14 | ) 15 | 16 | def interpret(self, image, texts, start_layer=-1): 17 | batch_size = texts.shape[0] 18 | images = image.repeat(batch_size, 1, 1, 1) 19 | logits_per_image, logits_per_text = self.model(images, texts) 20 | probs = logits_per_image.softmax(dim=-1).detach().cpu().numpy() 21 | index = [i for i in range(batch_size)] 22 | one_hot = np.zeros( 23 | (logits_per_image.shape[0], logits_per_image.shape[1]), dtype=np.float32 24 | ) 25 | one_hot[torch.arange(logits_per_image.shape[0]), index] = 1 26 | one_hot = torch.from_numpy(one_hot).requires_grad_(True) 27 | one_hot = torch.sum(one_hot.cuda() * logits_per_image) 28 | self.model.zero_grad() 29 | 30 | image_attn_blocks = list( 31 | dict(self.model.visual.transformer.resblocks.named_children()).values() 32 | ) 33 | 34 | if start_layer == -1: 35 | # calculate index of last layer 36 | start_layer = len(image_attn_blocks) - 1 37 | 38 | num_tokens = image_attn_blocks[0].attn_probs.shape[-1] 39 | R = torch.eye( 40 | num_tokens, num_tokens, dtype=image_attn_blocks[0].attn_probs.dtype 41 | ).to(self.device) 42 | R = R.unsqueeze(0).expand(batch_size, num_tokens, num_tokens) 43 | for i, blk in enumerate(image_attn_blocks): 44 | if i < start_layer: 45 | continue 46 | grad = torch.autograd.grad(one_hot, [blk.attn_probs], retain_graph=True)[ 47 | 0 48 | ].detach() 49 | cam = blk.attn_probs.detach() 50 | cam = cam.reshape(-1, cam.shape[-1], cam.shape[-1]) 51 | grad = grad.reshape(-1, grad.shape[-1], grad.shape[-1]) 52 | cam = grad * cam 53 | cam = cam.reshape(batch_size, -1, cam.shape[-1], cam.shape[-1]) 54 | cam = cam.clamp(min=0).mean(dim=1) 55 | R = R + torch.bmm(cam, R) 56 | image_relevance = R[:, 0, 1:] 57 | 58 | dim = int(image_relevance.numel() ** 0.5) 59 | image_relevance = image_relevance.reshape(1, 1, dim, dim) 60 | image_relevance = torch.nn.functional.interpolate( 61 | image_relevance, size=224, mode="bilinear" 62 | ) 63 | image_relevance = image_relevance.reshape(224, 224).cuda().data.cpu().numpy() 64 | image_relevance = (image_relevance - image_relevance.min()) / ( 65 | image_relevance.max() - image_relevance.min() 66 | ) 67 | 68 | return self.find_centroid(image_relevance) 69 | 70 | def find_centroid(self, object_mask): 71 | us, vs = np.where(object_mask > 0.99) 72 | if len(us) == 0: 73 | return None 74 | 75 | mean_u = np.mean(us) 76 | mean_v = np.mean(vs) 77 | index = np.argmin((us - mean_u) ** 2 + (vs - mean_v) ** 2, axis=None) 78 | y, x = us[index], vs[index] 79 | return (x, y) # (7,215) 80 | 81 | def predict(self, rgb: np.ndarray, txt: str): 82 | img = self.preprocess(Image.fromarray(rgb)).unsqueeze(0).to(self.device) 83 | texts = [txt] 84 | text = clip.tokenize(texts).to(self.device) 85 | pt = self.interpret(image=img, texts=text) 86 | if pt is None: 87 | return None 88 | else: 89 | # resize back to original image size 90 | x, y = pt 91 | x = int(pt[0] * rgb.shape[1] / 224) 92 | x = max(min(x, rgb.shape[1] - 1), 0) 93 | y = int(pt[1] * rgb.shape[0] / 224) 94 | y = max(min(y, rgb.shape[0] - 1), 0) 95 | return (x, y) 96 | -------------------------------------------------------------------------------- /orion/perception/detector/groundingSAM.py: -------------------------------------------------------------------------------- 1 | import json 2 | import os 3 | 
import warnings 4 | 5 | # Grounding DINO 6 | import groundingdino.datasets.transforms as T 7 | import matplotlib.pyplot as plt 8 | import numpy as np 9 | import torch 10 | from groundingdino.models import build_model 11 | from groundingdino.util import box_ops 12 | from groundingdino.util.slconfig import SLConfig 13 | from groundingdino.util.utils import clean_state_dict, get_phrases_from_posmap 14 | from PIL import Image 15 | # segment anything 16 | from segment_anything import SamPredictor, build_sam 17 | 18 | from orion import logger 19 | from orion.abstract.interfaces import Observations, TextQuery 20 | from orion.abstract.perception import DetectionModule, MaskedBBOX 21 | from orion.config.my_config import GroundingDINOConfig 22 | 23 | warnings.filterwarnings("ignore") 24 | 25 | 26 | def load_image(image): 27 | if isinstance(image, str) and os.path.exists(image): 28 | image_path = image 29 | image_pil = Image.open(image_path).convert("RGB") # load image 30 | else: 31 | image_pil = Image.fromarray(image) 32 | 33 | transform = T.Compose( 34 | [ 35 | T.RandomResize([800], max_size=1333), 36 | T.ToTensor(), 37 | T.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]), 38 | ] 39 | ) 40 | image, _ = transform(image_pil, None) # 3, h, w 41 | return image_pil, image 42 | 43 | 44 | def load_model(model_config_path, model_checkpoint_path, device): 45 | args = SLConfig.fromfile(model_config_path) 46 | args.device = device 47 | model = build_model(args) 48 | checkpoint = torch.load(model_checkpoint_path, map_location="cpu") 49 | load_res = model.load_state_dict( 50 | clean_state_dict(checkpoint["model"]), strict=False 51 | ) 52 | _ = model.eval() 53 | return model 54 | 55 | 56 | def get_grounding_output( 57 | model, image, caption, box_threshold, text_threshold, with_logits=True, device="cpu" 58 | ): 59 | caption = caption.lower() 60 | caption = caption.strip() 61 | if not caption.endswith("."): 62 | caption = caption + "." 
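    # GroundingDINO expects a lowercase caption terminated with '.'; the model call below
    # returns token-level logits for each query box, which are first filtered against
    # box_threshold and then decoded into phrases using text_threshold.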
63 | model = model.to(device) 64 | image = image.to(device) 65 | with torch.no_grad(): 66 | outputs = model(image[None], captions=[caption]) 67 | logits = outputs["pred_logits"].cpu().sigmoid()[0] # (nq, 256) 68 | boxes = outputs["pred_boxes"].cpu()[0] # (nq, 4) 69 | logits.shape[0] 70 | 71 | # filter output 72 | logits_filt = logits.clone() 73 | boxes_filt = boxes.clone() 74 | filt_mask = logits_filt.max(dim=1)[0] > box_threshold 75 | logits_filt = logits_filt[filt_mask] # num_filt, 256 76 | boxes_filt = boxes_filt[filt_mask] # num_filt, 4 77 | logits_filt.shape[0] 78 | 79 | # get phrase 80 | tokenlizer = model.tokenizer 81 | tokenized = tokenlizer(caption) 82 | # build pred 83 | pred_phrases = [] 84 | for logit, box in zip(logits_filt, boxes_filt): 85 | pred_phrase = get_phrases_from_posmap( 86 | logit > text_threshold, tokenized, tokenlizer 87 | ) 88 | if with_logits: 89 | pred_phrases.append(pred_phrase + f"({str(logit.max().item())[:4]})") 90 | else: 91 | pred_phrases.append(pred_phrase) 92 | 93 | return boxes_filt, pred_phrases 94 | 95 | 96 | def show_mask(mask, ax, random_color=False): 97 | if random_color: 98 | color = np.concatenate([np.random.random(3), np.array([0.6])], axis=0) 99 | else: 100 | color = np.array([30 / 255, 144 / 255, 255 / 255, 0.6]) 101 | h, w = mask.shape[-2:] 102 | mask_image = mask.reshape(h, w, 1) * color.reshape(1, 1, -1) 103 | ax.imshow(mask_image) 104 | 105 | 106 | def show_box(box, ax, label): 107 | x0, y0 = box[0], box[1] 108 | w, h = box[2] - box[0], box[3] - box[1] 109 | ax.add_patch( 110 | plt.Rectangle((x0, y0), w, h, edgecolor="green", facecolor=(0, 0, 0, 0), lw=2) 111 | ) 112 | ax.text(x0, y0, label) 113 | 114 | 115 | def save_mask_data(output_dir, mask_list, box_list, label_list): 116 | value = 0 # 0 for background 117 | 118 | mask_img = torch.zeros(mask_list.shape[-2:]) 119 | for idx, mask in enumerate(mask_list): 120 | mask_img[mask.cpu().numpy()[0] == True] = value + idx + 1 121 | plt.figure(figsize=(10, 10)) 122 | plt.imshow(mask_img.numpy()) 123 | plt.axis("off") 124 | plt.savefig( 125 | os.path.join(output_dir, "mask.jpg"), 126 | bbox_inches="tight", 127 | dpi=300, 128 | pad_inches=0.0, 129 | ) 130 | 131 | json_data = [{"value": value, "label": "background"}] 132 | for label, box in zip(label_list, box_list): 133 | value += 1 134 | name, logit = label.split("(") 135 | logit = logit[:-1] # the last is ')' 136 | json_data.append( 137 | { 138 | "value": value, 139 | "label": name, 140 | "logit": float(logit), 141 | "box": box.numpy().tolist(), 142 | } 143 | ) 144 | with open(os.path.join(output_dir, "mask.json"), "w") as f: 145 | json.dump(json_data, f) 146 | 147 | 148 | class GroundingSAM(DetectionModule): 149 | def __init__(self, cfg=GroundingDINOConfig()): 150 | self.cfg = cfg 151 | 152 | # load model 153 | self.model = load_model( 154 | self.cfg.config_file, self.cfg.grounded_checkpoint, device=self.cfg.device 155 | ) 156 | 157 | # initialize SAM 158 | self.predictor = SamPredictor( 159 | build_sam(checkpoint=self.cfg.sam_checkpoint).to(self.cfg.device) 160 | ) 161 | 162 | def predict(self, rgb: np.ndarray, txt: TextQuery) -> MaskedBBOX: 163 | """text prompt should be a sentence or multiple words separated by 164 | ' . '. 
tgt_object should be a single noun word 165 | """ 166 | # load image 167 | 168 | txt_prompt: str = txt.prompt.lower() if txt.prompt else "" 169 | txt_object: str = txt.target.lower() if txt.target else "" 170 | 171 | image_pil, image = load_image(rgb) 172 | 173 | # run grounding dino model 174 | boxes_filt, pred_phrases = get_grounding_output( 175 | self.model, 176 | image, 177 | txt_prompt, 178 | self.cfg.box_threshold, 179 | self.cfg.text_threshold, 180 | device=self.cfg.device, 181 | ) 182 | 183 | if all(txt_object not in p for p in pred_phrases): 184 | return MaskedBBOX(False, [], [], []) 185 | 186 | self.predictor.set_image(rgb) 187 | size = image_pil.size 188 | H, W = size[1], size[0] 189 | for i in range(boxes_filt.size(0)): 190 | boxes_filt[i] = boxes_filt[i] * torch.Tensor([W, H, W, H]) 191 | boxes_filt[i][:2] -= boxes_filt[i][2:] / 2 192 | boxes_filt[i][2:] += boxes_filt[i][:2] 193 | 194 | boxes_filt = boxes_filt.cpu() 195 | 196 | transformed_boxes = self.predictor.transform.apply_boxes_torch( 197 | boxes_filt, rgb.shape[:2] 198 | ).to(self.cfg.device) 199 | 200 | masks, _, _ = self.predictor.predict_torch( 201 | point_coords=None, 202 | point_labels=None, 203 | boxes=transformed_boxes.to(self.cfg.device), 204 | multimask_output=False, 205 | ) 206 | 207 | bboxes = boxes_filt.numpy().astype(np.int32) 208 | texts = pred_phrases 209 | masks = masks.cpu().numpy() 210 | 211 | tuple_list = [] 212 | for bbox, text, mask in zip(bboxes, texts, masks): 213 | if txt_object in text: 214 | tuple_list.append((bbox, text, mask)) 215 | 216 | return MaskedBBOX.from_tuple_list(True, tuple_list) 217 | -------------------------------------------------------------------------------- /orion/perception/extractor/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sled-group/navchat/afb21a4c9cb13f671534be5460e3c9e5867e5512/orion/perception/extractor/__init__.py -------------------------------------------------------------------------------- /orion/perception/extractor/clipbase.py: -------------------------------------------------------------------------------- 1 | from typing import List, Union 2 | 3 | import numpy as np 4 | import open_clip 5 | import torch 6 | from PIL import Image 7 | 8 | from orion.abstract.interfaces import TextQueries 9 | from orion.config.my_config import CLIPConfig 10 | 11 | 12 | class CLIPBase: 13 | """use clip text encoder to get vector representation of text, 14 | other vision backbone to get pixel-wise vector representation of image, 15 | two vectors should are in the same semantic space 16 | """ 17 | 18 | def __init__(self, cfg: CLIPConfig): 19 | self.device = cfg.device 20 | self.clip_version = cfg.clip_version 21 | self.openclip_pretained = cfg.openclip_pretained 22 | self.feat_dim = {"ViT-B-32": 512, "ViT-B-16": 512, "ViT-L-14": 768}[ 23 | cfg.clip_version 24 | ] 25 | ( 26 | self.clip_model, 27 | _, 28 | self.clip_preprocess, 29 | ) = open_clip.create_model_and_transforms( 30 | self.clip_version, pretrained=self.openclip_pretained 31 | ) 32 | self.tokenizer = open_clip.get_tokenizer(self.clip_version) 33 | self.clip_model.to(self.device) 34 | self.cfg = cfg 35 | 36 | def encode_text(self, txts: Union[List[str], TextQueries]) -> torch.Tensor: 37 | if isinstance(txts, list): 38 | txts = TextQueries(txts) 39 | tok = self.tokenizer(txts.prompts).to(self.device) 40 | with torch.no_grad(), torch.cuda.amp.autocast(): 41 | text_features = self.clip_model.encode_text(tok) 42 | text_features /= 
text_features.norm(dim=-1, keepdim=True) 43 | return text_features 44 | 45 | def encode_image(self, image: Union[np.ndarray, Image.Image, str]) -> torch.Tensor: 46 | # single image -> single vec 47 | if isinstance(image, np.ndarray): 48 | assert len(image.shape) == 3, "image should be [h, w, c]" 49 | image = Image.fromarray(image) 50 | elif isinstance(image, str): 51 | image = Image.open(image) 52 | 53 | image = self.clip_preprocess(image).unsqueeze(0).to(self.device) 54 | with torch.no_grad(), torch.cuda.amp.autocast(): 55 | image_features = self.clip_model.encode_image(image) 56 | image_features /= image_features.norm(dim=-1, keepdim=True) 57 | return image_features 58 | 59 | def score(self, image_feat: torch.Tensor, text_feat: torch.Tensor) -> np.ndarray: 60 | return (100.0 * image_feat @ text_feat.T).softmax(dim=-1).cpu().numpy() 61 | 62 | def predict(self, rgb: np.ndarray, txts: TextQueries): 63 | image_feat = self.encode_image(rgb) 64 | text_feat = self.encode_text(txts) 65 | text_probs = self.score(image_feat, text_feat) 66 | return text_probs 67 | -------------------------------------------------------------------------------- /orion/perception/extractor/concept_fusion_extractor.py: -------------------------------------------------------------------------------- 1 | """ 2 | Adapted from ConceptFusion repo. https://github.com/concept-fusion/concept-fusion 3 | """ 4 | 5 | 6 | from typing import Union 7 | 8 | import numpy as np 9 | import torch 10 | from segment_anything import SamAutomaticMaskGenerator, sam_model_registry 11 | 12 | from orion.abstract.perception import PerceptionModule 13 | from orion.config.my_config import (CLIPConfig_vitL14_datacomp, 14 | ConceptFusionConfig) 15 | from orion.perception.extractor.clipbase import CLIPBase 16 | 17 | 18 | class ConceptFusionExtractor(PerceptionModule): 19 | def __init__( 20 | self, cfg: ConceptFusionConfig = ConceptFusionConfig(), height=480, width=640 21 | ): 22 | self.height = height 23 | self.width = width 24 | self.sam = sam_model_registry[cfg.sam_model_type](checkpoint=cfg.sam_ckpt_path) 25 | self.sam.to(device=cfg.device) 26 | self.mask_generator = SamAutomaticMaskGenerator( 27 | model=self.sam, 28 | points_per_side=8, 29 | pred_iou_thresh=0.92, 30 | crop_n_layers=1, 31 | crop_n_points_downscale_factor=2, 32 | ) 33 | 34 | self.clip_model = CLIPBase(CLIPConfig_vitL14_datacomp()) 35 | self.device = cfg.device 36 | self.feat_dim = self.clip_model.feat_dim 37 | 38 | @torch.no_grad() 39 | def predict(self, rgb: Union[np.ndarray, torch.Tensor]) -> torch.Tensor: 40 | # Extracting SAM masks.. 41 | masks = self.mask_generator.generate(rgb) # around 4s in cuda for 480x640 42 | 43 | with torch.cuda.amp.autocast(): 44 | # Extracting global CLIP features 45 | global_feat = self.clip_model.encode_image(rgb) 46 | global_feat /= global_feat.norm(dim=-1, keepdim=True) # (1, h, w, feat_dim) 47 | global_feat = torch.nn.functional.normalize(global_feat, dim=-1) 48 | feat_dim = global_feat.shape[-1] 49 | cosine_similarity = torch.nn.CosineSimilarity(dim=-1) 50 | 51 | feat_per_roi = [] 52 | roi_nonzero_inds = [] 53 | similarity_scores = [] 54 | for maskidx in range(len(masks)): 55 | try: 56 | _x, _y, _w, _h = tuple(masks[maskidx]["bbox"]) # xywh bounding box 57 | seg = masks[maskidx]["segmentation"] 58 | nonzero_inds = torch.argwhere( 59 | torch.from_numpy(masks[maskidx]["segmentation"]) 60 | ) 61 | # Note: Image is (H, W, 3). 
In SAM output, y coords are along height, x along width 62 | img_roi = rgb[_y : _y + _h, _x : _x + _w, :] 63 | roifeat = self.clip_model.encode_image(img_roi) 64 | roifeat = torch.nn.functional.normalize(roifeat, dim=-1) 65 | except: 66 | roifeat = global_feat.clone().detach() 67 | feat_per_roi.append(roifeat) 68 | roi_nonzero_inds.append(nonzero_inds) 69 | _sim = cosine_similarity(global_feat, roifeat) 70 | similarity_scores.append(_sim) 71 | 72 | similarity_scores = torch.cat(similarity_scores) 73 | softmax_scores = torch.nn.functional.softmax(similarity_scores, dim=0) 74 | outfeat = torch.zeros(self.height, self.width, feat_dim, dtype=torch.half) 75 | for maskidx in range(len(masks)): 76 | _weighted_feat = ( 77 | softmax_scores[maskidx] * global_feat 78 | + (1 - softmax_scores[maskidx]) * feat_per_roi[maskidx] 79 | ) 80 | _weighted_feat = torch.nn.functional.normalize(_weighted_feat, dim=-1) 81 | outfeat[ 82 | roi_nonzero_inds[maskidx][:, 0], roi_nonzero_inds[maskidx][:, 1] 83 | ] += (_weighted_feat[0].detach().cpu().half()) 84 | outfeat[ 85 | roi_nonzero_inds[maskidx][:, 0], roi_nonzero_inds[maskidx][:, 1] 86 | ] = torch.nn.functional.normalize( 87 | outfeat[ 88 | roi_nonzero_inds[maskidx][:, 0], roi_nonzero_inds[maskidx][:, 1] 89 | ].float(), 90 | dim=-1, 91 | ).half() 92 | 93 | outfeat = outfeat.unsqueeze(0).float() 94 | outfeat = torch.nn.functional.normalize(outfeat, dim=-1) 95 | outfeat = outfeat[0].half() # --> H, W, feat_dim 96 | return outfeat.cpu() 97 | -------------------------------------------------------------------------------- /orion/perception/extractor/lseg_extractor.py: -------------------------------------------------------------------------------- 1 | """ 2 | Adapted from VLmap repo. https://github.com/vlmaps/vlmaps. 3 | """ 4 | 5 | from typing import Union 6 | 7 | import clip 8 | import numpy as np 9 | import torch 10 | 11 | from orion.abstract.interfaces import TextQueries 12 | from orion.abstract.perception import PerceptionModule 13 | from orion.config.my_config import LsegConfig 14 | from orion.perception.extractor.lseg_module import LSegEncDecNet 15 | 16 | 17 | class LSegExtractor(PerceptionModule): 18 | def __init__(self, cfg: LsegConfig = LsegConfig()): 19 | model = LSegEncDecNet( 20 | arch_option=0, block_depth=0, activation="lrelu", visualize=False 21 | ) 22 | 23 | model_state_dict = model.state_dict() 24 | pretrained_state_dict = torch.load(cfg.ckpt_path) 25 | pretrained_state_dict = { 26 | k.lstrip("net."): v for k, v in pretrained_state_dict["state_dict"].items() 27 | } 28 | model_state_dict.update(pretrained_state_dict) 29 | model.load_state_dict(pretrained_state_dict) 30 | 31 | model.eval() 32 | model = model.to(cfg.device) 33 | self.model = model 34 | 35 | self.feat_dim = self.model.out_c 36 | self.device = cfg.device 37 | self.cfg = cfg 38 | 39 | @torch.no_grad() 40 | def predict(self, rgb: Union[np.ndarray, torch.Tensor]) -> torch.Tensor: 41 | if isinstance(rgb, np.ndarray): 42 | rgb = np.expand_dims(rgb, axis=0) 43 | else: 44 | rgb = torch.unsqueeze(rgb, dim=0) 45 | outputs = self.model.encode(rgb) 46 | return outputs[0].permute(1, 2, 0) # [H, W, D] 47 | 48 | @torch.no_grad() 49 | def encode_text(self, text_list: TextQueries) -> torch.Tensor: 50 | if isinstance(text_list, list): 51 | text_list = TextQueries(prompts=text_list) 52 | text = clip.tokenize(text_list.prompts).to(self.device) 53 | text_features = self.model.clip_pretrained.encode_text(text) 54 | text_features /= text_features.norm(dim=-1, keepdim=True) 55 | text_features = 
text_features.float() 56 | return text_features 57 | -------------------------------------------------------------------------------- /orion/user_simulator/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sled-group/navchat/afb21a4c9cb13f671534be5460e3c9e5867e5512/orion/user_simulator/__init__.py -------------------------------------------------------------------------------- /orion/user_simulator/goals/4ok3usBNeis/final.json: -------------------------------------------------------------------------------- 1 | { 2 | "room_info": { 3 | "bedroom": "Alice|Bob", 4 | "bathroom": "shared", 5 | "living room": "shared" 6 | }, 7 | "bed_0": { 8 | "base": "{\"center\": [279, 316], \"mass\": 1422.5}", 9 | "nearby_obj": "nightstand_0", 10 | "object_desc": "with grey dotted white sheet|has a pillow", 11 | "attr": "bought from IKEA", 12 | "room_id": "bedroom_1", 13 | "explain": "a furniture for sleeping with sheets and mattress.", 14 | "type": "big" 15 | }, 16 | "nightstand_0": { 17 | "base": "{\"center\": [251, 313], \"mass\": 111.5}", 18 | "nearby_obj": "bed_0", 19 | "object_desc": "a brown side table with a drawer and open shelf", 20 | "attr": "bought from IKEA", 21 | "room_id": "bedroom_1", 22 | "explain": "a table beside the bed", 23 | "type": "small" 24 | }, 25 | "cabinet_0": { 26 | "base": "{\"center\": [196, 269], \"mass\": 27.5}", 27 | "nearby_obj": "couch_0", 28 | "object_desc": "a small short cabinet at the room corner", 29 | "attr": null, 30 | "room_id": "bedroom_2", 31 | "explain": "a cupboard usually near sofa", 32 | "type": "small" 33 | }, 34 | "couch_0": { 35 | "base": "{\"center\": [216, 286], \"mass\": 618.0}", 36 | "nearby_obj": "cabinet_0", 37 | "object_desc": "red long grid-patterned sofa", 38 | "attr": null, 39 | "room_id": "bedroom_2", 40 | "explain": "a sofa with long upholstered seat and cushions", 41 | "type": "big" 42 | }, 43 | "toilet_0": { 44 | "base": "{\"center\": [270, 188], \"mass\": 53.0}", 45 | "nearby_obj": "washbasin_0", 46 | "object_desc": "small white toilet with a toilet paper on it", 47 | "attr": null, 48 | "room_id": "bathroom_1", 49 | "explain": "a plumbing fixture on the floor for human waste disposal", 50 | "type": "small" 51 | }, 52 | "towel_0": { 53 | "base": "{\"center\": [270, 188], \"mass\": 3.5}", 54 | "nearby_obj": "", 55 | "object_desc": "blue towel hanging on the bar", 56 | "attr": null, 57 | "room_id": "bathroom_1", 58 | "explain": "a piece of cloth used for drying things", 59 | "type": "small" 60 | }, 61 | "washbasin_0": { 62 | "base": "{\"center\": [279, 196], \"mass\": 124.5}", 63 | "nearby_obj": "toilet_0", 64 | "object_desc": "white ceramic sink upon the counter", 65 | "attr": null, 66 | "room_id": "bathroom_1", 67 | "explain": "sink or basin to wash hands", 68 | "type": "small" 69 | }, 70 | "chair_0": { 71 | "base": "{\"center\": [373, 223], \"mass\": 126.5}", 72 | "nearby_obj": "bench_0|rack_0", 73 | "object_desc": "red wooden chair", 74 | "attr": "bought from Maiden Home", 75 | "room_id": "living room_1", 76 | "explain": "a furniture for one person to sit on", 77 | "type": "small" 78 | }, 79 | "bench_0": { 80 | "base": "{\"center\": [388, 202], \"mass\": 216.0}", 81 | "nearby_obj": "shoe_0|chair_0", 82 | "object_desc": "yellow mahogany bench", 83 | "attr": "bought from West Elm", 84 | "room_id": "living room_1", 85 | "explain": "a long wooden seat for several people", 86 | "type": "big" 87 | }, 88 | "freezer_0": { 89 | "base": "{\"center\": [287, 139], \"mass\": 
227.0}", 90 | "nearby_obj": "dresser_0|refrigerator_0|shelf_0", 91 | "object_desc": "", 92 | "attr": "Amazon Special", 93 | "room_id": "living room_1", 94 | "explain": "a container shorten than fridge to store frozen food", 95 | "type": "big" 96 | }, 97 | "refrigerator_0": { 98 | "base": "{\"center\": [305, 119], \"mass\": 78.5}", 99 | "nearby_obj": "freezer_0", 100 | "object_desc": "white tall standing fridge", 101 | "attr": null, 102 | "room_id": "living room_1", 103 | "explain": "fridge to store food and drinks", 104 | "type": "big" 105 | }, 106 | "dresser_0": { 107 | "base": "{\"center\": [301, 170], \"mass\": 65.0}", 108 | "nearby_obj": "freezer_0|shelf_0", 109 | "object_desc": "with multiple drawers", 110 | "attr": "", 111 | "room_id": "living room_1", 112 | "explain": "a large cabinet putting dresses", 113 | "type": "big" 114 | }, 115 | "shelf_0": { 116 | "base": "{\"center\": [271, 154], \"mass\": 119.0}", 117 | "nearby_obj": "freezer_0", 118 | "object_desc": "board fixed on the wall", 119 | "attr": null, 120 | "room_id": "living room_1", 121 | "explain": "a flat length of wood attached to the wall", 122 | "type": "ambiguous" 123 | }, 124 | "rack_0": { 125 | "base": "{\"center\": [368, 238], \"mass\": 75.0}", 126 | "nearby_obj": "chair_0", 127 | "object_desc": "large tall shelf|hold audio players", 128 | "attr": null, 129 | "room_id": "living room_1", 130 | "explain": "a shelf with bars to hold things", 131 | "type": "ambiguous" 132 | }, 133 | "rack_1": { 134 | "base": "{\"center\": [376, 159], \"mass\": 65.5}", 135 | "nearby_obj": "shoe_0", 136 | "object_desc": "the shelf store detergents|near the shoes", 137 | "attr": null, 138 | "room_id": "living room_1", 139 | "explain": "a shelf with bars to hold things", 140 | "type": "ambiguous" 141 | }, 142 | "speaker_0": { 143 | "base": "{\"center\": [368, 238], \"mass\": 15.0}", 144 | "nearby_obj": "rack_0", 145 | "object_desc": "audio speaker on the shelf", 146 | "attr": null, 147 | "room_id": "living room_1", 148 | "explain": "black device to display sound", 149 | "type": "small" 150 | }, 151 | "shoe_0": { 152 | "base": "{\"center\": [384, 170], \"mass\": 110.5}", 153 | "nearby_obj": "rack_1", 154 | "object_desc": "shoes on the floor", 155 | "attr": null, 156 | "room_id": "living room_1", 157 | "explain": "a covering for the foot", 158 | "type": "small" 159 | }, 160 | "laundry machine_0": { 161 | "base": "{\"center\": [236, 213], \"mass\": 228.0}", 162 | "nearby_obj": "", 163 | "object_desc": "locate in lanudry room", 164 | "attr": "bought from Walmart at 2020", 165 | "room_id": null, 166 | "explain": "washing machine", 167 | "type": "big" 168 | } 169 | } -------------------------------------------------------------------------------- /orion/user_simulator/goals/LT9Jq6dN3Ea/final.json: -------------------------------------------------------------------------------- 1 | { 2 | "room_info": { 3 | "bathroom": "shared", 4 | "kitchen": "shared", 5 | "living room": "shared", 6 | "reading room": "Alice" 7 | }, 8 | "table_0": { 9 | "base": "{\"center\": [288, 242], \"mass\": 600.5}", 10 | "nearby_obj": "couch_0|chair_0", 11 | "object_desc": "", 12 | "attr": "special designed by IKEA", 13 | "room_id": "living room_1", 14 | "explain": "a flat surface supported by legs", 15 | "type": "ambiguous", 16 | "same_goal": "table_0|table_1|table_2|table_3|table_4" 17 | }, 18 | "couch_0": { 19 | "base": "{\"center\": [247, 253], \"mass\": 642.0}", 20 | "nearby_obj": "table_0|chair_0", 21 | "object_desc": "near a tall landing lamp|facing the fireplace and 
tv", 22 | "attr": "bought from Amazon", 23 | "room_id": null, 24 | "explain": "a sofa with long upholstered seat for multiple people", 25 | "type": "big", 26 | "same_goal": "couch_0|couch_1|couch_2" 27 | }, 28 | "tv_0": { 29 | "base": "{\"center\": [279, 202], \"mass\": 75.0}", 30 | "nearby_obj": "fireplace_0", 31 | "object_desc": "", 32 | "attr": null, 33 | "room_id": "living room_1", 34 | "explain": "a black monitor for television broadcasts.", 35 | "type": "big", 36 | "same_goal": "tv_0" 37 | }, 38 | "fireplace_0": { 39 | "base": "{\"center\": [277, 194], \"mass\": 108.5}", 40 | "nearby_obj": "tv_0", 41 | "object_desc": "", 42 | "attr": null, 43 | "room_id": "living room_1", 44 | "explain": "a structure with fire for heating", 45 | "type": "small", 46 | "same_goal": "fireplace_0" 47 | }, 48 | "stool_0": { 49 | "base": "{\"center\": [313, 384], \"mass\": 68.5}", 50 | "nearby_obj": "", 51 | "object_desc": "high-leg stools", 52 | "attr": null, 53 | "room_id": "kitchen_1", 54 | "explain": "a small backless seat for seating", 55 | "type": "small", 56 | "same_goal": "stool_0|stool_1" 57 | }, 58 | "dining table_0": { 59 | "base": "{\"center\": [294, 342], \"mass\": 260.5}", 60 | "nearby_obj": "dining chair_0", 61 | "object_desc": "with white flower bunch on it", 62 | "attr": "elegant classic design", 63 | "room_id": "kitchen_1", 64 | "explain": "table for dining", 65 | "type": "ambiguous", 66 | "same_goal": "dining table_0|dining table_1" 67 | }, 68 | "bathroom cabinet_0": { 69 | "base": "{\"center\": [382, 258], \"mass\": 43.5}", 70 | "nearby_obj": "toilet_0", 71 | "object_desc": "blue storage unit for towels", 72 | "attr": null, 73 | "room_id": "bathroom_1", 74 | "explain": "a storage wardrobe for bathroom essentials", 75 | "type": "small", 76 | "same_goal": "bathroom cabinet_0" 77 | }, 78 | "toilet_0": { 79 | "base": "{\"center\": [384, 270], \"mass\": 98.5}", 80 | "nearby_obj": "bathroom cabinet_0", 81 | "object_desc": "", 82 | "attr": null, 83 | "room_id": "bathroom_1", 84 | "explain": "a plumbing fixture on the floor for human waste disposal", 85 | "type": "small", 86 | "same_goal": "toilet_0" 87 | }, 88 | "desk_0": { 89 | "base": "{\"center\": [384, 198], \"mass\": 661.5}", 90 | "nearby_obj": "rack_0|chair_3|desk chair_0", 91 | "object_desc": "large yellow desk for home reading|with books and teapot on it", 92 | "attr": "", 93 | "room_id": "reading room_1", 94 | "explain": "a table used for working or writing with storage drawers.", 95 | "type": "big", 96 | "same_goal": "desk_0" 97 | }, 98 | "rack_0": { 99 | "base": "{\"center\": [357, 205], \"mass\": 219.5}", 100 | "nearby_obj": "desk_0|desk chair_0", 101 | "object_desc": "a shelf placed many artifacts" , 102 | "attr": null, 103 | "room_id": "reading room_1", 104 | "explain": "a structure used for holding or displaying items.", 105 | "type": "ambiguous", 106 | "same_goal": "rack_0" 107 | }, 108 | "rack_1": { 109 | "base": "{\"center\": [449, 396], \"mass\": 96.5}", 110 | "nearby_obj": "", 111 | "object_desc": "white open shelf for clothes", 112 | "attr": "bought from Walmart", 113 | "room_id": null, 114 | "explain": "a structure used for holding or displaying items.", 115 | "type": "ambiguous", 116 | "same_goal": "rack_1" 117 | } 118 | } -------------------------------------------------------------------------------- /orion/user_simulator/goals/MHPLjHsuG27/final.json: -------------------------------------------------------------------------------- 1 | { 2 | "room_info": { 3 | "bathroom": "shared", 4 | "kitchen": "shared", 5 | "living 
room": "shared" 6 | }, 7 | "chair_0": { 8 | "base": "{\"center\": [412, 288], \"mass\": 242.0}", 9 | "nearby_obj": "lamp_0|table_0", 10 | "object_desc": "around chairs and lamp", 11 | "attr": "bought from Castlery 20 years ago", 12 | "room_id": "living room_1", 13 | "explain": "a furniture seat with backrest and legs for one person", 14 | "type": "big", 15 | "same_goal": "chair_0|chair_1|chair_2" 16 | }, 17 | "lamp_0": { 18 | "base": "{\"center\": [443, 306], \"mass\": 43.5}", 19 | "nearby_obj": "chair_0|table_0", 20 | "object_desc": "lighting the dining area", 21 | "attr": "BrentWood white lamp", 22 | "room_id": "living room_1", 23 | "explain": "a light source with shade", 24 | "type": "small", 25 | "same_goal": "lamp_0|lamp_1|lamp_2" 26 | }, 27 | "tv_0": { 28 | "base": "{\"center\": [430, 206], \"mass\": 71.5}", 29 | "nearby_obj": "side table_0|recliner_0", 30 | "object_desc": "hanging on the wall", 31 | "attr": null, 32 | "room_id": "living room_1", 33 | "explain": "a black monitor for television broadcasts.", 34 | "type": "small", 35 | "same_goal": "tv_0" 36 | }, 37 | "lounge chair_0": { 38 | "base": "{\"center\": [449, 234], \"mass\": 335.5}", 39 | "nearby_obj": "side table_1|side table_0", 40 | "object_desc": "mesh weaved C-shape|near the window", 41 | "attr": "bought from Walmart", 42 | "room_id": "living room_1", 43 | "explain": "a chair designed for relaxation or lounging.", 44 | "type": "big", 45 | "same_goal": "lounge chair_0" 46 | }, 47 | "coffee table_0": { 48 | "base": "{\"center\": [406, 233], \"mass\": 376.5}", 49 | "nearby_obj": "lounge chair_0|side table_0|recliner_0|l-shaped sofa_0", 50 | "object_desc": "surrounded by a sofa|low to the ground", 51 | "attr": null, 52 | "room_id": "living room_1", 53 | "explain": "a low table placed around sofa", 54 | "type": "big", 55 | "same_goal": "coffee table_0" 56 | }, 57 | "recliner_0": { 58 | "base": "{\"center\": [417, 205], \"mass\": 271.5}", 59 | "nearby_obj": "coffee table_0|l-shaped sofa_0|side table_0", 60 | "object_desc": "recliner chair with cusions", 61 | "attr": "bought from Maiden Home this year", 62 | "room_id": "living room_1", 63 | "explain": "a lying chair that can be adjusted to a reclining position", 64 | "type": "big", 65 | "same_goal": "recliner_0" 66 | }, 67 | "kitchen cabinet_0": { 68 | "base": "{\"center\": [327, 348], \"mass\": 261.0}", 69 | "nearby_obj": "kitchen counter_0", 70 | "object_desc": "", 71 | "attr": "bought from Pottery", 72 | "room_id": "kitchen_1", 73 | "explain": "a storage unit in a kitchen", 74 | "type": "big", 75 | "same_goal": "kitchen cabinet_0|kitchen shelf_0" 76 | }, 77 | "toilet_0": { 78 | "base": "{\"center\": [356, 222], \"mass\": 55.5}", 79 | "nearby_obj": "bathroom counter_0", 80 | "object_desc": "", 81 | "attr": "bought from Wayfair", 82 | "room_id": "bathroom_1", 83 | "explain": "a plumbing fixture on the floor for human waste disposal", 84 | "type": "small", 85 | "same_goal": "toilet_0" 86 | }, 87 | "bathroom counter_0": { 88 | "base": "{\"center\": [354, 217], \"mass\": 15.0}", 89 | "nearby_obj": "toilet_0", 90 | "object_desc": "a shelf decorated with a mirror", 91 | "attr": null, 92 | "room_id": "bathroom_1", 93 | "explain": "a place to put toothbrush and toothpaste", 94 | "type": "small", 95 | "same_goal": "bathroom counter_0" 96 | }, 97 | "clock_0": { 98 | "base": "{\"center\": [375, 327], \"mass\": 20.5}", 99 | "nearby_obj": "", 100 | "object_desc": "stored in the room corner", 101 | "attr": "old-fashioned clock", 102 | "room_id": null, 103 | "explain": "a round device that 
shows the time", 104 | "type": "small", 105 | "same_goal": "clock_0" 106 | } 107 | } -------------------------------------------------------------------------------- /orion/user_simulator/goals/QaLdnwvtxbs/final.json: -------------------------------------------------------------------------------- 1 | { 2 | "room_info": { 3 | "bedroom": "shared", 4 | "bathroom": "Alice|Bob", 5 | "living room": "shared" 6 | }, 7 | "chair_5": { 8 | "base": "{\"center\": [379, 236], \"mass\": 30.5}", 9 | "nearby_obj": "table_4", 10 | "object_desc": "for haircut and makeup|in dressing room", 11 | "attr": null, 12 | "room_id": "bedroom_1", 13 | "explain": "a furniture seat with backrest and legs for one person", 14 | "type": "ambiguous", 15 | "same_goal": "chair_5" 16 | }, 17 | "table_4": { 18 | "base": "{\"center\": [387, 234], \"mass\": 132.5}", 19 | "nearby_obj": "chair_5", 20 | "object_desc": "mirror on the table|with small light bulbs", 21 | "attr": "bought from Pottery Barn", 22 | "room_id": "bedroom_1", 23 | "explain": "a flat surface supported by legs", 24 | "type": "ambiguous", 25 | "same_goal": "table_4" 26 | }, 27 | "bed_0": { 28 | "base": "{\"center\": [410, 297], \"mass\": 1907.0}", 29 | "nearby_obj": "chair_2|sofa_1", 30 | "object_desc": "with bed tables on both sides", 31 | "attr": "bought from IKEA at 2019", 32 | "room_id": "bedroom_1", 33 | "explain": "a furniture for sleeping with sheets and mattress.", 34 | "type": "big", 35 | "same_goal": "bed_0" 36 | }, 37 | "chair_2": { 38 | "base": "{\"center\": [384, 338], \"mass\": 90.5}", 39 | "nearby_obj": "bed_0|tv_1|sofa_1|telephone_0|desk_0", 40 | "object_desc": "yellow bedroom chair", 41 | "attr": "bought from CB2 at 1980", 42 | "room_id": "bedroom_1", 43 | "explain": "a furniture seat with backrest and legs for one person", 44 | "type": "ambiguous", 45 | "same_goal": "chair_2" 46 | }, 47 | "desk_0": { 48 | "base": "{\"center\": [369, 329], \"mass\": 491.0}", 49 | "nearby_obj": "sofa_1|chair_2|telephone_0|tv_1", 50 | "object_desc": "long white ceramic platform", 51 | "attr": "bought from Wayfair 10 years ago", 52 | "room_id": "bedroom_1", 53 | "explain": "a table used for working or writing with storage drawers.", 54 | "type": "big", 55 | "same_goal": "desk_0" 56 | }, 57 | "telephone_0": { 58 | "base": "{\"center\": [374, 349], \"mass\": 20.5}", 59 | "nearby_obj": "tv_1|chair_2|desk_0", 60 | "object_desc": "fixed landline dialing|black color", 61 | "attr": null, 62 | "room_id": "bedroom_1", 63 | "explain": "a device on the table for long-distance voice communication", 64 | "type": "small", 65 | "same_goal": "telephone_0" 66 | }, 67 | "chair_0": { 68 | "base": "{\"center\": [333, 379], \"mass\": 207.5}", 69 | "nearby_obj": "table_0", 70 | "object_desc": "multiple chairs for dining", 71 | "attr": null, 72 | "room_id": "living room_1", 73 | "explain": "a furniture seat with backrest and legs for one person", 74 | "type": "ambiguous", 75 | "same_goal": "chair_0|chair_1|chair_3|chair_4" 76 | }, 77 | "table_0": { 78 | "base": "{\"center\": [327, 369], \"mass\": 707.5}", 79 | "nearby_obj": "chair_0", 80 | "object_desc": "dining table with dining chairs", 81 | "attr": null, 82 | "room_id": "living room_1", 83 | "explain": "a flat surface supported by legs", 84 | "type": "ambiguous", 85 | "same_goal": "table_0" 86 | }, 87 | "table_1": { 88 | "base": "{\"center\": [247, 409], \"mass\": 495.5}", 89 | "nearby_obj": "tv_0|sofa_0", 90 | "object_desc": "near a tv stand|low height", 91 | "attr": "bought from Amazon", 92 | "room_id": "living room_1", 93 | 
"explain": "a flat surface supported by legs", 94 | "type": "ambiguous", 95 | "same_goal": "table_1|table_2|desk_1" 96 | }, 97 | "toilet_0": { 98 | "base": "{\"center\": [215, 284], \"mass\": 113.5}", 99 | "nearby_obj": "washbasin counter_0", 100 | "object_desc": "", 101 | "attr": null, 102 | "room_id": "bathroom_1", 103 | "explain": "a plumbing fixture on the floor for human waste disposal", 104 | "type": "big", 105 | "same_goal": "toilet_0" 106 | }, 107 | "bathtub_0": { 108 | "base": "{\"center\": [298, 252], \"mass\": 567.0}", 109 | "nearby_obj": "", 110 | "object_desc": "small ellipse shape", 111 | "attr": null, 112 | "room_id": "bathroom_2", 113 | "explain": "a container for bathing.", 114 | "type": "big", 115 | "same_goal": "bathtub_0" 116 | } 117 | } -------------------------------------------------------------------------------- /orion/user_simulator/goals/TEEsavR23oF/final.json: -------------------------------------------------------------------------------- 1 | { 2 | "room_info": { 3 | "bedroom": "shared", 4 | "bathroom": "shared", 5 | "living room": "shared" 6 | }, 7 | "chair_0": { 8 | "base": "{\"center\": [284, 300], \"mass\": 134.5}", 9 | "nearby_obj": "plant_0|printer_0|couch_1|computer desk_0", 10 | "object_desc": "mid-back computer desk chair", 11 | "attr": "Amazon basics classic puresoft", 12 | "room_id": "living room_1", 13 | "explain": "a chair paired with computer desk", 14 | "type": "ambiguous", 15 | "same_goal": "chair_0" 16 | }, 17 | "chair_1": { 18 | "base": "{\"center\": [276, 337], \"mass\": 124.0}", 19 | "nearby_obj": "printer_0|computer desk_0", 20 | "object_desc": "portable camping chair|colorful low-height", 21 | "attr": null, 22 | "room_id": "living room_1", 23 | "explain": "small chair for children use", 24 | "type": "ambiguous", 25 | "same_goal": "chair_1" 26 | }, 27 | "table_0": { 28 | "base": "{\"center\": [345, 279], \"mass\": 67.0}", 29 | "nearby_obj": "couch_1", 30 | "object_desc": "small stool table|putting newspaper", 31 | "attr": null, 32 | "room_id": "living room_1", 33 | "explain": "table stool", 34 | "type": "small", 35 | "same_goal": "table_0" 36 | }, 37 | "newspaper_0": { 38 | "base": "{\"center\": [339, 278], \"mass\": 34.5}", 39 | "nearby_obj": "table_0", 40 | "object_desc": "newspaper on the table|New York Times paper", 41 | "attr": null, 42 | "room_id": "living room_1", 43 | "explain": "paper for reading", 44 | "type": "small", 45 | "same_goal": "newspaper_0" 46 | }, 47 | "couch_0": { 48 | "base": "{\"center\": [348, 317], \"mass\": 1161.5}", 49 | "nearby_obj": "table_0|bicycle_0|wardrobe_2", 50 | "object_desc": "has plush toys", 51 | "attr": "bought from Walmart at 2020", 52 | "room_id": "living room_1", 53 | "explain": "a sofa with long upholstered seat for multiple people", 54 | "type": "ambiguous", 55 | "same_goal": "couch_0" 56 | }, 57 | "couch_1": { 58 | "base": "{\"center\": [302, 281], \"mass\": 417.5}", 59 | "nearby_obj": "computer desk_0|table_0|chair_0|plant_0", 60 | "object_desc": "covered by blanket and bag", 61 | "attr": "bought from IKEA", 62 | "room_id": "living room_1", 63 | "explain": "a sofa with long upholstered seat for multiple people", 64 | "type": "ambiguous", 65 | "same_goal": "couch_1|couch_2" 66 | }, 67 | "bicycle_0": { 68 | "base": "{\"center\": [370, 326], \"mass\": 473.5}", 69 | "nearby_obj": "couch_0", 70 | "object_desc": "indoor cycling bike", 71 | "attr": "bought from Home Cardio Gym", 72 | "room_id": "living room_1", 73 | "explain": "an exercise bike", 74 | "type": "big", 75 | "same_goal": "bicycle_0" 76 | 
}, 77 | "computer desk_0": { 78 | "base": "{\"center\": [275, 306], \"mass\": 178.5}", 79 | "nearby_obj": "couch_1|plant_0|chair_0|printer_0", 80 | "object_desc": "", 81 | "attr": "bought from Pottery Barn", 82 | "room_id": "living room_1", 83 | "explain": "table for putting the devices like computer", 84 | "type": "big", 85 | "same_goal": "computer desk_0" 86 | }, 87 | "printer_0": { 88 | "base": "{\"center\": [271, 315], \"mass\": 54.0}", 89 | "nearby_obj": "chair_0|computer desk_0", 90 | "object_desc": "printer on the table", 91 | "attr": null, 92 | "room_id": "living room_1", 93 | "explain": "a device that produces document copies", 94 | "type": "small", 95 | "same_goal": "printer_0" 96 | }, 97 | "plant_0": { 98 | "base": "{\"center\": [279, 276], \"mass\": 101.5}", 99 | "nearby_obj": "chair_0|computer desk_0|couch_1", 100 | "object_desc": "green plant to decorate the room", 101 | "attr": null, 102 | "room_id": "living room_1", 103 | "explain": "a plant with green leaves and roots for decoration", 104 | "type": "big", 105 | "same_goal": "plant_0" 106 | }, 107 | "wardrobe_0": { 108 | "base": "{\"center\": [380, 406], \"mass\": 185.0}", 109 | "nearby_obj": "tv_0", 110 | "object_desc": "rustic brown|face to bed", 111 | "attr": "Superjare TV stand", 112 | "room_id": "bedroom_1", 113 | "explain": "a table to put TV on", 114 | "type": "ambiguous", 115 | "same_goal": "wardrobe_0" 116 | }, 117 | "wardrobe_1": { 118 | "base": "{\"center\": [302, 347], \"mass\": 144.5}", 119 | "nearby_obj": "wardrobe_2|chair_1", 120 | "object_desc": "open shelf wardrobe|putting books and box", 121 | "attr": "bought from Castlery", 122 | "room_id": "living room_1", 123 | "explain": "a large cabinet for storing clothes and other items.", 124 | "type": "ambiguous", 125 | "same_goal": "wardrobe_1" 126 | }, 127 | "wardrobe_2": { 128 | "base": "{\"center\": [327, 343], \"mass\": 83.5}", 129 | "nearby_obj": "wardrobe_1|couch_0", 130 | "object_desc": "has mirror", 131 | "attr": "Elite 2-door", 132 | "room_id": "living room_1", 133 | "explain": "cabient with door for putting clothes", 134 | "type": "ambiguous", 135 | "same_goal": "wardrobe_2" 136 | }, 137 | "wardrobe_3": { 138 | "base": "{\"center\": [399, 370], \"mass\": 50.0}", 139 | "nearby_obj": "bed_0", 140 | "object_desc": "putting clothes", 141 | "attr": "bought from MoMA Design Store", 142 | "room_id": "bedroom_1", 143 | "explain": "a large cabinet for storing clothes and other items.", 144 | "type": "ambiguous", 145 | "same_goal": "wardrobe_3" 146 | }, 147 | "bed_0": { 148 | "base": "{\"center\": [419, 409], \"mass\": 851.0}", 149 | "nearby_obj": "nightstand_0|tv_0", 150 | "object_desc": "has a frame and mattress", 151 | "attr": "bought from Amazon", 152 | "room_id": "bedroom_1", 153 | "explain": "a furniture for sleeping with sheets and mattress.", 154 | "type": "big", 155 | "same_goal": "bed_0" 156 | }, 157 | "tv_0": { 158 | "base": "{\"center\": [373, 407], \"mass\": 94.5}", 159 | "nearby_obj": "wardrobe_0", 160 | "object_desc": "", 161 | "attr": null, 162 | "room_id": "bedroom_1", 163 | "explain": "a black monitor for television broadcasts.", 164 | "type": "big", 165 | "same_goal": "tv_0" 166 | }, 167 | "nightstand_0": { 168 | "base": "{\"center\": [439, 437], \"mass\": 125.5}", 169 | "nearby_obj": "bed_0", 170 | "object_desc": "made of mahaogany", 171 | "attr": "bought from CB2", 172 | "room_id": "bedroom_1", 173 | "explain": "a table near the bed", 174 | "type": "small", 175 | "same_goal": "nightstand_0|nightstand_1" 176 | }, 177 | "toilet_0": { 178 | 
"base": "{\"center\": [441, 301], \"mass\": 15.5}", 179 | "nearby_obj": "cabinet_0", 180 | "object_desc": "like white bowl", 181 | "attr": null, 182 | "room_id": "bathroom_1", 183 | "explain": "a plumbing fixture on the floor for human waste disposal", 184 | "type": "small", 185 | "same_goal": "toilet_0" 186 | }, 187 | "cabinet_0": { 188 | "base": "{\"center\": [444, 329], \"mass\": 255.5}", 189 | "nearby_obj": "toilet_0", 190 | "object_desc": "for washroom use", 191 | "attr": "", 192 | "room_id": "bathroom_1", 193 | "explain": "a table supports the washbasin", 194 | "type": "big", 195 | "same_goal": "cabinet_0" 196 | } 197 | } -------------------------------------------------------------------------------- /orion/user_simulator/goals/cvZr5TUy5C5/final.json: -------------------------------------------------------------------------------- 1 | { 2 | "room_info": { 3 | "bathroom": "shared", 4 | "kitchen": "David", 5 | "living room": "shared", 6 | "office room": "Alice", 7 | "dining room": "shared" 8 | }, 9 | "armchair_0": { 10 | "base": "{\"center\": [238, 172], \"mass\": 360.5}", 11 | "nearby_obj": "", 12 | "object_desc": "made of real fur", 13 | "attr": null, 14 | "room_id": "dining room_1", 15 | "explain": "a chair with armrests", 16 | "type": "big", 17 | "same_goal": "armchair_0|armchair_1" 18 | }, 19 | "kitchen shelf_0": { 20 | "base": "{\"center\": [383, 147], \"mass\": 343.0}", 21 | "nearby_obj": "microwave_0", 22 | "object_desc": "made of wood", 23 | "attr": null, 24 | "room_id": "kitchen_1", 25 | "explain": "a surface holds dishes, utensils in kitchen", 26 | "type": "big", 27 | "same_goal": "kitchen shelf_0|kitchen shelf_1|kitchen shelf_2|kitchen shelf_3|kitchen shelf_4" 28 | }, 29 | "oven_0": { 30 | "base": "{\"center\": [360, 213], \"mass\": 54.0}", 31 | "nearby_obj": "refrigerator_0", 32 | "object_desc": "", 33 | "attr": null, 34 | "room_id": "kitchen_1", 35 | "explain": "a kitchen appliance used for baking and roasting", 36 | "type": "small", 37 | "same_goal": "oven_0" 38 | }, 39 | "bookshelf_0": { 40 | "base": "{\"center\": [341, 301], \"mass\": 255.0}", 41 | "nearby_obj": "computer_0|computer desk_0", 42 | "object_desc": "sink cabinet_0", 43 | "attr": "Alice's bookshelf", 44 | "room_id": "office room_1", 45 | "explain": "horizontal shelves for storing books", 46 | "type": "big", 47 | "same_goal": "bookshelf_0" 48 | }, 49 | "printer_0": { 50 | "base": "{\"center\": [294, 342], \"mass\": 83.5}", 51 | "nearby_obj": "computer_0|computer desk_0", 52 | "object_desc": "", 53 | "attr": null, 54 | "room_id": "office room_1", 55 | "explain": "a device that produces document copies", 56 | "type": "small", 57 | "same_goal": "printer_0" 58 | }, 59 | "computer desk_0": { 60 | "base": "{\"center\": [307, 312], \"mass\": 414.0}", 61 | "nearby_obj": "bookshelf_0|printer_0|computer_0", 62 | "object_desc": "cabinet_1", 63 | "attr": "bought from CB2", 64 | "room_id": "office room_1", 65 | "explain": "a desk for holding computer", 66 | "type": "ambiguous", 67 | "same_goal": "computer desk_0|computer chair_0" 68 | }, 69 | "computer_0": { 70 | "base": "{\"center\": [307, 316], \"mass\": 134.0}", 71 | "nearby_obj": "bookshelf_0|printer_0|computer desk_0", 72 | "object_desc": "", 73 | "attr": null, 74 | "room_id": "office room_1", 75 | "explain": "an electronic device with monitor", 76 | "type": "big", 77 | "same_goal": "computer_0" 78 | }, 79 | "table_0": { 80 | "base": "{\"center\": [478, 315], \"mass\": 837.0}", 81 | "nearby_obj": "", 82 | "object_desc": "", 83 | "attr": "bought from Maiden Home 
this year", 84 | "room_id": "living room_1", 85 | "explain": "a flat surface supported by legs", 86 | "type": "ambiguous", 87 | "same_goal": "table_2|table_0" 88 | }, 89 | "clock_0": { 90 | "base": "{\"center\": [425, 319], \"mass\": 25.5}", 91 | "nearby_obj": "", 92 | "object_desc": "hanging on the wall", 93 | "attr": null, 94 | "room_id": "living room_1", 95 | "explain": "an cicle instrument to display time", 96 | "type": "small", 97 | "same_goal": "clock_0" 98 | }, 99 | "vase_0": { 100 | "base": "{\"center\": [490, 358], \"mass\": 62.5}", 101 | "nearby_obj": "", 102 | "object_desc": "flower vase", 103 | "attr": null, 104 | "room_id": "living room_1", 105 | "explain": "a decorative container for flowers", 106 | "type": "small", 107 | "same_goal": "vase_0|vase_1|flower vase_0" 108 | }, 109 | "fireplace_0": { 110 | "base": "{\"center\": [541, 256], \"mass\": 561.5}", 111 | "nearby_obj": "plant_0|circular sofa_0", 112 | "object_desc": "", 113 | "attr": "bought from Pottery Barn at 1990", 114 | "room_id": "living room_1", 115 | "explain": "a structure with fire for heating", 116 | "type": "big", 117 | "same_goal": "fireplace_0" 118 | } 119 | } -------------------------------------------------------------------------------- /orion/user_simulator/goals/h1zeeAwLh9Z/final.json: -------------------------------------------------------------------------------- 1 | { 2 | "room_info": { 3 | "bedroom": "Alice|Bob", 4 | "bathroom": "Alice|Bob", 5 | "living room": "shared" 6 | }, 7 | "bed_0": { 8 | "base": "{\"center\": [316, 270], \"mass\": 1167.5}", 9 | "nearby_obj": "bedside lamp_0|nightstand_0|armchair_0", 10 | "object_desc": "covered with white thin sheet|face the stone wall", 11 | "attr": null, 12 | "room_id": "bedroom_1", 13 | "explain": "a furniture for sleeping with sheets and mattress.", 14 | "type": "ambiguous", 15 | "same_goal": "bed_0" 16 | }, 17 | "nightstand_0": { 18 | "base": "{\"center\": [305, 238], \"mass\": 146.0}", 19 | "nearby_obj": "bed_0", 20 | "object_desc": "yellow table with three drawers", 21 | "attr": null, 22 | "room_id": "bedroom_1", 23 | "explain": "a small table beside a bed", 24 | "type": "small", 25 | "same_goal": "nightstand_0|nightstand_1" 26 | }, 27 | "bedside lamp_0": { 28 | "base": "{\"center\": [283, 281], \"mass\": 76.5}", 29 | "nearby_obj": "armchair_0|bed_0", 30 | "object_desc": "", 31 | "attr": "modern style", 32 | "room_id": "bedroom_1", 33 | "explain": "a lamp placed on a side table or nightstand next to a bed.", 34 | "type": "ambiguous", 35 | "same_goal": "bedside lamp_0" 36 | }, 37 | "armchair_0": { 38 | "base": "{\"center\": [284, 308], \"mass\": 87.0}", 39 | "nearby_obj": "bedside lamp_0|nightstand_0", 40 | "object_desc": "white cushion with wooden legs|face the bed", 41 | "attr": null, 42 | "room_id": "bedroom_1", 43 | "explain": "a chair with armrests", 44 | "type": "small", 45 | "same_goal": "armchair_0" 46 | }, 47 | "bed_1": { 48 | "base": "{\"center\": [283, 483], \"mass\": 1014.5}", 49 | "nearby_obj": "bedside lamp_1", 50 | "object_desc": "bed in the attic", 51 | "attr": null, 52 | "room_id": "bedroom_2", 53 | "explain": "a furniture for sleeping with sheets and mattress.", 54 | "type": "ambiguous", 55 | "same_goal": "bed_1" 56 | }, 57 | "bedside lamp_1": { 58 | "base": "{\"center\": [270, 455], \"mass\": 26.5}", 59 | "nearby_obj": "bed_1", 60 | "object_desc": "", 61 | "attr": "old fashioned", 62 | "room_id": "bedroom_2", 63 | "explain": "a lamp placed on a side table or nightstand next to a bed.", 64 | "type": "ambiguous", 65 | "same_goal": 
"bedside lamp_1" 66 | }, 67 | "couch_0": { 68 | "base": "{\"center\": [461, 303], \"mass\": 687.0}", 69 | "nearby_obj": "side table_0|table lamp_0", 70 | "object_desc": "", 71 | "attr": "bought from IKEA", 72 | "room_id": "living room_1", 73 | "explain": "a sofa with long upholstered seat for multiple people", 74 | "type": "big", 75 | "same_goal": "couch_0" 76 | }, 77 | "side table_0": { 78 | "base": "{\"center\": [457, 262], \"mass\": 36.5}", 79 | "nearby_obj": "couch_0", 80 | "object_desc": "", 81 | "attr": "furinno 3-Tier", 82 | "room_id": "living room_1", 83 | "explain": "a small table placed beside a sofa or chair", 84 | "type": "small", 85 | "same_goal": "side table_0" 86 | }, 87 | "table lamp_0": { 88 | "base": "{\"center\": [458, 325], \"mass\": 27.5}", 89 | "nearby_obj": "couch_0", 90 | "object_desc": "with dark light", 91 | "attr": "Amber Brown mission style", 92 | "room_id": "living room_1", 93 | "explain": "a lamp placed on a table", 94 | "type": "ambiguous", 95 | "same_goal": "table lamp_0" 96 | } 97 | } -------------------------------------------------------------------------------- /orion/user_simulator/goals/mL8ThkuaVTM/final.json: -------------------------------------------------------------------------------- 1 | { 2 | "room_info": { 3 | "kitchen": "shared", 4 | "living room": "shared" 5 | }, 6 | "chair_0": { 7 | "base": "{\"center\": [319, 206], \"mass\": 134.5}", 8 | "nearby_obj": "kitchen cabinet_0|refrigerator_0|table_0", 9 | "object_desc": "a set of 4 chairs", 10 | "attr": "bought from Castlery 20 years ago", 11 | "room_id": "living room_1", 12 | "explain": "a furniture seat with backrest and legs for one person", 13 | "type": "big", 14 | "same_goal": "chair_0|chair_1|chair_2|chair_3" 15 | }, 16 | "table_0": { 17 | "base": "{\"center\": [330, 211], \"mass\": 311.5}", 18 | "nearby_obj": "chair_0", 19 | "object_desc": "dining table surrounded with Castlery chairs", 20 | "attr": "bought from Walmart", 21 | "room_id": "living room_1", 22 | "explain": "a flat surface supported by legs", 23 | "type": "ambiguous", 24 | "same_goal": "table_0|table_1" 25 | }, 26 | "couch_0": { 27 | "base": "{\"center\": [300, 170], \"mass\": 1145.5}", 28 | "nearby_obj": "led tv_0|chair_0|coffee table_0", 29 | "object_desc": "l-shaped long couch", 30 | "attr": null, 31 | "room_id": "living room_1", 32 | "explain": "a sofa with long upholstered seat for multiple people", 33 | "type": "big", 34 | "same_goal": "couch_0" 35 | }, 36 | "led tv_0": { 37 | "base": "{\"center\": [264, 175], \"mass\": 38.0}", 38 | "nearby_obj": "couch_0|coffee table_0", 39 | "object_desc": "Samsung television", 40 | "attr": null, 41 | "room_id": null, 42 | "explain": "a light emitting diode television", 43 | "type": "big", 44 | "same_goal": "led tv_0" 45 | }, 46 | "fireplace_0": { 47 | "base": "{\"center\": [261, 209], \"mass\": 172.0}", 48 | "nearby_obj": "coffee table_0|led tv_0", 49 | "object_desc": "has a long steel pipe|beneath a clock", 50 | "attr": "bought from Pottery Barn 5 uears ago", 51 | "room_id": "living room_1", 52 | "explain": "a structure with fire for heating", 53 | "type": "big", 54 | "same_goal": "fireplace_0|firewood holder_0" 55 | }, 56 | "coffee table_0": { 57 | "base": "{\"center\": [279, 176], \"mass\": 244.5}", 58 | "nearby_obj": "fireplace_0|led tv_0|couch_0", 59 | "object_desc": "low-lying table|decorated with a green plant", 60 | "attr": "Maiden Home", 61 | "room_id": "living room_1", 62 | "explain": "a low table placed around sofa", 63 | "type": "ambiguous", 64 | "same_goal": "coffee 
table_0|coffee table_1" 65 | }, 66 | "kitchen cabinet_0": { 67 | "base": "{\"center\": [319, 265], \"mass\": 955.0}", 68 | "nearby_obj": "table_0|chair_0|refrigerator_0|oven_0", 69 | "object_desc": "", 70 | "attr": "bought from IKEA", 71 | "room_id": "kitchen_1", 72 | "explain": "a storage unit in a kitchen", 73 | "type": "big", 74 | "same_goal": "kitchen cabinet_0" 75 | }, 76 | "oven_0": { 77 | "base": "{\"center\": [305, 271], \"mass\": 46.0}", 78 | "nearby_obj": "refrigerator_0|kitchen cabinet_0", 79 | "object_desc": "inserted in the white cabinet", 80 | "attr": null, 81 | "room_id": "kitchen_1", 82 | "explain": "a kitchen appliance used for baking and roasting", 83 | "type": "small", 84 | "same_goal": "oven_0" 85 | }, 86 | "refrigerator_0": { 87 | "base": "{\"center\": [305, 240], \"mass\": 56.5}", 88 | "nearby_obj": "chair_0|table_0|kitchen cabinet_0", 89 | "object_desc": "smooth steel surface", 90 | "attr": "Galanz Mount freezer", 91 | "room_id": "kitchen_1", 92 | "explain": "an appliance to preserve food at low temperature", 93 | "type": "small", 94 | "same_goal": "refrigerator_0" 95 | } 96 | } -------------------------------------------------------------------------------- /orion/user_simulator/goals/qyAac8rV8Zk/final.json: -------------------------------------------------------------------------------- 1 | { 2 | "room_info": { 3 | "bathroom": "shared", 4 | "kitchen": "shared", 5 | "living room": "shared" 6 | }, 7 | "chair_1": { 8 | "base": "{\"center\": [445, 370], \"mass\": 153.5}", 9 | "nearby_obj": "table_0|computer_0|printer_0|computer desk_0", 10 | "object_desc": "electronic gaming chair", 11 | "attr": null, 12 | "room_id": "living room_1", 13 | "explain": "a furniture seat with backrest and legs for one person", 14 | "type": "small", 15 | "same_goal": "chair_1" 16 | }, 17 | "printer_0": { 18 | "base": "{\"center\": [451, 356], \"mass\": 76.5}", 19 | "nearby_obj": "chair_1", 20 | "object_desc": "black printer on a pile of books", 21 | "attr": null, 22 | "room_id": "living room_1", 23 | "explain": "a device that produces document copies", 24 | "type": "small", 25 | "same_goal": "printer_0" 26 | }, 27 | "computer_0": { 28 | "base": "{\"center\": [433, 364], \"mass\": 51.5}", 29 | "nearby_obj": "chair_1|computer desk_0", 30 | "object_desc": "has monitor", 31 | "attr": null, 32 | "room_id": "living room_1", 33 | "explain": "an electronic device with monitor", 34 | "type": "small", 35 | "same_goal": "computer_0" 36 | }, 37 | "kitchen cabinet_0": { 38 | "base": "{\"center\": [344, 351], \"mass\": 142.0}", 39 | "nearby_obj": "", 40 | "object_desc": "made of blue oak", 41 | "attr": null, 42 | "room_id": "kitchen_1", 43 | "explain": "a storage unit in a kitchen", 44 | "type": "big", 45 | "same_goal": "kitchen cabinet_1|kitchen cabinet_2|kitchen cabinet_0" 46 | }, 47 | "microwave_0": { 48 | "base": "{\"center\": [305, 323], \"mass\": 50.5}", 49 | "nearby_obj": "table_1", 50 | "object_desc": "", 51 | "attr": null, 52 | "room_id": "kitchen_1", 53 | "explain": "a kitchen appliance used for heating food quickly.", 54 | "type": "small", 55 | "same_goal": "microwave_0" 56 | }, 57 | "trashcan_1": { 58 | "base": "{\"center\": [304, 288], \"mass\": 32.5}", 59 | "nearby_obj": "table_1", 60 | "object_desc": "high tech automatic", 61 | "attr": null, 62 | "room_id": "kitchen_1", 63 | "explain": "a low container for waste materials", 64 | "type": "small", 65 | "same_goal": "trashcan_1" 66 | }, 67 | "refrigerator_0": { 68 | "base": "{\"center\": [362, 322], \"mass\": 65.5}", 69 | "nearby_obj": 
"table_1", 70 | "object_desc": "stainless steel electric fridge", 71 | "attr": null, 72 | "room_id": "kitchen_1", 73 | "explain": "an appliance to preserve food at low temperature", 74 | "type": "small", 75 | "same_goal": "refrigerator_0" 76 | }, 77 | "chair_0": { 78 | "base": "{\"center\": [434, 292], \"mass\": 603.5}", 79 | "nearby_obj": "table_4", 80 | "object_desc": "", 81 | "attr": "Maiden Home chair", 82 | "room_id": null, 83 | "explain": "a furniture seat with backrest and legs for one person", 84 | "type": "big", 85 | "same_goal": "chair_0|chair_2" 86 | }, 87 | "toilet_0": { 88 | "base": "{\"center\": [518, 316], \"mass\": 85.5}", 89 | "nearby_obj": "", 90 | "object_desc": "", 91 | "attr": null, 92 | "room_id": "bathroom_1", 93 | "explain": "a plumbing fixture on the floor for human waste disposal", 94 | "type": "small", 95 | "same_goal": "toilet_0" 96 | } 97 | } -------------------------------------------------------------------------------- /orion/user_simulator/goals/y9hTuugGdiq/final.json: -------------------------------------------------------------------------------- 1 | { 2 | "room_info": { 3 | "bedroom": "Alice|Bob|Tony", 4 | "bathroom": "Bob|shared", 5 | "kitchen": "shared", 6 | "living room": "shared" 7 | }, 8 | "sofa_0": { 9 | "base": "{\"center\": [228, 393], \"mass\": 932.5}", 10 | "nearby_obj": "coffee table_0", 11 | "object_desc": "l-shaped sofa with grey cushions", 12 | "attr": "bought from Walmart", 13 | "room_id": "living room_1", 14 | "explain": "a couch with long upholstered seat for multiple people", 15 | "type": "big", 16 | "same_goal": "sofa_0" 17 | }, 18 | "bed_1": { 19 | "base": "{\"center\": [438, 297], \"mass\": 1206.5}", 20 | "nearby_obj": "lamp_1|nightstand_3", 21 | "object_desc": "a bed with red pillow", 22 | "attr": null, 23 | "room_id": "bedroom_1", 24 | "explain": "a furniture for sleeping with sheets and mattress.", 25 | "type": "ambiguous", 26 | "same_goal": "bed_1" 27 | }, 28 | "lamp_1": { 29 | "base": "{\"center\": [449, 268], \"mass\": 70.0}", 30 | "nearby_obj": "bed_1|nightstand_3", 31 | "object_desc": "light lamp with square fabric shade", 32 | "attr": null, 33 | "room_id": "bedroom_1", 34 | "explain": "a light source with shade", 35 | "type": "ambiguous", 36 | "same_goal": "lamp_1|lamp_2" 37 | }, 38 | "nightstand_3": { 39 | "base": "{\"center\": [445, 265], \"mass\": 34.5}", 40 | "nearby_obj": "bed_1|lamp_1", 41 | "object_desc": "red wood material", 42 | "attr": "bought from IKEA", 43 | "room_id": "bedroom_1", 44 | "explain": "a table near the bed", 45 | "type": "ambiguous", 46 | "same_goal": "nightstand_3|nightstand_0" 47 | }, 48 | "bed_0": { 49 | "base": "{\"center\": [518, 251], \"mass\": 1924.5}", 50 | "nearby_obj": "lamp_0|nightstand_1", 51 | "object_desc": "a bed with a black stool to step on", 52 | "attr": null, 53 | "room_id": "bedroom_2", 54 | "explain": "a furniture for sleeping with sheets and mattress.", 55 | "type": "ambiguous", 56 | "same_goal": "bed_0" 57 | }, 58 | "lamp_0": { 59 | "base": "{\"center\": [524, 219], \"mass\": 75.5}", 60 | "nearby_obj": "bed_0|nightstand_1", 61 | "object_desc": "", 62 | "attr": "ROOTRO touch bedside table lamp", 63 | "room_id": "bedroom_2", 64 | "explain": "the light put beside the bed", 65 | "type": "ambiguous", 66 | "same_goal": "lamp_0|lamp_3" 67 | }, 68 | "nightstand_1": { 69 | "base": "{\"center\": [519, 217], \"mass\": 47.5}", 70 | "nearby_obj": "bed_0|lamp_0", 71 | "object_desc": "brown table with drawers", 72 | "attr": null, 73 | "room_id": "bedroom_2", 74 | "explain": "a table near the 
bed", 75 | "type": "ambiguous", 76 | "same_goal": "nightstand_1|nightstand_2" 77 | }, 78 | "bed_2": { 79 | "base": "{\"center\": [336, 320], \"mass\": 694.0}", 80 | "nearby_obj": "", 81 | "object_desc": "colorful sheets|across the laundry room|with white frames to protect the baby", 82 | "attr": null, 83 | "room_id": "bedroom_3", 84 | "explain": "a furniture for sleeping with sheets and mattress.", 85 | "type": "ambiguous", 86 | "same_goal": "bed_2" 87 | }, 88 | "microwave_0": { 89 | "base": "{\"center\": [314, 257], \"mass\": 136.0}", 90 | "nearby_obj": "kitchen cabinet_0|kitchen counter_0", 91 | "object_desc": "holding on the shelf", 92 | "attr": null, 93 | "room_id": "kitchen_1", 94 | "explain": "a kitchen appliance used for heating food quickly.", 95 | "type": "small", 96 | "same_goal": "microwave_0" 97 | }, 98 | "kitchen counter_0": { 99 | "base": "{\"center\": [265, 284], \"mass\": 1407.0}", 100 | "nearby_obj": "refrigerator_0|kitchen cabinet_0|microwave_0", 101 | "object_desc": "large fireproof countertop", 102 | "attr": "bought from IKEA", 103 | "room_id": "kitchen_1", 104 | "explain": "a flat surface for food preparation", 105 | "type": "small", 106 | "same_goal": "kitchen counter_0" 107 | } 108 | } -------------------------------------------------------------------------------- /orion/user_simulator/rule_based_sim.py: -------------------------------------------------------------------------------- 1 | """ 2 | This file is deprecated. 3 | """ 4 | 5 | import re 6 | from typing import List, Tuple 7 | import numpy as np 8 | from orion.abstract.pose import Agent2DPose 9 | 10 | 11 | from orion.config.my_config import * 12 | from orion.user_simulator.topograph import Instance 13 | from orion.config.chatgpt_config import * 14 | from orion.user_simulator.base import UserSimulatorBase 15 | 16 | import random 17 | 18 | random.seed(1) 19 | 20 | 21 | class RuleUserSimulator(UserSimulatorBase): 22 | def generate_hint(self, instance: Instance) -> str: 23 | # from semantic map. neighbor objects in view. largest object in circle 15. 
randomly 24 | 25 | dist, angle, is_in_view = self.rel_pose(instance, self.agtpose) 26 | 27 | # if far, return object hint 28 | if dist > 100 or not is_in_view: 29 | # return f"the {instance.name} is {dist} meters away from you" 30 | nearby_objs = self.topo_graph.get_sorted_neighbors(instance.id) 31 | if len(nearby_objs) > 0: 32 | nearby_obj = nearby_objs.pop(0) 33 | nearby_obj_name = re.sub(r"_\d+$", "", nearby_obj) 34 | if nearby_obj_name == instance.name: 35 | return f"the {instance.name} is near to another {nearby_obj_name}" 36 | else: 37 | return f"the {instance.name} is near to a {nearby_obj_name}" 38 | # if close, return postion hint 39 | else: 40 | if -30 < angle <= 30: 41 | return f"the {instance.name} is in front of you around {dist} units" 42 | elif 30 < angle <= 60: 43 | return f"the {instance.name} is in front of you and at your right side around {dist} units" 44 | elif 60 < angle <= 120: 45 | return f"the {instance.name} is at your right side around {dist} units" 46 | elif 120 < angle <= 150: 47 | return f"the {instance.name} is behind you and at your right side around {dist} units" 48 | elif 150 < angle <= 180 or -180 <= angle <= -150: 49 | return f"the {instance.name} is behind you around {dist} units" 50 | elif -150 < angle <= -120: 51 | return f"the {instance.name} is behind you and at your left side around {dist} units" 52 | elif -120 < angle <= -60: 53 | return f"the {instance.name} is at your left side around {dist} units" 54 | elif -60 < angle <= -30: 55 | return f"the {instance.name} is in front of you and at your left side around {dist} units" 56 | 57 | def step( 58 | self, 59 | agent_response: str, 60 | semantic_img: np.ndarray, 61 | agtpose: Agent2DPose, 62 | step_count: int, 63 | ) -> Tuple[bool, str]: 64 | self.agtpose = agtpose 65 | 66 | goal_reached = self._eval_with_semantic_img( 67 | self.goal_gen.current_goal, agtpose, semantic_img 68 | ) 69 | nearest_tuple = self._get_egoview_info_for_goal( 70 | self.goal_gen.current_goal, agtpose 71 | ) 72 | 73 | maxtry_reached, task_finished = self.goal_gen.step( 74 | goal_reached, steps=step_count - self.last_step_count 75 | ) 76 | self.last_step_count = step_count 77 | 78 | if task_finished: 79 | return True, "That's all for today. Thank you for your help." 80 | 81 | return_str = "" 82 | if step_count > 0: 83 | if goal_reached: 84 | return_str += f"Yes that's correct. " 85 | else: 86 | return_str += f"No, you're wrong. " 87 | 88 | goal = self.goal_gen.current_goal 89 | ins = self.topo_graph.instance_dict[goal.goal] 90 | if maxtry_reached or goal_reached or step_count == 0: 91 | return_str += f"Now I want you to find the {ins.name} " 92 | if ins.nearby_obj: 93 | near_obj_str = ", ".join( 94 | [re.sub(r"_\d+$", "", obj) for obj in ins.nearby_obj] 95 | ) 96 | return_str += f"which near to the {near_obj_str}. " 97 | 98 | else: 99 | hints = self.generate_hint(ins) 100 | if hints is not None: 101 | return_str += f" Hints: {hints}. " 102 | else: 103 | return_str += f" Please find the {ins.name}. 
" 104 | 105 | return task_finished, return_str 106 | 107 | 108 | if __name__ == "__main__": 109 | for scene, floor in SCENE_ID_FLOOR_SET: 110 | usr_sim = RuleUserSimulator(scene, floor, max_round=2, category=2) 111 | for k, v in usr_sim.topo_graph.instance_dict.items(): 112 | print(k, v) 113 | input() 114 | for ii in usr_sim.goal_gen.goals: 115 | print(ii) 116 | ctt = 0 117 | task_finished = False 118 | while True: 119 | user_input = input("user: ") 120 | if user_input == "next" or task_finished: 121 | break 122 | if (ctt + 1) % 3 == 0: 123 | # mock 124 | g = usr_sim.goal_gen.current_goal 125 | ins = usr_sim.topo_graph.instance_dict[g.goal] 126 | x, z = ins.center 127 | agtpose = Agent2DPose(x, z, 0) 128 | cls_id = usr_sim.name_dic[ins.name][0] 129 | semantic_img = np.ones((100, 100)) * cls_id 130 | else: 131 | semantic_img = np.zeros((100, 100)) 132 | agtpose = Agent2DPose(0, 0, 0) 133 | 134 | task_finished, response = usr_sim.step( 135 | user_input, semantic_img, agtpose, ctt 136 | ) 137 | print("bot: ", response) 138 | ctt += 1 139 | 140 | input("press enter to continue") 141 | -------------------------------------------------------------------------------- /orion/utils/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sled-group/navchat/afb21a4c9cb13f671534be5460e3c9e5867e5512/orion/utils/__init__.py -------------------------------------------------------------------------------- /orion/utils/clip_score_utils.py: -------------------------------------------------------------------------------- 1 | """given the recognition probability, find out the true postive images""" 2 | 3 | import numpy as np 4 | 5 | from orion import logger 6 | 7 | np.set_printoptions(precision=3, suppress=True, linewidth=200) 8 | 9 | 10 | class CLIPScorer: 11 | def __init__(self): 12 | self.prob_list = [] 13 | self.masktime = 0 14 | self.found_goal = False 15 | 16 | def reset(self): 17 | self.prob_list = [] 18 | self.masktime = 0 19 | 20 | def add_prob(self, prob): 21 | self.prob_list.append(prob) 22 | 23 | def is_goal_found(self, prob): 24 | if len(self.prob_list) > 0: 25 | mean_prob = np.mean(self.prob_list) 26 | else: 27 | mean_prob = 0 28 | self.add_prob(prob) 29 | if mean_prob > 0.9: 30 | theshold = 0.99 31 | elif mean_prob > 0.8: 32 | theshold = 0.95 33 | elif mean_prob > 0.6: 34 | theshold = 0.9 35 | else: 36 | theshold = 0.8 37 | 38 | # logger.info( 39 | # f"\t mean prob {mean_prob:2f}, theshold {theshold}, prob {prob:3f}, {self.prob_list[-3:]}, {np.mean(self.prob_list[-3:])}" 40 | # ) 41 | if ( 42 | prob > theshold 43 | and len(self.prob_list) > 3 44 | and np.mean(self.prob_list[-3:]) > theshold 45 | and self.masktime == 0 46 | ): 47 | logger.info("\033[31m [CLIP] detect a pulse\033[m") 48 | self.found_goal = True 49 | return True 50 | elif ( 51 | len(self.prob_list) > 1 52 | and prob - max(self.prob_list[-2], mean_prob) > 0.5 53 | and self.masktime == 0 54 | ): 55 | logger.info("\033[31m [CLIP] detect a pulse\033[m") 56 | self.found_goal = True 57 | return True 58 | elif ( 59 | len(self.prob_list) > 3 60 | and self.prob_list[-1] - max(self.prob_list[-3], mean_prob) > 0.5 61 | and self.prob_list[-2] - max(self.prob_list[-3], mean_prob) > 0.4 62 | and self.masktime == 0 63 | ): 64 | logger.info("\033[31m [CLIP] detect a pulse\033[m") 65 | self.found_goal = True 66 | return True 67 | else: 68 | self.masktime = max(0, self.masktime - 1) 69 | self.found_goal = False 70 | return False 71 | 72 | def set_masktime(self, masktime=5): 73 
| logger.info(f" CLIP set masktime to {masktime}") 74 | self.masktime = masktime 75 | -------------------------------------------------------------------------------- /orion/utils/file_load.py: -------------------------------------------------------------------------------- 1 | ### file loading ### 2 | import cv2 3 | import numpy as np 4 | 5 | from orion.abstract.pose import Agent3DPose 6 | 7 | 8 | def get_floor_set_str(floor_set): 9 | a, b = floor_set 10 | if a < 0: 11 | a = "B{}".format(-a) 12 | else: 13 | a = "U{}".format(a) 14 | if b < 0: 15 | b = "B{}".format(-b) 16 | else: 17 | b = "U{}".format(b) 18 | return "{}_{}".format(a, b) 19 | 20 | 21 | def load_image(rgb_path): 22 | rgb = cv2.imread(rgb_path) 23 | rgb = cv2.cvtColor(rgb, cv2.COLOR_BGR2RGB) 24 | return rgb 25 | 26 | 27 | def load_depth(depth_filepath): 28 | with open(depth_filepath, "rb") as f: 29 | depth = np.load(f) 30 | if len(depth.shape) == 3: 31 | depth = depth.squeeze() 32 | if depth.dtype == np.uint16: 33 | depth = depth.astype(np.float32) / 1000.0 34 | return depth 35 | 36 | 37 | def load_semantic(semantic_filepath, obj2cls_dic): 38 | with open(semantic_filepath, "rb") as f: 39 | semantic = np.load(f) 40 | if len(semantic.shape) == 3: 41 | semantic = semantic.squeeze() 42 | semantic = np.asarray(semantic).astype(np.int32) 43 | semantic = cvt_sem_id_2_cls_id(semantic, obj2cls_dic) 44 | return semantic 45 | 46 | 47 | def cvt_sem_id_2_cls_id(semantic: np.ndarray, obj2cls: dict): 48 | h, w = semantic.shape 49 | semantic = semantic.flatten() 50 | u, inv = np.unique(semantic, return_inverse=True) 51 | return np.array([obj2cls[x][0] for x in u])[inv].reshape((h, w)) 52 | 53 | 54 | def load_obj2cls_dict(filepath): 55 | obj2cls_dic = {} 56 | label_dic = {} 57 | with open(filepath, "r") as f: 58 | for line in f: 59 | line = line.strip() 60 | if not line: 61 | continue 62 | row = line.split(":") 63 | obj_id = int(row[0]) 64 | cls_id = int(row[1].split(",")[0].strip()) 65 | cls_name = row[1].split(",")[1].strip() 66 | obj2cls_dic[obj_id] = (cls_id, cls_name) 67 | label_dic[cls_id] = cls_name 68 | label_dic = dict(sorted(label_dic.items(), key=lambda x: x[0])) 69 | return obj2cls_dic, label_dic 70 | 71 | 72 | def load_pose(pose_filepath): 73 | with open(pose_filepath, "r") as f: 74 | line = f.readline() 75 | return Agent3DPose.from_str(line) 76 | -------------------------------------------------------------------------------- /orion/utils/gradio_interface.py: -------------------------------------------------------------------------------- 1 | from multiprocessing import Queue 2 | from typing import List 3 | import gradio as gr 4 | import cv2 5 | from orion.agent_env.chatgpt_control_base import ChatGPTControlBase 6 | from orion.agent_env.chatgpt_control_orion import ChatGPTControlORION 7 | from orion.agent_env.chatgpt_control_cow import ChatGPTControlCoW 8 | from orion.agent_env.chatgpt_control_vlmap import ChatGPTControlVLMap 9 | 10 | 11 | END_SENT = "" 12 | END_TURN = "" 13 | 14 | 15 | class GradioInterface: 16 | def __init__( 17 | self, 18 | image_queue: Queue, 19 | user_message_queue: Queue, 20 | bot_message_queue: Queue, 21 | ): 22 | self.last_image = cv2.imread("orion/gradio_init_img.jpg") 23 | self.image_queue = image_queue 24 | self.user_message_queue = user_message_queue 25 | self.bot_message_queue = bot_message_queue 26 | 27 | def get_img(self): 28 | if self.image_queue.empty(): 29 | return self.last_image 30 | else: 31 | self.last_image = self.image_queue.get() 32 | return self.last_image 33 | 34 | def 
process_user_message(self, user_message, history):
35 | self.user_message_queue.put(user_message)
36 | return "", history + [[user_message, None]]
37 |
38 | def process_bot_message(self, chat_history: List):
39 | chat_history.append([None, ""])
40 | bot_message_chunk: str = self.bot_message_queue.get()
41 |
42 | while bot_message_chunk != END_TURN:
43 | if bot_message_chunk == END_SENT:
44 | chat_history.append([None, ""])
45 | else:
46 | if "Command" in bot_message_chunk:
47 | bot_message_chunk = bot_message_chunk.replace("Command", "Action")
48 | chat_history[-1][1] += bot_message_chunk
49 | yield chat_history
50 |
51 | bot_message_chunk = self.bot_message_queue.get()
52 |
53 | def run(self):
54 | with gr.Blocks(theme=gr.themes.Default(text_size="lg")) as gradio_demo:
55 | with gr.Column():
56 | with gr.Box():
57 | gr.Markdown("## 🔥Navigation ChatBot Demo🚀")
58 | with gr.Row():
59 | with gr.Column(scale=1):
60 | plot = gr.Image(self.last_image)
61 | with gr.Column(scale=2.5):
62 | chatbot = gr.Chatbot()
63 | chatbot.style(height=600)
64 | msg = gr.Textbox()
65 | msg.submit(
66 | self.process_user_message,
67 | [msg, chatbot],
68 | [msg, chatbot],
69 | show_progress=True,
70 | ).then(self.process_bot_message, chatbot, chatbot)
71 | gradio_demo.load(self.get_img, None, plot, every=0.01)
72 |
73 | gradio_demo.queue().launch(
74 | server_name="127.0.0.1", server_port=7877, share=True
75 | )
76 |
77 | class GradioDemoChatGPTControlORION(ChatGPTControlORION):
78 | def __init__(
79 | self, image_queue: Queue, user_message_queue: Queue, bot_message_queue: Queue,
80 | *args, **kwargs
81 | ):
82 | super().__init__(*args, **kwargs)
83 | self.image_queue = image_queue
84 | self.user_message_queue = user_message_queue
85 | self.bot_message_queue = bot_message_queue
86 |
87 | def display(self, *args, **kwargs):
88 | super().display(*args, **kwargs)
89 | # downscale the display image by half before sending it to gradio
90 | self.display_image = cv2.resize(
91 | self.display_image, (self.display_image.shape[1] // 2, self.display_image.shape[0] // 2)
92 | )
93 | self.image_queue.put(cv2.cvtColor(self.display_image, cv2.COLOR_BGR2RGB))
94 |
95 | def _get_user_input(self):
96 | user_input = self.user_message_queue.get()
97 | return user_input
98 |
99 | def _send_funcall_msg(self, msg):
100 | super()._send_funcall_msg(msg)
101 | self.bot_message_queue.put(
102 | "**API results**: *" + msg.replace("\n", "
") + "*" 103 | ) 104 | self.bot_message_queue.put(END_SENT) 105 | 106 | def _get_chatgpt_response(self): 107 | if self.use_stream: 108 | response = "" 109 | for chunk in self.chatgpt.get_system_response_stream(): 110 | self.bot_message_queue.put(chunk.replace("\n", "
")) 111 | response += chunk 112 | else: 113 | response = self.chatgpt.get_system_response() 114 | self.bot_message_queue.put(response.replace("\n", "
")) 115 | self.bot_message_queue.put(END_SENT) 116 | return response 117 | 118 | def _post_process(self, command): 119 | super()._post_process(command) 120 | final_response = command["args"]["content"] 121 | self.bot_message_queue.put("**" + final_response + "**") 122 | self.bot_message_queue.put(END_TURN) -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | opencv-python>=4.7.0.72 2 | transformers==4.30.2 3 | scipy==1.7.3 4 | pycocotools==2.0.7 5 | matplotlib 6 | git+https://github.com/zhanghang1989/PyTorch-Encoding/ 7 | pytorch-lightning>=1.9.5 8 | imageio 9 | ftfy==6.1.1 10 | regex 11 | tqdm 12 | git+https://github.com/openai/CLIP.git 13 | altair==5.0.0 14 | streamlit 15 | timm 16 | tensorboardX==2.6.2.2 17 | test-tube 18 | wandb 19 | open_clip_torch>=2.20.0 20 | openai==1.12.0 21 | scikit-fmm>=2022.3.26 22 | scikit-image>=0.19.3 23 | scikit-learn>=1.0.2 24 | httpx==0.24.0 25 | aiofiles==23.1.0 26 | fastapi==0.88.0 27 | Pillow==9.5.0 28 | requests==2.28.2 29 | requests-oauthlib==1.3.1 30 | Jinja2==3.1.2 31 | ffmpy==0.3.0 32 | urllib3==1.26.15 33 | gradio==3.29.0 -------------------------------------------------------------------------------- /scripts/build_vlmap.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import os 3 | import time 4 | 5 | import torch 6 | from orion.map.map_build.build_voxel import VoxelMapBuilder, OfflineDataLoader 7 | from orion.config.my_config import MapConfig, SCENE_ID_FLOOR_SET 8 | from orion.utils.file_load import get_floor_set_str 9 | from tqdm import tqdm 10 | 11 | from orion.config.my_config import * 12 | from orion.utils import visulization as vis 13 | from orion.map.map_search.search_voxel import VLMapSearch 14 | from orion.perception.extractor.concept_fusion_extractor import ConceptFusionExtractor 15 | from orion.perception.extractor.lseg_extractor import LSegExtractor 16 | 17 | 18 | def build_vlmap_one_scene(root_dir, feature_type): 19 | map_builder = VoxelMapBuilder( 20 | save_dir=os.path.join(root_dir, f"{feature_type}_vlmap"), 21 | extractor_type = "lseg" if feature_type == "lseg" else "conceptfusion", 22 | extractor = LSegExtractor() if feature_type == "lseg" else ConceptFusionExtractor(), 23 | accelerate_mapping=True 24 | ) 25 | 26 | dataloader = OfflineDataLoader( 27 | data_dir=os.path.join(root_dir, "recordings"), 28 | mapcfg=MapConfig(), 29 | ) 30 | 31 | for idx in tqdm(range(len(dataloader))): 32 | obs = dataloader[idx] 33 | map_builder.build(obs) 34 | 35 | map_builder._save() 36 | 37 | # just make sure realease GPU memory 38 | del map_builder.extractor 39 | del map_builder.vxlmap 40 | del map_builder 41 | 42 | 43 | def draw_vlmap_one_scene(root_dir, feature_type): 44 | # Draw topdown rgb 45 | map_querier = VLMapSearch( 46 | load_sparse_map_path=os.path.join( 47 | root_dir, f"{feature_type}_vlmap", "sparse_vxl_map.npz") 48 | ) 49 | mapshape = map_querier._3dshape 50 | mapshape = (mapshape[0], mapshape[1], mapshape[2], 3) 51 | topdown_rgb = map_querier.get_BEV_map( 52 | indices=map_querier.indices, 53 | values=map_querier.rgb_values, 54 | map_shape=mapshape 55 | ) 56 | vis.plot_uint8img_with_plt( 57 | topdown_rgb, 58 | "topdown_rgb", 59 | crop=True, 60 | save=True, 61 | save_path=os.path.join( 62 | root_dir, f"{feature_type}_vlmap", "topdown_rgb.png"), 63 | ) 64 | 65 | # Test query list 66 | predict_map, query_labels = 
map_querier.query(VLMAP_QUERY_LIST_COMMON) 67 | nomap_mask_crop = map_querier.no_map_mask_crop 68 | predict_map_crop = predict_map[ 69 | map_querier.zmin : map_querier.zmax + 1, 70 | map_querier.xmin : map_querier.xmax + 1 71 | ] 72 | vis.plot_BEV_semantic_map( 73 | predict_map_crop, 74 | nomap_mask_crop, 75 | labels=query_labels, 76 | save=True, 77 | save_path=os.path.join( 78 | root_dir, f"{feature_type}_vlmap", "topdown_vlmap.png"), 79 | ) 80 | 81 | del map_querier 82 | 83 | 84 | 85 | def main(scene_id, floor, feature_type): 86 | data_dir = f"data/experiments/predict_{scene_id}_{get_floor_set_str(floor)}" 87 | build_vlmap_one_scene(data_dir, feature_type) 88 | torch.cuda.empty_cache() 89 | time.sleep(5) 90 | 91 | draw_vlmap_one_scene(data_dir, feature_type) 92 | torch.cuda.empty_cache() 93 | time.sleep(5) 94 | 95 | 96 | if __name__ == "__main__": 97 | argparser = argparse.ArgumentParser() 98 | argparser.add_argument( 99 | "--scene_id", 100 | type=str, 101 | default="4ok3usBNeis", 102 | help="scene id, either 'all' or a specific scene id in SCENE_ID_FLOOR_SET", 103 | ) 104 | argparser.add_argument( 105 | "--feature_type", 106 | choices=["lseg", "conceptfusion"], 107 | default="lseg", 108 | help="feature type, either 'lseg' or 'conceptfusion'", 109 | ) 110 | args = argparser.parse_args() 111 | 112 | scene_dic = {item[0]: item for item in SCENE_ID_FLOOR_SET} 113 | if args.scene_id == "all": 114 | for scene_id, floor in SCENE_ID_FLOOR_SET: 115 | main(scene_id, floor, args.feature_type) 116 | else: 117 | assert args.scene_id in scene_dic 118 | scene_id, floor = scene_dic[args.scene_id] 119 | main(scene_id, floor, args.feature_type) -------------------------------------------------------------------------------- /scripts/collect_scene_fbe.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import time 3 | from orion.agent_env.fbe import FBEAgentEnv 4 | from orion.config.my_config import SCENE_ID_FLOOR_SET 5 | 6 | 7 | def collect_data_one_scene(scene_id, floor): 8 | game = FBEAgentEnv( 9 | scene_ids=[scene_id], 10 | floor_set=floor, 11 | fast_explore=False, 12 | display_shortside=256, 13 | save_dir_name="predict", 14 | auto_record=True, 15 | display_setting="rgb+occumap+topdownmap", 16 | headless=True, 17 | use_gt_pose=True, 18 | load_existing_occumap=False, 19 | save_new_occumap=True, 20 | ) 21 | game.run() 22 | 23 | 24 | if __name__ == "__main__": 25 | argparser = argparse.ArgumentParser() 26 | argparser.add_argument( 27 | "--scene_id", 28 | type=str, 29 | default="4ok3usBNeis", 30 | help="scene id, either 'all' or a specific scene id in SCENE_ID_FLOOR_SET", 31 | ) 32 | args = argparser.parse_args() 33 | 34 | scene_dic = {item[0]: item for item in SCENE_ID_FLOOR_SET} 35 | if args.scene_id == "all": 36 | for scene_id, floor in SCENE_ID_FLOOR_SET: 37 | collect_data_one_scene(scene_id, floor) 38 | time.sleep(5) 39 | else: 40 | assert args.scene_id in scene_dic 41 | collect_data_one_scene(scene_dic[args.scene_id][0], scene_dic[args.scene_id][1]) 42 | -------------------------------------------------------------------------------- /scripts/create_video.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import cv2 3 | 4 | import sys 5 | import os 6 | import imageio 7 | import numpy as np 8 | import cv2 9 | import tqdm 10 | 11 | from habitat_sim.utils.common import d3_40_colors_rgb 12 | 13 | 14 | has_gpu = True # @param {type: "boolean"} 15 | codec = "h264" 16 | if has_gpu: 17 | 
codec = "h264_nvenc" 18 | 19 | 20 | def load_depth(depth_filepath): 21 | with open(depth_filepath, "rb") as f: 22 | depth = np.load(f) 23 | return depth 24 | 25 | 26 | def get_fast_video_writer(video_file: str, fps: int = 60): 27 | if ( 28 | "google.colab" in sys.modules 29 | and os.path.splitext(video_file)[-1] == ".mp4" 30 | and os.environ.get("IMAGEIO_FFMPEG_EXE") == "/usr/bin/ffmpeg" 31 | ): 32 | # USE GPU Accelerated Hardware Encoding 33 | writer = imageio.get_writer( 34 | video_file, 35 | fps=fps, 36 | codec=codec, 37 | mode="I", 38 | bitrate="1000k", 39 | format="FFMPEG", 40 | ffmpeg_log_level="info", 41 | quality=10, 42 | output_params=["-minrate", "500k", "-maxrate", "5000k"], 43 | ) 44 | else: 45 | # Use software encoding 46 | writer = imageio.get_writer(video_file, fps=fps) 47 | return writer 48 | 49 | 50 | def create_video(data_dir: str, fps: int = 30): 51 | rgb_dir = os.path.join(data_dir, "rgb") 52 | rgb_list = sorted( 53 | os.listdir(rgb_dir), key=lambda x: int(x.split("_")[-1].split(".")[0]) 54 | ) 55 | rgb_list = [os.path.join(rgb_dir, x) for x in rgb_list] 56 | 57 | depth_dir = os.path.join(data_dir, "depth") 58 | depth_list = sorted( 59 | os.listdir(depth_dir), key=lambda x: int(x.split("_")[-1].split(".")[0]) 60 | ) 61 | depth_list = [os.path.join(depth_dir, x) for x in depth_list] 62 | 63 | semantic_dir = os.path.join(data_dir, "semantic") 64 | semantic_list = sorted( 65 | os.listdir(semantic_dir), key=lambda x: int(x.split("_")[-1].split(".")[0]) 66 | ) 67 | semantic_list = [os.path.join(semantic_dir, x) for x in semantic_list] 68 | 69 | assert len(rgb_list) == len(depth_list) == len(semantic_list) 70 | 71 | output_path = os.path.join(data_dir, "recording_video.mp4") 72 | out_writer = get_fast_video_writer(output_path, fps=fps) 73 | 74 | pbar = tqdm.tqdm(total=len(rgb_list), position=0, leave=True) 75 | for i, (rgb_path, depth_path, semantic_path) in enumerate( 76 | list(zip(rgb_list, depth_list, semantic_list)) 77 | ): 78 | bgr = cv2.imread(rgb_path) 79 | rgb = cv2.cvtColor(bgr, cv2.COLOR_BGR2RGB) 80 | 81 | depth = np.load(open(depth_path, "rb")) 82 | if depth.dtype == np.uint16: 83 | depth = depth.astype(np.float32) / 1000 84 | depth_vis = (depth / 10 * 255).astype(np.uint8) 85 | depth_color = cv2.applyColorMap(depth_vis, cv2.COLORMAP_JET) 86 | semantic = np.load(open(semantic_path, "rb")) 87 | semantic_color = d3_40_colors_rgb[semantic.squeeze() % 40] 88 | output_im = np.concatenate((rgb, depth_color, semantic_color), axis=1) 89 | out_writer.append_data(output_im) 90 | pbar.update(1) 91 | out_writer.close() 92 | 93 | 94 | if __name__ == "__main__": 95 | create_video("data/experiments/fbetest_4ok3usBNeis_B1_U1/recordings") 96 | -------------------------------------------------------------------------------- /scripts/user_agent_talk_cow.py: -------------------------------------------------------------------------------- 1 | import json 2 | import os 3 | import re 4 | import time 5 | from orion import logger 6 | from orion.config.chatgpt_config import * 7 | from orion.agent_env.chatgpt_control_cow import ChatGPTControlCoW 8 | from orion.user_simulator.chatgpt_based_sim import ( 9 | ChatGPTUserSimulator, 10 | CountourMaskPrediction, 11 | ) 12 | from orion.abstract.interaction_history import SucMsg 13 | 14 | 15 | class ChatGPTControlAndUserSim(ChatGPTControlCoW): 16 | def __init__(self, max_trial, max_round, category, chatgpt_usrsim_config, clear_gptctx=False, *args, **kwargs): 17 | super().__init__(*args, **kwargs) 18 | 19 | self.usr_sim = ChatGPTUserSimulator( 20 | 
chatgpt_usrsim_config=chatgpt_usrsim_config, 21 | scene_id=kwargs["scene_ids"][0], 22 | floor_plan=kwargs["floor_set"], 23 | max_trial=max_trial, 24 | max_round=max_round, 25 | category=category, 26 | is_cow_baseline=self.is_cow_baseline, 27 | is_vlamp_baseline=self.is_vlmap_baseline, 28 | ) 29 | logger.info("User Simulator Initialized.") 30 | logger.info(f"Max Trial: {max_trial}, Max Round: {max_round}, Cat: {category}") 31 | for k, v in self.usr_sim.topo_graph.instance_dict.items(): 32 | logger.info(f"Instance {k}: {v}") 33 | 34 | logger.info("User Goal init") 35 | for g in self.usr_sim.goal_gen.goals: 36 | logger.info(g) 37 | 38 | self.clear_gptctx = clear_gptctx 39 | assert self.is_vlmap_baseline is False 40 | assert self.is_cow_baseline is True 41 | 42 | suffix = f"cow_t{max_trial}r{max_round}_{category}" 43 | if self.use_memory: 44 | suffix += "_mem" 45 | else: 46 | suffix += "_nomem" 47 | if self.use_vlmap: 48 | suffix += "_vmp" 49 | else: 50 | suffix += "_novmp" 51 | if self.use_explore: 52 | suffix += "_exp" 53 | else: 54 | suffix += "_noexp" 55 | if self.clear_gptctx: 56 | suffix += "_noctx" # clear every new round 57 | else: 58 | suffix += "_ctx" 59 | 60 | logger.info(f"Dump dir suffix: {suffix}") 61 | self.dump_dir = os.path.join(self.save_dir, f"dump_{suffix}") 62 | if not os.path.exists(self.dump_dir): 63 | os.makedirs(self.dump_dir) 64 | else: 65 | logger.warning(f"Dump dir {self.dump_dir} already exists!") 66 | input("Press Enter to continue...") 67 | self.is_first_turn = True 68 | 69 | def _get_user_input(self): 70 | logger.info("\nGenreate User Utterance with GPT Simulator...") 71 | is_first_turn = self.is_first_turn 72 | if self.is_first_turn: 73 | agtresponse = "Hello, what should I do?" 74 | self.is_first_turn = False 75 | else: 76 | agtresponse = self.agent_response 77 | 78 | agt_predict = CountourMaskPrediction( 79 | predict_contours=self.predict_contours, 80 | predict_masks=self.predict_masks, 81 | ) 82 | 83 | task_finished, is_new_goal, is_new_round, goal_succ, instruction = self.usr_sim.step( 84 | agent_response=agtresponse, 85 | agtpose=self.agent_state.pose_2d, 86 | semantic_img=self.observations.semantic, 87 | agt_predict=agt_predict, 88 | step_count=self.step_count, 89 | first_turn=is_first_turn, 90 | ) 91 | self.task_finished = task_finished 92 | logger.info(f"[User Simulator] {instruction}") 93 | self.interaction_history.append( 94 | SucMsg(reward=goal_succ) 95 | ) 96 | if is_new_round: 97 | self.usr_sim.goal_gen._is_new_round = False 98 | last_round = self.usr_sim.goal_gen.last_round -1 99 | logger.info(f"Start new Round! 
From round {last_round} to {last_round+1}") 100 | if self.use_memory: 101 | self.object_memory.save(self.dump_dir, suffix=f"_round{last_round}") 102 | 103 | gpt_context = self.chatgpt.messages 104 | gpt_context_path = os.path.join(self.dump_dir, f"gptctx_round{last_round}.json") 105 | json.dump(gpt_context, open(gpt_context_path, "w")) 106 | 107 | if self.clear_gptctx: 108 | self.chatgpt.clear_ctx() 109 | 110 | if is_new_goal: 111 | # save early 112 | eval_result_path = os.path.join(self.dump_dir, "result.json") 113 | self.usr_sim.goal_gen.save(eval_result_path) 114 | conversation_path = os.path.join(self.dump_dir, "dialog.json") 115 | json.dump(self.record_conversations, open(conversation_path, "w")) 116 | 117 | # save money 118 | if re.search(r"(gpt-4|gpt4)", self.chatgpt.model): 119 | self.chatgpt.clear_ctx() 120 | 121 | 122 | return instruction 123 | 124 | def save(self): 125 | super().save() 126 | if not os.path.exists(self.dump_dir): 127 | os.makedirs(self.dump_dir, exist_ok=True) 128 | self.interaction_history.save(self.dump_dir) 129 | if self.use_memory: 130 | self.object_memory.save(self.dump_dir, suffix="_final") 131 | eval_result_path = os.path.join(self.dump_dir, "result.json") 132 | results = self.usr_sim.goal_gen.save(eval_result_path) 133 | logger.info(f"Dumped user simulator result to {eval_result_path}") 134 | conversation_path = os.path.join(self.dump_dir, "dialog.json") 135 | json.dump(self.record_conversations, open(conversation_path, "w")) 136 | logger.info(f"Dumped conversation to {conversation_path}") 137 | 138 | try: 139 | self.usr_sim.eval(results) 140 | except: 141 | pass 142 | 143 | 144 | 145 | 146 | if __name__ == "__main__": 147 | 148 | import argparse 149 | 150 | parser = argparse.ArgumentParser() 151 | parser.add_argument("--scene_id", type=str, default="4ok3usBNeis") 152 | parser.add_argument("--floor_b", type=int, default=-1) 153 | parser.add_argument("--floor_u", type=int, default=1) 154 | parser.add_argument("--category", type=str, default="mixed", choices=["landmark", "instruction", "description", "correction", "mixed", "none"]) 155 | args = parser.parse_args() 156 | 157 | max_trial=5 158 | max_round=1 159 | category=args.category 160 | use_memory=False 161 | use_vlmap=False 162 | use_explore=True 163 | clear_gptctx=False 164 | 165 | chatgpt_config=AzureGPT4Config() 166 | chatgpt_usrsim_config=AzureGPT35Config() 167 | 168 | game = ChatGPTControlAndUserSim( 169 | max_trial=max_trial, 170 | max_round=max_round, 171 | category=category, 172 | chatgpt_config=chatgpt_config, 173 | chatgpt_usrsim_config=chatgpt_usrsim_config, 174 | use_memory=use_memory, 175 | use_vlmap=use_vlmap, 176 | use_explore=use_explore, 177 | clear_gptctx=clear_gptctx, 178 | is_vlmap_baseline=False, 179 | is_cow_baseline=True, 180 | record_interaction=False, 181 | use_stream=False, 182 | fast_explore=True, 183 | scene_ids=[args.scene_id], 184 | floor_set=(args.floor_b, args.floor_u), 185 | display_shortside=256, 186 | save_dir_name="predict", 187 | auto_record=False, 188 | display_setting="rgb+occumap+topdownmap", 189 | headless=True, 190 | use_gt_pose=True, 191 | load_existing_occumap=True, 192 | save_new_occumap=False, 193 | ) 194 | 195 | game.run() 196 | -------------------------------------------------------------------------------- /scripts/user_agent_talk_orion.py: -------------------------------------------------------------------------------- 1 | import json 2 | import os 3 | import re 4 | import time 5 | from orion import logger 6 | from orion.agent_env.chatgpt_control_orion 
import ChatGPTControlORION 7 | from orion.user_simulator.chatgpt_based_sim import ( 8 | ChatGPTUserSimulator, 9 | CountourMaskPrediction, 10 | ) 11 | from orion.abstract.interaction_history import SucMsg 12 | from orion.config.chatgpt_config import * 13 | 14 | 15 | class ChatGPTControlAndUserSim(ChatGPTControlORION): 16 | def __init__(self, max_trial, max_round, category, chatgpt_usrsim_config, clear_gptctx=False, *args, **kwargs): 17 | super().__init__(*args, **kwargs) 18 | 19 | self.usr_sim = ChatGPTUserSimulator( 20 | chatgpt_usrsim_config=chatgpt_usrsim_config, 21 | scene_id=kwargs["scene_ids"][0], 22 | floor_plan=kwargs["floor_set"], 23 | max_trial=max_trial, 24 | max_round=max_round, 25 | category=category, 26 | is_cow_baseline=self.is_cow_baseline, 27 | is_vlamp_baseline=self.is_vlmap_baseline, 28 | ) 29 | logger.info("User Simulator Initialized.") 30 | logger.info(f"Max Trial: {max_trial}, Max Round: {max_round}, Cat: {category}") 31 | for k, v in self.usr_sim.topo_graph.instance_dict.items(): 32 | logger.info(f"Instance {k}: {v}") 33 | 34 | logger.info("User Goal init") 35 | for g in self.usr_sim.goal_gen.goals: 36 | logger.info(g) 37 | 38 | self.clear_gptctx = clear_gptctx 39 | assert self.is_vlmap_baseline is False 40 | assert self.is_cow_baseline is False 41 | 42 | suffix = f"orion_t{max_trial}r{max_round}_{category}" 43 | if self.use_memory: 44 | suffix += "_mem" 45 | else: 46 | suffix += "_nomem" 47 | if self.use_vlmap: 48 | suffix += "_vmp" 49 | else: 50 | suffix += "_novmp" 51 | if self.use_explore: 52 | suffix += "_exp" 53 | else: 54 | suffix += "_noexp" 55 | if self.clear_gptctx: 56 | suffix += "_noctx" # clear every new round 57 | else: 58 | suffix += "_ctx" 59 | 60 | logger.info(f"Dump dir suffix: {suffix}") 61 | self.dump_dir = os.path.join(self.save_dir, f"dump_{suffix}") 62 | if not os.path.exists(self.dump_dir): 63 | os.makedirs(self.dump_dir) 64 | else: 65 | logger.warning(f"Dump dir {self.dump_dir} already exists!") 66 | input("Press Enter to continue...") 67 | self.is_first_turn = True 68 | 69 | def _get_user_input(self): 70 | logger.info("\nGenreate User Utterance with GPT Simulator...") 71 | is_first_turn = self.is_first_turn 72 | if self.is_first_turn: 73 | agtresponse = "Hello, what should I do?" 74 | self.is_first_turn = False 75 | else: 76 | agtresponse = self.agent_response 77 | 78 | agt_predict = CountourMaskPrediction( 79 | predict_contours=self.predict_contours, 80 | predict_masks=self.predict_masks, 81 | ) 82 | 83 | task_finished, is_new_goal, is_new_round, goal_succ, instruction = self.usr_sim.step( 84 | agent_response=agtresponse, 85 | agtpose=self.agent_state.pose_2d, 86 | semantic_img=self.observations.semantic, 87 | agt_predict=agt_predict, 88 | step_count=self.step_count, 89 | first_turn=is_first_turn, 90 | ) 91 | self.task_finished = task_finished 92 | logger.info(f"[User Simulator] {instruction}") 93 | self.interaction_history.append( 94 | SucMsg(reward=goal_succ) 95 | ) 96 | if is_new_round: 97 | self.usr_sim.goal_gen._is_new_round = False 98 | last_round = self.usr_sim.goal_gen.last_round -1 99 | logger.info(f"Start new Round! 
From round {last_round} to {last_round+1}") 100 | if self.use_memory: 101 | self.object_memory.save(self.dump_dir, suffix=f"_round{last_round}") 102 | 103 | gpt_context = self.chatgpt.messages 104 | gpt_context_path = os.path.join(self.dump_dir, f"gptctx_round{last_round}.json") 105 | json.dump(gpt_context, open(gpt_context_path, "w")) 106 | 107 | if self.clear_gptctx: 108 | self.chatgpt.clear_ctx() 109 | 110 | if is_new_goal: 111 | # save early 112 | eval_result_path = os.path.join(self.dump_dir, "result.json") 113 | self.usr_sim.goal_gen.save(eval_result_path) 114 | conversation_path = os.path.join(self.dump_dir, "dialog.json") 115 | json.dump(self.record_conversations, open(conversation_path, "w")) 116 | 117 | # save money 118 | if re.search(r"(gpt-4|gpt4)", self.chatgpt.model): 119 | self.chatgpt.clear_ctx() 120 | 121 | 122 | return instruction 123 | 124 | def save(self): 125 | super().save() 126 | if not os.path.exists(self.dump_dir): 127 | os.makedirs(self.dump_dir, exist_ok=True) 128 | self.interaction_history.save(self.dump_dir) 129 | if self.use_memory: 130 | self.object_memory.save(self.dump_dir, suffix="_final") 131 | eval_result_path = os.path.join(self.dump_dir, "result.json") 132 | results = self.usr_sim.goal_gen.save(eval_result_path) 133 | logger.info(f"Dumped user simulator result to {eval_result_path}") 134 | conversation_path = os.path.join(self.dump_dir, "dialog.json") 135 | json.dump(self.record_conversations, open(conversation_path, "w")) 136 | logger.info(f"Dumped conversation to {conversation_path}") 137 | 138 | try: 139 | self.usr_sim.eval(results) 140 | except: 141 | pass 142 | 143 | 144 | if __name__ == "__main__": 145 | 146 | import argparse 147 | 148 | parser = argparse.ArgumentParser() 149 | parser.add_argument("--scene_id", type=str, default="4ok3usBNeis") 150 | parser.add_argument("--floor_b", type=int, default=-1) 151 | parser.add_argument("--floor_u", type=int, default=1) 152 | parser.add_argument("--category", type=str, default="mixed", choices=["landmark", "instruction", "description", "correction", "mixed", "none"]) 153 | args = parser.parse_args() 154 | 155 | max_trial=5 156 | max_round=1 157 | category=args.category 158 | use_memory=True 159 | use_vlmap=True 160 | use_explore=True 161 | clear_gptctx=False 162 | 163 | chatgpt_config=AzureGPT4Config() 164 | chatgpt_usrsim_config=AzureGPT35Config() 165 | 166 | game = ChatGPTControlAndUserSim( 167 | max_trial=max_trial, 168 | max_round=max_round, 169 | category=category, 170 | chatgpt_config=chatgpt_config, 171 | chatgpt_usrsim_config=chatgpt_usrsim_config, 172 | use_memory=use_memory, 173 | use_vlmap=use_vlmap, 174 | use_explore=use_explore, 175 | clear_gptctx=clear_gptctx, 176 | record_interaction=True, 177 | use_stream=False, 178 | fast_explore=True, 179 | scene_ids=[args.scene_id], 180 | floor_set=(args.floor_b, args.floor_u), 181 | display_shortside=256, 182 | save_dir_name="predict", 183 | auto_record=False, 184 | display_setting="rgb+occumap+topdownmap", 185 | headless=True, 186 | use_gt_pose=True, 187 | load_existing_occumap=True, 188 | save_new_occumap=False, 189 | ) 190 | 191 | game.run() 192 | -------------------------------------------------------------------------------- /scripts/user_agent_talk_vlmap.py: -------------------------------------------------------------------------------- 1 | import json 2 | import os 3 | import re 4 | import time 5 | from orion import logger 6 | from orion.agent_env.chatgpt_control_vlmap import ChatGPTControlVLMap 7 | from orion.user_simulator.chatgpt_based_sim 
import ( 8 | ChatGPTUserSimulator, 9 | CountourMaskPrediction, 10 | ) 11 | from orion.abstract.interaction_history import SucMsg 12 | from orion.config.chatgpt_config import * 13 | 14 | 15 | class ChatGPTControlAndUserSim(ChatGPTControlVLMap): 16 | def __init__(self, max_trial, max_round, category, chatgpt_usrsim_config, clear_gptctx=False, *args, **kwargs): 17 | super().__init__(*args, **kwargs) 18 | 19 | self.usr_sim = ChatGPTUserSimulator( 20 | chatgpt_usrsim_config=chatgpt_usrsim_config, 21 | scene_id=kwargs["scene_ids"][0], 22 | floor_plan=kwargs["floor_set"], 23 | max_trial=max_trial, 24 | max_round=max_round, 25 | category=category, 26 | is_cow_baseline=self.is_cow_baseline, 27 | is_vlamp_baseline=self.is_vlmap_baseline, 28 | ) 29 | logger.info("User Simulator Initialized.") 30 | logger.info(f"Max Trial: {max_trial}, Max Round: {max_round}, Cat: {category}") 31 | for k, v in self.usr_sim.topo_graph.instance_dict.items(): 32 | logger.info(f"Instance {k}: {v}") 33 | 34 | logger.info("User Goal init") 35 | for g in self.usr_sim.goal_gen.goals: 36 | logger.info(g) 37 | 38 | self.clear_gptctx = clear_gptctx 39 | assert self.is_vlmap_baseline is True 40 | assert self.is_cow_baseline is False 41 | 42 | suffix = f"vlmap_t{max_trial}r{max_round}_{category}" 43 | if self.use_memory: 44 | suffix += "_mem" 45 | else: 46 | suffix += "_nomem" 47 | if self.use_vlmap: 48 | suffix += "_vmp" 49 | else: 50 | suffix += "_novmp" 51 | if self.use_explore: 52 | suffix += "_exp" 53 | else: 54 | suffix += "_noexp" 55 | if self.clear_gptctx: 56 | suffix += "_noctx" # clear every new round 57 | else: 58 | suffix += "_ctx" 59 | 60 | logger.info(f"Dump dir suffix: {suffix}") 61 | self.dump_dir = os.path.join(self.save_dir, f"dump_{suffix}") 62 | if not os.path.exists(self.dump_dir): 63 | os.makedirs(self.dump_dir) 64 | else: 65 | logger.warning(f"Dump dir {self.dump_dir} already exists!") 66 | input("Press Enter to continue...") 67 | self.is_first_turn = True 68 | 69 | def _get_user_input(self): 70 | logger.info("\nGenreate User Utterance with GPT Simulator...") 71 | is_first_turn = self.is_first_turn 72 | if self.is_first_turn: 73 | agtresponse = "Hello, what should I do?" 74 | self.is_first_turn = False 75 | else: 76 | agtresponse = self.agent_response 77 | 78 | agt_predict = CountourMaskPrediction( 79 | predict_contours=self.predict_contours, 80 | predict_masks=self.predict_masks, 81 | ) 82 | 83 | task_finished, is_new_goal, is_new_round, goal_succ, instruction = self.usr_sim.step( 84 | agent_response=agtresponse, 85 | agtpose=self.agent_state.pose_2d, 86 | semantic_img=self.observations.semantic, 87 | agt_predict=agt_predict, 88 | step_count=self.step_count, 89 | first_turn=is_first_turn, 90 | ) 91 | self.task_finished = task_finished 92 | logger.info(f"[User Simulator] {instruction}") 93 | self.interaction_history.append( 94 | SucMsg(reward=goal_succ) 95 | ) 96 | if is_new_round: 97 | self.usr_sim.goal_gen._is_new_round = False 98 | last_round = self.usr_sim.goal_gen.last_round -1 99 | logger.info(f"Start new Round! 
From round {last_round} to {last_round+1}") 100 | if self.use_memory: 101 | self.object_memory.save(self.dump_dir, suffix=f"_round{last_round}") 102 | 103 | gpt_context = self.chatgpt.messages 104 | gpt_context_path = os.path.join(self.dump_dir, f"gptctx_round{last_round}.json") 105 | json.dump(gpt_context, open(gpt_context_path, "w")) 106 | 107 | if self.clear_gptctx: 108 | self.chatgpt.clear_ctx() 109 | 110 | if is_new_goal: 111 | # save early 112 | eval_result_path = os.path.join(self.dump_dir, "result.json") 113 | self.usr_sim.goal_gen.save(eval_result_path) 114 | conversation_path = os.path.join(self.dump_dir, "dialog.json") 115 | json.dump(self.record_conversations, open(conversation_path, "w")) 116 | 117 | # save money 118 | if re.search(r"(gpt-4|gpt4)", self.chatgpt.model): 119 | self.chatgpt.clear_ctx() 120 | 121 | 122 | return instruction 123 | 124 | def save(self): 125 | super().save() 126 | if not os.path.exists(self.dump_dir): 127 | os.makedirs(self.dump_dir, exist_ok=True) 128 | self.interaction_history.save(self.dump_dir) 129 | if self.use_memory: 130 | self.object_memory.save(self.dump_dir, suffix="_final") 131 | eval_result_path = os.path.join(self.dump_dir, "result.json") 132 | results = self.usr_sim.goal_gen.save(eval_result_path) 133 | logger.info(f"Dumped user simulator result to {eval_result_path}") 134 | conversation_path = os.path.join(self.dump_dir, "dialog.json") 135 | json.dump(self.record_conversations, open(conversation_path, "w")) 136 | logger.info(f"Dumped conversation to {conversation_path}") 137 | 138 | try: 139 | self.usr_sim.eval(results) 140 | except: 141 | pass 142 | 143 | 144 | if __name__ == "__main__": 145 | 146 | import argparse 147 | 148 | parser = argparse.ArgumentParser() 149 | parser.add_argument("--scene_id", type=str, default="4ok3usBNeis") 150 | parser.add_argument("--floor_b", type=int, default=-1) 151 | parser.add_argument("--floor_u", type=int, default=1) 152 | parser.add_argument("--category", type=str, default="mixed", choices=["landmark", "instruction", "description", "correction", "mixed", "none"]) 153 | args = parser.parse_args() 154 | 155 | max_trial=5 156 | max_round=1 157 | category=args.category 158 | use_memory=False 159 | use_vlmap=True 160 | use_explore=False 161 | clear_gptctx=False 162 | 163 | chatgpt_config=AzureGPT4Config() 164 | chatgpt_usrsim_config=AzureGPT35Config() 165 | 166 | game = ChatGPTControlAndUserSim( 167 | max_trial=max_trial, 168 | max_round=max_round, 169 | category=category, 170 | chatgpt_config=chatgpt_config, 171 | chatgpt_usrsim_config=chatgpt_usrsim_config, 172 | use_memory=use_memory, 173 | use_vlmap=use_vlmap, 174 | use_explore=use_explore, 175 | clear_gptctx=clear_gptctx, 176 | is_vlmap_baseline=True, 177 | vlmap_dir="lseg_vlmap", 178 | is_cow_baseline=False, 179 | record_interaction=False, 180 | use_stream=False, 181 | fast_explore=True, 182 | scene_ids=[args.scene_id], 183 | floor_set=(args.floor_b, args.floor_u), 184 | display_shortside=256, 185 | save_dir_name="predict", 186 | auto_record=False, 187 | display_setting="rgb+occumap+topdownmap", 188 | headless=True, 189 | use_gt_pose=True, 190 | load_existing_occumap=True, 191 | save_new_occumap=False, 192 | ) 193 | 194 | game.run() 195 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | from setuptools import find_packages, setup 2 | 3 | setup( 4 | name="navchat", 5 | author="Umich SLED Lab", 6 | packages=find_packages(), 7 
| ) 8 | -------------------------------------------------------------------------------- /tests/test_fmm_planner.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | import numpy as np 4 | 5 | from orion.abstract.pose import Agent2DPose 6 | from orion.navigation.fmm_planner import INV_ACTION_DICT, FMMPlanner 7 | 8 | data_dir = "data/experiments/predict_4ok3usBNeis_B1_U1" 9 | 10 | occumap_mask = np.load(os.path.join(data_dir, "occupancy_map.npy")) 11 | im = occumap_mask == 1 # floor 12 | 13 | planner = FMMPlanner() 14 | planner.set_traversible_map(im) 15 | 16 | y, x = np.where(planner.traversible_map) 17 | 18 | while True: 19 | goal_ind = np.random.choice(y.size) 20 | start_ind = np.random.choice(y.size) 21 | 22 | goal = Agent2DPose(x[goal_ind], y[goal_ind], 0) 23 | start = Agent2DPose(x[start_ind], y[start_ind], -np.pi / 2) 24 | 25 | print(f"start: {start}, goal: {goal}") 26 | reachable, states, a_list = planner.plan(start, goal, plot=True) 27 | # red square is the start 28 | # blue cross is the goal 29 | # red line is the planned path 30 | print(reachable, [INV_ACTION_DICT[a] for a in a_list]) 31 | -------------------------------------------------------------------------------- /tests/test_gradio_helloworld.py: -------------------------------------------------------------------------------- 1 | import random 2 | 3 | import gradio as gr 4 | 5 | with gr.Blocks() as demo: 6 | chatbot = gr.Chatbot() 7 | msg = gr.Textbox() 8 | 9 | def respond(message, chat_history): 10 | bot_message = random.choice(["How are you?", "I love you", "I'm very hungry"]) 11 | chat_history.append((message, bot_message)) 12 | return "", chat_history 13 | 14 | msg.submit(respond, [msg, chatbot], [msg, chatbot]) 15 | 16 | if __name__ == "__main__": 17 | demo.launch(share=True, server_port=7860, debug=True) 18 | -------------------------------------------------------------------------------- /tests/test_point_planner.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | import cv2 4 | import numpy as np 5 | from matplotlib import pyplot as plt 6 | 7 | from orion.map.occupancy import OccupancyMapping 8 | from orion.navigation.waypoint_planner import PointPlanner 9 | 10 | data_dir = "data/experiments/predict_4ok3usBNeis_B1_U1" 11 | 12 | 13 | occupancy_map = np.load(os.path.join(data_dir, "occupancy_map.npy")) 14 | navigation_mask = np.load(os.path.join(data_dir, "gt_navigable_mask.npy")) 15 | 16 | 17 | y, x = np.where(navigation_mask == 1) 18 | ymin, ymax = y.min(), y.max() 19 | xmin, xmax = x.min(), x.max() 20 | 21 | occupancy_map_crop = occupancy_map[ymin : ymax + 1, xmin : xmax + 1] 22 | navigation_mask_crop = navigation_mask[ymin : ymax + 1, xmin : xmax + 1] 23 | 24 | src = (102, 176) 25 | tgt = (46, 164) 26 | pts, reached = PointPlanner.line_search( 27 | src[0], 28 | src[1], 29 | tgt[0], 30 | tgt[1], 31 | occupancy_map_crop == OccupancyMapping.WALL, 32 | stop_at_wall=True, 33 | ) 34 | print("Can reach the tgt pt? 
", reached) 35 | print("pts along the line:", pts) 36 | pts = np.array(pts) 37 | navigation_mask_crop_color = np.stack( 38 | [navigation_mask_crop, navigation_mask_crop, navigation_mask_crop], axis=-1 39 | ) 40 | navigation_mask_crop_color = navigation_mask_crop_color.astype(np.uint8) 41 | navigation_mask_crop_color = 122 + navigation_mask_crop_color * 122 42 | navigation_mask_crop_color[occupancy_map_crop == OccupancyMapping.WALL] = [0, 0, 0] 43 | 44 | cv2.circle(navigation_mask_crop_color, (src[0], src[1]), 2, (0, 255, 0), -1) # green 45 | cv2.circle(navigation_mask_crop_color, (tgt[0], tgt[1]), 2, (0, 0, 255), -1) # blue 46 | cv2.polylines(navigation_mask_crop_color, [pts], False, (255, 0, 0), 1) # red 47 | plt.imshow(navigation_mask_crop_color) 48 | plt.show() 49 | -------------------------------------------------------------------------------- /tests/test_vlmap_planner.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | import numpy as np 4 | 5 | from orion.config.my_config import * 6 | from orion.map.map_search.search_voxel import VLMapSearch 7 | from orion.map.occupancy import OccupancyMapping 8 | 9 | data_dir = "data/experiments/predict_4ok3usBNeis_B1_U1" 10 | 11 | map_querier = VLMapSearch( 12 | load_sparse_map_path=os.path.join(data_dir, "lseg_vlmap/sparse_vxl_map.npz"), 13 | ) 14 | 15 | occu_map = np.load(os.path.join(data_dir, "occupancy_map.npy")) 16 | navigatable_mask = np.load(os.path.join(data_dir, "gt_navigable_mask.npy")) 17 | 18 | navigatable_mask_crop = navigatable_mask[ 19 | map_querier.zmin : map_querier.zmax + 1, map_querier.xmin : map_querier.xmax + 1 20 | ] 21 | wall_mask = occu_map == OccupancyMapping.WALL 22 | wall_mask_crop = wall_mask[ 23 | map_querier.zmin : map_querier.zmax + 1, map_querier.xmin : map_querier.xmax + 1 24 | ] 25 | 26 | 27 | predict_map, query_labels = map_querier.query(["fridge"]) 28 | predict_map_crop = predict_map[ 29 | map_querier.zmin : map_querier.zmax + 1, map_querier.xmin : map_querier.xmax + 1 30 | ] 31 | 32 | for tgt_name in query_labels: 33 | if tgt_name in ["other", "floor", "wall"]: 34 | continue 35 | map_querier.plan( 36 | tgt_name, 37 | query_labels, 38 | predict_map_crop, 39 | navigatable_mask_crop, 40 | wall_mask_crop, 41 | show=True, 42 | ) 43 | # red dot is the ceter of the target object 44 | # yellow dot is the viewpoint 45 | --------------------------------------------------------------------------------