├── .clang-format
├── .cmake-format.yaml
├── .flake8
├── .gitignore
├── .gitmodules
├── LICENSE
├── README.md
├── assets
│   ├── clio_online_example.png
│   ├── overview-figure.jpg
│   └── sample_clustered.png
├── clio
│   ├── CMakeLists.txt
│   ├── cmake
│   │   └── clioConfig.cmake.in
│   ├── include
│   │   └── clio
│   │       ├── agglomerative_clustering.h
│   │       ├── cluster.h
│   │       ├── clustering_workspace.h
│   │       ├── edge_selector.h
│   │       ├── ib_edge_selector.h
│   │       ├── ib_utils.h
│   │       ├── object_update_functor.h
│   │       ├── probability_utilities.h
│   │       ├── region_update_functor.h
│   │       └── scene_graph_types.h
│   ├── package.xml
│   ├── src
│   │   ├── agglomerative_clustering.cpp
│   │   ├── clustering_workspace.cpp
│   │   ├── ib_edge_selector.cpp
│   │   ├── ib_utils.cpp
│   │   ├── object_update_functor.cpp
│   │   ├── probability_utilities.cpp
│   │   └── region_update_functor.cpp
│   └── tests
│       ├── CMakeLists.txt
│       ├── include
│       │   └── clio_tests
│       │       └── utilities.h
│       ├── main.cpp
│       ├── src
│       │   └── utilities.cpp
│       ├── test_agglomerative_clustering.cpp
│       ├── test_clustering_workspace.cpp
│       ├── test_ib_edge_selector.cpp
│       ├── test_object_update_functor.cpp
│       └── test_probability_utilities.cpp
├── clio_batch
│   ├── __init__.py
│   ├── cluster_plotting.py
│   ├── cluster_utilities.py
│   ├── helpers.py
│   ├── ib_cluster.py
│   ├── information_metrics.py
│   └── object_cluster.py
├── clio_data_maker
│   ├── __init__.py
│   ├── ply_viewer.py
│   └── rescale_bbox.py
├── clio_eval
│   ├── __init__.py
│   ├── evaluate_dsg.py
│   ├── evaluate_helpers.py
│   ├── experiments
│   │   ├── configs
│   │   │   ├── ablations
│   │   │   │   ├── 3d_clustering.yaml
│   │   │   │   ├── 3d_clustering_oc.yaml
│   │   │   │   └── realtime_clustering.yaml
│   │   │   └── cluster
│   │   │       ├── large_delta_00001.yaml
│   │   │       └── large_delta_00001_oc.yaml
│   │   ├── evaluate_ablations.py
│   │   ├── result_utils.py
│   │   └── run_3d_object_ablations.py
│   ├── iou.py
│   ├── scene_transforms.yaml
│   ├── utils.py
│   ├── visualize_objects.py
│   └── viz_scene.py
├── clio_ros
│   ├── CMakeLists.txt
│   ├── app
│   │   └── task_server
│   ├── config
│   │   ├── realsense
│   │   │   ├── pipeline.yaml
│   │   │   ├── realsense.yaml
│   │   │   └── realsense_fine.yaml
│   │   ├── segmentation
│   │   │   ├── large_clip.yaml
│   │   │   ├── open_clip.yaml
│   │   │   └── small_clip.yaml
│   │   └── visualizer
│   │       └── default.yaml
│   ├── launch
│   │   ├── clio.launch
│   │   └── realsense.launch
│   ├── package.xml
│   └── rviz
│       └── default.rviz
├── install
│   └── clio.rosinstall
├── pyproject.toml
└── test
    ├── conftest.py
    ├── resources
    │   ├── test_cluster_config.yaml
    │   └── test_tasks.yaml
    ├── test_eval_utils.py
    ├── test_helpers.py
    ├── test_ib_cluster.py
    ├── test_information_metrics.py
    └── test_obb.py
/.clang-format: -------------------------------------------------------------------------------- 1 | --- 2 | BasedOnStyle: Google 3 | --- 4 | Language: Cpp 5 | DerivePointerAlignment: false 6 | PointerAlignment: Left 7 | ColumnLimit: 88 8 | BinPackArguments: false 9 | BinPackParameters: false 10 | --- 11 | -------------------------------------------------------------------------------- /.cmake-format.yaml: -------------------------------------------------------------------------------- 1 | parse: 2 | additional_commands: 3 | catkin_package: 4 | flags: [] 5 | kwargs: 6 | CATKIN_DEPENDS: '*' 7 | DEPENDS: '*' 8 | INCLUDE_DIRS: '*' 9 | LIBRARIES: '*' 10 | catkin_add_gtest: 11 | pargs: 1 12 | add_rostest_gtest: 13 | pargs: 2 14 | format: 15 | disable: false 16 | line_width: 88 17 | tab_size: 2 18 | use_tabchars: false 19 | fractional_tab_policy: use-space 20 | max_subgroups_hwrap: 5 21 | max_pargs_hwrap: 10 22 | max_rows_cmdline: 2 23 | separate_ctrl_name_with_space: false 24 | separate_fn_name_with_space: false 25 | dangle_parens: true 26 | dangle_align: prefix 27 | min_prefix_chars: 4 28
| max_prefix_chars: 10 29 | max_lines_hwrap: 1 30 | line_ending: unix 31 | command_case: canonical 32 | keyword_case: unchanged 33 | always_wrap: [catkin_package, FetchContent_Declare] 34 | enable_sort: true 35 | autosort: false 36 | require_valid_layout: false 37 | -------------------------------------------------------------------------------- /.flake8: -------------------------------------------------------------------------------- 1 | [flake8] 2 | max-line-length = 88 3 | extend-ignore = E203,D301 4 | per-file-ignores = 5 | *__init__.py:F401,F403,D104 6 | notebooks/*.py:D100 7 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Created by https://www.toptal.com/developers/gitignore/api/python,c++ 2 | # Edit at https://www.toptal.com/developers/gitignore?templates=python,c++ 3 | 4 | ### C++ ### 5 | # Prerequisites 6 | *.d 7 | 8 | # Compiled Object files 9 | *.slo 10 | *.lo 11 | *.o 12 | *.obj 13 | 14 | # Precompiled Headers 15 | *.gch 16 | *.pch 17 | 18 | # Compiled Dynamic libraries 19 | *.so 20 | *.dylib 21 | *.dll 22 | 23 | # Fortran module files 24 | *.mod 25 | *.smod 26 | 27 | # Compiled Static libraries 28 | *.lai 29 | *.la 30 | *.a 31 | *.lib 32 | 33 | # Executables 34 | *.exe 35 | *.out 36 | *.app 37 | 38 | ### Python ### 39 | # Byte-compiled / optimized / DLL files 40 | __pycache__/ 41 | *.py[cod] 42 | *$py.class 43 | 44 | # C extensions 45 | 46 | # Distribution / packaging 47 | .Python 48 | build/ 49 | develop-eggs/ 50 | dist/ 51 | downloads/ 52 | eggs/ 53 | .eggs/ 54 | lib/ 55 | lib64/ 56 | parts/ 57 | sdist/ 58 | var/ 59 | wheels/ 60 | share/python-wheels/ 61 | *.egg-info/ 62 | .installed.cfg 63 | *.egg 64 | MANIFEST 65 | 66 | # PyInstaller 67 | # Usually these files are written by a python script from a template 68 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 69 | *.manifest 70 | *.spec 71 | 72 | # Installer logs 73 | pip-log.txt 74 | pip-delete-this-directory.txt 75 | 76 | # Unit test / coverage reports 77 | htmlcov/ 78 | .tox/ 79 | .nox/ 80 | .coverage 81 | .coverage.* 82 | .cache 83 | nosetests.xml 84 | coverage.xml 85 | *.cover 86 | *.py,cover 87 | .hypothesis/ 88 | .pytest_cache/ 89 | cover/ 90 | 91 | # Translations 92 | *.mo 93 | *.pot 94 | 95 | # Django stuff: 96 | *.log 97 | local_settings.py 98 | db.sqlite3 99 | db.sqlite3-journal 100 | 101 | # Flask stuff: 102 | instance/ 103 | .webassets-cache 104 | 105 | # Scrapy stuff: 106 | .scrapy 107 | 108 | # Sphinx documentation 109 | docs/_build/ 110 | 111 | # PyBuilder 112 | .pybuilder/ 113 | target/ 114 | 115 | # Jupyter Notebook 116 | .ipynb_checkpoints 117 | 118 | # IPython 119 | profile_default/ 120 | ipython_config.py 121 | 122 | # pyenv 123 | # For a library or package, you might want to ignore these files since the code is 124 | # intended to run in multiple environments; otherwise, check them in: 125 | # .python-version 126 | 127 | # pipenv 128 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 129 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 130 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 131 | # install all needed dependencies. 132 | #Pipfile.lock 133 | 134 | # poetry 135 | # Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control. 
136 | # This is especially recommended for binary packages to ensure reproducibility, and is more 137 | # commonly ignored for libraries. 138 | # https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control 139 | #poetry.lock 140 | 141 | # pdm 142 | # Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control. 143 | #pdm.lock 144 | # pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it 145 | # in version control. 146 | # https://pdm.fming.dev/#use-with-ide 147 | .pdm.toml 148 | 149 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm 150 | __pypackages__/ 151 | 152 | # Celery stuff 153 | celerybeat-schedule 154 | celerybeat.pid 155 | 156 | # SageMath parsed files 157 | *.sage.py 158 | 159 | # Environments 160 | .env 161 | .venv 162 | env/ 163 | venv/ 164 | ENV/ 165 | env.bak/ 166 | venv.bak/ 167 | 168 | # Spyder project settings 169 | .spyderproject 170 | .spyproject 171 | 172 | # Rope project settings 173 | .ropeproject 174 | 175 | # mkdocs documentation 176 | /site 177 | 178 | # mypy 179 | .mypy_cache/ 180 | .dmypy.json 181 | dmypy.json 182 | 183 | # Pyre type checker 184 | .pyre/ 185 | 186 | # pytype static type analyzer 187 | .pytype/ 188 | 189 | # Cython debug symbols 190 | cython_debug/ 191 | 192 | # PyCharm 193 | # JetBrains specific template is maintained in a separate JetBrains.gitignore that can 194 | # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore 195 | # and can be added to the global gitignore or merged into this file. For a more nuclear 196 | # option (not recommended) you can uncomment the following to ignore the entire idea folder. 197 | #.idea/ 198 | 199 | ### Python Patch ### 200 | # Poetry local configuration file - https://python-poetry.org/docs/configuration/#local-configuration 201 | poetry.toml 202 | 203 | # ruff 204 | .ruff_cache/ 205 | 206 | # LSP config files 207 | pyrightconfig.json 208 | 209 | # End of https://www.toptal.com/developers/gitignore/api/python,c++ 210 | 211 | *.swp 212 | *.idea 213 | *.ccls* 214 | *.vscode/ 215 | 216 | compile_commands.json 217 | -------------------------------------------------------------------------------- /.gitmodules: -------------------------------------------------------------------------------- 1 | [submodule "thirdparty/Objectron"] 2 | path = thirdparty/Objectron 3 | url = https://github.com/google-research-datasets/Objectron.git 4 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | BSD 2-Clause License 2 | 3 | Copyright (c) 2024, Massachusetts Institute of Technology 4 | 5 | Redistribution and use in source and binary forms, with or without 6 | modification, are permitted provided that the following conditions are met: 7 | 8 | 1. Redistributions of source code must retain the above copyright notice, this 9 | list of conditions and the following disclaimer. 10 | 11 | 2. Redistributions in binary form must reproduce the above copyright notice, 12 | this list of conditions and the following disclaimer in the documentation 13 | and/or other materials provided with the distribution. 
14 | 15 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 16 | AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 17 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 18 | DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE 19 | FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 20 | DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 21 | SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 22 | CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 23 | OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 24 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 25 | -------------------------------------------------------------------------------- /assets/clio_online_example.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MIT-SPARK/Clio/f912b7c08fb0e029d80fd6b05eccee5009199ce4/assets/clio_online_example.png -------------------------------------------------------------------------------- /assets/overview-figure.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MIT-SPARK/Clio/f912b7c08fb0e029d80fd6b05eccee5009199ce4/assets/overview-figure.jpg -------------------------------------------------------------------------------- /assets/sample_clustered.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MIT-SPARK/Clio/f912b7c08fb0e029d80fd6b05eccee5009199ce4/assets/sample_clustered.png -------------------------------------------------------------------------------- /clio/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | cmake_minimum_required(VERSION 3.14) 2 | project(clio VERSION 0.0.1) 3 | 4 | set(CMAKE_CXX_STANDARD 17) 5 | set(CMAKE_CXX_STANDARD_REQUIRED ON) 6 | set(CMAKE_CXX_EXTENSIONS OFF) 7 | 8 | add_compile_options(-Wall -Wextra) 9 | 10 | option(CLIO_ENABLE_TESTS "Build unit tests" OFF) 11 | option(BUILD_SHARED_LIBS "Build shared libs" ON) 12 | 13 | find_package(hydra REQUIRED) 14 | find_package(khronos REQUIRED) 15 | 16 | include(GNUInstallDirs) 17 | 18 | add_library( 19 | ${PROJECT_NAME} 20 | src/agglomerative_clustering.cpp 21 | src/clustering_workspace.cpp 22 | src/ib_utils.cpp 23 | src/ib_edge_selector.cpp 24 | src/object_update_functor.cpp 25 | src/probability_utilities.cpp 26 | src/region_update_functor.cpp 27 | ) 28 | target_include_directories( 29 | ${PROJECT_NAME} PUBLIC $ 30 | $ 31 | ) 32 | target_link_libraries(${PROJECT_NAME} PUBLIC hydra::hydra khronos::khronos) 33 | set_property(TARGET ${PROJECT_NAME} PROPERTY POSITION_INDEPENDENT_CODE ON) 34 | add_library(clio::${PROJECT_NAME} ALIAS ${PROJECT_NAME}) 35 | 36 | if(CLIO_ENABLE_TESTS) 37 | enable_testing() 38 | add_subdirectory(tests) 39 | endif() 40 | 41 | install( 42 | TARGETS ${PROJECT_NAME} 43 | EXPORT ${PROJECT_NAME}-targets 44 | LIBRARY DESTINATION ${CMAKE_INSTALL_LIBDIR} 45 | ARCHIVE DESTINATION ${CMAKE_INSTALL_LIBDIR} 46 | ) 47 | install(DIRECTORY include/ DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}) 48 | install(EXPORT ${PROJECT_NAME}-targets FILE ${PROJECT_NAME}Targets.cmake 49 | NAMESPACE clio:: DESTINATION ${CMAKE_INSTALL_LIBDIR}/cmake/clio 50 | ) 51 | 52 | include(CMakePackageConfigHelpers) 53 | configure_package_config_file( 
54 | ${CMAKE_CURRENT_LIST_DIR}/cmake/clioConfig.cmake.in 55 | ${CMAKE_CURRENT_BINARY_DIR}/${PROJECT_NAME}Config.cmake 56 | INSTALL_DESTINATION ${CMAKE_INSTALL_LIBDIR}/cmake/${PROJECT_NAME} 57 | ) 58 | write_basic_package_version_file( 59 | ${PROJECT_NAME}ConfigVersion.cmake VERSION ${PACKAGE_VERSION} 60 | COMPATIBILITY AnyNewerVersion 61 | ) 62 | install( 63 | FILES ${CMAKE_CURRENT_BINARY_DIR}/${PROJECT_NAME}Config.cmake 64 | ${CMAKE_CURRENT_BINARY_DIR}/${PROJECT_NAME}ConfigVersion.cmake 65 | DESTINATION ${CMAKE_INSTALL_LIBDIR}/cmake/${PROJECT_NAME} 66 | ) 67 | -------------------------------------------------------------------------------- /clio/cmake/clioConfig.cmake.in: -------------------------------------------------------------------------------- 1 | @PACKAGE_INIT@ 2 | include(CMakeFindDependencyMacro) 3 | 4 | get_filename_component(clio_CMAKE_DIR "${CMAKE_CURRENT_LIST_FILE}" PATH) 5 | 6 | find_dependency(hydra REQUIRED) 7 | find_dependency(khronos REQUIRED) 8 | 9 | if(NOT TARGET clio::clio) 10 | include("${clio_CMAKE_DIR}/clioTargets.cmake") 11 | endif() 12 | 13 | set(clio_LIBRARIES clio::clio) 14 | check_required_components(clio) 15 | -------------------------------------------------------------------------------- /clio/include/clio/agglomerative_clustering.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | #include 3 | #include 4 | 5 | #include 6 | 7 | #include "clio/cluster.h" 8 | #include "clio/clustering_workspace.h" 9 | #include "clio/ib_edge_selector.h" 10 | #include "clio/scene_graph_types.h" 11 | 12 | namespace clio { 13 | 14 | void clusterAgglomerative(ClusteringWorkspace& ws, 15 | const hydra::EmbeddingGroup& tasks, 16 | EdgeSelector& edge_selector, 17 | const hydra::EmbeddingDistance& metric, 18 | bool reweight = false, 19 | double I_xy = -1, 20 | double delta_weight = 1, 21 | int verbosity = 5); 22 | 23 | class AgglomerativeClustering { 24 | public: 25 | using Clusters = std::vector; 26 | using NodeEmbeddingMap = std::map; 27 | 28 | struct Config { 29 | config::VirtualConfig tasks; 30 | config::VirtualConfig metric{ 31 | hydra::CosineDistance::Config()}; 32 | IBEdgeSelector::Config selector; 33 | bool filter_regions = false; 34 | } const config; 35 | 36 | AgglomerativeClustering(const Config& config); 37 | 38 | Clusters cluster(const spark_dsg::SceneGraphLayer& layer, 39 | const NodeEmbeddingMap& embeddings) const; 40 | 41 | Clusters getClusters(const ClusteringWorkspace& workspace, 42 | const NodeEmbeddingMap& features) const; 43 | 44 | private: 45 | hydra::EmbeddingGroup::Ptr tasks_; 46 | std::unique_ptr metric_; 47 | EdgeSelector::Ptr edge_selector_; 48 | }; 49 | 50 | void declare_config(AgglomerativeClustering::Config& config); 51 | 52 | } // namespace clio 53 | -------------------------------------------------------------------------------- /clio/include/clio/cluster.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | #include 3 | #include 4 | #include 5 | 6 | namespace clio { 7 | 8 | struct Cluster { 9 | using Ptr = std::shared_ptr; 10 | std::set nodes; 11 | double score = 0.0; 12 | Eigen::VectorXf feature; 13 | size_t best_task_index = 0; 14 | std::string best_task_name; 15 | }; 16 | 17 | } // namespace clio 18 | -------------------------------------------------------------------------------- /clio/include/clio/clustering_workspace.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | #include 3 | 4 | #include 5 |
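// NOTE(editor): ClusteringWorkspace (declared below) re-indexes a scene-graph
// layer for clustering: each node with an embedding gets a dense index i, with
// node_lookup mapping i -> NodeId, order mapping NodeId -> i, edges holding the
// candidate merges keyed on index pairs together with their current scores, and
// assignments[i] holding the cluster representative of node i (initialized to i
// itself, so every node starts as a singleton cluster; see the constructor in
// clustering_workspace.cpp).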
#include 6 | #include 7 | 8 | #include "clio/scene_graph_types.h" 9 | 10 | namespace clio { 11 | 12 | struct ClusteringWorkspace { 13 | using NodeEmbeddings = std::map; 14 | std::map features; 15 | std::map node_lookup; 16 | std::map order; 17 | std::map edges; 18 | std::vector assignments; 19 | 20 | ClusteringWorkspace(const spark_dsg::SceneGraphLayer& layer, 21 | const NodeEmbeddings& node_embeddings); 22 | 23 | ClusteringWorkspace(const spark_dsg::SceneGraphLayer& layer, 24 | const std::vector& nodes); 25 | 26 | ClusteringWorkspace(const spark_dsg::SceneGraphLayer& layer); 27 | 28 | size_t size() const; 29 | 30 | size_t featureDim() const; 31 | 32 | std::list addMerge(EdgeKey to_merge); 33 | 34 | std::vector> getClusters() const; 35 | }; 36 | 37 | } // namespace clio 38 | -------------------------------------------------------------------------------- /clio/include/clio/edge_selector.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | #include 3 | #include 4 | 5 | #include "clio/clustering_workspace.h" 6 | #include "clio/scene_graph_types.h" 7 | 8 | namespace clio { 9 | 10 | class EdgeSelector { 11 | public: 12 | using ScoreFunc = std::function; 13 | using Ptr = std::unique_ptr; 14 | 15 | virtual ~EdgeSelector() = default; 16 | 17 | virtual void setup(const ClusteringWorkspace& ws, 18 | const hydra::EmbeddingGroup& tasks, 19 | const hydra::EmbeddingDistance& metric) = 0; 20 | 21 | virtual double scoreEdge(EdgeKey edge) = 0; 22 | 23 | virtual bool updateFromEdge(EdgeKey edge) = 0; 24 | 25 | // compares two weighted edges (default should usually be w_lhs < w_rhs) 26 | virtual bool compareEdges(const std::pair& lhs, 27 | const std::pair& rhs) const = 0; 28 | 29 | virtual void onlineReweighting(double param1, double param2) = 0; 30 | 31 | virtual std::string summarize() const = 0; 32 | }; 33 | 34 | } // namespace clio 35 | -------------------------------------------------------------------------------- /clio/include/clio/ib_edge_selector.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | #include 3 | 4 | #include 5 | 6 | #include "clio/edge_selector.h" 7 | #include "clio/ib_utils.h" 8 | 9 | namespace clio { 10 | 11 | class IBEdgeSelector : public EdgeSelector { 12 | public: 13 | struct Config { 14 | double max_delta = 1.0e-3; 15 | double tolerance = -1.0e-18; 16 | PyGivenXConfig py_x; 17 | }; 18 | 19 | explicit IBEdgeSelector(const Config& config); 20 | 21 | virtual ~IBEdgeSelector() = default; 22 | 23 | void setup(const ClusteringWorkspace& ws, 24 | const hydra::EmbeddingGroup& tasks, 25 | const hydra::EmbeddingDistance& metric) override; 26 | 27 | double scoreEdge(EdgeKey edge) override; 28 | 29 | bool updateFromEdge(EdgeKey edge) override; 30 | 31 | bool compareEdges(const std::pair& lhs, 32 | const std::pair& rhs) const override; 33 | 34 | void onlineReweighting(double Ixy, double delta_weight) override; 35 | 36 | const Config config; 37 | 38 | std::string summarize() const override; 39 | 40 | protected: 41 | // p(x), p(z), p(y) 42 | Eigen::VectorXd px_; 43 | Eigen::VectorXd pz_; 44 | Eigen::VectorXd py_; 45 | // p(z|x), p(y|x), p(y|z) 46 | Eigen::MatrixXd pz_x_; // NxN 47 | Eigen::MatrixXd py_x_; // 2xN 48 | Eigen::MatrixXd py_z_; // 2xN 49 | // mutual information caches 50 | double I_xy_; 51 | double I_zy_prev_; 52 | double delta_weight_ = 1.0; 53 | std::vector deltas_; 54 | 55 | private: 56 | inline static const auto registration_ = 57 | config::RegistrationWithConfig("IB"); 60 
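// NOTE(editor): a sketch of the stop criterion this selector implements (see
// updateFromEdge in ib_edge_selector.cpp): with X the input segments, Y the
// tasks, and Z_i the clustering after i merges, a merge is accepted while
//   delta_i = w * (I[Z_{i-1}; Y] - I[Z_i; Y]) / I[X; Y] < max_delta,
// i.e. while the relative loss of task-relevant information stays below
// max_delta (w is the optional weight set via onlineReweighting). The
// non-positive tolerance presumably absorbs small negative deltas introduced
// by floating-point error.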
| }; 61 | 62 | void declare_config(IBEdgeSelector::Config& config); 63 | 64 | } // namespace clio 65 | -------------------------------------------------------------------------------- /clio/include/clio/ib_utils.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | #include 5 | #include 6 | 7 | #include 8 | 9 | #include "clio/clustering_workspace.h" 10 | #include "clio/scene_graph_types.h" 11 | 12 | namespace clio { 13 | 14 | struct PyGivenXConfig { 15 | float score_threshold = 0.23f; 16 | size_t top_k = 2; 17 | bool cumulative = true; 18 | bool null_task_preprune = true; 19 | }; 20 | 21 | Eigen::MatrixXd computeIBpyGivenX(const ClusteringWorkspace& ws, 22 | const hydra::EmbeddingGroup& tasks, 23 | const hydra::EmbeddingDistance& metric, 24 | const PyGivenXConfig& config); 25 | 26 | Eigen::VectorXd computeIBpx(const ClusteringWorkspace& ws); 27 | 28 | Eigen::VectorXd computeIBpy(const hydra::EmbeddingGroup& tasks); 29 | 30 | double computeDeltaWeight(const spark_dsg::SceneGraphLayer& layer, 31 | const std::vector& nodes); 32 | 33 | } // namespace clio 34 | -------------------------------------------------------------------------------- /clio/include/clio/object_update_functor.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | #include 3 | #include 4 | #include 5 | 6 | #include 7 | 8 | #include "clio/ib_edge_selector.h" 9 | 10 | namespace clio { 11 | 12 | struct IntersectionPolicy { 13 | using Ptr = std::unique_ptr; 14 | virtual ~IntersectionPolicy() = default; 15 | virtual bool call(const spark_dsg::KhronosObjectAttributes& lhs, 16 | const spark_dsg::KhronosObjectAttributes& rhs) const = 0; 17 | bool operator()(const spark_dsg::KhronosObjectAttributes& lhs, 18 | const spark_dsg::KhronosObjectAttributes& rhs) const { 19 | return call(lhs, rhs); 20 | } 21 | }; 22 | 23 | struct OverlapIntersection : public IntersectionPolicy { 24 | struct Config { 25 | double tolerance = 0.1; 26 | } const config; 27 | 28 | explicit OverlapIntersection(const Config& config); 29 | 30 | bool call(const spark_dsg::KhronosObjectAttributes& lhs, 31 | const spark_dsg::KhronosObjectAttributes& rhs) const; 32 | }; 33 | 34 | void declare_config(OverlapIntersection::Config& config); 35 | 36 | struct ComponentInfo { 37 | using Ptr = std::unique_ptr; 38 | 39 | ComponentInfo(const IBEdgeSelector::Config& config, 40 | const hydra::EmbeddingGroup& tasks, 41 | const hydra::EmbeddingDistance& metric, 42 | const spark_dsg::SceneGraphLayer& segments, 43 | const std::vector& nodes, 44 | double I_xy_full); 45 | 46 | IBEdgeSelector edge_selector; 47 | ClusteringWorkspace ws; 48 | 49 | std::vector segments; 50 | std::vector objects; 51 | }; 52 | 53 | class ObjectUpdateFunctor : public hydra::UpdateFunctor { 54 | public: 55 | struct Config { 56 | char prefix = 'O'; 57 | config::VirtualConfig edge_checker{ 58 | OverlapIntersection::Config()}; 59 | config::VirtualConfig tasks; 60 | config::VirtualConfig metric{ 61 | hydra::CosineDistance::Config()}; 62 | IBEdgeSelector::Config selector; 63 | double min_segment_score = 0.2; 64 | double min_object_score = 0.2; 65 | double neighbor_max_distance = 0.0; 66 | } const config; 67 | 68 | explicit ObjectUpdateFunctor(const Config& config); 69 | 70 | hydra::MergeList call(const spark_dsg::DynamicSceneGraph& unmerged, 71 | hydra::SharedDsgInfo& dsg, 72 | const hydra::UpdateInfo::ConstPtr& info) const override; 73 | 74 | std::set addSegmentEdges(spark_dsg::DynamicSceneGraph& 
graph) const; 75 | 76 | void clearActiveComponents(spark_dsg::DynamicSceneGraph& graph, 77 | const std::set& active) const; 78 | 79 | void detectObjects(spark_dsg::DynamicSceneGraph& segments) const; 80 | 81 | void updateActiveParents(spark_dsg::DynamicSceneGraph& graph) const; 82 | 83 | protected: 84 | IntersectionPolicy::Ptr edge_checker_; 85 | hydra::EmbeddingGroup::Ptr tasks_; 86 | std::unique_ptr metric_; 87 | 88 | hydra::IdTracker components_ids_; 89 | mutable std::set ignored_; 90 | mutable std::set active_; 91 | mutable NodeSymbol next_node_id_; 92 | mutable std::map components_; 93 | mutable std::map node_to_component_; 94 | }; 95 | 96 | void declare_config(ObjectUpdateFunctor::Config& config); 97 | 98 | } // namespace clio 99 | -------------------------------------------------------------------------------- /clio/include/clio/probability_utilities.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | #include 3 | 4 | namespace clio { 5 | 6 | /** 7 | * @brief Compute Shannon entropy for a given PMF 8 | * @param p PMF of the distribution 9 | * @param tolerance Threshold for near-zero probability 10 | * @returns Shannon entropy of the distribution 11 | */ 12 | double shannonEntropy(const Eigen::Ref& p, 13 | double tolerance = 1.0e-9); 14 | 15 | /** 16 | * @brief Compute the JS divergence of N distributions 17 | * @param p MxN matrix where each column is the PMF for one of the distributions 18 | * @param priors Prior weights of the N distributions 19 | * @param tolerance Threshold for near-zero probability 20 | * @returns JS divergence of the distributions 21 | */ 22 | double jensenShannonDivergence(const Eigen::MatrixXd& p, 23 | const Eigen::VectorXd& priors, 24 | double tolerance = 1.0e-9); 25 | 26 | /** 27 | * @brief Compute the mutual information between two random variables 28 | * @param pa Marginal of the first distribution p(a) 29 | * @param pb Marginal of the second distribution p(b) 30 | * @param pa_b Conditional probability distribution p(a|b) 31 | * @param tolerance Threshold for near-zero probability 32 | * @returns Mutual information between the two variables 33 | */ 34 | double mutualInformation(const Eigen::VectorXd& pa, 35 | const Eigen::VectorXd& pb, 36 | const Eigen::MatrixXd& pa_b, 37 | double tolerance = 1.0e-9); 38 | 39 | } // namespace clio 40 | -------------------------------------------------------------------------------- /clio/include/clio/region_update_functor.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | #include 3 | 4 | #include "clio/agglomerative_clustering.h" 5 | 6 | namespace clio { 7 | 8 | struct RegionUpdateFunctor : public hydra::UpdateFunctor { 9 | struct Config { 10 | AgglomerativeClustering::Config clustering; 11 | } const config; 12 | 13 | explicit RegionUpdateFunctor(const Config& config); 14 | 15 | hydra::MergeList call(const spark_dsg::DynamicSceneGraph& unmerged, 16 | hydra::SharedDsgInfo& dsg, 17 | const hydra::UpdateInfo::ConstPtr& info) const override; 18 | 19 | void updateGraphBatch(spark_dsg::DynamicSceneGraph& graph, 20 | const std::vector& clusters) const; 21 | 22 | private: 23 | mutable NodeSymbol region_id_; 24 | AgglomerativeClustering clustering_; 25 | }; 26 | 27 | void declare_config(RegionUpdateFunctor::Config& config); 28 | 29 | } // namespace clio 30 |
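[editor's note] A minimal usage sketch for the probability utilities declared above; the expected values follow directly from the definitions (assumes Eigen and the clio library are available to include and link against):

#include <iostream>
#include <Eigen/Dense>
#include "clio/probability_utilities.h"

int main() {
  // H(p) of a fair coin is 1 bit
  Eigen::VectorXd p(2);
  p << 0.5, 0.5;
  std::cout << clio::shannonEntropy(p) << "\n";  // 1

  // the columns of pa_b are two disjoint point masses; with equal priors
  // their Jensen-Shannon divergence is exactly 1 bit
  Eigen::MatrixXd pa_b = Eigen::MatrixXd::Identity(2, 2);
  Eigen::VectorXd prior(2);
  prior << 0.5, 0.5;
  std::cout << clio::jensenShannonDivergence(pa_b, prior) << "\n";  // 1

  // with p(a|b) the identity channel, I[A;B] = H(A) = 1 bit; if every column
  // of pa_b instead equaled the marginal p(a), the result would be 0
  std::cout << clio::mutualInformation(p, prior, pa_b) << "\n";  // 1
  return 0;
}

-------------------------------------------------------------------------------- /clio/include/clio/scene_graph_types.h: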
-------------------------------------------------------------------------------- 1 | #pragma once 2 | #include 3 | #include 4 | 5 | namespace clio { 6 | using spark_dsg::EdgeKey; 7 | using spark_dsg::NodeId; 8 | using spark_dsg::NodeSymbol; 9 | } // namespace clio 10 | -------------------------------------------------------------------------------- /clio/package.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | clio 4 | 0.0.1 5 | Clio paper code and integration 6 | 7 | Nathan Hughes 8 | Nathan Hughes 9 | 10 | BSD 11 | 12 | cmake 13 | hydra 14 | khronos 15 | 16 | 17 | cmake 18 | 19 | 20 | 21 | -------------------------------------------------------------------------------- /clio/src/agglomerative_clustering.cpp: -------------------------------------------------------------------------------- 1 | #include "clio/agglomerative_clustering.h" 2 | 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | 10 | #include 11 | 12 | #include "clio/edge_selector.h" 13 | 14 | namespace clio { 15 | 16 | using namespace spark_dsg; 17 | using Clusters = AgglomerativeClustering::Clusters; 18 | 19 | void declare_config(AgglomerativeClustering::Config& config) { 20 | using namespace config; 21 | name("AgglomerativeClustering::Config"); 22 | field(config.tasks, "tasks"); 23 | config.metric.setOptional(); 24 | field(config.metric, "metric"); 25 | field(config.selector, "selector"); 26 | field(config.filter_regions, "filter_regions"); 27 | } 28 | 29 | void clusterAgglomerative(ClusteringWorkspace& ws, 30 | const hydra::EmbeddingGroup& tasks, 31 | EdgeSelector& edge_selector, 32 | const hydra::EmbeddingDistance& metric, 33 | bool reweight, 34 | double I_xy, 35 | double delta_weight, 36 | int verbosity) { 37 | VLOG(verbosity) << "[IB] starting clustering with " << ws.edges.size() << " edges"; 38 | 39 | edge_selector.setup(ws, tasks, metric); 40 | 41 | if (reweight) { 42 | edge_selector.onlineReweighting(I_xy, delta_weight); 43 | } 44 | 45 | VLOG(10) << "-----------------------------------"; 46 | VLOG(10) << "Scoring edges"; 47 | VLOG(10) << "-----------------------------------"; 48 | for (auto& [edge, weight] : ws.edges) { 49 | const auto score = edge_selector.scoreEdge(edge); 50 | VLOG(10) << "edge (" << edge << "): " << score; 51 | weight = score; 52 | } 53 | VLOG(10) << "-----------------------------------"; 54 | 55 | for (size_t i = 0; i < ws.size(); ++i) { 56 | if (ws.edges.empty()) { 57 | // shouldn't happen unless |connected components| > 1 58 | break; 59 | } 60 | 61 | // iter will always be valid: always at least one edge 62 | auto best_edge_ptr = std::min_element( 63 | ws.edges.begin(), ws.edges.end(), [&](const auto& lhs, const auto& rhs) { 64 | return edge_selector.compareEdges(lhs, rhs); 65 | }); 66 | CHECK(best_edge_ptr != ws.edges.end()); 67 | if (VLOG_IS_ON(15)) { 68 | VLOG(15) << "***********************************"; 69 | VLOG(15) << "Candidates"; 70 | VLOG(15) << "***********************************"; 71 | for (auto&& [edge, weight] : ws.edges) { 72 | VLOG(15) << "edge (" << edge << "): " << weight; 73 | } 74 | VLOG(15) << "***********************************"; 75 | } 76 | 77 | const EdgeKey best_edge = best_edge_ptr->first; 78 | if (!edge_selector.updateFromEdge(best_edge)) { 79 | // we've hit a stop criteria 80 | break; 81 | } 82 | 83 | const auto changed_edges = ws.addMerge(best_edge); 84 | VLOG(10) << "-----------------------------------"; 85 | VLOG(10) << "Scoring changed edges"; 86 | VLOG(10) << 
"-----------------------------------"; 87 | for (const auto edge : changed_edges) { 88 | const auto score = edge_selector.scoreEdge(edge); 89 | VLOG(10) << "edge " << edge << ": " << score; 90 | ws.edges[edge] = score; 91 | } 92 | VLOG(10) << "-----------------------------------"; 93 | } 94 | 95 | VLOG(verbosity) << "[IB] " << edge_selector.summarize(); 96 | } 97 | 98 | AgglomerativeClustering::AgglomerativeClustering(const Config& config) 99 | : config(config::checkValid(config)), 100 | tasks_(config.tasks.create()), 101 | metric_(config.metric.create()), 102 | edge_selector_(new IBEdgeSelector(config.selector)) {} 103 | 104 | Clusters AgglomerativeClustering::cluster(const SceneGraphLayer& layer, 105 | const NodeEmbeddingMap& features) const { 106 | if (tasks_->empty()) { 107 | LOG_FIRST_N(ERROR, 5) << "No tasks present: cannot cluster"; 108 | return {}; 109 | } 110 | 111 | ClusteringWorkspace ws(layer, features); 112 | clusterAgglomerative(ws, *tasks_, *edge_selector_, *metric_); 113 | 114 | const auto to_return = getClusters(ws, features); 115 | VLOG(1) << "[IB] finished clustering with " << to_return.size() << " cluster(s)"; 116 | return to_return; 117 | } 118 | 119 | Clusters AgglomerativeClustering::getClusters(const ClusteringWorkspace& ws, 120 | const NodeEmbeddingMap& features) const { 121 | const auto cluster_nodes = ws.getClusters(); 122 | 123 | Clusters to_return; 124 | for (const auto& nodes : cluster_nodes) { 125 | auto cluster = std::make_shared(); 126 | cluster->nodes.insert(nodes.begin(), nodes.end()); 127 | 128 | auto iter = cluster->nodes.begin(); 129 | cluster->feature = features.at(*iter); 130 | ++iter; 131 | while (iter != cluster->nodes.end()) { 132 | cluster->feature += features.at(*iter); 133 | ++iter; 134 | } 135 | 136 | cluster->feature /= cluster->nodes.size(); 137 | 138 | const auto info = tasks_->getBestScore(*metric_, cluster->feature); 139 | if (config.filter_regions && info.score < config.selector.py_x.score_threshold) { 140 | continue; 141 | } 142 | 143 | cluster->score = info.score; 144 | if (info.score >= config.selector.py_x.score_threshold) { 145 | cluster->best_task_index = info.index; 146 | cluster->best_task_name = tasks_->names.at(info.index); 147 | } else { 148 | cluster->best_task_name = ""; 149 | } 150 | 151 | to_return.push_back(cluster); 152 | } 153 | 154 | return to_return; 155 | } 156 | 157 | } // namespace clio 158 | -------------------------------------------------------------------------------- /clio/src/clustering_workspace.cpp: -------------------------------------------------------------------------------- 1 | #include "clio/clustering_workspace.h" 2 | 3 | #include 4 | #include 5 | #include 6 | 7 | #include 8 | 9 | namespace clio { 10 | 11 | using namespace spark_dsg; 12 | using EmbeddingMap = std::map; 13 | 14 | bool keysIntersect(EdgeKey key1, EdgeKey key2) { 15 | return key1.k1 == key2.k1 || key1.k1 == key2.k2 || key1.k2 == key2.k1 || 16 | key1.k2 == key2.k2; 17 | } 18 | 19 | EmbeddingMap getEmbeddingMap(const std::map& nodes) { 20 | EmbeddingMap features; 21 | for (const auto& id_node : nodes) { 22 | const auto& attrs = id_node.second->attributes(); 23 | // TODO(nathan) consider other pooling operations 24 | features[id_node.first] = attrs.semantic_feature.rowwise().mean(); 25 | } 26 | 27 | return features; 28 | } 29 | 30 | EmbeddingMap getEmbeddingMap(const SceneGraphLayer& layer, 31 | const std::vector& nodes) { 32 | EmbeddingMap features; 33 | for (const auto node : nodes) { 34 | const auto& attrs = 
layer.getNode(node).attributes(); 35 | // TODO(nathan) consider other pooling operations 36 | features[node] = attrs.semantic_feature.rowwise().mean(); 37 | } 38 | 39 | return features; 40 | } 41 | 42 | ClusteringWorkspace::ClusteringWorkspace(const SceneGraphLayer& layer) 43 | : ClusteringWorkspace(layer, getEmbeddingMap(layer.nodes())) {} 44 | 45 | ClusteringWorkspace::ClusteringWorkspace(const SceneGraphLayer& layer, 46 | const std::vector& nodes) 47 | : ClusteringWorkspace(layer, getEmbeddingMap(layer, nodes)) {} 48 | 49 | ClusteringWorkspace::ClusteringWorkspace(const SceneGraphLayer& layer, 50 | const EmbeddingMap& node_embeddings) { 51 | size_t index = 0; 52 | for (auto&& [node_id, feature] : node_embeddings) { 53 | features[index] = feature; 54 | node_lookup[index] = node_id; 55 | order[node_id] = index; 56 | ++index; 57 | } 58 | 59 | for (auto&& [node_id, index] : order) { 60 | const auto& node = layer.getNode(node_id); 61 | for (const auto& sibling : node.siblings()) { 62 | auto iter = order.find(sibling); 63 | if (iter == order.end()) { 64 | continue; 65 | } 66 | 67 | edges.emplace(EdgeKey(index, iter->second), 0.0); 68 | } 69 | } 70 | 71 | assignments.resize(order.size()); 72 | std::iota(assignments.begin(), assignments.end(), 0); 73 | } 74 | 75 | size_t ClusteringWorkspace::size() const { return order.size(); } 76 | 77 | size_t ClusteringWorkspace::featureDim() const { 78 | if (features.empty()) { 79 | return 0; 80 | } 81 | 82 | return features.begin()->second.rows(); 83 | } 84 | 85 | std::list ClusteringWorkspace::addMerge(EdgeKey key) { 86 | // TODO(nathan) there are more efficient ways to maintain this, but modifying 87 | // the disjoint set right now is not the best decision 88 | for (auto& parent : assignments) { 89 | if (parent == key.k2) { 90 | parent = key.k1; 91 | } 92 | } 93 | 94 | // clear all affected edge weights 95 | std::set seen; 96 | auto iter = edges.begin(); 97 | while (iter != edges.end()) { 98 | if (keysIntersect(iter->first, key)) { 99 | seen.insert(iter->first); 100 | iter = edges.erase(iter); 101 | } else { 102 | ++iter; 103 | } 104 | } 105 | 106 | std::set new_keys; 107 | std::list to_update;
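// NOTE(editor): every edge touching the merged pair was erased above; each
// erased key (a, b) is now re-inserted with key.k2 rewritten to key.k1,
// skipping self-loops (new_k1 == new_k2) and duplicates, so each surviving
// neighbor of the merged cluster contributes exactly one candidate edge to
// rescore.
108 | for (const auto& prev : seen) { 109 | const auto new_k1 = prev.k1 == key.k2 ? key.k1 : prev.k1; 110 | const auto new_k2 = prev.k2 == key.k2 ?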
key.k1 : prev.k2; 111 | if (new_k1 == new_k2) { 112 | continue; 113 | } 114 | 115 | const EdgeKey new_key(new_k1, new_k2); 116 | if (new_keys.count(new_key)) { 117 | continue; // we got a duplicate key from a merge 118 | } 119 | new_keys.insert(new_key); 120 | edges.emplace(new_key, 0.0); 121 | to_update.push_back(new_key); 122 | } 123 | 124 | return to_update; 125 | } 126 | 127 | std::vector> ClusteringWorkspace::getClusters() const { 128 | const std::set cluster_ids(assignments.begin(), assignments.end()); 129 | 130 | size_t index = 0; 131 | std::map cluster_lookup; 132 | for (const auto cluster_id : cluster_ids) { 133 | cluster_lookup[cluster_id] = index; 134 | ++index; 135 | } 136 | 137 | std::vector> to_return(cluster_ids.size()); 138 | for (size_t i = 0; i < assignments.size(); ++i) { 139 | auto& cluster = to_return.at(cluster_lookup[assignments[i]]); 140 | cluster.push_back(node_lookup.at(i)); 141 | } 142 | 143 | return to_return; 144 | } 145 | 146 | } // namespace clio 147 | -------------------------------------------------------------------------------- /clio/src/ib_edge_selector.cpp: -------------------------------------------------------------------------------- 1 | #include "clio/ib_edge_selector.h" 2 | 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | 9 | #include "clio/probability_utilities.h" 10 | 11 | namespace clio { 12 | 13 | using Indices = std::vector>; 14 | 15 | void declare_config(IBEdgeSelector::Config& config) { 16 | using namespace config; 17 | name("IBEdgeSelector::Config"); 18 | field(config.max_delta, "max_delta"); 19 | field(config.tolerance, "tolerance"); 20 | field(config.py_x.score_threshold, "score_threshold"); 21 | field(config.py_x.top_k, "top_k"); 22 | field(config.py_x.cumulative, "cumulative"); 23 | field(config.py_x.null_task_preprune, "null_task_preprune"); 24 | 25 | check(config.max_delta, GE, 0.0, "max_delta"); 26 | check(config.tolerance, LE, 0.0, "tolerance"); 27 | check(config.py_x.top_k, GT, 0, "top_k"); 28 | } 29 | 30 | IBEdgeSelector::IBEdgeSelector(const Config& config) 31 | : config(config::checkValid(config)) {} 32 | 33 | void IBEdgeSelector::setup(const ClusteringWorkspace& ws, 34 | const hydra::EmbeddingGroup& tasks, 35 | const hydra::EmbeddingDistance& metric) { 36 | const auto fmt = hydra::getDefaultFormat(); 37 | size_t N = ws.size(); 38 | 39 | // p(z) = p(x) initially 40 | px_ = computeIBpx(ws); 41 | pz_ = px_; 42 | // p(z|x) is identity 43 | pz_x_ = Eigen::MatrixXd::Identity(N, N); 44 | 45 | py_x_ = computeIBpyGivenX(ws, tasks, metric, config.py_x); 46 | 47 | // p(y|z) = p(y|x) (as p(z) = p(x) and p(z|x) = I_N) 48 | py_z_ = py_x_; 49 | // p(y) is uniform 50 | py_ = computeIBpy(tasks); 51 | 52 | VLOG(10) << "p(x): " << px_.format(fmt); 53 | VLOG(10) << "p(z): " << pz_.format(fmt); 54 | VLOG(10) << "p(y): " << py_.format(fmt); 55 | VLOG(10) << "p(y|x): " << py_x_.format(fmt); 56 | VLOG(10) << "p(y|z): " << py_z_.format(fmt); 57 | VLOG(10) << "p(z|x): " << pz_x_.format(fmt); 58 | 59 | // initialize mutual information to starting values 60 | I_xy_ = mutualInformation(py_, px_, py_x_); 61 | I_zy_prev_ = I_xy_; 62 | deltas_.clear(); 63 | } 64 |
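// NOTE(editor): scoreEdge below computes the classic agglomerative-IB merge
// cost: for clusters s and t with masses p_s = p(z_s) and p_t = p(z_t),
//   d(s, t) = (p_s + p_t) * JS_pi(p(y|z_s), p(y|z_t)),  pi = (p_s, p_t) / (p_s + p_t),
// i.e. the Jensen-Shannon divergence of the two task posteriors weighted by
// the merged cluster mass. Edges whose endpoints have nearly identical task
// posteriors score lowest and are merged first.
65 | double IBEdgeSelector::scoreEdge(EdgeKey edge) { 66 | const auto fmt = hydra::getDefaultFormat(); 67 | const auto p_s = pz_(edge.k1); 68 | const auto p_t = pz_(edge.k2); 69 | const auto total = p_s + p_t; 70 | Eigen::VectorXd prior(2); 71 | prior << p_s / total, p_t / total; 72 | Eigen::MatrixXd py_z_local(py_z_.rows(), 2); 73 | py_z_local.col(0) = py_z_.col(edge.k1); 74 | py_z_local.col(1) =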
py_z_.col(edge.k2); 75 | const auto divergence = jensenShannonDivergence(py_z_local, prior); 76 | VLOG(20) << "Scoring edge (" << edge << "): prior: " << prior.format(fmt) 77 | << ", p(y|z=z): " << py_z_local.format(fmt) 78 | << ", divergence: " << divergence; 79 | return total * divergence; 80 | } 81 | 82 | bool IBEdgeSelector::updateFromEdge(EdgeKey edge) { 83 | // we merge target -> source 84 | const auto p_s = pz_(edge.k1); 85 | const auto p_t = pz_(edge.k2); 86 | // update new cluster probabilities 87 | pz_(edge.k1) = p_s + p_t; 88 | py_z_.col(edge.k1) = 89 | ((p_s * py_z_.col(edge.k1) + p_t * py_z_.col(edge.k2)) / (p_s + p_t)).eval(); 90 | pz_x_.col(edge.k1) += pz_x_.col(edge.k2); 91 | 92 | // zero-out merged nodes 93 | pz_(edge.k2) = 0.0; 94 | py_z_.col(edge.k2).setConstant(0.0); 95 | pz_x_.col(edge.k2).setConstant(0.0); 96 | 97 | // for I[a; b] order is p(a), p(b), p(a|b) 98 | const auto I_zy = mutualInformation(py_, pz_, py_z_); 99 | const auto d_I_zy = I_zy_prev_ - I_zy; 100 | 101 | // avoid divide-by-zero and other weirdness with precision 102 | const auto delta = delta_weight_ * d_I_zy / I_xy_; 103 | VLOG(10) << "delta for (" << edge << "): " << delta; 104 | 105 | I_zy_prev_ = I_zy; 106 | deltas_.push_back(delta); 107 | return delta < config.max_delta; 108 | } 109 | 110 | bool IBEdgeSelector::compareEdges(const std::pair& lhs, 111 | const std::pair& rhs) const { 112 | return lhs.second < rhs.second; 113 | } 114 | 115 | void IBEdgeSelector::onlineReweighting(double Ixy, double delta_weight) { 116 | I_xy_ = Ixy; 117 | delta_weight_ = delta_weight; 118 | } 119 | 120 | std::string IBEdgeSelector::summarize() const { 121 | if (deltas_.empty()) { 122 | return "0 merge(s), δ_0=N/A, δ_n=N/A"; 123 | } 124 | 125 | const size_t num_merges = 126 | deltas_.back() <= config.max_delta ? 
deltas_.size() : deltas_.size() - 1; 127 | const std::string d0 = std::to_string(deltas_.front()); 128 | const std::string dn = std::to_string(deltas_.back()); 129 | 130 | std::stringstream ss; 131 | ss << num_merges << " merge(s), " 132 | << "δ_0=" << d0 << ", δ_n=" << dn; 133 | return ss.str(); 134 | } 135 | 136 | } // namespace clio 137 | -------------------------------------------------------------------------------- /clio/src/ib_utils.cpp: -------------------------------------------------------------------------------- 1 | 2 | #include "clio/ib_utils.h" 3 | 4 | #include 5 | #include 6 | 7 | #include 8 | #include 9 | 10 | namespace clio { 11 | 12 | using namespace spark_dsg; 13 | using Indices = std::vector>; 14 | 15 | Indices findTopKIndicesCols(const Eigen::MatrixXd& m, size_t top_k) { 16 | // Find top k indices for each column 17 | size_t k = std::min(top_k, static_cast(m.rows())); 18 | Indices top_indices; 19 | for (Eigen::Index c = 0; c < m.cols(); c++) { 20 | const auto& col = m.col(c); 21 | 22 | std::vector idx(m.rows(), 0); 23 | std::iota(idx.begin(), idx.end(), 0); 24 | // Sort the indices according to their respective value 25 | std::sort(idx.begin(), idx.end(), [&col](auto& lhv, auto& rhv) { 26 | return col(lhv) > col(rhv); 27 | }); 28 | 29 | for (size_t i = 0; i < k; i++) { 30 | top_indices.push_back({idx[i], c}); 31 | } 32 | } 33 | 34 | return top_indices; 35 | } 36 | 37 | Eigen::MatrixXd computeIBpyGivenX(const ClusteringWorkspace& ws, 38 | const hydra::EmbeddingGroup& tasks, 39 | const hydra::EmbeddingDistance& metric, 40 | const PyGivenXConfig& config) { 41 | const auto fmt = hydra::getDefaultFormat(); 42 | 43 | size_t N = ws.size(); 44 | size_t M = tasks.embeddings.size() + 1; 45 | 46 | Eigen::MatrixXd py_x = Eigen::MatrixXd::Ones(M, N) * 1e-12; 47 | Eigen::MatrixXd py_x_temp = Eigen::MatrixXd::Zero(M, N); 48 | py_x_temp.row(0).setConstant(config.score_threshold); 49 | VLOG(15) << "----------------------------------------"; 50 | VLOG(15) << "Computing workspace feature scores"; 51 | VLOG(15) << "----------------------------------------"; 52 | for (auto&& [idx, feature] : ws.features) { 53 | const auto scores = tasks.getScores(metric, feature); 54 | VLOG(15) << "scores @ " << idx << ": " << scores.format(fmt); 55 | py_x_temp.block(1, idx, M - 1, 1) = scores.cast(); 56 | } 57 | VLOG(15) << "----------------------------------------"; 58 | 59 | size_t k = std::min(M, config.top_k); 60 | size_t l = k; 61 | if (config.cumulative) { 62 | l = 1; 63 | } 64 | while (l <= k) { 65 | const auto top_k_inds = findTopKIndicesCols(py_x_temp, l); 66 | for (const auto& idx : top_k_inds) { 67 | py_x(idx.first, idx.second) = 68 | py_x(idx.first, idx.second) + py_x_temp(idx.first, idx.second); 69 | } 70 | l++; 71 | } 72 | 73 | if (config.null_task_preprune) { 74 | // Null task processing 75 | const auto top_inds = findTopKIndicesCols(py_x_temp, 1); 76 | for (const auto& idx : top_inds) { 77 | // Null task corresponds to first row 78 | if (idx.first == 0) { 79 | py_x.block(1, idx.second, M - 1, 1).setConstant(1e-12); 80 | // Essentially 0 (but not 0 to avoid NaN error) 81 | } 82 | } 83 | } 84 | 85 | VLOG(10) << "raw: p(y|x): " << py_x.format(fmt); 86 | const auto scored = py_x.bottomRows(M - 1); 87 | const auto min = scored.rowwise().minCoeff(); 88 | const auto max = scored.rowwise().maxCoeff(); 89 | const auto avg = scored.rowwise().mean(); 90 | VLOG(10) << "score average: " << avg.format(fmt) << ", range: " << min.format(fmt) 91 | << " -> " << max.format(fmt); 92 | 93 | const auto 
norm_factor = py_x.colwise().sum(); 94 | py_x.array().rowwise() /= norm_factor.array(); 95 | 96 | VLOG(10) << "p(y|x): " << py_x.format(fmt); 97 | 98 | return py_x; 99 | } 100 | 101 | Eigen::VectorXd computeIBpx(const ClusteringWorkspace& ws) { 102 | // p(x) is uniform 103 | size_t N = ws.size(); 104 | return Eigen::VectorXd::Constant(N, 1.0 / static_cast(N)); 105 | } 106 | 107 | Eigen::VectorXd computeIBpy(const hydra::EmbeddingGroup& tasks) { 108 | size_t M = tasks.embeddings.size() + 1; 109 | return Eigen::VectorXd::Constant(M, 1.0 / static_cast(M)); 110 | } 111 | 112 | double computeDeltaWeight(const SceneGraphLayer& layer, 113 | const std::vector& nodes) { 114 | return static_cast(nodes.size()) / static_cast(layer.numNodes()); 115 | } 116 | 117 | } // namespace clio 118 | -------------------------------------------------------------------------------- /clio/src/probability_utilities.cpp: -------------------------------------------------------------------------------- 1 | #include "clio/probability_utilities.h" 2 | 3 | #include 4 | #include 5 | 6 | namespace clio { 7 | 8 | // -sum(p(x)log(p(x))) over all x 9 | double shannonEntropy(const Eigen::Ref& p, double tolerance) { 10 | const auto fmt = hydra::getDefaultFormat(); 11 | double entropy = 0.0; 12 | for (int i = 0; i < p.rows(); ++i) { 13 | const double p_i = p(i, 0); 14 | if (p_i < tolerance) { 15 | continue; 16 | } 17 | 18 | entropy += p_i * std::log2(p_i); 19 | } 20 | 21 | VLOG(30) << "p: " << p.format(fmt) << ", H: " << -entropy; 22 | return -entropy; 23 | } 24 | 25 | // compute the Jensen-Shannon divergence of p(a|b) (uses prior p(b)) 26 | double jensenShannonDivergence(const Eigen::MatrixXd& pa_b, 27 | const Eigen::VectorXd& p_b, 28 | double tolerance) { 29 | const auto fmt = hydra::getDefaultFormat(); 30 | VLOG(30) << "========================="; 31 | VLOG(30) << "p(a|b): " << pa_b.format(fmt) << ", p(b): " << p_b.format(fmt); 32 | 33 | // M = sum_i=0^|{1, 2, ... 
m}| p_i(x) p(y|x=i) 34 | // p(a|b) * p(b) = \sum_i=0^|a| p(a|b=i)p(b=i) 35 | const Eigen::VectorXd M = pa_b * p_b; 36 | VLOG(30) << "M: " << M.format(fmt); 37 | 38 | VLOG(30) << "-------------------------"; 39 | VLOG(30) << "Entropies"; 40 | VLOG(30) << "-------------------------"; 41 | 42 | double total_entropy = 0.0; 43 | for (int i = 0; i < pa_b.cols(); ++i) { 44 | total_entropy += p_b(i) * shannonEntropy(pa_b.col(i), tolerance); 45 | } 46 | VLOG(30) << "-------------------------"; 47 | VLOG(30) << "========================="; 48 | return shannonEntropy(M, tolerance) - total_entropy; 49 | } 50 | 51 | // compute the mutual information between two random variables 52 | double mutualInformation(const Eigen::VectorXd& pa, 53 | const Eigen::VectorXd& pb, 54 | const Eigen::MatrixXd& pa_b, 55 | double tolerance) { 56 | CHECK_EQ(pa_b.cols(), pb.rows()); 57 | CHECK_EQ(pa_b.rows(), pa.rows()); 58 | double total = 0.0; 59 | for (int b = 0; b < pa_b.cols(); ++b) { 60 | for (int a = 0; a < pa_b.rows(); ++a) { 61 | const auto p_joint = pa_b(a, b); 62 | // avoid log blowing up for events that can't occur 63 | if (p_joint < tolerance || pa(a) < tolerance) { 64 | continue; 65 | } 66 | 67 | total += pb(b) * p_joint * std::log2(p_joint / pa(a)); 68 | } 69 | } 70 | 71 | return total; 72 | } 73 | 74 | } // namespace clio 75 | -------------------------------------------------------------------------------- /clio/src/region_update_functor.cpp: -------------------------------------------------------------------------------- 1 | #include "clio/region_update_functor.h" 2 | 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | 10 | namespace clio { 11 | namespace { 12 | 13 | static const auto functor_reg = 14 | config::RegistrationWithConfig("RegionsIBFunctor"); 17 | 18 | } // namespace 19 | 20 | using hydra::MergeList; 21 | using hydra::timing::ScopedTimer; 22 | using namespace spark_dsg; 23 | 24 | using Clusters = std::vector; 25 | 26 | void declare_config(RegionUpdateFunctor::Config& config) { 27 | using namespace config; 28 | name("RegionUpdateFunctor::Config"); 29 | field(config.clustering, "clustering"); 30 | } 31 | 32 | RegionUpdateFunctor::RegionUpdateFunctor(const Config& config) 33 | : config(config::checkValid(config)), 34 | region_id_('l', 0), 35 | clustering_(config.clustering) {} 36 | 37 | MergeList RegionUpdateFunctor::call(const DynamicSceneGraph&, 38 | hydra::SharedDsgInfo& dsg, 39 | const hydra::UpdateInfo::ConstPtr& info) const { 40 | ScopedTimer timer("backend/region_clustering", info->timestamp_ns); 41 | 42 | // TODO(nathan) cache this computation 43 | const auto& places = dsg.graph->getLayer(DsgLayers::PLACES); 44 | AgglomerativeClustering::NodeEmbeddingMap valid_features; 45 | for (auto&& [node_id, node] : places.nodes()) { 46 | const auto& attrs = node->attributes(); 47 | if (attrs.semantic_feature.size() <= 1) { 48 | continue; 49 | } 50 | 51 | valid_features[node_id] = attrs.semantic_feature.rightCols<1>(); 52 | } 53 | 54 | if (valid_features.empty()) { 55 | VLOG(2) << "Need to have at least one valid place feature"; 56 | return {}; 57 | } 58 | 59 | const auto clusters = clustering_.cluster(places, valid_features); 60 | updateGraphBatch(*dsg.graph, clusters); 61 | return {}; 62 | } 63 | 64 | void RegionUpdateFunctor::updateGraphBatch(DynamicSceneGraph& graph, 65 | const Clusters& clusters) const { 66 | VLOG(2) << "Got " << clusters.size() << " cluster(s)"; 67 | 68 | std::vector prev_regions; 69 | for (const auto& id_node_pair :
graph.getLayer(DsgLayers::ROOMS).nodes()) { 70 | prev_regions.push_back(id_node_pair.first); 71 | } 72 | 73 | for (const auto node : prev_regions) { 74 | graph.removeNode(node); 75 | } 76 | 77 | std::set new_nodes; 78 | for (size_t i = 0; i < clusters.size(); ++i) { 79 | NodeSymbol new_node_id(region_id_.category(), i); 80 | auto attrs = std::make_unique(); 82 | attrs->name = clusters[i]->best_task_name; 83 | attrs->semantic_feature = clusters[i]->feature; 84 | attrs->semantic_label = clusters[i]->best_task_index; 85 | graph.emplaceNode(DsgLayers::ROOMS, new_node_id, std::move(attrs)); 86 | 87 | for (const auto node_id : clusters[i]->nodes) { 88 | graph.insertEdge(new_node_id, node_id); 89 | } 90 | 91 | new_nodes.insert(new_node_id); 92 | } 93 | 94 | const auto& places = graph.getLayer(DsgLayers::PLACES); 95 | for (auto&& [node_id, node] : graph.getLayer(DsgLayers::ROOMS).nodes()) { 96 | const std::unordered_set to_use(node->children().begin(), 97 | node->children().end()); 98 | node->attributes().position = hydra::getRoomPosition(places, to_use); 99 | } 100 | 101 | hydra::addEdgesToRoomLayer(graph, new_nodes); 102 | } 103 | 104 | } // namespace clio 105 | -------------------------------------------------------------------------------- /clio/tests/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | find_package(rostest REQUIRED) 2 | catkin_add_gtest( 3 | test_${PROJECT_NAME} 4 | main.cpp 5 | src/utilities.cpp 6 | test_agglomerative_clustering.cpp 7 | test_clustering_workspace.cpp 9 | test_ib_edge_selector.cpp 10 | test_object_update_functor.cpp 11 | test_probability_utilities.cpp 12 | ) 13 | target_include_directories(test_${PROJECT_NAME} PUBLIC include) 14 | target_link_libraries(test_${PROJECT_NAME} ${PROJECT_NAME}) 15 | -------------------------------------------------------------------------------- /clio/tests/include/clio_tests/utilities.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | 5 | namespace clio::test { 6 | 7 | struct TestEmbeddingGroup : public EmbeddingGroup { 8 | struct Config { 9 | size_t num_embeddings = 1; 10 | }; 11 | 12 | explicit TestEmbeddingGroup(const Config&); 13 | 14 | static config::VirtualConfig getDefault(size_t num_embeddings = 1); 15 | 16 | static Eigen::MatrixXd getEmbedding(size_t index); 17 | 18 | private: 19 | inline static const auto registration_ = 20 | config::RegistrationWithConfig( 21 | "test_group"); 22 | }; 23 | 24 | void declare_config(TestEmbeddingGroup::Config&); 25 | 26 | } // namespace clio::test 27 | -------------------------------------------------------------------------------- /clio/tests/main.cpp: -------------------------------------------------------------------------------- 1 | /* ----------------------------------------------------------------------------- 2 | * Copyright 2022 Massachusetts Institute of Technology. 3 | * All Rights Reserved 4 | * 5 | * Redistribution and use in source and binary forms, with or without 6 | * modification, are permitted provided that the following conditions are met: 7 | * 8 | * 1. Redistributions of source code must retain the above copyright notice, 9 | * this list of conditions and the following disclaimer. 10 | * 11 | * 2.
Redistributions in binary form must reproduce the above copyright notice, 12 | * this list of conditions and the following disclaimer in the documentation 13 | * and/or other materials provided with the distribution. 14 | * 15 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND 16 | * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 17 | * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 18 | * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE 19 | * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 20 | * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 21 | * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 22 | * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 23 | * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 24 | * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 25 | * 26 | * Research was sponsored by the United States Air Force Research Laboratory and 27 | * the United States Air Force Artificial Intelligence Accelerator and was 28 | * accomplished under Cooperative Agreement Number FA8750-19-2-1000. The views 29 | * and conclusions contained in this document are those of the authors and should 30 | * not be interpreted as representing the official policies, either expressed or 31 | * implied, of the United States Air Force or the U.S. Government. The U.S. 32 | * Government is authorized to reproduce and distribute reprints for Government 33 | * purposes notwithstanding any copyright notation herein. 34 | * -------------------------------------------------------------------------- */ 35 | #include 36 | #include 37 | #include 38 | 39 | auto main(int argc, char** argv) -> int { 40 | FLAGS_logtostderr = true; 41 | FLAGS_alsologtostderr = true; 42 | FLAGS_colorlogtostderr = true; 43 | FLAGS_minloglevel = 1; 44 | 45 | ::testing::InitGoogleTest(&argc, argv); 46 | google::ParseCommandLineFlags(&argc, &argv, true); 47 | google::InitGoogleLogging(argv[0]); 48 | 49 | return RUN_ALL_TESTS(); 50 | } 51 | -------------------------------------------------------------------------------- /clio/tests/src/utilities.cpp: -------------------------------------------------------------------------------- 1 | #include "clio_tests/utilities.h" 2 | 3 | #include 4 | 5 | namespace clio::test { 6 | 7 | TestEmbeddingGroup::TestEmbeddingGroup(const Config& config) { 8 | embeddings.push_back(Eigen::VectorXd::Zero(10)); 9 | for (size_t i = 0; i < config.num_embeddings; ++i) { 10 | embeddings.push_back(TestEmbeddingGroup::getEmbedding(i)); 11 | } 12 | } 13 | 14 | config::VirtualConfig TestEmbeddingGroup::getDefault( 15 | size_t num_embeddings) { 16 | Config config; 17 | config.num_embeddings = num_embeddings; 18 | return {config, "test_group"}; 19 | } 20 | 21 | Eigen::MatrixXd TestEmbeddingGroup::getEmbedding(size_t index) { 22 | Eigen::MatrixXd feature = Eigen::MatrixXd::Zero(10, 1); 23 | feature(std::clamp(index, 0, 10), 0) = 1.0; 24 | return feature; 25 | } 26 | 27 | void declare_config(TestEmbeddingGroup::Config& config) { 28 | using namespace config; 29 | name("TestEmbeddingGroup::Config"); 30 | field(config.num_embeddings, "num_embeddings"); 31 | } 32 | 33 | } // namespace clio::test 34 | -------------------------------------------------------------------------------- /clio/tests/test_agglomerative_clustering.cpp: 
-------------------------------------------------------------------------------- 1 | #include <clio/agglomerative_clustering.h> 2 | #include <clio/ib_edge_selector.h> 3 | #include <gtest/gtest.h> 4 | #include <sstream> 5 | #include 6 | #include 7 | 8 | namespace clio { 9 | 10 | namespace { 11 | 12 | inline Eigen::VectorXd getOneHot(size_t i, size_t dim) { 13 | Eigen::VectorXd p = Eigen::VectorXd::Zero(dim); 14 | p(i) = 1.0; 15 | return p; 16 | } 17 | 18 | struct FakeEmbeddingGroup : public EmbeddingGroup { 19 | struct Config {}; 20 | 21 | explicit FakeEmbeddingGroup(const Config&) { 22 | Eigen::VectorXd ref(10); 23 | ref << 1.0, 2.0, 3.0, 4.0, 5.0, 0.0, 0.0, 0.0, 0.0, 0.0; 24 | embeddings.push_back(ref); 25 | tasks.push_back("task_0"); 26 | } 27 | 28 | private: 29 | inline static const auto registration_ = 30 | config::RegistrationWithConfig<EmbeddingGroup, FakeEmbeddingGroup, FakeEmbeddingGroup::Config>( 31 | "fake_embeddings"); 32 | }; 33 | 34 | void declare_config(FakeEmbeddingGroup::Config&) {} 35 | 36 | inline AgglomerativeClustering::Config getFakeConfig() { 37 | AgglomerativeClustering::Config config; 38 | config.metric = 39 | config::VirtualConfig<EmbeddingDistance>(CosineDistance::Config(), "cosine"); 40 | config.selector = config::VirtualConfig<EdgeSelector>(IBEdgeSelector::Config(), "IB"); 41 | config.tasks = config::VirtualConfig<EmbeddingGroup>(FakeEmbeddingGroup::Config(), 42 | "fake_embeddings"); 43 | return config; 44 | } 45 | 46 | std::map<NodeId, size_t> getNodeAssignments(const std::vector<Cluster::Ptr>& clusters) { 47 | std::map<NodeId, size_t> assignments; 48 | for (size_t i = 0; i < clusters.size(); ++i) { 49 | for (const auto& node_id : clusters[i]->nodes) { 50 | assignments[node_id] = i; 51 | } 52 | } 53 | return assignments; 54 | } 55 | 56 | template <typename K, typename V> 57 | std::string printMap(const std::map<K, V>& values) { 58 | std::stringstream ss; 59 | ss << "{"; 60 | auto iter = values.begin(); 61 | while (iter != values.end()) { 62 | ss << iter->first << ": " << iter->second; 63 | ++iter; 64 | if (iter != values.end()) { 65 | ss << ", "; 66 | } 67 | } 68 | ss << "}"; 69 | return ss.str(); 70 | } 71 | 72 | template <typename T> 73 | std::string printVec(const std::vector<T>& values) { 74 | std::stringstream ss; 75 | ss << "["; 76 | auto iter = values.begin(); 77 | while (iter != values.end()) { 78 | ss << *iter; 79 | ++iter; 80 | if (iter != values.end()) { 81 | ss << ", "; 82 | } 83 | } 84 | ss << "]"; 85 | return ss.str(); 86 | } 87 | 88 | std::string workspaceState(const ClusteringWorkspace& ws) { 89 | std::stringstream ss; 90 | ss << "lookup: " << printMap(ws.node_lookup) << std::endl; 91 | ss << "order: " << printMap(ws.order) << std::endl; 92 | ss << "assignments: " << printVec(ws.assignments) << std::endl; 93 | ss << "edges: " << printMap(ws.edges) << std::endl; 94 | return ss.str(); 95 | } 96 | 97 | } // namespace 98 | 99 | TEST(AgglomerativeClustering, GetClustersCorrect) { 100 | IsolatedSceneGraphLayer layer(2); 101 | 102 | NodeEmbeddingMap map; 103 | for (size_t i = 0; i < 5; ++i) { 104 | layer.emplaceNode(2 * i, std::make_unique<NodeAttributes>()); 105 | map[2 * i] = getOneHot(i, 10); 106 | } 107 | 108 | for (size_t i = 0; i < 5; ++i) { 109 | layer.emplaceNode(2 * i + 1, std::make_unique<NodeAttributes>()); 110 | } 111 | 112 | for (size_t i = 0; i < 4; ++i) { 113 | layer.insertEdge(2 * i, 2 * (i + 1)); 114 | } 115 | 116 | ClusteringWorkspace ws(layer, map); 117 | const auto config = getFakeConfig(); 118 | AgglomerativeClustering clustering(config); 119 | 120 | { // expected_assignments = {0, 1, 2, 3, 4} 121 | auto clusters = clustering.getClusters(ws, map); 122 | EXPECT_EQ(clusters.size(), 5); 123 | const auto result = getNodeAssignments(clusters); 124 | std::map<NodeId, size_t> expected{{0, 0}, {2, 1}, {4, 2}, {6, 3}, {8, 4}}; 125 | EXPECT_EQ(expected, result) << 
workspaceState(ws); 126 | } 127 | 128 | ws.addMerge({1, 2}); 129 | { // expected_assignments = {0, 1, 1, 3, 4} 130 | auto clusters = clustering.getClusters(ws, map); 131 | EXPECT_EQ(clusters.size(), 4); 132 | const auto result = getNodeAssignments(clusters); 133 | std::map<NodeId, size_t> expected{{0, 0}, {2, 1}, {4, 1}, {6, 2}, {8, 3}}; 134 | EXPECT_EQ(expected, result) << workspaceState(ws); 135 | } 136 | 137 | ws.addMerge({3, 4}); 138 | { // expected_assignments = {0, 1, 1, 3, 3} 139 | auto clusters = clustering.getClusters(ws, map); 140 | EXPECT_EQ(clusters.size(), 3); 141 | const auto result = getNodeAssignments(clusters); 142 | std::map<NodeId, size_t> expected{{0, 0}, {2, 1}, {4, 1}, {6, 2}, {8, 2}}; 143 | EXPECT_EQ(expected, result) << workspaceState(ws); 144 | } 145 | 146 | ws.addMerge({0, 1}); 147 | { // expected_assignments = {0, 0, 0, 3, 3} 148 | auto clusters = clustering.getClusters(ws, map); 149 | EXPECT_EQ(clusters.size(), 2); 150 | const auto result = getNodeAssignments(clusters); 151 | std::map<NodeId, size_t> expected{{0, 0}, {2, 0}, {4, 0}, {6, 1}, {8, 1}}; 152 | EXPECT_EQ(expected, result) << workspaceState(ws); 153 | } 154 | 155 | ws.addMerge({0, 3}); 156 | { // expected_assignments = {0, 0, 0, 0, 0} 157 | auto clusters = clustering.getClusters(ws, map); 158 | EXPECT_EQ(clusters.size(), 1); 159 | const auto result = getNodeAssignments(clusters); 160 | std::map<NodeId, size_t> expected{{0, 0}, {2, 0}, {4, 0}, {6, 0}, {8, 0}}; 161 | EXPECT_EQ(expected, result) << workspaceState(ws); 162 | } 163 | } 164 | 165 | } // namespace clio 166 | -------------------------------------------------------------------------------- /clio/tests/test_clustering_workspace.cpp: -------------------------------------------------------------------------------- 1 | #include <clio/clustering_workspace.h> 2 | #include <gtest/gtest.h> 3 | #include 4 | 5 | namespace clio { 6 | 7 | namespace { 8 | 9 | inline Eigen::VectorXd getOneHot(size_t i, size_t dim) { 10 | Eigen::VectorXd p = Eigen::VectorXd::Zero(dim); 11 | p(i) = 1.0; 12 | return p; 13 | } 14 | 15 | } // namespace 16 | 17 | TEST(ClusteringWorkspace, InitCorrect) { 18 | IsolatedSceneGraphLayer layer(2); 19 | 20 | NodeEmbeddingMap map; 21 | for (size_t i = 0; i < 5; ++i) { 22 | layer.emplaceNode(2 * i, std::make_unique<NodeAttributes>()); 23 | map[2 * i] = getOneHot(i, 10); 24 | } 25 | 26 | for (size_t i = 0; i < 5; ++i) { 27 | layer.emplaceNode(2 * i + 1, std::make_unique<NodeAttributes>()); 28 | } 29 | 30 | { // nodes only: should only get features, no edges 31 | ClusteringWorkspace ws(layer, map); 32 | EXPECT_EQ(ws.size(), 5); 33 | EXPECT_EQ(ws.featureDim(), 10); 34 | EXPECT_TRUE(ws.edges.empty()); 35 | 36 | std::map<size_t, NodeId> expected_lookup{{0, 0}, {1, 2}, {2, 4}, {3, 6}, {4, 8}}; 37 | EXPECT_EQ(ws.node_lookup, expected_lookup); 38 | std::map<NodeId, size_t> expected_order{{0, 0}, {2, 1}, {4, 2}, {6, 3}, {8, 4}}; 39 | EXPECT_EQ(ws.order, expected_order); 40 | std::vector<size_t> expected_assignments{0, 1, 2, 3, 4}; 41 | EXPECT_EQ(ws.assignments, expected_assignments); 42 | } 43 | 44 | for (size_t i = 0; i < 9; ++i) { 45 | layer.insertEdge(i, i + 1); 46 | } 47 | 48 | { // no siblings in map: should only get features, no edges 49 | ClusteringWorkspace ws(layer, map); 50 | EXPECT_EQ(ws.size(), 5); 51 | EXPECT_EQ(ws.featureDim(), 10); 52 | EXPECT_TRUE(ws.edges.empty()); 53 | 54 | std::map<size_t, NodeId> expected_lookup{{0, 0}, {1, 2}, {2, 4}, {3, 6}, {4, 8}}; 55 | EXPECT_EQ(ws.node_lookup, expected_lookup); 56 | std::map<NodeId, size_t> expected_order{{0, 0}, {2, 1}, {4, 2}, {6, 3}, {8, 4}}; 57 | EXPECT_EQ(ws.order, expected_order); 58 | std::vector<size_t> expected_assignments{0, 1, 2, 3, 4}; 59 | EXPECT_EQ(ws.assignments, expected_assignments); 60 | }
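  // The loop below links only the even, feature-bearing nodes (0-2-4-6-8) into a
  // chain; these sibling edges are what finally populate ws.edges, keyed by the
  // contiguous workspace indices rather than by the original scene-graph node IDs.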
 61 | 62 | for (size_t i = 0; i < 4; ++i) { 63 | layer.insertEdge(2 * i, 2 * (i + 1)); 64 | } 65 | 66 | { // linear siblings: should get edges 67 | ClusteringWorkspace ws(layer, map); 68 | EXPECT_EQ(ws.size(), 5); 69 | EXPECT_EQ(ws.featureDim(), 10); 70 | // edges are keyed by index in workspace 71 | std::map<EdgeKey, double> expected_edges{ 72 | {{0, 1}, 0.0}, {{1, 2}, 0.0}, {{2, 3}, 0.0}, {{3, 4}, 0.0}}; 73 | EXPECT_EQ(ws.edges, expected_edges); 74 | 75 | std::map<size_t, NodeId> expected_lookup{{0, 0}, {1, 2}, {2, 4}, {3, 6}, {4, 8}}; 76 | EXPECT_EQ(ws.node_lookup, expected_lookup); 77 | std::map<NodeId, size_t> expected_order{{0, 0}, {2, 1}, {4, 2}, {6, 3}, {8, 4}}; 78 | EXPECT_EQ(ws.order, expected_order); 79 | std::vector<size_t> expected_assignments{0, 1, 2, 3, 4}; 80 | EXPECT_EQ(ws.assignments, expected_assignments); 81 | } 82 | } 83 | 84 | TEST(ClusteringWorkspace, MergeCorrect) { 85 | IsolatedSceneGraphLayer layer(2); 86 | 87 | NodeEmbeddingMap map; 88 | for (size_t i = 0; i < 5; ++i) { 89 | layer.emplaceNode(2 * i, std::make_unique<NodeAttributes>()); 90 | map[2 * i] = getOneHot(i, 10); 91 | } 92 | 93 | for (size_t i = 0; i < 5; ++i) { 94 | layer.emplaceNode(2 * i + 1, std::make_unique<NodeAttributes>()); 95 | } 96 | 97 | for (size_t i = 0; i < 4; ++i) { 98 | layer.insertEdge(2 * i, 2 * (i + 1)); 99 | } 100 | 101 | ClusteringWorkspace ws(layer, map); 102 | 103 | auto updated_edges = ws.addMerge({1, 2}); 104 | std::list<EdgeKey> expected_updates{{0, 1}, {1, 3}}; 105 | EXPECT_EQ(updated_edges, expected_updates); 106 | EXPECT_EQ(ws.size(), 5); 107 | EXPECT_EQ(ws.featureDim(), 10); 108 | std::map<EdgeKey, double> expected_edges{{{0, 1}, 0.0}, {{1, 3}, 0.0}, {{3, 4}, 0.0}}; 109 | EXPECT_EQ(ws.edges, expected_edges); 110 | std::vector<size_t> expected_assignments{0, 1, 1, 3, 4}; 111 | EXPECT_EQ(ws.assignments, expected_assignments); 112 | 113 | // note: edge keys are ordered 114 | updated_edges = ws.addMerge({4, 3}); 115 | expected_updates = {{1, 3}}; 116 | EXPECT_EQ(updated_edges, expected_updates); 117 | EXPECT_EQ(ws.size(), 5); 118 | EXPECT_EQ(ws.featureDim(), 10); 119 | expected_edges = {{{0, 1}, 0.0}, {{1, 3}, 0.0}}; 120 | EXPECT_EQ(ws.edges, expected_edges); 121 | expected_assignments = {0, 1, 1, 3, 3}; 122 | EXPECT_EQ(ws.assignments, expected_assignments); 123 | 124 | updated_edges = ws.addMerge({0, 1}); 125 | expected_updates = {{0, 3}}; 126 | EXPECT_EQ(updated_edges, expected_updates); 127 | EXPECT_EQ(ws.size(), 5); 128 | EXPECT_EQ(ws.featureDim(), 10); 129 | expected_edges = {{{0, 3}, 0.0}}; 130 | EXPECT_EQ(ws.edges, expected_edges); 131 | expected_assignments = {0, 0, 0, 3, 3}; 132 | EXPECT_EQ(ws.assignments, expected_assignments); 133 | 134 | // note: edge keys are ordered 135 | updated_edges = ws.addMerge({3, 0}); 136 | EXPECT_TRUE(updated_edges.empty()); 137 | EXPECT_EQ(ws.size(), 5); 138 | EXPECT_EQ(ws.featureDim(), 10); 139 | expected_edges = {}; 140 | EXPECT_EQ(ws.edges, expected_edges); 141 | expected_assignments = {0, 0, 0, 0, 0}; 142 | EXPECT_EQ(ws.assignments, expected_assignments); 143 | } 144 | 145 | } // namespace clio 146 | -------------------------------------------------------------------------------- /clio/tests/test_ib_edge_selector.cpp: -------------------------------------------------------------------------------- 1 | #include <clio/clustering_workspace.h> 2 | #include <clio/ib_edge_selector.h> 3 | #include <gtest/gtest.h> 4 | #include 5 | #include 6 | 7 | namespace clio { 8 | 9 | struct TestableIBEdgeSelector : public IBEdgeSelector { 10 | explicit TestableIBEdgeSelector(const IBEdgeSelector::Config& config) 11 | : IBEdgeSelector(config) {} 12 | 13 | using IBEdgeSelector::px_; 14 | using IBEdgeSelector::py_; 15 | using IBEdgeSelector::py_x_; 16 
| using IBEdgeSelector::py_z_; 17 | using IBEdgeSelector::pz_; 18 | using IBEdgeSelector::pz_x_; 19 | }; 20 | 21 | namespace { 22 | 23 | inline Eigen::VectorXd getOneHot(size_t i, size_t dim) { 24 | Eigen::VectorXd p = Eigen::VectorXd::Zero(dim); 25 | p(i) = 1.0; 26 | return p; 27 | } 28 | 29 | } // namespace 30 | 31 | TEST(IBEdgeSelector, SetupSimpleCorrect) { 32 | IsolatedSceneGraphLayer layer(2); 33 | 34 | NodeEmbeddingMap x_segments; 35 | for (size_t i = 0; i < 5; ++i) { 36 | layer.emplaceNode(2 * i, std::make_unique<NodeAttributes>()); 37 | x_segments[2 * i] = getOneHot(i, 10); 38 | } 39 | 40 | EmbeddingGroup y_tasks; 41 | for (size_t i = 0; i < 3; ++i) { 42 | y_tasks.embeddings.push_back(getOneHot(i, 10)); 43 | y_tasks.tasks.push_back(std::to_string(i)); 44 | } 45 | 46 | for (size_t i = 0; i < 5; ++i) { 47 | layer.emplaceNode(2 * i + 1, std::make_unique<NodeAttributes>()); 48 | } 49 | 50 | for (size_t i = 0; i < 4; ++i) { 51 | layer.insertEdge(2 * i, 2 * (i + 1)); 52 | } 53 | 54 | ClusteringWorkspace ws(layer, x_segments); 55 | CosineDistance dist; 56 | 57 | IBEdgeSelector::Config config; 58 | config.py_x.score_threshold = 1.0; 59 | // Test simplest case 60 | config.py_x.cumulative = false; 61 | config.py_x.null_task_preprune = false; 62 | config.py_x.top_k = 100; // Large so that essentially disabled 63 | TestableIBEdgeSelector selector(config); 64 | selector.setup(ws, y_tasks, dist); 65 | 66 | ASSERT_EQ(selector.px_.rows(), 5); 67 | ASSERT_EQ(selector.pz_.rows(), 5); 68 | ASSERT_EQ(selector.py_.rows(), 4); 69 | ASSERT_EQ(selector.pz_x_.rows(), 5); 70 | ASSERT_EQ(selector.pz_x_.cols(), 5); 71 | ASSERT_EQ(selector.py_x_.rows(), 4); 72 | ASSERT_EQ(selector.py_x_.cols(), 5); 73 | ASSERT_EQ(selector.py_z_.rows(), 4); 74 | ASSERT_EQ(selector.py_z_.cols(), 5); 75 | EXPECT_TRUE(selector.px_.isApprox(selector.pz_)); 76 | EXPECT_TRUE(selector.py_x_.isApprox(selector.py_z_)); 77 | 78 | const auto fmt = getDefaultFormat(); 79 | Eigen::VectorXd expected_px(5); 80 | expected_px << 0.2, 0.2, 0.2, 0.2, 0.2; // row-wise sum normalized via l1-norm 81 | EXPECT_TRUE(selector.px_.isApprox(expected_px)) 82 | << "expected: " << expected_px.format(fmt) 83 | << ", result: " << selector.px_.format(fmt); 84 | 85 | Eigen::MatrixXd expected_py_x(4, 5); 86 | expected_py_x << 0.5, 0.5, 0.5, 1.0, 1.0, 0.5, 0.0, 0.0, 0.0, 0.0, 0.0, 0.5, 0.0, 0.0, 87 | 0.0, 0.0, 0.0, 0.5, 0.0, 0.0; 88 | EXPECT_TRUE(selector.py_x_.isApprox(expected_py_x, 1e-9)) 89 | << "expected: " << expected_py_x.format(fmt) 90 | << ", result: " << selector.py_x_.format(fmt); 91 | 92 | Eigen::VectorXd expected_py(4); 93 | expected_py << 0.25, 0.25, 0.25, 0.25; // row-wise sum normalized via l1-norm 94 | EXPECT_TRUE(selector.py_.isApprox(expected_py)) 95 | << "expected: " << expected_py.format(fmt) 96 | << ", result: " << selector.py_.format(fmt); 97 | } 98 | 99 | TEST(IBEdgeSelector, SetupTopKCorrect) { 100 | IsolatedSceneGraphLayer layer(2); 101 | 102 | NodeEmbeddingMap x_segments; 103 | for (size_t i = 0; i < 5; ++i) { 104 | layer.emplaceNode(2 * i, std::make_unique<NodeAttributes>()); 105 | x_segments[2 * i] = getOneHot(i, 10); 106 | } 107 | 108 | EmbeddingGroup y_tasks; 109 | for (size_t i = 0; i < 3; ++i) { 110 | y_tasks.embeddings.push_back(getOneHot(i, 10)); 111 | y_tasks.tasks.push_back(std::to_string(i)); 112 | } 113 | 114 | for (size_t i = 0; i < 5; ++i) { 115 | layer.emplaceNode(2 * i + 1, std::make_unique<NodeAttributes>()); 116 | } 117 | 118 | for (size_t i = 0; i < 4; ++i) { 119 | layer.insertEdge(2 * i, 2 * (i + 1)); 120 | } 121 | 122 | ClusteringWorkspace ws(layer, x_segments); 123 | 
CosineDistance dist; 124 | 125 | IBEdgeSelector::Config config; 126 | config.py_x.score_threshold = 0.9; 127 | // Test top k (k = 1) 128 | config.py_x.cumulative = false; 129 | config.py_x.null_task_preprune = false; 130 | config.py_x.top_k = 1; // Test single top k (one hot) 131 | TestableIBEdgeSelector selector(config); 132 | selector.setup(ws, y_tasks, dist); 133 | 134 | const auto fmt = getDefaultFormat(); 135 | 136 | Eigen::MatrixXd expected_py_x(4, 5); 137 | expected_py_x << 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 138 | 0.0, 0.0, 0.0, 1.0, 0.0, 0.0; 139 | EXPECT_TRUE(selector.py_x_.isApprox(expected_py_x, 1e-9)) 140 | << "expected: " << expected_py_x.format(fmt) 141 | << ", result: " << selector.py_x_.format(fmt); 142 | } 143 | 144 | TEST(IBEdgeSelector, SetupCumulativeCorrect) { 145 | IsolatedSceneGraphLayer layer(2); 146 | 147 | NodeEmbeddingMap x_segments; 148 | for (size_t i = 0; i < 5; ++i) { 149 | layer.emplaceNode(2 * i, std::make_unique<NodeAttributes>()); 150 | x_segments[2 * i] = getOneHot(i, 10); 151 | } 152 | 153 | EmbeddingGroup y_tasks; 154 | for (size_t i = 0; i < 3; ++i) { 155 | y_tasks.embeddings.push_back(getOneHot(i, 10)); 156 | y_tasks.tasks.push_back(std::to_string(i)); 157 | } 158 | 159 | for (size_t i = 0; i < 5; ++i) { 160 | layer.emplaceNode(2 * i + 1, std::make_unique<NodeAttributes>()); 161 | } 162 | 163 | for (size_t i = 0; i < 4; ++i) { 164 | layer.insertEdge(2 * i, 2 * (i + 1)); 165 | } 166 | 167 | ClusteringWorkspace ws(layer, x_segments); 168 | CosineDistance dist; 169 | 170 | IBEdgeSelector::Config config; 171 | config.py_x.score_threshold = 0.9; 172 | // Test cumulative 173 | config.py_x.cumulative = true; 174 | config.py_x.null_task_preprune = false; 175 | config.py_x.top_k = 2; 176 | TestableIBEdgeSelector selector(config); 177 | selector.setup(ws, y_tasks, dist); 178 | 179 | const auto fmt = getDefaultFormat(); 180 | 181 | Eigen::MatrixXd expected_py_x(4, 5); 182 | expected_py_x << 0.9 / 2.9, 0.9 / 2.9, 0.9 / 2.9, 1.0, 1.0, 2.0 / 2.9, 0.0, 0.0, 0.0, 183 | 0.0, 0.0, 2.0 / 2.9, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0 / 2.9, 0.0, 0.0; 184 | EXPECT_TRUE(selector.py_x_.isApprox(expected_py_x, 1e-9)) 185 | << "expected: " << expected_py_x.format(fmt) 186 | << ", result: " << selector.py_x_.format(fmt); 187 | } 188 | 189 | TEST(IBEdgeSelector, SetupNullPruneCorrect) { 190 | IsolatedSceneGraphLayer layer(2); 191 | 192 | NodeEmbeddingMap x_segments; 193 | for (size_t i = 0; i < 3; ++i) { 194 | layer.emplaceNode(2 * i, std::make_unique<NodeAttributes>()); 195 | x_segments[2 * i] = getOneHot(i, 10); 196 | } 197 | for (size_t i = 3; i < 5; ++i) { 198 | layer.emplaceNode(2 * i, std::make_unique<NodeAttributes>()); 199 | x_segments[2 * i] = getOneHot(i, 10) + getOneHot(0, 10); 200 | } 201 | 202 | EmbeddingGroup y_tasks; 203 | for (size_t i = 0; i < 3; ++i) { 204 | y_tasks.embeddings.push_back(getOneHot(i, 10)); 205 | y_tasks.tasks.push_back(std::to_string(i)); 206 | } 207 | 208 | for (size_t i = 0; i < 5; ++i) { 209 | layer.emplaceNode(2 * i + 1, std::make_unique<NodeAttributes>()); 210 | } 211 | 212 | for (size_t i = 0; i < 4; ++i) { 213 | layer.insertEdge(2 * i, 2 * (i + 1)); 214 | } 215 | 216 | ClusteringWorkspace ws(layer, x_segments); 217 | CosineDistance dist; 218 | 219 | IBEdgeSelector::Config config; 220 | config.py_x.score_threshold = 0.9; 221 | // Test null preprune 222 | config.py_x.cumulative = false; 223 | config.py_x.null_task_preprune = true; 224 | config.py_x.top_k = 100; 225 | TestableIBEdgeSelector selector(config); 226 | selector.setup(ws, y_tasks, dist); 227 | 228 | const auto fmt = 
getDefaultFormat(); 229 | 230 | Eigen::MatrixXd expected_py_x(4, 5); 231 | expected_py_x << 0.9 / 1.9, 0.9 / 1.9, 0.9 / 1.9, 1.0, 1.0, 1.0 / 1.9, 0.0, 0.0, 0.0, 232 | 0.0, 0.0, 1.0 / 1.9, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0 / 1.9, 0.0, 0.0; 233 | EXPECT_TRUE(selector.py_x_.isApprox(expected_py_x, 1e-9)) 234 | << "expected: " << expected_py_x.format(fmt) 235 | << ", result: " << selector.py_x_.format(fmt); 236 | } 237 | 238 | TEST(IBEdgeSelector, UpdateCorrect) { 239 | IsolatedSceneGraphLayer layer(2); 240 | 241 | NodeEmbeddingMap x_segments; 242 | for (size_t i = 0; i < 5; ++i) { 243 | layer.emplaceNode(2 * i, std::make_unique<NodeAttributes>()); 244 | x_segments[2 * i] = getOneHot(i, 10); 245 | } 246 | 247 | EmbeddingGroup y_tasks; 248 | for (size_t i = 0; i < 3; ++i) { 249 | y_tasks.embeddings.push_back(getOneHot(i, 10)); 250 | y_tasks.tasks.push_back(std::to_string(i)); 251 | } 252 | 253 | for (size_t i = 0; i < 5; ++i) { 254 | layer.emplaceNode(2 * i + 1, std::make_unique<NodeAttributes>()); 255 | } 256 | 257 | for (size_t i = 0; i < 4; ++i) { 258 | layer.insertEdge(2 * i, 2 * (i + 1)); 259 | } 260 | 261 | ClusteringWorkspace ws(layer, x_segments); 262 | CosineDistance dist; 263 | 264 | IBEdgeSelector::Config config; 265 | config.py_x.score_threshold = 1.0; 266 | // Test simplest case 267 | config.py_x.cumulative = false; 268 | config.py_x.null_task_preprune = false; 269 | config.py_x.top_k = 100; // Large so that essentially disabled 270 | TestableIBEdgeSelector selector(config); 271 | selector.setup(ws, y_tasks, dist); 272 | selector.updateFromEdge(EdgeKey(0, 1)); 273 | 274 | const auto fmt = getDefaultFormat(); 275 | 276 | // check that hard-cluster assumptions hold 277 | EXPECT_EQ(selector.pz_(1), 0.0); 278 | EXPECT_EQ(selector.pz_(0), 0.4); 279 | EXPECT_EQ(selector.pz_x_(1, 1), 0.0); 280 | EXPECT_EQ(selector.pz_x_(0, 0), 1.0); 281 | EXPECT_EQ(selector.pz_x_(1, 0), 1.0) << "p(z|x): " << selector.pz_x_.format(fmt); 282 | 283 | Eigen::VectorXd py_0(4); 284 | py_0 << 0.5, 0.25, 0.25, 0.0; 285 | EXPECT_TRUE(selector.py_z_.col(0).isApprox(py_0, 1e-9)) 286 | << "p(y|z=0): " << selector.py_z_.col(0).format(fmt); 287 | Eigen::VectorXd py_1(4); 288 | py_1 << 0.0, 0.0, 0.0, 0.0; 289 | EXPECT_TRUE(selector.py_z_.col(1).isApprox(py_1)) 290 | << "p(y|z=1): " << selector.py_z_.col(1).format(fmt); 291 | } 292 | 293 | TEST(IBEdgeSelector, CompareEdgesCorrect) { 294 | IBEdgeSelector::Config config; 295 | TestableIBEdgeSelector selector(config); 296 | std::pair<EdgeKey, double> e1{{0, 1}, 0.0}; 297 | std::pair<EdgeKey, double> e2{{0, 1}, 0.1}; 298 | EXPECT_TRUE(selector.compareEdges(e1, e2)); 299 | EXPECT_FALSE(selector.compareEdges(e1, e1)); 300 | EXPECT_FALSE(selector.compareEdges(e2, e1)); 301 | } 302 | 303 | TEST(IBEdgeSelector, ComputeDeltaWeightCorrect) { 304 | IsolatedSceneGraphLayer layer(2); 305 | std::vector<NodeId> nodes; 306 | NodeEmbeddingMap x_segments; 307 | for (size_t i = 0; i < 5; ++i) { 308 | layer.emplaceNode(2 * i, std::make_unique<NodeAttributes>()); 309 | x_segments[2 * i] = getOneHot(i, 10); 310 | nodes.push_back(2 * i); 311 | } 312 | 313 | EXPECT_EQ(computeDeltaWeight(layer, std::vector<NodeId>{}), 0.0); 314 | EXPECT_EQ(computeDeltaWeight(layer, nodes), 1.0); 315 | } 316 | 317 | } // namespace clio 318 | -------------------------------------------------------------------------------- /clio/tests/test_object_update_functor.cpp: -------------------------------------------------------------------------------- 1 | #include <clio/object_update_functor.h> 2 | #include <gtest/gtest.h> 3 | 4 | #include "clio_tests/utilities.h" 5 | 6 | namespace clio { 7 | 8 | TEST(IntersectionPolicy, TestOverlap) { 9 | OverlapIntersection::Config 
config; 10 | config.tolerance = 0.0; 11 | OverlapIntersection checker(config); 12 | 13 | KhronosObjectAttributes attrs1; 14 | attrs1.bounding_box.min << 1.0f, 1.0f, 1.0f; 15 | attrs1.bounding_box.max << 2.0f, 2.0f, 2.0f; 16 | 17 | KhronosObjectAttributes attrs2; 18 | attrs2.bounding_box.min << 3.0f, 3.0f, 3.0f; 19 | attrs2.bounding_box.max << 4.0f, 4.0f, 4.0f; 20 | 21 | EXPECT_FALSE(checker(attrs1, attrs2)); 22 | 23 | // make sure the two bounding boxes overlap 24 | attrs1.bounding_box.max << 3.5f, 3.5f, 3.5f; 25 | EXPECT_TRUE(checker(attrs1, attrs2)); 26 | } 27 | 28 | struct ObjectUpdateFunctorTests : public ::testing::Test { 29 | ObjectUpdateFunctorTests() 30 | : graph_info({{DsgLayers::SEGMENTS, 's'}, 31 | {DsgLayers::OBJECTS, 'o'}, 32 | {DsgLayers::PLACES, 'p'}}) { 33 | config.tasks = test::TestEmbeddingGroup::getDefault(2); 34 | } 35 | 36 | void SetUp() override {} 37 | 38 | void addSegment(size_t index, 39 | double min, 40 | double max, 41 | std::optional<size_t> onehot_index = std::nullopt) { 42 | auto attrs = std::make_unique<KhronosObjectAttributes>(); 43 | attrs->position << (min + max) / 2.0, 0.0, 0.0; 44 | 45 | Eigen::Vector3f x_min(min, -1.0, -1.0); 46 | Eigen::Vector3f x_max(max, 1.0, 1.0); 47 | attrs->bounding_box = BoundingBox(x_min, x_max); 48 | attrs->semantic_feature = 49 | test::TestEmbeddingGroup::getEmbedding(onehot_index.value_or(index)); 50 | 51 | graph().emplaceNode(DsgLayers::SEGMENTS, NodeSymbol('s', index), std::move(attrs)); 52 | } 53 | 54 | DynamicSceneGraph& graph() { return *graph_info.graph; } 55 | 56 | SharedDsgInfo graph_info; 57 | ObjectUpdateFunctor::Config config; 58 | }; 59 | 60 | TEST_F(ObjectUpdateFunctorTests, AddEdges) { 61 | ObjectUpdateFunctor functor(config); 62 | addSegment(0, -1.0, 1.0, 1); 63 | addSegment(1, 0.5, 1.5, 1); 64 | addSegment(2, 2.0, 3.0, 0); 65 | 66 | functor.call(graph_info, {}); 67 | EXPECT_TRUE(graph().hasEdge("s0"_id, "s1"_id)); 68 | EXPECT_FALSE(graph().hasEdge("s0"_id, "s2"_id)); 69 | EXPECT_FALSE(graph().hasEdge("s1"_id, "s2"_id)); 70 | 71 | // bridge two components 72 | addSegment(3, 1.2, 2.5, 0); 73 | functor.call(graph_info, {}); 74 | 75 | EXPECT_FALSE(graph().hasEdge("s3"_id, "s0"_id)); 76 | EXPECT_TRUE(graph().hasEdge("s3"_id, "s1"_id)); 77 | EXPECT_TRUE(graph().hasEdge("s3"_id, "s2"_id)); 78 | } 79 | 80 | } // namespace clio 81 | -------------------------------------------------------------------------------- /clio/tests/test_probability_utilities.cpp: -------------------------------------------------------------------------------- 1 | #include <clio/probability_utilities.h> 2 | #include <gtest/gtest.h> 3 | #include 4 | 5 | namespace clio { 6 | 7 | Eigen::VectorXd getBinaryProbability(double p_true) { 8 | Eigen::VectorXd p(2); 9 | p << p_true, 1.0 - p_true; 10 | return p; 11 | } 12 | 13 | TEST(ProbabilityUtilities, TestShannonEntropy) { 14 | const auto fmt = getDefaultFormat(); 15 | { // test case 1: no entropy 16 | Eigen::VectorXd p = getBinaryProbability(0.0); 17 | EXPECT_NEAR(shannonEntropy(p), 0.0, 1.0e-9) << "input: " << p.format(fmt); 18 | } 19 | 20 | { // test case 2: also no entropy 21 | Eigen::VectorXd p = getBinaryProbability(1.0); 22 | EXPECT_NEAR(shannonEntropy(p), 0.0, 1.0e-9) << "input: " << p.format(fmt); 23 | } 24 | 25 | { // test case 3: max entropy (of 1 bit) 26 | Eigen::VectorXd p = getBinaryProbability(0.5); 27 | EXPECT_NEAR(shannonEntropy(p), 1.0, 1.0e-9) << "input: " << p.format(fmt); 28 | } 29 | } 30 | 31 | TEST(ProbabilityUtilities, TestJSDivergence) { 32 | { // test case 1: complete divergence 33 | Eigen::MatrixXd dists = Eigen::MatrixXd::Identity(2, 2); 34 | 
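    // Worked check: the mixture 0.5 * p1 + 0.5 * p2 = [0.5, 0.5] carries 1 bit of
    // entropy, while each one-hot column has zero entropy, so JSD = 1 - 0 = 1.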
Eigen::VectorXd priors = getBinaryProbability(0.5); 35 | EXPECT_NEAR(jensenShannonDivergence(dists, priors), 1.0, 1.0e-9); 36 | } 37 | 38 | { // test case 2: no divergence 39 | Eigen::MatrixXd dists(2, 2); 40 | dists << 0.5, 0.5, 0.5, 0.5; 41 | Eigen::VectorXd priors = getBinaryProbability(0.5); 42 | EXPECT_NEAR(jensenShannonDivergence(dists, priors), 0.0, 1.0e-9); 43 | } 44 | 45 | { // test case 3: no divergence (deterministic prior) 46 | Eigen::MatrixXd dists = Eigen::MatrixXd::Identity(5, 5); 47 | Eigen::VectorXd priors = Eigen::VectorXd::Zero(5); 48 | priors(3) = 1.0; 49 | EXPECT_NEAR(jensenShannonDivergence(dists, priors), 0.0, 1.0e-9); 50 | } 51 | 52 | { // test case 4: complete divergence (uniform prior) 53 | Eigen::MatrixXd dists = Eigen::MatrixXd::Identity(5, 5); 54 | Eigen::VectorXd priors = Eigen::VectorXd::Constant(5, 0.2); 55 | EXPECT_NEAR(jensenShannonDivergence(dists, priors), std::log2(5), 1.0e-9); 56 | } 57 | 58 | { // test case 5: interesting probabilities 59 | Eigen::MatrixXd dists = Eigen::MatrixXd::Zero(3, 2); 60 | dists << 3.0 / 8.0, 2.0 / 8.0, 4.0 / 8.0, 3.0 / 8.0, 1.0 / 8.0, 3.0 / 8.0; 61 | Eigen::VectorXd priors = getBinaryProbability(0.5); 62 | // D(p1||M) = 0.06996044115887057 63 | // D(p2||M) = 0.055481756047425 64 | EXPECT_NEAR(jensenShannonDivergence(dists, priors), 0.06272109860314778, 1.0e-9); 65 | } 66 | } 67 | 68 | TEST(ProbabilityUtilities, TestMutualInformation) { 69 | const auto fmt = getDefaultFormat(); 70 | { // test case 1: mutual info of 1 71 | Eigen::MatrixXd pa_b = Eigen::MatrixXd::Identity(2, 2); 72 | Eigen::MatrixXd p_a = getBinaryProbability(0.5); 73 | Eigen::MatrixXd p_b = getBinaryProbability(0.5); 74 | EXPECT_NEAR(mutualInformation(p_a, p_b, pa_b), 1.0, 1.0e-9) 75 | << "p(a): " << p_a.format(fmt) << ", p(b): " << p_b.format(fmt) 76 | << ", p(a|b): " << pa_b.format(fmt); 77 | } 78 | 79 | { // test case 2: mutual info of 0 80 | Eigen::MatrixXd pa_b(2, 2); 81 | pa_b << 0.5, 0.5, 0.5, 0.5; 82 | Eigen::MatrixXd p_a = getBinaryProbability(0.5); 83 | Eigen::MatrixXd p_b = getBinaryProbability(0.5); 84 | EXPECT_NEAR(mutualInformation(p_a, p_b, pa_b), 0.0, 1.0e-9) 85 | << "p(a): " << p_a.format(fmt) << ", p(b): " << p_b.format(fmt) 86 | << ", p(a|b): " << pa_b.format(fmt); 87 | } 88 | 89 | { // test case 3: mutual info for bad distribution 90 | Eigen::MatrixXd pa_b(2, 2); 91 | pa_b << 0.5, 0.5, 0.5, 0.5; 92 | Eigen::MatrixXd p_a = getBinaryProbability(1.0); 93 | Eigen::MatrixXd p_b = getBinaryProbability(0.5); 94 | // TODO(nathan) technically, mutual information is non-negative, but the joint 95 | // distribution we're providing isn't valid 96 | EXPECT_NEAR(mutualInformation(p_a, p_b, pa_b), -0.5, 1.0e-9) 97 | << "p(a): " << p_a.format(fmt) << ", p(b): " << p_b.format(fmt) 98 | << ", p(a|b): " << pa_b.format(fmt); 99 | } 100 | } 101 | 102 | } // namespace clio 103 | -------------------------------------------------------------------------------- /clio_batch/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MIT-SPARK/Clio/f912b7c08fb0e029d80fd6b05eccee5009199ce4/clio_batch/__init__.py -------------------------------------------------------------------------------- /clio_batch/cluster_plotting.py: -------------------------------------------------------------------------------- 1 | """Various plotting functions to look at results in 2D.""" 2 | 3 | import matplotlib.pyplot as plt 4 | import distinctipy 5 | import networkx as nx 6 | import matplotlib.patches 7 | import numpy as 
np 8 | 9 | 10 | def plot_requery_weights(ws, picker): 11 | """Plot requery weight values.""" 12 | fig, ax = plt.subplots() 13 | weights = [x[1] for x in ws.history] 14 | ax.plot(weights) 15 | ax.set_xlabel("Iterations") 16 | ax.set_ylabel("Utility") 17 | plt.show() 18 | 19 | 20 | def plot_ib_weights(ws, picker): 21 | """Plot IB weight values.""" 22 | fig, ax = plt.subplots(2, sharex=True) 23 | ax[0].plot(picker.deltas) 24 | ax[0].set_ylabel("delta(m)") 25 | ax[1].plot(picker.betas) 26 | ax[1].set_ylabel("Beta") 27 | ax[0].set_xlabel("Iterations") 28 | plt.show() 29 | 30 | 31 | def draw_clusters( 32 | ax, 33 | G, 34 | ws, 35 | edge_color=(0.22, 0.22, 0.22, 0.4), 36 | node_size=40, 37 | edge_width=0.5, 38 | pastel_factor=0.7, 39 | **kwargs, 40 | ): 41 | """Draw resulting clusters.""" 42 | pos = {x: G.nodes[x]["position"][:2] for x in G} 43 | # make colormap indexed by cluster id 44 | cmap = distinctipy.get_colors(ws.num_clusters, pastel_factor=pastel_factor) 45 | cmap_lookup = {x: cmap[i] for i, x in ws.cluster_ids.items()} 46 | # assign colors by looking up cluster id for each node 47 | cluster_map = ws.cluster_map 48 | colors = [cmap_lookup[cluster_map[x]] for x in G] 49 | # draw the graph 50 | nx.draw_networkx_edges(G, pos, edge_color=edge_color, width=edge_width, ax=ax) 51 | nx.draw_networkx_nodes( 52 | G, pos, node_color=colors, node_size=node_size, ax=ax, **kwargs 53 | ) 54 | ax.set_aspect("equal") 55 | 56 | 57 | def draw_task_clusters( 58 | ax, 59 | G, 60 | ws, 61 | score, 62 | edge_color=(0.22, 0.22, 0.22, 0.4), 63 | node_size=40, 64 | edge_width=0.5, 65 | pastel_factor=0.7, 66 | **kwargs, 67 | ): 68 | """Draw clusters colored by nearest task.""" 69 | pos = {x: G.nodes[x]["position"][:2] for x in G} 70 | cmap = distinctipy.get_colors(len(score.tasks), pastel_factor=pastel_factor) 71 | 72 | cluster_features = np.array(ws.get_merged_features(G, f_score=score)) 73 | best_tasks = score.get_best_tasks(cluster_features) 74 | 75 | cmap_lookup = {x: cmap[best_tasks[i]] for i, x in ws.cluster_ids.items()} 76 | colors = [cmap_lookup[ws.clusters[ws.order[x]]] for x in G] 77 | nx.draw_networkx_edges(G, pos, edge_color=edge_color, width=edge_width, ax=ax) 78 | nx.draw_networkx_nodes(G, pos, node_color=colors, node_size=node_size, ax=ax) 79 | 80 | patches = [matplotlib.patches.Patch(edgecolor="k", facecolor=c) for c in cmap] 81 | ax.legend( 82 | patches, score.tasks, loc="lower left", ncol=3, bbox_to_anchor=(-0.5, -0.15) 83 | ) 84 | ax.set_aspect("equal") 85 | 86 | 87 | def draw_best_tasks( 88 | ax, 89 | G, 90 | ws, 91 | score, 92 | edge_color=(0.22, 0.22, 0.22, 0.4), 93 | node_size=40, 94 | edge_width=0.5, 95 | pastel_factor=0.7, 96 | **kwargs, 97 | ): 98 | """Draw input graph colored by best task.""" 99 | pos = {x: G.nodes[x]["position"][:2] for x in G} 100 | cmap = distinctipy.get_colors(len(score.tasks), pastel_factor=pastel_factor) 101 | 102 | X = np.array([G.nodes[x]["semantic_feature"] for x in G]) 103 | best_tasks = score.get_best_tasks(X) 104 | colors = [cmap[x] for x in best_tasks] 105 | nx.draw_networkx_edges(G, pos, edge_color=edge_color, width=edge_width, ax=ax) 106 | nx.draw_networkx_nodes(G, pos, node_color=colors, node_size=node_size, ax=ax) 107 | 108 | patches = [matplotlib.patches.Patch(edgecolor="k", facecolor=c) for c in cmap] 109 | ax.legend( 110 | patches, score.tasks, loc="lower left", ncol=3, bbox_to_anchor=(-0.5, -0.15) 111 | ) 112 | ax.set_aspect("equal") 113 | -------------------------------------------------------------------------------- 
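A minimal usage sketch for the plotting helpers above (illustrative only: `G` stands in for a networkx segment graph with `position` and `semantic_feature` node data, and `ws` and `score` for the `Workspace` and `Score` classes defined in `cluster_utilities.py` below):

import matplotlib.pyplot as plt
from clio_batch.cluster_plotting import draw_best_tasks, draw_clusters

# left: clusters colored by cluster id; right: raw segments colored by best task
fig, (ax_left, ax_right) = plt.subplots(1, 2)
draw_clusters(ax_left, G, ws)
draw_best_tasks(ax_right, G, ws, score)
plt.show()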
/clio_batch/cluster_utilities.py: -------------------------------------------------------------------------------- 1 | """Various utility classes for clustering.""" 2 | 3 | import spark_dsg as dsg 4 | import clio_batch.helpers as helpers 5 | import matplotlib.pyplot as plt 6 | import open3d as o3d 7 | import numpy as np 8 | 9 | from clio_eval.utils import min_from_box_position, max_from_box_position 10 | 11 | def get_last_feature(features): 12 | """Get the average of features in a matrix of features.""" 13 | x = np.squeeze(features) 14 | if len(x.shape) == 1: 15 | return x 16 | return np.average(x, axis=1) 17 | 18 | def _get_field_average(attrs, f, g=np.mean): 19 | X = np.array([f(x) for x in attrs]) 20 | return g(X, axis=0) 21 | 22 | def _get_mesh(attrs): 23 | mesh_faces = attrs.mesh().get_faces() 24 | mesh_vertices = attrs.mesh().get_vertices() 25 | mesh_vertices[:3, :] += min_from_box_position(attrs) 26 | mesh_vertices = np.transpose(mesh_vertices) 27 | 28 | mesh = o3d.geometry.TriangleMesh() 29 | mesh.vertices = o3d.utility.Vector3dVector(mesh_vertices[:, :3]) 30 | mesh.vertex_colors = o3d.utility.Vector3dVector(mesh_vertices[:, 3:]) 31 | mesh.triangles = o3d.utility.Vector3iVector(np.transpose(mesh_faces)) 32 | return mesh 33 | 34 | def merge_object_attributes(node_list): 35 | """Combine object segment attributes into the final object node.""" 36 | all_attrs = [node.attributes for node in node_list] 37 | 38 | merged = dsg.KhronosObjectAttributes() 39 | # NodeAttributes fields 40 | merged.position = _get_field_average(all_attrs, lambda x: x.position) 41 | merged.last_update_time_ns = max([x.last_update_time_ns for x in all_attrs]) 42 | # SemanticNodeAttributes fields 43 | 44 | bbox_min = _get_field_average(all_attrs, lambda x: min_from_box_position(x), np.min) 45 | bbox_max = _get_field_average(all_attrs, lambda x: max_from_box_position(x), np.max) 46 | center = (bbox_min + bbox_max) / 2.0 47 | extent = bbox_max - bbox_min 48 | merged.bounding_box = dsg.BoundingBox(extent, center) 49 | merged.semantic_feature = _get_field_average( 50 | all_attrs, lambda x: get_last_feature(x.semantic_feature) 51 | ) 52 | # KhronosObjectAttributes fields 53 | combined_mesh = o3d.geometry.TriangleMesh() 54 | for attrs in all_attrs: 55 | combined_mesh += _get_mesh(attrs) 56 | 57 | merged_mesh_vertices = np.zeros((6, np.asarray(combined_mesh.vertices).shape[0])) 58 | merged_mesh_vertices[:3, :] = ( 59 | np.transpose(np.asarray(combined_mesh.vertices)) - bbox_min 60 | ) 61 | merged_mesh_vertices[3:, :] = np.transpose(np.asarray(combined_mesh.vertex_colors)) 62 | merged_mesh_faces = np.transpose(np.asarray(combined_mesh.triangles)) 63 | merged.mesh().set_vertices(merged_mesh_vertices) 64 | merged.mesh().set_faces(merged_mesh_faces) 65 | 66 | return merged 67 | 68 | 69 | def rewrite_objects(G, cluster_ws): 70 | """Rewrite object layer using clustering results.""" 71 | G_new = G.clone() 72 | for n in G.get_layer(dsg.DsgLayers.OBJECTS).nodes: 73 | G_new.remove_node(n.id.value) 74 | 75 | for cluster_idx, cluster in enumerate(cluster_ws.cluster_nodes): 76 | segments = [G.get_node(n) for n in cluster] 77 | attrs = merge_object_attributes(segments) 78 | node_id = dsg.NodeSymbol("O", cluster_idx).value 79 | G_new.add_node(dsg.DsgLayers.OBJECTS, node_id, attrs) 80 | 81 | return G_new 82 | 83 | 84 | class Score: 85 | """Wrapper around cosine similarity.""" 86 | 87 | def __init__( 88 | self, 89 | tasks, 90 | model_name="ViT-B/32", 91 | use_lerf_loss=False, 92 | cannonical_phrases="object*stuff*things*texture", 93 | ): 
94 | """Set up a score function.""" 95 | self.use_lerf_loss = use_lerf_loss 96 | 97 | print(f"loading {model_name}...") 98 | clip_handler = helpers.ClipHandler(model_name) 99 | print(f"finished loading {model_name}") 100 | 101 | task_phrases = [x for x in tasks.split("*") if x != ""] 102 | self.tasks = task_phrases 103 | N_tasks = len(task_phrases) 104 | print(f"computing {N_tasks} task embeddings...") 105 | self.x_t = clip_handler.get_text_clip_features(task_phrases) 106 | 107 | if not use_lerf_loss: 108 | self.x_c = None 109 | else: 110 | cannonical_phrases = [x for x in cannonical_phrases.split("*") if x != ""] 111 | N_cannonical = len(cannonical_phrases) 112 | print(f"computing {N_cannonical} cannonical embeddings...") 113 | self.x_c = clip_handler.get_text_clip_features(cannonical_phrases) 114 | 115 | del clip_handler 116 | print("finished computing embeddings") 117 | 118 | def get_scores(self, X): 119 | """Get score matrix.""" 120 | return helpers.compute_sim_to_tasks(self.x_t, X, self.x_c, self.use_lerf_loss) 121 | 122 | def get_best_tasks(self, X, show=False): 123 | """Get best task index.""" 124 | scores = helpers.compute_sim_to_tasks(self.x_t, X, self.x_c, self.use_lerf_loss) 125 | if show: 126 | fig, ax = plt.subplots() 127 | im = ax.imshow(scores, aspect="auto", interpolation="none") 128 | ax.figure.colorbar(im, ax=ax) 129 | plt.show() 130 | 131 | return np.argmax(scores, axis=0) 132 | 133 | def draw_scores(self, ax, X): 134 | """Get best task index.""" 135 | scores = helpers.compute_sim_to_tasks(self.x_t, X, self.x_c, self.use_lerf_loss) 136 | im = ax.imshow(scores, aspect="auto", interpolation="none") 137 | ax.figure.colorbar(im, ax=ax) 138 | 139 | def __call__(self, X): 140 | """Score features.""" 141 | s = helpers.compute_cosine_sim(self.x_t, X, self.x_c, self.use_lerf_loss) 142 | if X.shape[0] > 1: 143 | return np.squeeze(s) 144 | else: 145 | return s 146 | 147 | 148 | class Workspace: 149 | """Class to hold useful datamembers.""" 150 | 151 | def __init__(self, G): 152 | """Initialize workspace with features and scores.""" 153 | self.N = G.number_of_nodes() 154 | self.clusters = np.arange(G.number_of_nodes()) 155 | self.order = {x: i for i, x in enumerate(G)} 156 | self.node_lookup = {i: x for x, i in self.order.items()} 157 | self.history = [] 158 | 159 | def record_merge(self, edge, weight): 160 | """Save history for merges.""" 161 | # contraction order is target -> source 162 | orig = self.order[edge[1]] 163 | self.clusters[self.clusters == orig] = self.order[edge[0]] 164 | self.history.append((edge, weight)) 165 | 166 | @property 167 | def cluster_ids(self): 168 | """Get map between cluster ID and index.""" 169 | return {x: i for x, i in enumerate(np.unique(self.clusters))} 170 | 171 | @property 172 | def cluster_map(self): 173 | """Map between node ID and parent cluster.""" 174 | return {x: self.clusters[idx] for x, idx in self.order.items()} 175 | 176 | @property 177 | def cluster_nodes(self): 178 | """Map between cluster ID and nodes.""" 179 | cluster_ids = np.unique(self.clusters) 180 | clusters = [] 181 | for c in cluster_ids: 182 | indices = np.argwhere(self.clusters == c) 183 | clusters.append([self.node_lookup[int(x)] for x in indices]) 184 | 185 | return clusters 186 | 187 | @property 188 | def num_clusters(self): 189 | """Get number of clusters.""" 190 | return len(np.unique(self.clusters)) 191 | 192 | def get_merged_features(self, G, f_score=None): 193 | """Get averaged features of each cluster.""" 194 | result = [] 195 | clusters = self.cluster_nodes 196 | 
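        # When f_score is provided, each member feature is weighted by its task
        # score below, so segments that respond strongly to some task dominate the
        # merged cluster feature; otherwise this is a plain unweighted average.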
for idx, cluster in enumerate(clusters): 197 | if len(cluster) == 1: 198 | result.append(G.nodes[cluster[0]]["semantic_feature"]) 199 | continue 200 | 201 | X_cluster = np.array([G.nodes[idx]["semantic_feature"] for idx in cluster]) 202 | weights = f_score(X_cluster) if f_score is not None else None 203 | result.append(np.average(X_cluster, axis=0, weights=weights)) 204 | 205 | return result 206 | 207 | def __len__(self): 208 | """Get number of features in workspace.""" 209 | return self.N 210 | -------------------------------------------------------------------------------- /clio_batch/helpers.py: -------------------------------------------------------------------------------- 1 | """Helper methods for clustering.""" 2 | import torch 3 | from PIL import Image 4 | import cv2 5 | import yaml 6 | import matplotlib.pyplot as plt 7 | import numpy as np 8 | import copy 9 | import networkx as nx 10 | 11 | 12 | def default_device(use_cuda=True): 13 | """Get default device to use for pytorch.""" 14 | return "cuda" if torch.cuda.is_available() and use_cuda else "cpu" 15 | 16 | 17 | class ClipHandler: 18 | """Wrapper around various CLIP models.""" 19 | 20 | def __init__(self, model_name, device=None, pretrained="laion2B_s32B_b79K"): 21 | """Construct a wrapper around CLIP.""" 22 | print(model_name) 23 | self.device = default_device() if device is None else device 24 | self.use_open_clip = model_name == "ViT-H-14" 25 | if self.use_open_clip: 26 | import open_clip 27 | self.model, _, self.preprocess = open_clip.create_model_and_transforms( 28 | model_name, pretrained=pretrained, device=self.device) 29 | self.tokenizer = open_clip.get_tokenizer(model_name) 30 | else: 31 | import clip 32 | self.model, self.preprocess = clip.load(model_name, device=self.device) 33 | self.tokenizer = clip.tokenize 34 | 35 | def get_text_clip_features(self, strings): 36 | """Compute text embeddings for all strings in list.""" 37 | texts = torch.cat([self.tokenizer(string) 38 | for string in strings]).to(self.device) 39 | with torch.no_grad(): 40 | # Access the underlying model when using DataParallel 41 | text_features = self.model.encode_text(texts) 42 | return text_features.cpu().numpy() 43 | 44 | def process_and_encode_image(self, rgb_img): 45 | """Compute image CLIP embedding.""" 46 | pil_img = Image.fromarray(rgb_img) 47 | image = self.preprocess(pil_img).unsqueeze(0).to(self.device) 48 | 49 | with torch.no_grad(): 50 | return np.squeeze(self.model.encode_image(image).cpu().numpy()) 51 | 52 | 53 | def compute_sim_to_tasks(v1, v2, canonical_features=None, use_lerf_canonical=True): 54 | # if using multiple tasks, task MUST be v1 55 | if type(v1) == list: 56 | v1 = copy.deepcopy(v1) 57 | 58 | v1 = np.array(v1) 59 | model_size = v1.shape[-1] 60 | v1 = v1.reshape((-1, model_size)) 61 | v2 = np.array(v2).reshape((-1, model_size)) 62 | 63 | v1_norm = np.linalg.norm(v1, axis=v1.ndim - 1) 64 | v2_norm = np.linalg.norm(v2, axis=v2.ndim - 1) 65 | v1 = v1/(v1_norm[:, np.newaxis]) 66 | v2 = v2/(v2_norm[:, np.newaxis]) 67 | 68 | if use_lerf_canonical and canonical_features is not None: 69 | canonical_features = canonical_features.reshape((-1, model_size)) 70 | canonical_norm = np.linalg.norm( 71 | canonical_features, axis=canonical_features.ndim - 1) 72 | canonical_features = canonical_features/(canonical_norm[:, np.newaxis]) 73 | canonical_term = np.max(np.exp(canonical_features @ v2.T), axis=0, keepdims=True) 74 | result = np.exp(v1 @ v2.T) / (canonical_term + np.exp(v1 @ v2.T)) 75 | 76 | else: 77 | result = (v1 @ v2.T) 78 | 79 | 
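    # result has one row per task and one column per query feature; with the
    # LERF-style canonical term it is a pairwise softmax ratio in [0, 1],
    # otherwise a raw cosine similarity (both inputs are l2-normalized above).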
return result 80 | 81 | 82 | def compute_cosine_sim(v1, v2, canonical_features=None, use_lerf_canonical=True, get_assignments=False): 83 | # for each v2, pick the v1 that maximizes the sim and return that value 84 | sims = compute_sim_to_tasks( 85 | v1, v2, canonical_features=canonical_features, use_lerf_canonical=use_lerf_canonical) 86 | best_vals = np.max(sims, axis=0, keepdims=True) 87 | 88 | best_idxes = np.argmax(sims, axis=0) 89 | 90 | if get_assignments: 91 | return best_vals, best_idxes 92 | else: 93 | return best_vals 94 | 95 | 96 | def parse_tasks_from_yaml(yaml_file): 97 | prompts = [] 98 | with open(yaml_file, "r") as stream: 99 | prompts = [prompt for prompt in yaml.safe_load(stream)] 100 | return prompts 101 | 102 | 103 | def crop_image(frame, bbox, padding=4): 104 | frame = frame.copy() 105 | x_min, y_min, x_max, y_max = bbox 106 | 107 | # Check and adjust padding to avoid going beyond the image borders 108 | image_height, image_width, _ = frame.shape 109 | left_padding = min(padding, x_min) 110 | top_padding = min(padding, y_min) 111 | right_padding = min(padding, image_width - x_max) 112 | bottom_padding = min(padding, image_height - y_max) 113 | 114 | # Apply the adjusted padding 115 | x_min -= left_padding 116 | y_min -= top_padding 117 | x_max += right_padding 118 | y_max += bottom_padding 119 | 120 | x_min, y_min, x_max, y_max = int(x_min), int(y_min), int(x_max), int(y_max) 121 | 122 | cropped_image = frame[y_min:y_max, x_min:x_max, :] 123 | return cropped_image 124 | 125 | 126 | def apply_mask(cv_img, mask): 127 | _, binary_mask = cv2.threshold(mask.astype(np.uint8), .5, 255, cv2.THRESH_BINARY) 128 | 129 | # Use OpenCV's bitwise_and to apply the mask 130 | result_img = cv2.bitwise_and(cv_img, cv_img, mask=binary_mask) 131 | return result_img 132 | 133 | 134 | def create_obj_masks(masks, bboxes, clusters): 135 | if len(masks) == 0: 136 | return None 137 | mask_shape = masks[0].shape 138 | obj_masks = [] 139 | boxes = [] 140 | for cluster in clusters: 141 | obj_mask = np.zeros(mask_shape, dtype=bool) 142 | obj_box = None 143 | for mask_idx in cluster: 144 | obj_mask = obj_mask | masks[mask_idx] 145 | if bboxes is not None: 146 | cluster_box = bboxes[mask_idx].copy() 147 | obj_box = merge_boxes(obj_box, cluster_box) 148 | obj_masks.append(obj_mask) 149 | boxes.append(obj_box) 150 | if bboxes is None: 151 | return np.array(obj_masks) 152 | return np.array(obj_masks), np.array(boxes) 153 | 154 | 155 | def visualize_clustering(G, cv_image, masks, bboxes, clusters, ax, scale=100.0): 156 | import matplotlib 157 | print(clusters) 158 | print(len(masks)) 159 | ax.clear() 160 | masks = masks.copy() 161 | bboxes = bboxes.copy() 162 | masks, bboxes = create_obj_masks(masks, bboxes, clusters) 163 | 164 | node_images = [] 165 | for node, mask in enumerate(masks): 166 | masked_img = apply_mask( 167 | cv_image, mask) 168 | masked_img = crop_image(masked_img, bboxes[node]) 169 | node_images.append(masked_img) 170 | print(len(masks)) 171 | # Use Kamada-Kawai layout 172 | pos = nx.kamada_kawai_layout(G, weight='weight.0') # Specify the edge weight attribute (the first element) 173 | edge_weights = [scale*weight for _, _, weight in G.edges(data='weight')] 174 | edge_collection = nx.draw_networkx_edges(G, pos, edge_color=edge_weights, edge_cmap=plt.cm.inferno, edge_vmin=min(edge_weights), edge_vmax=max(edge_weights), width=2.0) 175 | cbar = plt.colorbar(edge_collection, label='Edge Weight') 176 | 177 | # Draw edges with weights as labels 178 | edge_labels = {(u, v): 
f'{data["weight"]*scale:.2f}' for u, v, data in G.edges(data=True)} 179 | nx.draw_networkx_edge_labels(G, pos, edge_labels=edge_labels, font_color='red') 180 | 181 | # Draw nodes with images 182 | index = 0 183 | for node, (x, y) in pos.items(): 184 | masked_img = node_images[index] 185 | img = Image.fromarray(masked_img) 186 | img = img.resize((50, 50)) # Adjust the size of the displayed image 187 | img_array = np.array(img) 188 | imagebox = matplotlib.offsetbox.OffsetImage(img_array, zoom=1.0, resample=True, clip_path=None) 189 | ab = matplotlib.offsetbox.AnnotationBbox(imagebox, (x, y), frameon=False, pad=0.0) 190 | ax.add_artist(ab) 191 | index += 1 192 | 193 | # # Show the plot 194 | # plt.axis('off') # Hide the axis 195 | # plt.show() 196 | -------------------------------------------------------------------------------- /clio_batch/ib_cluster.py: -------------------------------------------------------------------------------- 1 | """Agglomerative clustering method.""" 2 | import yaml 3 | import numpy as np 4 | import networkx as nx 5 | import time 6 | import matplotlib.pyplot as plt 7 | import json 8 | import os 9 | import logging 10 | import distinctipy 11 | 12 | import clio_batch.information_metrics as metrics 13 | import clio_batch.helpers as helpers 14 | 15 | 16 | class ClusterIBConfig(): 17 | def __init__(self, config_file_path): 18 | with open(config_file_path, 'r') as stream: 19 | config = yaml.safe_load(stream) 20 | 21 | self.debug = config.get('debug', False) 22 | self.debug_folder = config.get('debug_folder') 23 | self.sims_thres = config.get('sims_thres') 24 | self.delta = config.get('delta') 25 | self.top_k = config.get('top_k_tasks') 26 | self.cumulative = config.get('cumulative') 27 | self.lerf_loss_cannonical_phrases = config.get( 28 | 'lerf_loss_cannonical_phrases') 29 | self.use_lerf_loss = config.get('use_lerf_loss') 30 | 31 | def dump(self, output): 32 | param_dict = {} 33 | param_dict["name"] = "Modified-AIB" 34 | param_dict["sims_thres"] = self.sims_thres 35 | param_dict["delta"] = self.delta 36 | param_dict["top_k_tasks"] = self.top_k 37 | param_dict["cumulative"] = self.cumulative 38 | param_dict["lerf_loss_cannonical_phrases"] = self.lerf_loss_cannonical_phrases 39 | param_dict["use_lerf_loss"] = self.use_lerf_loss 40 | param_json = json.dumps(param_dict, indent=4) 41 | 42 | filename = "{}.json".format("cluster_config") 43 | param_log = os.path.join(output, filename) 44 | with open(param_log, "w") as outfile: 45 | outfile.write(param_json) 46 | 47 | 48 | class ClusterIB(): 49 | def __init__(self, config): 50 | self.config = config 51 | self.n = None 52 | self.px = None 53 | self.py = None 54 | self.py_x = None 55 | self.num_clusters = [] 56 | self.deltas = [] 57 | self.nx_graph = None 58 | self.current_clusters = {} 59 | self.lerf_loss_cannonical_features = None 60 | self.colors = None 61 | self.pos = None 62 | self.Ixy = None 63 | self.dIcy_weight = 1.0 64 | 65 | self.idx_mapping = {} 66 | 67 | def setup_py_x(self, region_features, task_features): 68 | self.px, self.py_x, self.py = self.compute_initial_probabilities( 69 | region_features, task_features) 70 | self.n = len(self.px) 71 | self.Ixy = metrics.mutual_information(self.px, self.py, self.py_x) 72 | 73 | def compute_initial_probabilities(self, region_features, task_features): 74 | num_tasks = task_features.shape[0] 75 | m = num_tasks + 1 76 | n = int(len(region_features)) 77 | k = min(num_tasks + 1, self.config.top_k) 78 | py_x_tmp = np.zeros((m, n)) 79 | py_x_tmp[0, :] = self.config.sims_thres 80 | 
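        # Rows of p(y|x): row 0 is a "null task" pinned at sims_thres, rows 1..m-1
        # hold each segment's cosine similarity to each task. Only the top-k rows
        # per column survive (accumulated one rank at a time when cumulative),
        # columns whose best match is the null task are collapsed onto it, and
        # every column is then l1-normalized into a PMF.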
logging.debug("running CLIP") 81 | cos_sim = helpers.compute_sim_to_tasks( 82 | task_features, region_features, self.lerf_loss_cannonical_features, self.config.use_lerf_loss) 83 | cos_sim = np.clip(cos_sim, 0, 1000) # make sure no negative values 84 | py_x_tmp[1:, :] = cos_sim 85 | py_x = 1e-12 * np.ones((m, n)) 86 | if self.config.cumulative: 87 | l = 1 88 | else: 89 | l = k 90 | # get top task 91 | top_inds = np.argpartition(py_x_tmp, -1, axis=0)[-1:] 92 | null_tasks = np.where(top_inds==0)[1] 93 | while l <= k: 94 | scale = 1.0 95 | if l == 1: 96 | scale = 1.0 97 | top_inds = np.argpartition(py_x_tmp, -l, axis=0)[-l:] 98 | py_x[top_inds, np.arange(n)] += scale*py_x_tmp[top_inds, np.arange(n)] 99 | l += 1 100 | 101 | py_x[:, null_tasks] = 1e-12 102 | py_x[0, null_tasks] = 1.0 103 | px = np.ones(n) / n 104 | py = np.ones(m) / m 105 | py_x = py_x / np.sum(py_x, axis=0) 106 | return px, py_x, py 107 | 108 | def update_delta_as_part(self, full_region_features, task_features): 109 | # For the case of factorized clustering. 110 | px_full, py_x_full, py = self.compute_initial_probabilities( 111 | full_region_features, task_features) 112 | self.Ixy = metrics.mutual_information(px_full, self.py, py_x_full) 113 | self.dIcy_weight = (self.n / len(full_region_features)) 114 | 115 | def initialize_nx_graph(self, nx_graph): 116 | node_idx = {} 117 | idx = 0 118 | self.nx_graph = nx.Graph() 119 | self.pos = {} 120 | for node in nx_graph.nodes: 121 | self.idx_mapping[idx] = node 122 | self.nx_graph.add_node(idx) 123 | self.pos[idx] = nx_graph.nodes[node]["position"][:2] 124 | node_idx[node] = idx 125 | idx += 1 126 | 127 | for edge in nx_graph.edges: 128 | self.nx_graph.add_edge(node_idx[edge[0]], node_idx[edge[1]]) 129 | 130 | def find_clusters(self): 131 | # p_y_gn_x from normalizing min_sim and task-cosine-sim 132 | 133 | # initialize pc_x 134 | pc_x = np.eye(self.n) 135 | pc = pc_x @ self.px 136 | 137 | py_c = self.py_x * self.px @ np.transpose(pc_x) / pc 138 | 139 | prev_Icy = metrics.mutual_information(pc, self.py, py_c) 140 | 141 | last_merged_node = None 142 | 143 | if self.config.debug: 144 | logging.debug("clusters: \n", self.current_clusters) 145 | self.log_info(pc_x, np.inf) 146 | 147 | t = 0 148 | delta = 0 149 | while True: 150 | if self.nx_graph.number_of_edges() == 0: 151 | break 152 | 153 | if self.config.debug: 154 | logging.debug("clusters: \n", self.current_clusters) 155 | self.log_info(pc_x, delta) 156 | 157 | plt_path = self.config.debug_folder + \ 158 | "/graph_" + str(t) + ".png" 159 | self.plot_current_graph(plt_path, py_c) 160 | 161 | update_edges = [] 162 | if last_merged_node is None: 163 | update_edges = list(self.nx_graph.edges()) 164 | else: 165 | update_edges = [(last_merged_node, nb) 166 | for nb in self.nx_graph.neighbors(last_merged_node)] 167 | 168 | for e in update_edges: 169 | d = self.compute_edge_weight(pc, py_c, e[0], e[1]) 170 | self.nx_graph[e[0]][e[1]]['weight'] = d 171 | 172 | min_edge = min(self.nx_graph.edges(), 173 | key=lambda x: self.nx_graph[x[0]][x[1]]["weight"]) 174 | 175 | # in nx contract edge, right node merge to left node 176 | # we want to stop the merge if it goes over to preserve identical results between batch and factorized 177 | py_c_temp = np.copy(py_c) 178 | pc_temp = np.copy(pc) 179 | pc_x_temp = np.copy(pc_x) 180 | 181 | py_c_temp[:, min_edge[0]] = (py_c[:, min_edge[0]] * pc[min_edge[0]] + 182 | py_c[:, min_edge[1]] * pc[min_edge[1]]) / (pc[min_edge[0]] + pc[min_edge[1]]) 183 | py_c_temp[:, min_edge[1]] = 0 184 | 185 | 
pc_temp[min_edge[0]] = pc[min_edge[0]] + pc[min_edge[1]] 186 | pc_temp[min_edge[1]] = 0 187 | 188 | pc_x_temp[min_edge[0], :] = pc_x[min_edge[0], :] + \ 189 | pc_x[min_edge[1], :] 190 | pc_x_temp[min_edge[1], :] = 0 191 | 192 | Icy = metrics.mutual_information(pc_temp, self.py, py_c_temp) 193 | dIcy = prev_Icy - Icy 194 | prev_Icy = Icy 195 | delta = self.dIcy_weight * dIcy / self.Ixy 196 | logging.debug("delta: %s", delta) 197 | if delta > self.config.delta: 198 | break 199 | 200 | py_c = py_c_temp 201 | pc = pc_temp 202 | pc_x = pc_x_temp 203 | 204 | last_merged_node = min_edge[0] 205 | self.nx_graph = nx.contracted_edge( 206 | self.nx_graph, min_edge, self_loops=False) 207 | 208 | t += 1 209 | 210 | if self.config.debug: 211 | plt_path = self.config.debug_folder + "/cluster_delta.png" 212 | self.generate_plot(plt_path) 213 | 214 | internal_clusters = self.get_clusters_from_pc_x(pc_x) 215 | return self.get_node_clusters(internal_clusters) 216 | 217 | def get_clusters_from_pc_x(self, pc_x): 218 | cluster_mapping = np.argmax(pc_x, axis=0) 219 | clusters = {} 220 | for i in range(pc_x.shape[1]): 221 | if cluster_mapping[i] not in clusters.keys(): 222 | clusters[cluster_mapping[i]] = [] 223 | clusters[cluster_mapping[i]].append(i) 224 | 225 | return list(clusters.values()) 226 | 227 | def get_node_clusters(self, idx_clusters): 228 | node_clusters = [] 229 | for cluster in idx_clusters: 230 | node_cluster = [] 231 | for idx in cluster: 232 | node_cluster.append(self.idx_mapping[idx]) 233 | node_clusters.append(node_cluster) 234 | return node_clusters 235 | 236 | def compute_edge_weight(self, pc, py_c, i, j): 237 | prior = pc[[i, j]] / np.sum(pc[[i, j]]) 238 | weight = (pc[i] + pc[j]) * metrics.js_divergence(py_c[:, [i, j]], prior) 239 | return weight 240 | 241 | def log_info(self, pc_x, delta): 242 | obj_clusters = self.get_clusters_from_pc_x(pc_x) 243 | self.num_clusters.append(len(obj_clusters)) 244 | self.deltas.append(delta) 245 | 246 | def get_plot_pos(self): 247 | if self.pos is None: 248 | return nx.spring_layout(self.nx_graph) 249 | return self.pos 250 | 251 | def plot_current_graph(self, output, py_c): 252 | if self.colors is None: 253 | self.colors = distinctipy.get_colors(py_c.shape[0]) 254 | 255 | plt.figure(figsize=(12, 12)) 256 | relevant_task_idx = np.argmax(py_c, axis=0) 257 | color_map = [self.colors[relevant_task_idx[idx]] 258 | for idx in self.nx_graph.nodes] 259 | nx.draw(self.nx_graph, node_color=color_map, 260 | pos=self.get_plot_pos(), node_size=5, alpha=0.7) 261 | plt.savefig(output) 262 | plt.close() 263 | 264 | def generate_plot(self, output): 265 | fig = plt.figure() 266 | plt.plot(self.num_clusters, self.deltas) 267 | plt.xlabel("Number of Clusters") 268 | plt.ylabel("delta") 269 | plt.yscale('log') 270 | plt.savefig(output) 271 | plt.close() 272 | -------------------------------------------------------------------------------- /clio_batch/information_metrics.py: -------------------------------------------------------------------------------- 1 | """Functions dealing with entropy, information, and divergence.""" 2 | 3 | import numpy as np 4 | 5 | 6 | def shannon_entropy(p): 7 | """Compute Shannon entropy of PMF.""" 8 | return -np.sum(p * np.log2(p), axis=0) 9 | 10 | 11 | def js_divergence(py_c, pc): 12 | """Compute the Jensen-Shannon divergence of the PMF columns of py_c under prior pc.""" 13 | assert py_c.shape[1] == pc.shape[0] 14 | joint_probs = py_c @ pc 15 | sum_entropies = np.sum(pc * shannon_entropy(py_c)) 16 | return shannon_entropy(joint_probs) - sum_entropies 17 | 18 | 19 | def 
mutual_information(px, pc, pc_x): 20 | """Get mutual information between two PMFs.""" 21 | px_ = px[px > 0] 22 | pc_ = pc[pc > 0] 23 | pc_x_ = pc_x[pc > 0, :] 24 | pc_x_ = pc_x_[:, px > 0] 25 | log_term = pc_x_ / pc_[:, None] 26 | log_term[log_term == 0] = 1 # this is to avoid infs 27 | log_term = np.log2(log_term) 28 | return np.sum((pc_x_ * log_term) @ px_) 29 | -------------------------------------------------------------------------------- /clio_batch/object_cluster.py: -------------------------------------------------------------------------------- 1 | """Cluster objects in 3D.""" 2 | import spark_dsg as dsg 3 | import spark_dsg.networkx as dsg_nx 4 | import pathlib 5 | import numpy as np 6 | import networkx as nx 7 | import click 8 | import time 9 | import clio_batch.helpers as helpers 10 | import clio_batch.cluster_utilities as cluster_utilities 11 | # import clio_batch.remote_visualizer as remote_visualizer 12 | 13 | import clio_batch.ib_cluster as cluster 14 | from clio_eval.utils import dsg_object_to_o3d 15 | from clio_eval.evaluate_helpers import get_dsg_version 16 | 17 | def bboxes_intersect(bbox_1, bbox_2, epsilon=0.0): 18 | bboxes = np.vstack((bbox_1.min, bbox_2.min, bbox_1.max, bbox_2.max)) 19 | bboxes = bboxes - np.min(bboxes, axis=0) 20 | bboxes[:2, :] = bboxes[:2, :] - epsilon * (bboxes[2:, :] - bboxes[:2, :]) 21 | bboxes[2:, :] = bboxes[2:, :] + epsilon * (bboxes[2:, :] - bboxes[:2, :]) 22 | return (bboxes[0, :] <= bboxes[3, :]).all()\ 23 | and (bboxes[2, :] >= bboxes[1, :]).all() 24 | 25 | 26 | def contains_object(node, other_node, G_obj_nx): 27 | bbox = G_obj_nx.nodes[node]["bounding_box"] 28 | other_bbox = G_obj_nx.nodes[other_node]["bounding_box"] 29 | 30 | return bbox.is_inside(other_bbox.min) and bbox.is_inside(other_bbox.max) 31 | 32 | 33 | def create_cluster_graphs(G_obj_nx, partition, bbox_dilation): 34 | nodes = [x for x in G_obj_nx] 35 | bboxes = [G_obj_nx.nodes[x]["bounding_box"] for x in G_obj_nx] 36 | if G_obj_nx.number_of_edges() == 0: 37 | for i in range(len(bboxes)): 38 | for j in range(i+1, len(bboxes)): 39 | # check overlap 40 | bbox_i = bboxes[i] 41 | bbox_j = bboxes[j] 42 | if bboxes_intersect(bbox_i, bbox_j, epsilon=bbox_dilation): 43 | G_obj_nx.add_edge(nodes[i], nodes[j]) 44 | 45 | cluster_graphs = [G_obj_nx] 46 | if partition: 47 | cluster_graphs = [G_obj_nx.subgraph( 48 | c).copy() for c in nx.connected_components(G_obj_nx)] 49 | 50 | return cluster_graphs 51 | 52 | 53 | def cluster_objects(full_nx, cluster_nx, task_features, cluster_config): 54 | if cluster_nx.number_of_nodes() <= 1 or cluster_nx.number_of_edges() == 0: 55 | return [list(cluster_nx.nodes)] 56 | 57 | ib_cluster_config = cluster.ClusterIBConfig(cluster_config) 58 | ib_solver = cluster.ClusterIB(ib_cluster_config) 59 | 60 | region_features = np.array( 61 | [np.average(cluster_nx.nodes[x]["semantic_feature"], axis=1) for x in cluster_nx]) 62 | full_region_features = np.array( 63 | [np.average(full_nx.nodes[x]["semantic_feature"], axis=1) for x in full_nx]) 64 | ib_solver.setup_py_x(region_features, task_features) 65 | ib_solver.update_delta_as_part(full_region_features, task_features) 66 | ib_solver.initialize_nx_graph(cluster_nx) 67 | print("initial number of objects", ib_solver.nx_graph.number_of_nodes()) 68 | cluster_assignments = ib_solver.find_clusters() 69 | print("number of objects after clustering", 70 | ib_solver.nx_graph.number_of_nodes()) 71 | return cluster_assignments 72 | 73 | 74 | def update_dsg(G_dsg, cluster_assignments, task_features, threshold): 75 | 
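    # Clears the existing OBJECTS layer, merges each cluster's segments into one
    # candidate object, drops candidates whose best task similarity falls below
    # `threshold`, and re-parents the surviving segments under new object nodes.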
segments_layer = G_dsg.get_layer(dsg.DsgLayers.SEGMENTS)
76 |     node_clusters = {}
77 |     objects_layer = G_dsg.get_layer(dsg.DsgLayers.OBJECTS)
78 |     id_to_remove = []
79 |     for n in objects_layer.nodes:
80 |         id_to_remove.append(n.id.value)
81 |     for node_id in id_to_remove:
82 |         G_dsg.remove_node(node_id)
83 | 
84 |     for idx, cluster_assignment in enumerate(cluster_assignments):
85 |         # get corresponding dsg indices
86 |         dsg_cluster = []
87 |         for c in cluster_assignment:
88 |             dsg_cluster.append(c)
89 |         node_clusters[idx] = [G_dsg.get_node(n) for n in dsg_cluster]
90 | 
91 |     for new_node_id in node_clusters:
92 |         merged_attrs = cluster_utilities.merge_object_attributes(
93 |             node_clusters[new_node_id])
94 |         assert merged_attrs.semantic_feature.shape[1] == 1
95 |         sim = helpers.compute_cosine_sim(
96 |             task_features, merged_attrs.semantic_feature, None, False)
97 | 
98 |         if sim < threshold:
99 |             continue
100 | 
101 |         node_id = dsg.NodeSymbol('O', new_node_id).value
102 |         G_dsg.add_node(dsg.DsgLayers.OBJECTS, node_id, merged_attrs)
103 |         # print('num in cluster', len(node_clusters[new_node_id]))
104 |         for child in node_clusters[new_node_id]:
105 |             G_dsg.insert_edge(node_id, child.id.value)
106 |     return G_dsg
107 | 
108 | 
109 | def get_bboxes(G_dsg, layer=dsg.DsgLayers.OBJECTS, offset_to_lower_corner=False):
110 |     object_layer = G_dsg.get_layer(layer)
111 |     boxes = []
112 |     for node in object_layer.nodes:
113 |         obj_mesh, oriented_bbox = dsg_object_to_o3d(node, offset_to_lower_corner)
114 |         boxes.append(oriented_bbox)
115 |     return boxes
116 | 
117 | 
118 | def cluster_3d(scene_graph_path, experiment_file, output_folder, cluster_config, model_name,
119 |                thres, partition, bbox_dilation, recompute_edges=True):
120 |     print(scene_graph_path)
121 |     G = dsg.DynamicSceneGraph.load(str(scene_graph_path))
122 |     G_nx = dsg_nx.layer_to_networkx(G.get_layer(dsg.DsgLayers.SEGMENTS))
123 |     # remove all edges
124 |     if recompute_edges:
125 |         G_nx.remove_edges_from(list(G_nx.edges()))
126 |     click.secho("loaded graph", fg="green")
127 | 
128 |     print(f"loading {model_name}...")
129 |     cliphandler = helpers.ClipHandler(model_name)
130 |     print(f"finished loading {model_name}")
131 | 
132 | 
133 |     list_of_tasks = helpers.parse_tasks_from_yaml(experiment_file)
134 |     tasks = "*".join(list_of_tasks)
135 |     print(tasks)
136 | 
137 |     start_t = time.time()
138 | 
139 |     # create IB graph
140 |     nx_graphs = create_cluster_graphs(G_nx, partition, bbox_dilation)
141 |     print(f"partitioned into {len(nx_graphs)} subgraphs")
142 | 
143 |     task_phrases = [x for x in tasks.split("*") if x != ""]
144 |     task_features = cliphandler.get_text_clip_features(task_phrases)
145 | 
146 |     cluster_assignments = []
147 |     for nx_graph in nx_graphs:
148 |         cluster_assignments += cluster_objects(
149 |             G_nx, nx_graph, task_features, cluster_config)
150 | 
151 |     G = update_dsg(G, cluster_assignments, task_features, thres)
152 |     print(scene_graph_path)
153 |     print("clustering took", time.time() - start_t)
154 |     return G
155 | 
156 | 
157 | @click.command()
158 | @click.argument("scene_graph_path", type=click.Path(exists=True))
159 | @click.argument("task_file", type=str)
160 | @click.argument("output_folder", type=str)
161 | @click.argument("cluster_config", type=str)
162 | @click.option("--model-name", "-m", default="ViT-L/14")
163 | @click.option("--thres", "-t", type=float, default=0.23)
164 | @click.option("--partition", "-p", is_flag=True)
165 | @click.option("--dilation", "-d", type=float, default=0.0)
166 | @click.option("--recompute-edges", is_flag=True)
167 | def main(
168 |     scene_graph_path,
169 |     task_file,
170 |     output_folder,
171 | 
cluster_config,
172 |     model_name,
173 |     thres,
174 |     partition,
175 |     dilation,
176 |     recompute_edges,
177 | ):
178 |     """Set up clustering and run."""
179 |     import json  # used to patch the saved DSG header below
180 |     # url = "tcp://127.0.0.1:8001"
181 |     # viz = remote_visualizer.RemoteVisualizer(url=url)
182 | 
183 |     click.secho(f"loading graph from '{scene_graph_path}'...", fg="green")
184 |     scene_graph_path = pathlib.Path(scene_graph_path).expanduser().absolute()
185 | 
186 |     clustered_G = cluster_3d(str(scene_graph_path), task_file, output_folder,
187 |                              cluster_config, model_name, thres, partition, dilation, recompute_edges)
188 |     # viz.visualize(clustered_G)
189 | 
190 |     final_num_objs = clustered_G.get_layer(dsg.DsgLayers.OBJECTS).num_nodes()
191 |     print("{} objects in the final scene graph.".format(final_num_objs))
192 | 
193 |     output_dsg = pathlib.Path(output_folder) / "clio_dsg.json"
194 |     clustered_G.save(output_dsg)
195 | 
196 |     # Keep output DSG version the same as input version.
197 |     version = get_dsg_version(scene_graph_path)
198 |     with open(output_dsg) as file:
199 |         d = json.load(file)
200 |     version = [int(i) for i in version]
201 |     d['SPARK_ORIGIN_header'] = {'version': {'major': version[0], 'minor': version[1], 'patch': version[2]}, 'project_name': 'main'}
202 | 
203 |     with open(output_dsg, 'w') as file:
204 |         json.dump(d, file)
205 | 
206 | if __name__ == "__main__":
207 |     main()
208 | 
-------------------------------------------------------------------------------- /clio_data_maker/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MIT-SPARK/Clio/f912b7c08fb0e029d80fd6b05eccee5009199ce4/clio_data_maker/__init__.py -------------------------------------------------------------------------------- /clio_data_maker/ply_viewer.py: --------------------------------------------------------------------------------
1 | # Publish a dense colmap mesh to ros to visualize in rviz.
2 | # Note the pose transform must be provided if you want metric scale and alignment.
3 | import rospy
4 | import open3d as o3d
5 | from sensor_msgs.msg import PointCloud2, PointField
6 | import sensor_msgs.point_cloud2 as pc2
7 | from std_msgs.msg import Header
8 | import numpy as np
9 | 
10 | file_path = '/home/dominic/cam_ws/imgs/office_2_2024-03-08-07-16-39/dense/meshed-poisson.ply'
11 | # file_path = '/home/dominic/Documents/office_modified_mesh.ply'
12 | # file_path = '/home/dominic/Downloads/building33_5cm.ply'
13 | 
14 | scale = 2.10
15 | transform_matrix = np.array([[-0.85042481, 0.46934804, -0.23767638, -7.08815597],
16 |                              [ 0.50329045, 0.59422864, -0.62736835, -4.50304495],
17 |                              [-0.15321999, -0.65314986, -0.74156517, 0.64755245],
18 |                              [ 0., 0., 0., 1.
]]) 19 | 20 | def load_ply(file_path): 21 | cloud = o3d.io.read_point_cloud(file_path) 22 | return cloud 23 | 24 | def convert_open3d_to_ros(cloud): 25 | header = Header() 26 | header.stamp = rospy.Time.now() 27 | header.frame_id = "world" # Change the frame_id as needed 28 | 29 | points = np.asarray(cloud.points) 30 | colors = np.asarray(cloud.colors) 31 | 32 | # Combine points and colors into a structured array 33 | cloud_data = np.empty(len(points), dtype=[ 34 | ('x', np.float32), 35 | ('y', np.float32), 36 | ('z', np.float32), 37 | ('rgb', np.uint32) # Use a single UINT32 field for color 38 | ]) 39 | 40 | points = scale * points 41 | points = np.hstack((points, np.ones((points.shape[0],1)))) 42 | print(points.shape) 43 | points = np.transpose(transform_matrix@np.transpose(points)) 44 | points = points/points[:,3:] 45 | 46 | 47 | cloud_data['x'] = points[:, 0] 48 | cloud_data['y'] = points[:, 1] 49 | cloud_data['z'] = points[:, 2] 50 | 51 | # Pack RGB channels into a single UINT32 field 52 | cloud_data['rgb'] = ((colors[:, 0] * 255.0).astype(np.uint32) << 16 | 53 | (colors[:, 1] * 255.0).astype(np.uint32) << 8 | 54 | (colors[:, 2] * 255.0).astype(np.uint32)) 55 | 56 | # Convert the structured array to bytes 57 | cloud_msg_data = cloud_data.tobytes() 58 | 59 | cloud_msg = PointCloud2( 60 | header=header, 61 | height=1, 62 | width=len(points), 63 | is_dense=False, 64 | is_bigendian=False, 65 | fields=[ 66 | PointField(name="x", offset=0, datatype=PointField.FLOAT32, count=1), 67 | PointField(name="y", offset=4, datatype=PointField.FLOAT32, count=1), 68 | PointField(name="z", offset=8, datatype=PointField.FLOAT32, count=1), 69 | PointField(name="rgb", offset=12, datatype=PointField.UINT32, count=1) 70 | ], 71 | point_step=16, # Size of one point (4 fields * 4 bytes each) 72 | row_step=16 * len(points), # Full length of the message 73 | data=cloud_msg_data 74 | ) 75 | 76 | return cloud_msg 77 | 78 | def publish_ply_as_pointcloud(pub, cloud_msg): 79 | pub.publish(cloud_msg) 80 | 81 | def main(): 82 | rospy.init_node('ply_viewer_node') 83 | 84 | cloud = load_ply(file_path) 85 | 86 | pub = rospy.Publisher('ply_cloud', PointCloud2, queue_size=1) 87 | rate = rospy.Rate(1) 88 | 89 | while not rospy.is_shutdown(): 90 | cloud_msg = convert_open3d_to_ros(cloud) 91 | publish_ply_as_pointcloud(pub, cloud_msg) 92 | rate.sleep() 93 | 94 | if __name__ == '__main__': 95 | try: 96 | main() 97 | except rospy.ROSInterruptException: 98 | pass 99 | -------------------------------------------------------------------------------- /clio_data_maker/rescale_bbox.py: -------------------------------------------------------------------------------- 1 | # Transform from Colmap frame to world frame at metric scale. 
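# This is the same convention ply_viewer.py (above) applies to mesh points:
# scale first, then a 4x4 homogeneous transform. A minimal sketch of the
# convention with a hypothetical helper, assuming T and scale are the entries
# read from clio_eval/scene_transforms.yaml:
#
#     import numpy as np
#
#     def colmap_to_world(points, T, scale):
#         pts = scale * np.asarray(points, dtype=np.float64)    # metric scale first
#         pts_h = np.hstack([pts, np.ones((pts.shape[0], 1))])  # to homogeneous coords
#         out = (T @ pts_h.T).T                                 # apply the 4x4 transform
#         return out[:, :3] / out[:, 3:]                        # dehomogenize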
2 | import argparse
3 | import os
4 | import yaml
5 | import numpy as np
6 | import clio_eval.utils as eval_utils
7 | from scipy.spatial.transform import Rotation as R
8 | 
9 | def create_obb_from_dict(bbox_dict, transform_mat):
10 |     # extract box pose
11 |     quat = bbox_dict['rotation']
12 |     w, x, y, z = quat['w'], quat['x'], quat['y'], quat['z']
13 |     quat = [x, y, z, w]
14 |     rot = R.from_quat(quat).as_matrix().astype(np.float64)
15 |     center = np.array(bbox_dict['center'])
16 |     box_pose = np.eye(4)
17 |     box_pose[0:3, 0:3] = rot
18 |     box_pose[0:3, 3] = center
19 | 
20 |     # apply transform to box pose
21 |     box_transformed_pose = transform_mat @ box_pose
22 | 
23 |     # repackage bounding box into dictionary
24 |     box_transformed_rot = box_transformed_pose[0:3, 0:3]
25 |     box_transformed_center = box_transformed_pose[0:3, 3].tolist()
26 |     quat = R.from_matrix(box_transformed_rot).as_quat().astype(np.float64)
27 |     x, y, z, w = [float(i) for i in quat]
28 |     bbox_dict_transformed = {
29 |         'center': box_transformed_center,
30 |         'extents': bbox_dict['extents'],
31 |         'rotation': {'w': w, 'x': x, 'y': y, 'z': z}
32 |     }
33 | 
34 |     return bbox_dict_transformed
35 | 
36 | 
37 | def rescale(detection, scale):
38 |     return {'center': [scale * i for i in detection['center']],
39 |             'extents': [scale * i for i in detection['extents']],
40 |             'rotation': detection['rotation']}
41 | 
42 | 
43 | def main():
44 |     parser = argparse.ArgumentParser(description="Rescale coordinates in yaml.")
45 | 
46 |     parser.add_argument('--yaml_in', type=str, default=None)
47 |     parser.add_argument('--yaml_out', type=str, default=None)
48 |     parser.add_argument('--scene_name', type=str, default=None, help="office,cubicle,apartment")
49 | 
50 |     args = parser.parse_args()
51 | 
52 |     # scene_transforms.yaml lives next to clio_eval.utils in the clio_eval package
53 |     scene_transforms_path = os.path.join(
54 |         os.path.dirname(os.path.abspath(eval_utils.__file__)), "scene_transforms.yaml")
55 |     transform_mat, scale = eval_utils.get_transform_from_yaml(scene_transforms_path, args.scene_name)
56 | 
57 |     with open(args.yaml_in) as f:
58 |         old_data = yaml.safe_load(f)
59 | 
60 |     scaled_dict = dict()
61 |     for task, detections in old_data.items():
62 |         scaled_dict[task] = []
63 |         for detection in detections:
64 |             scaled_box = rescale(detection, scale)
65 |             transformed_box = create_obb_from_dict(scaled_box, transform_mat)
66 |             scaled_dict[task].append(transformed_box)
67 | 
68 |     with open(args.yaml_out, 'w') as f:
69 |         yaml.dump(scaled_dict, f)
70 | 
71 | if __name__ == "__main__":
72 |     main()
73 | 
-------------------------------------------------------------------------------- /clio_eval/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MIT-SPARK/Clio/f912b7c08fb0e029d80fd6b05eccee5009199ce4/clio_eval/__init__.py -------------------------------------------------------------------------------- /clio_eval/evaluate_dsg.py: --------------------------------------------------------------------------------
1 | import clio_eval.evaluate_helpers as eval_helpers
2 | import clio_eval.utils as eval_utils
3 | import spark_dsg as sdsg
4 | import argparse
5 | 
6 | def main():
7 |     parser = argparse.ArgumentParser(
8 |         description="Evaluate DSG. 
") 9 | parser.add_argument('dsg_file', type=str, default=None) 10 | parser.add_argument('gt_yaml', type=str, default=None) 11 | parser.add_argument('-t', '--thres', type=float, default=0.0) 12 | parser.add_argument('--clip_model', type=str, 13 | default="ViT-L/14", help="ViT-L/14, ViT-B/32") 14 | parser.add_argument('--clustered', 15 | default=False, action="store_true") 16 | parser.add_argument('-v', '--visualize', 17 | default=False, action="store_true") 18 | 19 | 20 | args = parser.parse_args() 21 | 22 | results = eval_helpers.results_from_files( 23 | args.dsg_file, args.gt_yaml, args.clip_model, args.clustered, args.visualize, args.thres) 24 | 25 | print(results) 26 | 27 | 28 | if __name__ == "__main__": 29 | main() 30 | -------------------------------------------------------------------------------- /clio_eval/evaluate_helpers.py: -------------------------------------------------------------------------------- 1 | import clio_eval.utils as eval_utils 2 | import clio_batch.helpers as helpers 3 | import numpy as np 4 | import spark_dsg as sdsg 5 | import distinctipy 6 | import copy 7 | import json 8 | import open3d as o3d 9 | 10 | def get_dsg_version(dsg_file): 11 | with open(dsg_file) as file: 12 | d = json.load(file) 13 | if 'SPARK_DSG_header' not in d: 14 | version = ['1', '0', '0'] 15 | return version 16 | elif "SPARK_ORIGIN_header" in d: 17 | version_dictionary = d['SPARK_ORIGIN_header']['version'] 18 | else: 19 | version_dictionary = d['SPARK_DSG_header']['version'] 20 | version = [str(version_dictionary['major']), str(version_dictionary['minor']), str(version_dictionary['patch'])] 21 | return version 22 | 23 | def results_from_files(dsg_file, task_yaml, clip_model, clustered, visualize=False, thresh=0.0): 24 | print(dsg_file) 25 | layer = sdsg.DsgLayers.SEGMENTS 26 | if clustered: 27 | layer = sdsg.DsgLayers.OBJECTS 28 | 29 | dsg = sdsg.DynamicSceneGraph.load(dsg_file) 30 | offset_to_lower_corner = False 31 | version = ['' + i for i in get_dsg_version(dsg_file)] 32 | print('dsg version: ', version) 33 | if version == ['1', '0', '0']: 34 | offset_to_lower_corner = True 35 | est_objects = eval_utils.get_objects_from_dsg(dsg, offset_to_lower_corner, layer=layer) 36 | gt_data = eval_utils.GtData(task_yaml) 37 | if thresh > 0: 38 | est_objects = prune_objects(est_objects, clip_model, gt_data, thresh) 39 | return compute_results(est_objects, gt_data, clip_model, visualize=visualize) 40 | 41 | 42 | def prune_objects(est_objects, clip_model, gt_data, thres): 43 | cliphandler = helpers.ClipHandler(clip_model) 44 | task_list = list(gt_data.task_objects.keys()) 45 | task_features = cliphandler.get_text_clip_features(task_list) 46 | 47 | pruned_est_objects = [] 48 | for object in est_objects: 49 | t = [] 50 | for feature in task_features: 51 | temp = object.compute_similarity(feature) 52 | t.append(temp) 53 | max_sim = max(t) 54 | if max_sim > thres: 55 | pruned_est_objects.append(object) 56 | return pruned_est_objects 57 | 58 | 59 | def results_from_cg(cg_file, task_yaml, clip_model, visualize=False, thres=0.0): 60 | est_objects = get_objects_from_cg(cg_file) 61 | gt_data = eval_utils.GtData(task_yaml) 62 | if thres > 0: 63 | est_objects = prune_objects(est_objects, clip_model, gt_data, thres) 64 | return compute_results(est_objects, gt_data, clip_model, visualize=visualize) 65 | 66 | 67 | def compute_results(est_objects, gt_data, clip_model, min_sim_ratio=0.9, visualize=False): 68 | weak_recall, strict_recall, avg_iou = compute_recall_and_iou( 69 | est_objects, gt_data, clip_model, 
visualize) 70 | weak_precision, strict_precision = compute_precision( 71 | est_objects, gt_data, clip_model, min_sim_ratio, visualize) 72 | 73 | return eval_utils.Results(len(est_objects), weak_recall, strict_recall, avg_iou, weak_precision, strict_precision) 74 | 75 | 76 | def generate_gt_bboxes_viz(gt_bboxes, color): 77 | viz_gt_bboxes = [] 78 | for gt_bbox in gt_bboxes: 79 | outer = copy.deepcopy(gt_bbox) 80 | inner = copy.deepcopy(gt_bbox) 81 | inner.extent = 0.95 * inner.extent 82 | outer.color = color 83 | inner.color = [0, 0, 0] # clip inner to black 84 | viz_gt_bboxes = viz_gt_bboxes + [inner, outer] 85 | return viz_gt_bboxes 86 | 87 | 88 | def generate_est_bboxes_viz(est_objs, color, add_mesh=True): 89 | viz_est_bboxes = [] 90 | for est_obj in est_objs: 91 | est_bbox = copy.deepcopy(est_obj.oriented_bbox) 92 | est_bbox.color = color 93 | viz_est_bboxes.append(est_bbox) 94 | # add object mesh 95 | if add_mesh: 96 | viz_est_bboxes.append(est_obj.o3d_mesh) 97 | return viz_est_bboxes 98 | 99 | 100 | def compute_recall_and_iou(est_objects, gt_data, clip_model, visualize): 101 | # est_objects is a list of EvalObjects 102 | # gt_data is of GtData type 103 | cliphandler = helpers.ClipHandler(clip_model) 104 | task_list = list(gt_data.task_objects.keys()) 105 | task_features = cliphandler.get_text_clip_features(task_list) 106 | # Find associated est bboxes 107 | total_gt_objects = 0 108 | total_strict_matches = 0 109 | total_weak_matches = 0 110 | sum_iou = 0 111 | viz_gt_bboxes = [] 112 | viz_est_bboxes = [] 113 | est_boxes = [] 114 | colors = distinctipy.get_colors(len(task_list)) 115 | for i in range(len(task_list)): 116 | gt_bboxes = gt_data.task_objects[task_list[i]].copy() 117 | 118 | if visualize: 119 | viz_gt_bboxes.extend(generate_gt_bboxes_viz(gt_bboxes, colors[i])) 120 | 121 | num_objs = len(gt_bboxes) 122 | rel_objects = eval_utils.get_k_relevant_objects( 123 | est_objects, task_features[i], num_objs) 124 | total_gt_objects += len(gt_bboxes) 125 | if visualize: 126 | viz_est_bboxes.extend( 127 | generate_est_bboxes_viz(rel_objects, colors[i])) 128 | est_boxes.append(rel_objects[0].oriented_bbox) 129 | 130 | while len(gt_bboxes) > 0 and len(rel_objects) > 0: 131 | best_iou = 0 132 | best_gt_est_pair = None 133 | for gt_id, gt_bbox in enumerate(gt_bboxes): 134 | for est_id, est_obj in enumerate(rel_objects): 135 | iou = est_obj.compute_iou(gt_bbox) 136 | if iou > best_iou: 137 | best_iou = iou 138 | best_gt_est_pair = (gt_id, est_id) 139 | 140 | if best_iou == 0: 141 | break 142 | 143 | sum_iou += best_iou 144 | best_gt_bbox = gt_bboxes[best_gt_est_pair[0]] 145 | best_est_obj = rel_objects[best_gt_est_pair[1]] 146 | 147 | if best_est_obj.contains_centroid(best_gt_bbox): 148 | total_weak_matches += 1 149 | if best_est_obj.centroid_contained_in(best_gt_bbox): 150 | total_strict_matches += 1 151 | 152 | # delete 153 | del gt_bboxes[best_gt_est_pair[0]] 154 | del rel_objects[best_gt_est_pair[1]] 155 | 156 | if visualize: 157 | print("Visualizing recall computation...") 158 | o3d.visualization.draw_geometries(viz_gt_bboxes + viz_est_bboxes) 159 | merged_line_set = o3d.geometry.LineSet() 160 | for est_box in est_boxes: 161 | line_set = o3d.geometry.LineSet.create_from_oriented_bounding_box(est_box) 162 | merged_line_set += line_set 163 | o3d.io.write_line_set('/home/dominic/bounding_boxes.ply', merged_line_set) 164 | 165 | return total_weak_matches / total_gt_objects, total_strict_matches / total_gt_objects, sum_iou / total_gt_objects 166 | 167 | 168 | def 
compute_precision(est_objects, gt_data, clip_model, min_sim_ratio, visualize): 169 | # est_objects is a list of EvalObjects 170 | # gt_data is of GtData type 171 | cliphandler = helpers.ClipHandler(clip_model) 172 | 173 | # first sort all est objects into the different tasks 174 | task_list = list(gt_data.task_objects.keys()) 175 | est_task_objects = {task: [] for task in task_list} 176 | max_sims = {task: 0 for task in task_list} 177 | task_features = cliphandler.get_text_clip_features(task_list) 178 | 179 | total_est_objects = 0 180 | total_strict_matches = 0 181 | total_weak_matches = 0 182 | for est_obj in est_objects: 183 | sims = helpers.compute_sim_to_tasks(task_features, est_obj.feature) 184 | task_idx = np.argmax(sims) 185 | est_task_objects[task_list[task_idx]].append(est_obj) 186 | if sims[task_idx] > max_sims[task_list[task_idx]]: 187 | max_sims[task_list[task_idx]] = sims[task_idx] 188 | 189 | viz_gt_bboxes = [] 190 | viz_est_bboxes = [] 191 | colors = distinctipy.get_colors(len(task_list)) 192 | for i in range(len(task_list)): 193 | gt_bboxes = gt_data.task_objects[task_list[i]].copy() 194 | est_objects = est_task_objects[task_list[i]] 195 | task_feature = task_features[i] 196 | rel_objects = [obj for obj in est_objects if obj.compute_similarity( 197 | task_feature) > min_sim_ratio*max_sims[task_list[i]]] 198 | if visualize: 199 | viz_est_bboxes.extend( 200 | generate_est_bboxes_viz(rel_objects, colors[i])) 201 | if visualize: 202 | viz_gt_bboxes.extend( 203 | generate_gt_bboxes_viz(gt_bboxes, colors[i])) 204 | total_est_objects += len(rel_objects) 205 | while len(gt_bboxes) > 0 and len(rel_objects) > 0: 206 | best_iou = 0 207 | best_gt_est_pair = None 208 | for gt_id, gt_bbox in enumerate(gt_bboxes): 209 | for est_id, est_obj in enumerate(rel_objects): 210 | iou = est_obj.compute_iou(gt_bbox) 211 | if iou > best_iou: 212 | best_iou = iou 213 | best_gt_est_pair = (gt_id, est_id) 214 | 215 | if best_iou == 0: 216 | break 217 | 218 | best_gt_bbox = gt_bboxes[best_gt_est_pair[0]] 219 | best_est_obj = rel_objects[best_gt_est_pair[1]] 220 | if best_est_obj.contains_centroid(best_gt_bbox): 221 | total_weak_matches += 1 222 | if best_est_obj.centroid_contained_in(best_gt_bbox): 223 | total_strict_matches += 1 224 | 225 | # delete 226 | del gt_bboxes[best_gt_est_pair[0]] 227 | del rel_objects[best_gt_est_pair[1]] 228 | 229 | if visualize: 230 | print("Visualizing precision computation...") 231 | o3d.visualization.draw_geometries(viz_gt_bboxes + viz_est_bboxes) 232 | 233 | if total_est_objects == 0: 234 | return 0, 0 235 | 236 | return total_weak_matches / total_est_objects, total_strict_matches / total_est_objects 237 | -------------------------------------------------------------------------------- /clio_eval/experiments/configs/ablations/3d_clustering.yaml: -------------------------------------------------------------------------------- 1 | log_path: "/home/dominic/Downloads/clio_logs" 2 | datasets: 3 | - name: "cubicle" 4 | clip_model: "ViT-L/14" 5 | fine_dsg: "/home/dominic/Documents/clio_logs/cubicle/trial_0/large_fine_ps/dsg.json" 6 | khronos_dsg: "/home/dominic/Documents/clio_logs/cubicle/trial_0/large_khronos_ps/dsg.json" 7 | task_yaml: "/home/dominic/cam_ws/imgs/clio_datasets/cubicle/tasks_cubicle.yaml" 8 | experiments: 9 | - name: "d00001" 10 | cluster_config: "large_delta_00001.yaml" 11 | prune_threshold: 0.2301 12 | partition: False 13 | recompute_edges: False 14 | bbox_dilation: 0.0 15 | - name: "office" 16 | clip_model: "ViT-L/14" 17 | fine_dsg: 
"/home/dominic/Documents/clio_logs/office/trial_0/large_fine_ps/dsg.json" 18 | khronos_dsg: "/home/dominic/Documents/clio_logs/office/trial_0/large_khronos_ps/dsg.json" 19 | task_yaml: "/home/dominic/cam_ws/imgs/clio_datasets/office/tasks_office.yaml" 20 | experiments: 21 | - name: "d00001" 22 | cluster_config: "large_delta_00001.yaml" 23 | prune_threshold: 0.2301 24 | partition: False 25 | recompute_edges: False 26 | bbox_dilation: 0.0 27 | - name: "apartment" 28 | clip_model: "ViT-L/14" 29 | fine_dsg: "/home/dominic/Documents/clio_logs/apartment/trial_0/large_fine_ps/dsg.json" 30 | khronos_dsg: "/home/dominic/Documents/clio_logs/apartment/trial_0/large_khronos_ps/dsg.json" 31 | task_yaml: "/home/dominic/cam_ws/imgs/clio_datasets/apartment/tasks_apartment.yaml" 32 | experiments: 33 | - name: "d00001" 34 | cluster_config: "large_delta_00001.yaml" 35 | prune_threshold: 0.2301 36 | partition: False 37 | recompute_edges: False 38 | bbox_dilation: 0.0 -------------------------------------------------------------------------------- /clio_eval/experiments/configs/ablations/3d_clustering_oc.yaml: -------------------------------------------------------------------------------- 1 | log_path: "/home/dominic/Documents/clio_logs" 2 | datasets: 3 | - name: "cubicle" 4 | clip_model: "ViT-H-14" 5 | fine_dsg: "/home/dominic/Documents/clio_logs/cubicle/trial_0/open_fine_ps/dsg.json" 6 | khronos_dsg: "/home/dominic/Documents/clio_logs/cubicle/trial_0/open_khronos_ps/dsg.json" 7 | task_yaml: "/home/dominic/cam_ws/imgs/clio_datasets/cubicle/tasks_cubicle.yaml" 8 | experiments: 9 | - name: "d00001" 10 | cluster_config: "large_delta_00001_oc.yaml" 11 | prune_threshold: 0.2601 12 | partition: False 13 | recompute_edges: False 14 | bbox_dilation: 0.0 15 | - name: "office" 16 | clip_model: "ViT-H-14" 17 | fine_dsg: "/home/dominic/Documents/clio_logs/office/trial_0/open_fine_ps/dsg.json" 18 | khronos_dsg: "/home/dominic/Documents/clio_logs/office/trial_0/open_khronos_ps/dsg.json" 19 | task_yaml: "/home/dominic/cam_ws/imgs/clio_datasets/office/tasks_office.yaml" 20 | experiments: 21 | - name: "d00001" 22 | cluster_config: "large_delta_00001_oc.yaml" 23 | prune_threshold: 0.2601 24 | partition: False 25 | recompute_edges: False 26 | bbox_dilation: 0.0 27 | - name: "apartment" 28 | clip_model: "ViT-H-14" 29 | fine_dsg: "/home/dominic/Documents/clio_logs/apartment/trial_0/open_fine_ps/dsg.json" 30 | khronos_dsg: "/home/dominic/Documents/clio_logs/apartment/trial_0/open_khronos_ps/dsg.json" 31 | task_yaml: "/home/dominic/cam_ws/imgs/clio_datasets/apartment/tasks_apartment.yaml" 32 | experiments: 33 | - name: "d00001" 34 | cluster_config: "large_delta_00001_oc.yaml" 35 | prune_threshold: 0.2601 36 | partition: False 37 | recompute_edges: False 38 | bbox_dilation: 0.0 -------------------------------------------------------------------------------- /clio_eval/experiments/configs/ablations/realtime_clustering.yaml: -------------------------------------------------------------------------------- 1 | num_trials: 1 2 | log_path: "/home/dominic/Downloads/clio_online" 3 | semantic_threshold: 0.2301 4 | datasets: 5 | - name: "cubicle" 6 | task_yaml: "/home/dominic/cam_ws/imgs/clio_datasets/cubicle/tasks_cubicle.yaml" 7 | experiments: 8 | - name: "large_fine" 9 | - name: "office" 10 | task_yaml: "/home/dominic/cam_ws/imgs/clio_datasets/office/tasks_office.yaml" 11 | experiments: 12 | - name: "large_fine" 13 | - name: "apartment" 14 | task_yaml: "/home/dominic/cam_ws/imgs/clio_datasets/apartment/tasks_apartment.yaml" 15 | 
experiments: 16 | - name: "large_fine" -------------------------------------------------------------------------------- /clio_eval/experiments/configs/cluster/large_delta_00001.yaml: -------------------------------------------------------------------------------- 1 | debug: False 2 | debug_folder: /home/yunchang/logs/hydra_llm/3d_clustering_test/apartment/large_clustered 3 | 4 | #### Similarity configs 5 | use_lerf_loss: False 6 | lerf_loss_cannonical_phrases: 7 | - things 8 | - stuff 9 | - texture 10 | - object 11 | 12 | #### Clustering configs 13 | sims_thres: 0.23 14 | delta: 0.00001 15 | top_k_tasks: 2 16 | cumulative: True -------------------------------------------------------------------------------- /clio_eval/experiments/configs/cluster/large_delta_00001_oc.yaml: -------------------------------------------------------------------------------- 1 | debug: False 2 | debug_folder: /home/yunchang/logs/hydra_llm/3d_clustering_test/apartment/large_clustered 3 | 4 | #### Similarity configs 5 | use_lerf_loss: False 6 | lerf_loss_cannonical_phrases: 7 | - things 8 | - stuff 9 | - texture 10 | - object 11 | 12 | #### Clustering configs 13 | sims_thres: 0.26 14 | delta: 0.00001 15 | top_k_tasks: 2 16 | cumulative: True -------------------------------------------------------------------------------- /clio_eval/experiments/evaluate_ablations.py: -------------------------------------------------------------------------------- 1 | import time 2 | import argparse 3 | import yaml 4 | import json 5 | import os 6 | import subprocess 7 | import shutil 8 | import signal 9 | import numpy as np 10 | import csv 11 | from tabulate import tabulate 12 | 13 | import clio_eval.evaluate_helpers as eval_helpers 14 | from clio_eval.utils import Ablations 15 | 16 | def generate_raw_results(results, ablations, dec=3, print_table=True): 17 | folder = ablations.env["log_path"] 18 | header = ["Run", "IOU", 19 | "SAcc", "RAcc", "Sprec", "Rprec", "F1", "#-Objects"] 20 | fields = ["avg_iou", "strict_recall", 21 | "weak_recall", "strict_precision", "weak_precision", "f1", "num_objects"] 22 | # Maintain a latex table and a human readable table 23 | table = [] 24 | table.append(header) 25 | 26 | for i in range(len(results)): 27 | trial_results = results[i] 28 | for run in trial_results: 29 | res = trial_results[run] 30 | row = ["{}-{}".format(i, run)] + res.to_list(fields, decimals=dec) 31 | table.append(row) 32 | 33 | output_txt = os.path.join(folder, "raw_results.txt") 34 | with open(output_txt, 'w') as f: 35 | f.write(tabulate(table)) 36 | if print_table: 37 | print(tabulate(table)) 38 | 39 | 40 | def evaluate(ablations): 41 | results = [] 42 | for trial in range(ablations.num_trials): 43 | trial_results = {} 44 | for dataset in ablations.experiments: 45 | for label in ablations.experiments[dataset].experiments: 46 | label = label[0] 47 | if "large" in label: 48 | clip_model = "ViT-L/14" 49 | elif "open" in label: 50 | clip_model = "ViT-H-14" 51 | else: 52 | raise ValueError('Unable to determine clip mode.') 53 | task_yaml = ablations.experiments[dataset].task_yaml 54 | log_folder = "{}/{}/trial_{}/{}".format( 55 | ablations.env["log_path"], dataset, trial, label) 56 | dsg_file = os.path.join( 57 | log_folder, os.path.join("backend", "dsg.json")) 58 | if not os.path.exists(dsg_file): 59 | print("Missing {}".format(dsg_file)) 60 | continue 61 | 62 | if "fine" in label: 63 | fine_label = "{}_clio_primitives".format(dataset) 64 | clio_label = "{}_clio".format(dataset) 65 | if label.endswith("ps"): 66 | fine_label += "_ps" 
67 | clio_label += "_ps" 68 | trial_results[fine_label] = eval_helpers.results_from_files( 69 | dsg_file, task_yaml, clip_model, False) 70 | trial_results[clio_label] = eval_helpers.results_from_files( 71 | dsg_file, task_yaml, clip_model, True, thresh=ablations.eval["semantic_threshold"]) 72 | else: 73 | print("Encountered unexpected label: {}".format(label)) 74 | results.append(trial_results) 75 | return results 76 | 77 | def main(): 78 | parser = argparse.ArgumentParser(description="Run ablations.") 79 | parser.add_argument('--configs', nargs='+', default=[]) 80 | 81 | args = parser.parse_args() 82 | 83 | for config_yaml in args.configs: 84 | ablation_config = Ablations() 85 | ablation_config.parse(config_yaml) 86 | 87 | results = evaluate(ablation_config) 88 | generate_raw_results(results, ablation_config) 89 | 90 | if __name__ == "__main__": 91 | main() -------------------------------------------------------------------------------- /clio_eval/experiments/result_utils.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import clio_eval.utils as eval_utils 3 | import builtins 4 | 5 | 6 | class StatValue: 7 | def __init__(self, mean, std): 8 | self.mean = mean 9 | self.std = std 10 | 11 | @classmethod 12 | def from_list(cls, l): 13 | values = np.array(l) 14 | return cls(np.mean(values), np.std(values)) 15 | 16 | def __gt__(self, other): 17 | if type(other) == StatValue: 18 | return self.mean > other.mean 19 | # assume if not StatValue then some scalar 20 | return self.mean > other 21 | 22 | def __round__(self, precision): 23 | if precision <= 0: 24 | self.mean = int(self.mean) 25 | self.std = int(self.std) 26 | else: 27 | self.mean = round(self.mean, precision) 28 | self.std = round(self.std, precision) 29 | return self 30 | 31 | def __str__(self): 32 | return str(self.mean) + r" $\pm$ " + str(self.std) 33 | 34 | 35 | def bold_optimal_value(table, compr): 36 | def get_value(val): 37 | if type(val) == StatValue: 38 | return val.mean 39 | return val 40 | 41 | # table is list of lists 42 | # compr list of signs telling us how to compare values 43 | for i in range(len(compr)): 44 | if compr[i] == 0: 45 | continue 46 | 47 | values = [row[i] for row in table] 48 | if compr[i] > 0: 49 | opt_val = max(values) 50 | elif compr[i] < 0: 51 | opt_val = min(values) 52 | opt_idx = values.index(opt_val) 53 | table[opt_idx][i] = r"\textbf{" + str(table[opt_idx][i]) + r"}" 54 | 55 | 56 | def multi_trial_results(results_list): 57 | if len(results_list) == 1: 58 | return results_list[0] 59 | objects = StatValue.from_list([r.num_objects for r in results_list]) 60 | w_recall = StatValue.from_list([r.weak_recall for r in results_list]) 61 | s_recall = StatValue.from_list([r.strict_recall for r in results_list]) 62 | avg_iou = StatValue.from_list([r.avg_iou for r in results_list]) 63 | w_prec = StatValue.from_list([r.weak_precision for r in results_list]) 64 | s_prec = StatValue.from_list([r.strict_precision for r in results_list]) 65 | f1 = StatValue.from_list([r.f1 for r in results_list]) 66 | tpf = StatValue.from_list([r.tpf for r in results_list]) 67 | return eval_utils.Results(objects, w_recall, s_recall, avg_iou, w_prec, s_prec, f1, tpf) 68 | -------------------------------------------------------------------------------- /clio_eval/experiments/run_3d_object_ablations.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import yaml 3 | import os 4 | import csv 5 | import json 6 | import 
numpy as np 7 | from tabulate import tabulate 8 | import subprocess 9 | 10 | import clio_batch.object_cluster as obj_cluster 11 | import clio_eval.evaluate_helpers as eval_helpers 12 | import clio_eval.utils as eval_utils 13 | from clio_eval.evaluate_helpers import get_dsg_version 14 | 15 | 16 | def parse_experiment_yaml(yaml_file): 17 | with open(yaml_file, "r") as stream: 18 | return yaml.safe_load(stream) 19 | 20 | def write_csv(table, filename): 21 | # Dominic's write csv to interface with latex converter in paper repo 22 | # Define the order of columns 23 | column_order = ["IOU", "S. Rec", "W. Rec", 24 | "S. Prec", "W. Prec", "F1", "#-Objs", "Run"] 25 | 26 | # Reorder the columns in each row of the table 27 | reordered_table = [] 28 | for row in table: 29 | reordered_row = [row[table[0].index(col)] for col in column_order] 30 | reordered_table.append(reordered_row) 31 | 32 | # Write the reordered table to a CSV file 33 | with open(filename, 'w', newline='') as file: 34 | writer = csv.writer(file) 35 | writer.writerows(reordered_table) 36 | 37 | 38 | def print_and_save_results(results, folder, dec=3): 39 | table = [] 40 | header = ["Run", "#-Objs", "IOU", 41 | "S. Rec", "W. Rec", "S. Prec", "W. Prec", "F1"] 42 | 43 | table.append(header) 44 | 45 | for run in results: 46 | res = results[run] 47 | row = [run, res.num_objects, round(res.avg_iou, dec), round(res.strict_recall, dec), round( 48 | res.weak_recall, dec), round(res.strict_precision, dec), round(res.weak_precision, dec), round(res.f1, dec)] 49 | table.append(row) 50 | 51 | write_csv(table, 'results.csv') 52 | print(tabulate(table)) 53 | output_file = os.path.join(folder, "results.txt") 54 | with open(output_file, 'w') as f: 55 | f.write(tabulate(table)) 56 | 57 | 58 | def run(ablation_dict): 59 | for dataset in ablation_dict["datasets"]: 60 | clip_model = dataset["clip_model"] 61 | for experiment in dataset["experiments"]: 62 | output_folder = "{}/{}/{}".format( 63 | ablation_dict["log_path"], dataset["name"], experiment["name"]) 64 | if not os.path.exists(output_folder): 65 | os.makedirs(output_folder) 66 | elif os.path.exists(os.path.join(output_folder, "dsg.json")): 67 | print( 68 | "{} - {} exists. Skipping...".format(dataset["name"], experiment["name"])) 69 | continue 70 | experiments_path = os.path.abspath(os.path.dirname(__file__)) 71 | cluster_config_dir = os.path.join( 72 | experiments_path, "configs/cluster") 73 | cluster_config_path = os.path.join( 74 | cluster_config_dir, experiment["cluster_config"]) 75 | clustered_dsg = obj_cluster.cluster_3d( 76 | dataset["fine_dsg"], dataset["task_yaml"], output_folder, cluster_config_path, clip_model, 77 | experiment["prune_threshold"], experiment["partition"], experiment["bbox_dilation"], 78 | recompute_edges=experiment["recompute_edges"]) 79 | output_dsg = output_folder + "/dsg.json" 80 | clustered_dsg.save(output_dsg) 81 | 82 | # Keep output DSG version the same as input version. 
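            # The JSON round-trip below overwrites the header stamped by save()
            # with the version triple read from the input DSG, so version checks
            # in evaluate_helpers.get_dsg_version treat the clustered output the
            # same as its input.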
83 | version = get_dsg_version(dataset["fine_dsg"]) 84 | print(version) 85 | with open(output_dsg) as file: 86 | d = json.load(file) 87 | version = [int(i) for i in version] 88 | d['SPARK_ORIGIN_header'] = {'version':{'major':version[0], 'minor':version[1], 'patch':version[2]}, 'project_name':'main'} 89 | 90 | with open(output_dsg, 'w') as file: 91 | json.dump(d, file) 92 | 93 | def evaluate(ablation_dict): 94 | results = {} 95 | for dataset in ablation_dict["datasets"]: 96 | clip_model = dataset["clip_model"] 97 | task_yaml = dataset["task_yaml"] 98 | fine_label = "{}_fine".format(dataset["name"]) 99 | results[fine_label] = eval_helpers.results_from_files( 100 | dataset["fine_dsg"], task_yaml, clip_model, False) 101 | khronos_label = "{}_khronos".format(dataset["name"]) 102 | results[khronos_label] = eval_helpers.results_from_files( 103 | dataset["khronos_dsg"], task_yaml, clip_model, False) 104 | medium_thres_label = "{}_khronos_thres".format(dataset["name"]) 105 | # assumes threshold for Khronos is the same as the first thresh in experiment yaml 106 | thresh = dataset['experiments'][0]["prune_threshold"] 107 | results[medium_thres_label] = eval_helpers.results_from_files( 108 | dataset["khronos_dsg"], task_yaml, clip_model, False, thresh=thresh) 109 | for experiment in dataset["experiments"]: 110 | output_folder = "{}/{}/{}".format( 111 | ablation_dict["log_path"], dataset["name"], experiment["name"]) 112 | dsg_file = os.path.join(output_folder, "dsg.json") 113 | if not os.path.exists(dsg_file): 114 | print("Missing {}".format(dsg_file)) 115 | continue 116 | 117 | run_label = "{}_{}".format(dataset["name"], experiment["name"]) 118 | # 3D clustering currently store clusters in places layer 119 | results[run_label] = eval_helpers.results_from_files( 120 | dsg_file, task_yaml, clip_model, True) 121 | 122 | print_and_save_results(results, ablation_dict["log_path"]) 123 | 124 | 125 | def main(): 126 | parser = argparse.ArgumentParser(description="Run ablations.") 127 | parser.add_argument("config", type=str, default=None) 128 | 129 | args = parser.parse_args() 130 | ablation_dict = parse_experiment_yaml(args.config) 131 | 132 | print("Running clustering...") 133 | run(ablation_dict) 134 | 135 | print("Evaluating...") 136 | evaluate(ablation_dict) 137 | 138 | 139 | if __name__ == "__main__": 140 | main() -------------------------------------------------------------------------------- /clio_eval/iou.py: -------------------------------------------------------------------------------- 1 | # Get IoU for 3D Oriented Bounding Boxes 2 | 3 | import os 4 | import sys 5 | import numpy as np 6 | import scipy.spatial as sp 7 | 8 | cwd = os.path.abspath(os.path.dirname(__file__)) 9 | objectron_path = os.path.abspath(os.path.join(cwd, "../thirdparty/Objectron/")) 10 | sys.path.insert(0, objectron_path) 11 | 12 | from objectron.dataset.iou import IoU 13 | from objectron.dataset.box import Box 14 | 15 | def get_box_verticies(obb): 16 | center = obb.get_center() 17 | verticies = np.take(obb.get_box_points(), [0,3,2,5,1,6,7,4], axis=0) 18 | stack_points = np.vstack((center, verticies)) 19 | return stack_points 20 | 21 | def compute_iou_oriented_bbox(bbox_1, bbox_2): 22 | box1 = Box(get_box_verticies(bbox_1)) 23 | box2 = Box(get_box_verticies(bbox_2)) 24 | iou = IoU(box1, box2) 25 | return iou.iou() -------------------------------------------------------------------------------- /clio_eval/scene_transforms.yaml: -------------------------------------------------------------------------------- 1 | apartment: 2 | 
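  # Each scene entry is a Colmap-to-world similarity transform: T is a 4x4
  # homogeneous matrix (given row by row) and scale is the metric scale factor.
  # Both are read by get_transform_from_yaml(transforms_file, scene_name).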
T: 3 | - [0.99736433, 0.06378271, -0.03458547, -1.65754365] 4 | - [0.03576231, -0.01740201, 0.9992088, 2.19638849] 5 | - [0.06313038, -0.99781207, -0.01963716, 0.50415508] 6 | - [0.0, 0.0, 0.0, 1.0] 7 | scale: 0.638 8 | 9 | old_office: 10 | T: 11 | - [0.11473244, 0.234012, -0.96544024, -4.32607684] 12 | - [0.99141128, -0.08838281, 0.09639581, -4.09890664] 13 | - [-0.06277054, -0.96820807, -0.24214251, 0.582741] 14 | - [0.0, 0.0, 0.0, 1.0] 15 | scale: 2.07 16 | 17 | office: 18 | T: 19 | - [-0.85042481, 0.46934804, -0.23767638, -7.08815597] 20 | - [0.50329045, 0.59422864, -0.62736835, -4.50304495] 21 | - [-0.15321999, -0.65314986, -0.74156517, 0.64755245] 22 | - [0.0, 0.0, 0.0, 1.0] 23 | scale: 2.10 24 | 25 | cubicle: 26 | T: 27 | - [0.8680003, 0.34739165, -0.35481618, -0.06814865] 28 | - [0.42267521, -0.14186624, 0.89510873, 2.0763137] 29 | - [0.26061686, -0.92692665, -0.26997378, -0.30536732] 30 | - [0.0, 0.0, 0.0, 1.0] 31 | scale: 0.3527 32 | -------------------------------------------------------------------------------- /clio_eval/utils.py: -------------------------------------------------------------------------------- 1 | import spark_dsg as sdsg 2 | import yaml 3 | import numpy as np 4 | import open3d as o3d 5 | from scipy.spatial.transform import Rotation as sprot 6 | 7 | import clio_batch.helpers as helpers 8 | from clio_eval.iou import compute_iou_oriented_bbox 9 | 10 | 11 | class EvalObjects: 12 | def __init__(self, feature, o3d_mesh, oriented_bbox): 13 | self.feature = feature 14 | self.o3d_mesh = o3d_mesh 15 | self.oriented_bbox = oriented_bbox 16 | 17 | def compute_similarity(self, other_feature): 18 | return helpers.compute_cosine_sim(other_feature, self.feature) 19 | 20 | def compute_iou(self, other_oriented_bbox): 21 | return compute_iou_oriented_bbox(self.oriented_bbox, other_oriented_bbox) 22 | 23 | def contains_centroid(self, other_oriented_bbox): 24 | # Extract centroids 25 | centroid = np.asarray(self.oriented_bbox.get_center()) 26 | other_centroid = np.asarray( 27 | other_oriented_bbox.get_center()) 28 | 29 | # Extract half-lengths (extent) of the bounding boxes 30 | extent = np.asarray(self.oriented_bbox.extent) 31 | 32 | # Compute bounds for each box 33 | bounds = np.concatenate([centroid - extent, centroid + extent]) 34 | 35 | return is_point_inside_box(other_centroid, bounds) 36 | 37 | def centroid_contained_in(self, other_oriented_bbox): 38 | # Extract centroids 39 | other_centroid = np.asarray(other_oriented_bbox.get_center()) 40 | centroid = np.asarray( 41 | self.oriented_bbox.get_center()) 42 | 43 | # Extract half-lengths (extent) of the bounding boxes 44 | other_extent = np.asarray(other_oriented_bbox.extent) 45 | 46 | # Compute bounds for each box 47 | other_bounds = np.concatenate( 48 | [other_centroid - other_extent, other_centroid + other_extent]) 49 | 50 | return is_point_inside_box(centroid, other_bounds) 51 | 52 | 53 | class ExperimentGroup: 54 | def __init__(self, param): 55 | self.task_yaml = param["task_yaml"] 56 | self.experiments = [] 57 | for ab in param["experiments"]: 58 | self.experiments.append([ab["name"]]) 59 | 60 | 61 | class Ablations: 62 | def __init__(self): 63 | self.experiments = {} # name (str) : setup (ExperimentGroup) 64 | self.env = {} 65 | self.eval = {} 66 | 67 | def parse(self, yaml_file): 68 | with open(yaml_file, "r") as stream: 69 | params = yaml.safe_load(stream) 70 | self.num_trials = int(params["num_trials"]) 71 | self.env["log_path"] = params["log_path"] 72 | 73 | for group in params["datasets"]: 74 | eg = 
ExperimentGroup(group) 75 | self.experiments[group["name"]] = eg 76 | 77 | self.eval["semantic_threshold"] = params["semantic_threshold"] 78 | 79 | class GtData: 80 | def __init__(self, gt_yaml): 81 | gt_data = yaml.safe_load(open(gt_yaml)) 82 | self.task_objects = {} 83 | for task, gt_detections in yaml.safe_load(open(gt_yaml)).items(): 84 | self.task_objects[task] = [] 85 | for gt_detection in gt_detections: 86 | gt_bbox = get_open3d_bbox_from_dict(gt_detection) 87 | self.task_objects[task].append(gt_bbox) 88 | 89 | 90 | class Results: 91 | def __init__(self, num_objects, weak_recall, strict_recall, avg_iou, weak_precision, strict_precision, f1=None, tpf=-1): 92 | self.num_objects = num_objects 93 | self.weak_recall = weak_recall 94 | self.strict_recall = strict_recall 95 | self.avg_iou = avg_iou 96 | self.weak_precision = weak_precision 97 | self.strict_precision = strict_precision 98 | self.f1 = f1 99 | if self.f1 is None: 100 | if self.weak_precision == 0 and self.weak_recall == 0: 101 | self.f1 = 0 102 | else: 103 | self.f1 = 2 * self.weak_precision * self.weak_recall / \ 104 | (self.weak_precision + self.weak_recall) 105 | self.tpf = tpf 106 | 107 | def to_dict(self, decimals=3): 108 | output = {} 109 | output["avg_iou"] = round(self.avg_iou, decimals) 110 | output["strict_recall"] = round(self.strict_recall, decimals) 111 | output["weak_recall"] = round(self.weak_recall, decimals) 112 | output["strict_precision"] = round(self.strict_precision, decimals) 113 | output["weak_precision"] = round(self.weak_precision, decimals) 114 | output["num_objects"] = round(self.num_objects) 115 | output["f1"] = round(self.f1, decimals) 116 | output["tpf"] = round(self.tpf, decimals) 117 | return output 118 | 119 | def to_list(self, fields, decimals=3): 120 | mapping = self.to_dict(decimals=decimals) 121 | return [mapping[f] for f in fields] 122 | 123 | def __str__(self): 124 | return "Results:\n\ 125 | Num Objects: {}\n\ 126 | Recall (weak): {}\n\ 127 | Recall (strict): {}\n\ 128 | Avg IOU: {}\n\ 129 | Precision (weak): {}\n\ 130 | Precision (strict): {}\n\ 131 | F1: {}\n\ 132 | TPF(s): {}".format( 133 | self.num_objects, self.weak_recall, self.strict_recall, self.avg_iou, 134 | self.weak_precision, self.strict_precision, self.f1, self.tpf) 135 | 136 | def min_from_box_position(node, offset_to_lower_corner = False): 137 | # Older DSG version has mesh defined wrt lower corner of bbox. 138 | # New version is wrt center. Apply offset depending on version. 139 | if offset_to_lower_corner: 140 | offset = (node.bounding_box.dimensions / 2)[:, np.newaxis] 141 | return node.position[:, np.newaxis] - offset 142 | else: 143 | return node.position[:, np.newaxis] 144 | 145 | def max_from_box_position(node, offset_to_lower_corner = False): 146 | # Older DSG version has mesh defined wrt lower corner of bbox. 147 | # New version is wrt center. Apply offset depending on version. 
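    # e.g., a legacy-format node at position (1, 1, 1) with dimensions (2, 2, 2)
    # yields min corner (0, 0, 0) and max corner (2, 2, 2); a new-format node
    # returns position itself for both.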
148 | if offset_to_lower_corner: 149 | offset = (node.bounding_box.dimensions / 2)[:, np.newaxis] 150 | return node.position[:, np.newaxis] + offset 151 | else: 152 | return node.position[:, np.newaxis] 153 | 154 | def is_point_inside_box(point, bounds): 155 | return (bounds[0] <= point[0] <= bounds[3] and 156 | bounds[1] <= point[1] <= bounds[4] and 157 | bounds[2] <= point[2] <= bounds[5]) 158 | 159 | 160 | def obb_to_corners(obb): 161 | # Extract the eight corners of the OBB 162 | corners = np.asarray(obb.get_box_points()) 163 | return corners 164 | 165 | 166 | def get_open3d_bbox_from_dict(bbox_dict): 167 | center = np.array(bbox_dict['center'], dtype=np.float64).reshape(3, 1) 168 | extents = np.array(bbox_dict['extents'], dtype=np.float64).reshape(3, 1) 169 | quat = bbox_dict['rotation'] 170 | if type(quat) is not list: 171 | w, x, y, z = quat['w'], quat['x'], quat['y'], quat['z'] 172 | quat = [x, y, z, w] 173 | else: 174 | quat.append(quat.pop(0)) 175 | rot = sprot.from_quat(quat).as_matrix().astype(np.float64) 176 | 177 | return o3d.geometry.OrientedBoundingBox(center, rot, extents) 178 | 179 | 180 | def dsg_object_to_o3d(object_node, offset_to_lower_corner): 181 | world_t_object = min_from_box_position(object_node.attributes, offset_to_lower_corner) 182 | obj_faces = object_node.attributes.mesh().get_faces() 183 | obj_verts = object_node.attributes.mesh().get_vertices() 184 | 185 | object_vertices = obj_verts[:3, :] + world_t_object 186 | object_vertices = o3d.utility.Vector3dVector(np.transpose(object_vertices)) 187 | 188 | oriented_object_bbox = None 189 | if obj_verts.shape[1] > 3: 190 | try: 191 | oriented_object_bbox = o3d.geometry.PointCloud( 192 | object_vertices).get_minimal_oriented_bounding_box() 193 | except RuntimeError: 194 | pass 195 | 196 | o3d_mesh = o3d.geometry.TriangleMesh() 197 | o3d_mesh.vertices = object_vertices 198 | o3d_mesh.vertex_colors = o3d.utility.Vector3dVector( 199 | np.transpose(obj_verts[3:, :])) 200 | o3d_mesh.triangles = o3d.utility.Vector3iVector( 201 | np.transpose(obj_faces)) 202 | 203 | return o3d_mesh, oriented_object_bbox 204 | 205 | def get_objects_from_dsg(dsg, offset_to_lower_corner, visualize=False, layer=sdsg.DsgLayers.OBJECTS): 206 | # return list of bboxes and list of features 207 | eval_objects = [] 208 | vis_meshes = [] 209 | vis_bboxes = [] 210 | object_layer = dsg.get_layer(layer) 211 | for node in object_layer.nodes: 212 | obj_mesh, oriented_bbox = dsg_object_to_o3d(node, offset_to_lower_corner) 213 | 214 | if oriented_bbox is None: 215 | continue 216 | 217 | if visualize: 218 | vis_meshes.append(obj_mesh) 219 | oriented_bbox.color = [1, 0, 0] 220 | vis_bboxes.append(oriented_bbox) 221 | 222 | avg_feature = np.mean(np.asarray( 223 | node.attributes.semantic_feature), axis=1) 224 | eval_objects.append(EvalObjects(avg_feature, obj_mesh, oriented_bbox)) 225 | 226 | if len(vis_meshes) > 0: 227 | o3d.visualization.draw_geometries(vis_meshes + vis_bboxes) 228 | return eval_objects 229 | 230 | 231 | def get_k_relevant_objects(eval_objects, feature, k): 232 | if k > len(eval_objects): 233 | k = len(eval_objects) 234 | t = [] 235 | for object in eval_objects: 236 | temp = object.compute_similarity(feature) 237 | t.append(temp) 238 | # print('t', temp) 239 | # print('max:', max(t)) 240 | sorted_objs = sorted( 241 | eval_objects, key=lambda x: x.compute_similarity(feature), reverse=True) 242 | return sorted_objs[:k] 243 | 244 | def get_transform_from_yaml(transforms_file, scene_name): 245 | with open(transforms_file, "r") as file: 246 | 
data = yaml.safe_load(file) 247 | 248 | T = np.array(data[scene_name]['T']) 249 | scale = float(data[scene_name]['scale']) 250 | 251 | return T, scale -------------------------------------------------------------------------------- /clio_eval/visualize_objects.py: -------------------------------------------------------------------------------- 1 | import clio_eval.evaluate_helpers as eval_helpers 2 | import clio_eval.utils as eval_utils 3 | from clio_eval.viz_scene import visualize_objects_on_mesh 4 | import spark_dsg as sdsg 5 | import open3d as o3d 6 | import numpy as np 7 | import argparse 8 | import os 9 | import yaml 10 | 11 | def main(): 12 | parser = argparse.ArgumentParser( 13 | description="Visualize. ") 14 | parser.add_argument('dsg_file', type=str, default=None) 15 | parser.add_argument('tasks_yaml', type=str, default=None) 16 | parser.add_argument('mesh_file', type=str, default=None) 17 | parser.add_argument('scene_name', type=str, default=None, help="office, apartment, cubicle") 18 | args = parser.parse_args() 19 | 20 | layer = sdsg.DsgLayers.OBJECTS 21 | 22 | dsg = sdsg.DynamicSceneGraph.load(args.dsg_file) 23 | 24 | offset_to_lower_corner = False 25 | version = ['' + i for i in eval_helpers.get_dsg_version(args.dsg_file)] 26 | print('dsg version: ', version) 27 | if version == ['1', '0', '0']: 28 | offset_to_lower_corner = True 29 | est_objects = eval_utils.get_objects_from_dsg(dsg, offset_to_lower_corner, layer=layer) 30 | 31 | print("num objects: ", len(est_objects)) 32 | 33 | # colors to visualize estimated and ground truth bounding boxes 34 | green = (0.0, 1.0, 0.0) 35 | black = (0.0, 0.0, 0.0) 36 | 37 | viz_est_bboxes = [] 38 | for i in range(len(est_objects)): 39 | viz_est_bboxes += eval_helpers.generate_est_bboxes_viz( 40 | [est_objects[i]], green, add_mesh=False) 41 | 42 | viz_gt_bboxes = [] 43 | gt_data = eval_utils.GtData(args.tasks_yaml).task_objects.copy() 44 | for task in gt_data: 45 | bboxes = gt_data[task] 46 | viz_gt_bboxes.extend(eval_helpers.generate_gt_bboxes_viz(bboxes, black)) 47 | 48 | # To visualize the COLMAP mesh we have to tranform it to metric scale in the correct coordinate frame. 49 | # Get correct transform from yaml file for the scene. 
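    # scene_transforms.yaml maps each scene name to a 4x4 homogeneous transform
    # T plus a metric scale factor (see clio_eval/scene_transforms.yaml).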
50 | cwd = os.path.abspath(os.path.dirname(__file__)) 51 | scene_transforms_path = os.path.abspath(os.path.join(cwd, "scene_transforms.yaml")) 52 | transform, scale = eval_utils.get_transform_from_yaml(scene_transforms_path, args.scene_name) 53 | 54 | visualize_objects_on_mesh(args.mesh_file, viz_est_bboxes, viz_gt_bboxes, transform, scale, mesh_size=0.01) 55 | 56 | 57 | if __name__ == "__main__": 58 | main() 59 | 60 | -------------------------------------------------------------------------------- /clio_eval/viz_scene.py: -------------------------------------------------------------------------------- 1 | import seaborn as sns 2 | import open3d as o3d 3 | import numpy as np 4 | import random 5 | import yaml 6 | 7 | print("open3d version", o3d.__version__) 8 | 9 | def align_vector_to_another(a=np.array([0, 0, 1]), b=np.array([1, 0, 0])): 10 | """ 11 | Aligns vector a to vector b with axis angle rotation 12 | """ 13 | if np.array_equal(a, b): 14 | return None, None 15 | axis_ = np.cross(a, b) 16 | axis_ = axis_ / np.linalg.norm(axis_) 17 | angle = np.arccos(np.dot(a, b)) 18 | 19 | return axis_, angle 20 | 21 | def normalized(a, axis=-1, order=2): 22 | """Normalizes a numpy array of points""" 23 | l2 = np.atleast_1d(np.linalg.norm(a, order, axis)) 24 | l2[l2 == 0] = 1 25 | return a / np.expand_dims(l2, axis), l2 26 | 27 | class LineMesh(object): 28 | def __init__(self, points, lines=None, colors=[0, 1, 0], radius=0.15): 29 | self.points = np.array(points) 30 | self.lines = np.array( 31 | lines) if lines is not None else self.lines_from_ordered_points(self.points) 32 | self.points=points 33 | self.colors = np.array(colors) 34 | self.radius = radius 35 | self.cylinder_segments = [] 36 | 37 | self.create_line_mesh() 38 | 39 | @staticmethod 40 | def lines_from_ordered_points(points): 41 | lines = [[i, i + 1] for i in range(0, points.shape[0] - 1, 1)] 42 | return np.array(lines) 43 | 44 | def create_line_mesh(self): 45 | first_points = self.points[self.lines[:, 0], :] 46 | second_points = self.points[self.lines[:, 1], :] 47 | line_segments = second_points - first_points 48 | line_segments_unit, line_lengths = normalized(line_segments) 49 | 50 | z_axis = np.array([0, 0, 1]) 51 | # Create triangular mesh cylinder segments of line 52 | for i in range(line_segments_unit.shape[0]): 53 | line_segment = line_segments_unit[i, :] 54 | line_length = line_lengths[i] 55 | # get axis angle rotation to align cylinder with line segment 56 | axis, angle = align_vector_to_another(z_axis, line_segment) 57 | # Get translation vector 58 | translation = first_points[i, :] + line_segment * line_length * 0.5 59 | # create cylinder and apply transformations 60 | cylinder_segment = o3d.geometry.TriangleMesh.create_cylinder( 61 | self.radius, line_length) 62 | cylinder_segment = cylinder_segment.translate( 63 | translation, relative=False) 64 | if axis is not None: 65 | axis_a = axis * angle 66 | x = o3d.geometry.get_rotation_matrix_from_axis_angle(axis_a) 67 | cylinder_segment = cylinder_segment.rotate( 68 | R=o3d.geometry.get_rotation_matrix_from_axis_angle(axis_a), 69 | center=cylinder_segment.get_center()) 70 | 71 | # color cylinder 72 | color = self.colors if self.colors.ndim == 1 else self.colors[i, :] 73 | cylinder_segment.paint_uniform_color(color) 74 | 75 | self.cylinder_segments.append(cylinder_segment) 76 | 77 | def add_line(self, vis): 78 | """Adds this line to the visualizer""" 79 | for cylinder in self.cylinder_segments: 80 | vis.add_geometry(cylinder) 81 | 82 | def remove_line(self, vis): 83 | """Removes 
this line from the visualizer"""
84 |         for cylinder in self.cylinder_segments:
85 |             vis.remove_geometry(cylinder)
86 | 
87 | def visualize_objects_on_mesh(filename, est_bboxes, gt_bboxes, transform_matrix, scale, mesh_size=0.01):
88 | 
89 |     mesh = o3d.io.read_triangle_mesh(filename)
90 |     mesh = mesh.simplify_vertex_clustering(mesh_size)
91 | 
92 |     center = np.array([0.0, 0.0, 0.0])
93 |     mesh = mesh.scale(scale, center).transform(transform_matrix)
94 | 
95 |     visualizer = o3d.visualization.Visualizer()
96 |     visualizer.create_window()
97 | 
98 |     visualizer.add_geometry(mesh)
99 | 
100 |     # Convert bboxes to set of cylinders and add to visualization
101 |     for bboxes in [est_bboxes, gt_bboxes]:
102 |         merged_line_set = o3d.geometry.LineSet()
103 |         for est_box in bboxes:
104 |             merged_line_set += o3d.geometry.LineSet.create_from_oriented_bounding_box(est_box)
105 |             color = est_box.color
106 | 
107 |         points = np.asarray(merged_line_set.points)
108 |         lines = np.asarray(merged_line_set.lines)
109 |         line_mesh1 = LineMesh(points, lines, radius=0.02)
110 |         line_mesh1_geoms = line_mesh1.cylinder_segments
111 | 
112 |         for t_mesh in line_mesh1_geoms:
113 |             t_mesh.paint_uniform_color(color)
114 |             visualizer.add_geometry(t_mesh)
115 | 
116 |     visualizer.update_renderer()
117 |     visualizer.run()
118 | 
119 |     visualizer.destroy_window()
-------------------------------------------------------------------------------- /clio_ros/CMakeLists.txt: --------------------------------------------------------------------------------
1 | cmake_minimum_required(VERSION 3.14)
2 | project(clio_ros)
3 | 
4 | find_package(catkin REQUIRED)
5 | 
6 | catkin_package(
7 |   CATKIN_DEPENDS
8 |   DEPENDS
9 |   INCLUDE_DIRS
10 |   LIBRARIES
11 | )
12 | 
13 | install(DIRECTORY launch/ DESTINATION ${CATKIN_PACKAGE_SHARE_DESTINATION}/launch)
14 | install(DIRECTORY config/ DESTINATION ${CATKIN_PACKAGE_SHARE_DESTINATION}/config)
15 | install(DIRECTORY rviz/ DESTINATION ${CATKIN_PACKAGE_SHARE_DESTINATION}/rviz)
16 | 
-------------------------------------------------------------------------------- /clio_ros/app/task_server: --------------------------------------------------------------------------------
1 | #!/usr/bin/env python3
2 | """Encode tasks as embeddings."""
3 | import pathlib
4 | 
5 | import rospy
6 | import yaml
7 | from semantic_inference_msgs.msg import FeatureVectors
8 | from semantic_inference_msgs.srv import EncodeFeature
9 | 
10 | 
11 | def _parse_tasks(prefix):
12 |     tasks_file = rospy.get_param(f"~{prefix}_file", "")
13 |     tasks = rospy.get_param(f"~{prefix}", [])
14 |     if tasks_file == "":
15 |         return tasks
16 | 
17 |     tasks_file = pathlib.Path(tasks_file).expanduser().absolute()
18 |     if not tasks_file.exists():
19 |         rospy.logerr(f"[{rospy.get_name()}] Invalid {prefix} file @ {tasks_file}")
20 |         return tasks
21 | 
22 |     with tasks_file.open("r") as fin:
23 |         contents = yaml.safe_load(fin.read())
24 |     return [x for x in contents]
25 | 
26 | 
27 | class TaskServer:
28 |     """Server to compute and distribute task embeddings."""
29 | 
30 |     def __init__(self):
31 |         """Get embeddings for tasks."""
32 |         self._object_pub = rospy.Publisher(
33 |             "~objects", FeatureVectors, queue_size=1, latch=True
34 |         )
35 |         self._place_pub = rospy.Publisher(
36 |             "~places", FeatureVectors, queue_size=1, latch=True
37 |         )
38 | 
39 |         msg = self._embed_tasks(_parse_tasks("object_tasks"))
40 |         self._object_pub.publish(msg)
41 | 
42 |         msg = self._embed_tasks(_parse_tasks("place_tasks"))
43 |         self._place_pub.publish(msg)
44 | 
45 |     def _embed_tasks(self, tasks, 
service_name="semantic_inference/embed"): 46 | rospy.loginfo(f"[{rospy.get_name()}] Waiting for {service_name}") 47 | rospy.wait_for_service(service_name) 48 | proxy = rospy.ServiceProxy(service_name, EncodeFeature) 49 | rospy.loginfo(f"[{rospy.get_name()}] Encoding features for {tasks}") 50 | 51 | msg = FeatureVectors() 52 | for task in tasks: 53 | msg.names.append(task) 54 | msg.features.append(proxy(task).feature.feature) 55 | 56 | rospy.loginfo(f"[{rospy.get_name()}] Finished encoding features!") 57 | return msg 58 | 59 | def spin(self): 60 | """Wait for ros to shutdown.""" 61 | rospy.spin() 62 | 63 | 64 | def main(): 65 | """Start the task server.""" 66 | rospy.init_node("task_server") 67 | server = TaskServer() 68 | server.spin() 69 | 70 | 71 | if __name__ == "__main__": 72 | main() 73 | -------------------------------------------------------------------------------- /clio_ros/config/realsense/pipeline.yaml: -------------------------------------------------------------------------------- 1 | --- 2 | ############################ 3 | # Pipeline Configuration # 4 | ############################ 5 | exit_after_clock: $(arg exit_after_clock) 6 | log_path: $(arg log_path) 7 | enable_lcd: false 8 | ############################# 9 | # Input Module Parameters # 10 | ############################# 11 | input: 12 | type: RosInput 13 | inputs: 14 | camera: 15 | receiver: 16 | type: OpenSetImageReceiver 17 | sensor: 18 | type: camera 19 | width: 640 20 | height: 360 21 | cx: 315.9902073557914 22 | cy: 254.59437002701452 23 | fx: 372.4634094238281 24 | fy: 371.5072021484375 25 | min_range: $(arg sensor_min_range) 26 | max_range: $(arg sensor_max_range) 27 | extrinsics: 28 | type: identity 29 | ############################################# 30 | # Active-Window Reconstruction Parameters # 31 | ############################################# 32 | active_window: 33 | type: ActiveWindow 34 | volumetric_map: 35 | voxel_size: 0.10 36 | truncation_distance: 0.25 37 | voxels_per_side: 16 38 | object_detector: 39 | type: InstanceForwarding 40 | max_range: $(arg sensor_max_range) 41 | tracker: 42 | type: MaxIouTracker 43 | track_by: voxels 44 | projective_integrator: 45 | max_weight: 100000 46 | interpolation_method: adaptive 47 | object_extractor: 48 | type: MeshObjectExtractor 49 | projective_integrator: 50 | num_threads: 2 51 | semantic_integrator: 52 | type: BinarySemanticIntegrator 53 | ######################### 54 | # Frontend Parameters # 55 | ######################### 56 | frontend: 57 | type: GraphBuilder 58 | pgmo: 59 | horizon: 60.0 60 | d_graph_resolution: 2.5 61 | output_mesh_resolution: 0.02 62 | freespace_places: 63 | type: gvd 64 | gvd: 65 | max_distance_m: 3.0 66 | min_diff_m: 0.1 67 | voronoi_config: 68 | min_distance_m: 0.4 69 | parent_l1_separation: 25 70 | parent_cos_angle_separation: 0.1 71 | graph: 72 | type: CompressionGraphExtractor 73 | compression_distance_m: 1.0 74 | min_node_distance_m: 0.4 75 | min_edge_distance_m: 0.25 76 | node_merge_distance_m: 0.5 77 | add_heuristic_edges: true 78 | add_overlap_edges: false 79 | add_freespace_edges: true 80 | freespace_edges: 81 | min_clearance_m: 0.3 82 | num_nodes_to_check: 10 83 | num_neighbors_to_find: 3 84 | view_database: 85 | view_selection_method: fusion 86 | ######################## 87 | # Backend Parameters # 88 | ######################## 89 | backend: 90 | type: BackendModule 91 | enable_node_merging: true 92 | update_functors: 93 | agents: 94 | type: UpdateAgentsFunctor 95 | objects: 96 | type: ObjectsIBFunctor 97 | tasks: 98 | ns: 
~objects 99 | type: RosEmbeddingGroup 100 | silent_wait: false 101 | places: 102 | type: UpdatePlacesFunctor 103 | tasks: 104 | type: RosEmbeddingGroup 105 | regions: 106 | type: RegionsIBFunctor 107 | clustering: 108 | tasks: 109 | ns: ~places 110 | type: RosEmbeddingGroup 111 | silent_wait: false 112 | selector: 113 | max_delta: 0.010 114 | score_threshold: 0.21 115 | top_k: 2 116 | cumulative: true 117 | null_task_preprune: true 118 | pgmo: 119 | run_mode: FULL 120 | embed_trajectory_delta_t: 5.0 121 | num_interp_pts: 3 122 | interp_horizon: 10.0 123 | enable_sparsify: false 124 | trans_node_dist: 1.0 125 | rot_node_dist: 1.2 126 | rpgo: 127 | odom_trans_threshold: 0.05 128 | odom_rot_threshold: 0.01 129 | pcm_trans_threshold: -1 130 | pcm_rot_threshold: -1 131 | gnc_alpha: 0.9 132 | gnc_mu_step: 1.6 133 | gnc_fix_prev_inliers: true 134 | verbosity: UPDATE 135 | solver: LM 136 | covariance: 137 | odom: 1.0e-02 138 | loop_close: 5.0e-02 139 | sg_loop_close: 1.0e-01 140 | prior: 1.0e-02 141 | mesh_mesh: 1.0e-02 142 | pose_mesh: 1.0e-02 143 | place_mesh: 1.0e-02 144 | place_edge: 10.0 145 | place_merge: 10.0 146 | object_merge: 10.0 147 | -------------------------------------------------------------------------------- /clio_ros/config/realsense/realsense.yaml: -------------------------------------------------------------------------------- 1 | --- 2 | ############################################# 3 | # Active-Window Reconstruction Parameters # 4 | ############################################# 5 | active_window: 6 | frame_data_buffer: 7 | max_buffer_size: 300 8 | store_every_n_frames: 1 9 | object_extractor: 10 | min_object_allocation_confidence: 0.5 11 | min_object_volume: 0.0 # m^3 12 | max_object_volume: 8.0 # m^3 13 | only_extract_reconstructed_objects: true 14 | min_object_reconstruction_confidence: 0.5 15 | min_object_reconstruction_observations: 3 16 | object_reconstruction_resolution: -0.03 # Positive: voxel size in meters. Negative: fraction of the extent. 0: Skip. (-0.02) 17 | object_detector: 18 | min_cluster_size: 20 # pixels 19 | max_cluster_size: 50000 # pixels 20 | tracker: 21 | semantic_association: assign_track # 'assign_cluster', 'assign_track' 22 | min_semantic_iou: 0.4 23 | min_cosine_sim: 0.7 24 | min_cross_iou: 0.1 25 | temporal_window: 5.0 26 | min_num_observations: 5 27 | ######################## 28 | # Backend Parameters # 29 | ######################## 30 | backend: 31 | update_functors: 32 | objects: 33 | min_segment_score: 0.0 34 | selector: 35 | max_delta: 1.0e-5 36 | score_threshold: 0.23 37 | top_k: 2 38 | cumulative: true 39 | null_task_preprune: true 40 | -------------------------------------------------------------------------------- /clio_ros/config/realsense/realsense_fine.yaml: -------------------------------------------------------------------------------- 1 | --- 2 | ############################################# 3 | # Active-Window Reconstruction Parameters # 4 | ############################################# 5 | active_window: 6 | frame_data_buffer: 7 | max_buffer_size: 300 8 | store_every_n_frames: 1 9 | object_extractor: 10 | min_object_allocation_confidence: 0.5 11 | min_object_volume: 0.0 # m^3 12 | max_object_volume: 8.0 # m^3 13 | only_extract_reconstructed_objects: true 14 | min_object_reconstruction_confidence: 0.5 15 | min_object_reconstruction_observations: 2 16 | object_reconstruction_resolution: -0.03 # Positive: voxel size in meters. Negative: fraction of the extent. 0: Skip. 
(-0.02) 17 | object_detector: 18 | min_cluster_size: 20 # pixels 19 | max_cluster_size: 50000 # pixels 20 | tracker: 21 | semantic_association: assign_track # 'assign_cluster', 'assign_track' 22 | min_semantic_iou: 0.6 23 | min_cosine_sim: 0.9 24 | min_cross_iou: 0.1 25 | temporal_window: 5.0 26 | min_num_observations: 2 27 | ######################## 28 | # Backend Parameters # 29 | ######################## 30 | backend: 31 | update_functors: 32 | objects: 33 | min_segment_score: 0.0 34 | selector: 35 | max_delta: 1.0e-5 36 | score_threshold: 0.23 37 | top_k: 2 38 | cumulative: true 39 | null_task_preprune: true 40 | -------------------------------------------------------------------------------- /clio_ros/config/segmentation/large_clip.yaml: -------------------------------------------------------------------------------- 1 | --- 2 | segmentation: 3 | type: fastsam 4 | confidence: 0.55 5 | iou: 0.85 6 | output_size: 640 7 | refinement: 8 | dilate_masks: true 9 | clip_model: 10 | type: clip 11 | model_name: ViT-L/14 # ViT-L/14, ViT-B/32 12 | # controls pixel-aligned features 13 | use_dense: false 14 | dense_ratio: 0.9 15 | -------------------------------------------------------------------------------- /clio_ros/config/segmentation/open_clip.yaml: -------------------------------------------------------------------------------- 1 | --- 2 | segmentation: 3 | type: fastsam 4 | confidence: 0.55 5 | iou: 0.85 6 | output_size: 640 7 | refinement: 8 | dilate_masks: true 9 | clip_model: 10 | type: open_clip 11 | model_name: ViT-H-14 12 | # controls pixel-aligned features 13 | use_dense: false # must be false for openclip 14 | -------------------------------------------------------------------------------- /clio_ros/config/segmentation/small_clip.yaml: -------------------------------------------------------------------------------- 1 | --- 2 | segmentation: 3 | type: fastsam 4 | confidence: 0.55 5 | iou: 0.85 6 | output_size: 640 7 | refinement: 8 | dilate_masks: true 9 | clip_model: 10 | type: clip 11 | model_name: ViT-B/32 # ViT-L/14, ViT-B/32 12 | # controls pixel-aligned features 13 | use_dense: false 14 | dense_ratio: 0.9 15 | -------------------------------------------------------------------------------- /clio_ros/config/visualizer/default.yaml: -------------------------------------------------------------------------------- 1 | --- 2 | external_plugins: 3 | allow_plugins: true 4 | trace_plugin_allocations: true 5 | verbose_plugins: true 6 | paths: [hydra_ros] 7 | plugins: 8 | dsg_mesh: 9 | type: MeshPlugin 10 | use_color_adaptor: false 11 | khronos_objects: 12 | type: KhronosObjectPlugin 13 | dynamic_color_mode: CONSTANT 14 | config: 15 | layer_z_step: 3.0 # unit separation between layers 16 | collapse_layers: false 17 | dynamic_layer: 18 | layer2: 19 | visualize: true 20 | node_scale: 0.035 21 | layer1: 22 | visualize: true 23 | marker_scale: 0.05 24 | marker_color_mode: NearestFeatureColor 25 | color_settings: {features: {ns: ~objects, silent_wait: false}, colormap: {palette: distinct150}} 26 | use_bounding_box: false 27 | bounding_box_scale: 0.01 28 | interlayer_edge_scale: 0.01 29 | interlayer_edge_alpha: 0.0 30 | edge_scale: 0.005 31 | edge_alpha: 0.2 32 | layer2: 33 | z_offset_scale: 1.0 34 | visualize: true 35 | marker_scale: 0.15 36 | marker_color_mode: NearestFeatureColor 37 | color_settings: {features: {ns: ~objects, silent_wait: false}, colormap: {palette: distinct150}} 38 | use_label: true 39 | label_mode: NearestFeatureLabel 40 | label_settings: {features: {ns: ~objects, 
silent_wait: true}, label_width: 20} 41 | label_scale: 0.25 42 | use_bounding_box: true 43 | bounding_box_scale: 0.03 44 | collapse_bounding_box: true 45 | interlayer_edge_scale: 0.01 46 | interlayer_edge_alpha: 0.4 47 | layer3: 48 | # general 49 | z_offset_scale: 1.5 50 | visualize: true 51 | # nodes 52 | marker_scale: 0.15 53 | marker_color_mode: NearestFeatureColor 54 | color_settings: {features: {ns: ~places, silent_wait: false}, colormap: {palette: colorbrewer}} 55 | marker_alpha: 0.8 56 | use_sphere_marker: true 57 | # edges 58 | edge_scale: 0.005 59 | edge_alpha: 0.5 60 | edge_use_color: false 61 | interlayer_edge_use_source: false 62 | interlayer_edge_scale: 0.02 63 | interlayer_edge_alpha: 0.4 64 | interlayer_edge_use_color: true 65 | interlayer_edge_insertion_skip: 0 66 | layer4: 67 | z_offset_scale: 2.0 68 | visualize: true 69 | marker_scale: 0.20 70 | marker_color_mode: NearestFeatureColor 71 | color_settings: {features: {ns: ~places, silent_wait: false}, colormap: {palette: colorbrewer}} 72 | use_label: true 73 | label_mode: NearestFeatureLabel 74 | label_settings: {features: {ns: ~places, silent_wait: true}, label_width: 20} 75 | label_scale: 0.25 76 | interlayer_edge_scale: 0.01 77 | interlayer_edge_alpha: 0.4 78 | layer5: 79 | visualize: false 80 | -------------------------------------------------------------------------------- /clio_ros/launch/clio.launch: -------------------------------------------------------------------------------- [launch XML markup was stripped during extraction; only the rosparam payloads survive:] {object_tasks: $(arg object_tasks)} {place_tasks: $(arg place_tasks)} {features: {type: FeatureReceiver}} {allow_plugins: true, paths: [khronos, khronos_ros, clio]} {segments: s, objects: o, places: p, rooms: r} false {objects: {prefix: s, target_layer: 1}} [{parent_layer: 3, child_layers: [{layer: 1}, {layer: 2}]}] -------------------------------------------------------------------------------- /clio_ros/launch/realsense.launch: -------------------------------------------------------------------------------- [launch XML markup was stripped during extraction; no content is recoverable] -------------------------------------------------------------------------------- /clio_ros/package.xml: -------------------------------------------------------------------------------- [XML tags below are reconstructed from the surviving text; the original markup was stripped during extraction] 1 | <?xml version="1.0"?> 2 | <package format="2"> 3 | <name>clio_ros</name> 4 | <version>0.0.1</version> 5 | <description>Clio paper ros code and integration</description> 6 | 7 | <maintainer email="...">Nathan Hughes</maintainer> 8 | <author>Nathan Hughes</author> 9 | 10 | <license>BSD</license> 11 | 12 | <buildtool_depend>catkin</buildtool_depend> 13 | <depend>clio</depend> 14 | <depend>hydra_ros</depend> 15 | <depend>hydra_visualizer</depend> 16 | <depend>khronos_ros</depend> 17 | 18 | <export> 19 | </export> 20 | 21 | 22 | </package> -------------------------------------------------------------------------------- /clio_ros/rviz/default.rviz: -------------------------------------------------------------------------------- 1 | Panels: 2 | - Class: rviz/Displays 3 | Help Height: 0 4 | Name: Displays 5 | Property Tree Widget: 6 | Expanded: 7 | - /Active Window1 8 | Splitter Ratio: 0.5 9 | Tree Height: 650 10 | - Class: rviz/Selection 11 | Name: 
Selection 12 | - Class: rviz/Tool Properties 13 | Expanded: 14 | - /2D Pose Estimate1 15 | - /2D Nav Goal1 16 | - /Publish Point1 17 | Name: Tool Properties 18 | Splitter Ratio: 0.5886790156364441 19 | - Class: rviz/Views 20 | Expanded: 21 | - /Current View1 22 | Name: Views 23 | Splitter Ratio: 0.5 24 | - Class: rviz/Time 25 | Name: Time 26 | SyncMode: 0 27 | SyncSource: Segmentation 28 | Preferences: 29 | PromptSaveOnExit: true 30 | Toolbars: 31 | toolButtonStyle: 2 32 | Visualization Manager: 33 | Class: "" 34 | Displays: 35 | - Class: rviz/TF 36 | Enabled: true 37 | Filter (blacklist): khronos_object 38 | Filter (whitelist): "" 39 | Frame Timeout: 15 40 | Frames: 41 | All Enabled: true 42 | dominic/forward_link: 43 | Value: true 44 | world: 45 | Value: true 46 | Marker Alpha: 1 47 | Marker Scale: 1 48 | Name: TF 49 | Show Arrows: true 50 | Show Axes: true 51 | Show Names: true 52 | Tree: 53 | world: 54 | dominic/forward_link: 55 | {} 56 | Update Interval: 0 57 | Value: true 58 | - Class: rviz/MarkerArray 59 | Enabled: true 60 | Marker Topic: /clio_visualizer/dsg_markers 61 | Name: Scene Graph 62 | Namespaces: 63 | "dynamic_edges_\x00": true 64 | "dynamic_label_\x00": true 65 | "dynamic_nodes_\x00": true 66 | interlayer_edges_3_1: true 67 | layer_bounding_boxes_1: true 68 | layer_edges_3: true 69 | layer_nodes_1: true 70 | layer_nodes_3: true 71 | Queue Size: 100 72 | Value: true 73 | - Class: kimera_pgmo_rviz/MeshDisplay 74 | Cull Backfaces: true 75 | Enable Lighting: false 76 | Enabled: true 77 | Name: Background 78 | Queue Size: 10 79 | Toggle Visibility All: true 80 | Topic: /clio_visualizer/dsg_mesh 81 | Unreliable: false 82 | Value: true 83 | Visible: 84 | Value: true 85 | robot0: 86 | Value: true 87 | dsg_mesh: true 88 | - Class: kimera_pgmo_rviz/MeshDisplay 89 | Cull Backfaces: false 90 | Enable Lighting: false 91 | Enabled: true 92 | Name: Object Meshes 93 | Queue Size: 10 94 | Toggle Visibility All: true 95 | Topic: /clio_visualizer/khronos_objects/static_objects 96 | Unreliable: false 97 | Value: true 98 | Visible: true 99 | - Class: rviz/Group 100 | Displays: 101 | - Class: kimera_pgmo_rviz/MeshDisplay 102 | Cull Backfaces: true 103 | Enable Lighting: false 104 | Enabled: false 105 | Name: Active Mesh 106 | Queue Size: 10 107 | Toggle Visibility All: true 108 | Topic: /clio_node/reconstruction/mesh 109 | Unreliable: false 110 | Value: false 111 | Visible: true 112 | - Class: rviz/Image 113 | Enabled: true 114 | Image Topic: /clio_node/reconstruction/camera/labels 115 | Max Value: 1 116 | Median window: 5 117 | Min Value: 0 118 | Name: Segmentation 119 | Normalize Range: true 120 | Queue Size: 2 121 | Transport Hint: raw 122 | Unreliable: false 123 | Value: true 124 | Enabled: true 125 | Name: Active Window 126 | Enabled: true 127 | Global Options: 128 | Background Color: 255; 255; 255 129 | Default Light: true 130 | Fixed Frame: world 131 | Frame Rate: 30 132 | Name: root 133 | Tools: 134 | - Class: rviz/Interact 135 | Hide Inactive Objects: true 136 | - Class: rviz/MoveCamera 137 | - Class: rviz/Select 138 | - Class: rviz/FocusCamera 139 | - Class: rviz/Measure 140 | - Class: rviz/SetInitialPose 141 | Theta std deviation: 0.2617993950843811 142 | Topic: /initialpose 143 | X std deviation: 0.5 144 | Y std deviation: 0.5 145 | - Class: rviz/SetGoal 146 | Topic: /move_base_simple/goal 147 | - Class: rviz/PublishPoint 148 | Single click: true 149 | Topic: /clicked_point 150 | Value: true 151 | Views: 152 | Current: 153 | Class: rviz/Orbit 154 | Distance: 22.5767879486084 155 | 
Enable Stereo Rendering: 156 | Stereo Eye Separation: 0.05999999865889549 157 | Stereo Focal Distance: 1 158 | Swap Stereo Eyes: false 159 | Value: false 160 | Field of View: 0.7853981852531433 161 | Focal Point: 162 | X: -4.554806232452393 163 | Y: 5.374069690704346 164 | Z: -1.520478367805481 165 | Focal Shape Fixed Size: false 166 | Focal Shape Size: 0.05000000074505806 167 | Invert Z Axis: false 168 | Name: Current View 169 | Near Clip Distance: 0.009999999776482582 170 | Pitch: 0.49479618668556213 171 | Target Frame: dominic/forward_link 172 | Yaw: 5.04360294342041 173 | Saved: ~ 174 | Window Geometry: 175 | Displays: 176 | collapsed: false 177 | Height: 1003 178 | Hide Left Dock: false 179 | Hide Right Dock: false 180 | QMainWindow State: 000000ff00000000fd00000004000000000000015600000395fc020000000efb000000100044006900730070006c006100790073010000003b000002c5000000c700fffffffb0000001200530065006c0065006300740069006f006e000000003b0000018f0000005c00fffffffb0000001e0054006f006f006c002000500072006f007000650072007400690065007302000001ed000001df00000185000000a3fb000000120056006900650077007300200054006f006f02000001df000002110000018500000122fb000000200054006f006f006c002000500072006f0070006500720074006900650073003203000002880000011d000002210000017afb0000002000730065006c0065006300740069006f006e00200062007500660066006500720200000138000000aa0000023a00000294fb00000014005700690064006500530074006500720065006f02000000e6000000d2000003ee0000030bfb0000000c004b0069006e0065006300740200000186000001060000030c00000261fb0000000a00440065007000740068000000036d0000006e0000000000000000fb0000001200530065006d0061006e00740069006300730000000382000000590000000000000000fb00000006005200470042000000020a000001d10000000000000000fb0000000a0049006d0061006700650000000308000000c80000000000000000fb0000000a0049006d0061006700650000000305000000cb0000000000000000fb00000018005300650067006d0065006e0074006100740069006f006e0100000306000000ca0000001600ffffff000000010000010f0000039dfc0200000003fb0000001e0054006f006f006c002000500072006f00700065007200740069006500730100000041000000780000000000000000fb0000000a0056006900650077007302000006710000009b0000010f0000039dfb0000001200530065006c0065006300740069006f006e010000025a000000b200000000000000000000000200000490000000a9fc0100000001fb0000000a00560069006500770073030000004e00000080000002e10000019700000003000007800000003efc0100000002fb0000000800540069006d00650200000000000002580000035e000001e0fb0000000800540069006d006501000000000000045000000000000000000000061c0000039500000004000000040000000800000008fc0000000100000002000000010000000a0054006f006f006c00730100000000ffffffff0000000000000000 181 | Segmentation: 182 | collapsed: false 183 | Selection: 184 | collapsed: false 185 | Time: 186 | collapsed: false 187 | Tool Properties: 188 | collapsed: false 189 | Views: 190 | collapsed: false 191 | Width: 1912 192 | X: 72 193 | Y: 32 194 | -------------------------------------------------------------------------------- /install/clio.rosinstall: -------------------------------------------------------------------------------- 1 | - git: 2 | local-name: catkin_simple 3 | uri: git@github.com:catkin/catkin_simple.git 4 | - git: 5 | local-name: config_utilities 6 | uri: git@github.com:MIT-SPARK/config_utilities.git 7 | - git: 8 | local-name: hydra 9 | uri: git@github.com:MIT-SPARK/Hydra.git 10 | version: clio 11 | - git: 12 | local-name: hydra_ros 13 | uri: git@github.com:MIT-SPARK/Hydra-ROS.git 14 | version: clio 15 | - git: 16 | local-name: khronos 17 | uri: git@github.com:MIT-SPARK/Khronos.git 18 | version: clio 
19 | - git: 20 | local-name: kimera_rpgo 21 | uri: git@github.com:MIT-SPARK/Kimera-RPGO.git 22 | - git: 23 | local-name: kimera_pgmo 24 | uri: git@github.com:MIT-SPARK/Kimera-PGMO.git 25 | version: clio 26 | - git: 27 | local-name: pose_graph_tools 28 | uri: git@github.com:MIT-SPARK/pose_graph_tools.git 29 | - git: 30 | local-name: semantic_inference 31 | uri: git@github.com:MIT-SPARK/semantic_inference.git 32 | - git: 33 | local-name: spark_dsg 34 | uri: git@github.com:MIT-SPARK/Spark-DSG.git 35 | version: main 36 | - git: 37 | local-name: spatial_hash 38 | uri: git@github.com:MIT-SPARK/Spatial-Hash.git 39 | - git: 40 | local-name: teaser_plusplus 41 | uri: git@github.com:MIT-SPARK/TEASER-plusplus.git 42 | -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- 1 | [build-system] 2 | requires = ["setuptools"] 3 | build-backend = "setuptools.build_meta" 4 | 5 | [project] 6 | name = "clio" 7 | version = "0.0.1" 8 | dependencies = [ 9 | "click", 10 | "clip@git+https://github.com/openai/CLIP", 11 | "distinctipy", 12 | "matplotlib", 13 | "numpy==1.23.5", 14 | "open_clip_torch", 15 | "open3d>=0.17.0", 16 | "opencv-python", 17 | "pandas", 18 | "seaborn>=0.11.0", 19 | "spark_dsg@git+https://github.com/MIT-SPARK/Spark-DSG.git@clio", 20 | "tabulate", 21 | "torch==2.0.1", 22 | "tqdm", 23 | ] 24 | 25 | [project.optional-dependencies] 26 | dev = ["pytest"] 27 | 28 | [tool.setuptools.packages.find] 29 | include = ["clio_eval", "clio_data_maker", "clio_batch"] 30 | namespaces = false 31 | 32 | [tool.pytest.ini_options] 33 | filterwarnings = [ 34 | "ignore:::.*clip.clip:", 35 | "ignore:::.*pkg_resources:", 36 | ] 37 | -------------------------------------------------------------------------------- /test/conftest.py: -------------------------------------------------------------------------------- 1 | """Fixtures for unit tests.""" 2 | 3 | import pytest 4 | import pathlib 5 | 6 | 7 | @pytest.fixture() 8 | def resources(): 9 | """Get a path to the resource directory for tests.""" 10 | return pathlib.Path(__file__).absolute().parent / "resources" 11 | -------------------------------------------------------------------------------- /test/resources/test_cluster_config.yaml: -------------------------------------------------------------------------------- 1 | debug: False 2 | show_all_steps: False 3 | debug_folder: /home/yunchang/logs/llm_graphs/ib_agg/requery 4 | vis_weight_graph: False 5 | show_cond_prob: False 6 | 7 | #### Segmentation configs 8 | use_fast_sam: True # if False, the default SAM conf, iou, and size values from CG are used, bypassing the values set here 9 | resize_img: False 10 | filter_sam_artifacts: True 11 | fast_sam_conf: 0.55 12 | fast_sam_iou: 0.85 13 | fast_sam_output_size: 640 14 | use_torch_refinement: True 15 | 16 | #### CLIP configs 17 | use_lerf_loss: False 18 | lerf_loss_cannonical_phrases: 19 | - things 20 | - stuff 21 | - texture 22 | - object 23 | clip_model: ViT-L/14 # ViT-L/14, ViT-B/32, ViT-H-14 (runs open_clip) 24 | small_segment: 1 # below this segment size we use the bounding box instead of the mask 25 | use_pixel_aligned: False 26 | ratio_of_pixel_aligned: 0.9 27 | 28 | #### Clustering configs 29 | sims_thres: 0.2 30 | delta: 0.001 # 0.0001 31 | top_k_tasks: 2 32 | cumulative: True -------------------------------------------------------------------------------- /test/resources/test_tasks.yaml: -------------------------------------------------------------------------------- 1 | 
bring me a pillow: [] 2 | clean toaster: [] 3 | find deck of cards: [] -------------------------------------------------------------------------------- /test/test_eval_utils.py: -------------------------------------------------------------------------------- 1 | """Unit tests for eval utils.""" 2 | 3 | import clio_eval.utils as eval_utils 4 | import pytest 5 | import open3d as o3d 6 | import numpy as np 7 | 8 | 9 | def _generate_test_eval_objs(): 10 | bbox_1 = o3d.geometry.OrientedBoundingBox() 11 | bbox_1.center = [0, 0, 0] 12 | bbox_1.extent = [1, 1, 1] 13 | obj_1 = eval_utils.EvalObjects( 14 | np.array([0, 0, 1]), o3d.geometry.TriangleMesh(), bbox_1 15 | ) 16 | 17 | bbox_2 = o3d.geometry.OrientedBoundingBox() 18 | bbox_2.center = [0, 0, 0] 19 | bbox_2.extent = [2, 2, 2] 20 | obj_2 = eval_utils.EvalObjects( 21 | np.array([0, 1, 0]), o3d.geometry.TriangleMesh(), bbox_2 22 | ) 23 | 24 | return obj_1, obj_2 25 | 26 | 27 | def test_compute_similarity(): 28 | """Test that cosine similarity works as expected.""" 29 | obj_1, obj_2 = _generate_test_eval_objs() 30 | 31 | assert 1.0 == obj_1.compute_similarity(obj_1.feature) 32 | assert 1.0 == obj_2.compute_similarity(obj_2.feature) 33 | assert 0.0 == obj_1.compute_similarity(obj_2.feature) 34 | 35 | 36 | def test_compute_iou(): 37 | """Test that IoU works as expected.""" 38 | obj_1, obj_2 = _generate_test_eval_objs() 39 | 40 | assert 1.0 == pytest.approx(obj_1.compute_iou(obj_1.oriented_bbox)) 41 | assert 1 / 8 == pytest.approx(obj_1.compute_iou(obj_2.oriented_bbox)) 42 | assert 1 / 8 == pytest.approx(obj_2.compute_iou(obj_1.oriented_bbox)) 43 | 44 | bbox_test = o3d.geometry.OrientedBoundingBox() 45 | bbox_test.extent = [1, 1, 1] 46 | 47 | bbox_test.center = [1.1, 1, 1] # avoids qhull issues 48 | assert 0.0 == obj_1.compute_iou(bbox_test) 49 | 50 | bbox_test.center = [1, 1, 1] 51 | assert 1 / 71 == pytest.approx(obj_2.compute_iou(bbox_test)) 52 | -------------------------------------------------------------------------------- /test/test_helpers.py: -------------------------------------------------------------------------------- 1 | """Unit tests for cluster helpers.""" 2 | 3 | import clio_batch.helpers as helpers 4 | import numpy as np 5 | 6 | 7 | def test_compute_cosine_sim(): 8 | """Test compute cosine sim.""" 9 | v1 = np.array([1, 0, 0]) 10 | v2 = np.array([0, 0, 1]) 11 | v3 = np.array([-1, 0, 0]) 12 | assert 1 == helpers.compute_cosine_sim(v1, v1) 13 | assert 0 == helpers.compute_cosine_sim(v1, v2) 14 | assert -1 == helpers.compute_cosine_sim(v1, v3) 15 | 16 | 17 | def test_compute_cosine_sim_multi_single(): 18 | """Test compute cosine sim with single and multiple columns.""" 19 | v1 = np.array([1, 0, 0]) 20 | v2 = np.array([[1, 0, 0], [0, 0, 1], [-1, 0, 0]]) 21 | sims = helpers.compute_cosine_sim(v1, v2) 22 | assert sims.shape == (1, 3) 23 | assert 1 == sims[0, 0] 24 | assert 0 == sims[0, 1] 25 | assert -1 == sims[0, 2] 26 | 27 | 28 | def test_compute_cosine_sim_multi_multi(): 29 | """Test compute cosine sim with multiple and multiple columns.""" 30 | # In this case we take the max wrt v1 31 | v1 = np.array([[1, 0, 0], [0, 0, 1]]) 32 | v2 = np.array([[1, 0, 0], [0, 0, 1], [-1, 0, 0]]) 33 | sims = helpers.compute_cosine_sim(v1, v2) 34 | assert sims.shape == (1, 3) 35 | assert 1 == sims[0, 0] 36 | assert 1 == sims[0, 1] 37 | assert 0 == sims[0, 2] 38 | 39 | 40 | def test_compute_sim_to_tasks(): 41 | """Test compute cosine sim to multiple tasks.""" 42 | # In this case we preserve all similarities 43 | v1 = np.array([[1, 0, 0], [0, 0, 1]]) 44 | 
v2 = np.array([[1, 0, 0], [0, 0, 1], [-1, 0, 0]]) 45 | sims = helpers.compute_sim_to_tasks(v1, v2) 46 | assert sims.shape == (2, 3) 47 | assert 1 == sims[0, 0] 48 | assert 0 == sims[0, 1] 49 | assert -1 == sims[0, 2] 50 | assert 0 == sims[1, 0] 51 | assert 1 == sims[1, 1] 52 | assert 0 == sims[1, 2] 53 | 54 | 55 | def test_parse_tasks_from_yaml(resources): 56 | """Test parsing tasks from a yaml file.""" 57 | yaml_path = resources / "test_tasks.yaml" 58 | prompts = helpers.parse_tasks_from_yaml(yaml_path) 59 | assert 3 == len(prompts) 60 | assert "bring me a pillow" == prompts[0] 61 | assert "clean toaster" == prompts[1] 62 | assert "find deck of cards" == prompts[2] 63 | -------------------------------------------------------------------------------- /test/test_ib_cluster.py: -------------------------------------------------------------------------------- 1 | """Unit tests for ib clustering.""" 2 | 3 | import clio_batch.ib_cluster as ib_cluster 4 | import pytest 5 | import os 6 | import numpy as np 7 | 8 | 9 | def initialize_cluster_module(): 10 | cwd = os.path.abspath(os.path.dirname(__file__)) 11 | yaml_path = os.path.join(cwd, "resources", "test_cluster_config.yaml") 12 | cluster_config = ib_cluster.ClusterIBConfig(yaml_path) 13 | return ib_cluster.ClusterIB(cluster_config) 14 | 15 | 16 | def test_setup_py_x(): 17 | """Test py_x initialization for top-k equal to 2.""" 18 | region_features = np.array([[1, 0, 0], [1, 1, 0], [0, 1, 0]]) 19 | task_features = np.array([[1, 0, 0], [0, 1, 0]]) 20 | clusterer = initialize_cluster_module() 21 | clusterer.setup_py_x(region_features, task_features) 22 | 23 | assert (3,) == clusterer.px.shape 24 | assert (3, 3) == clusterer.py_x.shape 25 | assert pytest.approx(0.0909, 1e-3) == clusterer.py_x[0, 0] 26 | assert pytest.approx(0.9091, 1e-3) == clusterer.py_x[1, 0] 27 | assert pytest.approx(0) == clusterer.py_x[2, 0] 28 | assert pytest.approx(0) == clusterer.py_x[0, 1] 29 | assert pytest.approx(0.3333, 1e-3) == clusterer.py_x[1, 1] 30 | assert pytest.approx(0.6667, 1e-3) == clusterer.py_x[2, 1] 31 | assert pytest.approx(0.0909, 1e-3) == clusterer.py_x[0, 2] 32 | assert pytest.approx(0) == clusterer.py_x[1, 2] 33 | assert pytest.approx(0.9091, 1e-3) == clusterer.py_x[2, 2] 34 | 35 | 36 | def test_get_clusters_from_pc_x(): 37 | """Test that cluster extraction works as expected.""" 38 | pc_x = np.array([[0.3, 0, 1], [0.2, 1, 0], [0.5, 0, 0]]) 39 | clusterer = initialize_cluster_module() 40 | clusters = clusterer.get_clusters_from_pc_x(pc_x) 41 | assert 3 == len(clusters) 42 | 43 | pc_x = np.array([[0.5, 0, 1], [0.2, 1, 0], [0.3, 0, 0]]) 44 | clusters = clusterer.get_clusters_from_pc_x(pc_x) 45 | assert 2 == len(clusters) 46 | assert 2 == len(clusters[0]) 47 | -------------------------------------------------------------------------------- /test/test_information_metrics.py: -------------------------------------------------------------------------------- 1 | """Unit tests for information metrics.""" 2 | 3 | import clio_batch.information_metrics as metrics 4 | import pytest 5 | import numpy as np 6 | 7 | 8 | def test_compute_shannon_entropy(): 9 | """Test that Shannon entropy computation is correct.""" 10 | p1 = np.array([1.0]) 11 | assert 0 == metrics.shannon_entropy(p1) 12 | 13 | p2 = np.array([0.5, 0.5]) 14 | assert -np.log2(0.5) == metrics.shannon_entropy(p2) 15 | 16 | 17 | def test_compute_js_divergence(): 18 | """Test that Jensen-Shannon divergence is correct.""" 19 | pc = np.array([0.5, 0.5]) 20 | py_c = 0.5 * np.ones([2, 2]) 21 | 
assert 0 == metrics.js_divergence(py_c, pc) 22 | 23 | pc = np.array([1, 0]) 24 | py_c = 0.5 * np.ones([2, 2]) 25 | assert 0 == metrics.js_divergence(py_c, pc) 26 | 27 | pc = np.array([0.5, 0.5]) 28 | py_c = np.eye(2) + 1e-8 29 | assert -np.log2(0.5) == pytest.approx(metrics.js_divergence(py_c, pc)) 30 | 31 | 32 | def test_compute_mutual_info(): 33 | """Test that mutual information implementation is correct.""" 34 | px = np.array([0.5, 0.5]) 35 | pc = np.array([0.5, 0.5]) 36 | pc_x = np.eye(2) 37 | assert -np.log2(0.5) == metrics.mutual_information(px, pc, pc_x) 38 | 39 | px = np.array([1, 0]) 40 | pc = np.array([0.5, 0.5]) 41 | pc_x = 0.5 * np.ones([2, 2]) 42 | assert 0 == metrics.mutual_information(px, pc, pc_x) 43 | 44 | px = np.array([1, 0]) 45 | pc = np.array([0.5, 0.5]) 46 | pc_x = np.eye(2) 47 | assert -np.log2(0.5) == metrics.mutual_information(px, pc, pc_x) 48 | -------------------------------------------------------------------------------- /test/test_obb.py: -------------------------------------------------------------------------------- 1 | """Test that IoU works as expected.""" 2 | 3 | import numpy as np 4 | import open3d as o3d 5 | import pytest 6 | from clio_eval.iou import compute_iou_oriented_bbox 7 | 8 | 9 | def _get_rotation(angle): 10 | theta = np.radians(angle) 11 | R = np.array( 12 | [ 13 | [np.cos(theta), -np.sin(theta), 0.0], 14 | [np.sin(theta), np.cos(theta), 0.0], 15 | [0.0, 0.0, 1.0], 16 | ] 17 | ) 18 | return R 19 | 20 | 21 | def test_boxes_disjoint_rotated(): 22 | """Test that two oriented boxes have 0 IoU when (strictly) separated.""" 23 | bbox1 = o3d.geometry.OrientedBoundingBox( 24 | np.array([0.0, 0.0, 0.0]), _get_rotation(45), np.array([1.0, 1.0, 1.0]) 25 | ) 26 | bbox2 = o3d.geometry.OrientedBoundingBox( 27 | np.array([0.9, 0.9, 0.0]), _get_rotation(0), np.array([1.0, 1.0, 1.0]) 28 | ) 29 | 30 | est_iou = compute_iou_oriented_bbox(bbox1, bbox2) 31 | assert est_iou == pytest.approx(0.0) 32 | --------------------------------------------------------------------------------
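For orientation, the quantities exercised by test_information_metrics.py fit together as follows: shannon_entropy measures the uncertainty of a distribution, js_divergence measures how far a set of conditional distributions sits from their weighted mixture, and mutual_information scores how informative a (soft) cluster assignment is about the tasks. The sketch below is a minimal numpy reimplementation consistent with the expectations in those tests; it assumes the conventions implied by the test inputs (columns of py_c and pc_x are conditional distributions, i.e. pc_x[c, x] = p(c|x)), and it is illustrative only, not the clio_batch.information_metrics implementation.

import numpy as np


def shannon_entropy(p):
    """H(p) in bits, using the 0 * log 0 = 0 convention."""
    p = np.asarray(p, dtype=float)
    nz = p[p > 0]
    return float(-np.sum(nz * np.log2(nz)))


def js_divergence(py_c, pc):
    """Generalized Jensen-Shannon divergence of the columns of py_c, weighted by pc."""
    py_c = np.asarray(py_c, dtype=float)
    pc = np.asarray(pc, dtype=float)
    mixture = py_c @ pc  # p(y) = sum_c p(y|c) p(c)
    cond = sum(pc[c] * shannon_entropy(py_c[:, c]) for c in range(pc.size))
    return shannon_entropy(mixture) - cond


def mutual_information(px, pc, pc_x):
    """I(X;C) in bits, with pc_x[c, x] = p(c|x)."""
    px, pc, pc_x = (np.asarray(v, dtype=float) for v in (px, pc, pc_x))
    total = 0.0
    for x in range(px.size):
        for c in range(pc.size):
            joint = px[x] * pc_x[c, x]  # p(x, c) = p(x) p(c|x)
            if joint > 0 and pc[c] > 0:
                total += joint * np.log2(pc_x[c, x] / pc[c])
    return total


# Example consistent with the tests: a deterministic assignment of two equally
# likely inputs to two clusters carries exactly one bit of information, while a
# uniform (uninformative) assignment carries none.
assert mutual_information([0.5, 0.5], [0.5, 0.5], np.eye(2)) == -np.log2(0.5)
assert mutual_information([1, 0], [0.5, 0.5], 0.5 * np.ones([2, 2])) == 0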