├── .gitmodules
├── CMakeLists.txt
├── Dockerfile
├── LICENSE
├── README.md
├── cmake
│   └── Modules
│       └── FindNumPy.cmake
├── connectivity
│   ├── 17DRP5sb8fy_connectivity.json
│   ├── 1LXtFkjw3qL_connectivity.json
│   ├── 1pXnuDYAj8r_connectivity.json
│   ├── 29hnd4uzFmX_connectivity.json
│   ├── 2azQ1b91cZZ_connectivity.json
│   ├── 2n8kARJN3HM_connectivity.json
│   ├── 2t7WUuJeko7_connectivity.json
│   ├── 5LpN3gDmAk7_connectivity.json
│   ├── 5ZKStnWn8Zo_connectivity.json
│   ├── 5q7pvUzZiYa_connectivity.json
│   ├── 759xd9YjKW5_connectivity.json
│   ├── 7y3sRwLe3Va_connectivity.json
│   ├── 8194nk5LbLH_connectivity.json
│   ├── 82sE5b5pLXE_connectivity.json
│   ├── 8WUmhLawc2A_connectivity.json
│   ├── ARNzJeq3xxb_connectivity.json
│   ├── B6ByNegPMKs_connectivity.json
│   ├── D7G3Y4RVNrH_connectivity.json
│   ├── D7N2EKCX4Sj_connectivity.json
│   ├── E9uDoFAP3SH_connectivity.json
│   ├── EDJbREhghzL_connectivity.json
│   ├── EU6Fwq7SyZv_connectivity.json
│   ├── GdvgFV5R1Z5_connectivity.json
│   ├── HxpKQynjfin_connectivity.json
│   ├── JF19kD82Mey_connectivity.json
│   ├── JeFG25nYj2p_connectivity.json
│   ├── JmbYfDe2QKZ_connectivity.json
│   ├── PX4nDJXEHrG_connectivity.json
│   ├── Pm6F8kyY3z2_connectivity.json
│   ├── PuKPg4mmafe_connectivity.json
│   ├── QUCTc6BB5sX_connectivity.json
│   ├── README.md
│   ├── RPmz2sHmrrY_connectivity.json
│   ├── S9hNv5qa7GM_connectivity.json
│   ├── SN83YJsR3w2_connectivity.json
│   ├── TbHJrupSAjP_connectivity.json
│   ├── ULsKaCPVFJR_connectivity.json
│   ├── UwV83HsGsw3_connectivity.json
│   ├── Uxmj2M2itWa_connectivity.json
│   ├── V2XKFyX4ASd_connectivity.json
│   ├── VFuaQ6m2Qom_connectivity.json
│   ├── VLzqgDo317F_connectivity.json
│   ├── VVfe2KiqLaN_connectivity.json
│   ├── Vt2qJdWjCF2_connectivity.json
│   ├── Vvot9Ly1tCj_connectivity.json
│   ├── VzqfbhrpDEA_connectivity.json
│   ├── WYY7iVyf5p8_connectivity.json
│   ├── X7HyMhZNoso_connectivity.json
│   ├── XcA2TqTSSAj_connectivity.json
│   ├── YFuZgdQ5vWj_connectivity.json
│   ├── YVUC4YcDtcY_connectivity.json
│   ├── YmJkqBEsHnH_connectivity.json
│   ├── Z6MFQCViBuw_connectivity.json
│   ├── ZMojNkEp431_connectivity.json
│   ├── aayBHfsNo7d_connectivity.json
│   ├── ac26ZMwG7aT_connectivity.json
│   ├── b8cTxDM8gDG_connectivity.json
│   ├── cV4RVeZvu5T_connectivity.json
│   ├── dhjEzFoUFzH_connectivity.json
│   ├── e9zR4mvMWw7_connectivity.json
│   ├── fzynW3qQPVF_connectivity.json
│   ├── gTV8FGcVJC9_connectivity.json
│   ├── gYvKGZ5eRqb_connectivity.json
│   ├── gZ6f7yhEvPG_connectivity.json
│   ├── gxdoqLR6rwA_connectivity.json
│   ├── i5noydFURQK_connectivity.json
│   ├── jh4fc5c5qoQ_connectivity.json
│   ├── jtcxE69GiFV_connectivity.json
│   ├── kEZ7cmS4wCh_connectivity.json
│   ├── mJXqzFtmKg4_connectivity.json
│   ├── oLBMNvg9in8_connectivity.json
│   ├── p5wJjkQkbXX_connectivity.json
│   ├── pLe4wQe7qrG_connectivity.json
│   ├── pRbA3pwrgk9_connectivity.json
│   ├── pa4otMbVnkk_connectivity.json
│   ├── q9vSo1VnCiC_connectivity.json
│   ├── qoiz87JEwZ2_connectivity.json
│   ├── r1Q1Z4BcV1o_connectivity.json
│   ├── r47D5H71a5s_connectivity.json
│   ├── rPc6DW4iMge_connectivity.json
│   ├── rqfALeAoiTq_connectivity.json
│   ├── s8pcmisQ38h_connectivity.json
│   ├── sKLMLpTHeUy_connectivity.json
│   ├── sT4fr6TAbpF_connectivity.json
│   ├── scans.txt
│   ├── scans_dialog.txt
│   ├── uNb9QFRL6hY_connectivity.json
│   ├── ur6pFq6Qu1A_connectivity.json
│   ├── vyrNrziPKCB_connectivity.json
│   ├── wc2JMjhGNzB_connectivity.json
│   ├── x8F5xyUWy9e_connectivity.json
│   ├── yqstnuAEVhm_connectivity.json
│   └── zsNo4HB9uLZ_connectivity.json
├── img_features
│   └── .gitkeep
├── include
│   ├── Benchmark.hpp
│   ├── Catch.hpp
│   ├── MatterSim.hpp
│   ├── NavGraph.hpp
│   └── cbf.h
├── models
│   └── ResNet-152-deploy.prototxt
├── scripts
│   ├── depth_to_skybox.py
│   ├── downsize_skybox.py
│   ├── fill_depth.py
│   ├── precompute_img_features.py
│   ├── precompute_optimal_policies.py
│   └── timer.py
├── src
│   ├── driver
│   │   ├── driver.py
│   │   └── mattersim_main.cpp
│   ├── lib
│   │   ├── Benchmark.cpp
│   │   ├── MatterSim.cpp
│   │   ├── NavGraph.cpp
│   │   ├── cbf.cpp
│   │   ├── fragment.sh
│   │   └── vertex.sh
│   ├── lib_python
│   │   └── MatterSimPython.cpp
│   └── test
│       ├── main.cpp
│       ├── python_test.py
│       └── rendertest_spec.json
├── tasks
│   └── NDH
│       ├── DAN_modules
│       │   ├── __init__.py
│       │   ├── fc.py
│       │   ├── language_model.py
│       │   ├── refer_find_modules.py
│       │   └── submodules.py
│       ├── agent.py
│       ├── data
│       │   ├── README.md
│       │   └── download.sh
│       ├── env.py
│       ├── eval.py
│       ├── model.py
│       ├── param.py
│       ├── requirements.txt
│       ├── summarize_perf.py
│       ├── train.py
│       └── utils.py
└── teaser
    └── vdn_demo_v2_512.gif
/.gitmodules:
--------------------------------------------------------------------------------
1 | [submodule "pybind11"]
2 | 	path = pybind11
3 | 	url = https://github.com/pybind/pybind11
4 | 
--------------------------------------------------------------------------------
/CMakeLists.txt:
--------------------------------------------------------------------------------
1 | cmake_minimum_required(VERSION 2.8)
2 | project(Matterport_Simulator CXX)
3 | 
4 | option(OSMESA_RENDERING "Offscreen CPU rendering with OSMesa" OFF)
5 | option(EGL_RENDERING "Offscreen GPU rendering with EGL" OFF)
6 | 
7 | set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++11")
8 | # Make custom find-modules available
9 | set(CMAKE_MODULE_PATH ${CMAKE_MODULE_PATH} "${PROJECT_SOURCE_DIR}/cmake/Modules")
10 | 
11 | if(NOT CMAKE_BUILD_TYPE)
12 |   set(CMAKE_BUILD_TYPE Release)
13 | endif()
14 | 
15 | include_directories("${PROJECT_SOURCE_DIR}/include")
16 | 
17 | find_package(OpenCV REQUIRED)
18 | find_package(PkgConfig REQUIRED)
19 | find_package(OpenMP)
20 | if (OPENMP_CXX_FOUND)
21 |   set (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${OpenMP_CXX_FLAGS}")
22 |   set (CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} ${OpenMP_EXE_LINKER_FLAGS}")
23 | endif()
24 | 
25 | pkg_check_modules(JSONCPP REQUIRED jsoncpp)
26 | 
27 | if(EGL_RENDERING)
28 |   add_definitions(-DEGL_RENDERING)
29 |   find_package(OpenGL REQUIRED COMPONENTS OpenGL EGL)
30 |   pkg_check_modules(EPOXY REQUIRED epoxy)
31 |   set(GL_LIBS OpenGL::OpenGL OpenGL::EGL ${EPOXY_LIBRARIES})
32 | elseif(OSMESA_RENDERING)
33 |   add_definitions(-DOSMESA_RENDERING)
34 |   pkg_check_modules(OSMESA REQUIRED osmesa)
35 |   set(GL_LIBS ${OSMESA_LIBRARIES})
36 | else()
37 |   cmake_policy(SET CMP0072 OLD)
38 |   find_package(OpenGL REQUIRED)
39 |   find_package(GLEW REQUIRED)
40 |   set(GL_LIBS ${OPENGL_LIBRARIES} ${GLEW_LIBRARIES})
41 | endif()
42 | 
43 | add_library(MatterSim SHARED src/lib/MatterSim.cpp src/lib/NavGraph.cpp src/lib/Benchmark.cpp src/lib/cbf.cpp)
44 | if(OSMESA_RENDERING)
45 |   target_compile_definitions(MatterSim PUBLIC "-DOSMESA_RENDERING")
46 | endif()
47 | target_include_directories(MatterSim PRIVATE ${JSONCPP_INCLUDE_DIRS})
48 | target_link_libraries(MatterSim ${JSONCPP_LIBRARIES} ${OpenCV_LIBS} ${GL_LIBS})
49 | 
50 | add_executable(tests src/test/main.cpp)
51 | target_include_directories(tests PRIVATE ${JSONCPP_INCLUDE_DIRS})
52 | target_link_libraries(tests MatterSim ${JSONCPP_LIBRARIES} ${OpenCV_LIBS})
53 | 
54 | add_executable(mattersim_main src/driver/mattersim_main.cpp)
55 | target_link_libraries(mattersim_main MatterSim)
56 | 
57 | add_subdirectory(pybind11)
58 | 
59 | find_package(PythonInterp 2.7)
60 | message(${PYTHON_EXECUTABLE})
61 | 
62 | # Need to search for the python executable again to pick up an activated
63 | # virtualenv python, if any.
64 | unset(PYTHON_EXECUTABLE CACHE)
65 | find_program(PYTHON_EXECUTABLE python
66 |   PATHS ENV PATH   # look in the PATH environment variable
67 |   NO_DEFAULT_PATH  # do not look anywhere else...
68 | )
69 | 
70 | find_package(NumPy REQUIRED)
71 | 
72 | pybind11_add_module(MatterSimPython src/lib_python/MatterSimPython.cpp)
73 | target_include_directories(MatterSimPython PRIVATE ${NUMPY_INCLUDES})
74 | target_link_libraries(MatterSimPython PRIVATE MatterSim)
75 | set_target_properties(MatterSimPython
76 |   PROPERTIES
77 |   OUTPUT_NAME MatterSim)
78 | 
--------------------------------------------------------------------------------
/Dockerfile:
--------------------------------------------------------------------------------
1 | # Matterport3DSimulator
2 | # Requires an NVIDIA GPU with driver 384.xx or higher
3 | 
4 | 
5 | FROM nvidia/cudagl:9.0-devel-ubuntu16.04
6 | 
7 | # Install a few libraries to support both EGL and OSMESA options
8 | RUN apt-get update && apt-get install -y wget doxygen curl libjsoncpp-dev libepoxy-dev libglm-dev libosmesa6 libosmesa6-dev libglew-dev libopencv-dev python-opencv python-setuptools python-dev
9 | RUN easy_install pip
10 | RUN pip install torch torchvision pandas networkx
11 | 
12 | # Install the latest CMake
13 | ADD https://cmake.org/files/v3.12/cmake-3.12.2-Linux-x86_64.sh /cmake-3.12.2-Linux-x86_64.sh
14 | RUN mkdir /opt/cmake
15 | RUN sh /cmake-3.12.2-Linux-x86_64.sh --prefix=/opt/cmake --skip-license
16 | RUN ln -s /opt/cmake/bin/cmake /usr/local/bin/cmake
17 | RUN cmake --version
18 | 
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | MIT License
2 | 
3 | Copyright (c) 2017 Peter Anderson, Philip Roberts, Qi Wu, Damien Teney, Jake Bruce,
4 | Mark Johnson, Niko Sunderhauf, Ian Reid, Stephen Gould, Anton van den Hengel
5 | 
6 | Permission is hereby granted, free of charge, to any person obtaining a copy
7 | of this software and associated documentation files (the "Software"), to deal
8 | in the Software without restriction, including without limitation the rights
9 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10 | copies of the Software, and to permit persons to whom the Software is
11 | furnished to do so, subject to the following conditions:
12 | 
13 | The above copyright notice and this permission notice shall be included in all
14 | copies or substantial portions of the Software.
15 | 
16 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22 | SOFTWARE.
23 | 
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # PyTorch implementation of Cross-modal Memory Network
2 | 
3 | [**Vision-Dialog Navigation by Exploring Cross-modal Memory**](https://arxiv.org/abs/2003.06745), CVPR 2020.
4 | 
5 | 
6 | 
7 | ![Demo](teaser/vdn_demo_v2_512.gif)
8 | 
9 | ## Requirements
10 | 
11 | - Ubuntu 16.04
12 | - CUDA 9.0 or 10.0
13 | - [docker](https://askubuntu.com/questions/938700/how-do-i-install-docker-on-ubuntu-16-04-lts)
14 | - [nvidia-docker2.0](https://github.com/nvidia/nvidia-docker/wiki/Installation-(version-2.0))
15 | 
16 | We recommend using the mattersim [Dockerfile](Dockerfile) to install the simulator.
17 | 
18 | ## Dataset Download
19 | 
20 | Download the `train`, `val_seen`, `val_unseen`, and `test` splits of the CVDN and NDH datasets by executing:
21 | ```
22 | sh tasks/CVDN/data/download.sh
23 | sh tasks/NDH/data/download.sh
24 | ```
25 | 
26 | ## Installation
27 | 
28 | Build the docker image:
29 | ```
30 | docker build -t mattersim .
31 | ```
32 | 
33 | Run the docker container, mounting your project path:
34 | ```
35 | nvidia-docker run -it --shm-size 64G -v /User/home/Path_To_Project/:/Workspace/ mattersim
36 | ```
37 | 
38 | Compile the codebase (pass `-DOSMESA_RENDERING=ON` instead if you need CPU-only offscreen rendering; see the options in CMakeLists.txt above):
39 | ```
40 | mkdir build && cd build
41 | cmake -DEGL_RENDERING=ON ..
42 | make
43 | ```
44 | 
45 | Install Python dependencies by running:
46 | ```
47 | pip install -r tasks/NDH/requirements.txt
48 | ```
49 | 
50 | ## Train and Evaluate
51 | 
52 | To train and evaluate with trusted supervision, sample feedback, and all dialog history:
53 | ```
54 | python tasks/NDH/train.py \
55 |     --path_type=trusted_path \
56 |     --history=all \
57 |     --feedback=sample \
58 |     --eval_type=val \
59 |     --prefix=v1
60 | ```
61 | 
62 | 
63 | To train and test with trusted supervision, sample feedback, and all dialog history:
64 | 
65 | ```
66 | python tasks/NDH/train.py \
67 |     --path_type=trusted_path \
68 |     --history=all \
69 |     --feedback=sample \
70 |     --eval_type=test \
71 |     --prefix=v1
72 | ```
73 | 
74 | To generate a summary of the agent's performance:
75 | 
76 | ```
77 | python tasks/NDH/summarize_perf.py
78 | ```
79 | 
80 | ## Citation
81 | 
82 | If you use the code in your research, please cite:
83 | ```bibtex
84 | @inproceedings{zhu2020vision,
85 |   title={Vision-Dialog Navigation by Exploring Cross-modal Memory},
86 |   author={Zhu, Yi and Zhu, Fengda and Zhan, Zhaohuan and Lin, Bingqian and Jiao, Jianbin and Chang, Xiaojun and Liang, Xiaodan},
87 |   booktitle={Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition},
88 |   pages={10730--10739},
89 |   year={2020}
90 | }
91 | ```
92 | 
93 | ## Acknowledgements
94 | 
95 | This repository is built upon the
96 | [Matterport3DSimulator](https://github.com/peteanderson80/Matterport3DSimulator), [CVDN](https://github.com/mmurray/cvdn), and [DAN-VisDial](https://github.com/gicheonkang/DAN-VisDial) codebases.
97 | 
--------------------------------------------------------------------------------
/cmake/Modules/FindNumPy.cmake:
--------------------------------------------------------------------------------
1 | #------------------------------------------------------------------------------- 2 | # Copyright (c) 2013, Lars Baehren 3 | # All rights reserved. 4 | # 5 | # Redistribution and use in source and binary forms, with or without modification, 6 | # are permitted provided that the following conditions are met: 7 | # 8 | # * Redistributions of source code must retain the above copyright notice, this 9 | # list of conditions and the following disclaimer. 10 | # * Redistributions in binary form must reproduce the above copyright notice, 11 | # this list of conditions and the following disclaimer in the documentation 12 | # and/or other materials provided with the distribution.
13 | # 14 | # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 15 | # AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 16 | # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 17 | # DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE 18 | # FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 19 | # DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 20 | # SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 21 | # CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 22 | # OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 23 | # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 24 | #------------------------------------------------------------------------------- 25 | 26 | # - Check for the presence of NumPy 27 | # 28 | # The following variables are set when NumPy is found: 29 | # NUMPY_FOUND = Set to true, if all components of NUMPY have been found. 30 | # NUMPY_INCLUDES = Include path for the header files of NUMPY 31 | # NUMPY_LIBRARIES = Link these to use NUMPY 32 | # NUMPY_LFLAGS = Linker flags (optional) 33 | 34 | if (NOT NUMPY_FOUND) 35 | 36 | if (NOT NUMPY_ROOT_DIR) 37 | set (NUMPY_ROOT_DIR ${CMAKE_INSTALL_PREFIX}) 38 | endif (NOT NUMPY_ROOT_DIR) 39 | 40 | if (NOT PYTHONINTERP_FOUND) 41 | find_package (PythonInterp) 42 | endif (NOT PYTHONINTERP_FOUND) 43 | 44 | ##__________________________________________________________________________ 45 | ## Check for the header files 46 | 47 | ## Use Python to determine the include directory 48 | execute_process ( 49 | COMMAND ${PYTHON_EXECUTABLE} -c import\ numpy\;\ print\(numpy.get_include\(\)\)\; 50 | ERROR_VARIABLE NUMPY_FIND_ERROR 51 | RESULT_VARIABLE NUMPY_FIND_RESULT 52 | OUTPUT_VARIABLE NUMPY_FIND_OUTPUT 53 | OUTPUT_STRIP_TRAILING_WHITESPACE 54 | ) 55 | ## process the output from the execution of the command 56 | if (NOT NUMPY_FIND_RESULT) 57 | set (NUMPY_INCLUDES ${NUMPY_FIND_OUTPUT}) 58 | endif (NOT NUMPY_FIND_RESULT) 59 | 60 | ##__________________________________________________________________________ 61 | ## Check for the library 62 | 63 | unset (NUMPY_LIBRARIES) 64 | 65 | if (PYTHON_SITE_PACKAGES) 66 | find_library (NUMPY_NPYMATH_LIBRARY npymath 67 | HINTS ${PYTHON_SITE_PACKAGES}/numpy/core 68 | PATH_SUFFIXES lib 69 | ) 70 | if (NUMPY_NPYMATH_LIBRARY) 71 | list (APPEND NUMPY_LIBRARIES ${NUMPY_NPYMATH_LIBRARY}) 72 | endif (NUMPY_NPYMATH_LIBRARY) 73 | endif (PYTHON_SITE_PACKAGES) 74 | 75 | ##__________________________________________________________________________ 76 | ## Get API version of NumPy from 'numpy/numpyconfig.h' 77 | 78 | if (PYTHON_EXECUTABLE) 79 | execute_process ( 80 | COMMAND ${PYTHON_EXECUTABLE} -c import\ numpy\;\ print\(numpy.__version__\)\; 81 | ERROR_VARIABLE NUMPY_API_VERSION_ERROR 82 | RESULT_VARIABLE NUMPY_API_VERSION_RESULT 83 | OUTPUT_VARIABLE NUMPY_API_VERSION 84 | OUTPUT_STRIP_TRAILING_WHITESPACE 85 | ) 86 | else () 87 | ## Backup procedure: extract version number directly from the header file 88 | if (NUMPY_INCLUDES) 89 | find_file (HAVE_NUMPYCONFIG_H numpy/numpyconfig.h 90 | HINTS ${NUMPY_INCLUDES} 91 | ) 92 | endif (NUMPY_INCLUDES) 93 | endif () 94 | 95 | ## Dissect full version number into major, minor and patch version 96 | if (NUMPY_API_VERSION) 97 | string (REGEX REPLACE "\\." 
";" _tmp ${NUMPY_API_VERSION}) 98 | list (GET _tmp 0 NUMPY_API_VERSION_MAJOR) 99 | list (GET _tmp 1 NUMPY_API_VERSION_MINOR) 100 | list (GET _tmp 2 NUMPY_API_VERSION_PATCH) 101 | endif (NUMPY_API_VERSION) 102 | 103 | ##__________________________________________________________________________ 104 | ## Actions taken when all components have been found 105 | 106 | find_package_handle_standard_args (NUMPY DEFAULT_MSG NUMPY_INCLUDES) 107 | 108 | if (NUMPY_FOUND) 109 | if (NOT NUMPY_FIND_QUIETLY) 110 | message (STATUS "Found components for NumPy") 111 | message (STATUS "NUMPY_ROOT_DIR = ${NUMPY_ROOT_DIR}") 112 | message (STATUS "NUMPY_INCLUDES = ${NUMPY_INCLUDES}") 113 | message (STATUS "NUMPY_LIBRARIES = ${NUMPY_LIBRARIES}") 114 | message (STATUS "NUMPY_API_VERSION = ${NUMPY_API_VERSION}") 115 | endif (NOT NUMPY_FIND_QUIETLY) 116 | else (NUMPY_FOUND) 117 | if (NUMPY_FIND_REQUIRED) 118 | message (FATAL_ERROR "Could not find NUMPY!") 119 | endif (NUMPY_FIND_REQUIRED) 120 | endif (NUMPY_FOUND) 121 | 122 | ##__________________________________________________________________________ 123 | ## Mark advanced variables 124 | 125 | mark_as_advanced ( 126 | NUMPY_ROOT_DIR 127 | NUMPY_INCLUDES 128 | NUMPY_LIBRARIES 129 | ) 130 | 131 | endif (NOT NUMPY_FOUND) 132 | -------------------------------------------------------------------------------- /connectivity/8194nk5LbLH_connectivity.json: -------------------------------------------------------------------------------- 1 | [{"image_id":"c9e8dc09263e4d0da77d16de0ecddd39","pose":[-0.611043,-0.00396746,-0.791588,-0.213904,0.791585,-0.00882497,-0.610996,2.305,-0.00456166,-0.999953,0.00853306,1.56916,0,0,0,1],"included":true,"visible":[false,false,false,false,true,true,false,true,true,true,false,false,false,false,false,false,false,false,false,false],"unobstructed":[false,false,false,false,true,false,false,false,true,true,false,false,false,false,false,false,false,false,false,false],"height":1.5826326295962942},{"image_id":"286b0c2d9a46408ba80b6ccebb21e582","pose":[0.951596,0.00201098,0.307346,6.58012,-0.307351,0.00915895,0.951552,-2.96479,-0.000901435,-0.999956,0.00933374,4.36353,0,0,0,1],"included":true,"visible":[false,false,true,true,false,false,false,false,false,false,false,false,false,true,false,true,false,true,false,true],"unobstructed":[false,false,false,true,false,false,false,false,false,false,false,false,false,true,false,false,false,true,true,false],"height":1.5712253956498747},{"image_id":"6776097c17ed4b93aee61704eb32f06c","pose":[-0.711582,-0.00419131,-0.702591,-1.68941,0.702575,0.00464776,-0.711594,-5.37908,0.00624796,-0.99998,-0.000362505,1.58622,0,0,0,1],"included":true,"visible":[false,false,false,false,false,true,true,true,false,true,false,true,false,false,true,false,true,false,false,false],"unobstructed":[false,false,false,false,false,true,true,false,false,false,false,true,false,false,false,false,false,false,false,true],"height":1.5804941871490743},{"image_id":"8c7e8da7d4a44ab695e6b3195eac0cf1","pose":[0.709879,0.011247,0.704234,8.62929,-0.70424,-0.00407304,0.70995,-1.77115,0.0108531,-0.999928,0.00502926,4.38556,0,0,0,1],"included":true,"visible":[false,true,false,false,false,false,false,false,false,false,true,false,true,true,false,false,false,true,true,false],"unobstructed":[false,true,false,false,false,false,false,false,false,false,true,false,false,true,false,false,false,true,true,false],"height":1.585645804390483},{"image_id":"f33c718aaf2c41469389a87944442c62","pose":[0.619478,0.0166688,0.784837,-3.88437,-0.784902,-0.00375152,0.619609,-0.528748
,0.0132725,-0.999854,0.0107595,1.58368,0,0,0,1],"included":true,"visible":[true,false,false,false,false,false,false,false,false,true,false,false,false,false,false,false,false,false,false,true],"unobstructed":[true,false,false,false,false,false,false,false,false,true,false,false,false,false,false,false,false,false,false,true],"height":1.5829827809014503},{"image_id":"fcd90a404061413385286bef9662630e","pose":[-0.111393,0.00837906,0.993741,2.80245,-0.993773,-0.00348217,-0.111367,-3.78204,0.0025272,-0.999959,0.00871482,1.58057,0,0,0,1],"included":true,"visible":[true,false,true,false,false,false,false,true,false,false,false,false,false,false,false,true,false,false,false,false],"unobstructed":[false,false,true,false,false,false,true,true,true,false,false,false,false,false,false,false,false,false,false,false],"height":1.5763528408163245},{"image_id":"c07d4ae8330542a09cf8f8dddb9728ce","pose":[-0.985207,-0.0101267,0.171069,0.656519,-0.171094,0.00168538,-0.985253,-5.08928,0.00968898,-0.999947,-0.00339301,1.57611,0,0,0,1],"included":true,"visible":[true,false,true,false,false,true,false,true,false,false,false,false,false,false,true,false,true,false,false,true],"unobstructed":[false,false,true,false,false,true,false,true,false,false,false,true,false,false,false,false,false,false,false,false],"height":1.575276915205382},{"image_id":"2393bffb53fe4205bcc67796c6fb76e3","pose":[-0.241654,0.00228344,-0.97036,3.33582,0.970294,0.0124463,-0.241608,-5.90025,0.0115256,-0.99992,-0.00522325,1.57791,0,0,0,1],"included":true,"visible":[false,false,true,false,false,true,true,false,false,false,false,false,false,false,true,false,true,false,false,false],"unobstructed":[false,false,false,false,false,true,true,false,false,false,false,false,false,false,true,false,false,false,false,false],"height":1.5730354249357412},{"image_id":"71bf74df73cd4e24a191ef4f2338ca22","pose":[0.906931,-0.00688335,-0.421222,0.122562,0.421182,-0.00662188,0.906952,-0.00319673,-0.00903217,-0.999954,-0.00310641,1.57207,0,0,0,1],"included":true,"visible":[true,false,false,false,true,true,true,true,false,true,false,false,false,false,false,false,false,false,false,false],"unobstructed":[true,false,false,false,false,true,false,false,false,true,false,false,false,false,false,false,false,false,false,false],"height":1.570272020216938},{"image_id":"be8a2edacab34ec8887ba6a7b1e4945f","pose":[0.791463,0.0101015,0.611133,-3.50132,-0.611154,-0.00121731,0.791511,1.58103,0.00873934,-0.999948,0.00521015,1.56992,0,0,0,1],"included":true,"visible":[true,false,true,false,true,false,false,true,false,false,false,true,false,false,false,false,false,false,false,true],"unobstructed":[true,false,false,false,true,false,false,false,true,false,false,false,false,false,false,false,false,false,false,false],"height":1.577126892771864},{"image_id":"9bdde31adaa1443bb206b09bfa3c474c","pose":[0.799844,0.0047414,0.60019,8.67581,-0.600208,0.0075118,0.799809,-4.8108,-0.000716311,-0.99996,0.00885413,2.82261,0,0,0,1],"included":true,"visible":[false,false,false,true,false,false,false,false,false,false,false,false,false,true,false,false,true,true,false,false],"unobstructed":[false,false,false,true,false,false,false,false,false,false,false,false,false,false,false,false,true,false,false,false],"height":1.58264400638767},{"image_id":"66d4adb61b57494aa2c1ad141a0fad9b","pose":[-0.34536,-0.0108675,-0.938407,-2.27885,0.938436,0.00459882,-0.345423,-3.2282,0.00806945,-0.99993,0.00861029,1.58739,0,0,0,1],"included":true,"visible":[false,false,true,false,false,true,true,true,false,true,false,false,false,f
alse,true,true,false,false,false,true],"unobstructed":[false,false,true,false,false,false,true,false,false,false,false,false,false,false,false,false,false,false,false,true],"height":1.5705441219971223},{"image_id":"83ff709c0e3e46079836153ea5c7feac","pose":[0.68423,0.0137303,0.729137,3.42529,-0.729235,0.00364543,0.684254,1.65175,0.00673696,-0.999899,0.012507,4.37069,0,0,0,1],"included":true,"visible":[false,false,false,true,false,false,false,false,false,false,false,false,false,false,false,false,false,false,true,false],"unobstructed":[false,false,false,false,false,false,false,false,false,false,false,false,false,false,false,false,false,false,true,false],"height":1.578378655072358},{"image_id":"d9e325df2f3948679c78b93d8025e2da","pose":[0.826698,0.0192407,0.562317,8.49764,-0.562455,0.00220125,0.826825,-0.816805,0.0146709,-0.999812,0.0126418,4.38875,0,0,0,1],"included":true,"visible":[false,true,false,true,false,false,false,false,false,false,true,false,true,false,false,false,false,true,true,false],"unobstructed":[false,true,false,true,false,false,false,false,false,false,false,false,false,false,false,false,false,true,true,false],"height":1.5865892751674604},{"image_id":"423efb97f77f4e7995f19c66fe82afbc","pose":[0.958879,0.00141119,0.283813,5.51819,-0.283808,0.0124035,0.958801,-5.67527,-0.00216725,-0.999922,0.012294,1.58856,0,0,0,1],"included":true,"visible":[false,false,true,false,false,false,true,true,false,false,false,false,false,false,false,false,true,false,false,false],"unobstructed":[false,false,false,false,false,false,false,true,false,false,false,false,false,false,false,false,true,false,false,false],"height":1.5784339701720043},{"image_id":"6c49579a5cd34df8acb7f790b74e9eae","pose":[-0.95716,-0.00676032,-0.289482,-6.48379,0.289538,-0.00977451,-0.957117,-2.57899,0.00364085,-0.999929,0.0113132,1.59886,0,0,0,1],"included":true,"visible":[false,false,false,false,false,false,false,false,false,false,false,true,false,false,false,false,false,false,false,true],"unobstructed":[false,false,false,false,false,false,false,false,false,false,false,false,false,false,false,false,false,false,false,true],"height":1.5798282335589897},{"image_id":"aeed67040d744240b188f66f17d87d43","pose":[0.132175,0.0257204,0.990893,7.67989,-0.991226,0.00381825,0.132121,-5.81072,-0.000385302,-0.999662,0.0259995,2.29866,0,0,0,1],"included":true,"visible":[false,false,true,false,false,false,true,true,false,false,true,false,false,false,true,false,false,false,false,false],"unobstructed":[false,false,false,false,false,false,false,false,false,false,true,false,false,false,true,false,false,false,false,false],"height":1.6026680667792301},{"image_id":"aae01016bb354f78bd6db86e9d71af2b","pose":[0.0788252,0.00384462,0.996881,6.79041,-0.996887,0.00184069,0.0788186,-0.995862,-0.00153193,-0.999991,0.0039778,4.37219,0,0,0,1],"included":true,"visible":[false,true,false,true,false,false,false,false,false,false,false,false,false,true,false,false,false,false,true,false],"unobstructed":[false,true,false,true,false,false,false,false,false,false,false,false,false,true,false,false,false,false,true,false],"height":1.5770919536040346},{"image_id":"346b680ac5904359a1859c929ad312b6","pose":[-0.589008,0.00463239,0.808114,5.58585,-0.808123,0.00000695791,-0.589015,0.644327,-0.00273419,-0.999989,0.00373948,4.38174,0,0,0,1],"included":true,"visible":[false,true,false,true,false,false,false,false,false,false,false,false,true,true,false,false,false,true,false,false],"unobstructed":[false,true,false,true,false,false,false,false,false,false,false,false,true,true,false
,false,false,true,false,false],"height":1.5707587596461066},{"image_id":"ae91518ed77047b3bdeeca864cd04029","pose":[0.310985,0.0070688,0.950389,-4.60607,-0.950392,-0.00460962,0.31102,-2.5949,0.00657945,-0.999964,0.00528466,1.58581,0,0,0,1],"included":true,"visible":[false,false,true,false,true,true,false,true,false,true,false,true,false,false,false,true,false,false,false,false],"unobstructed":[false,false,true,false,true,false,false,false,false,false,false,true,false,false,false,true,false,false,false,false],"height":1.5747548700639524}] -------------------------------------------------------------------------------- /connectivity/GdvgFV5R1Z5_connectivity.json: -------------------------------------------------------------------------------- 1 | [{"image_id":"0b02e18654324edd8d74c078b66bfb20","pose":[-0.057695,-0.000357129,0.998334,-2.46692,-0.998304,-0.00769199,-0.0576965,-3.15814,0.00770012,-0.99997,0.0000884733,1.5171,0,0,0,1],"included":true,"visible":[false,true,false,false,false,true,true,true,true,false,true,false],"unobstructed":[false,false,false,false,false,true,false,true,true,false,true,false],"height":1.51470410293751},{"image_id":"1db1c0a09ecf40d188197efc05ced3bb","pose":[-0.442443,0.0138817,0.896688,-4.03893,-0.89679,-0.0101225,-0.442338,-3.05434,0.00293664,-0.999852,0.0169288,0.974424,0,0,0,1],"included":true,"visible":[true,false,false,false,true,true,false,false,true,false,false,true],"unobstructed":[false,false,false,false,false,true,false,false,true,false,false,true],"height":0.9701803380402906},{"image_id":"6178647ca8d14dc09370f6c1b7ed2fd6","pose":[-0.870025,0.0056275,0.492973,-3.69279,-0.493005,-0.0105975,-0.869962,1.95433,0.000328893,-0.999927,0.0119957,1.51516,0,0,0,1],"included":true,"visible":[false,false,false,true,false,false,true,true,false,false,true,false],"unobstructed":[false,false,false,true,false,false,true,true,false,true,true,false],"height":1.517582101716661},{"image_id":"565cc21cd28b4ee6bb5ba83c5270c032","pose":[0.0242634,0.000986587,-0.999704,-3.91782,0.999699,0.00333371,0.024267,0.178675,0.00335701,-0.999993,-0.0009042,1.50868,0,0,0,1],"included":true,"visible":[false,false,true,false,false,false,true,false,false,true,true,false],"unobstructed":[false,false,true,false,false,false,false,false,false,true,true,false],"height":1.5114421933143356},{"image_id":"ef638e508e054c4aabd49b38d1b88fc7","pose":[0.0820523,0.0151057,0.996513,-4.61631,-0.995947,-0.0356725,0.0825462,-2.18899,0.0367954,-0.999249,0.0121187,1.52757,0,0,0,1],"included":true,"visible":[false,true,false,false,false,true,false,false,true,false,false,true],"unobstructed":[false,false,false,false,false,true,false,false,true,false,false,true],"height":1.5162868543024455},{"image_id":"97ed68de989e44fdaf2d9b949898fab6","pose":[0.0900997,0.0149714,0.99582,-3.64126,-0.995713,-0.0195971,0.0903844,-3.16818,0.0208687,-0.999695,0.0131427,1.52081,0,0,0,1],"included":true,"visible":[true,true,false,false,true,false,false,false,true,false,false,true],"unobstructed":[true,true,false,false,true,false,false,false,true,false,false,true],"height":1.5211418713547455},{"image_id":"5fd70cff4992429a99a84fd3c117ccb5","pose":[-0.0539877,-0.000800861,-0.998541,0.0108044,0.998337,0.0201438,-0.0539926,0.00604319,0.020158,-0.999796,-0.000286778,1.51223,0,0,0,1],"included":true,"visible":[true,false,true,true,false,false,false,true,false,true,true,false],"unobstructed":[false,false,true,false,false,false,false,true,false,false,true,false],"height":1.5113248528175798},{"image_id":"86d342c576ff46a9828d2ba377cc8cd5","pose":[0.
998173,0.0151118,-0.0584746,-1.78347,0.0584707,0.000718574,0.998288,-1.89835,0.0151283,-0.999885,-0.000165129,1.52238,0,0,0,1],"included":true,"visible":[true,false,true,false,false,false,true,false,false,false,true,false],"unobstructed":[true,false,true,false,false,false,true,false,false,false,true,false],"height":1.5103397372923053},{"image_id":"8dba9ff900b14f9b84ead660f5f7f701","pose":[-0.999855,-0.0144511,0.00887107,-4.11579,-0.00895392,0.00564829,-0.999943,-2.90606,0.0144005,-0.999879,-0.00577567,1.51617,0,0,0,1],"included":true,"visible":[true,true,false,false,true,true,false,false,false,false,false,true],"unobstructed":[true,true,false,false,true,true,false,false,false,false,false,true],"height":1.5112098807574073},{"image_id":"0d8c5fbfd73f44e28d6da370520611e4","pose":[0.0769887,0.00664334,0.997009,-6.15424,-0.997016,-0.00490415,0.0770216,-0.0398163,0.00540151,-0.999965,0.00624716,1.50965,0,0,0,1],"included":true,"visible":[false,false,true,true,false,false,true,false,false,false,true,false],"unobstructed":[false,false,true,true,false,false,false,false,false,false,false,false],"height":1.5058928427471967},{"image_id":"aebb1de49d21485e8bef7633dfb58761","pose":[-0.0229751,-0.0058052,-0.999718,-1.94579,0.999719,0.00553997,-0.0230069,-0.026534,0.00567231,-0.999967,0.0056775,1.50582,0,0,0,1],"included":true,"visible":[true,false,true,true,false,false,true,true,false,true,false,false],"unobstructed":[true,false,true,true,false,false,true,true,false,false,false,false],"height":1.5101720791580233},{"image_id":"e34e51f3d6584ad09c510de5db84752f","pose":[-0.0418368,-0.0124855,0.999046,-3.99281,-0.993607,-0.104406,-0.0429142,-2.13265,0.104842,-0.994456,-0.00803644,0.980264,0,0,0,1],"included":true,"visible":[false,true,false,false,true,true,false,false,true,false,false,false],"unobstructed":[false,true,false,false,true,true,false,false,true,false,false,false],"height":0.969584316081611}] -------------------------------------------------------------------------------- /connectivity/README.md: -------------------------------------------------------------------------------- 1 | ## connectivity 2 | Connectivity graphs indicating the navigable paths between viewpoints in each scan. 3 | 4 | Each json file contains an array of annotations, one for each viewpoint in the scan. All annotations share the same basic structure as follows: 5 | 6 | ``` 7 | { 8 | "image_id": str, 9 | "pose": [float x 16], 10 | "included": boolean, 11 | "visible": [boolean x num_viewpoints], 12 | "unobstructed": [boolean x num_viewpoints], 13 | "height": float 14 | } 15 | ``` 16 | - `image_id`: matterport skybox prefix 17 | - `pose`: 4x4 matrix in row major order that transforms matterport skyboxes to global coordinates (z-up). Pose matrices are based on the assumption that the camera is facing skybox image 3. 18 | - `included`: whether viewpoint is included in the simulator. Some overlapping viewpoints are excluded. 19 | - `visible`: indicates other viewpoints that can be seen from this viewpoint. 20 | - `unobstructed`: indicates transitions to other viewpoints that are considered navigable for an agent. 21 | - `height`: estimated height of the viewpoint above the floor. Not required for the simulator. 22 | 23 | Units are in metres. 24 | 25 | `scans.txt` contains a list of all the scan ids in the dataset. 
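
To make the schema concrete, here is a minimal Python sketch of one way to parse a connectivity file into a navigation graph. It is illustrative rather than part of the simulator: the helper name `load_nav_graph` is our own, and it assumes `networkx` is available (the project Dockerfile installs it via pip).

```python
import json
import os

import networkx as nx  # installed by the Dockerfile via pip


def load_nav_graph(scan_id, connectivity_dir="connectivity"):
    """Illustrative helper: build an undirected graph of navigable transitions."""
    path = os.path.join(connectivity_dir, scan_id + "_connectivity.json")
    with open(path) as f:
        annotations = json.load(f)
    graph = nx.Graph()
    for item in annotations:
        if not item["included"]:
            continue  # skip viewpoints excluded from the simulator
        # The pose is a row-major 4x4 matrix, so the translation (the
        # viewpoint position in global z-up coordinates, in metres) sits
        # at flat indices 3, 7 and 11.
        pose = item["pose"]
        graph.add_node(item["image_id"], position=(pose[3], pose[7], pose[11]))
        # unobstructed[j] refers to the j-th annotation in the same file.
        for j, navigable in enumerate(item["unobstructed"]):
            if navigable and annotations[j]["included"]:
                graph.add_edge(item["image_id"], annotations[j]["image_id"])
    return graph


if __name__ == "__main__":
    g = load_nav_graph("GdvgFV5R1Z5")
    print("{} viewpoints, {} edges".format(g.number_of_nodes(), g.number_of_edges()))
```

Shortest routes between viewpoints can then be computed directly on the resulting graph, e.g. with `nx.shortest_path(g, source_id, target_id)`.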
26 | -------------------------------------------------------------------------------- /connectivity/YmJkqBEsHnH_connectivity.json: -------------------------------------------------------------------------------- 1 | [{"image_id":"006933a75f764c5485cf284bea0ded0b","pose":[0.210914,-0.00824746,-0.977469,-7.64722,0.977278,0.0232484,0.210677,-2.15553,0.0209873,-0.999695,0.0129646,1.56695,0,0,0,1],"included":true,"visible":[false,false,true,false,true,true,false,true,true,true,false],"unobstructed":[false,false,false,false,false,false,false,true,true,false,false],"height":1.524793092035509},{"image_id":"e4ede0695e4e4a77aae8537abb9f11d3","pose":[-0.0422212,-0.0176246,-0.998952,-0.133122,0.998904,0.0194092,-0.0425613,-0.0184591,0.0201393,-0.999656,0.016787,1.48352,0,0,0,1],"included":true,"visible":[false,false,true,true,false,false,false,false,false,false,false],"unobstructed":[false,false,true,false,false,true,false,false,false,false,false],"height":1.5227398475592409},{"image_id":"d471e89e00be49f49a7ecace814d60bf","pose":[0.426939,-0.00370058,-0.904272,-0.421886,0.904055,0.0239963,0.426739,-2.12366,0.0201203,-0.999705,0.0135916,1.49477,0,0,0,1],"included":true,"visible":[true,true,false,true,true,true,false,true,true,true,false],"unobstructed":[false,true,false,true,false,true,false,false,false,false,false],"height":1.5263900136377955},{"image_id":"b34af02ce9b642ebbd0c7e9e0ba3b553","pose":[0.960272,0.00870611,-0.278924,-0.0905727,0.278755,0.0168277,0.960214,-3.55265,0.0130537,-0.99982,0.0137334,1.49061,0,0,0,1],"included":true,"visible":[true,true,true,false,false,false,false,false,false,false,false],"unobstructed":[false,false,true,false,false,true,false,false,false,false,false],"height":1.5323637229797105},{"image_id":"01c80b5f8fbd4c969ee0bc03f1ec7a6c","pose":[0.359562,-0.0105291,-0.933061,-3.77309,0.932771,0.0313799,0.359097,-2.1838,0.0254987,-0.999452,0.0211054,1.53932,0,0,0,1],"included":true,"visible":[true,false,true,false,false,true,false,true,true,true,false],"unobstructed":[false,false,false,false,false,true,false,true,false,false,false],"height":1.5286629461398107},{"image_id":"82ea5baa30f945fe98f6cad3064af847","pose":[0.0376233,-0.0115611,-0.999224,-2.01669,0.998821,0.0310955,0.0372487,-2.16965,0.030641,-0.999449,0.0127185,1.50807,0,0,0,1],"included":true,"visible":[true,true,true,true,true,false,false,true,true,true,false],"unobstructed":[false,true,true,true,true,false,false,false,false,false,false],"height":1.5253207999550662},{"image_id":"aecbb791f30b452a9236c5a8c7030663","pose":[0.296076,-0.0242641,-0.954855,-13.5955,0.955111,0.0179483,0.2957,-2.22547,0.00996343,-0.999544,0.0284901,1.59272,0,0,0,1],"included":true,"visible":[true,false,true,false,true,true,false,true,true,true,true],"unobstructed":[false,false,false,false,false,false,false,false,false,true,true],"height":1.7557263982456066},{"image_id":"d841f7b710f9470796d55561f8f524db","pose":[0.270437,0.002913,-0.962732,-5.77716,0.962325,0.0284129,0.27041,-2.21321,0.028142,-0.999591,0.00488176,1.55947,0,0,0,1],"included":true,"visible":[true,false,true,false,true,true,false,false,true,true,false],"unobstructed":[true,false,false,false,true,false,false,false,false,false,false],"height":1.5357935019251416},{"image_id":"8e38fdd81c7949db9646968bafbbdcfc","pose":[-0.00277118,-0.0169575,-0.999852,-9.93905,0.999791,0.020127,-0.00311204,-2.17463,0.0201771,-0.999653,0.0168993,1.60592,0,0,0,1],"included":true,"visible":[true,false,true,false,true,true,false,true,false,true,true],"unobstructed":[true,false,false,false,false,false,false,
false,false,true,false],"height":1.5208970888736792},{"image_id":"20fd759be0b64fc9aa96d290f0a704ec","pose":[0.227815,0.0117555,-0.973633,-12.1161,0.973367,0.0235263,0.228037,-2.15724,0.025587,-0.999654,-0.00608172,1.59969,0,0,0,1],"included":true,"visible":[true,false,true,false,true,true,true,true,true,false,true],"unobstructed":[false,false,false,false,false,false,true,false,true,false,false],"height":1.5261379179165138},{"image_id":"d838acff82244c2da0cf2651e54966cb","pose":[0.310234,-0.0632421,-0.948553,-15.2317,0.950604,0.0313736,0.308813,-2.28133,0.0102298,-0.997504,0.0698525,0.902626,0,0,0,1],"included":true,"visible":[true,false,true,false,true,true,true,true,true,true,false],"unobstructed":[false,false,false,false,false,false,true,false,false,false,false],"height":1.558854711359605}] -------------------------------------------------------------------------------- /connectivity/gZ6f7yhEvPG_connectivity.json: -------------------------------------------------------------------------------- 1 | [{"image_id":"80929af5cf234ae38ac3a2a4e60e4342","pose":[0.983395,0.00450812,-0.181418,-2.79247,0.181442,-0.00570068,0.983385,-1.38801,0.00339928,-0.999973,-0.00642298,1.42676,0,0,0,1],"included":true,"visible":[false,true,true,false,false,true,false,false],"unobstructed":[false,true,false,true,false,true,false,false],"height":1.4191402375960298},{"image_id":"ba27da20782d4e1a825f0a133ad84da9","pose":[-0.7605,-0.0115739,-0.649234,-2.38988,0.648885,0.0237502,-0.760515,-0.0538717,0.0242219,-0.999651,-0.0105509,1.4341,0,0,0,1],"included":true,"visible":[true,false,true,true,false,true,false,true],"unobstructed":[true,false,false,false,false,true,false,true],"height":1.424939020658826},{"image_id":"46cecea0b30e4786b673f5e951bf82d4","pose":[0.593129,0.0137361,-0.80499,0.99933,0.804932,0.010707,0.59327,1.17558,0.0167685,-0.999848,-0.00470498,1.41684,0,0,0,1],"included":true,"visible":[false,false,false,true,true,false,true,true],"unobstructed":[false,false,false,true,true,false,true,true],"height":1.4252108727703763},{"image_id":"bda7a9e6d1d94b3aa8ff491beb158f3a","pose":[-0.378592,-0.0208239,0.925329,-0.182918,-0.925433,-0.00820128,-0.37882,-1.72967,0.0154776,-0.999749,-0.0161651,1.42205,0,0,0,1],"included":true,"visible":[true,false,true,false,true,false,true,true],"unobstructed":[true,false,true,false,true,false,false,true],"height":1.42983949725488},{"image_id":"dbb2f8000bc04b3ebcd0a55112786149","pose":[-0.595363,0.00457706,-0.803444,1.10196,0.803383,0.0168543,-0.595222,-1.10724,0.0108174,-0.999847,-0.0137106,1.41536,0,0,0,1],"included":true,"visible":[false,false,true,true,false,false,true,true],"unobstructed":[false,false,true,true,false,false,true,true],"height":1.4186255623107038},{"image_id":"29b20fa80dcd4771974303c1ccd8953f","pose":[0.292738,0.0164579,-0.956051,-2.77306,0.956096,0.0090939,0.292909,1.55377,0.0135152,-0.999823,-0.0130722,1.43367,0,0,0,1],"included":true,"visible":[true,true,true,false,true,false,false,false],"unobstructed":[true,true,false,false,false,false,false,false],"height":1.4237594118402337},{"image_id":"0ee20663dfa34b438d48750ddcd7366c","pose":[-0.75968,-0.0019971,-0.650293,-0.111567,0.650131,0.0201598,-0.759554,1.31337,0.014627,-0.999794,-0.0140156,1.42291,0,0,0,1],"included":true,"visible":[false,false,true,true,true,false,false,true],"unobstructed":[false,false,true,false,true,false,false,true],"height":1.4276556862049736},{"image_id":"47d8a8282c1c4a7fb3eeeacc45e9d959","pose":[-0.0254788,0.00643152,-0.999654,-0.0034508,0.999603,0.0120797,-0.0253995,0.0112371,0.0119124,-
0.999906,-0.00673574,1.42388,0,0,0,1],"included":true,"visible":[true,true,true,true,true,false,true,false],"unobstructed":[false,true,true,true,true,false,true,false],"height":1.4268855357216241}] -------------------------------------------------------------------------------- /connectivity/pLe4wQe7qrG_connectivity.json: -------------------------------------------------------------------------------- 1 | [{"image_id":"e4c0a4ec08104bf5ada134b123fa53e7","pose":[-0.133089,0.0111501,-0.991041,1.16811,0.991028,0.0137789,-0.132932,-2.20571,0.0121736,-0.999843,-0.0128829,1.54855,0,0,0,1],"included":true,"visible":[false,false,true,false,true,false,true,true,false,true,false,false,true,false,true,false,false,false,false,false,false,true,false,true,true,true,true,true,false,false,true],"unobstructed":[false,false,false,false,false,false,false,false,false,false,false,false,false,false,true,false,false,false,false,false,false,true,false,false,true,false,false,false,false,false,false],"height":1.5280399019555968},{"image_id":"959ea6af304a4339bbc5d97f044d11c3","pose":[0.312992,0.0130519,-0.949666,2.47951,0.948724,0.0422726,0.313263,-2.23387,0.0442338,-0.999021,0.000849325,1.58243,0,0,0,1],"included":true,"visible":[false,false,true,true,false,false,false,false,false,true,false,true,true,false,true,true,false,false,false,false,false,false,true,true,true,true,true,false,true,false,true],"unobstructed":[false,false,false,false,false,false,false,false,false,false,false,true,false,false,false,false,false,false,false,false,false,false,false,false,false,false,true,false,true,false,false],"height":1.5361363756730164},{"image_id":"ffe0e6835287419c9cfe343e9d613d87","pose":[-0.802259,-0.00971694,-0.596896,5.96539,0.59688,0.00470064,-0.802316,-2.03323,0.0106021,-0.999941,0.00202973,1.57957,0,0,0,1],"included":true,"visible":[false,true,false,false,false,false,true,false,false,false,false,false,true,true,false,true,false,false,false,false,false,true,true,false,false,false,true,false,false,true,true],"unobstructed":[false,false,false,false,false,false,false,false,false,false,false,false,true,true,false,false,false,false,false,false,false,false,false,false,false,false,false,false,false,false,true],"height":1.518586128876891},{"image_id":"47a69295198f4265958b9b1d497c328d","pose":[-0.90497,-0.00981301,-0.42536,2.46799,0.425363,0.00186582,-0.90502,2.04203,0.00967489,-0.99995,0.0024866,1.55214,0,0,0,1],"included":true,"visible":[false,true,false,false,false,true,true,false,true,false,false,true,false,false,false,true,false,false,true,true,true,false,false,false,true,false,false,true,true,false,false],"unobstructed":[false,false,false,false,false,false,false,false,false,false,false,false,false,false,false,true,false,false,false,false,true,false,false,false,false,false,false,false,false,false,false],"height":1.5121750884423606},{"image_id":"3dfe07714b2f49d88bd4c8749e8bb0b7","pose":[-0.979561,-0.00709476,0.201019,-1.64821,-0.200975,-0.00640329,-0.979575,0.566531,0.0082373,-0.999954,0.00484756,1.56065,0,0,0,1],"included":true,"visible":[true,false,false,false,false,false,true,true,false,false,false,true,false,true,false,true,true,false,false,false,false,true,true,true,true,true,false,true,false,false,false],"unobstructed":[false,false,false,false,false,false,false,false,false,false,false,false,false,false,true,false,true,false,false,false,false,true,false,false,true,true,false,true,false,false,false],"height":1.5157095354765127},{"image_id":"87407bb6ed614926b91fc3e27eab766e","pose":[0.22909,0.0301697,-0.972937,4.56488,0.9732
86,0.00848048,0.229435,2.04904,0.0151732,-0.999508,-0.02742,1.5442,0,0,0,1],"included":true,"visible":[false,false,false,true,false,false,true,false,false,true,false,false,false,false,true,true,false,false,true,true,true,false,true,false,false,false,false,false,true,false,false],"unobstructed":[false,false,false,false,false,false,false,false,false,false,false,false,false,false,false,false,false,false,false,true,true,false,false,false,false,false,false,false,false,false,false],"height":1.5111934219678684},{"image_id":"530f8e4126b14082a5c4ff6c3f6ae7cd","pose":[-0.172634,-0.00379856,-0.984978,8.51758,0.984978,0.00322887,-0.172647,0.14365,0.00383645,-0.999987,0.0031851,1.4578,0,0,0,1],"included":true,"visible":[false,false,true,false,true,false,false,false,false,false,true,false,false,true,false,false,false,false,false,false,false,false,true,true,false,false,false,false,false,true,false],"unobstructed":[false,false,false,false,false,false,false,false,false,false,false,false,false,true,false,false,false,false,false,false,false,false,true,false,false,false,false,false,false,true,false],"height":1.5362285111230571},{"image_id":"96782d3925ec4088ab224cdc92a4fd6a","pose":[-0.216113,-0.00838211,-0.976332,1.24213,0.976316,0.00844697,-0.216182,2.38931,0.0100594,-0.999929,0.00635911,1.53856,0,0,0,1],"included":true,"visible":[true,false,false,false,true,true,true,false,true,false,false,false,false,true,false,false,true,false,true,true,false,true,true,true,false,false,false,true,false,false,false],"unobstructed":[false,false,false,false,false,false,false,false,false,false,false,false,false,false,false,false,true,false,false,false,false,true,false,false,false,false,false,true,false,false,false],"height":1.5135335729735602},{"image_id":"2dcc9c6ca2d44d5080a0a7e7b7fb9c4d","pose":[-0.951188,-0.00996649,-0.308449,-1.21085,0.308409,0.00538007,-0.951238,2.40322,0.0111403,-0.999936,-0.00204269,1.55952,0,0,0,1],"included":true,"visible":[false,false,false,false,false,false,false,true,false,true,false,false,false,false,true,false,true,true,true,false,false,true,false,false,false,true,false,true,false,false,false],"unobstructed":[false,false,false,false,false,false,false,false,false,false,false,false,false,false,false,false,true,false,false,false,false,false,false,false,false,true,false,true,false,false,false],"height":1.5317176811699114},{"image_id":"0d704acada9041c48621c5d01d775da0","pose":[0.884279,0.0143861,0.466735,-1.34535,-0.466608,-0.0113974,0.88439,-2.3821,0.0180428,-0.999831,-0.00336482,1.52522,0,0,0,1],"included":true,"visible":[true,false,false,false,false,false,false,false,true,false,false,false,false,false,true,false,true,true,false,false,false,true,false,false,true,true,false,false,false,false,false],"unobstructed":[false,false,false,false,false,false,false,false,false,false,false,false,false,false,true,false,false,false,false,false,false,false,false,false,true,true,false,false,false,false,false],"height":1.5405532836763522},{"image_id":"2cbd295d838b4c51b5590dcf2a37fba0","pose":[0.246342,0.0412581,-0.968304,4.76599,0.96868,0.0216735,0.247362,0.169153,0.0311925,-0.998913,-0.0346258,1.42661,0,0,0,1],"included":true,"visible":[false,false,false,false,false,false,true,false,false,false,false,false,false,true,false,false,false,false,false,false,false,true,true,true,false,false,false,false,true,true,false],"unobstructed":[false,false,false,false,false,false,false,false,false,false,false,false,true,true,false,false,false,false,false,false,false,true,false,true,false,false,false,false,true,true,false],"height"
:1.5180090338091925},{"image_id":"6fbd170d8df746b0b10e3801e2dad706","pose":[-0.872353,-0.0000202749,0.488874,3.49156,-0.488854,-0.00892582,-0.872319,0.121306,0.00438157,-0.99996,0.00777758,1.41535,0,0,0,1],"included":true,"visible":[false,true,false,false,true,false,false,false,false,false,false,false,false,false,false,true,false,true,false,false,false,true,false,false,false,true,false,false,true,false,false],"unobstructed":[false,true,false,false,false,false,false,false,false,false,false,false,false,false,false,true,false,false,false,false,false,false,false,false,false,false,false,false,true,false,false],"height":1.5371204380160495},{"image_id":"31d308fee8284a168c28e238cf814363","pose":[0.998122,0.0164352,-0.0590029,6.9369,0.0592246,-0.0133283,0.998155,-2.13031,0.0156188,-0.999776,-0.0142757,1.58199,0,0,0,1],"included":true,"visible":[false,true,true,false,false,false,false,false,false,false,true,false,false,true,false,true,false,false,true,false,false,true,true,true,false,false,true,false,false,true,true],"unobstructed":[false,false,true,false,false,false,false,false,false,false,true,false,false,true,false,false,false,false,false,false,false,false,false,true,false,false,false,false,false,false,false],"height":1.5115252320863801},{"image_id":"789faffd87b949fd9ed7e6df4fadc2f1","pose":[0.998352,0.0156401,-0.0551931,6.89589,0.0551612,0.00248225,0.998474,-1.07864,0.0157535,-0.999874,0.00161644,1.58253,0,0,0,1],"included":true,"visible":[false,false,true,false,true,false,true,false,false,false,true,false,true,false,false,true,false,false,true,false,false,true,true,true,false,false,false,false,false,true,false],"unobstructed":[false,false,true,false,false,false,true,false,false,false,true,false,true,false,false,false,false,false,false,false,false,false,true,true,false,false,false,false,false,true,false],"height":1.5156362905724483},{"image_id":"a26b0e83785f45d484e5f9b83fdb4df3","pose":[0.784717,-0.00024993,0.619854,-0.356288,-0.619842,-0.00640294,0.7847,-1.3696,0.00377304,-0.999979,-0.0051784,1.5663,0,0,0,1],"included":true,"visible":[true,false,true,false,true,false,false,false,false,true,false,false,true,false,false,true,true,true,false,false,false,true,false,false,true,true,false,true,false,false,false],"unobstructed":[true,false,false,false,true,false,false,false,false,true,false,false,false,false,false,true,true,true,false,false,false,true,false,false,true,true,false,false,false,false,false],"height":1.5217725369665362},{"image_id":"df0b69b34d04453691b72a6c16923756","pose":[0.00951654,-0.00498874,-0.999942,2.41189,0.999919,0.00833186,0.00947506,0.0914117,0.00828438,-0.999952,0.00506864,1.42153,0,0,0,1],"included":true,"visible":[false,true,false,true,true,false,false,false,false,false,true,true,false,true,true,false,true,true,false,false,false,true,false,false,false,true,false,false,true,false,false],"unobstructed":[false,false,false,true,false,false,false,false,false,false,false,true,false,false,true,false,true,false,false,false,false,true,false,false,false,false,false,false,true,false,false],"height":1.5270023190896223},{"image_id":"d7d0e431bbfa40429a561060150f24cb","pose":[0.999351,0.0057182,0.0355512,-0.337565,-0.0355828,0.00559738,0.999351,1.14528,0.00551577,-0.999968,0.00579823,1.55634,0,0,0,1],"included":true,"visible":[false,false,false,false,true,false,true,true,true,false,false,false,false,true,true,true,false,true,false,false,false,true,true,false,true,false,false,true,false,false,false],"unobstructed":[false,false,false,false,true,false,false,true,true,false,false,false,false,fa
lse,true,true,false,true,false,false,false,true,false,false,false,false,false,true,false,false,false],"height":1.5126864275679581},{"image_id":"8f17854feb134826ae42e16b303e7445","pose":[-0.04737,0.0249555,-0.998565,-0.00382618,0.998875,0.00294013,-0.0473109,-0.017549,0.00175551,-0.999684,-0.0250657,1.55087,0,0,0,1],"included":true,"visible":[false,false,false,false,false,false,false,false,true,true,false,true,false,true,true,true,true,false,false,false,false,true,false,false,true,false,false,true,false,false,false],"unobstructed":[false,false,false,false,false,false,false,false,false,false,false,false,false,false,true,false,true,false,false,false,false,true,false,false,false,false,false,true,false,false,false],"height":1.5136058544662168},{"image_id":"d0584db5d0ba41ee955f6c91195afcb3","pose":[-0.0387735,-0.000627238,0.999248,6.85886,-0.999187,-0.0109357,-0.0387783,2.09848,0.0109521,-0.99994,-0.000201698,1.56982,0,0,0,1],"included":true,"visible":[false,false,false,true,false,true,false,false,false,false,false,false,true,true,true,true,false,false,false,true,true,true,true,true,false,false,false,false,false,true,false],"unobstructed":[false,false,false,false,false,false,false,false,false,false,false,false,false,false,false,false,false,false,false,true,false,false,false,true,false,false,false,false,false,true,false],"height":1.5123722877852799},{"image_id":"87491cd48b094270a2a1aa682b8a770c","pose":[0.995378,0.0106665,0.0954335,5.60063,-0.0953334,-0.00948957,0.9954,2.17887,0.0115233,-0.999898,-0.00842783,1.55259,0,0,0,1],"included":true,"visible":[false,false,false,true,false,true,true,false,false,false,true,true,true,true,false,true,false,false,true,false,true,true,true,true,false,false,false,false,true,false,false],"unobstructed":[false,false,false,false,false,true,false,false,false,false,false,false,false,false,false,false,false,false,true,false,false,false,false,false,false,false,false,false,false,true,false],"height":1.5096271733017124},{"image_id":"8a65d3586fed4c5f9e0f28fc184b3ff2","pose":[0.999328,0.0243579,-0.0273564,3.25097,0.0277536,-0.016113,0.999485,2.12641,0.0239048,-0.999573,-0.0167772,1.55627,0,0,0,1],"included":true,"visible":[false,false,false,true,false,true,true,false,true,false,true,true,true,true,false,false,false,false,true,true,false,false,false,true,false,false,false,false,false,false,false],"unobstructed":[false,false,false,true,false,true,false,false,false,false,false,false,false,false,false,false,false,false,false,false,false,false,false,false,false,false,false,false,false,false,false],"height":1.5216447032258948},{"image_id":"eb464984cc4847d2a61eab27e3e31e51","pose":[0.317487,0.0187868,-0.948076,1.37215,0.94826,-0.0045702,0.317459,0.120026,0.0016314,-0.999813,-0.0192648,1.55431,0,0,0,1],"included":true,"visible":[true,false,false,false,true,false,false,true,true,true,false,true,false,true,true,true,true,true,false,false,false,false,false,false,true,true,false,false,false,false,false],"unobstructed":[true,false,false,false,true,false,false,true,false,false,true,false,false,false,true,true,true,true,false,false,false,false,false,false,true,true,false,false,false,false,false],"height":1.5187432392237161},{"image_id":"ce103547e620457f935a63050cea57b3","pose":[-0.926095,-0.0151941,-0.376983,7.37065,0.376978,0.00327303,-0.926216,0.160002,0.0153072,-0.999879,0.00269771,1.43016,0,0,0,1],"included":true,"visible":[false,false,true,false,true,false,true,false,false,false,true,false,true,true,false,false,false,false,true,false,false,false,false,true,false,true,false,false,fals
e,true,false],"unobstructed":[false,false,false,false,false,false,true,false,false,false,false,false,false,true,false,false,false,false,false,false,false,false,false,true,false,false,false,false,false,true,false],"height":1.5228214121764414},{"image_id":"fa48c6f958304aa8a8f765a72fe7e8d5","pose":[-0.994837,-0.00721806,0.101218,6.07693,-0.101252,0.00455002,-0.99485,0.0491342,0.00672061,-0.999963,-0.00525636,1.42403,0,0,0,1],"included":true,"visible":[false,false,false,false,true,false,true,false,false,false,true,false,true,true,false,false,false,false,false,false,false,false,true,false,false,true,false,false,false,true,false],"unobstructed":[false,false,false,false,false,false,false,false,false,false,true,false,true,true,false,false,false,false,true,false,false,false,true,false,false,false,false,false,false,true,false],"height":1.520425902170783},{"image_id":"50be95bc6efb466c90867d52cf32ba3f","pose":[0.803639,0.00102907,-0.595115,-0.280264,0.595001,0.0182495,0.803517,-2.40583,0.0116877,-0.999833,0.0140547,1.54308,0,0,0,1],"included":true,"visible":[true,false,true,false,true,false,false,false,false,true,false,false,true,false,true,false,true,true,false,false,false,true,true,false,false,true,false,true,false,false,false],"unobstructed":[true,false,false,false,true,false,false,false,false,true,false,false,false,false,true,false,false,false,false,false,false,true,false,false,false,true,false,false,false,false,false],"height":1.5259856691595353},{"image_id":"91d1554c155e4185a8c69636d47fd58d","pose":[0.7634,0.00593063,0.645898,-1.49105,-0.645812,-0.0117048,0.763406,-0.563949,0.0120878,-0.999914,-0.00510434,1.56479,0,0,0,1],"included":true,"visible":[true,false,false,false,true,false,true,true,true,true,false,true,false,false,true,true,false,false,false,false,false,true,true,true,true,false,false,true,false,false,false],"unobstructed":[false,false,false,false,true,false,false,false,true,true,false,false,false,false,true,false,false,false,false,false,false,true,false,false,true,false,false,true,false,false,false],"height":1.5123581928141085},{"image_id":"5d4349e09ada47b0aa8b20a0d22c54ca","pose":[0.0797542,0.0285043,-0.996407,3.62156,0.996744,0.00951931,0.080054,-2.10242,0.0117672,-0.999548,-0.0276513,1.56537,0,0,0,1],"included":true,"visible":[false,true,true,false,false,false,false,false,false,true,true,true,true,false,false,false,false,false,false,false,false,false,false,true,false,false,false,false,true,false,true],"unobstructed":[false,true,false,false,false,false,false,false,false,false,false,false,false,false,false,false,false,false,false,false,false,false,false,false,false,false,false,false,true,false,true],"height":1.5223704869964667},{"image_id":"298e09e5e1144e7b9762747370ca68a5","pose":[0.31306,-0.00832259,-0.949696,0.0361493,0.949732,0.00181293,0.313056,2.42577,-0.000883427,-0.999963,0.0084728,1.55565,0,0,0,1],"included":true,"visible":[true,false,false,false,true,false,true,true,true,false,false,false,false,true,true,false,true,true,true,true,false,false,false,true,true,true,false,false,false,false,false],"unobstructed":[false,false,false,false,true,false,false,true,true,false,false,false,false,false,false,false,true,true,false,false,false,false,false,false,false,true,false,false,false,false,false],"height":1.5224640014863746},{"image_id":"f8e13e216dd6477ea05e694e2f1478d9","pose":[0.998766,0.0109404,-0.0484187,2.48582,0.0482994,0.0109393,0.998773,-1.19789,0.0114569,-0.99988,0.0103984,1.57265,0,0,0,1],"included":true,"visible":[false,true,false,true,true,false,true,false,true,true,true,tr
ue,false,false,false,true,true,true,false,false,false,false,true,true,true,false,true,false,false,false,true],"unobstructed":[false,true,false,false,false,false,false,false,false,false,true,true,false,false,false,true,false,false,false,false,false,false,false,false,false,false,true,false,false,false,false],"height":1.5206684141424807},{"image_id":"e5f7cab8517b47399eda8866f0e30ab3","pose":[-0.660778,-0.00608519,-0.750556,7.08848,0.750578,-0.00299603,-0.660773,1.44662,0.00177251,-0.999977,0.00654814,1.57334,0,0,0,1],"included":true,"visible":[false,false,false,false,false,true,true,false,false,false,true,false,true,true,false,false,false,false,true,true,false,false,true,true,false,true,false,false,true,false,false],"unobstructed":[false,false,false,false,false,false,true,false,false,false,true,false,false,true,false,false,false,false,true,true,false,false,true,true,false,false,false,false,false,false,false],"height":1.5050461478205863},{"image_id":"a924a5855b954d68b26ebe82ab61c71d","pose":[-0.120428,-0.000846936,-0.992721,4.79789,0.992705,0.00559062,-0.12043,-2.05172,0.0056522,-0.999984,0.000168504,1.57612,0,0,0,1],"included":true,"visible":[false,true,true,false,false,false,true,false,true,false,true,false,true,false,false,false,true,false,false,false,false,true,true,true,false,false,true,false,false,true,false],"unobstructed":[false,false,true,false,false,false,false,false,false,false,false,false,false,false,false,false,false,false,false,false,false,false,false,false,false,false,true,false,false,false,false],"height":1.5244946264278192}] -------------------------------------------------------------------------------- /connectivity/scans.txt: -------------------------------------------------------------------------------- 1 | 17DRP5sb8fy 2 | 1LXtFkjw3qL 3 | 1pXnuDYAj8r 4 | 29hnd4uzFmX 5 | 2azQ1b91cZZ 6 | 2n8kARJN3HM 7 | 2t7WUuJeko7 8 | 5LpN3gDmAk7 9 | 5q7pvUzZiYa 10 | 5ZKStnWn8Zo 11 | 759xd9YjKW5 12 | 7y3sRwLe3Va 13 | 8194nk5LbLH 14 | 82sE5b5pLXE 15 | 8WUmhLawc2A 16 | aayBHfsNo7d 17 | ac26ZMwG7aT 18 | ARNzJeq3xxb 19 | B6ByNegPMKs 20 | b8cTxDM8gDG 21 | cV4RVeZvu5T 22 | D7G3Y4RVNrH 23 | D7N2EKCX4Sj 24 | dhjEzFoUFzH 25 | E9uDoFAP3SH 26 | e9zR4mvMWw7 27 | EDJbREhghzL 28 | EU6Fwq7SyZv 29 | fzynW3qQPVF 30 | GdvgFV5R1Z5 31 | gTV8FGcVJC9 32 | gxdoqLR6rwA 33 | gYvKGZ5eRqb 34 | gZ6f7yhEvPG 35 | HxpKQynjfin 36 | i5noydFURQK 37 | JeFG25nYj2p 38 | JF19kD82Mey 39 | jh4fc5c5qoQ 40 | JmbYfDe2QKZ 41 | jtcxE69GiFV 42 | kEZ7cmS4wCh 43 | mJXqzFtmKg4 44 | oLBMNvg9in8 45 | p5wJjkQkbXX 46 | pa4otMbVnkk 47 | pLe4wQe7qrG 48 | Pm6F8kyY3z2 49 | pRbA3pwrgk9 50 | PuKPg4mmafe 51 | PX4nDJXEHrG 52 | q9vSo1VnCiC 53 | qoiz87JEwZ2 54 | QUCTc6BB5sX 55 | r1Q1Z4BcV1o 56 | r47D5H71a5s 57 | rPc6DW4iMge 58 | RPmz2sHmrrY 59 | rqfALeAoiTq 60 | s8pcmisQ38h 61 | S9hNv5qa7GM 62 | sKLMLpTHeUy 63 | SN83YJsR3w2 64 | sT4fr6TAbpF 65 | TbHJrupSAjP 66 | ULsKaCPVFJR 67 | uNb9QFRL6hY 68 | ur6pFq6Qu1A 69 | UwV83HsGsw3 70 | Uxmj2M2itWa 71 | V2XKFyX4ASd 72 | VFuaQ6m2Qom 73 | VLzqgDo317F 74 | Vt2qJdWjCF2 75 | VVfe2KiqLaN 76 | Vvot9Ly1tCj 77 | vyrNrziPKCB 78 | VzqfbhrpDEA 79 | wc2JMjhGNzB 80 | WYY7iVyf5p8 81 | X7HyMhZNoso 82 | x8F5xyUWy9e 83 | XcA2TqTSSAj 84 | YFuZgdQ5vWj 85 | YmJkqBEsHnH 86 | yqstnuAEVhm 87 | YVUC4YcDtcY 88 | Z6MFQCViBuw 89 | ZMojNkEp431 90 | zsNo4HB9uLZ -------------------------------------------------------------------------------- /connectivity/scans_dialog.txt: -------------------------------------------------------------------------------- 1 | JmbYfDe2QKZ 2 | gZ6f7yhEvPG 3 | WYY7iVyf5p8 4 | rqfALeAoiTq 5 | UwV83HsGsw3 6 | 
mJXqzFtmKg4 7 | ur6pFq6Qu1A 8 | rPc6DW4iMge 9 | D7G3Y4RVNrH 10 | RPmz2sHmrrY 11 | sKLMLpTHeUy 12 | s8pcmisQ38h 13 | q9vSo1VnCiC 14 | 82sE5b5pLXE 15 | e9zR4mvMWw7 16 | qoiz87JEwZ2 17 | Uxmj2M2itWa 18 | x8F5xyUWy9e 19 | kEZ7cmS4wCh 20 | pLe4wQe7qrG 21 | HxpKQynjfin 22 | X7HyMhZNoso 23 | zsNo4HB9uLZ 24 | ARNzJeq3xxb 25 | PuKPg4mmafe 26 | JF19kD82Mey 27 | VVfe2KiqLaN 28 | V2XKFyX4ASd 29 | ULsKaCPVFJR 30 | gTV8FGcVJC9 31 | 1LXtFkjw3qL 32 | 2t7WUuJeko7 33 | 1pXnuDYAj8r 34 | wc2JMjhGNzB 35 | fzynW3qQPVF 36 | jh4fc5c5qoQ 37 | D7N2EKCX4Sj 38 | Z6MFQCViBuw 39 | VLzqgDo317F 40 | 2n8kARJN3HM 41 | 2azQ1b91cZZ 42 | vyrNrziPKCB 43 | QUCTc6BB5sX 44 | 759xd9YjKW5 45 | XcA2TqTSSAj 46 | SN83YJsR3w2 47 | r1Q1Z4BcV1o 48 | oLBMNvg9in8 49 | YmJkqBEsHnH 50 | 5LpN3gDmAk7 51 | B6ByNegPMKs 52 | sT4fr6TAbpF 53 | YVUC4YcDtcY 54 | dhjEzFoUFzH 55 | GdvgFV5R1Z5 56 | VzqfbhrpDEA 57 | ZMojNkEp431 58 | gYvKGZ5eRqb 59 | 17DRP5sb8fy 60 | TbHJrupSAjP 61 | EDJbREhghzL 62 | ac26ZMwG7aT 63 | r47D5H71a5s 64 | pa4otMbVnkk 65 | EU6Fwq7SyZv 66 | jtcxE69GiFV 67 | i5noydFURQK 68 | gxdoqLR6rwA 69 | E9uDoFAP3SH 70 | 5q7pvUzZiYa 71 | aayBHfsNo7d 72 | b8cTxDM8gDG 73 | 8WUmhLawc2A 74 | JeFG25nYj2p 75 | yqstnuAEVhm 76 | Vvot9Ly1tCj 77 | p5wJjkQkbXX 78 | cV4RVeZvu5T 79 | 5ZKStnWn8Zo 80 | 8194nk5LbLH 81 | Vt2qJdWjCF2 82 | PX4nDJXEHrG 83 | VFuaQ6m2Qom 84 | pRbA3pwrgk9 85 | uNb9QFRL6hY 86 | S9hNv5qa7GM 87 | Pm6F8kyY3z2 88 | 29hnd4uzFmX 89 | 7y3sRwLe3Va 90 | YFuZgdQ5vWj -------------------------------------------------------------------------------- /img_features/.gitkeep: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yeezhu/CMN.pytorch/b4e3c3ca34668cb8031d525132b013ced472ed87/img_features/.gitkeep -------------------------------------------------------------------------------- /include/Benchmark.hpp: -------------------------------------------------------------------------------- 1 | #ifndef MATTERSIM_BENCHMARK 2 | #define MATTERSIM_BENCHMARK 3 | 4 | #include 5 | 6 | namespace mattersim { 7 | 8 | class Timer { 9 | public: 10 | Timer(); 11 | virtual void Start(); 12 | virtual void Stop(); 13 | virtual void Reset(); 14 | virtual float MilliSeconds(); 15 | virtual float MicroSeconds(); 16 | virtual float Seconds(); 17 | inline bool running() { return running_; } 18 | 19 | protected: 20 | bool running_; 21 | std::chrono::steady_clock::time_point start_; 22 | std::chrono::steady_clock::duration elapsed_; 23 | }; 24 | } 25 | 26 | #endif // MATTERSIM_BENCHMARK 27 | -------------------------------------------------------------------------------- /include/MatterSim.hpp: -------------------------------------------------------------------------------- 1 | #ifndef MATTERSIM_HPP 2 | #define MATTERSIM_HPP 3 | 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | 10 | #include 11 | 12 | #ifdef OSMESA_RENDERING 13 | #define GL_GLEXT_PROTOTYPES 14 | #include 15 | #include 16 | #elif defined (EGL_RENDERING) 17 | #include 18 | #include 19 | #else 20 | #include 21 | #endif 22 | 23 | #define GLM_FORCE_RADIANS 24 | #include 25 | #include 26 | #include 27 | #include "glm/ext.hpp" 28 | 29 | #include "Benchmark.hpp" 30 | #include "NavGraph.hpp" 31 | 32 | namespace mattersim { 33 | 34 | struct Viewpoint: std::enable_shared_from_this { 35 | Viewpoint(std::string viewpointId, unsigned int ix, double x, double y, double z, 36 | double rel_heading, double rel_elevation, double rel_distance) : 37 | viewpointId(viewpointId), ix(ix), x(x), y(y), z(z), rel_heading(rel_heading), 38 | 
rel_elevation(rel_elevation), rel_distance(rel_distance) 39 | {} 40 | 41 | //! Viewpoint identifier 42 | std::string viewpointId; 43 | //! Viewpoint index into connectivity graph 44 | unsigned int ix; 45 | //! 3D position in world coordinates 46 | double x; 47 | double y; 48 | double z; 49 | //! Heading relative to the camera 50 | double rel_heading; 51 | //! Elevation relative to the camera 52 | double rel_elevation; 53 | //! Distance from the agent 54 | double rel_distance; 55 | }; 56 | 57 | typedef std::shared_ptr ViewpointPtr; 58 | struct ViewpointPtrComp { 59 | inline bool operator() (const ViewpointPtr& l, const ViewpointPtr& r){ 60 | return sqrt(l->rel_heading*l->rel_heading+l->rel_elevation*l->rel_elevation) 61 | < sqrt(r->rel_heading*r->rel_heading+r->rel_elevation*r->rel_elevation); 62 | } 63 | }; 64 | 65 | /** 66 | * Simulator state class. 67 | */ 68 | struct SimState: std::enable_shared_from_this{ 69 | //! Building / scan environment identifier 70 | std::string scanId; 71 | //! Number of frames since the last newEpisode() call 72 | unsigned int step = 0; 73 | //! RGB image (in BGR channel order) from the agent's current viewpoint 74 | cv::Mat rgb; 75 | //! Depth image taken from the agent's current viewpoint 76 | cv::Mat depth; 77 | //! Agent's current 3D location 78 | ViewpointPtr location; 79 | //! Agent's current camera heading in radians 80 | double heading = 0; 81 | //! Agent's current camera elevation in radians 82 | double elevation = 0; 83 | //! Agent's current view [0-35] (set only when viewing angles are discretized) 84 | //! [0-11] looking down, [12-23] looking at horizon, [24-35] looking up 85 | unsigned int viewIndex = 0; 86 | //! Vector of nearby navigable locations representing state-dependent action candidates, i.e. 87 | //! viewpoints you can move to. Index 0 is always to remain at the current viewpoint. 88 | //! The remaining viewpoints are sorted by their angular distance from the centre of the image. 89 | std::vector navigableLocations; 90 | }; 91 | 92 | typedef std::shared_ptr SimStatePtr; 93 | 94 | 95 | /** 96 | * Main class for accessing an instance of the simulator environment. 97 | */ 98 | class Simulator { 99 | 100 | public: 101 | Simulator(); 102 | 103 | ~Simulator(); 104 | 105 | /** 106 | * Set a non-standard path to the Matterport3D dataset. 107 | * The provided directory must contain subdirectories of the form: 108 | * "/matterport_skybox_images/". Default is "./data/v1/scans/". 109 | */ 110 | void setDatasetPath(const std::string& path); 111 | 112 | /** 113 | * Set a non-standard path to the viewpoint connectivity graphs. The provided directory must contain files 114 | * of the form "/_connectivity.json". Default is "./connectivity" (the graphs provided 115 | * by this repo). 116 | */ 117 | void setNavGraphPath(const std::string& path); 118 | 119 | /** 120 | * Enable or disable rendering. Useful for testing. Default is true (enabled). 121 | */ 122 | void setRenderingEnabled(bool value); 123 | 124 | /** 125 | * Sets camera resolution. Default is 320 x 240. 126 | */ 127 | void setCameraResolution(int width, int height); 128 | 129 | /** 130 | * Sets camera vertical field-of-view in radians. Default is 0.8, approx 46 degrees. 131 | */ 132 | void setCameraVFOV(double vfov); 133 | 134 | /** 135 | * Set the camera elevation min and max limits in radians. Default is +-0.94 radians. 136 | * @return true if successful. 137 | */ 138 | bool setElevationLimits(double min, double max); 139 | 140 | /** 141 | * Enable or disable discretized viewing angles. 
When enabled, heading and 142 | * elevation changes will be restricted to 30 degree increments from zero, 143 | * with left/right/up/down movement triggered by the sign of the makeAction 144 | * heading and elevation parameters. Default is false (disabled). 145 | */ 146 | void setDiscretizedViewingAngles(bool value); 147 | 148 | /** 149 | * Enable or disable preloading of images from disk to CPU memory. Default is false (disabled). 150 | * Enabled is better for training models, but will cause a delay when starting the simulator. 151 | */ 152 | void setPreloadingEnabled(bool value); 153 | 154 | /** 155 | * Enable or disable rendering of depth images. Default is false (disabled). 156 | */ 157 | void setDepthEnabled(bool value); 158 | 159 | /** 160 | * Set the number of environments in the batch. Default is 1. 161 | */ 162 | void setBatchSize(unsigned int size); 163 | 164 | /** 165 | * Set the cache size for storing pano images in gpu memory. Default is 200. Should be comfortably 166 | * larger than the batch size. 167 | */ 168 | void setCacheSize(unsigned int size); 169 | 170 | /** 171 | * Set the random seed for episodes where viewpoint is not provided. 172 | */ 173 | void setSeed(int seed); 174 | 175 | /** 176 | * Initialize the simulator. Further configuration won't take any effect from now on. 177 | */ 178 | void initialize(); 179 | 180 | /** 181 | * Starts a new episode. If a viewpoint is not provided initialization will be random. 182 | * @param scanId - sets which scene is used, e.g. "2t7WUuJeko7" 183 | * @param viewpointId - sets the initial viewpoint location, e.g. "cc34e9176bfe47ebb23c58c165203134" 184 | * @param heading - set the agent's initial camera heading in radians. With z-axis up, 185 | * heading is defined relative to the y-axis (turning right is positive). 186 | * @param elevation - set the initial camera elevation in radians, measured from the horizon 187 | * defined by the x-y plane (up is positive). 188 | */ 189 | void newEpisode(const std::vector& scanId, const std::vector& viewpointId, 190 | const std::vector& heading, const std::vector& elevation); 191 | 192 | /** 193 | * Starts a new episode at a random viewpoint. 194 | * @param scanId - sets which scene is used, e.g. "2t7WUuJeko7" 195 | */ 196 | void newRandomEpisode(const std::vector& scanId); 197 | 198 | /** 199 | * Returns the current batch of environment states including RGB images and available actions. 200 | */ 201 | const std::vector& getState(); 202 | 203 | /** @brief Select an action. 204 | * 205 | * An RL agent will sample an action here. A task-specific reward can be determined 206 | * based on the location, heading, elevation, etc. of the resulting state. 207 | * @param index - an index into the set of feasible actions defined by getState()->navigableLocations. 208 | * @param heading - desired heading change in radians. With z-axis up, heading is defined 209 | * relative to the y-axis (turning right is positive). 210 | * @param elevation - desired elevation change in radians, measured from the horizon defined 211 | * by the x-y plane (up is positive). 212 | */ 213 | void makeAction(const std::vector& index, const std::vector& heading, 214 | const std::vector& elevation); 215 | 216 | /** 217 | * Closes the environment and releases underlying texture resources, OpenGL contexts, etc. 218 | */ 219 | void close(); 220 | 221 | /** 222 | * Reset the rendering timers that run automatically. 223 | */ 224 | void resetTimers(); 225 | 226 | /** 227 | * Return a formatted timing string. 
228 | */ 229 | std::string timingInfo(); 230 | 231 | private: 232 | const int headingCount = 12; // 12 heading values in discretized views 233 | const double elevationIncrement = M_PI/6.0; // 30 degrees discretized up/down 234 | void populateNavigable(); 235 | void setHeadingElevation(const std::vector& heading, const std::vector& elevation); 236 | void renderScene(); 237 | #ifdef OSMESA_RENDERING 238 | void *buffer; 239 | OSMesaContext ctx; 240 | #elif defined (EGL_RENDERING) 241 | EGLDisplay eglDpy; 242 | GLuint FramebufferName; 243 | #else 244 | GLuint FramebufferName; 245 | #endif 246 | std::vector states; 247 | bool initialized; 248 | bool renderingEnabled; 249 | bool discretizeViews; 250 | bool preloadImages; 251 | bool renderDepth; 252 | int width; 253 | int height; 254 | int randomSeed; 255 | unsigned int cacheSize; 256 | unsigned int batchSize; 257 | double vfov; 258 | double minElevation; 259 | double maxElevation; 260 | glm::mat4 Projection; 261 | glm::mat4 View; 262 | glm::mat4 Model; 263 | glm::mat4 Scale; 264 | glm::mat4 RotateX; 265 | glm::mat4 RotateZ; 266 | GLint ProjMat; 267 | GLint ModelViewMat; 268 | GLint vertex; 269 | GLint isDepth; 270 | GLuint vao_cube; 271 | GLuint vbo_cube_vertices; 272 | GLuint glProgram; 273 | GLuint glShaderV; 274 | GLuint glShaderF; 275 | std::string datasetPath; 276 | std::string navGraphPath; 277 | Timer preloadTimer; // Preloading images from disk into cpu memory 278 | Timer loadTimer; // Loading textures from disk or cpu memory onto gpu 279 | Timer renderTimer; // Rendering time 280 | Timer gpuReadTimer; // Reading rendered images from gpu back to cpu memory 281 | Timer processTimer; // Total run time for simulator 282 | Timer wallTimer; // Wall clock timer 283 | unsigned int frames; 284 | }; 285 | } 286 | 287 | #endif 288 | -------------------------------------------------------------------------------- /include/NavGraph.hpp: -------------------------------------------------------------------------------- 1 | #ifndef NAVGRAPH_HPP 2 | #define NAVGRAPH_HPP 3 | 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include 10 | #include 11 | 12 | #include 13 | #include 14 | 15 | #ifdef OSMESA_RENDERING 16 | #define GL_GLEXT_PROTOTYPES 17 | #include 18 | #include 19 | #elif defined (EGL_RENDERING) 20 | #include 21 | #include 22 | #else 23 | #include 24 | #endif 25 | 26 | #define GLM_FORCE_RADIANS 27 | #include 28 | #include 29 | #include 30 | 31 | namespace mattersim { 32 | 33 | static void assertOpenGLError(const std::string& msg) { 34 | GLenum error = glGetError(); 35 | if (error != GL_NO_ERROR) { 36 | std::stringstream s; 37 | s << "OpenGL error 0x" << std::hex << error << " at " << msg; 38 | throw std::runtime_error(s.str()); 39 | } 40 | } 41 | #ifdef EGL_RENDERING 42 | static void assertEGLError(const std::string& msg) { 43 | EGLint error = eglGetError(); 44 | 45 | if (error != EGL_SUCCESS) { 46 | std::stringstream s; 47 | s << "EGL error 0x" << std::hex << error << " at " << msg; 48 | throw std::runtime_error(s.str()); 49 | } 50 | } 51 | #endif 52 | 53 | /** 54 | * Navigation graph indicating which panoramic viewpoints are adjacent, and also 55 | * containing (optionally pre-loaded) skybox / cubemap images and textures. 56 | * Class is a singleton to ensure images and textures are only loaded once. 
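 * A usage sketch (hypothetical values; NavGraph is normally driven by the
 * Simulator rather than used directly, and the paths shown are the defaults
 * documented in MatterSim.hpp):
 *   NavGraph& graph = NavGraph::getInstance("./connectivity", "./data/v1/scans", false, false, 1, 200);
 *   unsigned int ix = graph.index(scanId, viewpointId);
 *   GLuint rgbTex = graph.cubemapTextures(scanId, ix).first; // .second is the depth texture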
57 | */ 58 | class NavGraph final { 59 | 60 | private: 61 | 62 | NavGraph(const std::string& navGraphPath, const std::string& datasetPath, 63 | bool preloadImages, bool renderDepth, int randomSeed, unsigned int cacheSize); 64 | 65 | ~NavGraph(); 66 | 67 | public: 68 | // Delete the default, copy and move constructors 69 | NavGraph() = delete; 70 | NavGraph(const NavGraph&) = delete; 71 | NavGraph& operator=(const NavGraph&) = delete; 72 | NavGraph(NavGraph&&) = delete; 73 | NavGraph& operator=(NavGraph&&) = delete; 74 | 75 | /** 76 | * First call will load the navigation graph from disk and (optionally) preload the 77 | * cubemap images into memory. 78 | * @param navGraphPath - directory containing json viewpoint connectivity graphs 79 | * @param datasetPath - directory containing a data directory for each Matterport scan id 80 | * @param preloadImages - if true, all cubemap images will be loaded into CPU memory immediately 81 | * @param renderDepth - if true, depth map images are also required 82 | * @param randomSeed - only used for randomViewpoint function 83 | * @param cacheSize - number of pano textures to keep in GPU memory 84 | */ 85 | static NavGraph& getInstance(const std::string& navGraphPath, const std::string& datasetPath, 86 | bool preloadImages, bool renderDepth, int randomSeed, unsigned int cacheSize); 87 | 88 | /** 89 | * Select a random viewpoint from a scan 90 | */ 91 | const std::string& randomViewpoint(const std::string& scanId); 92 | 93 | /** 94 | * Find the index of a selected viewpointId 95 | */ 96 | unsigned int index(const std::string& scanId, const std::string& viewpointId) const; 97 | 98 | /** 99 | * ViewpointId of a selected viewpoint index 100 | */ 101 | const std::string& viewpoint(const std::string& scanId, unsigned int ix) const; 102 | 103 | /** 104 | * Camera rotation matrix for a selected viewpoint index 105 | */ 106 | const glm::mat4& cameraRotation(const std::string& scanId, unsigned int ix) const; 107 | 108 | /** 109 | * Camera position vector for a selected viewpoint index 110 | */ 111 | const glm::vec3& cameraPosition(const std::string& scanId, unsigned int ix) const; 112 | 113 | /** 114 | * Return a list of other viewpoint indices that are reachable from a selected viewpoint index 115 | */ 116 | std::vector adjacentViewpointIndices(const std::string& scanId, unsigned int ix) const; 117 | 118 | /** 119 | * Get cubemap RGB (and optionally, depth) textures for a selected viewpoint index 120 | */ 121 | std::pair cubemapTextures(const std::string& scanId, unsigned int ix); 122 | 123 | /** 124 | * Free GPU memory associated with this viewpoint's textures 125 | */ 126 | void deleteCubemapTextures(const std::string& scanId, unsigned int ix); 127 | 128 | 129 | protected: 130 | 131 | /** 132 | * Helper class representing nodes in the navigation graph and their cubemap textures. 
133 | */ 134 | class Location { 135 | 136 | public: 137 | /** 138 | * Construct a location object from a json struct 139 | * @param viewpoint - json struct 140 | * @param skyboxDir - directory containing a data directory for each Matterport scan id 141 | * @param preload - if true, all cubemap images will be loaded into CPU memory immediately 142 | * @param depth - if true, depth textures will also be provided 143 | */ 144 | Location(const Json::Value& viewpoint, const std::string& skyboxDir, bool preload, bool depth); 145 | 146 | Location() = delete; // no default constructor 147 | 148 | /** 149 | * Return the cubemap RGB (and optionally, depth) textures for this viewpoint, which will 150 | * be loaded from CPU memory or disk if necessary 151 | */ 152 | std::pair cubemapTextures(); 153 | 154 | /** 155 | * Free GPU memory associated with RGB and depth textures at this location 156 | */ 157 | void deleteCubemapTextures(); 158 | 159 | std::string viewpointId; //! Unique Matterport identifier for every pano 160 | bool included; //! Some duplicated viewpoints have been excluded 161 | glm::mat4 rot; //! Camera pose rotation component 162 | glm::vec3 pos; //! Camera pose translation component 163 | std::vector unobstructed; //! Connections to other graph locations 164 | 165 | protected: 166 | 167 | /** 168 | * Load RGB (and optionally, depth) cubemap images from disk into CPU memory 169 | */ 170 | void loadCubemapImages(); 171 | 172 | /** 173 | * Create RGB (and optionally, depth) textures from cubemap images (e.g., in GPU memory) 174 | */ 175 | void loadCubemapTextures(); 176 | 177 | GLuint cubemap_texture; 178 | GLuint depth_texture; 179 | cv::Mat xpos; //! RGB images for faces of the cubemap 180 | cv::Mat xneg; 181 | cv::Mat ypos; 182 | cv::Mat yneg; 183 | cv::Mat zpos; 184 | cv::Mat zneg; 185 | cv::Mat xposD; //! Depth images for faces of the cubemap 186 | cv::Mat xnegD; 187 | cv::Mat yposD; 188 | cv::Mat ynegD; 189 | cv::Mat zposD; 190 | cv::Mat znegD; 191 | bool im_loaded; 192 | bool includeDepth; 193 | std::string skyboxDir; //! Path to skybox images 194 | }; 195 | typedef std::shared_ptr LocationPtr; 196 | 197 | 198 | /** 199 | * Helper class implementing a LRU cache for cubemap textures. 
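 * add() moves a location to the front of the usage list; once the cache
 * reaches its size limit, removeEldest() frees the GPU textures of the
 * location at the back of the list (the least recently used one).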
200 | */ 201 | class TextureCache { 202 | 203 | public: 204 | TextureCache(unsigned int size) : size(size) { 205 | cacheMap.reserve(size+1); 206 | } 207 | 208 | TextureCache() = delete; // no default constructor 209 | 210 | void add(LocationPtr loc) { 211 | auto map_it = cacheMap.find(loc); 212 | if (map_it != cacheMap.end()) { 213 | // Remove entry from middle of list 214 | cacheList.erase(map_it->second); 215 | cacheMap.erase(map_it); 216 | } 217 | // Add element to list and save iterator on map 218 | auto list_it = cacheList.insert(cacheList.begin(), loc); 219 | cacheMap.emplace(loc, list_it); 220 | if (cacheMap.size() >= size) { 221 | removeEldest(); 222 | } 223 | } 224 | 225 | void removeEldest() { 226 | if (cacheMap.empty()) { 227 | throw std::runtime_error("MatterSim: TextureCache is empty"); 228 | } 229 | LocationPtr loc = cacheList.back(); 230 | loc->deleteCubemapTextures(); 231 | cacheMap.erase(loc); 232 | cacheList.pop_back(); 233 | } 234 | 235 | private: 236 | unsigned int size; 237 | std::unordered_map<LocationPtr, std::list<LocationPtr>::iterator > cacheMap; 238 | std::list<LocationPtr> cacheList; 239 | }; 240 | 241 | 242 | std::map<std::string, std::vector<LocationPtr> > scanLocations; 243 | std::default_random_engine generator; 244 | TextureCache cache; 245 | }; 246 | 247 | } 248 | 249 | #endif 250 | -------------------------------------------------------------------------------- /include/cbf.h: -------------------------------------------------------------------------------- 1 | // NYU Depth V2 Dataset Matlab Toolbox 2 | // Authors: Nathan Silberman, Pushmeet Kohli, Derek Hoiem, Rob Fergus 3 | 4 | #ifndef CBF_H_ 5 | #define CBF_H_ 6 | 7 | #include <stdint.h> 8 | 9 | namespace cbf { 10 | 11 | // Filters the given depth image using a Cross Bilateral Filter. 12 | // 13 | // Args: 14 | // height - height of the images. 15 | // width - width of the images. 16 | // depth - HxW row-major ordered matrix. 17 | // intensity - HxW row-major ordered matrix. 18 | // mask - HxW row-major ordered matrix. 19 | // result - HxW row-major ordered matrix. 20 | // num_scales - the number of scales at which to perform the filtering. 21 | // sigma_s - the space sigma (in pixels) 22 | // sigma_r - the range sigma (in intensity values, 0-1) 23 | void cbf(int height, int width, uint8_t* depth, uint8_t* intensity, 24 | uint8_t* mask, uint8_t* result, unsigned num_scales, double* sigma_s, 25 | double* sigma_r); 26 | 27 | } // namespace 28 | 29 | #endif // CBF_H_ 30 | -------------------------------------------------------------------------------- /scripts/depth_to_skybox.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | ''' Script for generating depth skyboxes based on undistorted depth images, 4 | in order to support depth output in the simulator. The current version 5 | assumes that undistorted depth images are aligned to Matterport skyboxes, 6 | and uses simple blending. Images are downsized 50%. ''' 7 | 8 | import os 9 | import math 10 | import cv2 11 | import numpy as np 12 | from multiprocessing import Pool 13 | from numpy.linalg import inv,norm 14 | from StringIO import StringIO 15 | 16 | 17 | # Parameters 18 | DOWNSIZED_WIDTH = 512 19 | DOWNSIZED_HEIGHT = 512 20 | NUM_WORKER_PROCESSES = 20 21 | FILL_HOLES = True 22 | VISUALIZE_OUTPUT = False 23 | 24 | if FILL_HOLES: 25 | import sys 26 | sys.path.append('build') 27 | from MatterSim import cbf 28 | 29 | # Constants 30 | # Note: Matterport camera is really y=up, x=right, -z=look. 
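# Because the undistorted images and the skybox faces of a panorama
# (approximately) share a single camera centre, each undistorted image can be
# warped onto a skybox face with a pure-rotation homography
# H = K_skybox * R_wtc_skybox * R_ctw_image * inv(K_image),
# as computed in depth_to_skybox() below.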
31 | SKYBOX_WIDTH = 1024 32 | SKYBOX_HEIGHT = 1024 33 | base_dir = 'data/v1/scans' 34 | skybox_template = '%s/%s/matterport_skybox_images/%s_skybox%d_sami.jpg' 35 | color_template = '%s/%s/undistorted_color_images/%s_i%s.jpg' 36 | depth_template = '%s/%s/undistorted_depth_images/%s_d%s.png' 37 | camera_template = '%s/%s/undistorted_camera_parameters/%s.conf' 38 | skybox_depth_template = '%s/%s/matterport_skybox_images/%s_skybox_depth_small.png' 39 | 40 | 41 | # camera transform for skybox images 0-5 relative to image 1 42 | skybox_transforms = [ 43 | np.array([[1,0,0],[0,0,-1],[0,1,0]], dtype=np.double), #up (down) 44 | np.eye(3, dtype=np.double), 45 | np.array([[0,0,-1],[0,1,0],[1,0,0]], dtype=np.double), # right 46 | np.array([[-1,0,0],[0,1,0],[0,0,-1]], dtype=np.double), # 180 47 | np.array([[0,0,1],[0,1,0],[-1,0,0]], dtype=np.double), # left 48 | np.array([[1,0,0],[0,0,1],[0,-1,0]], dtype=np.double) # down (up) 49 | ] 50 | 51 | 52 | def camera_parameters(scan): 53 | ''' Returns two dicts containing undistorted camera intrinsics (3x3) and extrinsics (4x4), 54 | respectively, for a given scan. Viewpoint IDs are used as dict keys. ''' 55 | intrinsics = {} 56 | extrinsics = {} 57 | with open(camera_template % (base_dir,scan,scan)) as f: 58 | pos = -1 59 | for line in f.readlines(): 60 | if 'intrinsics_matrix' in line: 61 | intr = line.split() 62 | C = np.zeros((3, 3), np.double) 63 | C[0,0] = intr[1] # fx 64 | C[1,1] = intr[5] # fy 65 | C[0,2] = intr[3] # cx 66 | C[1,2] = intr[6] # cy 67 | C[2,2] = 1.0 68 | pos = 0 69 | elif pos >= 0 and pos < 6: 70 | q = line.find('.jpg') 71 | camera = line[q-37:q] 72 | if pos == 0: 73 | intrinsics[camera[:-2]] = C 74 | T = np.loadtxt(StringIO(line.split('jpg ')[1])).reshape((4,4)) 75 | # T is camera-to-world transform, invert for world-to-camera 76 | extrinsics[camera] = (T,inv(T)) 77 | pos += 1 78 | return intrinsics,extrinsics 79 | 80 | 81 | def z_to_euclid(K_inv, depth): 82 | ''' Takes inverse intrinsics matrix and a depth image. Returns a new depth image with 83 | depth converted from z-distance into euclidean distance from the camera centre. ''' 84 | 85 | assert len(depth.shape) == 2 86 | h = depth.shape[0] 87 | w = depth.shape[1] 88 | 89 | y,x = np.indices((h,w)) 90 | homo_pixels = np.vstack((x.flatten(),y.flatten(),np.ones((x.size)))) 91 | rays = K_inv.dot(homo_pixels) 92 | cos_theta = np.array([0,0,1]).dot(rays) / norm(rays,axis=0) 93 | 94 | output = depth / cos_theta.reshape(h,w) 95 | return output 96 | 97 | 98 | def instrinsic_matrix(width, height): 99 | ''' Construct an ideal camera intrinsic matrix. ''' 100 | K = np.zeros((3, 3), np.double) 101 | K[0,0] = width/2 #fx 102 | K[1,1] = height/2 #fy 103 | K[0,2] = width/2 #cx 104 | K[1,2] = height/2 #cy 105 | K[2,2] = 1.0 106 | return K 107 | 108 | 109 | 110 | def fill_joint_bilateral_filter(rgb, depth): 111 | ''' Fill holes in a 16bit depth image given corresponding rgb image ''' 112 | 113 | intensity = cv2.cvtColor(rgb, cv2.COLOR_BGR2GRAY) 114 | 115 | # Convert the depth image to uint8. 
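# (cbf operates on 8-bit data, so the 16-bit depth is scaled by its max into
# the 0-255 range here, holes are filled, and the result is scaled back to
# 16-bit at the end of this function.)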
116 | maxDepth = np.max(depth)+1 117 | depth = (depth.astype(np.float64)/maxDepth) 118 | depth[depth > 1] = 1 119 | depth = (depth*255).astype(np.uint8) 120 | 121 | # Convert to col major order 122 | depth = np.asfortranarray(depth) 123 | intensity = np.asfortranarray(intensity) 124 | mask = (depth == 0) 125 | result = np.zeros_like(depth) 126 | 127 | # Fill holes 128 | cbf(depth, intensity, mask, result) 129 | result = (result.astype(np.float64)/255*maxDepth).astype(np.uint16) 130 | return result 131 | 132 | 133 | def depth_to_skybox(scan, visualize=VISUALIZE_OUTPUT, fill_holes=FILL_HOLES): 134 | 135 | # Load camera parameters 136 | intrinsics,extrinsics = camera_parameters(scan) 137 | # Skybox camera intrinsics 138 | K_skybox = instrinsic_matrix(SKYBOX_WIDTH, SKYBOX_HEIGHT) 139 | 140 | pano_ids = list(set([item.split('_')[0] for item in intrinsics.keys()])) 141 | print 'Processing scan %s with %d panoramas' % (scan, len(pano_ids)) 142 | 143 | if visualize: 144 | cv2.namedWindow('RGB') 145 | cv2.namedWindow('Depth') 146 | cv2.namedWindow('Skybox') 147 | 148 | for pano in pano_ids: 149 | 150 | # Load undistorted depth and rgb images 151 | depth = {} 152 | rgb = {} 153 | for c in range(3): 154 | K_inv = inv(intrinsics['%s_i%d' % (pano,c)]) 155 | for i in range(6): 156 | name = '%d_%d' % (c,i) 157 | if visualize: 158 | rgb[name] = cv2.imread(color_template % (base_dir,scan,pano,name)) 159 | # Load 16bit grayscale image 160 | d_im = cv2.imread(depth_template % (base_dir,scan,pano,name), cv2.IMREAD_ANYDEPTH) 161 | depth[name] = z_to_euclid(K_inv, d_im) 162 | 163 | ims = [] 164 | for skybox_ix in range(6): 165 | 166 | # Load skybox image 167 | skybox = cv2.imread(skybox_template % (base_dir,scan,pano,skybox_ix)) 168 | 169 | # Skybox index 1 is the same orientation as camera image 1_5 170 | skybox_ctw,_ = extrinsics[pano + '_i1_5'] 171 | skybox_ctw = skybox_ctw[:3,:3].dot(skybox_transforms[skybox_ix]) 172 | skybox_wtc = inv(skybox_ctw) 173 | 174 | base_depth = np.zeros((SKYBOX_HEIGHT,SKYBOX_WIDTH), np.uint16) 175 | if visualize: 176 | base_rgb = np.zeros((SKYBOX_HEIGHT,SKYBOX_WIDTH,3), np.uint8) 177 | 178 | for camera in range(3): 179 | for angle in range(6): 180 | 181 | # Camera parameters 182 | im_name = '%d_%d' % (camera,angle) 183 | K_im = intrinsics[pano + '_i' + im_name[0]] 184 | T_ctw,T_wtc = extrinsics[pano + '_i' + im_name] 185 | R_ctw = T_ctw[:3,:3] 186 | 187 | # Check if this image can be skipped (facing away) 188 | z = np.array([0,0,1]) 189 | if R_ctw.dot(z).dot(skybox_ctw.dot(z)) < 0: 190 | continue 191 | 192 | # Compute homography 193 | H = K_skybox.dot(skybox_wtc.dot(R_ctw.dot(inv(K_im)))) 194 | 195 | # Warp and blend the depth image 196 | flip = cv2.flip(depth[im_name], 1) # flip around y-axis 197 | warp = cv2.warpPerspective(flip, H, (SKYBOX_HEIGHT,SKYBOX_WIDTH), flags=cv2.INTER_NEAREST) 198 | mask = cv2.warpPerspective(np.ones_like(flip), H, (SKYBOX_HEIGHT,SKYBOX_WIDTH), flags=cv2.INTER_LINEAR) 199 | mask[warp == 0] = 0 # Set mask to zero where we don't have any depth values 200 | mask = cv2.erode(mask,np.ones((3,3),np.uint8),iterations = 1) 201 | locs = np.where(mask == 1) 202 | base_depth[locs[0], locs[1]] = warp[locs[0], locs[1]] 203 | 204 | if visualize: 205 | # Warp and blend the rgb image 206 | flip = cv2.flip(rgb[im_name], 1) # flip around y-axis 207 | warp = cv2.warpPerspective(flip, H, (SKYBOX_HEIGHT,SKYBOX_WIDTH), flags=cv2.INTER_LINEAR) 208 | mask = cv2.warpPerspective(np.ones_like(flip), H, (SKYBOX_HEIGHT,SKYBOX_WIDTH), flags=cv2.INTER_LINEAR) 209 | mask = 
cv2.erode(mask,np.ones((3,3),np.uint8),iterations = 1) 210 | locs = np.where(mask == 1) 211 | base_rgb[locs[0], locs[1]] = warp[locs[0], locs[1]] 212 | 213 | depth_small = cv2.resize(cv2.flip(base_depth, 1),(DOWNSIZED_WIDTH,DOWNSIZED_HEIGHT),interpolation=cv2.INTER_NEAREST) # flip around y-axis, downsize 214 | if fill_holes: 215 | depth_filled = fill_joint_bilateral_filter(skybox, depth_small) # Fill holes 216 | ims.append(depth_filled) 217 | else: 218 | ims.append(depth_small) 219 | 220 | if visualize and False: 221 | cv2.imshow('Skybox', skybox) 222 | cv2.imshow('Depth', cv2.applyColorMap((depth_small/256).astype(np.uint8), cv2.COLORMAP_JET)) 223 | rgb_output = cv2.flip(base_rgb, 1) # flip around y-axis 224 | cv2.imshow('RGB', rgb_output) 225 | cv2.waitKey(0) 226 | 227 | newimg = np.concatenate(ims, axis=1) 228 | 229 | if visualize: 230 | maxDepth = np.max(newimg)+1 231 | newimg = (newimg.astype(np.float64)/maxDepth) 232 | newimg = (newimg*255).astype(np.uint8) 233 | cv2.imshow('Depth pano', cv2.applyColorMap(newimg, cv2.COLORMAP_JET)) 234 | cv2.waitKey(0) 235 | else: 236 | # Save output 237 | outfile = skybox_depth_template % (base_dir,scan,pano) 238 | assert cv2.imwrite(outfile, newimg), ('Could not write to %s' % outfile) 239 | 240 | if visualize: 241 | cv2.destroyAllWindows() 242 | print 'Completed scan %s' % (scan) 243 | 244 | 245 | 246 | if __name__ == '__main__': 247 | 248 | with open('connectivity/scans.txt') as f: 249 | scans = [scan.strip() for scan in f.readlines()] 250 | p = Pool(NUM_WORKER_PROCESSES) 251 | p.map(depth_to_skybox, scans) 252 | 253 | 254 | 255 | -------------------------------------------------------------------------------- /scripts/downsize_skybox.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | ''' Script for downsizing skybox images. 
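downsizeWithMerge (used in __main__ below) additionally concatenates the six
downsized faces into a single *_skybox_small.jpg strip per panorama, which is
the layout NavGraph::Location::loadCubemapImages() expects; downsize() keeps
the six faces as separate *_small.jpg files.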
''' 4 | 5 | import os 6 | import math 7 | import cv2 8 | import numpy as np 9 | from multiprocessing import Pool 10 | from depth_to_skybox import camera_parameters 11 | 12 | 13 | NUM_WORKER_PROCESSES = 20 14 | DOWNSIZED_WIDTH = 512 15 | DOWNSIZED_HEIGHT = 512 16 | 17 | # Constants 18 | SKYBOX_WIDTH = 1024 19 | SKYBOX_HEIGHT = 1024 20 | base_dir = 'data/v1/scans' 21 | skybox_template = '%s/%s/matterport_skybox_images/%s_skybox%d_sami.jpg' 22 | skybox_small_template = '%s/%s/matterport_skybox_images/%s_skybox%d_small.jpg' 23 | skybox_merge_template = '%s/%s/matterport_skybox_images/%s_skybox_small.jpg' 24 | 25 | 26 | 27 | def downsizeWithMerge(scan): 28 | # Load pano ids 29 | intrinsics,_ = camera_parameters(scan) 30 | pano_ids = list(set([item.split('_')[0] for item in intrinsics.keys()])) 31 | print 'Processing scan %s with %d panoramas' % (scan, len(pano_ids)) 32 | 33 | for pano in pano_ids: 34 | 35 | ims = [] 36 | for skybox_ix in range(6): 37 | 38 | # Load and downsize skybox image 39 | skybox = cv2.imread(skybox_template % (base_dir,scan,pano,skybox_ix)) 40 | ims.append(cv2.resize(skybox,(DOWNSIZED_WIDTH,DOWNSIZED_HEIGHT),interpolation=cv2.INTER_AREA)) 41 | 42 | # Save output 43 | newimg = np.concatenate(ims, axis=1) 44 | fn = skybox_merge_template % (base_dir,scan,pano) 45 | succ = cv2.imwrite(fn, newimg) 46 | assert succ 47 | 48 | 49 | def downsize(scan): 50 | 51 | # Load pano ids 52 | intrinsics,_ = camera_parameters(scan) 53 | pano_ids = list(set([item.split('_')[0] for item in intrinsics.keys()])) 54 | print 'Processing scan %s with %d panoramas' % (scan, len(pano_ids)) 55 | 56 | for pano in pano_ids: 57 | 58 | for skybox_ix in range(6): 59 | 60 | # Load and downsize skybox image 61 | skybox = cv2.imread(skybox_template % (base_dir,scan,pano,skybox_ix)) 62 | newimg = cv2.resize(skybox,(DOWNSIZED_WIDTH,DOWNSIZED_HEIGHT),interpolation=cv2.INTER_AREA) 63 | 64 | # Save output 65 | assert cv2.imwrite(skybox_small_template % (base_dir,scan,pano,skybox_ix), newimg) 66 | 67 | 68 | if __name__ == '__main__': 69 | 70 | with open('connectivity/scans.txt') as f: 71 | scans = [scan.strip() for scan in f.readlines()] 72 | p = Pool(NUM_WORKER_PROCESSES) 73 | p.map(downsizeWithMerge, scans) 74 | -------------------------------------------------------------------------------- /scripts/fill_depth.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | ''' Script for filling missing values in undistorted depth images. 
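Zero-valued (missing) depth pixels are filled with the simulator's cross
bilateral filter (cbf), guided by the grayscale intensity of the matching
color image, and results are written back as 16-bit *_filled.png images.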
''' 4 | 5 | import os 6 | import math 7 | import cv2 8 | import numpy as np 9 | from multiprocessing import Pool 10 | from depth_to_skybox import camera_parameters 11 | 12 | import sys 13 | sys.path.append('build') 14 | from MatterSim import cbf 15 | 16 | 17 | base_dir = 'data/v1/scans' 18 | color_template = '%s/%s/undistorted_color_images/%s_i%s.jpg' 19 | depth_template = '%s/%s/undistorted_depth_images/%s_d%s.png' 20 | filled_depth_template = '%s/%s/undistorted_depth_images/%s_d%s_filled.png' 21 | 22 | def fill_joint_bilateral_filter(scan): 23 | 24 | # Load camera parameters 25 | intrinsics,_ = camera_parameters(scan) 26 | pano_ids = list(set([item.split('_')[0] for item in intrinsics.keys()])) 27 | print 'Processing scan %s with %d panoramas' % (scan, len(pano_ids)) 28 | 29 | for pano in pano_ids: 30 | 31 | # Load undistorted depth and rgb images 32 | for c in range(3): 33 | for i in range(6): 34 | name = '%d_%d' % (c,i) 35 | rgb = cv2.imread(color_template % (base_dir,scan,pano,name)) 36 | intensity = cv2.cvtColor(rgb, cv2.COLOR_BGR2GRAY) 37 | 38 | # Load 16bit depth image 39 | depth = cv2.imread(depth_template % (base_dir,scan,pano,name), cv2.IMREAD_ANYDEPTH) 40 | 41 | # Convert the depth image to uint8. 42 | maxDepth = np.max(depth)+1 43 | depth = (depth.astype(np.float64)/maxDepth) 44 | depth[depth > 1] = 1 45 | depth = (depth*255).astype(np.uint8) 46 | 47 | #cv2.imshow('input', cv2.applyColorMap(depth, cv2.COLORMAP_JET)) 48 | 49 | # Convert to col major order 50 | depth = np.asfortranarray(depth) 51 | intensity = np.asfortranarray(intensity) 52 | mask = (depth == 0) 53 | result = np.zeros_like(depth) 54 | 55 | # Fill holes 56 | cbf(depth, intensity, mask, result) 57 | 58 | #cv2.imshow('result', cv2.applyColorMap(result, cv2.COLORMAP_JET)) 59 | #cv2.waitKey(0) 60 | 61 | result = (result.astype(np.float64)/255*maxDepth).astype(np.uint16) 62 | assert cv2.imwrite(filled_depth_template % (base_dir,scan,pano,name), result) 63 | 64 | 65 | if __name__ == '__main__': 66 | 67 | with open('connectivity/scans.txt') as f: 68 | scans = [scan.strip() for scan in f.readlines()] 69 | p = Pool(10) 70 | p.map(fill_joint_bilateral_filter, scans) 71 | 72 | 73 | -------------------------------------------------------------------------------- /scripts/precompute_img_features.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | ''' Script to precompute image features using a Caffe ResNet CNN, using 36 discretized views 4 | at each viewpoint in 30 degree increments, and the provided camera WIDTH, HEIGHT 5 | and VFOV parameters. 
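Each output TSV row contains scanId, viewpointId, image_w, image_h, vfov and
a base64-encoded float32 array of shape (36, 2048): one ResNet-152 'pool5'
feature vector per discretized view.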
''' 6 | 7 | import numpy as np 8 | import cv2 9 | import json 10 | import math 11 | import base64 12 | import csv 13 | import sys 14 | 15 | csv.field_size_limit(sys.maxsize) 16 | 17 | 18 | # Caffe and MatterSim need to be on the Python path 19 | sys.path.insert(0, 'build') 20 | import MatterSim 21 | 22 | #caffe_root = '../' # your caffe build 23 | #sys.path.insert(0, caffe_root + 'python') 24 | import caffe 25 | 26 | from timer import Timer 27 | 28 | 29 | TSV_FIELDNAMES = ['scanId', 'viewpointId', 'image_w','image_h', 'vfov', 'features'] 30 | VIEWPOINT_SIZE = 36 # Number of discretized views from one viewpoint 31 | FEATURE_SIZE = 2048 32 | BATCH_SIZE = 4 # Some fraction of viewpoint size - batch size 4 equals 11GB memory 33 | GPU_ID = 0 34 | PROTO = 'models/ResNet-152-deploy.prototxt' 35 | MODEL = 'models/ResNet-152-model.caffemodel' # You need to download this, see README.md 36 | #MODEL = 'models/resnet152_places365.caffemodel' 37 | OUTFILE = 'img_features/ResNet-152-imagenet.tsv' 38 | GRAPHS = 'connectivity/' 39 | 40 | # Simulator image parameters 41 | WIDTH=640 42 | HEIGHT=480 43 | VFOV=60 44 | 45 | 46 | def load_viewpointids(): 47 | viewpointIds = [] 48 | with open(GRAPHS+'scans.txt') as f: 49 | scans = [scan.strip() for scan in f.readlines()] 50 | for scan in scans: 51 | with open(GRAPHS+scan+'_connectivity.json') as j: 52 | data = json.load(j) 53 | for item in data: 54 | if item['included']: 55 | viewpointIds.append((scan, item['image_id'])) 56 | print 'Loaded %d viewpoints' % len(viewpointIds) 57 | return viewpointIds 58 | 59 | 60 | def transform_img(im): 61 | ''' Prep opencv 3 channel image for the network ''' 62 | im_orig = im.astype(np.float32, copy=True) 63 | im_orig -= np.array([[[103.1, 115.9, 123.2]]]) # BGR pixel mean 64 | blob = np.zeros((1, im.shape[0], im.shape[1], 3), dtype=np.float32) 65 | blob[0, :, :, :] = im_orig 66 | blob = blob.transpose((0, 3, 1, 2)) 67 | return blob 68 | 69 | 70 | def build_tsv(): 71 | # Set up the simulator 72 | sim = MatterSim.Simulator() 73 | sim.setCameraResolution(WIDTH, HEIGHT) 74 | sim.setCameraVFOV(math.radians(VFOV)) 75 | sim.setDiscretizedViewingAngles(True) 76 | sim.initialize() 77 | 78 | # Set up Caffe resnet 79 | caffe.set_device(GPU_ID) 80 | caffe.set_mode_gpu() 81 | net = caffe.Net(PROTO, MODEL, caffe.TEST) 82 | net.blobs['data'].reshape(BATCH_SIZE, 3, HEIGHT, WIDTH) 83 | 84 | count = 0 85 | t_render = Timer() 86 | t_net = Timer() 87 | with open(OUTFILE, 'wb') as tsvfile: 88 | writer = csv.DictWriter(tsvfile, delimiter = '\t', fieldnames = TSV_FIELDNAMES) 89 | 90 | # Loop all the viewpoints in the simulator 91 | viewpointIds = load_viewpointids() 92 | for scanId,viewpointId in viewpointIds: 93 | t_render.tic() 94 | # Loop all discretized views from this location 95 | blobs = [] 96 | features = np.empty([VIEWPOINT_SIZE, FEATURE_SIZE], dtype=np.float32) 97 | for ix in range(VIEWPOINT_SIZE): 98 | if ix == 0: 99 | sim.newEpisode([scanId], [viewpointId], [0], [math.radians(-30)]) 100 | elif ix % 12 == 0: 101 | sim.makeAction([0], [1.0], [1.0]) 102 | else: 103 | sim.makeAction([0], [1.0], [0]) 104 | 105 | state = sim.getState()[0] 106 | assert state.viewIndex == ix 107 | 108 | # Transform and save generated image 109 | blobs.append(transform_img(state.rgb)) 110 | 111 | t_render.toc() 112 | t_net.tic() 113 | # Run as many forward passes as necessary 114 | assert VIEWPOINT_SIZE % BATCH_SIZE == 0 115 | forward_passes = VIEWPOINT_SIZE / BATCH_SIZE 116 | ix = 0 117 | for f in range(forward_passes): 118 | for n in range(BATCH_SIZE): 119 | # Copy image blob to the net 120 
| net.blobs['data'].data[n, :, :, :] = blobs[ix] 121 | ix += 1 122 | # Forward pass 123 | output = net.forward() 124 | features[f*BATCH_SIZE:(f+1)*BATCH_SIZE, :] = net.blobs['pool5'].data[:,:,0,0] 125 | 126 | writer.writerow({ 127 | 'scanId': scanId, 128 | 'viewpointId': viewpointId, 129 | 'image_w': WIDTH, 130 | 'image_h': HEIGHT, 131 | 'vfov' : VFOV, 132 | 'features': base64.b64encode(features) 133 | }) 134 | count += 1 135 | t_net.toc() 136 | if count % 100 == 0: 137 | print 'Processed %d / %d viewpoints, %.1fs avg render time, %.1fs avg net time, projected %.1f hours' %\ 138 | (count,len(viewpointIds), t_render.average_time, t_net.average_time, 139 | (t_render.average_time+t_net.average_time)*len(viewpointIds)/3600) 140 | 141 | 142 | def read_tsv(infile): 143 | # Verify we can read a tsv 144 | in_data = [] 145 | with open(infile, "r+b") as tsv_in_file: 146 | reader = csv.DictReader(tsv_in_file, delimiter='\t', fieldnames = TSV_FIELDNAMES) 147 | for item in reader: 148 | item['image_h'] = int(item['image_h']) 149 | item['image_w'] = int(item['image_w']) 150 | item['vfov'] = int(item['vfov']) 151 | item['features'] = np.frombuffer(base64.decodestring(item['features']), 152 | dtype=np.float32).reshape((VIEWPOINT_SIZE, FEATURE_SIZE)) 153 | in_data.append(item) 154 | return in_data 155 | 156 | 157 | if __name__ == "__main__": 158 | 159 | build_tsv() 160 | data = read_tsv(OUTFILE) 161 | print 'Completed %d viewpoints' % len(data) 162 | 163 | -------------------------------------------------------------------------------- /scripts/precompute_optimal_policies.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | ''' Script for precomputing the optimal (shortest path) policy at each viewpoint. ''' 4 | 5 | from env import R2RBatch 6 | import json 7 | import os 8 | import argparse 9 | 10 | parser = argparse.ArgumentParser() 11 | parser.add_argument('--dir', default='./data/v1/scans') 12 | parser.add_argument('--split', default='train') 13 | args = parser.parse_args() 14 | 15 | r2r = R2RBatch(None, batch_size=1, splits=[args.split]) 16 | 17 | def mkdir_p(path): 18 | try: 19 | os.makedirs(path) 20 | except OSError as exc: 21 | if os.path.isdir(path): 22 | pass 23 | else: raise 24 | 25 | for scan in r2r.paths: 26 | for goal in r2r.paths[scan]: 27 | mkdir_p('{}/{}/policies'.format(args.dir, scan)) 28 | with open('{}/{}/policies/{}.json'.format(args.dir, scan, goal), 'w') as f: 29 | f.write(json.dumps(r2r.paths[scan][goal])) 30 | 31 | -------------------------------------------------------------------------------- /scripts/timer.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Fast R-CNN 3 | # Copyright (c) 2015 Microsoft 4 | # Licensed under The MIT License [see LICENSE for details] 5 | # Written by Ross Girshick 6 | # -------------------------------------------------------- 7 | 8 | import time 9 | 10 | class Timer(object): 11 | """A simple timer.""" 12 | def __init__(self): 13 | self.total_time = 0. 14 | self.calls = 0 15 | self.start_time = 0. 16 | self.diff = 0. 17 | self.average_time = 0. 
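    # A usage sketch: t = Timer(); t.tic(); <do work>; dt = t.toc(average=False).
    # toc() accumulates total_time and by default returns the running average
    # of all tic/toc intervals, in seconds.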
18 | 19 | def tic(self): 20 | # using time.time instead of time.clock because time time.clock 21 | # does not normalize for multithreading 22 | self.start_time = time.time() 23 | 24 | def toc(self, average=True): 25 | self.diff = time.time() - self.start_time 26 | self.total_time += self.diff 27 | self.calls += 1 28 | self.average_time = self.total_time / self.calls 29 | if average: 30 | return self.average_time 31 | else: 32 | return self.diff 33 | -------------------------------------------------------------------------------- /src/driver/driver.py: -------------------------------------------------------------------------------- 1 | import sys 2 | sys.path.append('build') 3 | import MatterSim 4 | import time 5 | import math 6 | import cv2 7 | import numpy as np 8 | 9 | WIDTH = 800 10 | HEIGHT = 600 11 | VFOV = math.radians(60) 12 | HFOV = VFOV*WIDTH/HEIGHT 13 | TEXT_COLOR = [230, 40, 40] 14 | 15 | cv2.namedWindow('Python RGB') 16 | cv2.namedWindow('Python Depth') 17 | 18 | sim = MatterSim.Simulator() 19 | sim.setCameraResolution(WIDTH, HEIGHT) 20 | sim.setCameraVFOV(VFOV) 21 | sim.setDepthEnabled(True) 22 | sim.initialize() 23 | #sim.newEpisode(['2t7WUuJeko7'], ['1e6b606b44df4a6086c0f97e826d4d15'], [0], [0]) 24 | #sim.newEpisode(['1LXtFkjw3qL'], ['0b22fa63d0f54a529c525afbf2e8bb25'], [0], [0]) 25 | sim.newRandomEpisode(['1LXtFkjw3qL']) 26 | 27 | heading = 0 28 | elevation = 0 29 | location = 0 30 | ANGLEDELTA = 5 * math.pi / 180 31 | 32 | print '\nPython Demo' 33 | print 'Use arrow keys to move the camera.' 34 | print 'Use number keys (not numpad) to move to nearby viewpoints indicated in the RGB view.\n' 35 | 36 | while True: 37 | sim.makeAction([location], [heading], [elevation]) 38 | location = 0 39 | heading = 0 40 | elevation = 0 41 | 42 | state = sim.getState()[0] 43 | locations = state.navigableLocations 44 | rgb = np.array(state.rgb, copy=False) 45 | for idx, loc in enumerate(locations[1:]): 46 | # Draw actions on the screen 47 | fontScale = 3.0/loc.rel_distance 48 | x = int(WIDTH/2 + loc.rel_heading/HFOV*WIDTH) 49 | y = int(HEIGHT/2 - loc.rel_elevation/VFOV*HEIGHT) 50 | cv2.putText(rgb, str(idx + 1), (x, y), cv2.FONT_HERSHEY_SIMPLEX, 51 | fontScale, TEXT_COLOR, thickness=3) 52 | cv2.imshow('Python RGB', rgb) 53 | 54 | depth = np.array(state.depth, copy=False) 55 | cv2.imshow('Python Depth', depth) 56 | k = cv2.waitKey(1) 57 | if k == -1: 58 | continue 59 | else: 60 | k = (k & 255) 61 | if k == ord('q'): 62 | break 63 | elif ord('1') <= k <= ord('9'): 64 | location = k - ord('0') 65 | if location >= len(locations): 66 | location = 0 67 | elif k == 81 or k == ord('a'): 68 | heading = -ANGLEDELTA 69 | elif k == 82 or k == ord('w'): 70 | elevation = ANGLEDELTA 71 | elif k == 83 or k == ord('d'): 72 | heading = ANGLEDELTA 73 | elif k == 84 or k == ord('s'): 74 | elevation = -ANGLEDELTA 75 | -------------------------------------------------------------------------------- /src/driver/mattersim_main.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | #include "MatterSim.hpp" 5 | 6 | using namespace mattersim; 7 | 8 | #define WIDTH 1280 9 | #define HEIGHT 720 10 | 11 | #ifndef M_PI 12 | #define M_PI (3.14159265358979323846) 13 | #endif 14 | 15 | int main(int argc, char *argv[]) { 16 | 17 | cv::namedWindow("C++ RGB"); 18 | cv::namedWindow("C++ Depth"); 19 | 20 | Simulator sim; 21 | 22 | // Sets resolution. 
Default is 320 x 240. 23 | sim.setCameraResolution(640,480); 24 | sim.setDepthEnabled(true); 25 | 26 | // Initialize the simulator. Further camera configuration won't take any effect from now on. 27 | sim.initialize(); 28 | 29 | std::cout << "\nC++ Demo" << std::endl; 30 | std::cout << "Showing some random viewpoints in one building." << std::endl; 31 | 32 | int i = 0; 33 | while(true) { 34 | i++; 35 | std::cout << "Episode #" << i << "\n"; 36 | 37 | // Starts a new episode. It is not needed right after initialize() but it doesn't cost much and the loop is nicer. 38 | sim.newRandomEpisode(std::vector<std::string>(1,"pa4otMbVnkk")); // Launches at a random location 39 | 40 | for (int k=0; k<500; k++) { 41 | 42 | // Get the state 43 | SimStatePtr state = sim.getState().at(0); // SimStatePtr is std::shared_ptr<SimState> 44 | 45 | // Which consists of: 46 | unsigned int n = state->step; 47 | cv::Mat rgb = state->rgb; // OpenCV CV_8UC3 type (i.e. 8bit color rgb) 48 | cv::Mat depth = state->depth; // OpenCV CV_16UC1 type (i.e. 16bit grayscale) 49 | ViewpointPtr location = state->location; // Viewpoint id, plus the x,y,z location of the viewpoint 50 | float heading = state->heading; 51 | float elevation = state->elevation; // camera parameters 52 | std::vector<ViewpointPtr> reachable = state->navigableLocations; // Where we can move to 53 | int locationIdx = 0; // Must be an index into reachable 54 | double headingChange = M_PI / 500; 55 | double elevationChange = 0; 56 | 57 | cv::imshow("C++ RGB", rgb); 58 | cv::imshow("C++ Depth", depth); 59 | cv::waitKey(10); 60 | 61 | sim.makeAction(std::vector<unsigned int>(1, locationIdx), 62 | std::vector<double>(1, headingChange), 63 | std::vector<double>(1, elevationChange)); 64 | 65 | } 66 | } 67 | 68 | // Closing happens automatically in the destructor, but after close() you can initialize again with different settings. 
69 | sim.close(); 70 | 71 | return 0; 72 | } 73 | -------------------------------------------------------------------------------- /src/lib/Benchmark.cpp: -------------------------------------------------------------------------------- 1 | #include <chrono> 2 | 3 | #include "Benchmark.hpp" 4 | 5 | namespace mattersim { 6 | 7 | Timer::Timer() 8 | : running_(false), 9 | elapsed_(0) {} 10 | 11 | void Timer::Start() { 12 | if (!running()) { 13 | start_ = std::chrono::steady_clock::now(); 14 | running_ = true; 15 | } 16 | } 17 | 18 | void Timer::Stop() { 19 | if (running()) { 20 | elapsed_ += std::chrono::steady_clock::now() - start_; 21 | running_ = false; 22 | } 23 | } 24 | 25 | void Timer::Reset() { 26 | if (running()) { 27 | running_ = false; 28 | } 29 | elapsed_ = std::chrono::steady_clock::duration(0); 30 | } 31 | 32 | float Timer::MicroSeconds() { 33 | if (running()) { 34 | elapsed_ += std::chrono::steady_clock::now() - start_; start_ = std::chrono::steady_clock::now(); // restart the interval so repeated queries don't double-count 35 | } 36 | return std::chrono::duration_cast<std::chrono::microseconds>(elapsed_).count(); 37 | } 38 | 39 | float Timer::MilliSeconds() { 40 | if (running()) { 41 | elapsed_ += std::chrono::steady_clock::now() - start_; start_ = std::chrono::steady_clock::now(); 42 | } 43 | return std::chrono::duration_cast<std::chrono::milliseconds>(elapsed_).count(); 44 | } 45 | 46 | float Timer::Seconds() { 47 | if (running()) { 48 | elapsed_ += std::chrono::steady_clock::now() - start_; start_ = std::chrono::steady_clock::now(); 49 | } 50 | return std::chrono::duration_cast<std::chrono::duration<float>>(elapsed_).count(); // fractional seconds 51 | } 52 | 53 | 54 | } 55 | -------------------------------------------------------------------------------- /src/lib/NavGraph.cpp: -------------------------------------------------------------------------------- 1 | #include <algorithm> 2 | #include <fstream> 3 | #include <iterator> 4 | #include <stdexcept> 5 | 6 | #include <json/json.h> 7 | #ifdef _OPENMP 8 | #include <omp.h> 9 | #endif 10 | #include "NavGraph.hpp" 11 | 12 | namespace mattersim { 13 | 14 | 15 | NavGraph::Location::Location(const Json::Value& viewpoint, const std::string& skyboxDir, 16 | bool preload, bool depth): skyboxDir(skyboxDir), im_loaded(false), 17 | includeDepth(depth), cubemap_texture(0), depth_texture(0) { 18 | 19 | viewpointId = viewpoint["image_id"].asString(); 20 | included = viewpoint["included"].asBool(); 21 | 22 | float posearr[16]; 23 | int i = 0; 24 | for (auto f : viewpoint["pose"]) { 25 | posearr[i++] = f.asFloat(); 26 | } 27 | // glm uses column-major order. Inputs are in row-major order. 
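// After the transpose, column 3 of rot holds the camera position: it is
// copied into pos and then reset below, leaving rot as the pure rotation
// component of the pose.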
28 | rot = glm::transpose(glm::make_mat4(posearr)); 29 | // glm access is col,row 30 | pos = glm::vec3{rot[3][0], rot[3][1], rot[3][2]}; 31 | rot[3] = {0,0,0,1}; // remove translation component 32 | 33 | for (auto u : viewpoint["unobstructed"]) { 34 | unobstructed.push_back(u.asBool()); 35 | } 36 | 37 | if (preload) { 38 | // Preload skybox images 39 | loadCubemapImages(); 40 | } 41 | }; 42 | 43 | 44 | void NavGraph::Location::loadCubemapImages() { 45 | cv::Mat rgb = cv::imread(skyboxDir + viewpointId + "_skybox_small.jpg"); 46 | int w = rgb.cols/6; 47 | int h = rgb.rows; 48 | xpos = rgb(cv::Rect(2*w, 0, w, h)); 49 | xneg = rgb(cv::Rect(4*w, 0, w, h)); 50 | ypos = rgb(cv::Rect(0*w, 0, w, h)); 51 | yneg = rgb(cv::Rect(5*w, 0, w, h)); 52 | zpos = rgb(cv::Rect(1*w, 0, w, h)); 53 | zneg = rgb(cv::Rect(3*w, 0, w, h)); 54 | if (xpos.empty() || xneg.empty() || ypos.empty() || yneg.empty() || zpos.empty() || zneg.empty()) { 55 | throw std::invalid_argument( "MatterSim: Could not open skybox RGB files at: " + skyboxDir + viewpointId + "_skybox_small.jpg"); 56 | } 57 | if (includeDepth) { 58 | // 16 bit grayscale images 59 | cv::Mat depth = cv::imread(skyboxDir + viewpointId + "_skybox_depth_small.png", CV_LOAD_IMAGE_ANYDEPTH); 60 | xposD = depth(cv::Rect(2*w, 0, w, h)); 61 | xnegD = depth(cv::Rect(4*w, 0, w, h)); 62 | yposD = depth(cv::Rect(0*w, 0, w, h)); 63 | ynegD = depth(cv::Rect(5*w, 0, w, h)); 64 | zposD = depth(cv::Rect(1*w, 0, w, h)); 65 | znegD = depth(cv::Rect(3*w, 0, w, h)); 66 | if (xposD.empty() || xnegD.empty() || yposD.empty() || ynegD.empty() || zposD.empty() || znegD.empty()) { 67 | throw std::invalid_argument( "MatterSim: Could not open skybox depth files at: " + skyboxDir + viewpointId + "_skybox_depth_small.png"); 68 | } 69 | } 70 | im_loaded = true; 71 | } 72 | 73 | 74 | void NavGraph::Location::loadCubemapTextures() { 75 | // RGB texture 76 | glActiveTexture(GL_TEXTURE0); 77 | glEnable(GL_TEXTURE_CUBE_MAP); 78 | glGenTextures(1, &cubemap_texture); 79 | glBindTexture(GL_TEXTURE_CUBE_MAP, cubemap_texture); 80 | glTexParameteri(GL_TEXTURE_CUBE_MAP, GL_TEXTURE_MAG_FILTER, GL_LINEAR); 81 | glTexParameteri(GL_TEXTURE_CUBE_MAP, GL_TEXTURE_MIN_FILTER, GL_LINEAR); 82 | glTexParameteri(GL_TEXTURE_CUBE_MAP, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE); 83 | glTexParameteri(GL_TEXTURE_CUBE_MAP, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE); 84 | glTexParameteri(GL_TEXTURE_CUBE_MAP, GL_TEXTURE_WRAP_R, GL_CLAMP_TO_EDGE); 85 | //use fast 4-byte alignment (default anyway) if possible 86 | glPixelStorei(GL_UNPACK_ALIGNMENT, (xneg.step & 3) ? 
1 : 4); 87 | //set length of one complete row in data (doesn't need to equal image.cols) 88 | glPixelStorei(GL_UNPACK_ROW_LENGTH, xneg.step/xneg.elemSize()); 89 | glTexImage2D(GL_TEXTURE_CUBE_MAP_POSITIVE_X, 0, GL_RGB, xpos.rows, xpos.cols, 0, GL_BGR, GL_UNSIGNED_BYTE, xpos.ptr()); 90 | glTexImage2D(GL_TEXTURE_CUBE_MAP_NEGATIVE_X, 0, GL_RGB, xneg.rows, xneg.cols, 0, GL_BGR, GL_UNSIGNED_BYTE, xneg.ptr()); 91 | glTexImage2D(GL_TEXTURE_CUBE_MAP_POSITIVE_Y, 0, GL_RGB, ypos.rows, ypos.cols, 0, GL_BGR, GL_UNSIGNED_BYTE, ypos.ptr()); 92 | glTexImage2D(GL_TEXTURE_CUBE_MAP_NEGATIVE_Y, 0, GL_RGB, yneg.rows, yneg.cols, 0, GL_BGR, GL_UNSIGNED_BYTE, yneg.ptr()); 93 | glTexImage2D(GL_TEXTURE_CUBE_MAP_POSITIVE_Z, 0, GL_RGB, zpos.rows, zpos.cols, 0, GL_BGR, GL_UNSIGNED_BYTE, zpos.ptr()); 94 | glTexImage2D(GL_TEXTURE_CUBE_MAP_NEGATIVE_Z, 0, GL_RGB, zneg.rows, zneg.cols, 0, GL_BGR, GL_UNSIGNED_BYTE, zneg.ptr()); 95 | assertOpenGLError("RGB texture"); 96 | if (includeDepth) { 97 | // Depth Texture 98 | glActiveTexture(GL_TEXTURE0); 99 | glEnable(GL_TEXTURE_CUBE_MAP); 100 | glGenTextures(1, &depth_texture); 101 | glBindTexture(GL_TEXTURE_CUBE_MAP, depth_texture); 102 | glTexParameteri(GL_TEXTURE_CUBE_MAP, GL_TEXTURE_MAG_FILTER, GL_NEAREST); 103 | glTexParameteri(GL_TEXTURE_CUBE_MAP, GL_TEXTURE_MIN_FILTER, GL_NEAREST); 104 | glTexParameteri(GL_TEXTURE_CUBE_MAP, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE); 105 | glTexParameteri(GL_TEXTURE_CUBE_MAP, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE); 106 | glTexParameteri(GL_TEXTURE_CUBE_MAP, GL_TEXTURE_WRAP_R, GL_CLAMP_TO_EDGE); 107 | //use fast 4-byte alignment (default anyway) if possible 108 | glPixelStorei(GL_UNPACK_ALIGNMENT, (xnegD.step & 3) ? 1 : 4); 109 | //set length of one complete row in data (doesn't need to equal image.cols) 110 | glPixelStorei(GL_UNPACK_ROW_LENGTH, xnegD.step/xnegD.elemSize()); 111 | glTexImage2D(GL_TEXTURE_CUBE_MAP_POSITIVE_X, 0, GL_RED, xposD.rows, xposD.cols, 0, GL_RED, GL_UNSIGNED_SHORT, xposD.ptr()); 112 | glTexImage2D(GL_TEXTURE_CUBE_MAP_NEGATIVE_X, 0, GL_RED, xnegD.rows, xnegD.cols, 0, GL_RED, GL_UNSIGNED_SHORT, xnegD.ptr()); 113 | glTexImage2D(GL_TEXTURE_CUBE_MAP_POSITIVE_Y, 0, GL_RED, yposD.rows, yposD.cols, 0, GL_RED, GL_UNSIGNED_SHORT, yposD.ptr()); 114 | glTexImage2D(GL_TEXTURE_CUBE_MAP_NEGATIVE_Y, 0, GL_RED, ynegD.rows, ynegD.cols, 0, GL_RED, GL_UNSIGNED_SHORT, ynegD.ptr()); 115 | glTexImage2D(GL_TEXTURE_CUBE_MAP_POSITIVE_Z, 0, GL_RED, zposD.rows, zposD.cols, 0, GL_RED, GL_UNSIGNED_SHORT, zposD.ptr()); 116 | glTexImage2D(GL_TEXTURE_CUBE_MAP_NEGATIVE_Z, 0, GL_RED, znegD.rows, znegD.cols, 0, GL_RED, GL_UNSIGNED_SHORT, znegD.ptr()); 117 | assertOpenGLError("Depth texture"); 118 | } 119 | } 120 | 121 | 122 | void NavGraph::Location::deleteCubemapTextures() { 123 | // no need to check existence, silently ignores errors 124 | glDeleteTextures(1, &cubemap_texture); 125 | glDeleteTextures(1, &depth_texture); 126 | cubemap_texture = 0; 127 | depth_texture = 0; 128 | } 129 | 130 | 131 | std::pair NavGraph::Location::cubemapTextures() { 132 | if (glIsTexture(cubemap_texture)){ 133 | return {cubemap_texture, depth_texture}; 134 | } 135 | if (!im_loaded) { 136 | loadCubemapImages(); 137 | } 138 | loadCubemapTextures(); 139 | return {cubemap_texture, depth_texture}; 140 | } 141 | 142 | 143 | NavGraph::NavGraph(const std::string& navGraphPath, const std::string& datasetPath, 144 | bool preloadImages, bool renderDepth, int randomSeed, unsigned int cacheSize) : cache(cacheSize) { 145 | 146 | generator.seed(randomSeed); 147 | 148 | auto textFile = 
NavGraph::NavGraph(const std::string& navGraphPath, const std::string& datasetPath,
144 |         bool preloadImages, bool renderDepth, int randomSeed, unsigned int cacheSize) : cache(cacheSize) {
145 | 
146 |     generator.seed(randomSeed);
147 | 
148 |     auto textFile = navGraphPath + "/scans.txt";
149 |     std::ifstream scansFile(textFile);
150 |     if (scansFile.fail()){
151 |         throw std::invalid_argument( "MatterSim: Could not open list of scans at: " +
152 |                 textFile + ", is path valid?" );
153 |     }
154 |     std::vector<std::string> scanIds;
155 |     std::copy(std::istream_iterator<std::string>(scansFile),
156 |             std::istream_iterator<std::string>(),
157 |             std::back_inserter(scanIds));
158 | 
159 |     #pragma omp parallel for
160 |     for (unsigned int i=0; i<scanIds.size(); ++i) {
161 |         std::string scanId = scanIds.at(i);
162 |         Json::Value root;
163 |         auto navGraphFile = navGraphPath + "/" + scanId + "_connectivity.json";
164 |         std::ifstream ifs(navGraphFile, std::ifstream::in);
165 |         if (ifs.fail()){
166 |             throw std::invalid_argument( "MatterSim: Could not open navigation graph file: " +
167 |                     navGraphFile + ", is path valid?" );
168 |         }
169 |         ifs >> root;
170 |         auto skyboxDir = datasetPath + "/" + scanId + "/matterport_skybox_images/";
171 |         #pragma omp critical
172 |         {
173 |             scanLocations.insert(std::pair<std::string, std::vector<LocationPtr> >
174 |                     (scanId, std::vector<LocationPtr>()));
175 |         }
176 |         for (auto viewpoint : root) {
177 |             Location l(viewpoint, skyboxDir, preloadImages, renderDepth);
178 |             #pragma omp critical
179 |             {
180 |                 scanLocations[scanId].push_back(std::make_shared<Location>(l));
181 |             }
182 |         }
183 |     }
184 | }
185 | 
186 | 
187 | NavGraph::~NavGraph() {
188 |     // free all remaining textures
189 |     for (auto scan : scanLocations) {
190 |         for (auto loc : scan.second) {
191 |             loc->deleteCubemapTextures();
192 |         }
193 |     }
194 | }
195 | 
196 | 
197 | NavGraph& NavGraph::getInstance(const std::string& navGraphPath, const std::string& datasetPath,
198 |         bool preloadImages, bool renderDepth, int randomSeed, unsigned int cacheSize){
199 |     // magic static
200 |     static NavGraph instance(navGraphPath, datasetPath, preloadImages, renderDepth, randomSeed, cacheSize);
201 |     return instance;
202 | }
203 | 
204 | 
205 | const std::string& NavGraph::randomViewpoint(const std::string& scanId) {
206 |     std::uniform_int_distribution<int> distribution(0,scanLocations.at(scanId).size()-1);
207 |     int start_ix = distribution(generator); // generates random starting index
208 |     int ix = start_ix;
209 |     while (!scanLocations.at(scanId).at(ix)->included) { // Don't start at an excluded viewpoint
210 |         ix++;
211 |         if (ix >= scanLocations.at(scanId).size()) ix = 0;
212 |         if (ix == start_ix) {
213 |             throw std::logic_error( "MatterSim: ScanId: " + scanId + " has no included viewpoints!");
214 |         }
215 |     }
216 |     return scanLocations.at(scanId).at(ix)->viewpointId;
217 | }
218 | 
219 | 
220 | unsigned int NavGraph::index(const std::string& scanId, const std::string& viewpointId) const {
221 |     int ix = -1;
222 |     for (int i = 0; i < scanLocations.at(scanId).size(); ++i) {
223 |         if (scanLocations.at(scanId).at(i)->viewpointId == viewpointId) {
224 |             if (!scanLocations.at(scanId).at(i)->included) {
225 |                 throw std::invalid_argument( "MatterSim: ViewpointId: " +
226 |                         viewpointId + ", is excluded from the connectivity graph." );
227 |             }
228 |             ix = i;
229 |             break;
230 |         }
231 |     }
232 |     if (ix < 0) {
233 |         throw std::invalid_argument( "MatterSim: Could not find viewpointId: " +
234 |                 viewpointId + ", is viewpoint id valid?" );
235 |     } else {
236 |         return ix;
237 |     }
238 | }
239 | 
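Each connectivity/*.json file parsed above is a list of viewpoint records with `included` and `unobstructed` fields. As an illustration (not repo code), the reachable neighbours of a viewpoint can be listed straight from the JSON, mirroring NavGraph::adjacentViewpointIndices defined below:

import json

def reachable_indices(connectivity_file, ix):
    # A neighbour j is reachable from viewpoint ix if the edge is
    # unobstructed and viewpoint j is itself included in the graph.
    with open(connectivity_file) as f:
        graph = json.load(f)
    me = graph[ix]
    return [j for j, ok in enumerate(me["unobstructed"])
            if ok and j != ix and graph[j]["included"]]

# print(reachable_indices("connectivity/17DRP5sb8fy_connectivity.json", 0))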
240 | const std::string& NavGraph::viewpoint(const std::string& scanId, unsigned int ix) const {
241 |     return scanLocations.at(scanId).at(ix)->viewpointId;
242 | }
243 | 
244 | 
245 | const glm::mat4& NavGraph::cameraRotation(const std::string& scanId, unsigned int ix) const {
246 |     return scanLocations.at(scanId).at(ix)->rot;
247 | }
248 | 
249 | 
250 | const glm::vec3& NavGraph::cameraPosition(const std::string& scanId, unsigned int ix) const {
251 |     return scanLocations.at(scanId).at(ix)->pos;
252 | }
253 | 
254 | 
255 | std::vector<unsigned int> NavGraph::adjacentViewpointIndices(const std::string& scanId, unsigned int ix) const {
256 |     std::vector<unsigned int> reachable;
257 |     for (unsigned int i = 0; i < scanLocations.at(scanId).size(); ++i) {
258 |         if (i == ix) {
259 |             // Skip option to stay at the same viewpoint
260 |             continue;
261 |         }
262 |         if (scanLocations.at(scanId).at(ix)->unobstructed[i] && scanLocations.at(scanId).at(i)->included) {
263 |             reachable.push_back(i);
264 |         }
265 |     }
266 |     return reachable;
267 | }
268 | 
269 | 
270 | std::pair<GLuint, GLuint> NavGraph::cubemapTextures(const std::string& scanId, unsigned int ix) {
271 |     LocationPtr loc = scanLocations.at(scanId).at(ix);
272 |     std::pair<GLuint, GLuint> textures = loc->cubemapTextures();
273 |     cache.add(loc);
274 |     return textures;
275 | }
276 | 
277 | 
278 | void NavGraph::deleteCubemapTextures(const std::string& scanId, unsigned int ix) {
279 |     scanLocations.at(scanId).at(ix)->deleteCubemapTextures();
280 | }
281 | 
282 | 
283 | }
284 | 
-------------------------------------------------------------------------------- /src/lib/cbf.cpp: --------------------------------------------------------------------------------
1 | // NYU Depth V2 Dataset Matlab Toolbox
2 | // Authors: Nathan Silberman, Pushmeet Kohli, Derek Hoiem, Rob Fergus
3 | 
4 | #include "cbf.h"
5 | 
6 | #include <stdio.h>  // TODO: remove this.
7 | #include <fstream>
8 | #include <math.h>
9 | #include <time.h>
10 | 
11 | // Uncomment this define for intermediate filtering results.
12 | // #define DEBUG
13 | 
14 | #define PI 3.14159
15 | 
16 | #define UCHAR_MAX 255
17 | #define FILTER_RAD 5
18 | 
19 | void toc(const char* message, clock_t start) {
20 | 
21 | #ifdef DEBUG
22 |     double d = clock() - start;
23 |     d = 1000 * d / CLOCKS_PER_SEC;
24 |     printf("[%s] %10.0f\n", message, d);
25 | #endif
26 | }
27 | 
28 | // Args:
29 | //   filter_size - the number of pixels in the filter.
30 | void create_offset_array(int filter_rad, int* offsets_h, int img_height) {
31 |     int filter_len = filter_rad * 2 + 1;
32 |     int filter_size = filter_len * filter_len;
33 | 
34 |     int kk = 0;
35 |     for (int yy = -filter_rad; yy <= filter_rad; ++yy) {
36 |         for (int xx = -filter_rad; xx <= filter_rad; ++xx, ++kk) {
37 |             offsets_h[kk] = yy + img_height * xx;
38 |         }
39 |     }
40 | }
41 | 
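Images in this filter are addressed column-major (absolute index = y + height * x), so create_offset_array encodes a (dx, dy) window step as dy + height * dx. A small Python check of that layout (illustrative only; toy sizes, the real code uses FILTER_RAD = 5):

H, W, R = 6, 5, 1   # toy column-major image and a 3x3 filter radius
offsets = [dy + H * dx for dy in range(-R, R + 1) for dx in range(-R, R + 1)]
y, x = 3, 2
center = y + H * x                           # column-major absolute index
window = [center + off for off in offsets]   # 3x3 neighbourhood, before bounds checks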
42 | void calc_pyr_sizes(int* heights, int* widths, int* pyr_offsets, int orig_height, int orig_width, int num_scales) {
43 |     int offset = 0;
44 |     for (int scale = 0; scale < num_scales; ++scale) {
45 |         pyr_offsets[scale] = offset;
46 | 
47 |         // Calculate the size of the downsampled images.
48 |         heights[scale] = static_cast<int>(orig_height / pow((float)2, scale));
49 |         widths[scale] = static_cast<int>(orig_width / pow((float)2, scale));
50 | 
51 |         offset += heights[scale] * widths[scale];
52 |     }
53 | 
54 | #ifdef DEBUG
55 |     for (int ii = 0; ii < num_scales; ++ii) {
56 |         printf("Scale %d: [%d x %d], offset=%d\n", ii, heights[ii], widths[ii], pyr_offsets[ii]);
57 |     }
58 | #endif
59 | }
60 | 
61 | int get_pyr_size(int* heights, int* widths, int num_scales) {
62 | 
63 |     int total_pixels = 0;
64 |     for (int ii = 0; ii < num_scales; ++ii) {
65 |         total_pixels += heights[ii] * widths[ii];
66 |     }
67 | 
68 |     return total_pixels;
69 | }
70 | 
71 | // We're upsampling from the result matrix (which is small) to the depth matrix,
72 | // which is larger.
73 | //
74 | // For example, dst could be 480x640 and src may be 240x320.
75 | //
76 | // Args:
77 | //   depth_dst - H1xW1 matrix where H1 and W1 are equal to height_dst and
78 | //               width_dst.
79 | void upsample_cpu(float* depth_dst,
80 |                   bool* mask_dst,
81 |                   bool* valid_dst,
82 |                   float* depth_src,
83 |                   float* result_src,
84 |                   bool* mask_src,
85 |                   bool* valid_src,
86 |                   int height_src,
87 |                   int width_src,
88 |                   int height_dst,
89 |                   int width_dst,
90 |                   int dst_img_ind) {
91 | 
92 |     int num_threads = height_dst * width_dst;
93 | 
94 |     // Don't bother if the upsampled pixel isn't missing.
95 |     if (!mask_dst[dst_img_ind]) {
96 |         return;
97 |     }
98 | 
99 |     int x_dst = floorf((float) dst_img_ind / height_dst);
100 |     int y_dst = fmodf(dst_img_ind, height_dst);
101 | 
102 |     int y_src = static_cast<int>((float) y_dst * height_src / height_dst);
103 |     int x_src = static_cast<int>((float) x_dst * width_src / width_dst);
104 | 
105 |     // Finally, convert to absolute coords.
106 |     int src_img_ind = y_src + height_src * x_src;
107 | 
108 |     if (!mask_src[src_img_ind]) {
109 |         depth_dst[dst_img_ind] = depth_src[src_img_ind];
110 |     } else {
111 |         depth_dst[dst_img_ind] = result_src[src_img_ind];
112 |     }
113 | 
114 |     valid_dst[dst_img_ind] = valid_src[src_img_ind];
115 | }
116 | 
117 | // Args:
118 | //   depth - the depth image, a HxW vector
119 | //   intensity - the intensity image, a HxW vector.
120 | //   is_missing - a binary mask specifying whether each pixel is missing
121 | //                (and needs to be filled in) or not.
122 | //   valid_in - a mask specifying which of the input values are allowed
123 | //              to be used for filtering.
124 | //   valid_out - a mask specifying which of the output values are allowed
125 | //               to be used for future filtering.
126 | //   result - the result of the filtering operation, a HxW matrix.
127 | //   abs_inds - the absolute indices (into depth, intensity, etc) which
128 | //              need filtering.
129 | //   offsets - vector of offsets from the current abs_ind to be used for
130 | //             filtering.
131 | //   gaussian - the values (weights) of the gaussian filter corresponding
132 | //              to the offset matrix.
133 | void cbf_cpu(const float* depth, const float* intensity, bool* is_missing, 134 | bool* valid_in, bool* valid_out, float* result, 135 | const int* abs_inds, 136 | const int* offsets, 137 | const float* gaussian_space, 138 | int height, 139 | int width, 140 | int filter_rad, 141 | float sigma_s, 142 | float sigma_r, 143 | int numThreads, 144 | int idx) { 145 | 146 | int abs_ind = abs_inds[idx]; 147 | 148 | int src_Y = abs_ind % height; 149 | int src_X = abs_ind / height; 150 | 151 | int filter_len = filter_rad * 2 + 1; 152 | int filter_size = filter_len * filter_len; 153 | 154 | float weight_sum = 0; 155 | float value_sum = 0; 156 | 157 | float weight_intensity_sum = 0; 158 | 159 | float gaussian_range[filter_size]; 160 | float gaussian_range_sum = 0; 161 | 162 | for (int ii = 0; ii < filter_size; ++ii) { 163 | // Unfortunately we need to double check that the radii are correct 164 | // unless we add better processing of borders. 165 | 166 | int abs_offset = abs_ind + offsets[ii]; // THESE ARE CALC TWICE. 167 | 168 | int dst_Y = abs_offset % height; 169 | int dst_X = abs_offset / height; 170 | 171 | if (abs_offset < 0 || abs_offset >= (height * width) 172 | || abs(src_Y-dst_Y) > FILTER_RAD || abs(src_X-dst_X) > FILTER_RAD) { 173 | continue; 174 | 175 | // The offsets are into ANY part of the image. So they MAY be accessing 176 | // a pixel that was originally missing. However, if that pixel has been 177 | // filled in, then we can still use it. 178 | } else if (is_missing[abs_offset] && !valid_in[abs_offset]) { 179 | continue; 180 | } 181 | 182 | float vv = intensity[abs_offset] - intensity[abs_ind]; 183 | 184 | 185 | gaussian_range[ii] = exp(-(vv * vv) / (2*sigma_r * sigma_r)); 186 | gaussian_range_sum += gaussian_range[ii]; 187 | } 188 | 189 | int count = 0; 190 | 191 | for (int ii = 0; ii < filter_size; ++ii) { 192 | // Get the Absolute offset into the image (1..N where N=H*W) 193 | int abs_offset = abs_ind + offsets[ii]; 194 | int dst_Y = abs_offset % height; 195 | int dst_X = abs_offset / height; 196 | if (abs_offset < 0 || abs_offset >= (height * width) 197 | || abs(src_Y-dst_Y) > FILTER_RAD || abs(src_X-dst_X) > FILTER_RAD) { 198 | continue; 199 | } else if (is_missing[abs_offset] && !valid_in[abs_offset]) { 200 | continue; 201 | } 202 | 203 | ++count; 204 | 205 | weight_sum += gaussian_space[ii] * gaussian_range[ii]; 206 | value_sum += depth[abs_offset] * gaussian_space[ii] * gaussian_range[ii]; 207 | } 208 | 209 | if (weight_sum == 0) { 210 | return; 211 | } 212 | 213 | if (isnan(weight_sum)) { 214 | printf("*******************\n"); 215 | printf(" Weight sum is NaN\n"); 216 | printf("*******************\n"); 217 | } 218 | 219 | value_sum /= weight_sum; 220 | 221 | result[abs_ind] = value_sum; 222 | 223 | valid_out[abs_ind] = 1; 224 | } 225 | 226 | // Args: 227 | // filter_size - the number of pixels in the filter. 228 | void create_spatial_gaussian(int filter_rad, float sigma_s, float* gaussian_h) { 229 | int filter_len = filter_rad * 2 + 1; 230 | int filter_size = filter_len * filter_len; 231 | 232 | float sum = 0; 233 | int kk = 0; 234 | for (int yy = -filter_rad; yy <= filter_rad; ++yy) { 235 | for (int xx = -filter_rad; xx <= filter_rad; ++xx, ++kk) { 236 | gaussian_h[kk] = exp(-(xx*xx + yy*yy) / (2*sigma_s * sigma_s)); 237 | sum += gaussian_h[kk]; 238 | } 239 | } 240 | 241 | for (int ff = 0; ff < filter_size; ++ff) { 242 | gaussian_h[ff] /= sum; 243 | } 244 | } 245 | 246 | // Counts the number of missing pixels in the given mask. 
Note that the mask
247 | // MUST already be in the appropriate offset location.
248 | //
249 | // Args:
250 | //   height - the height of the image at the current scale.
251 | //   width - the width of the image at the current scale.
252 | //   mask - pointer into the mask_ms_d matrix. The offset has already been
253 | //          calculated.
254 | //   abs_inds_h - pre-allocated host memory location.
255 | int get_missing_pixel_coords(int height, int width, bool* mask, int* abs_inds_to_filter_h) {
256 |     int num_pixels = height * width;
257 | 
258 |     int num_missing_pixels = 0;
259 |     for (int nn = 0; nn < num_pixels; ++nn) {
260 |         if (mask[nn]) {
261 |             abs_inds_to_filter_h[num_missing_pixels] = nn;
262 |             ++num_missing_pixels;
263 |         }
264 |     }
265 | 
266 |     return num_missing_pixels;
267 | }
268 | 
269 | static void savePGM(bool* imf, const char *name, int height, int width) {
270 |     int NN = height * width;
271 |     uint8_t im[NN];
272 | 
273 |     for (int nn = 0; nn < NN; ++nn) {
274 |         // First convert to X,Y
275 |         int y = nn % height;
276 |         int x = floor(nn / height);
277 | 
278 |         // Then back to Abs Inds
279 |         int mm = y * width + x;
280 | 
281 |         im[mm] = uint8_t(255*imf[nn]);
282 |     }
283 | 
284 |     std::ofstream file(name, std::ios::out | std::ios::binary);
285 | 
286 |     file << "P5\n" << width << " " << height << "\n" << UCHAR_MAX << "\n";
287 |     file.write((char *)&im, width * height * sizeof(uint8_t));
288 | }
289 | 
290 | static void savePGM(float* imf, const char *name, int height, int width) {
291 |     int NN = height * width;
292 |     uint8_t im[NN];
293 | 
294 |     for (int nn = 0; nn < NN; ++nn) {
295 |         // First convert to X,Y
296 |         int y = nn % height;
297 |         int x = floor(nn / height);
298 | 
299 |         // Then back to Abs Inds
300 |         int mm = y * width + x;
301 | 
302 |         im[mm] = uint8_t(255*imf[nn]);
303 |     }
304 | 
305 |     std::ofstream file(name, std::ios::out | std::ios::binary);
306 | 
307 |     file << "P5\n" << width << " " << height << "\n" << UCHAR_MAX << "\n";
308 |     file.write((char *)&im, width * height * sizeof(uint8_t));
309 | }
310 | 
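For intuition, the spatial kernel built by create_spatial_gaussian above is simply a normalized 2-D Gaussian over the (2r+1)^2 window, and the per-pixel range weights in cbf_cpu use the same exp(-d^2 / 2*sigma^2) form over intensity differences. A numpy sketch of the spatial part (illustrative only, not part of the toolbox):

import numpy as np

def spatial_gaussian(filter_rad, sigma_s):
    # Mirrors create_spatial_gaussian: weights over a (2r+1)^2 window,
    # normalized so the kernel sums to 1.
    ax = np.arange(-filter_rad, filter_rad + 1)
    yy, xx = np.meshgrid(ax, ax, indexing="ij")
    g = np.exp(-(xx**2 + yy**2) / (2 * sigma_s**2))
    return (g / g.sum()).ravel()

# spatial_gaussian(5, 12.0): FILTER_RAD plus the first sigma_s value
# hard-coded in the Python binding shown later in this listing.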
311 | void filter_at_scale(float* depth_h,
312 |                      float* intensity_h,
313 |                      bool* mask_h,
314 |                      bool* valid_h,
315 |                      float* result_h,
316 |                      int* abs_inds_to_filter_h,
317 |                      int height,
318 |                      int width,
319 |                      float sigma_s,
320 |                      float sigma_r) {
321 | 
322 |     int filter_rad = FILTER_RAD;
323 |     int filter_size = 2 * filter_rad + 1;
324 |     int F = filter_size * filter_size;
325 | 
326 |     // Create the offset array.
327 |     int* offsets_h = (int*) malloc(F * sizeof(int));
328 |     create_offset_array(filter_rad, offsets_h, height);
329 | 
330 |     // Create the gaussian.
331 |     float* gaussian_h = (float*) malloc(F * sizeof(float));
332 |     create_spatial_gaussian(filter_rad, sigma_s, gaussian_h);
333 | 
334 |     // ************************************************
335 |     // We need to be smart about how we do this, so rather
336 |     // than execute the filter for EVERY point in the image,
337 |     // we will only do it for the points missing depth information.
338 |     // ************************************************
339 | 
340 |     int num_missing_pixels = get_missing_pixel_coords(height, width, mask_h, abs_inds_to_filter_h);
341 | #ifdef DEBUG
342 |     printf("Num Missing Pixels: %d\n", num_missing_pixels);
343 | #endif
344 | 
345 |     clock_t start_filter = clock();
346 | 
347 |     // We should not be writing into the same value for 'valid' that we're passing in.
348 |     bool* valid_in = (bool*) malloc(height * width * sizeof(bool));
349 |     for (int i = 0; i < height * width; ++i) {
350 |         valid_in[i] = valid_h[i];
351 |     }
352 | 
353 |     for (int i = 0; i < num_missing_pixels; ++i) {
354 |         cbf_cpu(depth_h,
355 |                 intensity_h,
356 |                 mask_h,
357 |                 valid_in,
358 |                 valid_h,
359 |                 result_h,
360 |                 abs_inds_to_filter_h,
361 |                 offsets_h,
362 |                 gaussian_h,
363 |                 height,
364 |                 width,
365 |                 filter_rad,
366 |                 sigma_s,
367 |                 sigma_r,
368 |                 num_missing_pixels,
369 |                 i);
370 |     }
371 | 
372 |     toc("FILTER OP", start_filter);
373 | 
374 |     free(valid_in);
375 |     free(offsets_h);
376 |     free(gaussian_h);
377 | }
378 | 
379 | void cbf::cbf(int height, int width, uint8_t* depth, uint8_t* intensity,
380 |               uint8_t* mask_h, uint8_t* result, unsigned num_scales,
381 |               double* sigma_s, double* sigma_r) {
382 | 
383 |     clock_t start_func = clock();
384 | 
385 |     int pyr_heights[num_scales];
386 |     int pyr_widths[num_scales];
387 |     int pyr_offsets[num_scales];
388 |     calc_pyr_sizes(&pyr_heights[0], &pyr_widths[0], &pyr_offsets[0], height, width, num_scales);
389 | 
390 |     // Allocate the memory needed for the absolute missing pixel indices. We'll
391 |     // allocate the number of bytes required for the largest image, since the
392 |     // smaller ones obviously fit inside of it.
393 |     int N = height * width;
394 |     int* abs_inds_to_filter_h = (int*) malloc(N * sizeof(int));
395 | 
396 |     int pyr_size = get_pyr_size(&pyr_heights[0], &pyr_widths[0], num_scales);
397 | 
398 |     // ************************
399 |     // CREATING THE PYRAMID
400 |     // ************************
401 |     clock_t start_pyr = clock();
402 | 
403 |     // NEG TIME.
404 |     float* depth_ms_h = (float*) malloc(pyr_size * sizeof(float));
405 |     float* intensity_ms_h = (float*) malloc(pyr_size * sizeof(float));
406 |     bool* mask_ms_h = (bool*) malloc(pyr_size * sizeof(bool));
407 |     float* result_ms_h = (float*) malloc(pyr_size * sizeof(float));
408 |     bool* valid_ms_h = (bool*) malloc(pyr_size * sizeof(bool));
409 | 
410 |     for (int nn = 0; nn < N; ++nn) {
411 |         depth_ms_h[nn] = depth[nn] / 255.0;
412 |         intensity_ms_h[nn] = intensity[nn] / 255.0;
413 |         mask_ms_h[nn] = mask_h[nn];
414 |         valid_ms_h[nn] = !mask_h[nn];
415 |         result_ms_h[nn] = 0;
416 |     }
417 | 
418 |     float* depth_ms_h_p = depth_ms_h + pyr_offsets[1];
419 |     float* intensity_ms_h_p = intensity_ms_h + pyr_offsets[1];
420 |     bool* mask_ms_h_p = mask_ms_h + pyr_offsets[1];
421 |     bool* valid_ms_h_p = valid_ms_h + pyr_offsets[1];
422 |     float* result_ms_h_p = result_ms_h + pyr_offsets[1];
423 | 
424 |     for (int scale = 1; scale < num_scales; ++scale) {
425 |         for (int xx = 0; xx < pyr_widths[scale]; ++xx) {
426 |             for (int yy = 0; yy < pyr_heights[scale]; ++yy, ++depth_ms_h_p, ++intensity_ms_h_p, ++mask_ms_h_p, ++result_ms_h_p, ++valid_ms_h_p) {
427 |                 int abs_yy = static_cast<int>(((float)yy / pyr_heights[scale]) * height);
428 |                 int abs_xx = static_cast<int>(((float)xx / pyr_widths[scale]) * width);
429 |                 int img_offset = abs_yy + height * abs_xx;
430 |                 *depth_ms_h_p = depth_ms_h[img_offset];
431 |                 *intensity_ms_h_p = intensity_ms_h[img_offset];
432 |                 *mask_ms_h_p = mask_h[img_offset];
433 |                 *valid_ms_h_p = !mask_h[img_offset];
434 |                 *result_ms_h_p = 0;
435 |             }
436 |         }
437 |     }
438 | 
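The loop above builds a coarse-to-fine pyramid by nearest-neighbour sampling; the routine then filters the coarsest level first and upsamples filled values into the next level. A compact numpy sketch of that control flow (illustrative only, under simplified assumptions; fill_one_scale stands in for the masked cross bilateral filter, and it is not a faithful port):

import numpy as np

def multiscale_fill(depth, mask, num_scales, fill_one_scale):
    # depth: float image in [0, 1]; mask: True where depth is missing.
    levels = [(depth.copy(), mask)]
    for s in range(1, num_scales):
        levels.append((depth[::2**s, ::2**s].copy(), mask[::2**s, ::2**s].copy()))
    for s in range(num_scales - 1, 0, -1):
        d, m = levels[s]
        filled = fill_one_scale(d, m)                 # filter the coarse level
        d_up, m_up = levels[s - 1]
        # Upsample: still-missing finer pixels take the coarse filled value.
        coarse = np.kron(filled, np.ones((2, 2)))[:d_up.shape[0], :d_up.shape[1]]
        d_up[m_up] = coarse[m_up]
    d, m = levels[0]
    return fill_one_scale(d, m)                       # final pass at full resolution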
439 |     // *********************************
440 |     // RUN THE ACTUAL FILTERING CODE
441 |     // *********************************
442 | 
443 |     for (int scale = num_scales - 1; scale >= 0; --scale) {
444 | #ifdef DEBUG
445 |         printf("Filtering at scale %d, [%dx%d]\n", scale, pyr_heights[scale], pyr_widths[scale]);
446 | 
447 |         char filename1[50];
448 |         sprintf(filename1, "missing_pixels_before_filtering_scale%d.pgm", scale);
449 |         // Save the intermediate image before filtering.
450 |         savePGM(mask_ms_h + pyr_offsets[scale], filename1, pyr_heights[scale], pyr_widths[scale]);
451 | 
452 |         char filename2[50];
453 |         sprintf(filename2, "valid_pixels_before_filtering_scale%d.pgm", scale);
454 |         // Save the intermediate image before filtering.
455 |         savePGM(valid_ms_h + pyr_offsets[scale], filename2, pyr_heights[scale], pyr_widths[scale]);
456 | 
457 |         sprintf(filename2, "valid_intensity_before_filtering_scale%d.pgm", scale);
458 |         // Save the intermediate image before filtering.
459 |         savePGM(intensity_ms_h + pyr_offsets[scale], filename2, pyr_heights[scale], pyr_widths[scale]);
460 | 
461 |         sprintf(filename2, "depth_before_filtering_scale%d.pgm", scale);
462 |         // Save the intermediate image before filtering.
463 |         savePGM(depth_ms_h + pyr_offsets[scale], filename2, pyr_heights[scale], pyr_widths[scale]);
464 | #endif
465 | 
466 |         filter_at_scale(depth_ms_h + pyr_offsets[scale],
467 |                         intensity_ms_h + pyr_offsets[scale],
468 |                         mask_ms_h + pyr_offsets[scale],
469 |                         valid_ms_h + pyr_offsets[scale],
470 |                         result_ms_h + pyr_offsets[scale],
471 |                         abs_inds_to_filter_h,
472 |                         pyr_heights[scale],
473 |                         pyr_widths[scale],
474 |                         sigma_s[scale],
475 |                         sigma_r[scale]);
476 | 
477 | 
478 | #ifdef DEBUG
479 |         sprintf(filename2, "valid_pixels_after_filtering_scale%d.pgm", scale);
480 |         // Now that we've performed the filtering, save the intermediate image.
481 |         savePGM(valid_ms_h + pyr_offsets[scale], filename2, pyr_heights[scale], pyr_widths[scale]);
482 | #endif
483 | 
484 | #ifdef DEBUG
485 |         char filename[50];
486 |         sprintf(filename, "depth_after_filtering_scale%d.pgm", scale);
487 |         // Now that we've performed the filtering, save the intermediate image.
488 |         savePGM(result_ms_h + pyr_offsets[scale], filename, pyr_heights[scale], pyr_widths[scale]);
489 | #endif
490 | 
491 |         if (scale == 0) {
492 |             continue;
493 |         }
494 | 
495 |         // Now, we need to upsample the resulting depth and store it in the next
496 |         // highest location.
497 |         int num_missing_pixels = pyr_heights[scale-1] * pyr_widths[scale-1];
498 | 
499 | #ifdef DEBUG
500 |         printf("Upsampling %d\n", num_missing_pixels);
501 | #endif
502 |         for (int i = 0; i < num_missing_pixels; ++i) {
503 |             upsample_cpu(depth_ms_h + pyr_offsets[scale-1],
504 |                          mask_ms_h + pyr_offsets[scale-1],
505 |                          valid_ms_h + pyr_offsets[scale-1],
506 |                          depth_ms_h + pyr_offsets[scale],
507 |                          result_ms_h + pyr_offsets[scale],
508 |                          mask_ms_h + pyr_offsets[scale],
509 |                          valid_ms_h + pyr_offsets[scale],
510 |                          pyr_heights[scale],
511 |                          pyr_widths[scale],
512 |                          pyr_heights[scale-1],
513 |                          pyr_widths[scale-1],
514 |                          i);
515 |         }
516 | 
517 | 
518 | #ifdef DEBUG
519 |         sprintf(filename, "up_depth_after_filtering_scale%d.pgm", scale);
520 |         // Now that we've performed the filtering, save the intermediate image.
521 |         savePGM(depth_ms_h + pyr_offsets[scale-1], filename, pyr_heights[scale-1], pyr_widths[scale-1]);
522 | 
523 |         sprintf(filename, "up_valid_after_filtering_scale%d.pgm", scale);
524 |         // Now that we've performed the filtering, save the intermediate image.
525 |         savePGM(valid_ms_h + pyr_offsets[scale-1], filename, pyr_heights[scale-1], pyr_widths[scale-1]);
526 | #endif
527 |     }
528 | 
529 |     // Copy the final result into the output buffer.
530 |     for (int nn = 0; nn < N; ++nn) {
531 |         if (mask_ms_h[nn]) {
532 |             result[nn] = static_cast<uint8_t>(result_ms_h[nn] * 255);
533 |         } else {
534 |             result[nn] = depth[nn];
535 |         }
536 |     }
537 | 
538 |     free(depth_ms_h);
539 |     free(intensity_ms_h);
540 |     free(mask_ms_h);
541 |     free(result_ms_h);
542 |     free(valid_ms_h);
543 |     free(abs_inds_to_filter_h);
544 | 
545 |     toc("Entire Function", start_func);
546 | }
547 | 
548 | 
-------------------------------------------------------------------------------- /src/lib/fragment.sh: --------------------------------------------------------------------------------
1 | R""(
2 | #version 120
3 | 
4 | varying vec3 texCoord;
5 | varying vec4 camCoord;
6 | uniform samplerCube cubemap;
7 | const vec3 camlook = vec3( 0.0, 0.0, -1.0 );
8 | uniform bool isDepth;
9 | 
10 | void main (void) {
11 |     vec4 color = textureCube(cubemap, texCoord);
12 |     if (isDepth) {
13 |         float scale = dot(camCoord.xyz, camlook) / length(camCoord.xyz);
14 |         gl_FragColor = color*scale;
15 |     } else {
16 |         gl_FragColor = color;
17 |     }
18 | }
19 | )""
20 | 
-------------------------------------------------------------------------------- /src/lib/vertex.sh: --------------------------------------------------------------------------------
1 | R""(
2 | #version 120
3 | 
4 | attribute vec3 vertex;
5 | varying vec3 texCoord;
6 | varying vec4 camCoord;
7 | uniform mat4 ProjMat;
8 | uniform mat4 ModelViewMat;
9 | 
10 | void main() {
11 |     camCoord = ModelViewMat * vec4(vertex, 1.0);
12 |     gl_Position = ProjMat * camCoord;
13 |     texCoord = vertex;
14 | }
15 | )""
16 | 
-------------------------------------------------------------------------------- /src/lib_python/MatterSimPython.cpp: --------------------------------------------------------------------------------
1 | #include <pybind11/pybind11.h>
2 | #include <pybind11/stl.h>
3 | #include "MatterSim.hpp"
4 | #include "cbf.h"
5 | 
6 | namespace py = pybind11;
7 | 
8 | namespace mattersim {
9 | 
10 |     void cbf(py::buffer depth, py::buffer intensity, py::buffer mask, py::buffer result) {
11 |         double spaceSigmas[3] = {12, 5, 8};
12 |         double rangeSigmas[3] = {0.2, 0.08, 0.02};
13 |         py::buffer_info d_info = depth.request();
14 |         py::buffer_info i_info = intensity.request();
15 |         py::buffer_info m_info = mask.request();
16 |         py::buffer_info r_info = result.request();
17 |         cbf::cbf(d_info.shape[0], d_info.shape[1],
18 |             static_cast<uint8_t*>(d_info.ptr),
19 |             static_cast<uint8_t*>(i_info.ptr),
20 |             static_cast<uint8_t*>(m_info.ptr),
21 |             static_cast<uint8_t*>(r_info.ptr),
22 |             3, &spaceSigmas[0], &rangeSigmas[0]);
23 |     }
24 | 
25 | }
26 | 
27 | using namespace mattersim;
28 | 
29 | PYBIND11_MODULE(MatterSim, m) {
30 |     m.def("cbf", &mattersim::cbf, "Cross Bilateral Filter");
31 |     py::class_<Viewpoint, ViewpointPtr>(m, "ViewPoint")
32 |         .def_readonly("viewpointId", &Viewpoint::viewpointId)
33 |         .def_readonly("ix", &Viewpoint::ix)
34 |         .def_readonly("x", &Viewpoint::x)
35 |         .def_readonly("y", &Viewpoint::y)
36 |         .def_readonly("z", &Viewpoint::z)
37 |         .def_readonly("rel_heading", &Viewpoint::rel_heading)
38 |         .def_readonly("rel_elevation", &Viewpoint::rel_elevation)
39 |         .def_readonly("rel_distance", &Viewpoint::rel_distance);
40 |     py::class_<cv::Mat>(m, "Mat", pybind11::buffer_protocol())
41 |         .def_buffer([](cv::Mat& im) -> pybind11::buffer_info {
42 |             ssize_t item_size = im.elemSize() / im.channels();
43 |             std::string format = pybind11::format_descriptor<unsigned char>::format();
44 |             if (item_size == 2) { // handle 16bit data from depth maps
45 |                 format = pybind11::format_descriptor<unsigned short>::format();
46 |             }
47 |             return pybind11::buffer_info(
48 |                 im.data, // Pointer to buffer
49 |                 item_size, // Size of one scalar
50 |                 format,
51 |                 3, // Number of dimensions (row, cols, channels)
52 |                 { im.rows, im.cols, im.channels() }, // Buffer dimensions
53 |                 { // Strides (in bytes) for each index
54 |                     item_size * im.channels() * im.cols,
55 |                     item_size * im.channels(),
56 |                     item_size
57 |                 }
58 |             );
59 |         });
60 |     py::class_<SimState, SimStatePtr>(m, "SimState")
61 |         .def_readonly("scanId", &SimState::scanId)
62 |         .def_readonly("step", &SimState::step)
63 |         .def_readonly("rgb", &SimState::rgb)
64 |         .def_readonly("depth", &SimState::depth)
65 |         .def_readonly("location", &SimState::location)
66 |         .def_readonly("heading", &SimState::heading)
67 |         .def_readonly("elevation", &SimState::elevation)
68 |         .def_readonly("viewIndex", &SimState::viewIndex)
69 |         .def_readonly("navigableLocations", &SimState::navigableLocations);
70 |     py::class_<Simulator>(m, "Simulator")
71 |         .def(py::init<>())
72 |         .def("setDatasetPath", &Simulator::setDatasetPath)
73 |         .def("setNavGraphPath", &Simulator::setNavGraphPath)
74 |         .def("setRenderingEnabled", &Simulator::setRenderingEnabled)
75 |         .def("setCameraResolution", &Simulator::setCameraResolution)
76 |         .def("setCameraVFOV", &Simulator::setCameraVFOV)
77 |         .def("setElevationLimits", &Simulator::setElevationLimits)
78 |         .def("setDiscretizedViewingAngles", &Simulator::setDiscretizedViewingAngles)
79 |         .def("setPreloadingEnabled", &Simulator::setPreloadingEnabled)
80 |         .def("setDepthEnabled", &Simulator::setDepthEnabled)
81 |         .def("setBatchSize", &Simulator::setBatchSize)
82 |         .def("setCacheSize", &Simulator::setCacheSize)
83 |         .def("setSeed", &Simulator::setSeed)
84 |         .def("initialize", &Simulator::initialize)
85 |         .def("newEpisode", &Simulator::newEpisode)
86 |         .def("newRandomEpisode", &Simulator::newRandomEpisode)
87 |         .def("getState", &Simulator::getState, py::return_value_policy::take_ownership)
88 |         .def("makeAction", &Simulator::makeAction)
89 |         .def("close", &Simulator::close)
90 |         .def("resetTimers", &Simulator::resetTimers)
91 |         .def("timingInfo", &Simulator::timingInfo);
92 | }
93 | 
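The cbf wrapper above hard-codes a three-scale filter over raw buffers. A minimal usage sketch from Python (illustrative only: the array contents and the zero-means-missing convention are assumptions, not repo code):

import numpy as np
from MatterSim import cbf

# depth and intensity are single-channel uint8 images of the same size;
# mask is nonzero where depth is missing and should be filled.
depth = np.random.randint(0, 256, (480, 640), dtype=np.uint8)
intensity = np.random.randint(0, 256, (480, 640), dtype=np.uint8)
mask = (depth == 0).astype(np.uint8)
result = np.zeros_like(depth)

cbf(depth, intensity, mask, result)  # filled depth is written into result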
-------------------------------------------------------------------------------- /src/test/python_test.py: --------------------------------------------------------------------------------
1 | import sys
2 | sys.path.append('build')
3 | 
4 | from MatterSim import Simulator
5 | import math
6 | import cv2
7 | import json
8 | import numpy as np
9 | 
10 | 
11 | sim = Simulator()
12 | sim.setCameraResolution(500, 300)
13 | sim.setCameraVFOV(math.radians(60))
14 | sim.setElevationLimits(math.radians(-40),math.radians(50))
15 | sim.initialize()
16 | 
17 | with open("src/test/rendertest_spec.json") as f:
18 |     spec = json.load(f)
19 |     for tc in spec[0][:1]:  # the spec is a list of batches; take the first test case
20 |         sim.newEpisode(tc["scanId"], tc["viewpointId"], tc["heading"], tc["elevation"])
21 |         state = sim.getState()
22 |         im = np.array(state.rgb, copy=False)
23 |         imgfile = tc["reference_image"]
24 |         cv2.imwrite("sim_imgs/"+imgfile, im)
25 |         cv2.imshow('rendering', im)
26 |         cv2.waitKey(0)
27 | 
28 | 
29 | 
-------------------------------------------------------------------------------- /src/test/rendertest_spec.json: --------------------------------------------------------------------------------
1 | [
2 |   [
3 |     {
4 |       "scanId": "17DRP5sb8fy",
5 |       "viewpointId": "85c23efeaecd4d43a7dcd5b90137179e",
6 |       "elevation": 0.008557380839564054,
7 |       "heading": 2.551961945320492,
8 |       "reference_image": "17DRP5sb8fy_85c23efeaecd4d43a7dcd5b90137179e_2.551961945320492_0.008557380839564054.png"
9 |     },
10 |     {
11 |       "scanId": "1LXtFkjw3qL",
12 |       "viewpointId": "187589bb7d4644f2943079fb949c0be9",
13 |       "elevation":
0.0004921836022802584, 14 | "heading": 1.8699330579409539, 15 | "reference_image": "1LXtFkjw3qL_187589bb7d4644f2943079fb949c0be9_1.8699330579409539_0.0004921836022802584.png" 16 | }, 17 | { 18 | "scanId": "1pXnuDYAj8r", 19 | "viewpointId": "163d61ac7edb43fb958c5d9e69ae11ad", 20 | "elevation": -0.02444352614304746, 21 | "heading": 4.626331047551077, 22 | "reference_image": "1pXnuDYAj8r_163d61ac7edb43fb958c5d9e69ae11ad_4.626331047551077_-0.02444352614304746.png" 23 | }, 24 | { 25 | "scanId": "29hnd4uzFmX", 26 | "viewpointId": "1576d62e7bbb45e8a5ef9e7bb37b1839", 27 | "elevation": -0.0006838914039405167, 28 | "heading": 5.844119909926444, 29 | "reference_image": "29hnd4uzFmX_1576d62e7bbb45e8a5ef9e7bb37b1839_5.844119909926444_-0.0006838914039405167.png" 30 | } 31 | ], 32 | [ 33 | { 34 | "scanId": "2azQ1b91cZZ", 35 | "viewpointId": "3daad58ad53742038e50d62e91f84e7b", 36 | "elevation": 0.016732869758208434, 37 | "heading": 3.1736484087962933, 38 | "reference_image": "2azQ1b91cZZ_3daad58ad53742038e50d62e91f84e7b_3.1736484087962933_0.016732869758208434.png" 39 | }, 40 | { 41 | "scanId": "2n8kARJN3HM", 42 | "viewpointId": "94ac3cea52ec455993f8562f78da3be1", 43 | "elevation": -0.0009188787844489273, 44 | "heading": 2.604601935142565, 45 | "reference_image": "2n8kARJN3HM_94ac3cea52ec455993f8562f78da3be1_2.604601935142565_-0.0009188787844489273.png" 46 | }, 47 | { 48 | "scanId": "2t7WUuJeko7", 49 | "viewpointId": "529f006f8293406da0b506defd2891a5", 50 | "elevation": -0.013788837143969411, 51 | "heading": 0.032985516949381344, 52 | "reference_image": "2t7WUuJeko7_529f006f8293406da0b506defd2891a5_0.032985516949381344_-0.013788837143969411.png" 53 | }, 54 | { 55 | "scanId": "5LpN3gDmAk7", 56 | "viewpointId": "bda8025f20404048a77381e9e0dc0ccf", 57 | "elevation": -0.01083211073205187, 58 | "heading": 5.325207878739601, 59 | "reference_image": "5LpN3gDmAk7_bda8025f20404048a77381e9e0dc0ccf_5.325207878739601_-0.01083211073205187.png" 60 | } 61 | ], 62 | [ 63 | { 64 | "scanId": "5q7pvUzZiYa", 65 | "viewpointId": "397403366d784caf804d741f32fd68b9", 66 | "elevation": -0.0007063598518199811, 67 | "heading": 2.8746465006968234, 68 | "reference_image": "5q7pvUzZiYa_397403366d784caf804d741f32fd68b9_2.8746465006968234_-0.0007063598518199811.png" 69 | }, 70 | { 71 | "scanId": "5ZKStnWn8Zo", 72 | "viewpointId": "c76b52856e7c4f2a9a4419000c8e646a", 73 | "elevation": -0.02922217527541366, 74 | "heading": 4.13470589902238, 75 | "reference_image": "5ZKStnWn8Zo_c76b52856e7c4f2a9a4419000c8e646a_4.13470589902238_-0.02922217527541366.png" 76 | }, 77 | { 78 | "scanId": "759xd9YjKW5", 79 | "viewpointId": "2343ef3bf04a4433af62f0d527d7512a", 80 | "elevation": -0.016938006310169448, 81 | "heading": 3.5451019786019264, 82 | "reference_image": "759xd9YjKW5_2343ef3bf04a4433af62f0d527d7512a_3.5451019786019264_-0.016938006310169448.png" 83 | }, 84 | { 85 | "scanId": "7y3sRwLe3Va", 86 | "viewpointId": "9bbf903d50da4ffd9e5d1fb7c9f4d69b", 87 | "elevation": 0.008361841032265524, 88 | "heading": 1.7348660165523566, 89 | "reference_image": "7y3sRwLe3Va_9bbf903d50da4ffd9e5d1fb7c9f4d69b_1.7348660165523566_0.008361841032265524.png" 90 | } 91 | ], 92 | [ 93 | { 94 | "scanId": "8194nk5LbLH", 95 | "viewpointId": "c9e8dc09263e4d0da77d16de0ecddd39", 96 | "elevation": 0.008533161479170466, 97 | "heading": 4.05504292862083, 98 | "reference_image": "8194nk5LbLH_c9e8dc09263e4d0da77d16de0ecddd39_4.05504292862083_0.008533161479170466.png" 99 | }, 100 | { 101 | "scanId": "82sE5b5pLXE", 102 | "viewpointId": "056a491afa534b17bac36f4f5898462a", 103 | 
"elevation": -0.0037883068413356496, 104 | "heading": 1.689393931320027, 105 | "reference_image": "82sE5b5pLXE_056a491afa534b17bac36f4f5898462a_1.689393931320027_-0.0037883068413356496.png" 106 | }, 107 | { 108 | "scanId": "8WUmhLawc2A", 109 | "viewpointId": "d21aae0b5d944f27a0074525c803fc9f", 110 | "elevation": -0.04510889155759994, 111 | "heading": 3.047458184407221, 112 | "reference_image": "8WUmhLawc2A_d21aae0b5d944f27a0074525c803fc9f_3.047458184407221_-0.04510889155759994.png" 113 | }, 114 | { 115 | "scanId": "ac26ZMwG7aT", 116 | "viewpointId": "efeef7cc82c84690addb0bf415f075ea", 117 | "elevation": -0.013447513736072197, 118 | "heading": 0.07434352566701552, 119 | "reference_image": "ac26ZMwG7aT_efeef7cc82c84690addb0bf415f075ea_0.07434352566701552_-0.013447513736072197.png" 120 | } 121 | ], 122 | [ 123 | { 124 | "scanId": "ARNzJeq3xxb", 125 | "viewpointId": "9a671e6915de4eb897f45fee8bf2031d", 126 | "elevation": 0.02583868533558965, 127 | "heading": 5.616355886953764, 128 | "reference_image": "ARNzJeq3xxb_9a671e6915de4eb897f45fee8bf2031d_5.616355886953764_0.02583868533558965.png" 129 | }, 130 | { 131 | "scanId": "B6ByNegPMKs", 132 | "viewpointId": "e3a65955df26467581c32613c4e9f865", 133 | "elevation": 0.007265625492957138, 134 | "heading": 5.230794959607039, 135 | "reference_image": "B6ByNegPMKs_e3a65955df26467581c32613c4e9f865_5.230794959607039_0.007265625492957138.png" 136 | }, 137 | { 138 | "scanId": "b8cTxDM8gDG", 139 | "viewpointId": "f2944e0b66b9461994a7f757582f9bc3", 140 | "elevation": -0.007543204141144086, 141 | "heading": 0.0853092784395515, 142 | "reference_image": "b8cTxDM8gDG_f2944e0b66b9461994a7f757582f9bc3_0.0853092784395515_-0.007543204141144086.png" 143 | }, 144 | { 145 | "scanId": "cV4RVeZvu5T", 146 | "viewpointId": "1b321779a4374c2b952c51820daa9e6c", 147 | "elevation": 0.07914721704610106, 148 | "heading": 6.266463179566256, 149 | "reference_image": "cV4RVeZvu5T_1b321779a4374c2b952c51820daa9e6c_6.266463179566256_0.07914721704610106.png" 150 | } 151 | ] 152 | ] 153 | -------------------------------------------------------------------------------- /tasks/NDH/DAN_modules/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yeezhu/CMN.pytorch/b4e3c3ca34668cb8031d525132b013ced472ed87/tasks/NDH/DAN_modules/__init__.py -------------------------------------------------------------------------------- /tasks/NDH/DAN_modules/fc.py: -------------------------------------------------------------------------------- 1 | """ 2 | This code is modified from Hengyuan Hu's repository. 
3 | https://github.com/hengyuan-hu/bottom-up-attention-vqa
4 | """
5 | from __future__ import print_function
6 | import torch.nn as nn
7 | from torch.nn.utils.weight_norm import weight_norm
8 | 
9 | class FCNet(nn.Module):
10 |     """Simple class for a non-linear fully connected network
11 |     """
12 |     def __init__(self, dims):
13 |         super(FCNet, self).__init__()
14 | 
15 |         layers = []
16 |         for i in range(len(dims)-2):
17 |             in_dim = dims[i]
18 |             out_dim = dims[i+1]
19 |             layers.append(weight_norm(nn.Linear(in_dim, out_dim), dim=None))
20 |             layers.append(nn.ReLU())
21 |         layers.append(weight_norm(nn.Linear(dims[-2], dims[-1]), dim=None))
22 |         layers.append(nn.ReLU())
23 | 
24 |         self.main = nn.Sequential(*layers)
25 | 
26 |     def forward(self, x):
27 |         return self.main(x)
28 | 
29 | 
30 | if __name__ == '__main__':
31 |     fc1 = FCNet([10, 20, 10])
32 |     print(fc1)
33 | 
34 |     print('============')
35 |     fc2 = FCNet([10, 20])
36 |     print(fc2)
-------------------------------------------------------------------------------- /tasks/NDH/DAN_modules/language_model.py: --------------------------------------------------------------------------------
1 | import numpy as np
2 | import torch
3 | from torch import nn
4 | from torch.autograd import Variable
5 | from torch.nn import functional as F
6 | from torch.nn.utils.rnn import pack_padded_sequence, pad_packed_sequence
7 | 
8 | class DynamicRNN(nn.Module):
9 |     """
10 |     This code is modified from batra-mlp-lab's repository.
11 |     https://github.com/batra-mlp-lab/visdial-challenge-starter-pytorch
12 |     """
13 |     def __init__(self, rnn_model):
14 |         super(DynamicRNN, self).__init__()
15 |         self.rnn_model = rnn_model
16 | 
17 |     def forward(self, seq_input, seq_lens, initial_state=None):
18 |         """A wrapper over pytorch's rnn to handle sequences of variable length.
19 | 
20 |         Arguments
21 |         ---------
22 |         seq_input : torch.autograd.Variable
23 |             Input sequence tensor (padded) for RNN model. (b, max_seq_len, embed_size)
24 |         seq_lens : torch.LongTensor
25 |             Length of sequences (b, )
26 |         initial_state : torch.autograd.Variable
27 |             Initial (hidden, cell) states of RNN model.
28 | 
29 |         Returns
30 |         -------
31 |         A single tensor of shape (batch_size, rnn_hidden_size) corresponding
32 |         to the outputs of the RNN model at the last time step of each input
33 |         sequence.
34 | """ 35 | sorted_len, fwd_order, bwd_order = self._get_sorted_order(seq_lens) 36 | sorted_seq_input = seq_input.index_select(0, fwd_order) 37 | packed_seq_input = pack_padded_sequence( 38 | sorted_seq_input, lengths=sorted_len, batch_first=True) 39 | 40 | if initial_state is not None: 41 | hx = initial_state 42 | sorted_hx = [x.index_select(1, fwd_order) for x in hx] 43 | assert hx[0].size(0) == self.rnn_model.num_layers 44 | else: 45 | hx = None 46 | self.rnn_model.flatten_parameters() 47 | enc_h, (h_n, c_n) = self.rnn_model(packed_seq_input, hx) 48 | ctx, _ = pad_packed_sequence(enc_h, batch_first=True) 49 | 50 | c_t = c_n[-1].index_select(dim=0, index=bwd_order) 51 | rnn_output = h_n[-1].index_select(dim=0, index=bwd_order) 52 | return ctx, rnn_output, c_t 53 | 54 | @staticmethod 55 | def _get_sorted_order(lens): 56 | sorted_len, fwd_order = torch.sort(lens.contiguous().view(-1), 0, descending=True) 57 | _, bwd_order = torch.sort(fwd_order) 58 | if isinstance(sorted_len, Variable): 59 | sorted_len = sorted_len.data 60 | sorted_len = list(sorted_len) 61 | return sorted_len, fwd_order, bwd_order 62 | 63 | 64 | class WordEmbedding(nn.Module): 65 | """ 66 | code from @jnhwkim (Jin-Hwa Kim) 67 | https://github.com/jnhwkim/ban-vqa 68 | """ 69 | def __init__(self, ntoken, emb_dim, dropout, padding_idx): 70 | super(WordEmbedding, self).__init__() 71 | self.emb = nn.Embedding(ntoken+1, emb_dim, padding_idx) 72 | self.dropout = nn.Dropout(dropout) 73 | self.ntoken = ntoken 74 | self.emb_dim = emb_dim 75 | 76 | def init_embedding(self, np_file, tfidf=None, tfidf_weights=None): 77 | weight_init = torch.from_numpy(np.load(np_file)) 78 | assert weight_init.shape == (self.ntoken, self.emb_dim) 79 | self.emb.weight.data[:self.ntoken] = weight_init 80 | 81 | def forward(self, x): 82 | emb = self.emb(x) 83 | emb = self.dropout(emb) 84 | return emb 85 | -------------------------------------------------------------------------------- /tasks/NDH/DAN_modules/refer_find_modules.py: -------------------------------------------------------------------------------- 1 | """ 2 | Dual Attention Networks for Visual Reference Resolution in Visual Dialog 3 | Gi-Cheon Kang, Jaeseo Lim, Byoung-Tak Zhang 4 | https://arxiv.org/abs/1902.09368 5 | """ 6 | import torch 7 | import torch.nn as nn 8 | import torch.nn.functional as F 9 | from torch.autograd import Variable 10 | from torch.nn.utils.weight_norm import weight_norm 11 | from .submodules import MultiHeadAttention, PositionwiseFeedForward 12 | from .fc import FCNet 13 | 14 | class REFER(nn.Module): 15 | """ This code is modified from Yu-Hsiang Huang's repository 16 | https://github.com/jadore801120/attention-is-all-you-need-pytorch 17 | """ 18 | def __init__(self, d_model, d_inner, n_head, d_k, d_v, dropout=0.2): 19 | super(REFER, self).__init__() 20 | self.slf_attn = MultiHeadAttention(n_head, d_model, d_k, d_v, dropout=dropout) 21 | self.pos_ffn = PositionwiseFeedForward(d_model, d_inner, dropout=dropout) 22 | 23 | def forward(self, q, m): 24 | enc_output, enc_slf_attn = self.slf_attn(q, m, m) 25 | enc_output = self.pos_ffn(enc_output) 26 | return enc_output, enc_slf_attn 27 | 28 | class FIND(nn.Module): 29 | """ This code is modified from Hengyuan Hu's repository. 
30 | https://github.com/hengyuan-hu/bottom-up-attention-vqa 31 | """ 32 | def __init__(self, v_dim, q_dim, num_hid, dropout=0.2): 33 | super(FIND, self).__init__() 34 | 35 | self.v_proj = FCNet([v_dim, num_hid]) 36 | self.q_proj = FCNet([q_dim, num_hid]) 37 | self.dropout = nn.Dropout(dropout) 38 | self.linear = weight_norm(nn.Linear(num_hid, 1), dim=None) 39 | 40 | def forward(self, v, q): 41 | """ 42 | v: [batch, v, 2048] 43 | q: [10, batch, 1024] 44 | """ 45 | logits = self.logits(v, q) 46 | w = nn.functional.softmax(logits, 1) 47 | return w 48 | 49 | def logits(self, v, q): 50 | batch, k, _ = v.size() 51 | v_proj = self.v_proj(v) 52 | q_proj = self.q_proj(q).unsqueeze(1).repeat(1, k, 1) 53 | joint_repr = v_proj * q_proj 54 | joint_repr = self.dropout(joint_repr) 55 | logits = self.linear(joint_repr) 56 | return logits 57 | -------------------------------------------------------------------------------- /tasks/NDH/DAN_modules/submodules.py: -------------------------------------------------------------------------------- 1 | """ 2 | This code is modified from Yu-Hsiang Huang's repository 3 | https://github.com/jadore801120/attention-is-all-you-need-pytorch 4 | """ 5 | import numpy as np 6 | import torch 7 | import torch.nn as nn 8 | import torch.nn.functional as F 9 | from torch.autograd import Variable 10 | 11 | class MultiHeadAttention(nn.Module): 12 | def __init__(self, n_head, d_model, d_k, d_v, dropout=0.1): 13 | super(MultiHeadAttention, self).__init__() 14 | 15 | self.n_head = n_head 16 | self.d_k = d_k 17 | self.d_v = d_v 18 | 19 | self.w_qs = nn.Linear(d_model, n_head * d_k) 20 | self.w_ks = nn.Linear(d_model, n_head * d_k) 21 | self.w_vs = nn.Linear(d_model, n_head * d_v) 22 | nn.init.normal_(self.w_qs.weight, mean=0, std=np.sqrt(2.0 / (d_model + d_k))) 23 | nn.init.normal_(self.w_ks.weight, mean=0, std=np.sqrt(2.0 / (d_model + d_k))) 24 | nn.init.normal_(self.w_vs.weight, mean=0, std=np.sqrt(2.0 / (d_model + d_v))) 25 | 26 | self.attention = ScaledDotProductAttention(temperature=np.power(d_k, 0.5)) 27 | self.layer_norm = LayerNorm(d_model) 28 | 29 | self.fc = nn.Linear(n_head * d_v, d_model) 30 | nn.init.xavier_normal_(self.fc.weight) 31 | self.dropout = nn.Dropout(dropout) 32 | 33 | def forward(self, q, k, v, mask=None): 34 | ''' 35 | q: [batch, 1, 512] 36 | k, v: [batch, num_entry, 512] 37 | ''' 38 | d_k, d_v, n_head = self.d_k, self.d_v, self.n_head 39 | 40 | sz_b, len_q, _ = q.size() 41 | sz_b, len_k, _ = k.size() 42 | sz_b, len_v, _ = v.size() 43 | 44 | residual = q 45 | 46 | q = self.w_qs(q).view(sz_b, len_q, n_head, d_k) 47 | k = self.w_ks(k).view(sz_b, len_k, n_head, d_k) 48 | v = self.w_vs(v).view(sz_b, len_v, n_head, d_v) 49 | 50 | q = q.permute(2, 0, 1, 3).contiguous().view(-1, len_q, d_k) # (n*b) x lq x dk 51 | k = k.permute(2, 0, 1, 3).contiguous().view(-1, len_k, d_k) # (n*b) x lk x dk 52 | v = v.permute(2, 0, 1, 3).contiguous().view(-1, len_v, d_v) # (n*b) x lv x dv 53 | 54 | if mask is not None: 55 | mask = mask.repeat(n_head, 1, 1) # (n*b) x .. x .. 
56 | output, attn = self.attention(q, k, v, mask=mask) 57 | 58 | output = output.view(n_head, sz_b, len_q, d_v) 59 | output = output.permute(1, 2, 0, 3).contiguous().view(sz_b, len_q, -1) # b x lq x (n*dv) 60 | 61 | output = self.dropout(self.fc(output)) 62 | output = self.layer_norm(output + residual) 63 | 64 | return output, attn 65 | 66 | class ScaledDotProductAttention(nn.Module): 67 | ''' Scaled Dot-Product Attention ''' 68 | 69 | def __init__(self, temperature, attn_dropout=0.1): 70 | super(ScaledDotProductAttention, self).__init__() 71 | self.temperature = temperature 72 | self.dropout = nn.Dropout(attn_dropout) 73 | self.softmax = nn.Softmax(dim=2) 74 | 75 | def forward(self, q, k, v, mask=None): 76 | 77 | attn = torch.bmm(q, k.transpose(1, 2)) 78 | attn = attn / self.temperature 79 | 80 | if mask is not None: 81 | attn = attn.masked_fill(mask, -np.inf) 82 | 83 | attn = self.softmax(attn) 84 | attn = self.dropout(attn) 85 | output = torch.bmm(attn, v) 86 | 87 | return output, attn 88 | 89 | class PositionwiseFeedForward(nn.Module): 90 | ''' A two-feed-forward-layer module ''' 91 | 92 | def __init__(self, d_in, d_hid, dropout=0.1): 93 | super(PositionwiseFeedForward, self).__init__() 94 | self.w_1 = nn.Conv1d(d_in, d_hid, 1) # position-wise 95 | self.w_2 = nn.Conv1d(d_hid, d_in, 1) # position-wise 96 | self.layer_norm = LayerNorm(d_in) 97 | self.dropout = nn.Dropout(dropout) 98 | 99 | def forward(self, x): 100 | residual = x 101 | output = x.transpose(1, 2) 102 | output = self.w_2(F.relu(self.w_1(output))) 103 | output = output.transpose(1, 2) 104 | output = self.dropout(output) 105 | output = self.layer_norm(output + residual) 106 | return output 107 | 108 | class LayerNorm(nn.Module): 109 | """ 110 | Layer Normalization 111 | """ 112 | def __init__(self, features, eps=1e-6): 113 | super(LayerNorm, self).__init__() 114 | self.gamma = nn.Parameter(torch.ones(features)) 115 | self.beta = nn.Parameter(torch.zeros(features)) 116 | self.eps = eps 117 | 118 | def forward(self, x): 119 | mean = x.mean(-1, keepdim=True) 120 | std = x.std(-1, keepdim=True) 121 | return self.gamma * (x - mean) / (std + self.eps) + self.beta 122 | -------------------------------------------------------------------------------- /tasks/NDH/data/README.md: -------------------------------------------------------------------------------- 1 | # Navigation from Dialog History (NDH) Task Data 2 | 3 | A brief overview of the metadata available in each NDH instance: 4 | 5 | #### Always Available: 6 | | Metadata | Explanation | 7 | |---|---| 8 | | `inst_idx` | The unique index of this task instance. | 9 | | `scan` | The unique scan ID of the house in which this instance took place. | 10 | | `target` | The target object for the dialog this instance was drawn from. | 11 | | `start_pano` | The `heading`, `elevation`, and panorama id `pano` of the position from which the navigator asked the last question. | 12 | | `nav_camera` | A list of camera heading adjustments that occurred since the navigator moved to the most recent navigation node (i.e., looking around before asking a question). | 13 | | `dialog_history` | A list of turns. Each turn has a `nav_idx` (the `nav_history` list index where the utterance was transmitted), a `role` (either 'oracle' or 'navigator'), and a `message` (the utterance). | 14 | | `nav_history` | The navigation nodes traversed by the navigator before the latest question. 
| 15 | 16 | #### Only Available at Training Time: 17 | | Training Metadata | Explanation | 18 | |---|---| 19 | | `game_idx` | The unique index of the dialog from which this instance was drawn. | 20 | | `end_panos` | The navigation nodes that compose the end region. | 21 | | `player_path` | The navigation nodes traversed by the navigator in response to the latest answer. | 22 | | `planner_path` | The navigation nodes shown to the oracle in response to the most recent question (first 5 shortest path steps towards the `end_panos`, if there is no dialog history). | 23 | | `navigator_game_quality` | The 1-5 rating received by the navigator from the oracle in this game. | 24 | | `navigator_avg_quality` | The average 1-5 rating received by the navigator across all games in which they were involved. | 25 | | `oracle_game_quality` | The 1-5 rating received by the oracle from the navigator in this game. | 26 | | `oracle_avg_quality` | The average 1-5 rating received by the oracle across all games in which they were involved. | 27 | | `R2R_success` | The R2R success metric of the player path calculated against the last node in the planner path. | 28 | | `R2R_spl` | The R2R SPL metric of the player path against the end node of the planner path. | 29 | | `R2R_oracle_success` | The R2R success metric calculated as though the player path stopped within three meters of the last node in the planner path, or 0 if it never got close. | 30 | | `R2R_oracle_spl` | The R2R SPL metric calculated as though the player path stopped within three meters of the last node in the planner path, or 0 if it never got close. | 31 | -------------------------------------------------------------------------------- /tasks/NDH/data/download.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | 3 | wget https://cvdn.dev/dataset/NDH/train_val/train.json -P tasks/NDH/data/ 4 | wget https://cvdn.dev/dataset/NDH/train_val/val_seen.json -P tasks/NDH/data/ 5 | wget https://cvdn.dev/dataset/NDH/train_val/val_unseen.json -P tasks/NDH/data/ 6 | wget https://cvdn.dev/dataset/NDH/test_cleaned/test_cleaned.json -P tasks/NDH/data/ 7 | 8 | -------------------------------------------------------------------------------- /tasks/NDH/eval.py: -------------------------------------------------------------------------------- 1 | ''' Evaluation of agent trajectories ''' 2 | 3 | import json 4 | import os 5 | import sys 6 | from collections import defaultdict 7 | import networkx as nx 8 | import numpy as np 9 | import pprint 10 | pp = pprint.PrettyPrinter(indent=4) 11 | 12 | from env import R2RBatch 13 | from utils import load_datasets, load_nav_graphs 14 | from agent import BaseAgent, StopAgent, RandomAgent, ShortestAgent 15 | 16 | 17 | class Evaluation(object): 18 | ''' Results submission format: [{'instr_id': string, 'trajectory':[(viewpoint_id, heading_rads, elevation_rads),] } ] ''' 19 | 20 | def __init__(self, splits, path_type='planner_path'): 21 | self.error_margin = 3.0 22 | self.splits = splits 23 | self.gt = {} 24 | self.instr_ids = [] 25 | self.scans = [] 26 | for item in load_datasets(splits): 27 | self.gt[item['inst_idx']] = item 28 | self.instr_ids.append(item['inst_idx']) 29 | self.scans.append(item['scan']) 30 | 31 | # Add 'trusted_path' to gt metadata if necessary. 
32 | if path_type == 'trusted_path' and 'test' not in splits: 33 | planner_goal = item['planner_path'][-1] 34 | if planner_goal in item['player_path'][1:]: 35 | self.gt[item['inst_idx']]['trusted_path'] = item['player_path'][:] 36 | else: 37 | self.gt[item['inst_idx']]['trusted_path'] = item['planner_path'][:] 38 | 39 | self.scans = set(self.scans) 40 | self.instr_ids = set(self.instr_ids) 41 | self.graphs = load_nav_graphs(self.scans) 42 | self.distances = {} 43 | self.path_type = path_type 44 | for scan,G in self.graphs.iteritems(): # compute all shortest paths 45 | self.distances[scan] = dict(nx.all_pairs_dijkstra_path_length(G)) 46 | 47 | def _get_nearest(self, scan, goal_id, path): 48 | near_id = path[0][0] 49 | near_d = self.distances[scan][near_id][goal_id] 50 | for item in path: 51 | d = self.distances[scan][item[0]][goal_id] 52 | if d < near_d: 53 | near_id = item[0] 54 | near_d = d 55 | return near_id 56 | 57 | def _score_item(self, instr_id, path): 58 | ''' Calculate error based on the final position in trajectory, and also 59 | the closest position (oracle stopping rule). ''' 60 | gt = self.gt[int(instr_id)] 61 | start = gt[self.path_type][0] 62 | assert start == path[0][0], 'Result trajectories should include the start position' 63 | goal = gt[self.path_type][-1] 64 | planner_goal = gt['planner_path'][-1] # for calculating oracle planner success (e.g., passed over desc goal?) 65 | final_position = path[-1][0] 66 | nearest_position = self._get_nearest(gt['scan'], goal, path) 67 | nearest_planner_position = self._get_nearest(gt['scan'], planner_goal, path) 68 | dist_to_end_start = None 69 | dist_to_end_end = None 70 | for end_pano in gt['end_panos']: 71 | d = self.distances[gt['scan']][start][end_pano] 72 | if dist_to_end_start is None or d < dist_to_end_start: 73 | dist_to_end_start = d 74 | d = self.distances[gt['scan']][final_position][end_pano] 75 | if dist_to_end_end is None or d < dist_to_end_end: 76 | dist_to_end_end = d 77 | self.scores['nav_errors'].append(self.distances[gt['scan']][final_position][goal]) 78 | self.scores['oracle_errors'].append(self.distances[gt['scan']][nearest_position][goal]) 79 | self.scores['oracle_plan_errors'].append(self.distances[gt['scan']][nearest_planner_position][planner_goal]) 80 | self.scores['dist_to_end_reductions'].append(dist_to_end_start - dist_to_end_end) 81 | distance = 0 # Work out the length of the path in meters 82 | prev = path[0] 83 | for curr in path[1:]: 84 | if prev[0] != curr[0]: 85 | try: 86 | self.graphs[gt['scan']][prev[0]][curr[0]] 87 | except KeyError as err: 88 | print 'Error: The provided trajectory moves from %s to %s but the navigation graph contains no '\ 89 | 'edge between these viewpoints. Please ensure the provided navigation trajectories '\ 90 | 'are valid, so that trajectory length can be accurately calculated.' 
% (prev[0], curr[0])
91 |                     raise
92 |                 distance += self.distances[gt['scan']][prev[0]][curr[0]]
93 |             prev = curr
94 |         self.scores['trajectory_lengths'].append(distance)
95 |         self.scores['shortest_path_lengths'].append(self.distances[gt['scan']][start][goal])
96 | 
97 |     def score(self, output_file):
98 |         ''' Evaluate each agent trajectory based on how close it got to the goal location '''
99 |         self.scores = defaultdict(list)
100 |         instr_ids = set(self.instr_ids)
101 | 
102 |         item_list = []
103 |         with open(output_file) as f:
104 |             for item in json.load(f):
105 |                 # Check against expected ids
106 |                 if item['inst_idx'] in instr_ids:
107 |                     instr_ids.remove(item['inst_idx'])
108 |                     self._score_item(item['inst_idx'], item['trajectory'])
109 |                     item['dist_to_end_reductions'] = self.scores['dist_to_end_reductions'][-1]
110 |                     item_list.append(item)
111 | 
112 |         with open(output_file + '.a', 'w') as f:
113 |             json.dump(item_list, f)
114 | 
115 |         assert len(instr_ids) == 0, 'Trajectories not provided for %d instruction ids: %s' % (len(instr_ids), instr_ids)
116 |         assert len(self.scores['nav_errors']) == len(self.instr_ids)
117 | 
118 |         num_successes = len([i for i in self.scores['nav_errors'] if i < self.error_margin])
119 |         oracle_successes = len([i for i in self.scores['oracle_errors'] if i < self.error_margin])
120 |         oracle_plan_successes = len([i for i in self.scores['oracle_plan_errors'] if i < self.error_margin])
121 | 
122 |         spls = []
123 |         for err, length, sp in zip(self.scores['nav_errors'], self.scores['trajectory_lengths'], self.scores['shortest_path_lengths']):
124 |             if err < self.error_margin:
125 |                 if sp > 0:
126 |                     spls.append(sp / max(length, sp))
127 |                 else:  # In IF, some Q/A pairs happen when we're already in the goal region, so taking no action is correct.
128 |                     spls.append(1 if length == 0 else 0)
129 |             else:
130 |                 spls.append(0)
131 | 
132 |         score_summary = {
133 |             'length': np.average(self.scores['trajectory_lengths']),
134 |             'nav_error': np.average(self.scores['nav_errors']),
135 |             'oracle success_rate': float(oracle_successes)/float(len(self.scores['oracle_errors'])),
136 |             'success_rate': float(num_successes)/float(len(self.scores['nav_errors'])),
137 |             'spl': np.average(spls),
138 |             'oracle path_success_rate': float(oracle_plan_successes)/float(len(self.scores['oracle_plan_errors'])),
139 |             'dist_to_end_reduction': sum(self.scores['dist_to_end_reductions']) / float(len(self.scores['dist_to_end_reductions']))
140 |         }
141 | 
142 |         assert score_summary['spl'] <= score_summary['success_rate']
143 |         return score_summary, self.scores
144 | 
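For clarity, the 'spl' term computed above is success-weighted path length: SPL = S * sp / max(length, sp), where S is binary success, sp the shortest-path length, and length the executed trajectory length. A worked example with hypothetical numbers:

# Hypothetical episode: shortest path sp = 10.0 m, agent walked length = 12.5 m,
# and it stopped within the 3.0 m error margin (success).
sp, length, success = 10.0, 12.5, True
spl = (sp / max(length, sp)) if success else 0.0   # 10.0 / 12.5 = 0.8
# A failed episode scores 0 regardless of length, which is why the
# summary asserts spl <= success_rate.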
146 | RESULT_DIR = 'tasks/NDH/eval/results/'
147 | 
148 | 
149 | def eval_simple_agents():
150 |     # path_type = 'planner_path'
151 |     # path_type = 'player_path'
152 |     path_type = 'trusted_path'
153 | 
154 |     ''' Run simple baselines on each split. '''
155 |     for split in ['train', 'val_seen', 'val_unseen', 'test']:
156 |         env = R2RBatch(None, batch_size=1, splits=[split], path_type=path_type)
157 |         ev = Evaluation([split], path_type=path_type)
158 | 
159 |         for agent_type in ['Stop', 'Shortest', 'Random']:
160 |             outfile = '%s%s_%s_agent.json' % (RESULT_DIR, split, agent_type.lower())
161 |             agent = BaseAgent.get_agent(agent_type)(env, outfile)
162 |             agent.test()
163 |             agent.write_results()
164 |             score_summary, _ = ev.score(outfile)
165 |             print '\n%s' % agent_type
166 |             pp.pprint(score_summary)
167 | 
168 | 
169 | def eval_seq2seq():
170 |     ''' Eval sequence to sequence models on val splits (iteration selected from training error) '''
171 |     outfiles = [
172 |         RESULT_DIR + 'seq2seq_teacher_imagenet_%s_iter_5000.json',
173 |         RESULT_DIR + 'seq2seq_sample_imagenet_%s_iter_20000.json'
174 |     ]
175 |     for outfile in outfiles:
176 |         for split in ['val_seen', 'val_unseen']:
177 |             ev = Evaluation([split])
178 |             score_summary, _ = ev.score(outfile % split)
179 |             print '\n%s' % outfile
180 |             pp.pprint(score_summary)
181 | 
182 | 
183 | if __name__ == '__main__':
184 | 
185 |     eval_simple_agents()
186 |     #eval_seq2seq()
187 | 
-------------------------------------------------------------------------------- /tasks/NDH/model.py: --------------------------------------------------------------------------------
1 | 
2 | import torch
3 | import torch.nn as nn
4 | from torch.autograd import Variable
5 | import torch.nn.functional as F
6 | from torch.nn.utils.rnn import pack_padded_sequence, pad_packed_sequence
7 | from param import args
8 | from DAN_modules.refer_find_modules import REFER, FIND
9 | from DAN_modules.language_model import WordEmbedding, DynamicRNN
10 | 
11 | class EncoderLSTM(nn.Module):
12 |     ''' Encodes navigation instructions, returning hidden state context (for
13 |         attention methods) and a decoder initial state.
'''
14 | 
15 |     def __init__(self, vocab_size, embedding_size, hidden_size, padding_idx,
16 |                  dropout_ratio, bidirectional=False, num_layers=1):
17 |         super(EncoderLSTM, self).__init__()
18 |         self.embedding_size = embedding_size
19 |         self.hidden_size = hidden_size
20 |         self.drop = nn.Dropout(p=dropout_ratio)
21 |         self.num_directions = 2 if bidirectional else 1
22 |         self.num_layers = num_layers
23 | 
24 |         self.word_embed = nn.Embedding(vocab_size, embedding_size, padding_idx)
25 |         self.sent_embed = nn.LSTM(embedding_size, hidden_size, 2, dropout=dropout_ratio, batch_first=True)
26 |         self.sent_embed = DynamicRNN(self.sent_embed)
27 |         self.hist_embed = nn.LSTM(embedding_size, hidden_size, 2, dropout=dropout_ratio, batch_first=True)
28 |         self.hist_embed = DynamicRNN(self.hist_embed)
29 | 
30 |         self.encoder2decoder = nn.Linear(2 * hidden_size * self.num_directions,
31 |                                          hidden_size * self.num_directions)
32 | 
33 |         self.layer_stack = nn.ModuleList([
34 |             REFER(d_model=512, d_inner=1024, n_head=4, d_k=256, d_v=256, dropout=0.2)
35 |             for _ in range(2)])
36 | 
37 |     def init_state(self, inputs):
38 |         ''' Initialize to zero cell states and hidden states.'''
39 |         batch_size = inputs.size(0)
40 |         h0 = Variable(torch.zeros(
41 |             self.num_layers * self.num_directions,
42 |             batch_size,
43 |             self.hidden_size
44 |         ), requires_grad=False)
45 |         c0 = Variable(torch.zeros(
46 |             self.num_layers * self.num_directions,
47 |             batch_size,
48 |             self.hidden_size
49 |         ), requires_grad=False)
50 |         return h0.cuda(), c0.cuda()
51 | 
52 |     def add_entry(self, mem, hist, hl):
53 |         h_emb = self.word_embed(hist)
54 |         h_emb = self.drop(h_emb)
55 |         _, h_emb, _ = self.hist_embed(h_emb, hl)
56 |         h_emb = h_emb.unsqueeze(1)
57 | 
58 |         if mem is None: mem = h_emb
59 |         else: mem = torch.cat((mem, h_emb), 1)
60 |         return mem
61 | 
62 |     def refer_module(self, mem, q):
63 |         '''
64 |         q : [b, 512]
65 |         mem : [b, number of entries, 512]
66 |         '''
67 |         context = q.unsqueeze(1)
68 |         for enc_layer in self.layer_stack:
69 |             context, _ = enc_layer(context, mem)
70 |         return context.squeeze(1)
71 | 
72 |     def forward(self, inputs, lengths, Last_QA, Last_QA_lengths, hist, hist_lengths, tar, tar_lengths):
73 |         ''' Expects dialog inputs as (batch, seq_len) vocab indices with matching lengths for
74 |             dynamic batching; the encoder reads Last_QA, hist, and tar (inputs/lengths are unused here). '''
75 | 
76 |         q = Last_QA
77 |         ql = Last_QA_lengths
78 |         c = tar
79 |         cl = tar_lengths
80 |         h = hist
81 |         hl = hist_lengths
82 |         # write history embedding to memory
83 |         mem = self.add_entry(None, c, cl)
84 |         enc_outs = []
85 |         q_emb = self.word_embed(q)
86 |         q_emb = self.drop(q_emb)
87 | 
88 |         ctx, q_emb, c_t = self.sent_embed(q_emb, ql)
89 | 
90 |         for i in range(15):  # fixed refer-step budget over the dialog history
91 | 
92 |             his = self.refer_module(mem, q_emb)
93 |             ref_aware = torch.cat((q_emb, his), 1)
94 |             enc_outs.append(ref_aware)
95 | 
96 |             # write history embedding to memory
97 |             if i != 14:  # after the last step there is no further entry to write
98 |                 mem = self.add_entry(mem, h[:, i, :], hl[:, i])
99 | 
100 |         enc_out = torch.stack(enc_outs, 1)
101 |         # enc_out = self.linear(enc_out[:, -1, :])
102 |         decoder_init = nn.Tanh()(self.encoder2decoder(enc_out[:, -1, :]))
103 | 
104 |         mem = torch.cat((mem, q_emb.unsqueeze(1)), 1)
105 |         mem = self.drop(mem)
106 | 
107 |         return mem, decoder_init, c_t
108 | 
109 | 
110 | class SoftDotAttention(nn.Module):
111 |     '''Soft Dot Attention.
112 | 
113 |     Ref: http://www.aclweb.org/anthology/D15-1166
114 |     Adapted from PyTorch OPEN NMT.
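
    In outline, for a query h (batch x dim) and context C (batch x seq_len x dim):
        attn    = softmax(C @ W_in(h))     # batch x seq_len, optionally -inf masked
        c       = attn^T @ C               # weighted context, batch x dim
        h_tilde = tanh(W_out([c; h]))      # returned when output_tilde=True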
115 |     '''
116 | 
117 |     def __init__(self, query_dim, ctx_dim):
118 |         '''Initialize layer.'''
119 |         super(SoftDotAttention, self).__init__()
120 |         self.linear_in = nn.Linear(query_dim, ctx_dim, bias=False)
121 |         self.sm = nn.Softmax(dim=1)
122 |         self.linear_out = nn.Linear(query_dim + ctx_dim, query_dim, bias=False)
123 |         self.tanh = nn.Tanh()
124 | 
125 |     def forward(self, h, context, mask=None,
126 |                 output_tilde=True, output_prob=True):
127 |         '''Propagate h through the network.
128 | 
129 |         h: batch x dim
130 |         context: batch x seq_len x dim
131 |         mask: batch x seq_len indices to be masked
132 |         '''
133 |         target = self.linear_in(h).unsqueeze(2)  # batch x dim x 1
134 | 
135 |         # Get attention
136 |         attn = torch.bmm(context, target).squeeze(2)  # batch x seq_len
137 |         logit = attn
138 | 
139 |         if mask is not None:
140 |             # -Inf masking prior to the softmax
141 |             attn.masked_fill_(mask, -float('inf'))
142 |         # Softmax over seq_len; dim=1 is set in the constructor, so no dim argument is needed here.
143 |         attn = self.sm(attn)
144 |         attn3 = attn.view(attn.size(0), 1, attn.size(1))  # batch x 1 x seq_len
145 | 
146 |         weighted_context = torch.bmm(attn3, context).squeeze(1)  # batch x dim
147 |         if not output_prob:
148 |             attn = logit
149 |         if output_tilde:
150 |             h_tilde = torch.cat((weighted_context, h), 1)
151 |             h_tilde = self.tanh(self.linear_out(h_tilde))
152 |             return h_tilde, attn
153 |         else:
154 |             return weighted_context, attn
155 | 
156 | 
157 | class AttnDecoderLSTM(nn.Module):
158 |     ''' An unrolled LSTM with attention over instructions for decoding navigation actions. '''
159 | 
160 |     def __init__(self, embedding_size, hidden_size,
161 |                  dropout_ratio, feature_size=2048+4):
162 |         super(AttnDecoderLSTM, self).__init__()
163 |         self.embedding_size = embedding_size
164 |         self.feature_size = feature_size
165 |         self.hidden_size = hidden_size
166 |         self.embedding = nn.Sequential(
167 |             nn.Linear(args.angle_feat_size, self.embedding_size),
168 |             nn.Tanh()
169 |         )
170 |         self.drop = nn.Dropout(p=dropout_ratio)
171 |         self.drop_env = nn.Dropout(p=args.featdropout)
172 |         self.lstm = nn.LSTMCell(embedding_size+feature_size, hidden_size)
173 |         self.feat_att_layer = SoftDotAttention(hidden_size, feature_size)
174 |         self.attention_layer = SoftDotAttention(hidden_size, hidden_size)
175 |         self.candidate_att_layer = SoftDotAttention(hidden_size, feature_size)
176 | 
177 |     def forward(self, action, feature, cand_feat,
178 |                 h_0, prev_h1, c_0,
179 |                 ctx, ctx_mask=None,
180 |                 already_dropfeat=False):
181 |         '''
182 |         Takes a single step in the decoder LSTM (allowing sampling).
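
        One step, in order: embed the action's angle features, attend over the
        panoramic `feature` grid, run one LSTMCell step, attend over the encoder
        context `ctx`, then score the candidates in `cand_feat` (returned as `logit`).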
183 | action: batch x angle_feat_size 184 | feature: batch x 36 x (feature_size + angle_feat_size) 185 | cand_feat: batch x cand x (feature_size + angle_feat_size) 186 | h_0: batch x hidden_size 187 | prev_h1: batch x hidden_size 188 | c_0: batch x hidden_size 189 | ctx: batch x seq_len x dim 190 | ctx_mask: batch x seq_len - indices to be masked 191 | already_dropfeat: used in EnvDrop 192 | ''' 193 | action_embeds = self.embedding(action) 194 | 195 | # Adding Dropout 196 | action_embeds = self.drop(action_embeds) 197 | 198 | if not already_dropfeat: 199 | # Dropout the raw feature as a common regularization 200 | feature[..., :-args.angle_feat_size] = self.drop_env(feature[..., :-args.angle_feat_size]) # Do not drop the last args.angle_feat_size (position feat) 201 | 202 | prev_h1_drop = self.drop(prev_h1) 203 | attn_feat, _ = self.feat_att_layer(prev_h1_drop, feature, output_tilde=False) 204 | 205 | concat_input = torch.cat((action_embeds, attn_feat), 1) 206 | 207 | h_1, c_1 = self.lstm(concat_input, (prev_h1, c_0)) 208 | 209 | h_1_drop = self.drop(h_1) 210 | h_tilde, alpha = self.attention_layer(h_1_drop, ctx, ctx_mask) 211 | 212 | # Adding Dropout 213 | h_tilde_drop = self.drop(h_tilde) 214 | 215 | if not already_dropfeat: 216 | cand_feat[..., :-args.angle_feat_size] = self.drop_env(cand_feat[..., :-args.angle_feat_size]) 217 | 218 | _, logit = self.candidate_att_layer(h_tilde_drop, cand_feat, output_prob=False) 219 | 220 | return h_1, c_1, logit, h_tilde -------------------------------------------------------------------------------- /tasks/NDH/param.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | parser = argparse.ArgumentParser() 3 | parser.add_argument('--path_type', type=str, required=True, 4 | help='planner_path, player_path, or trusted_path') 5 | parser.add_argument('--history', type=str, required=True, 6 | help='none, target, oracle_ans, nav_q_oracle_ans, or all') 7 | parser.add_argument('--feedback', type=str, required=True, 8 | help='teacher or sample') 9 | parser.add_argument('--eval_type', type=str, required=True, 10 | help='val or test') 11 | parser.add_argument('--blind', action='store_true', required=False, 12 | help='whether to replace the ResNet encodings with zero vectors at inference time') 13 | parser.add_argument('--angle_feat_size', type=int, default=4) 14 | parser.add_argument('--num_view', type=int, default=36) 15 | parser.add_argument('--featdropout', type=float, default=0.3) 16 | parser.add_argument('--ignoreid', type=int, default=-100) 17 | parser.add_argument('--prefix', type=str, default="v1", required=True) 18 | args = parser.parse_args() 19 | -------------------------------------------------------------------------------- /tasks/NDH/requirements.txt: -------------------------------------------------------------------------------- 1 | backports.functools-lru-cache==1.4 2 | cycler==0.10.0 3 | decorator==4.1.2 4 | matplotlib==2.1.0 5 | networkx==2.0 6 | numpy==1.13.3 7 | olefile==0.44 8 | pandas==0.21.0 9 | Pillow==4.3.0 10 | pyparsing==2.2.0 11 | python-dateutil==2.6.1 12 | pytz==2017.3 13 | PyYAML==3.12 14 | six==1.11.0 15 | subprocess32==3.2.7 16 | torch==1.0.0 17 | torchvision==0.1.9 18 | -------------------------------------------------------------------------------- /tasks/NDH/summarize_perf.py: -------------------------------------------------------------------------------- 1 | ''' Plotting losses etc. 
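Reads the per-run CSV training logs that train.py writes, prints the best value of
each metric per run, and emits partial rows for pasting into a LaTeX results table.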
'''
2 | 
3 | 
4 | import numpy as np
5 | import pandas as pd
6 | import os
7 | 
8 | PLOT_DIR = 'tasks/NDH/plots/'
9 | dfs = {}
10 | # val-seq2seq-all-planner_path-sample-imagenet-log
11 | summary = {"val_seen": {}, "val_unseen": {}, "test": {}}
12 | for path_type, path_len in [['planner_path', 20], ['player_path', 80], ['trusted_path', 80]]:
13 |     print(path_type)
14 |     for eval_type in ['val', 'test']:
15 |         print('\t%s (%d)' % (eval_type, path_len))
16 |         for feedback in ['sample']:
17 |             print('\t\t%s' % feedback)
18 |             for history in ['none', 'target', 'oracle_ans', 'nav_q_oracle_ans', 'all']:
19 |                 for blind in [True, False]:
20 |                     print('\t\t\t%s (%s)' % (history, 'blind' if blind else 'vision'))
21 |                     if path_len is None:
22 |                         if blind:
23 |                             log = '%s-seq2seq-%s-%s-%s-imagenet-blind-log.csv' % (eval_type, history,
24 |                                                                                   path_type, feedback)
25 |                         else:
26 |                             log = '%s-seq2seq-%s-%s-%s-imagenet-log.csv' % (eval_type, history, path_type, feedback)
27 |                     else:
28 |                         if blind:
29 |                             log = '%s-seq2seq-%s-%s-%d-%s-imagenet-blind-log.csv' % (eval_type, history,
30 |                                                                                      path_type, path_len, feedback)
31 |                         else:
32 |                             log = '%s-seq2seq-%s-%s-%d-%s-imagenet-log.csv' % (eval_type, history,
33 |                                                                                path_type, path_len, feedback)
34 |                     fn = os.path.join(PLOT_DIR, log)
35 |                     if os.path.isfile(fn):
36 |                         dfs[log] = pd.read_csv(fn)
37 |                         print('\t\t\t\t%d' % len(dfs[log]))
38 |                         metrics = [
39 |                             'val_seen success_rate',
40 |                             'val_seen oracle path_success_rate',
41 |                             'val_seen dist_to_end_reduction',
42 |                             'val_unseen success_rate',
43 |                             'val_unseen oracle path_success_rate',
44 |                             'val_unseen dist_to_end_reduction'] if eval_type == 'val' else [
45 |                             'test success_rate',
46 |                             'test oracle path_success_rate',
47 |                             'test dist_to_end_reduction']
48 |                         for metric in metrics:
49 |                             v = max(dfs[log][metric])
50 |                             print('\t\t\t\t%s\t%.3f' % (metric, v))
51 | 
52 |                         # Populate summary.
53 |                         if len(dfs[log]) == 200:  # only fully finished runs (20000 iters / log_every 100 = 200 rows).
54 |                             for cond in ['val_seen', 'val_unseen', 'test']:
55 |                                 abl = history + "-%s" % ('blind' if blind else 'vis')
56 |                                 if abl not in summary[cond]:
57 |                                     summary[cond][abl] = {"if": {}, "gd": {}}
58 |                                 ifm = '%s oracle path_success_rate' % cond
59 |                                 if ifm in dfs[log]:
60 |                                     summary[cond][abl]["if"][path_type] = list(dfs[log][ifm])
61 |                                 gdm = '%s dist_to_end_reduction' % cond
62 |                                 if gdm in dfs[log]:
63 |                                     summary[cond][abl]["gd"][path_type] = list(dfs[log][gdm])
64 | 
65 | # Print partial table rows for easy copy/paste to latex.
66 | print('')
67 | for cond in ['val_seen', 'val_unseen', 'test']:
68 |     for history in ['none', 'target', 'oracle_ans', 'nav_q_oracle_ans', 'all']:
69 |         for blind in [True, False]:
70 |             abl = history + "-%s" % ('blind' if blind else 'vis')
71 |             if abl not in summary[cond]:
72 |                 continue
73 |             l = '%s\t%s\t' % (cond, abl)
74 |             ns = []
75 |             for metric in ['if', 'gd']:
76 |                 for sup in ['planner_path', 'player_path', 'trusted_path']:
77 |                     if sup in summary[cond][abl][metric]:
78 |                         if cond == 'test':  # test performance is reported at the epoch of best val_unseen GD performance.
79 |                             if sup not in summary["val_unseen"][abl]["gd"]:
80 |                                 print("val_unseen not yet finished for %s" % abl)
81 |                                 ns.append(-2)
82 |                             else:
83 |                                 b = max(summary["val_unseen"][abl]["gd"][sup])
84 |                                 best_idx = summary["val_unseen"][abl]["gd"][sup].index(b)
85 |                                 ns.append(summary[cond][abl][metric][sup][best_idx])
86 |                         else:
87 |                             ns.append(max(summary[cond][abl][metric][sup]))
88 |                     else:
89 |                         ns.append(-1)
90 |             l += ' & '.join(["$%.1f$" % (n*100) for n in ns[:3]]) + ' & ' + \
91 |                  ' & '.join(["$%.2f$" % n for n in ns[3:]])
92 |             print(l)
93 | 
-------------------------------------------------------------------------------- /tasks/NDH/train.py: --------------------------------------------------------------------------------
1 | import sys
2 | 
3 | import torch
4 | import torch.nn as nn
5 | from torch.autograd import Variable
6 | from torch import optim
7 | import torch.nn.functional as F
8 | 
9 | import os
10 | import time
11 | import numpy as np
12 | import pandas as pd
13 | from collections import defaultdict
14 | 
15 | from utils import read_vocab, write_vocab, build_vocab, Tokenizer, padding_idx, timeSince
16 | from env import R2RBatch
17 | from model import EncoderLSTM, AttnDecoderLSTM
18 | from agent import Seq2SeqAgent
19 | from eval import Evaluation
20 | from param import args
21 | 
22 | import warnings
23 | warnings.filterwarnings("ignore")
24 | 
25 | TRAIN_VOCAB = 'tasks/NDH/data/train_vocab.txt'
26 | TRAINVAL_VOCAB = 'tasks/NDH/data/trainval_vocab.txt'
27 | 
28 | prefix = args.prefix
29 | 
30 | RESULT_DIR = 'tasks/NDH/results/' + prefix + '/'  # trailing slash: paths below are built by concatenation
31 | SNAPSHOT_DIR = 'tasks/NDH/snapshots/' + prefix + '/'
32 | PLOT_DIR = 'tasks/NDH/plots/' + prefix + '/'
33 | 
34 | if not os.path.exists(RESULT_DIR):
35 |     os.makedirs(RESULT_DIR)
36 | if not os.path.exists(SNAPSHOT_DIR):
37 |     os.makedirs(SNAPSHOT_DIR)
38 | if not os.path.exists(PLOT_DIR):
39 |     os.makedirs(PLOT_DIR)
40 | 
41 | IMAGENET_FEATURES = 'img_features/ResNet-152-imagenet.tsv'
42 | 
43 | # Training settings.
44 | agent_type = 'seq2seq'
45 | 
46 | # Fixed params from MP.
47 | features = IMAGENET_FEATURES
48 | batch_size = 100
49 | # word_embedding_size = 256
50 | word_embedding_size = 256
51 | action_embedding_size = 32
52 | target_embedding_size = 32
53 | hidden_size = 512
54 | bidirectional = False
55 | dropout_ratio = 0.5
56 | learning_rate = 0.0001
57 | weight_decay = 0.0005
58 | 
59 | def train(train_env, encoder, decoder, n_iters, path_type, history, feedback_method, max_episode_len, MAX_INPUT_LENGTH, model_prefix,
60 |     log_every=100, val_envs=None):
61 |     ''' Train on training set, validating on both seen and unseen.
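
        Each log_every-sized interval: train, then for every val env measure (a) loss
        under training conditions (dropout on, same feedback) and (b) metrics under
        test conditions (argmax decoding); results are appended to a CSV under
        PLOT_DIR and encoder/decoder snapshots are written to SNAPSHOT_DIR.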
'''
62 |     if val_envs is None:
63 |         val_envs = {}
64 | 
65 |     if agent_type == 'seq2seq':
66 |         agent = Seq2SeqAgent(train_env, "", encoder, decoder, max_episode_len)
67 |     else:
68 |         sys.exit("Unrecognized agent_type '%s'" % agent_type)
69 |     print('Training a %s agent with %s feedback' % (agent_type, feedback_method))
70 |     encoder_optimizer = optim.Adam(encoder.parameters(), lr=learning_rate, weight_decay=weight_decay)
71 |     decoder_optimizer = optim.Adam(decoder.parameters(), lr=learning_rate, weight_decay=weight_decay)
72 | 
73 |     data_log = defaultdict(list)
74 |     start = time.time()
75 |     print('Start training')
76 |     for idx in range(0, n_iters, log_every):
77 | 
78 |         interval = min(log_every, n_iters - idx)
79 |         iter = idx + interval
80 |         data_log['iteration'].append(iter)
81 | 
82 |         # Train for log_every interval
83 |         agent.train(encoder_optimizer, decoder_optimizer, interval, feedback=feedback_method)
84 |         train_losses = np.array(agent.losses)
85 |         assert len(train_losses) == interval
86 |         train_loss_avg = np.average(train_losses)
87 |         data_log['train loss'].append(train_loss_avg)
88 |         loss_str = 'train loss: %.4f' % train_loss_avg
89 | 
90 |         # Run validation
91 |         for env_name, (env, evaluator) in val_envs.items():
92 |             agent.env = env
93 |             agent.results_path = '%s%s_%s_iter_%d.json' % (RESULT_DIR, model_prefix, env_name, iter)
94 |             # Get validation loss under the same conditions as training
95 |             agent.test(use_dropout=True, feedback=feedback_method, allow_cheat=True)
96 |             val_losses = np.array(agent.losses)
97 |             val_loss_avg = np.average(val_losses)
98 |             data_log['%s loss' % env_name].append(val_loss_avg)
99 |             # Get validation distance from goal under test evaluation conditions
100 |             agent.test(use_dropout=False, feedback='argmax')
101 |             agent.write_results()
102 |             score_summary, _ = evaluator.score(agent.results_path)
103 |             loss_str += ', %s loss: %.4f' % (env_name, val_loss_avg)
104 |             for metric, val in score_summary.items():
105 |                 data_log['%s %s' % (env_name, metric)].append(val)
106 |                 if metric in ['success_rate', 'oracle success_rate', 'oracle path_success_rate', 'dist_to_end_reduction']:
107 |                     loss_str += ', %s: %.3f' % (metric, val)
108 | 
109 |         agent.env = train_env
110 | 
111 |         print('%s (%d %d%%) %s' % (timeSince(start, float(iter)/n_iters),
112 |                                     iter, float(iter)/n_iters*100, loss_str))
113 |         df = pd.DataFrame(data_log)
114 |         df.set_index('iteration', inplace=True)
115 |         df_path = '%s%s-log.csv' % (PLOT_DIR, model_prefix)
116 |         df.to_csv(df_path)
117 | 
118 |         split_string = "-".join(train_env.splits)
119 |         enc_path = '%s%s_%s_enc_iter_%d' % (SNAPSHOT_DIR, model_prefix, split_string, iter)
120 |         dec_path = '%s%s_%s_dec_iter_%d' % (SNAPSHOT_DIR, model_prefix, split_string, iter)
121 |         agent.save(enc_path, dec_path)
122 | 
123 |     print('Finish training')
124 | def setup():
125 |     torch.manual_seed(1)
126 |     torch.cuda.manual_seed(1)
127 |     # Check for vocabs
128 |     if not os.path.exists(TRAIN_VOCAB):
129 |         write_vocab(build_vocab(splits=['train']), TRAIN_VOCAB)
130 |     if not os.path.exists(TRAINVAL_VOCAB):
131 |         write_vocab(build_vocab(splits=['train', 'val_seen', 'val_unseen']), TRAINVAL_VOCAB)
132 | 
133 | 
134 | def test_submission(path_type, max_episode_len, history, MAX_INPUT_LENGTH, feedback_method, n_iters, model_prefix, blind):
135 |     ''' Train on combined training and validation sets, and generate test submission.
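
        Builds the vocab from train + both val splits, trains for the full n_iters
        without validation envs, then decodes the test split with argmax and writes
        the submission JSON.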
'''
136 | 
137 |     setup()
138 | 
139 |     # Create a batch training environment that will also preprocess text
140 |     vocab = read_vocab(TRAINVAL_VOCAB)
141 |     tok = Tokenizer(vocab=vocab, encoding_length=MAX_INPUT_LENGTH)
142 |     train_env = R2RBatch(features, batch_size=batch_size, splits=['train', 'val_seen', 'val_unseen'], tokenizer=tok,
143 |                          path_type=path_type, history=history, blind=blind)
144 | 
145 |     # Build models and train
146 |     enc_hidden_size = hidden_size//2 if bidirectional else hidden_size
147 |     encoder = EncoderLSTM(len(vocab), word_embedding_size, enc_hidden_size, padding_idx,
148 |                           dropout_ratio, bidirectional=bidirectional).cuda()
149 |     decoder = AttnDecoderLSTM(action_embedding_size, hidden_size, dropout_ratio).cuda()
150 | 
151 |     train(train_env, encoder, decoder, n_iters, path_type, history, feedback_method, max_episode_len, MAX_INPUT_LENGTH, model_prefix)
152 | 
153 |     # Generate test submission
154 |     test_env = R2RBatch(features, batch_size=batch_size, splits=['test'], tokenizer=tok,
155 |                         path_type=path_type, history=history, blind=blind)
156 |     agent = Seq2SeqAgent(test_env, "", encoder, decoder, max_episode_len)
157 |     agent.results_path = '%s%s_%s_iter_%d.json' % (RESULT_DIR, model_prefix, 'test', 5000)
158 |     agent.test(use_dropout=False, feedback='argmax')
159 |     agent.write_results()
160 | 
161 | 
162 | # NOTE: only available to the authors for now, for writing the paper.
163 | def train_test(path_type, max_episode_len, history, MAX_INPUT_LENGTH, feedback_method, n_iters, model_prefix, blind):
164 |     ''' Train on the training set, and validate on the test split. '''
165 | 
166 |     setup()
167 |     # Create a batch training environment that will also preprocess text
168 |     vocab = read_vocab(TRAINVAL_VOCAB)
169 |     tok = Tokenizer(vocab=vocab, encoding_length=MAX_INPUT_LENGTH)
170 |     train_env = R2RBatch(features, batch_size=batch_size, splits=['train', 'val_seen', 'val_unseen'], tokenizer=tok,
171 |                          path_type=path_type, history=history, blind=blind)
172 | 
173 |     # Create validation environments
174 |     val_envs = {split: (R2RBatch(features, batch_size=batch_size, splits=[split],
175 |                                  tokenizer=tok, path_type=path_type, history=history, blind=blind),
176 |                         Evaluation([split], path_type=path_type)) for split in ['test']}
177 | 
178 |     # Build models and train
179 |     enc_hidden_size = hidden_size // 2 if bidirectional else hidden_size
180 |     encoder = EncoderLSTM(len(vocab), word_embedding_size, enc_hidden_size, padding_idx,
181 |                           dropout_ratio, bidirectional=bidirectional).cuda()
182 |     decoder = AttnDecoderLSTM(action_embedding_size, hidden_size, dropout_ratio).cuda()
183 |     train(train_env, encoder, decoder, n_iters, path_type, history, feedback_method, max_episode_len, MAX_INPUT_LENGTH,
184 |           model_prefix, val_envs=val_envs)
185 | 
186 | 
187 | def train_val(path_type, max_episode_len, history, MAX_INPUT_LENGTH, feedback_method, n_iters, model_prefix, blind):
188 |     ''' Train on the training set, and validate on seen and unseen splits.
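
        This is the setting selected by --eval_type val: train on the train split
        only, with val_seen and val_unseen evaluated every log interval.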
'''
189 | 
190 |     setup()
191 |     # Create a batch training environment that will also preprocess text
192 |     vocab = read_vocab(TRAIN_VOCAB)
193 |     tok = Tokenizer(vocab=vocab, encoding_length=MAX_INPUT_LENGTH)
194 |     train_env = R2RBatch(features, batch_size=batch_size, splits=['train'], tokenizer=tok,
195 |                          path_type=path_type, history=history, blind=blind)
196 | 
197 |     # Create validation environments
198 |     val_envs = {split: (R2RBatch(features, batch_size=batch_size, splits=[split],
199 |                                  tokenizer=tok, path_type=path_type, history=history, blind=blind),
200 |                         Evaluation([split], path_type=path_type)) for split in ['val_seen', 'val_unseen']}
201 | 
202 |     # Build models and train
203 |     enc_hidden_size = hidden_size//2 if bidirectional else hidden_size
204 |     encoder = EncoderLSTM(len(vocab), word_embedding_size, enc_hidden_size, padding_idx,
205 |                           dropout_ratio, bidirectional=bidirectional).cuda()
206 |     decoder = AttnDecoderLSTM(action_embedding_size, hidden_size, dropout_ratio).cuda()
207 |     train(train_env, encoder, decoder, n_iters,
208 |           path_type, history, feedback_method, max_episode_len, MAX_INPUT_LENGTH, model_prefix, val_envs=val_envs)
209 | 
210 | 
211 | if __name__ == "__main__":
212 | 
213 |     assert args.path_type in ['planner_path', 'player_path', 'trusted_path']
214 |     assert args.history in ['none', 'target', 'oracle_ans', 'nav_q_oracle_ans', 'all']
215 |     assert args.feedback in ['sample', 'teacher']
216 |     assert args.eval_type in ['val', 'test']
217 | 
218 |     blind = args.blind
219 | 
220 |     # Set default args.
221 |     path_type = args.path_type
222 |     # In MP, max_episode_len = 20 while the average hop range is [4, 7], i.e., ~3x the max.
223 |     # max_episode_len has to account for turns; this heuristically allowed for about 1 turn per hop.
224 |     if path_type == 'planner_path':
225 |         max_episode_len = 20  # [1, 6], i.e., ~3x max
226 |     else:
227 |         max_episode_len = 80  # [2, 41], i.e., ~2x max (120 ~3x) (80 ~2x) [for player/trusted paths]
228 | 
229 |     # Input settings.
230 |     history = args.history
231 |     # In MP, MAX_INPUT_LEN = 80 while the average utterance length is 29, i.e., a bit less than 3x the average.
232 |     if history == 'none':
233 |         MAX_INPUT_LENGTH = 1  # [<EOS>] fixed length.
234 |     elif history == 'target':
235 |         MAX_INPUT_LENGTH = 3  # [<TAR> target <EOS>] fixed length.
236 |     elif history == 'oracle_ans':
237 |         MAX_INPUT_LENGTH = 70  # 16.16+/-9.67 oracle utterance length; 35.5 at +2 stddevs. 71 is double that.
238 |     elif history == 'nav_q_oracle_ans':
239 |         MAX_INPUT_LENGTH = 120  # 11.24+/-6.43 nav question length (plus oracle average); 24.1 at +2 std. 71+48 ~ 120 doubles both per QA.
240 |     else:  # i.e., 'all'
241 |         MAX_INPUT_LENGTH = 120 * 6  # 4.93+/-3.21 turns -> 2.465+/-1.605 Q/A. 5.67 at +2 std. Call it 6 (real max 13).
242 | 
243 |     # Training settings.
244 |     feedback_method = args.feedback
245 |     n_iters = 20000
246 | 
247 |     # Model prefix to uniquely id this instance.
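    # e.g., 'val-seq2seq-all-trusted_path-80-sample-imagenet' (with '-blind' appended
    # below when --blind is set); summarize_perf.py looks for '<model_prefix>-log.csv'.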
248 |     model_prefix = '%s-seq2seq-%s-%s-%d-%s-imagenet' % (args.eval_type, history, path_type, max_episode_len, feedback_method)
249 |     if blind:
250 |         model_prefix += '-blind'
251 | 
252 |     if args.eval_type == 'val':
253 |         train_val(path_type, max_episode_len, history, MAX_INPUT_LENGTH, feedback_method, n_iters, model_prefix, blind)
254 |     else:
255 |         train_test(path_type, max_episode_len, history, MAX_INPUT_LENGTH, feedback_method, n_iters, model_prefix, blind)
256 | 
257 |     # test_submission(path_type, max_episode_len, history, MAX_INPUT_LENGTH, feedback_method, n_iters, model_prefix, blind)
-------------------------------------------------------------------------------- /tasks/NDH/utils.py: --------------------------------------------------------------------------------
1 | ''' Utils for io, language, connectivity graphs etc '''
2 | 
3 | import os
4 | import sys
5 | import re
6 | import string
7 | import json
8 | import time
9 | import math
10 | from collections import Counter
11 | import numpy as np
12 | import networkx as nx
13 | from param import args
14 | import torch
15 | 
16 | 
17 | def length2mask(length, size=None):
18 |     batch_size = len(length)
19 |     size = int(max(length)) if size is None else size
20 |     mask = (torch.arange(size, dtype=torch.int64).unsqueeze(0).repeat(batch_size, 1)
21 |             > (torch.LongTensor(length) - 1).unsqueeze(1)).cuda()
22 |     return mask
23 | 
24 | def new_simulator():
25 |     import MatterSim
26 |     # Simulator image parameters
27 |     WIDTH = 640
28 |     HEIGHT = 480
29 |     VFOV = 60
30 | 
31 |     sim = MatterSim.Simulator()
32 |     sim.setRenderingEnabled(False)
33 |     sim.setCameraResolution(WIDTH, HEIGHT)
34 |     sim.setCameraVFOV(math.radians(VFOV))
35 |     sim.setDiscretizedViewingAngles(True)
36 |     sim.initialize()
37 | 
38 |     return sim
39 | 
40 | def angle_feature(heading, elevation):
41 |     import math
42 |     # twopi = math.pi * 2
43 |     # heading = (heading + twopi) % twopi  # From 0 ~ 2pi
44 |     # It will be the same
45 |     return np.array([math.sin(heading), math.cos(heading),
46 |                      math.sin(elevation), math.cos(elevation)] * (args.angle_feat_size // 4),
47 |                     dtype=np.float32)
48 | 
49 | def get_point_angle_feature(baseViewId=0):
50 |     sim = new_simulator()
51 | 
52 |     feature = np.empty((36, args.angle_feat_size), np.float32)
53 |     base_heading = (baseViewId % 12) * math.radians(30)
54 |     for ix in range(36):
55 |         if ix == 0:
56 |             sim.newEpisode(['ZMojNkEp431'], ['2f4d90acd4024c269fb0efe49a8ac540'], [0], [math.radians(-30)])
57 |         elif ix % 12 == 0:
58 |             sim.makeAction([0], [1.0], [1.0])
59 |         else:
60 |             sim.makeAction([0], [1.0], [0])
61 | 
62 |         state = sim.getState()[0]
63 |         assert state.viewIndex == ix
64 | 
65 |         heading = state.heading - base_heading
66 | 
67 |         feature[ix, :] = angle_feature(heading, state.elevation)
68 |     return feature
69 | 
70 | def get_all_point_angle_feature():
71 |     return [get_point_angle_feature(baseViewId) for baseViewId in range(36)]
72 | 
73 | # padding, unknown word, end of sentence
74 | base_vocab = ['<PAD>', '<UNK>', '<EOS>', '