├── .gitmodules
├── CMakeLists.txt
├── Dockerfile
├── LICENSE
├── README.md
├── cmake
│   └── Modules
│       └── FindNumPy.cmake
├── connectivity
│   ├── 17DRP5sb8fy_connectivity.json
│   ├── 1LXtFkjw3qL_connectivity.json
│   ├── 1pXnuDYAj8r_connectivity.json
│   ├── 29hnd4uzFmX_connectivity.json
│   ├── 2azQ1b91cZZ_connectivity.json
│   ├── 2n8kARJN3HM_connectivity.json
│   ├── 2t7WUuJeko7_connectivity.json
│   ├── 5LpN3gDmAk7_connectivity.json
│   ├── 5ZKStnWn8Zo_connectivity.json
│   ├── 5q7pvUzZiYa_connectivity.json
│   ├── 759xd9YjKW5_connectivity.json
│   ├── 7y3sRwLe3Va_connectivity.json
│   ├── 8194nk5LbLH_connectivity.json
│   ├── 82sE5b5pLXE_connectivity.json
│   ├── 8WUmhLawc2A_connectivity.json
│   ├── ARNzJeq3xxb_connectivity.json
│   ├── B6ByNegPMKs_connectivity.json
│   ├── D7G3Y4RVNrH_connectivity.json
│   ├── D7N2EKCX4Sj_connectivity.json
│   ├── E9uDoFAP3SH_connectivity.json
│   ├── EDJbREhghzL_connectivity.json
│   ├── EU6Fwq7SyZv_connectivity.json
│   ├── GdvgFV5R1Z5_connectivity.json
│   ├── HxpKQynjfin_connectivity.json
│   ├── JF19kD82Mey_connectivity.json
│   ├── JeFG25nYj2p_connectivity.json
│   ├── JmbYfDe2QKZ_connectivity.json
│   ├── PX4nDJXEHrG_connectivity.json
│   ├── Pm6F8kyY3z2_connectivity.json
│   ├── PuKPg4mmafe_connectivity.json
│   ├── QUCTc6BB5sX_connectivity.json
│   ├── README.md
│   ├── RPmz2sHmrrY_connectivity.json
│   ├── S9hNv5qa7GM_connectivity.json
│   ├── SN83YJsR3w2_connectivity.json
│   ├── TbHJrupSAjP_connectivity.json
│   ├── ULsKaCPVFJR_connectivity.json
│   ├── UwV83HsGsw3_connectivity.json
│   ├── Uxmj2M2itWa_connectivity.json
│   ├── V2XKFyX4ASd_connectivity.json
│   ├── VFuaQ6m2Qom_connectivity.json
│   ├── VLzqgDo317F_connectivity.json
│   ├── VVfe2KiqLaN_connectivity.json
│   ├── Vt2qJdWjCF2_connectivity.json
│   ├── Vvot9Ly1tCj_connectivity.json
│   ├── VzqfbhrpDEA_connectivity.json
│   ├── WYY7iVyf5p8_connectivity.json
│   ├── X7HyMhZNoso_connectivity.json
│   ├── XcA2TqTSSAj_connectivity.json
│   ├── YFuZgdQ5vWj_connectivity.json
│   ├── YVUC4YcDtcY_connectivity.json
│   ├── YmJkqBEsHnH_connectivity.json
│   ├── Z6MFQCViBuw_connectivity.json
│   ├── ZMojNkEp431_connectivity.json
│   ├── aayBHfsNo7d_connectivity.json
│   ├── ac26ZMwG7aT_connectivity.json
│   ├── b8cTxDM8gDG_connectivity.json
│   ├── cV4RVeZvu5T_connectivity.json
│   ├── dhjEzFoUFzH_connectivity.json
│   ├── e9zR4mvMWw7_connectivity.json
│   ├── fzynW3qQPVF_connectivity.json
│   ├── gTV8FGcVJC9_connectivity.json
│   ├── gYvKGZ5eRqb_connectivity.json
│   ├── gZ6f7yhEvPG_connectivity.json
│   ├── gxdoqLR6rwA_connectivity.json
│   ├── i5noydFURQK_connectivity.json
│   ├── jh4fc5c5qoQ_connectivity.json
│   ├── jtcxE69GiFV_connectivity.json
│   ├── kEZ7cmS4wCh_connectivity.json
│   ├── mJXqzFtmKg4_connectivity.json
│   ├── oLBMNvg9in8_connectivity.json
│   ├── p5wJjkQkbXX_connectivity.json
│   ├── pLe4wQe7qrG_connectivity.json
│   ├── pRbA3pwrgk9_connectivity.json
│   ├── pa4otMbVnkk_connectivity.json
│   ├── q9vSo1VnCiC_connectivity.json
│   ├── qoiz87JEwZ2_connectivity.json
│   ├── r1Q1Z4BcV1o_connectivity.json
│   ├── r47D5H71a5s_connectivity.json
│   ├── rPc6DW4iMge_connectivity.json
│   ├── rqfALeAoiTq_connectivity.json
│   ├── s8pcmisQ38h_connectivity.json
│   ├── sKLMLpTHeUy_connectivity.json
│   ├── sT4fr6TAbpF_connectivity.json
│   ├── scans.txt
│   ├── scans_dialog.txt
│   ├── uNb9QFRL6hY_connectivity.json
│   ├── ur6pFq6Qu1A_connectivity.json
│   ├── vyrNrziPKCB_connectivity.json
│   ├── wc2JMjhGNzB_connectivity.json
│   ├── x8F5xyUWy9e_connectivity.json
│   ├── yqstnuAEVhm_connectivity.json
│   └── zsNo4HB9uLZ_connectivity.json
├── img_features
│   └── .gitkeep
├── include
│   ├── Benchmark.hpp
│   ├── Catch.hpp
│   ├── MatterSim.hpp
│   ├── NavGraph.hpp
│   └── cbf.h
├── models
│   └── ResNet-152-deploy.prototxt
├── scripts
│   ├── depth_to_skybox.py
│   ├── downsize_skybox.py
│   ├── fill_depth.py
│   ├── precompute_img_features.py
│   ├── precompute_optimal_policies.py
│   └── timer.py
├── src
│   ├── driver
│   │   ├── driver.py
│   │   └── mattersim_main.cpp
│   ├── lib
│   │   ├── Benchmark.cpp
│   │   ├── MatterSim.cpp
│   │   ├── NavGraph.cpp
│   │   ├── cbf.cpp
│   │   ├── fragment.sh
│   │   └── vertex.sh
│   ├── lib_python
│   │   └── MatterSimPython.cpp
│   └── test
│       ├── main.cpp
│       ├── python_test.py
│       └── rendertest_spec.json
├── tasks
│   └── NDH
│       ├── DAN_modules
│       │   ├── __init__.py
│       │   ├── fc.py
│       │   ├── language_model.py
│       │   ├── refer_find_modules.py
│       │   └── submodules.py
│       ├── agent.py
│       ├── data
│       │   ├── README.md
│       │   └── download.sh
│       ├── env.py
│       ├── eval.py
│       ├── model.py
│       ├── param.py
│       ├── requirements.txt
│       ├── summarize_perf.py
│       ├── train.py
│       └── utils.py
└── teaser
    └── vdn_demo_v2_512.gif
/.gitmodules:
--------------------------------------------------------------------------------
1 | [submodule "pybind11"]
2 | 	path = pybind11
3 | 	url = https://github.com/pybind/pybind11
4 | 
--------------------------------------------------------------------------------
/CMakeLists.txt:
--------------------------------------------------------------------------------
1 | cmake_minimum_required(VERSION 2.8)
2 | project(Matterport_Simulator CXX)
3 | 
4 | option(OSMESA_RENDERING "Offscreen CPU rendering with OSMesa" OFF)
5 | option(EGL_RENDERING "Offscreen GPU rendering with EGL" OFF)
6 | 
7 | set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++11")
8 | # Make custom find-modules available
9 | set(CMAKE_MODULE_PATH ${CMAKE_MODULE_PATH} "${PROJECT_SOURCE_DIR}/cmake/Modules")
10 | 
11 | if(NOT CMAKE_BUILD_TYPE)
12 |   set(CMAKE_BUILD_TYPE Release)
13 | endif()
14 | 
15 | include_directories("${PROJECT_SOURCE_DIR}/include")
16 | 
17 | find_package(OpenCV REQUIRED)
18 | find_package(PkgConfig REQUIRED)
19 | find_package(OpenMP)
20 | if (OPENMP_CXX_FOUND)
21 |   set (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${OpenMP_CXX_FLAGS}")
22 |   set (CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} ${OpenMP_EXE_LINKER_FLAGS}")
23 | endif()
24 | 
25 | pkg_check_modules(JSONCPP REQUIRED jsoncpp)
26 | 
27 | if(EGL_RENDERING)
28 |   add_definitions(-DEGL_RENDERING)
29 |   find_package(OpenGL REQUIRED COMPONENTS OpenGL EGL)
30 |   pkg_check_modules(EPOXY REQUIRED epoxy)
31 |   set(GL_LIBS OpenGL::OpenGL OpenGL::EGL ${EPOXY_LIBRARIES})
32 | elseif(OSMESA_RENDERING)
33 |   add_definitions(-DOSMESA_RENDERING)
34 |   pkg_check_modules(OSMESA REQUIRED osmesa)
35 |   set(GL_LIBS ${OSMESA_LIBRARIES})
36 | else()
37 |   cmake_policy(SET CMP0072 OLD)
38 |   find_package(OpenGL REQUIRED)
39 |   find_package(GLEW REQUIRED)
40 |   set(GL_LIBS ${OPENGL_LIBRARIES} ${GLEW_LIBRARIES})
41 | endif()
42 | 
43 | add_library(MatterSim SHARED src/lib/MatterSim.cpp src/lib/NavGraph.cpp src/lib/Benchmark.cpp src/lib/cbf.cpp)
44 | if(OSMESA_RENDERING)
45 |   target_compile_definitions(MatterSim PUBLIC "-DOSMESA_RENDERING")
46 | endif()
47 | target_include_directories(MatterSim PRIVATE ${JSONCPP_INCLUDE_DIRS})
48 | target_link_libraries(MatterSim ${JSONCPP_LIBRARIES} ${OpenCV_LIBS} ${GL_LIBS})
49 | 
50 | add_executable(tests src/test/main.cpp)
51 | target_include_directories(tests PRIVATE ${JSONCPP_INCLUDE_DIRS})
52 | target_link_libraries(tests MatterSim ${JSONCPP_LIBRARIES} ${OpenCV_LIBS})
53 | 
54 | add_executable(mattersim_main src/driver/mattersim_main.cpp)
55 | target_link_libraries(mattersim_main MatterSim)
56 | 
57 | add_subdirectory(pybind11)
58 | 
59 | find_package(PythonInterp 2.7)
60 | message(${PYTHON_EXECUTABLE})
61 | 
62 | # Need to search for the python executable again to pick up an activated
63 | # virtualenv python, if any.
64 | unset(PYTHON_EXECUTABLE CACHE)
65 | find_program(PYTHON_EXECUTABLE python
66 |   PATHS ENV PATH   # look in the PATH environment variable
67 |   NO_DEFAULT_PATH  # do not look anywhere else...
68 | )
69 | 
70 | find_package(NumPy REQUIRED)
71 | 
72 | pybind11_add_module(MatterSimPython src/lib_python/MatterSimPython.cpp)
73 | target_include_directories(MatterSimPython PRIVATE ${NUMPY_INCLUDES})
74 | target_link_libraries(MatterSimPython PRIVATE MatterSim)
75 | set_target_properties(MatterSimPython
76 |   PROPERTIES
77 |   OUTPUT_NAME MatterSim)
78 | 
--------------------------------------------------------------------------------
/Dockerfile:
--------------------------------------------------------------------------------
1 | # Matterport3DSimulator
2 | # Requires an NVIDIA GPU with driver 384.xx or higher
3 | 
4 | 
5 | FROM nvidia/cudagl:9.0-devel-ubuntu16.04
6 | 
7 | # Install a few libraries to support both EGL and OSMESA options
8 | RUN apt-get update && apt-get install -y wget doxygen curl libjsoncpp-dev libepoxy-dev libglm-dev libosmesa6 libosmesa6-dev libglew-dev libopencv-dev python-opencv python-setuptools python-dev
9 | RUN easy_install pip
10 | RUN pip install torch torchvision pandas networkx
11 | 
12 | # Install the latest CMake
13 | ADD https://cmake.org/files/v3.12/cmake-3.12.2-Linux-x86_64.sh /cmake-3.12.2-Linux-x86_64.sh
14 | RUN mkdir /opt/cmake
15 | RUN sh /cmake-3.12.2-Linux-x86_64.sh --prefix=/opt/cmake --skip-license
16 | RUN ln -s /opt/cmake/bin/cmake /usr/local/bin/cmake
17 | RUN cmake --version
18 | 
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | MIT License
2 | 
3 | Copyright (c) 2017 Peter Anderson, Philip Roberts, Qi Wu, Damien Teney, Jake Bruce,
4 | Mark Johnson, Niko Sunderhauf, Ian Reid, Stephen Gould, Anton van den Hengel
5 | 
6 | Permission is hereby granted, free of charge, to any person obtaining a copy
7 | of this software and associated documentation files (the "Software"), to deal
8 | in the Software without restriction, including without limitation the rights
9 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10 | copies of the Software, and to permit persons to whom the Software is
11 | furnished to do so, subject to the following conditions:
12 | 
13 | The above copyright notice and this permission notice shall be included in all
14 | copies or substantial portions of the Software.
15 | 
16 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22 | SOFTWARE.
23 | 
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # PyTorch implementation of Cross-modal Memory Network
2 | 
3 | [**Vision-Dialog Navigation by Exploring Cross-modal Memory**](https://arxiv.org/abs/2003.06745), CVPR 2020.
4 | 
5 | 
6 | 
7 | ![Demo](teaser/vdn_demo_v2_512.gif)
8 | 
9 | ## Requirements
10 | 
11 | - Ubuntu 16.04
12 | - CUDA 9.0 or 10.0
13 | - [docker](https://askubuntu.com/questions/938700/how-do-i-install-docker-on-ubuntu-16-04-lts)
14 | - [nvidia-docker2.0](https://github.com/nvidia/nvidia-docker/wiki/Installation-(version-2.0))
15 | 
16 | We recommend using the mattersim [Dockerfile](Dockerfile) to install the simulator.
17 | 
18 | ## Dataset Download
19 | 
20 | Download the `train`, `val_seen`, `val_unseen`, and `test` splits of the CVDN and NDH datasets by executing:
21 | ```
22 | sh tasks/CVDN/data/download.sh
23 | sh tasks/NDH/data/download.sh
24 | ```
25 | 
26 | ## Installation
27 | 
28 | Build the docker image:
29 | ```
30 | docker build -t mattersim .
31 | ```
32 | 
33 | Run the docker container, mounting your project path:
34 | ```
35 | nvidia-docker run -it --shm-size 64G -v /User/home/Path_To_Project/:/Workspace/ mattersim
36 | ```
37 | 
38 | Compile the codebase (pass `-DOSMESA_RENDERING=ON` instead if you need CPU-only offscreen rendering; see the options in CMakeLists.txt above):
39 | ```
40 | mkdir build && cd build
41 | cmake -DEGL_RENDERING=ON ..
42 | make
43 | ```
44 | 
45 | Install Python dependencies by running:
46 | ```
47 | pip install -r tasks/NDH/requirements.txt
48 | ```
49 | 
50 | ## Train and Evaluate
51 | 
52 | To train and evaluate with trusted supervision, sample feedback, and all dialog history:
53 | ```
54 | python tasks/NDH/train.py \
55 |     --path_type=trusted_path \
56 |     --history=all \
57 |     --feedback=sample \
58 |     --eval_type=val \
59 |     --prefix=v1
60 | ```
61 | 
62 | 
63 | To train and test with trusted supervision, sample feedback, and all dialog history:
64 | 
65 | ```
66 | python tasks/NDH/train.py \
67 |     --path_type=trusted_path \
68 |     --history=all \
69 |     --feedback=sample \
70 |     --eval_type=test \
71 |     --prefix=v1
72 | ```
73 | 
74 | To generate a summary of the agent's performance:
75 | 
76 | ```
77 | python tasks/NDH/summarize_perf.py
78 | ```
79 | 
80 | ## Citation
81 | 
82 | If you use the code in your research, please cite:
83 | ```bibtex
84 | @inproceedings{zhu2020vision,
85 |   title={Vision-Dialog Navigation by Exploring Cross-modal Memory},
86 |   author={Zhu, Yi and Zhu, Fengda and Zhan, Zhaohuan and Lin, Bingqian and Jiao, Jianbin and Chang, Xiaojun and Liang, Xiaodan},
87 |   booktitle={Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition},
88 |   pages={10730--10739},
89 |   year={2020}
90 | }
91 | ```
92 | 
93 | ## Acknowledgements
94 | 
95 | This repository is built upon the
96 | [Matterport3DSimulator](https://github.com/peteanderson80/Matterport3DSimulator), [CVDN](https://github.com/mmurray/cvdn), and [DAN-VisDial](https://github.com/gicheonkang/DAN-VisDial) codebases.
97 | 
--------------------------------------------------------------------------------
/cmake/Modules/FindNumPy.cmake:
--------------------------------------------------------------------------------
1 | #------------------------------------------------------------------------------- 2 | # Copyright (c) 2013, Lars Baehren 3 | # All rights reserved. 4 | # 5 | # Redistribution and use in source and binary forms, with or without modification, 6 | # are permitted provided that the following conditions are met: 7 | # 8 | # * Redistributions of source code must retain the above copyright notice, this 9 | # list of conditions and the following disclaimer. 10 | # * Redistributions in binary form must reproduce the above copyright notice, 11 | # this list of conditions and the following disclaimer in the documentation 12 | # and/or other materials provided with the distribution.
13 | # 14 | # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 15 | # AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 16 | # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 17 | # DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE 18 | # FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 19 | # DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 20 | # SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 21 | # CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 22 | # OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 23 | # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 24 | #------------------------------------------------------------------------------- 25 | 26 | # - Check for the presence of NumPy 27 | # 28 | # The following variables are set when NumPy is found: 29 | # NUMPY_FOUND = Set to true, if all components of NUMPY have been found. 30 | # NUMPY_INCLUDES = Include path for the header files of NUMPY 31 | # NUMPY_LIBRARIES = Link these to use NUMPY 32 | # NUMPY_LFLAGS = Linker flags (optional) 33 | 34 | if (NOT NUMPY_FOUND) 35 | 36 | if (NOT NUMPY_ROOT_DIR) 37 | set (NUMPY_ROOT_DIR ${CMAKE_INSTALL_PREFIX}) 38 | endif (NOT NUMPY_ROOT_DIR) 39 | 40 | if (NOT PYTHONINTERP_FOUND) 41 | find_package (PythonInterp) 42 | endif (NOT PYTHONINTERP_FOUND) 43 | 44 | ##__________________________________________________________________________ 45 | ## Check for the header files 46 | 47 | ## Use Python to determine the include directory 48 | execute_process ( 49 | COMMAND ${PYTHON_EXECUTABLE} -c import\ numpy\;\ print\(numpy.get_include\(\)\)\; 50 | ERROR_VARIABLE NUMPY_FIND_ERROR 51 | RESULT_VARIABLE NUMPY_FIND_RESULT 52 | OUTPUT_VARIABLE NUMPY_FIND_OUTPUT 53 | OUTPUT_STRIP_TRAILING_WHITESPACE 54 | ) 55 | ## process the output from the execution of the command 56 | if (NOT NUMPY_FIND_RESULT) 57 | set (NUMPY_INCLUDES ${NUMPY_FIND_OUTPUT}) 58 | endif (NOT NUMPY_FIND_RESULT) 59 | 60 | ##__________________________________________________________________________ 61 | ## Check for the library 62 | 63 | unset (NUMPY_LIBRARIES) 64 | 65 | if (PYTHON_SITE_PACKAGES) 66 | find_library (NUMPY_NPYMATH_LIBRARY npymath 67 | HINTS ${PYTHON_SITE_PACKAGES}/numpy/core 68 | PATH_SUFFIXES lib 69 | ) 70 | if (NUMPY_NPYMATH_LIBRARY) 71 | list (APPEND NUMPY_LIBRARIES ${NUMPY_NPYMATH_LIBRARY}) 72 | endif (NUMPY_NPYMATH_LIBRARY) 73 | endif (PYTHON_SITE_PACKAGES) 74 | 75 | ##__________________________________________________________________________ 76 | ## Get API version of NumPy from 'numpy/numpyconfig.h' 77 | 78 | if (PYTHON_EXECUTABLE) 79 | execute_process ( 80 | COMMAND ${PYTHON_EXECUTABLE} -c import\ numpy\;\ print\(numpy.__version__\)\; 81 | ERROR_VARIABLE NUMPY_API_VERSION_ERROR 82 | RESULT_VARIABLE NUMPY_API_VERSION_RESULT 83 | OUTPUT_VARIABLE NUMPY_API_VERSION 84 | OUTPUT_STRIP_TRAILING_WHITESPACE 85 | ) 86 | else () 87 | ## Backup procedure: extract version number directly from the header file 88 | if (NUMPY_INCLUDES) 89 | find_file (HAVE_NUMPYCONFIG_H numpy/numpyconfig.h 90 | HINTS ${NUMPY_INCLUDES} 91 | ) 92 | endif (NUMPY_INCLUDES) 93 | endif () 94 | 95 | ## Dissect full version number into major, minor and patch version 96 | if (NUMPY_API_VERSION) 97 | string (REGEX REPLACE "\\." 
";" _tmp ${NUMPY_API_VERSION}) 98 | list (GET _tmp 0 NUMPY_API_VERSION_MAJOR) 99 | list (GET _tmp 1 NUMPY_API_VERSION_MINOR) 100 | list (GET _tmp 2 NUMPY_API_VERSION_PATCH) 101 | endif (NUMPY_API_VERSION) 102 | 103 | ##__________________________________________________________________________ 104 | ## Actions taken when all components have been found 105 | 106 | find_package_handle_standard_args (NUMPY DEFAULT_MSG NUMPY_INCLUDES) 107 | 108 | if (NUMPY_FOUND) 109 | if (NOT NUMPY_FIND_QUIETLY) 110 | message (STATUS "Found components for NumPy") 111 | message (STATUS "NUMPY_ROOT_DIR = ${NUMPY_ROOT_DIR}") 112 | message (STATUS "NUMPY_INCLUDES = ${NUMPY_INCLUDES}") 113 | message (STATUS "NUMPY_LIBRARIES = ${NUMPY_LIBRARIES}") 114 | message (STATUS "NUMPY_API_VERSION = ${NUMPY_API_VERSION}") 115 | endif (NOT NUMPY_FIND_QUIETLY) 116 | else (NUMPY_FOUND) 117 | if (NUMPY_FIND_REQUIRED) 118 | message (FATAL_ERROR "Could not find NUMPY!") 119 | endif (NUMPY_FIND_REQUIRED) 120 | endif (NUMPY_FOUND) 121 | 122 | ##__________________________________________________________________________ 123 | ## Mark advanced variables 124 | 125 | mark_as_advanced ( 126 | NUMPY_ROOT_DIR 127 | NUMPY_INCLUDES 128 | NUMPY_LIBRARIES 129 | ) 130 | 131 | endif (NOT NUMPY_FOUND) 132 | -------------------------------------------------------------------------------- /connectivity/8194nk5LbLH_connectivity.json: -------------------------------------------------------------------------------- 1 | [{"image_id":"c9e8dc09263e4d0da77d16de0ecddd39","pose":[-0.611043,-0.00396746,-0.791588,-0.213904,0.791585,-0.00882497,-0.610996,2.305,-0.00456166,-0.999953,0.00853306,1.56916,0,0,0,1],"included":true,"visible":[false,false,false,false,true,true,false,true,true,true,false,false,false,false,false,false,false,false,false,false],"unobstructed":[false,false,false,false,true,false,false,false,true,true,false,false,false,false,false,false,false,false,false,false],"height":1.5826326295962942},{"image_id":"286b0c2d9a46408ba80b6ccebb21e582","pose":[0.951596,0.00201098,0.307346,6.58012,-0.307351,0.00915895,0.951552,-2.96479,-0.000901435,-0.999956,0.00933374,4.36353,0,0,0,1],"included":true,"visible":[false,false,true,true,false,false,false,false,false,false,false,false,false,true,false,true,false,true,false,true],"unobstructed":[false,false,false,true,false,false,false,false,false,false,false,false,false,true,false,false,false,true,true,false],"height":1.5712253956498747},{"image_id":"6776097c17ed4b93aee61704eb32f06c","pose":[-0.711582,-0.00419131,-0.702591,-1.68941,0.702575,0.00464776,-0.711594,-5.37908,0.00624796,-0.99998,-0.000362505,1.58622,0,0,0,1],"included":true,"visible":[false,false,false,false,false,true,true,true,false,true,false,true,false,false,true,false,true,false,false,false],"unobstructed":[false,false,false,false,false,true,true,false,false,false,false,true,false,false,false,false,false,false,false,true],"height":1.5804941871490743},{"image_id":"8c7e8da7d4a44ab695e6b3195eac0cf1","pose":[0.709879,0.011247,0.704234,8.62929,-0.70424,-0.00407304,0.70995,-1.77115,0.0108531,-0.999928,0.00502926,4.38556,0,0,0,1],"included":true,"visible":[false,true,false,false,false,false,false,false,false,false,true,false,true,true,false,false,false,true,true,false],"unobstructed":[false,true,false,false,false,false,false,false,false,false,true,false,false,true,false,false,false,true,true,false],"height":1.585645804390483},{"image_id":"f33c718aaf2c41469389a87944442c62","pose":[0.619478,0.0166688,0.784837,-3.88437,-0.784902,-0.00375152,0.619609,-0.528748
,0.0132725,-0.999854,0.0107595,1.58368,0,0,0,1],"included":true,"visible":[true,false,false,false,false,false,false,false,false,true,false,false,false,false,false,false,false,false,false,true],"unobstructed":[true,false,false,false,false,false,false,false,false,true,false,false,false,false,false,false,false,false,false,true],"height":1.5829827809014503},{"image_id":"fcd90a404061413385286bef9662630e","pose":[-0.111393,0.00837906,0.993741,2.80245,-0.993773,-0.00348217,-0.111367,-3.78204,0.0025272,-0.999959,0.00871482,1.58057,0,0,0,1],"included":true,"visible":[true,false,true,false,false,false,false,true,false,false,false,false,false,false,false,true,false,false,false,false],"unobstructed":[false,false,true,false,false,false,true,true,true,false,false,false,false,false,false,false,false,false,false,false],"height":1.5763528408163245},{"image_id":"c07d4ae8330542a09cf8f8dddb9728ce","pose":[-0.985207,-0.0101267,0.171069,0.656519,-0.171094,0.00168538,-0.985253,-5.08928,0.00968898,-0.999947,-0.00339301,1.57611,0,0,0,1],"included":true,"visible":[true,false,true,false,false,true,false,true,false,false,false,false,false,false,true,false,true,false,false,true],"unobstructed":[false,false,true,false,false,true,false,true,false,false,false,true,false,false,false,false,false,false,false,false],"height":1.575276915205382},{"image_id":"2393bffb53fe4205bcc67796c6fb76e3","pose":[-0.241654,0.00228344,-0.97036,3.33582,0.970294,0.0124463,-0.241608,-5.90025,0.0115256,-0.99992,-0.00522325,1.57791,0,0,0,1],"included":true,"visible":[false,false,true,false,false,true,true,false,false,false,false,false,false,false,true,false,true,false,false,false],"unobstructed":[false,false,false,false,false,true,true,false,false,false,false,false,false,false,true,false,false,false,false,false],"height":1.5730354249357412},{"image_id":"71bf74df73cd4e24a191ef4f2338ca22","pose":[0.906931,-0.00688335,-0.421222,0.122562,0.421182,-0.00662188,0.906952,-0.00319673,-0.00903217,-0.999954,-0.00310641,1.57207,0,0,0,1],"included":true,"visible":[true,false,false,false,true,true,true,true,false,true,false,false,false,false,false,false,false,false,false,false],"unobstructed":[true,false,false,false,false,true,false,false,false,true,false,false,false,false,false,false,false,false,false,false],"height":1.570272020216938},{"image_id":"be8a2edacab34ec8887ba6a7b1e4945f","pose":[0.791463,0.0101015,0.611133,-3.50132,-0.611154,-0.00121731,0.791511,1.58103,0.00873934,-0.999948,0.00521015,1.56992,0,0,0,1],"included":true,"visible":[true,false,true,false,true,false,false,true,false,false,false,true,false,false,false,false,false,false,false,true],"unobstructed":[true,false,false,false,true,false,false,false,true,false,false,false,false,false,false,false,false,false,false,false],"height":1.577126892771864},{"image_id":"9bdde31adaa1443bb206b09bfa3c474c","pose":[0.799844,0.0047414,0.60019,8.67581,-0.600208,0.0075118,0.799809,-4.8108,-0.000716311,-0.99996,0.00885413,2.82261,0,0,0,1],"included":true,"visible":[false,false,false,true,false,false,false,false,false,false,false,false,false,true,false,false,true,true,false,false],"unobstructed":[false,false,false,true,false,false,false,false,false,false,false,false,false,false,false,false,true,false,false,false],"height":1.58264400638767},{"image_id":"66d4adb61b57494aa2c1ad141a0fad9b","pose":[-0.34536,-0.0108675,-0.938407,-2.27885,0.938436,0.00459882,-0.345423,-3.2282,0.00806945,-0.99993,0.00861029,1.58739,0,0,0,1],"included":true,"visible":[false,false,true,false,false,true,true,true,false,true,false,false,false,f
alse,true,true,false,false,false,true],"unobstructed":[false,false,true,false,false,false,true,false,false,false,false,false,false,false,false,false,false,false,false,true],"height":1.5705441219971223},{"image_id":"83ff709c0e3e46079836153ea5c7feac","pose":[0.68423,0.0137303,0.729137,3.42529,-0.729235,0.00364543,0.684254,1.65175,0.00673696,-0.999899,0.012507,4.37069,0,0,0,1],"included":true,"visible":[false,false,false,true,false,false,false,false,false,false,false,false,false,false,false,false,false,false,true,false],"unobstructed":[false,false,false,false,false,false,false,false,false,false,false,false,false,false,false,false,false,false,true,false],"height":1.578378655072358},{"image_id":"d9e325df2f3948679c78b93d8025e2da","pose":[0.826698,0.0192407,0.562317,8.49764,-0.562455,0.00220125,0.826825,-0.816805,0.0146709,-0.999812,0.0126418,4.38875,0,0,0,1],"included":true,"visible":[false,true,false,true,false,false,false,false,false,false,true,false,true,false,false,false,false,true,true,false],"unobstructed":[false,true,false,true,false,false,false,false,false,false,false,false,false,false,false,false,false,true,true,false],"height":1.5865892751674604},{"image_id":"423efb97f77f4e7995f19c66fe82afbc","pose":[0.958879,0.00141119,0.283813,5.51819,-0.283808,0.0124035,0.958801,-5.67527,-0.00216725,-0.999922,0.012294,1.58856,0,0,0,1],"included":true,"visible":[false,false,true,false,false,false,true,true,false,false,false,false,false,false,false,false,true,false,false,false],"unobstructed":[false,false,false,false,false,false,false,true,false,false,false,false,false,false,false,false,true,false,false,false],"height":1.5784339701720043},{"image_id":"6c49579a5cd34df8acb7f790b74e9eae","pose":[-0.95716,-0.00676032,-0.289482,-6.48379,0.289538,-0.00977451,-0.957117,-2.57899,0.00364085,-0.999929,0.0113132,1.59886,0,0,0,1],"included":true,"visible":[false,false,false,false,false,false,false,false,false,false,false,true,false,false,false,false,false,false,false,true],"unobstructed":[false,false,false,false,false,false,false,false,false,false,false,false,false,false,false,false,false,false,false,true],"height":1.5798282335589897},{"image_id":"aeed67040d744240b188f66f17d87d43","pose":[0.132175,0.0257204,0.990893,7.67989,-0.991226,0.00381825,0.132121,-5.81072,-0.000385302,-0.999662,0.0259995,2.29866,0,0,0,1],"included":true,"visible":[false,false,true,false,false,false,true,true,false,false,true,false,false,false,true,false,false,false,false,false],"unobstructed":[false,false,false,false,false,false,false,false,false,false,true,false,false,false,true,false,false,false,false,false],"height":1.6026680667792301},{"image_id":"aae01016bb354f78bd6db86e9d71af2b","pose":[0.0788252,0.00384462,0.996881,6.79041,-0.996887,0.00184069,0.0788186,-0.995862,-0.00153193,-0.999991,0.0039778,4.37219,0,0,0,1],"included":true,"visible":[false,true,false,true,false,false,false,false,false,false,false,false,false,true,false,false,false,false,true,false],"unobstructed":[false,true,false,true,false,false,false,false,false,false,false,false,false,true,false,false,false,false,true,false],"height":1.5770919536040346},{"image_id":"346b680ac5904359a1859c929ad312b6","pose":[-0.589008,0.00463239,0.808114,5.58585,-0.808123,0.00000695791,-0.589015,0.644327,-0.00273419,-0.999989,0.00373948,4.38174,0,0,0,1],"included":true,"visible":[false,true,false,true,false,false,false,false,false,false,false,false,true,true,false,false,false,true,false,false],"unobstructed":[false,true,false,true,false,false,false,false,false,false,false,false,true,true,false
,false,false,true,false,false],"height":1.5707587596461066},{"image_id":"ae91518ed77047b3bdeeca864cd04029","pose":[0.310985,0.0070688,0.950389,-4.60607,-0.950392,-0.00460962,0.31102,-2.5949,0.00657945,-0.999964,0.00528466,1.58581,0,0,0,1],"included":true,"visible":[false,false,true,false,true,true,false,true,false,true,false,true,false,false,false,true,false,false,false,false],"unobstructed":[false,false,true,false,true,false,false,false,false,false,false,true,false,false,false,true,false,false,false,false],"height":1.5747548700639524}] -------------------------------------------------------------------------------- /connectivity/GdvgFV5R1Z5_connectivity.json: -------------------------------------------------------------------------------- 1 | [{"image_id":"0b02e18654324edd8d74c078b66bfb20","pose":[-0.057695,-0.000357129,0.998334,-2.46692,-0.998304,-0.00769199,-0.0576965,-3.15814,0.00770012,-0.99997,0.0000884733,1.5171,0,0,0,1],"included":true,"visible":[false,true,false,false,false,true,true,true,true,false,true,false],"unobstructed":[false,false,false,false,false,true,false,true,true,false,true,false],"height":1.51470410293751},{"image_id":"1db1c0a09ecf40d188197efc05ced3bb","pose":[-0.442443,0.0138817,0.896688,-4.03893,-0.89679,-0.0101225,-0.442338,-3.05434,0.00293664,-0.999852,0.0169288,0.974424,0,0,0,1],"included":true,"visible":[true,false,false,false,true,true,false,false,true,false,false,true],"unobstructed":[false,false,false,false,false,true,false,false,true,false,false,true],"height":0.9701803380402906},{"image_id":"6178647ca8d14dc09370f6c1b7ed2fd6","pose":[-0.870025,0.0056275,0.492973,-3.69279,-0.493005,-0.0105975,-0.869962,1.95433,0.000328893,-0.999927,0.0119957,1.51516,0,0,0,1],"included":true,"visible":[false,false,false,true,false,false,true,true,false,false,true,false],"unobstructed":[false,false,false,true,false,false,true,true,false,true,true,false],"height":1.517582101716661},{"image_id":"565cc21cd28b4ee6bb5ba83c5270c032","pose":[0.0242634,0.000986587,-0.999704,-3.91782,0.999699,0.00333371,0.024267,0.178675,0.00335701,-0.999993,-0.0009042,1.50868,0,0,0,1],"included":true,"visible":[false,false,true,false,false,false,true,false,false,true,true,false],"unobstructed":[false,false,true,false,false,false,false,false,false,true,true,false],"height":1.5114421933143356},{"image_id":"ef638e508e054c4aabd49b38d1b88fc7","pose":[0.0820523,0.0151057,0.996513,-4.61631,-0.995947,-0.0356725,0.0825462,-2.18899,0.0367954,-0.999249,0.0121187,1.52757,0,0,0,1],"included":true,"visible":[false,true,false,false,false,true,false,false,true,false,false,true],"unobstructed":[false,false,false,false,false,true,false,false,true,false,false,true],"height":1.5162868543024455},{"image_id":"97ed68de989e44fdaf2d9b949898fab6","pose":[0.0900997,0.0149714,0.99582,-3.64126,-0.995713,-0.0195971,0.0903844,-3.16818,0.0208687,-0.999695,0.0131427,1.52081,0,0,0,1],"included":true,"visible":[true,true,false,false,true,false,false,false,true,false,false,true],"unobstructed":[true,true,false,false,true,false,false,false,true,false,false,true],"height":1.5211418713547455},{"image_id":"5fd70cff4992429a99a84fd3c117ccb5","pose":[-0.0539877,-0.000800861,-0.998541,0.0108044,0.998337,0.0201438,-0.0539926,0.00604319,0.020158,-0.999796,-0.000286778,1.51223,0,0,0,1],"included":true,"visible":[true,false,true,true,false,false,false,true,false,true,true,false],"unobstructed":[false,false,true,false,false,false,false,true,false,false,true,false],"height":1.5113248528175798},{"image_id":"86d342c576ff46a9828d2ba377cc8cd5","pose":[0.
998173,0.0151118,-0.0584746,-1.78347,0.0584707,0.000718574,0.998288,-1.89835,0.0151283,-0.999885,-0.000165129,1.52238,0,0,0,1],"included":true,"visible":[true,false,true,false,false,false,true,false,false,false,true,false],"unobstructed":[true,false,true,false,false,false,true,false,false,false,true,false],"height":1.5103397372923053},{"image_id":"8dba9ff900b14f9b84ead660f5f7f701","pose":[-0.999855,-0.0144511,0.00887107,-4.11579,-0.00895392,0.00564829,-0.999943,-2.90606,0.0144005,-0.999879,-0.00577567,1.51617,0,0,0,1],"included":true,"visible":[true,true,false,false,true,true,false,false,false,false,false,true],"unobstructed":[true,true,false,false,true,true,false,false,false,false,false,true],"height":1.5112098807574073},{"image_id":"0d8c5fbfd73f44e28d6da370520611e4","pose":[0.0769887,0.00664334,0.997009,-6.15424,-0.997016,-0.00490415,0.0770216,-0.0398163,0.00540151,-0.999965,0.00624716,1.50965,0,0,0,1],"included":true,"visible":[false,false,true,true,false,false,true,false,false,false,true,false],"unobstructed":[false,false,true,true,false,false,false,false,false,false,false,false],"height":1.5058928427471967},{"image_id":"aebb1de49d21485e8bef7633dfb58761","pose":[-0.0229751,-0.0058052,-0.999718,-1.94579,0.999719,0.00553997,-0.0230069,-0.026534,0.00567231,-0.999967,0.0056775,1.50582,0,0,0,1],"included":true,"visible":[true,false,true,true,false,false,true,true,false,true,false,false],"unobstructed":[true,false,true,true,false,false,true,true,false,false,false,false],"height":1.5101720791580233},{"image_id":"e34e51f3d6584ad09c510de5db84752f","pose":[-0.0418368,-0.0124855,0.999046,-3.99281,-0.993607,-0.104406,-0.0429142,-2.13265,0.104842,-0.994456,-0.00803644,0.980264,0,0,0,1],"included":true,"visible":[false,true,false,false,true,true,false,false,true,false,false,false],"unobstructed":[false,true,false,false,true,true,false,false,true,false,false,false],"height":0.969584316081611}] -------------------------------------------------------------------------------- /connectivity/README.md: -------------------------------------------------------------------------------- 1 | ## connectivity 2 | Connectivity graphs indicating the navigable paths between viewpoints in each scan. 3 | 4 | Each json file contains an array of annotations, one for each viewpoint in the scan. All annotations share the same basic structure as follows: 5 | 6 | ``` 7 | { 8 | "image_id": str, 9 | "pose": [float x 16], 10 | "included": boolean, 11 | "visible": [boolean x num_viewpoints], 12 | "unobstructed": [boolean x num_viewpoints], 13 | "height": float 14 | } 15 | ``` 16 | - `image_id`: matterport skybox prefix 17 | - `pose`: 4x4 matrix in row major order that transforms matterport skyboxes to global coordinates (z-up). Pose matrices are based on the assumption that the camera is facing skybox image 3. 18 | - `included`: whether viewpoint is included in the simulator. Some overlapping viewpoints are excluded. 19 | - `visible`: indicates other viewpoints that can be seen from this viewpoint. 20 | - `unobstructed`: indicates transitions to other viewpoints that are considered navigable for an agent. 21 | - `height`: estimated height of the viewpoint above the floor. Not required for the simulator. 22 | 23 | Units are in metres. 24 | 25 | `scans.txt` contains a list of all the scan ids in the dataset. 
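
To make the schema concrete, here is a minimal Python sketch of one way to parse a connectivity file into a navigation graph. It is illustrative rather than part of the simulator: the helper name `load_nav_graph` is our own, and it assumes `networkx` is available (the project Dockerfile installs it via pip).

```python
import json
import os

import networkx as nx  # installed by the Dockerfile via pip


def load_nav_graph(scan_id, connectivity_dir="connectivity"):
    """Illustrative helper: build an undirected graph of navigable transitions."""
    path = os.path.join(connectivity_dir, scan_id + "_connectivity.json")
    with open(path) as f:
        annotations = json.load(f)
    graph = nx.Graph()
    for item in annotations:
        if not item["included"]:
            continue  # skip viewpoints excluded from the simulator
        # The pose is a row-major 4x4 matrix, so the translation (the
        # viewpoint position in global z-up coordinates, in metres) sits
        # at flat indices 3, 7 and 11.
        pose = item["pose"]
        graph.add_node(item["image_id"], position=(pose[3], pose[7], pose[11]))
        # unobstructed[j] refers to the j-th annotation in the same file.
        for j, navigable in enumerate(item["unobstructed"]):
            if navigable and annotations[j]["included"]:
                graph.add_edge(item["image_id"], annotations[j]["image_id"])
    return graph


if __name__ == "__main__":
    g = load_nav_graph("GdvgFV5R1Z5")
    print("{} viewpoints, {} edges".format(g.number_of_nodes(), g.number_of_edges()))
```

Shortest routes between viewpoints can then be computed directly on the resulting graph, e.g. with `nx.shortest_path(g, source_id, target_id)`.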
26 | -------------------------------------------------------------------------------- /connectivity/YmJkqBEsHnH_connectivity.json: -------------------------------------------------------------------------------- 1 | [{"image_id":"006933a75f764c5485cf284bea0ded0b","pose":[0.210914,-0.00824746,-0.977469,-7.64722,0.977278,0.0232484,0.210677,-2.15553,0.0209873,-0.999695,0.0129646,1.56695,0,0,0,1],"included":true,"visible":[false,false,true,false,true,true,false,true,true,true,false],"unobstructed":[false,false,false,false,false,false,false,true,true,false,false],"height":1.524793092035509},{"image_id":"e4ede0695e4e4a77aae8537abb9f11d3","pose":[-0.0422212,-0.0176246,-0.998952,-0.133122,0.998904,0.0194092,-0.0425613,-0.0184591,0.0201393,-0.999656,0.016787,1.48352,0,0,0,1],"included":true,"visible":[false,false,true,true,false,false,false,false,false,false,false],"unobstructed":[false,false,true,false,false,true,false,false,false,false,false],"height":1.5227398475592409},{"image_id":"d471e89e00be49f49a7ecace814d60bf","pose":[0.426939,-0.00370058,-0.904272,-0.421886,0.904055,0.0239963,0.426739,-2.12366,0.0201203,-0.999705,0.0135916,1.49477,0,0,0,1],"included":true,"visible":[true,true,false,true,true,true,false,true,true,true,false],"unobstructed":[false,true,false,true,false,true,false,false,false,false,false],"height":1.5263900136377955},{"image_id":"b34af02ce9b642ebbd0c7e9e0ba3b553","pose":[0.960272,0.00870611,-0.278924,-0.0905727,0.278755,0.0168277,0.960214,-3.55265,0.0130537,-0.99982,0.0137334,1.49061,0,0,0,1],"included":true,"visible":[true,true,true,false,false,false,false,false,false,false,false],"unobstructed":[false,false,true,false,false,true,false,false,false,false,false],"height":1.5323637229797105},{"image_id":"01c80b5f8fbd4c969ee0bc03f1ec7a6c","pose":[0.359562,-0.0105291,-0.933061,-3.77309,0.932771,0.0313799,0.359097,-2.1838,0.0254987,-0.999452,0.0211054,1.53932,0,0,0,1],"included":true,"visible":[true,false,true,false,false,true,false,true,true,true,false],"unobstructed":[false,false,false,false,false,true,false,true,false,false,false],"height":1.5286629461398107},{"image_id":"82ea5baa30f945fe98f6cad3064af847","pose":[0.0376233,-0.0115611,-0.999224,-2.01669,0.998821,0.0310955,0.0372487,-2.16965,0.030641,-0.999449,0.0127185,1.50807,0,0,0,1],"included":true,"visible":[true,true,true,true,true,false,false,true,true,true,false],"unobstructed":[false,true,true,true,true,false,false,false,false,false,false],"height":1.5253207999550662},{"image_id":"aecbb791f30b452a9236c5a8c7030663","pose":[0.296076,-0.0242641,-0.954855,-13.5955,0.955111,0.0179483,0.2957,-2.22547,0.00996343,-0.999544,0.0284901,1.59272,0,0,0,1],"included":true,"visible":[true,false,true,false,true,true,false,true,true,true,true],"unobstructed":[false,false,false,false,false,false,false,false,false,true,true],"height":1.7557263982456066},{"image_id":"d841f7b710f9470796d55561f8f524db","pose":[0.270437,0.002913,-0.962732,-5.77716,0.962325,0.0284129,0.27041,-2.21321,0.028142,-0.999591,0.00488176,1.55947,0,0,0,1],"included":true,"visible":[true,false,true,false,true,true,false,false,true,true,false],"unobstructed":[true,false,false,false,true,false,false,false,false,false,false],"height":1.5357935019251416},{"image_id":"8e38fdd81c7949db9646968bafbbdcfc","pose":[-0.00277118,-0.0169575,-0.999852,-9.93905,0.999791,0.020127,-0.00311204,-2.17463,0.0201771,-0.999653,0.0168993,1.60592,0,0,0,1],"included":true,"visible":[true,false,true,false,true,true,false,true,false,true,true],"unobstructed":[true,false,false,false,false,false,false,
false,false,true,false],"height":1.5208970888736792},{"image_id":"20fd759be0b64fc9aa96d290f0a704ec","pose":[0.227815,0.0117555,-0.973633,-12.1161,0.973367,0.0235263,0.228037,-2.15724,0.025587,-0.999654,-0.00608172,1.59969,0,0,0,1],"included":true,"visible":[true,false,true,false,true,true,true,true,true,false,true],"unobstructed":[false,false,false,false,false,false,true,false,true,false,false],"height":1.5261379179165138},{"image_id":"d838acff82244c2da0cf2651e54966cb","pose":[0.310234,-0.0632421,-0.948553,-15.2317,0.950604,0.0313736,0.308813,-2.28133,0.0102298,-0.997504,0.0698525,0.902626,0,0,0,1],"included":true,"visible":[true,false,true,false,true,true,true,true,true,true,false],"unobstructed":[false,false,false,false,false,false,true,false,false,false,false],"height":1.558854711359605}] -------------------------------------------------------------------------------- /connectivity/gZ6f7yhEvPG_connectivity.json: -------------------------------------------------------------------------------- 1 | [{"image_id":"80929af5cf234ae38ac3a2a4e60e4342","pose":[0.983395,0.00450812,-0.181418,-2.79247,0.181442,-0.00570068,0.983385,-1.38801,0.00339928,-0.999973,-0.00642298,1.42676,0,0,0,1],"included":true,"visible":[false,true,true,false,false,true,false,false],"unobstructed":[false,true,false,true,false,true,false,false],"height":1.4191402375960298},{"image_id":"ba27da20782d4e1a825f0a133ad84da9","pose":[-0.7605,-0.0115739,-0.649234,-2.38988,0.648885,0.0237502,-0.760515,-0.0538717,0.0242219,-0.999651,-0.0105509,1.4341,0,0,0,1],"included":true,"visible":[true,false,true,true,false,true,false,true],"unobstructed":[true,false,false,false,false,true,false,true],"height":1.424939020658826},{"image_id":"46cecea0b30e4786b673f5e951bf82d4","pose":[0.593129,0.0137361,-0.80499,0.99933,0.804932,0.010707,0.59327,1.17558,0.0167685,-0.999848,-0.00470498,1.41684,0,0,0,1],"included":true,"visible":[false,false,false,true,true,false,true,true],"unobstructed":[false,false,false,true,true,false,true,true],"height":1.4252108727703763},{"image_id":"bda7a9e6d1d94b3aa8ff491beb158f3a","pose":[-0.378592,-0.0208239,0.925329,-0.182918,-0.925433,-0.00820128,-0.37882,-1.72967,0.0154776,-0.999749,-0.0161651,1.42205,0,0,0,1],"included":true,"visible":[true,false,true,false,true,false,true,true],"unobstructed":[true,false,true,false,true,false,false,true],"height":1.42983949725488},{"image_id":"dbb2f8000bc04b3ebcd0a55112786149","pose":[-0.595363,0.00457706,-0.803444,1.10196,0.803383,0.0168543,-0.595222,-1.10724,0.0108174,-0.999847,-0.0137106,1.41536,0,0,0,1],"included":true,"visible":[false,false,true,true,false,false,true,true],"unobstructed":[false,false,true,true,false,false,true,true],"height":1.4186255623107038},{"image_id":"29b20fa80dcd4771974303c1ccd8953f","pose":[0.292738,0.0164579,-0.956051,-2.77306,0.956096,0.0090939,0.292909,1.55377,0.0135152,-0.999823,-0.0130722,1.43367,0,0,0,1],"included":true,"visible":[true,true,true,false,true,false,false,false],"unobstructed":[true,true,false,false,false,false,false,false],"height":1.4237594118402337},{"image_id":"0ee20663dfa34b438d48750ddcd7366c","pose":[-0.75968,-0.0019971,-0.650293,-0.111567,0.650131,0.0201598,-0.759554,1.31337,0.014627,-0.999794,-0.0140156,1.42291,0,0,0,1],"included":true,"visible":[false,false,true,true,true,false,false,true],"unobstructed":[false,false,true,false,true,false,false,true],"height":1.4276556862049736},{"image_id":"47d8a8282c1c4a7fb3eeeacc45e9d959","pose":[-0.0254788,0.00643152,-0.999654,-0.0034508,0.999603,0.0120797,-0.0253995,0.0112371,0.0119124,-
0.999906,-0.00673574,1.42388,0,0,0,1],"included":true,"visible":[true,true,true,true,true,false,true,false],"unobstructed":[false,true,true,true,true,false,true,false],"height":1.4268855357216241}] -------------------------------------------------------------------------------- /connectivity/pLe4wQe7qrG_connectivity.json: -------------------------------------------------------------------------------- 1 | [{"image_id":"e4c0a4ec08104bf5ada134b123fa53e7","pose":[-0.133089,0.0111501,-0.991041,1.16811,0.991028,0.0137789,-0.132932,-2.20571,0.0121736,-0.999843,-0.0128829,1.54855,0,0,0,1],"included":true,"visible":[false,false,true,false,true,false,true,true,false,true,false,false,true,false,true,false,false,false,false,false,false,true,false,true,true,true,true,true,false,false,true],"unobstructed":[false,false,false,false,false,false,false,false,false,false,false,false,false,false,true,false,false,false,false,false,false,true,false,false,true,false,false,false,false,false,false],"height":1.5280399019555968},{"image_id":"959ea6af304a4339bbc5d97f044d11c3","pose":[0.312992,0.0130519,-0.949666,2.47951,0.948724,0.0422726,0.313263,-2.23387,0.0442338,-0.999021,0.000849325,1.58243,0,0,0,1],"included":true,"visible":[false,false,true,true,false,false,false,false,false,true,false,true,true,false,true,true,false,false,false,false,false,false,true,true,true,true,true,false,true,false,true],"unobstructed":[false,false,false,false,false,false,false,false,false,false,false,true,false,false,false,false,false,false,false,false,false,false,false,false,false,false,true,false,true,false,false],"height":1.5361363756730164},{"image_id":"ffe0e6835287419c9cfe343e9d613d87","pose":[-0.802259,-0.00971694,-0.596896,5.96539,0.59688,0.00470064,-0.802316,-2.03323,0.0106021,-0.999941,0.00202973,1.57957,0,0,0,1],"included":true,"visible":[false,true,false,false,false,false,true,false,false,false,false,false,true,true,false,true,false,false,false,false,false,true,true,false,false,false,true,false,false,true,true],"unobstructed":[false,false,false,false,false,false,false,false,false,false,false,false,true,true,false,false,false,false,false,false,false,false,false,false,false,false,false,false,false,false,true],"height":1.518586128876891},{"image_id":"47a69295198f4265958b9b1d497c328d","pose":[-0.90497,-0.00981301,-0.42536,2.46799,0.425363,0.00186582,-0.90502,2.04203,0.00967489,-0.99995,0.0024866,1.55214,0,0,0,1],"included":true,"visible":[false,true,false,false,false,true,true,false,true,false,false,true,false,false,false,true,false,false,true,true,true,false,false,false,true,false,false,true,true,false,false],"unobstructed":[false,false,false,false,false,false,false,false,false,false,false,false,false,false,false,true,false,false,false,false,true,false,false,false,false,false,false,false,false,false,false],"height":1.5121750884423606},{"image_id":"3dfe07714b2f49d88bd4c8749e8bb0b7","pose":[-0.979561,-0.00709476,0.201019,-1.64821,-0.200975,-0.00640329,-0.979575,0.566531,0.0082373,-0.999954,0.00484756,1.56065,0,0,0,1],"included":true,"visible":[true,false,false,false,false,false,true,true,false,false,false,true,false,true,false,true,true,false,false,false,false,true,true,true,true,true,false,true,false,false,false],"unobstructed":[false,false,false,false,false,false,false,false,false,false,false,false,false,false,true,false,true,false,false,false,false,true,false,false,true,true,false,true,false,false,false],"height":1.5157095354765127},{"image_id":"87407bb6ed614926b91fc3e27eab766e","pose":[0.22909,0.0301697,-0.972937,4.56488,0.9732
86,0.00848048,0.229435,2.04904,0.0151732,-0.999508,-0.02742,1.5442,0,0,0,1],"included":true,"visible":[false,false,false,true,false,false,true,false,false,true,false,false,false,false,true,true,false,false,true,true,true,false,true,false,false,false,false,false,true,false,false],"unobstructed":[false,false,false,false,false,false,false,false,false,false,false,false,false,false,false,false,false,false,false,true,true,false,false,false,false,false,false,false,false,false,false],"height":1.5111934219678684},{"image_id":"530f8e4126b14082a5c4ff6c3f6ae7cd","pose":[-0.172634,-0.00379856,-0.984978,8.51758,0.984978,0.00322887,-0.172647,0.14365,0.00383645,-0.999987,0.0031851,1.4578,0,0,0,1],"included":true,"visible":[false,false,true,false,true,false,false,false,false,false,true,false,false,true,false,false,false,false,false,false,false,false,true,true,false,false,false,false,false,true,false],"unobstructed":[false,false,false,false,false,false,false,false,false,false,false,false,false,true,false,false,false,false,false,false,false,false,true,false,false,false,false,false,false,true,false],"height":1.5362285111230571},{"image_id":"96782d3925ec4088ab224cdc92a4fd6a","pose":[-0.216113,-0.00838211,-0.976332,1.24213,0.976316,0.00844697,-0.216182,2.38931,0.0100594,-0.999929,0.00635911,1.53856,0,0,0,1],"included":true,"visible":[true,false,false,false,true,true,true,false,true,false,false,false,false,true,false,false,true,false,true,true,false,true,true,true,false,false,false,true,false,false,false],"unobstructed":[false,false,false,false,false,false,false,false,false,false,false,false,false,false,false,false,true,false,false,false,false,true,false,false,false,false,false,true,false,false,false],"height":1.5135335729735602},{"image_id":"2dcc9c6ca2d44d5080a0a7e7b7fb9c4d","pose":[-0.951188,-0.00996649,-0.308449,-1.21085,0.308409,0.00538007,-0.951238,2.40322,0.0111403,-0.999936,-0.00204269,1.55952,0,0,0,1],"included":true,"visible":[false,false,false,false,false,false,false,true,false,true,false,false,false,false,true,false,true,true,true,false,false,true,false,false,false,true,false,true,false,false,false],"unobstructed":[false,false,false,false,false,false,false,false,false,false,false,false,false,false,false,false,true,false,false,false,false,false,false,false,false,true,false,true,false,false,false],"height":1.5317176811699114},{"image_id":"0d704acada9041c48621c5d01d775da0","pose":[0.884279,0.0143861,0.466735,-1.34535,-0.466608,-0.0113974,0.88439,-2.3821,0.0180428,-0.999831,-0.00336482,1.52522,0,0,0,1],"included":true,"visible":[true,false,false,false,false,false,false,false,true,false,false,false,false,false,true,false,true,true,false,false,false,true,false,false,true,true,false,false,false,false,false],"unobstructed":[false,false,false,false,false,false,false,false,false,false,false,false,false,false,true,false,false,false,false,false,false,false,false,false,true,true,false,false,false,false,false],"height":1.5405532836763522},{"image_id":"2cbd295d838b4c51b5590dcf2a37fba0","pose":[0.246342,0.0412581,-0.968304,4.76599,0.96868,0.0216735,0.247362,0.169153,0.0311925,-0.998913,-0.0346258,1.42661,0,0,0,1],"included":true,"visible":[false,false,false,false,false,false,true,false,false,false,false,false,false,true,false,false,false,false,false,false,false,true,true,true,false,false,false,false,true,true,false],"unobstructed":[false,false,false,false,false,false,false,false,false,false,false,false,true,true,false,false,false,false,false,false,false,true,false,true,false,false,false,false,true,true,false],"height"
:1.5180090338091925},{"image_id":"6fbd170d8df746b0b10e3801e2dad706","pose":[-0.872353,-0.0000202749,0.488874,3.49156,-0.488854,-0.00892582,-0.872319,0.121306,0.00438157,-0.99996,0.00777758,1.41535,0,0,0,1],"included":true,"visible":[false,true,false,false,true,false,false,false,false,false,false,false,false,false,false,true,false,true,false,false,false,true,false,false,false,true,false,false,true,false,false],"unobstructed":[false,true,false,false,false,false,false,false,false,false,false,false,false,false,false,true,false,false,false,false,false,false,false,false,false,false,false,false,true,false,false],"height":1.5371204380160495},{"image_id":"31d308fee8284a168c28e238cf814363","pose":[0.998122,0.0164352,-0.0590029,6.9369,0.0592246,-0.0133283,0.998155,-2.13031,0.0156188,-0.999776,-0.0142757,1.58199,0,0,0,1],"included":true,"visible":[false,true,true,false,false,false,false,false,false,false,true,false,false,true,false,true,false,false,true,false,false,true,true,true,false,false,true,false,false,true,true],"unobstructed":[false,false,true,false,false,false,false,false,false,false,true,false,false,true,false,false,false,false,false,false,false,false,false,true,false,false,false,false,false,false,false],"height":1.5115252320863801},{"image_id":"789faffd87b949fd9ed7e6df4fadc2f1","pose":[0.998352,0.0156401,-0.0551931,6.89589,0.0551612,0.00248225,0.998474,-1.07864,0.0157535,-0.999874,0.00161644,1.58253,0,0,0,1],"included":true,"visible":[false,false,true,false,true,false,true,false,false,false,true,false,true,false,false,true,false,false,true,false,false,true,true,true,false,false,false,false,false,true,false],"unobstructed":[false,false,true,false,false,false,true,false,false,false,true,false,true,false,false,false,false,false,false,false,false,false,true,true,false,false,false,false,false,true,false],"height":1.5156362905724483},{"image_id":"a26b0e83785f45d484e5f9b83fdb4df3","pose":[0.784717,-0.00024993,0.619854,-0.356288,-0.619842,-0.00640294,0.7847,-1.3696,0.00377304,-0.999979,-0.0051784,1.5663,0,0,0,1],"included":true,"visible":[true,false,true,false,true,false,false,false,false,true,false,false,true,false,false,true,true,true,false,false,false,true,false,false,true,true,false,true,false,false,false],"unobstructed":[true,false,false,false,true,false,false,false,false,true,false,false,false,false,false,true,true,true,false,false,false,true,false,false,true,true,false,false,false,false,false],"height":1.5217725369665362},{"image_id":"df0b69b34d04453691b72a6c16923756","pose":[0.00951654,-0.00498874,-0.999942,2.41189,0.999919,0.00833186,0.00947506,0.0914117,0.00828438,-0.999952,0.00506864,1.42153,0,0,0,1],"included":true,"visible":[false,true,false,true,true,false,false,false,false,false,true,true,false,true,true,false,true,true,false,false,false,true,false,false,false,true,false,false,true,false,false],"unobstructed":[false,false,false,true,false,false,false,false,false,false,false,true,false,false,true,false,true,false,false,false,false,true,false,false,false,false,false,false,true,false,false],"height":1.5270023190896223},{"image_id":"d7d0e431bbfa40429a561060150f24cb","pose":[0.999351,0.0057182,0.0355512,-0.337565,-0.0355828,0.00559738,0.999351,1.14528,0.00551577,-0.999968,0.00579823,1.55634,0,0,0,1],"included":true,"visible":[false,false,false,false,true,false,true,true,true,false,false,false,false,true,true,true,false,true,false,false,false,true,true,false,true,false,false,true,false,false,false],"unobstructed":[false,false,false,false,true,false,false,true,true,false,false,false,false,fa
lse,true,true,false,true,false,false,false,true,false,false,false,false,false,true,false,false,false],"height":1.5126864275679581},{"image_id":"8f17854feb134826ae42e16b303e7445","pose":[-0.04737,0.0249555,-0.998565,-0.00382618,0.998875,0.00294013,-0.0473109,-0.017549,0.00175551,-0.999684,-0.0250657,1.55087,0,0,0,1],"included":true,"visible":[false,false,false,false,false,false,false,false,true,true,false,true,false,true,true,true,true,false,false,false,false,true,false,false,true,false,false,true,false,false,false],"unobstructed":[false,false,false,false,false,false,false,false,false,false,false,false,false,false,true,false,true,false,false,false,false,true,false,false,false,false,false,true,false,false,false],"height":1.5136058544662168},{"image_id":"d0584db5d0ba41ee955f6c91195afcb3","pose":[-0.0387735,-0.000627238,0.999248,6.85886,-0.999187,-0.0109357,-0.0387783,2.09848,0.0109521,-0.99994,-0.000201698,1.56982,0,0,0,1],"included":true,"visible":[false,false,false,true,false,true,false,false,false,false,false,false,true,true,true,true,false,false,false,true,true,true,true,true,false,false,false,false,false,true,false],"unobstructed":[false,false,false,false,false,false,false,false,false,false,false,false,false,false,false,false,false,false,false,true,false,false,false,true,false,false,false,false,false,true,false],"height":1.5123722877852799},{"image_id":"87491cd48b094270a2a1aa682b8a770c","pose":[0.995378,0.0106665,0.0954335,5.60063,-0.0953334,-0.00948957,0.9954,2.17887,0.0115233,-0.999898,-0.00842783,1.55259,0,0,0,1],"included":true,"visible":[false,false,false,true,false,true,true,false,false,false,true,true,true,true,false,true,false,false,true,false,true,true,true,true,false,false,false,false,true,false,false],"unobstructed":[false,false,false,false,false,true,false,false,false,false,false,false,false,false,false,false,false,false,true,false,false,false,false,false,false,false,false,false,false,true,false],"height":1.5096271733017124},{"image_id":"8a65d3586fed4c5f9e0f28fc184b3ff2","pose":[0.999328,0.0243579,-0.0273564,3.25097,0.0277536,-0.016113,0.999485,2.12641,0.0239048,-0.999573,-0.0167772,1.55627,0,0,0,1],"included":true,"visible":[false,false,false,true,false,true,true,false,true,false,true,true,true,true,false,false,false,false,true,true,false,false,false,true,false,false,false,false,false,false,false],"unobstructed":[false,false,false,true,false,true,false,false,false,false,false,false,false,false,false,false,false,false,false,false,false,false,false,false,false,false,false,false,false,false,false],"height":1.5216447032258948},{"image_id":"eb464984cc4847d2a61eab27e3e31e51","pose":[0.317487,0.0187868,-0.948076,1.37215,0.94826,-0.0045702,0.317459,0.120026,0.0016314,-0.999813,-0.0192648,1.55431,0,0,0,1],"included":true,"visible":[true,false,false,false,true,false,false,true,true,true,false,true,false,true,true,true,true,true,false,false,false,false,false,false,true,true,false,false,false,false,false],"unobstructed":[true,false,false,false,true,false,false,true,false,false,true,false,false,false,true,true,true,true,false,false,false,false,false,false,true,true,false,false,false,false,false],"height":1.5187432392237161},{"image_id":"ce103547e620457f935a63050cea57b3","pose":[-0.926095,-0.0151941,-0.376983,7.37065,0.376978,0.00327303,-0.926216,0.160002,0.0153072,-0.999879,0.00269771,1.43016,0,0,0,1],"included":true,"visible":[false,false,true,false,true,false,true,false,false,false,true,false,true,true,false,false,false,false,true,false,false,false,false,true,false,true,false,false,fals
e,true,false],"unobstructed":[false,false,false,false,false,false,true,false,false,false,false,false,false,true,false,false,false,false,false,false,false,false,false,true,false,false,false,false,false,true,false],"height":1.5228214121764414},{"image_id":"fa48c6f958304aa8a8f765a72fe7e8d5","pose":[-0.994837,-0.00721806,0.101218,6.07693,-0.101252,0.00455002,-0.99485,0.0491342,0.00672061,-0.999963,-0.00525636,1.42403,0,0,0,1],"included":true,"visible":[false,false,false,false,true,false,true,false,false,false,true,false,true,true,false,false,false,false,false,false,false,false,true,false,false,true,false,false,false,true,false],"unobstructed":[false,false,false,false,false,false,false,false,false,false,true,false,true,true,false,false,false,false,true,false,false,false,true,false,false,false,false,false,false,true,false],"height":1.520425902170783},{"image_id":"50be95bc6efb466c90867d52cf32ba3f","pose":[0.803639,0.00102907,-0.595115,-0.280264,0.595001,0.0182495,0.803517,-2.40583,0.0116877,-0.999833,0.0140547,1.54308,0,0,0,1],"included":true,"visible":[true,false,true,false,true,false,false,false,false,true,false,false,true,false,true,false,true,true,false,false,false,true,true,false,false,true,false,true,false,false,false],"unobstructed":[true,false,false,false,true,false,false,false,false,true,false,false,false,false,true,false,false,false,false,false,false,true,false,false,false,true,false,false,false,false,false],"height":1.5259856691595353},{"image_id":"91d1554c155e4185a8c69636d47fd58d","pose":[0.7634,0.00593063,0.645898,-1.49105,-0.645812,-0.0117048,0.763406,-0.563949,0.0120878,-0.999914,-0.00510434,1.56479,0,0,0,1],"included":true,"visible":[true,false,false,false,true,false,true,true,true,true,false,true,false,false,true,true,false,false,false,false,false,true,true,true,true,false,false,true,false,false,false],"unobstructed":[false,false,false,false,true,false,false,false,true,true,false,false,false,false,true,false,false,false,false,false,false,true,false,false,true,false,false,true,false,false,false],"height":1.5123581928141085},{"image_id":"5d4349e09ada47b0aa8b20a0d22c54ca","pose":[0.0797542,0.0285043,-0.996407,3.62156,0.996744,0.00951931,0.080054,-2.10242,0.0117672,-0.999548,-0.0276513,1.56537,0,0,0,1],"included":true,"visible":[false,true,true,false,false,false,false,false,false,true,true,true,true,false,false,false,false,false,false,false,false,false,false,true,false,false,false,false,true,false,true],"unobstructed":[false,true,false,false,false,false,false,false,false,false,false,false,false,false,false,false,false,false,false,false,false,false,false,false,false,false,false,false,true,false,true],"height":1.5223704869964667},{"image_id":"298e09e5e1144e7b9762747370ca68a5","pose":[0.31306,-0.00832259,-0.949696,0.0361493,0.949732,0.00181293,0.313056,2.42577,-0.000883427,-0.999963,0.0084728,1.55565,0,0,0,1],"included":true,"visible":[true,false,false,false,true,false,true,true,true,false,false,false,false,true,true,false,true,true,true,true,false,false,false,true,true,true,false,false,false,false,false],"unobstructed":[false,false,false,false,true,false,false,true,true,false,false,false,false,false,false,false,true,true,false,false,false,false,false,false,false,true,false,false,false,false,false],"height":1.5224640014863746},{"image_id":"f8e13e216dd6477ea05e694e2f1478d9","pose":[0.998766,0.0109404,-0.0484187,2.48582,0.0482994,0.0109393,0.998773,-1.19789,0.0114569,-0.99988,0.0103984,1.57265,0,0,0,1],"included":true,"visible":[false,true,false,true,true,false,true,false,true,true,true,tr
ue,false,false,false,true,true,true,false,false,false,false,true,true,true,false,true,false,false,false,true],"unobstructed":[false,true,false,false,false,false,false,false,false,false,true,true,false,false,false,true,false,false,false,false,false,false,false,false,false,false,true,false,false,false,false],"height":1.5206684141424807},{"image_id":"e5f7cab8517b47399eda8866f0e30ab3","pose":[-0.660778,-0.00608519,-0.750556,7.08848,0.750578,-0.00299603,-0.660773,1.44662,0.00177251,-0.999977,0.00654814,1.57334,0,0,0,1],"included":true,"visible":[false,false,false,false,false,true,true,false,false,false,true,false,true,true,false,false,false,false,true,true,false,false,true,true,false,true,false,false,true,false,false],"unobstructed":[false,false,false,false,false,false,true,false,false,false,true,false,false,true,false,false,false,false,true,true,false,false,true,true,false,false,false,false,false,false,false],"height":1.5050461478205863},{"image_id":"a924a5855b954d68b26ebe82ab61c71d","pose":[-0.120428,-0.000846936,-0.992721,4.79789,0.992705,0.00559062,-0.12043,-2.05172,0.0056522,-0.999984,0.000168504,1.57612,0,0,0,1],"included":true,"visible":[false,true,true,false,false,false,true,false,true,false,true,false,true,false,false,false,true,false,false,false,false,true,true,true,false,false,true,false,false,true,false],"unobstructed":[false,false,true,false,false,false,false,false,false,false,false,false,false,false,false,false,false,false,false,false,false,false,false,false,false,false,true,false,false,false,false],"height":1.5244946264278192}] -------------------------------------------------------------------------------- /connectivity/scans.txt: -------------------------------------------------------------------------------- 1 | 17DRP5sb8fy 2 | 1LXtFkjw3qL 3 | 1pXnuDYAj8r 4 | 29hnd4uzFmX 5 | 2azQ1b91cZZ 6 | 2n8kARJN3HM 7 | 2t7WUuJeko7 8 | 5LpN3gDmAk7 9 | 5q7pvUzZiYa 10 | 5ZKStnWn8Zo 11 | 759xd9YjKW5 12 | 7y3sRwLe3Va 13 | 8194nk5LbLH 14 | 82sE5b5pLXE 15 | 8WUmhLawc2A 16 | aayBHfsNo7d 17 | ac26ZMwG7aT 18 | ARNzJeq3xxb 19 | B6ByNegPMKs 20 | b8cTxDM8gDG 21 | cV4RVeZvu5T 22 | D7G3Y4RVNrH 23 | D7N2EKCX4Sj 24 | dhjEzFoUFzH 25 | E9uDoFAP3SH 26 | e9zR4mvMWw7 27 | EDJbREhghzL 28 | EU6Fwq7SyZv 29 | fzynW3qQPVF 30 | GdvgFV5R1Z5 31 | gTV8FGcVJC9 32 | gxdoqLR6rwA 33 | gYvKGZ5eRqb 34 | gZ6f7yhEvPG 35 | HxpKQynjfin 36 | i5noydFURQK 37 | JeFG25nYj2p 38 | JF19kD82Mey 39 | jh4fc5c5qoQ 40 | JmbYfDe2QKZ 41 | jtcxE69GiFV 42 | kEZ7cmS4wCh 43 | mJXqzFtmKg4 44 | oLBMNvg9in8 45 | p5wJjkQkbXX 46 | pa4otMbVnkk 47 | pLe4wQe7qrG 48 | Pm6F8kyY3z2 49 | pRbA3pwrgk9 50 | PuKPg4mmafe 51 | PX4nDJXEHrG 52 | q9vSo1VnCiC 53 | qoiz87JEwZ2 54 | QUCTc6BB5sX 55 | r1Q1Z4BcV1o 56 | r47D5H71a5s 57 | rPc6DW4iMge 58 | RPmz2sHmrrY 59 | rqfALeAoiTq 60 | s8pcmisQ38h 61 | S9hNv5qa7GM 62 | sKLMLpTHeUy 63 | SN83YJsR3w2 64 | sT4fr6TAbpF 65 | TbHJrupSAjP 66 | ULsKaCPVFJR 67 | uNb9QFRL6hY 68 | ur6pFq6Qu1A 69 | UwV83HsGsw3 70 | Uxmj2M2itWa 71 | V2XKFyX4ASd 72 | VFuaQ6m2Qom 73 | VLzqgDo317F 74 | Vt2qJdWjCF2 75 | VVfe2KiqLaN 76 | Vvot9Ly1tCj 77 | vyrNrziPKCB 78 | VzqfbhrpDEA 79 | wc2JMjhGNzB 80 | WYY7iVyf5p8 81 | X7HyMhZNoso 82 | x8F5xyUWy9e 83 | XcA2TqTSSAj 84 | YFuZgdQ5vWj 85 | YmJkqBEsHnH 86 | yqstnuAEVhm 87 | YVUC4YcDtcY 88 | Z6MFQCViBuw 89 | ZMojNkEp431 90 | zsNo4HB9uLZ -------------------------------------------------------------------------------- /connectivity/scans_dialog.txt: -------------------------------------------------------------------------------- 1 | JmbYfDe2QKZ 2 | gZ6f7yhEvPG 3 | WYY7iVyf5p8 4 | rqfALeAoiTq 5 | UwV83HsGsw3 6 | 
mJXqzFtmKg4 7 | ur6pFq6Qu1A 8 | rPc6DW4iMge 9 | D7G3Y4RVNrH 10 | RPmz2sHmrrY 11 | sKLMLpTHeUy 12 | s8pcmisQ38h 13 | q9vSo1VnCiC 14 | 82sE5b5pLXE 15 | e9zR4mvMWw7 16 | qoiz87JEwZ2 17 | Uxmj2M2itWa 18 | x8F5xyUWy9e 19 | kEZ7cmS4wCh 20 | pLe4wQe7qrG 21 | HxpKQynjfin 22 | X7HyMhZNoso 23 | zsNo4HB9uLZ 24 | ARNzJeq3xxb 25 | PuKPg4mmafe 26 | JF19kD82Mey 27 | VVfe2KiqLaN 28 | V2XKFyX4ASd 29 | ULsKaCPVFJR 30 | gTV8FGcVJC9 31 | 1LXtFkjw3qL 32 | 2t7WUuJeko7 33 | 1pXnuDYAj8r 34 | wc2JMjhGNzB 35 | fzynW3qQPVF 36 | jh4fc5c5qoQ 37 | D7N2EKCX4Sj 38 | Z6MFQCViBuw 39 | VLzqgDo317F 40 | 2n8kARJN3HM 41 | 2azQ1b91cZZ 42 | vyrNrziPKCB 43 | QUCTc6BB5sX 44 | 759xd9YjKW5 45 | XcA2TqTSSAj 46 | SN83YJsR3w2 47 | r1Q1Z4BcV1o 48 | oLBMNvg9in8 49 | YmJkqBEsHnH 50 | 5LpN3gDmAk7 51 | B6ByNegPMKs 52 | sT4fr6TAbpF 53 | YVUC4YcDtcY 54 | dhjEzFoUFzH 55 | GdvgFV5R1Z5 56 | VzqfbhrpDEA 57 | ZMojNkEp431 58 | gYvKGZ5eRqb 59 | 17DRP5sb8fy 60 | TbHJrupSAjP 61 | EDJbREhghzL 62 | ac26ZMwG7aT 63 | r47D5H71a5s 64 | pa4otMbVnkk 65 | EU6Fwq7SyZv 66 | jtcxE69GiFV 67 | i5noydFURQK 68 | gxdoqLR6rwA 69 | E9uDoFAP3SH 70 | 5q7pvUzZiYa 71 | aayBHfsNo7d 72 | b8cTxDM8gDG 73 | 8WUmhLawc2A 74 | JeFG25nYj2p 75 | yqstnuAEVhm 76 | Vvot9Ly1tCj 77 | p5wJjkQkbXX 78 | cV4RVeZvu5T 79 | 5ZKStnWn8Zo 80 | 8194nk5LbLH 81 | Vt2qJdWjCF2 82 | PX4nDJXEHrG 83 | VFuaQ6m2Qom 84 | pRbA3pwrgk9 85 | uNb9QFRL6hY 86 | S9hNv5qa7GM 87 | Pm6F8kyY3z2 88 | 29hnd4uzFmX 89 | 7y3sRwLe3Va 90 | YFuZgdQ5vWj -------------------------------------------------------------------------------- /img_features/.gitkeep: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yeezhu/CMN.pytorch/b4e3c3ca34668cb8031d525132b013ced472ed87/img_features/.gitkeep -------------------------------------------------------------------------------- /include/Benchmark.hpp: -------------------------------------------------------------------------------- 1 | #ifndef MATTERSIM_BENCHMARK 2 | #define MATTERSIM_BENCHMARK 3 | 4 | #include 5 | 6 | namespace mattersim { 7 | 8 | class Timer { 9 | public: 10 | Timer(); 11 | virtual void Start(); 12 | virtual void Stop(); 13 | virtual void Reset(); 14 | virtual float MilliSeconds(); 15 | virtual float MicroSeconds(); 16 | virtual float Seconds(); 17 | inline bool running() { return running_; } 18 | 19 | protected: 20 | bool running_; 21 | std::chrono::steady_clock::time_point start_; 22 | std::chrono::steady_clock::duration elapsed_; 23 | }; 24 | } 25 | 26 | #endif // MATTERSIM_BENCHMARK 27 | -------------------------------------------------------------------------------- /include/MatterSim.hpp: -------------------------------------------------------------------------------- 1 | #ifndef MATTERSIM_HPP 2 | #define MATTERSIM_HPP 3 | 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | 10 | #include 11 | 12 | #ifdef OSMESA_RENDERING 13 | #define GL_GLEXT_PROTOTYPES 14 | #include 15 | #include 16 | #elif defined (EGL_RENDERING) 17 | #include 18 | #include 19 | #else 20 | #include 21 | #endif 22 | 23 | #define GLM_FORCE_RADIANS 24 | #include 25 | #include 26 | #include 27 | #include "glm/ext.hpp" 28 | 29 | #include "Benchmark.hpp" 30 | #include "NavGraph.hpp" 31 | 32 | namespace mattersim { 33 | 34 | struct Viewpoint: std::enable_shared_from_this { 35 | Viewpoint(std::string viewpointId, unsigned int ix, double x, double y, double z, 36 | double rel_heading, double rel_elevation, double rel_distance) : 37 | viewpointId(viewpointId), ix(ix), x(x), y(y), z(z), rel_heading(rel_heading), 38 | 
rel_elevation(rel_elevation), rel_distance(rel_distance) 39 | {} 40 | 41 | //! Viewpoint identifier 42 | std::string viewpointId; 43 | //! Viewpoint index into connectivity graph 44 | unsigned int ix; 45 | //! 3D position in world coordinates 46 | double x; 47 | double y; 48 | double z; 49 | //! Heading relative to the camera 50 | double rel_heading; 51 | //! Elevation relative to the camera 52 | double rel_elevation; 53 | //! Distance from the agent 54 | double rel_distance; 55 | }; 56 | 57 | typedef std::shared_ptr ViewpointPtr; 58 | struct ViewpointPtrComp { 59 | inline bool operator() (const ViewpointPtr& l, const ViewpointPtr& r){ 60 | return sqrt(l->rel_heading*l->rel_heading+l->rel_elevation*l->rel_elevation) 61 | < sqrt(r->rel_heading*r->rel_heading+r->rel_elevation*r->rel_elevation); 62 | } 63 | }; 64 | 65 | /** 66 | * Simulator state class. 67 | */ 68 | struct SimState: std::enable_shared_from_this{ 69 | //! Building / scan environment identifier 70 | std::string scanId; 71 | //! Number of frames since the last newEpisode() call 72 | unsigned int step = 0; 73 | //! RGB image (in BGR channel order) from the agent's current viewpoint 74 | cv::Mat rgb; 75 | //! Depth image taken from the agent's current viewpoint 76 | cv::Mat depth; 77 | //! Agent's current 3D location 78 | ViewpointPtr location; 79 | //! Agent's current camera heading in radians 80 | double heading = 0; 81 | //! Agent's current camera elevation in radians 82 | double elevation = 0; 83 | //! Agent's current view [0-35] (set only when viewing angles are discretized) 84 | //! [0-11] looking down, [12-23] looking at horizon, [24-35] looking up 85 | unsigned int viewIndex = 0; 86 | //! Vector of nearby navigable locations representing state-dependent action candidates, i.e. 87 | //! viewpoints you can move to. Index 0 is always to remain at the current viewpoint. 88 | //! The remaining viewpoints are sorted by their angular distance from the centre of the image. 89 | std::vector navigableLocations; 90 | }; 91 | 92 | typedef std::shared_ptr SimStatePtr; 93 | 94 | 95 | /** 96 | * Main class for accessing an instance of the simulator environment. 97 | */ 98 | class Simulator { 99 | 100 | public: 101 | Simulator(); 102 | 103 | ~Simulator(); 104 | 105 | /** 106 | * Set a non-standard path to the Matterport3D dataset. 107 | * The provided directory must contain subdirectories of the form: 108 | * "/matterport_skybox_images/". Default is "./data/v1/scans/". 109 | */ 110 | void setDatasetPath(const std::string& path); 111 | 112 | /** 113 | * Set a non-standard path to the viewpoint connectivity graphs. The provided directory must contain files 114 | * of the form "/_connectivity.json". Default is "./connectivity" (the graphs provided 115 | * by this repo). 116 | */ 117 | void setNavGraphPath(const std::string& path); 118 | 119 | /** 120 | * Enable or disable rendering. Useful for testing. Default is true (enabled). 121 | */ 122 | void setRenderingEnabled(bool value); 123 | 124 | /** 125 | * Sets camera resolution. Default is 320 x 240. 126 | */ 127 | void setCameraResolution(int width, int height); 128 | 129 | /** 130 | * Sets camera vertical field-of-view in radians. Default is 0.8, approx 46 degrees. 131 | */ 132 | void setCameraVFOV(double vfov); 133 | 134 | /** 135 | * Set the camera elevation min and max limits in radians. Default is +-0.94 radians. 136 | * @return true if successful. 137 | */ 138 | bool setElevationLimits(double min, double max); 139 | 140 | /** 141 | * Enable or disable discretized viewing angles. 
When enabled, heading and 142 | * elevation changes will be restricted to 30 degree increments from zero, 143 | * with left/right/up/down movement triggered by the sign of the makeAction 144 | * heading and elevation parameters. Default is false (disabled). 145 | */ 146 | void setDiscretizedViewingAngles(bool value); 147 | 148 | /** 149 | * Enable or disable preloading of images from disk to CPU memory. Default is false (disabled). 150 | * Enabled is better for training models, but will cause a delay when starting the simulator. 151 | */ 152 | void setPreloadingEnabled(bool value); 153 | 154 | /** 155 | * Enable or disable rendering of depth images. Default is false (disabled). 156 | */ 157 | void setDepthEnabled(bool value); 158 | 159 | /** 160 | * Set the number of environments in the batch. Default is 1. 161 | */ 162 | void setBatchSize(unsigned int size); 163 | 164 | /** 165 | * Set the cache size for storing pano images in gpu memory. Default is 200. Should be comfortably 166 | * larger than the batch size. 167 | */ 168 | void setCacheSize(unsigned int size); 169 | 170 | /** 171 | * Set the random seed for episodes where viewpoint is not provided. 172 | */ 173 | void setSeed(int seed); 174 | 175 | /** 176 | * Initialize the simulator. Further configuration won't take any effect from now on. 177 | */ 178 | void initialize(); 179 | 180 | /** 181 | * Starts a new episode. If a viewpoint is not provided initialization will be random. 182 | * @param scanId - sets which scene is used, e.g. "2t7WUuJeko7" 183 | * @param viewpointId - sets the initial viewpoint location, e.g. "cc34e9176bfe47ebb23c58c165203134" 184 | * @param heading - set the agent's initial camera heading in radians. With z-axis up, 185 | * heading is defined relative to the y-axis (turning right is positive). 186 | * @param elevation - set the initial camera elevation in radians, measured from the horizon 187 | * defined by the x-y plane (up is positive). 188 | */ 189 | void newEpisode(const std::vector& scanId, const std::vector& viewpointId, 190 | const std::vector& heading, const std::vector& elevation); 191 | 192 | /** 193 | * Starts a new episode at a random viewpoint. 194 | * @param scanId - sets which scene is used, e.g. "2t7WUuJeko7" 195 | */ 196 | void newRandomEpisode(const std::vector& scanId); 197 | 198 | /** 199 | * Returns the current batch of environment states including RGB images and available actions. 200 | */ 201 | const std::vector& getState(); 202 | 203 | /** @brief Select an action. 204 | * 205 | * An RL agent will sample an action here. A task-specific reward can be determined 206 | * based on the location, heading, elevation, etc. of the resulting state. 207 | * @param index - an index into the set of feasible actions defined by getState()->navigableLocations. 208 | * @param heading - desired heading change in radians. With z-axis up, heading is defined 209 | * relative to the y-axis (turning right is positive). 210 | * @param elevation - desired elevation change in radians, measured from the horizon defined 211 | * by the x-y plane (up is positive). 212 | */ 213 | void makeAction(const std::vector& index, const std::vector& heading, 214 | const std::vector& elevation); 215 | 216 | /** 217 | * Closes the environment and releases underlying texture resources, OpenGL contexts, etc. 218 | */ 219 | void close(); 220 | 221 | /** 222 | * Reset the rendering timers that run automatically. 223 | */ 224 | void resetTimers(); 225 | 226 | /** 227 | * Return a formatted timing string. 
228 | */ 229 | std::string timingInfo(); 230 | 231 | private: 232 | const int headingCount = 12; // 12 heading values in discretized views 233 | const double elevationIncrement = M_PI/6.0; // 30 degrees discretized up/down 234 | void populateNavigable(); 235 | void setHeadingElevation(const std::vector& heading, const std::vector& elevation); 236 | void renderScene(); 237 | #ifdef OSMESA_RENDERING 238 | void *buffer; 239 | OSMesaContext ctx; 240 | #elif defined (EGL_RENDERING) 241 | EGLDisplay eglDpy; 242 | GLuint FramebufferName; 243 | #else 244 | GLuint FramebufferName; 245 | #endif 246 | std::vector states; 247 | bool initialized; 248 | bool renderingEnabled; 249 | bool discretizeViews; 250 | bool preloadImages; 251 | bool renderDepth; 252 | int width; 253 | int height; 254 | int randomSeed; 255 | unsigned int cacheSize; 256 | unsigned int batchSize; 257 | double vfov; 258 | double minElevation; 259 | double maxElevation; 260 | glm::mat4 Projection; 261 | glm::mat4 View; 262 | glm::mat4 Model; 263 | glm::mat4 Scale; 264 | glm::mat4 RotateX; 265 | glm::mat4 RotateZ; 266 | GLint ProjMat; 267 | GLint ModelViewMat; 268 | GLint vertex; 269 | GLint isDepth; 270 | GLuint vao_cube; 271 | GLuint vbo_cube_vertices; 272 | GLuint glProgram; 273 | GLuint glShaderV; 274 | GLuint glShaderF; 275 | std::string datasetPath; 276 | std::string navGraphPath; 277 | Timer preloadTimer; // Preloading images from disk into cpu memory 278 | Timer loadTimer; // Loading textures from disk or cpu memory onto gpu 279 | Timer renderTimer; // Rendering time 280 | Timer gpuReadTimer; // Reading rendered images from gpu back to cpu memory 281 | Timer processTimer; // Total run time for simulator 282 | Timer wallTimer; // Wall clock timer 283 | unsigned int frames; 284 | }; 285 | } 286 | 287 | #endif 288 | -------------------------------------------------------------------------------- /include/NavGraph.hpp: -------------------------------------------------------------------------------- 1 | #ifndef NAVGRAPH_HPP 2 | #define NAVGRAPH_HPP 3 | 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include 10 | #include 11 | 12 | #include 13 | #include 14 | 15 | #ifdef OSMESA_RENDERING 16 | #define GL_GLEXT_PROTOTYPES 17 | #include 18 | #include 19 | #elif defined (EGL_RENDERING) 20 | #include 21 | #include 22 | #else 23 | #include 24 | #endif 25 | 26 | #define GLM_FORCE_RADIANS 27 | #include 28 | #include 29 | #include 30 | 31 | namespace mattersim { 32 | 33 | static void assertOpenGLError(const std::string& msg) { 34 | GLenum error = glGetError(); 35 | if (error != GL_NO_ERROR) { 36 | std::stringstream s; 37 | s << "OpenGL error 0x" << std::hex << error << " at " << msg; 38 | throw std::runtime_error(s.str()); 39 | } 40 | } 41 | #ifdef EGL_RENDERING 42 | static void assertEGLError(const std::string& msg) { 43 | EGLint error = eglGetError(); 44 | 45 | if (error != EGL_SUCCESS) { 46 | std::stringstream s; 47 | s << "EGL error 0x" << std::hex << error << " at " << msg; 48 | throw std::runtime_error(s.str()); 49 | } 50 | } 51 | #endif 52 | 53 | /** 54 | * Navigation graph indicating which panoramic viewpoints are adjacent, and also 55 | * containing (optionally pre-loaded) skybox / cubemap images and textures. 56 | * Class is a singleton to ensure images and textures are only loaded once. 
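 * A usage sketch (hypothetical values; NavGraph is normally driven by the
 * Simulator rather than used directly, and the paths shown are the defaults
 * documented in MatterSim.hpp):
 *   NavGraph& graph = NavGraph::getInstance("./connectivity", "./data/v1/scans", false, false, 1, 200);
 *   unsigned int ix = graph.index(scanId, viewpointId);
 *   GLuint rgbTex = graph.cubemapTextures(scanId, ix).first; // .second is the depth texture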
57 | */ 58 | class NavGraph final { 59 | 60 | private: 61 | 62 | NavGraph(const std::string& navGraphPath, const std::string& datasetPath, 63 | bool preloadImages, bool renderDepth, int randomSeed, unsigned int cacheSize); 64 | 65 | ~NavGraph(); 66 | 67 | public: 68 | // Delete the default, copy and move constructors 69 | NavGraph() = delete; 70 | NavGraph(const NavGraph&) = delete; 71 | NavGraph& operator=(const NavGraph&) = delete; 72 | NavGraph(NavGraph&&) = delete; 73 | NavGraph& operator=(NavGraph&&) = delete; 74 | 75 | /** 76 | * First call will load the navigation graph from disk and (optionally) preload the 77 | * cubemap images into memory. 78 | * @param navGraphPath - directory containing json viewpoint connectivity graphs 79 | * @param datasetPath - directory containing a data directory for each Matterport scan id 80 | * @param preloadImages - if true, all cubemap images will be loaded into CPU memory immediately 81 | * @param renderDepth - if true, depth map images are also required 82 | * @param randomSeed - only used for randomViewpoint function 83 | * @param cacheSize - number of pano textures to keep in GPU memory 84 | */ 85 | static NavGraph& getInstance(const std::string& navGraphPath, const std::string& datasetPath, 86 | bool preloadImages, bool renderDepth, int randomSeed, unsigned int cacheSize); 87 | 88 | /** 89 | * Select a random viewpoint from a scan 90 | */ 91 | const std::string& randomViewpoint(const std::string& scanId); 92 | 93 | /** 94 | * Find the index of a selected viewpointId 95 | */ 96 | unsigned int index(const std::string& scanId, const std::string& viewpointId) const; 97 | 98 | /** 99 | * ViewpointId of a selected viewpoint index 100 | */ 101 | const std::string& viewpoint(const std::string& scanId, unsigned int ix) const; 102 | 103 | /** 104 | * Camera rotation matrix for a selected viewpoint index 105 | */ 106 | const glm::mat4& cameraRotation(const std::string& scanId, unsigned int ix) const; 107 | 108 | /** 109 | * Camera position vector for a selected viewpoint index 110 | */ 111 | const glm::vec3& cameraPosition(const std::string& scanId, unsigned int ix) const; 112 | 113 | /** 114 | * Return a list of other viewpoint indices that are reachable from a selected viewpoint index 115 | */ 116 | std::vector adjacentViewpointIndices(const std::string& scanId, unsigned int ix) const; 117 | 118 | /** 119 | * Get cubemap RGB (and optionally, depth) textures for a selected viewpoint index 120 | */ 121 | std::pair cubemapTextures(const std::string& scanId, unsigned int ix); 122 | 123 | /** 124 | * Free GPU memory associated with this viewpoint's textures 125 | */ 126 | void deleteCubemapTextures(const std::string& scanId, unsigned int ix); 127 | 128 | 129 | protected: 130 | 131 | /** 132 | * Helper class representing nodes in the navigation graph and their cubemap textures. 
133 | */ 134 | class Location { 135 | 136 | public: 137 | /** 138 | * Construct a location object from a json struct 139 | * @param viewpoint - json struct 140 | * @param skyboxDir - directory containing a data directory for each Matterport scan id 141 | * @param preload - if true, all cubemap images will be loaded into CPU memory immediately 142 | * @param depth - if true, depth textures will also be provided 143 | */ 144 | Location(const Json::Value& viewpoint, const std::string& skyboxDir, bool preload, bool depth); 145 | 146 | Location() = delete; // no default constructor 147 | 148 | /** 149 | * Return the cubemap RGB (and optionally, depth) textures for this viewpoint, which will 150 | * be loaded from CPU memory or disk if necessary 151 | */ 152 | std::pair cubemapTextures(); 153 | 154 | /** 155 | * Free GPU memory associated with RGB and depth textures at this location 156 | */ 157 | void deleteCubemapTextures(); 158 | 159 | std::string viewpointId; //! Unique Matterport identifier for every pano 160 | bool included; //! Some duplicated viewpoints have been excluded 161 | glm::mat4 rot; //! Camera pose rotation component 162 | glm::vec3 pos; //! Camera pose translation component 163 | std::vector unobstructed; //! Connections to other graph locations 164 | 165 | protected: 166 | 167 | /** 168 | * Load RGB (and optionally, depth) cubemap images from disk into CPU memory 169 | */ 170 | void loadCubemapImages(); 171 | 172 | /** 173 | * Create RGB (and optionally, depth) textures from cubemap images (e.g., in GPU memory) 174 | */ 175 | void loadCubemapTextures(); 176 | 177 | GLuint cubemap_texture; 178 | GLuint depth_texture; 179 | cv::Mat xpos; //! RGB images for faces of the cubemap 180 | cv::Mat xneg; 181 | cv::Mat ypos; 182 | cv::Mat yneg; 183 | cv::Mat zpos; 184 | cv::Mat zneg; 185 | cv::Mat xposD; //! Depth images for faces of the cubemap 186 | cv::Mat xnegD; 187 | cv::Mat yposD; 188 | cv::Mat ynegD; 189 | cv::Mat zposD; 190 | cv::Mat znegD; 191 | bool im_loaded; 192 | bool includeDepth; 193 | std::string skyboxDir; //! Path to skybox images 194 | }; 195 | typedef std::shared_ptr LocationPtr; 196 | 197 | 198 | /** 199 | * Helper class implementing a LRU cache for cubemap textures. 
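 * add() moves a location to the front of the usage list; once the cache
 * reaches its size limit, removeEldest() frees the GPU textures of the
 * location at the back of the list (the least recently used one).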
200 | */ 201 | class TextureCache { 202 | 203 | public: 204 | TextureCache(unsigned int size) : size(size) { 205 | cacheMap.reserve(size+1); 206 | } 207 | 208 | TextureCache() = delete; // no default constructor 209 | 210 | void add(LocationPtr loc) { 211 | auto map_it = cacheMap.find(loc); 212 | if (map_it != cacheMap.end()) { 213 | // Remove entry from middle of list 214 | cacheList.erase(map_it->second); 215 | cacheMap.erase(map_it); 216 | } 217 | // Add element to list and save iterator on map 218 | auto list_it = cacheList.insert(cacheList.begin(), loc); 219 | cacheMap.emplace(loc, list_it); 220 | if (cacheMap.size() >= size) { 221 | removeEldest(); 222 | } 223 | } 224 | 225 | void removeEldest() { 226 | if (cacheMap.empty()) { 227 | throw std::runtime_error("MatterSim: TextureCache is empty"); 228 | } 229 | LocationPtr loc = cacheList.back(); 230 | loc->deleteCubemapTextures(); 231 | cacheMap.erase(loc); 232 | cacheList.pop_back(); 233 | } 234 | 235 | private: 236 | unsigned int size; 237 | std::unordered_map<LocationPtr, std::list<LocationPtr>::iterator > cacheMap; 238 | std::list<LocationPtr> cacheList; 239 | }; 240 | 241 | 242 | std::map<std::string, std::vector<LocationPtr> > scanLocations; 243 | std::default_random_engine generator; 244 | TextureCache cache; 245 | }; 246 | 247 | } 248 | 249 | #endif 250 | -------------------------------------------------------------------------------- /include/cbf.h: -------------------------------------------------------------------------------- 1 | // NYU Depth V2 Dataset Matlab Toolbox 2 | // Authors: Nathan Silberman, Pushmeet Kohli, Derek Hoiem, Rob Fergus 3 | 4 | #ifndef CBF_H_ 5 | #define CBF_H_ 6 | 7 | #include <stdint.h> 8 | 9 | namespace cbf { 10 | 11 | // Filters the given depth image using a Cross Bilateral Filter. 12 | // 13 | // Args: 14 | // height - height of the images. 15 | // width - width of the images. 16 | // depth - HxW row-major ordered matrix. 17 | // intensity - HxW row-major ordered matrix. 18 | // mask - HxW row-major ordered matrix. 19 | // result - HxW row-major ordered matrix. 20 | // num_scales - the number of scales at which to perform the filtering. 21 | // sigma_s - the space sigma (in pixels) 22 | // sigma_r - the range sigma (in intensity values, 0-1) 23 | void cbf(int height, int width, uint8_t* depth, uint8_t* intensity, 24 | uint8_t* mask, uint8_t* result, unsigned num_scales, double* sigma_s, 25 | double* sigma_r); 26 | 27 | } // namespace 28 | 29 | #endif // CBF_H_ 30 | -------------------------------------------------------------------------------- /scripts/depth_to_skybox.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | ''' Script for generating depth skyboxes based on undistorted depth images, 4 | in order to support depth output in the simulator. The current version 5 | assumes that undistorted depth images are aligned to Matterport skyboxes, 6 | and uses simple blending. Images are downsized 50%. ''' 7 | 8 | import os 9 | import math 10 | import cv2 11 | import numpy as np 12 | from multiprocessing import Pool 13 | from numpy.linalg import inv,norm 14 | from StringIO import StringIO 15 | 16 | 17 | # Parameters 18 | DOWNSIZED_WIDTH = 512 19 | DOWNSIZED_HEIGHT = 512 20 | NUM_WORKER_PROCESSES = 20 21 | FILL_HOLES = True 22 | VISUALIZE_OUTPUT = False 23 | 24 | if FILL_HOLES: 25 | import sys 26 | sys.path.append('build') 27 | from MatterSim import cbf 28 | 29 | # Constants 30 | # Note: Matterport camera is really y=up, x=right, -z=look. 
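# Because the undistorted images and the skybox faces of a panorama
# (approximately) share a single camera centre, each undistorted image can be
# warped onto a skybox face with a pure-rotation homography
# H = K_skybox * R_wtc_skybox * R_ctw_image * inv(K_image),
# as computed in depth_to_skybox() below.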
31 | SKYBOX_WIDTH = 1024 32 | SKYBOX_HEIGHT = 1024 33 | base_dir = 'data/v1/scans' 34 | skybox_template = '%s/%s/matterport_skybox_images/%s_skybox%d_sami.jpg' 35 | color_template = '%s/%s/undistorted_color_images/%s_i%s.jpg' 36 | depth_template = '%s/%s/undistorted_depth_images/%s_d%s.png' 37 | camera_template = '%s/%s/undistorted_camera_parameters/%s.conf' 38 | skybox_depth_template = '%s/%s/matterport_skybox_images/%s_skybox_depth_small.png' 39 | 40 | 41 | # camera transform for skybox images 0-5 relative to image 1 42 | skybox_transforms = [ 43 | np.array([[1,0,0],[0,0,-1],[0,1,0]], dtype=np.double), #up (down) 44 | np.eye(3, dtype=np.double), 45 | np.array([[0,0,-1],[0,1,0],[1,0,0]], dtype=np.double), # right 46 | np.array([[-1,0,0],[0,1,0],[0,0,-1]], dtype=np.double), # 180 47 | np.array([[0,0,1],[0,1,0],[-1,0,0]], dtype=np.double), # left 48 | np.array([[1,0,0],[0,0,1],[0,-1,0]], dtype=np.double) # down (up) 49 | ] 50 | 51 | 52 | def camera_parameters(scan): 53 | ''' Returns two dicts containing undistorted camera intrinsics (3x3) and extrinsics (4x4), 54 | respectively, for a given scan. Viewpoint IDs are used as dict keys. ''' 55 | intrinsics = {} 56 | extrinsics = {} 57 | with open(camera_template % (base_dir,scan,scan)) as f: 58 | pos = -1 59 | for line in f.readlines(): 60 | if 'intrinsics_matrix' in line: 61 | intr = line.split() 62 | C = np.zeros((3, 3), np.double) 63 | C[0,0] = intr[1] # fx 64 | C[1,1] = intr[5] # fy 65 | C[0,2] = intr[3] # cx 66 | C[1,2] = intr[6] # cy 67 | C[2,2] = 1.0 68 | pos = 0 69 | elif pos >= 0 and pos < 6: 70 | q = line.find('.jpg') 71 | camera = line[q-37:q] 72 | if pos == 0: 73 | intrinsics[camera[:-2]] = C 74 | T = np.loadtxt(StringIO(line.split('jpg ')[1])).reshape((4,4)) 75 | # T is camera-to-world transform, invert for world-to-camera 76 | extrinsics[camera] = (T,inv(T)) 77 | pos += 1 78 | return intrinsics,extrinsics 79 | 80 | 81 | def z_to_euclid(K_inv, depth): 82 | ''' Takes inverse intrinsics matrix and a depth image. Returns a new depth image with 83 | depth converted from z-distance into euclidean distance from the camera centre. ''' 84 | 85 | assert len(depth.shape) == 2 86 | h = depth.shape[0] 87 | w = depth.shape[1] 88 | 89 | y,x = np.indices((h,w)) 90 | homo_pixels = np.vstack((x.flatten(),y.flatten(),np.ones((x.size)))) 91 | rays = K_inv.dot(homo_pixels) 92 | cos_theta = np.array([0,0,1]).dot(rays) / norm(rays,axis=0) 93 | 94 | output = depth / cos_theta.reshape(h,w) 95 | return output 96 | 97 | 98 | def instrinsic_matrix(width, height): 99 | ''' Construct an ideal camera intrinsic matrix. ''' 100 | K = np.zeros((3, 3), np.double) 101 | K[0,0] = width/2 #fx 102 | K[1,1] = height/2 #fy 103 | K[0,2] = width/2 #cx 104 | K[1,2] = height/2 #cy 105 | K[2,2] = 1.0 106 | return K 107 | 108 | 109 | 110 | def fill_joint_bilateral_filter(rgb, depth): 111 | ''' Fill holes in a 16bit depth image given corresponding rgb image ''' 112 | 113 | intensity = cv2.cvtColor(rgb, cv2.COLOR_BGR2GRAY) 114 | 115 | # Convert the depth image to uint8. 
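# (cbf operates on 8-bit data, so the 16-bit depth is scaled by its max into
# the 0-255 range here, holes are filled, and the result is scaled back to
# 16-bit at the end of this function.)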
116 | maxDepth = np.max(depth)+1 117 | depth = (depth.astype(np.float64)/maxDepth) 118 | depth[depth > 1] = 1 119 | depth = (depth*255).astype(np.uint8) 120 | 121 | # Convert to col major order 122 | depth = np.asfortranarray(depth) 123 | intensity = np.asfortranarray(intensity) 124 | mask = (depth == 0) 125 | result = np.zeros_like(depth) 126 | 127 | # Fill holes 128 | cbf(depth, intensity, mask, result) 129 | result = (result.astype(np.float64)/255*maxDepth).astype(np.uint16) 130 | return result 131 | 132 | 133 | def depth_to_skybox(scan, visualize=VISUALIZE_OUTPUT, fill_holes=FILL_HOLES): 134 | 135 | # Load camera parameters 136 | intrinsics,extrinsics = camera_parameters(scan) 137 | # Skybox camera intrinsics 138 | K_skybox = instrinsic_matrix(SKYBOX_WIDTH, SKYBOX_HEIGHT) 139 | 140 | pano_ids = list(set([item.split('_')[0] for item in intrinsics.keys()])) 141 | print 'Processing scan %s with %d panoramas' % (scan, len(pano_ids)) 142 | 143 | if visualize: 144 | cv2.namedWindow('RGB') 145 | cv2.namedWindow('Depth') 146 | cv2.namedWindow('Skybox') 147 | 148 | for pano in pano_ids: 149 | 150 | # Load undistorted depth and rgb images 151 | depth = {} 152 | rgb = {} 153 | for c in range(3): 154 | K_inv = inv(intrinsics['%s_i%d' % (pano,c)]) 155 | for i in range(6): 156 | name = '%d_%d' % (c,i) 157 | if visualize: 158 | rgb[name] = cv2.imread(color_template % (base_dir,scan,pano,name)) 159 | # Load 16bit grayscale image 160 | d_im = cv2.imread(depth_template % (base_dir,scan,pano,name), cv2.IMREAD_ANYDEPTH) 161 | depth[name] = z_to_euclid(K_inv, d_im) 162 | 163 | ims = [] 164 | for skybox_ix in range(6): 165 | 166 | # Load skybox image 167 | skybox = cv2.imread(skybox_template % (base_dir,scan,pano,skybox_ix)) 168 | 169 | # Skybox index 1 is the same orientation as camera image 1_5 170 | skybox_ctw,_ = extrinsics[pano + '_i1_5'] 171 | skybox_ctw = skybox_ctw[:3,:3].dot(skybox_transforms[skybox_ix]) 172 | skybox_wtc = inv(skybox_ctw) 173 | 174 | base_depth = np.zeros((SKYBOX_HEIGHT,SKYBOX_WIDTH), np.uint16) 175 | if visualize: 176 | base_rgb = np.zeros((SKYBOX_HEIGHT,SKYBOX_WIDTH,3), np.uint8) 177 | 178 | for camera in range(3): 179 | for angle in range(6): 180 | 181 | # Camera parameters 182 | im_name = '%d_%d' % (camera,angle) 183 | K_im = intrinsics[pano + '_i' + im_name[0]] 184 | T_ctw,T_wtc = extrinsics[pano + '_i' + im_name] 185 | R_ctw = T_ctw[:3,:3] 186 | 187 | # Check if this image can be skipped (facing away) 188 | z = np.array([0,0,1]) 189 | if R_ctw.dot(z).dot(skybox_ctw.dot(z)) < 0: 190 | continue 191 | 192 | # Compute homography 193 | H = K_skybox.dot(skybox_wtc.dot(R_ctw.dot(inv(K_im)))) 194 | 195 | # Warp and blend the depth image 196 | flip = cv2.flip(depth[im_name], 1) # flip around y-axis 197 | warp = cv2.warpPerspective(flip, H, (SKYBOX_HEIGHT,SKYBOX_WIDTH), flags=cv2.INTER_NEAREST) 198 | mask = cv2.warpPerspective(np.ones_like(flip), H, (SKYBOX_HEIGHT,SKYBOX_WIDTH), flags=cv2.INTER_LINEAR) 199 | mask[warp == 0] = 0 # Set mask to zero where we don't have any depth values 200 | mask = cv2.erode(mask,np.ones((3,3),np.uint8),iterations = 1) 201 | locs = np.where(mask == 1) 202 | base_depth[locs[0], locs[1]] = warp[locs[0], locs[1]] 203 | 204 | if visualize: 205 | # Warp and blend the rgb image 206 | flip = cv2.flip(rgb[im_name], 1) # flip around y-axis 207 | warp = cv2.warpPerspective(flip, H, (SKYBOX_HEIGHT,SKYBOX_WIDTH), flags=cv2.INTER_LINEAR) 208 | mask = cv2.warpPerspective(np.ones_like(flip), H, (SKYBOX_HEIGHT,SKYBOX_WIDTH), flags=cv2.INTER_LINEAR) 209 | mask = 
cv2.erode(mask,np.ones((3,3),np.uint8),iterations = 1) 210 | locs = np.where(mask == 1) 211 | base_rgb[locs[0], locs[1]] = warp[locs[0], locs[1]] 212 | 213 | depth_small = cv2.resize(cv2.flip(base_depth, 1),(DOWNSIZED_WIDTH,DOWNSIZED_HEIGHT),interpolation=cv2.INTER_NEAREST) # flip around y-axis, downsize 214 | if fill_holes: 215 | depth_filled = fill_joint_bilateral_filter(skybox, depth_small) # Fill holes 216 | ims.append(depth_filled) 217 | else: 218 | ims.append(depth_small) 219 | 220 | if visualize and False: 221 | cv2.imshow('Skybox', skybox) 222 | cv2.imshow('Depth', cv2.applyColorMap((depth_small/256).astype(np.uint8), cv2.COLORMAP_JET)) 223 | rgb_output = cv2.flip(base_rgb, 1) # flip around y-axis 224 | cv2.imshow('RGB', rgb_output) 225 | cv2.waitKey(0) 226 | 227 | newimg = np.concatenate(ims, axis=1) 228 | 229 | if visualize: 230 | maxDepth = np.max(newimg)+1 231 | newimg = (newimg.astype(np.float64)/maxDepth) 232 | newimg = (newimg*255).astype(np.uint8) 233 | cv2.imshow('Depth pano', cv2.applyColorMap(newimg, cv2.COLORMAP_JET)) 234 | cv2.waitKey(0) 235 | else: 236 | # Save output 237 | outfile = skybox_depth_template % (base_dir,scan,pano) 238 | assert cv2.imwrite(outfile, newimg), ('Could not write to %s' % outfile) 239 | 240 | if visualize: 241 | cv2.destroyAllWindows() 242 | print 'Completed scan %s' % (scan) 243 | 244 | 245 | 246 | if __name__ == '__main__': 247 | 248 | with open('connectivity/scans.txt') as f: 249 | scans = [scan.strip() for scan in f.readlines()] 250 | p = Pool(NUM_WORKER_PROCESSES) 251 | p.map(depth_to_skybox, scans) 252 | 253 | 254 | 255 | -------------------------------------------------------------------------------- /scripts/downsize_skybox.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | ''' Script for downsizing skybox images. 
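downsizeWithMerge (used in __main__ below) additionally concatenates the six
downsized faces into a single *_skybox_small.jpg strip per panorama, which is
the layout NavGraph::Location::loadCubemapImages() expects; downsize() keeps
the six faces as separate *_small.jpg files.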
''' 4 | 5 | import os 6 | import math 7 | import cv2 8 | import numpy as np 9 | from multiprocessing import Pool 10 | from depth_to_skybox import camera_parameters 11 | 12 | 13 | NUM_WORKER_PROCESSES = 20 14 | DOWNSIZED_WIDTH = 512 15 | DOWNSIZED_HEIGHT = 512 16 | 17 | # Constants 18 | SKYBOX_WIDTH = 1024 19 | SKYBOX_HEIGHT = 1024 20 | base_dir = 'data/v1/scans' 21 | skybox_template = '%s/%s/matterport_skybox_images/%s_skybox%d_sami.jpg' 22 | skybox_small_template = '%s/%s/matterport_skybox_images/%s_skybox%d_small.jpg' 23 | skybox_merge_template = '%s/%s/matterport_skybox_images/%s_skybox_small.jpg' 24 | 25 | 26 | 27 | def downsizeWithMerge(scan): 28 | # Load pano ids 29 | intrinsics,_ = camera_parameters(scan) 30 | pano_ids = list(set([item.split('_')[0] for item in intrinsics.keys()])) 31 | print 'Processing scan %s with %d panoramas' % (scan, len(pano_ids)) 32 | 33 | for pano in pano_ids: 34 | 35 | ims = [] 36 | for skybox_ix in range(6): 37 | 38 | # Load and downsize skybox image 39 | skybox = cv2.imread(skybox_template % (base_dir,scan,pano,skybox_ix)) 40 | ims.append(cv2.resize(skybox,(DOWNSIZED_WIDTH,DOWNSIZED_HEIGHT),interpolation=cv2.INTER_AREA)) 41 | 42 | # Save output 43 | newimg = np.concatenate(ims, axis=1) 44 | fn = skybox_merge_template % (base_dir,scan,pano) 45 | succ = cv2.imwrite(fn, newimg) 46 | assert succ 47 | 48 | 49 | def downsize(scan): 50 | 51 | # Load pano ids 52 | intrinsics,_ = camera_parameters(scan) 53 | pano_ids = list(set([item.split('_')[0] for item in intrinsics.keys()])) 54 | print 'Processing scan %s with %d panoramas' % (scan, len(pano_ids)) 55 | 56 | for pano in pano_ids: 57 | 58 | for skybox_ix in range(6): 59 | 60 | # Load and downsize skybox image 61 | skybox = cv2.imread(skybox_template % (base_dir,scan,pano,skybox_ix)) 62 | newimg = cv2.resize(skybox,(DOWNSIZED_WIDTH,DOWNSIZED_HEIGHT),interpolation=cv2.INTER_AREA) 63 | 64 | # Save output 65 | assert cv2.imwrite(skybox_small_template % (base_dir,scan,pano,skybox_ix), newimg) 66 | 67 | 68 | if __name__ == '__main__': 69 | 70 | with open('connectivity/scans.txt') as f: 71 | scans = [scan.strip() for scan in f.readlines()] 72 | p = Pool(NUM_WORKER_PROCESSES) 73 | p.map(downsizeWithMerge, scans) 74 | -------------------------------------------------------------------------------- /scripts/fill_depth.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | ''' Script for filling missing values in undistorted depth images. 
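Zero-valued (missing) depth pixels are filled with the simulator's cross
bilateral filter (cbf), guided by the grayscale intensity of the matching
color image, and results are written back as 16-bit *_filled.png images.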
''' 4 | 5 | import os 6 | import math 7 | import cv2 8 | import numpy as np 9 | from multiprocessing import Pool 10 | from depth_to_skybox import camera_parameters 11 | 12 | import sys 13 | sys.path.append('build') 14 | from MatterSim import cbf 15 | 16 | 17 | base_dir = 'data/v1/scans' 18 | color_template = '%s/%s/undistorted_color_images/%s_i%s.jpg' 19 | depth_template = '%s/%s/undistorted_depth_images/%s_d%s.png' 20 | filled_depth_template = '%s/%s/undistorted_depth_images/%s_d%s_filled.png' 21 | 22 | def fill_joint_bilateral_filter(scan): 23 | 24 | # Load camera parameters 25 | intrinsics,_ = camera_parameters(scan) 26 | pano_ids = list(set([item.split('_')[0] for item in intrinsics.keys()])) 27 | print 'Processing scan %s with %d panoramas' % (scan, len(pano_ids)) 28 | 29 | for pano in pano_ids: 30 | 31 | # Load undistorted depth and rgb images 32 | for c in range(3): 33 | for i in range(6): 34 | name = '%d_%d' % (c,i) 35 | rgb = cv2.imread(color_template % (base_dir,scan,pano,name)) 36 | intensity = cv2.cvtColor(rgb, cv2.COLOR_BGR2GRAY) 37 | 38 | # Load 16bit depth image 39 | depth = cv2.imread(depth_template % (base_dir,scan,pano,name), cv2.IMREAD_ANYDEPTH) 40 | 41 | # Convert the depth image to uint8. 42 | maxDepth = np.max(depth)+1 43 | depth = (depth.astype(np.float64)/maxDepth) 44 | depth[depth > 1] = 1 45 | depth = (depth*255).astype(np.uint8) 46 | 47 | #cv2.imshow('input', cv2.applyColorMap(depth, cv2.COLORMAP_JET)) 48 | 49 | # Convert to col major order 50 | depth = np.asfortranarray(depth) 51 | intensity = np.asfortranarray(intensity) 52 | mask = (depth == 0) 53 | result = np.zeros_like(depth) 54 | 55 | # Fill holes 56 | cbf(depth, intensity, mask, result) 57 | 58 | #cv2.imshow('result', cv2.applyColorMap(result, cv2.COLORMAP_JET)) 59 | #cv2.waitKey(0) 60 | 61 | result = (result.astype(np.float64)/255*maxDepth).astype(np.uint16) 62 | assert cv2.imwrite(filled_depth_template % (base_dir,scan,pano,name), result) 63 | 64 | 65 | if __name__ == '__main__': 66 | 67 | with open('connectivity/scans.txt') as f: 68 | scans = [scan.strip() for scan in f.readlines()] 69 | p = Pool(10) 70 | p.map(fill_joint_bilateral_filter, scans) 71 | 72 | 73 | -------------------------------------------------------------------------------- /scripts/precompute_img_features.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | ''' Script to precompute image features using a Caffe ResNet CNN, using 36 discretized views 4 | at each viewpoint in 30 degree increments, and the provided camera WIDTH, HEIGHT 5 | and VFOV parameters. 
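Each output TSV row contains scanId, viewpointId, image_w, image_h, vfov and
a base64-encoded float32 array of shape (36, 2048): one ResNet-152 'pool5'
feature vector per discretized view.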
''' 6 | 7 | import numpy as np 8 | import cv2 9 | import json 10 | import math 11 | import base64 12 | import csv 13 | import sys 14 | 15 | csv.field_size_limit(sys.maxsize) 16 | 17 | 18 | # Caffe and MatterSim need to be on the Python path 19 | sys.path.insert(0, 'build') 20 | import MatterSim 21 | 22 | #caffe_root = '../' # your caffe build 23 | #sys.path.insert(0, caffe_root + 'python') 24 | import caffe 25 | 26 | from timer import Timer 27 | 28 | 29 | TSV_FIELDNAMES = ['scanId', 'viewpointId', 'image_w','image_h', 'vfov', 'features'] 30 | VIEWPOINT_SIZE = 36 # Number of discretized views from one viewpoint 31 | FEATURE_SIZE = 2048 32 | BATCH_SIZE = 4 # Some fraction of viewpoint size - batch size 4 equals 11GB memory 33 | GPU_ID = 0 34 | PROTO = 'models/ResNet-152-deploy.prototxt' 35 | MODEL = 'models/ResNet-152-model.caffemodel' # You need to download this, see README.md 36 | #MODEL = 'models/resnet152_places365.caffemodel' 37 | OUTFILE = 'img_features/ResNet-152-imagenet.tsv' 38 | GRAPHS = 'connectivity/' 39 | 40 | # Simulator image parameters 41 | WIDTH=640 42 | HEIGHT=480 43 | VFOV=60 44 | 45 | 46 | def load_viewpointids(): 47 | viewpointIds = [] 48 | with open(GRAPHS+'scans.txt') as f: 49 | scans = [scan.strip() for scan in f.readlines()] 50 | for scan in scans: 51 | with open(GRAPHS+scan+'_connectivity.json') as j: 52 | data = json.load(j) 53 | for item in data: 54 | if item['included']: 55 | viewpointIds.append((scan, item['image_id'])) 56 | print 'Loaded %d viewpoints' % len(viewpointIds) 57 | return viewpointIds 58 | 59 | 60 | def transform_img(im): 61 | ''' Prep opencv 3 channel image for the network ''' 62 | im_orig = im.astype(np.float32, copy=True) 63 | im_orig -= np.array([[[103.1, 115.9, 123.2]]]) # BGR pixel mean 64 | blob = np.zeros((1, im.shape[0], im.shape[1], 3), dtype=np.float32) 65 | blob[0, :, :, :] = im_orig 66 | blob = blob.transpose((0, 3, 1, 2)) 67 | return blob 68 | 69 | 70 | def build_tsv(): 71 | # Set up the simulator 72 | sim = MatterSim.Simulator() 73 | sim.setCameraResolution(WIDTH, HEIGHT) 74 | sim.setCameraVFOV(math.radians(VFOV)) 75 | sim.setDiscretizedViewingAngles(True) 76 | sim.initialize() 77 | 78 | # Set up Caffe resnet 79 | caffe.set_device(GPU_ID) 80 | caffe.set_mode_gpu() 81 | net = caffe.Net(PROTO, MODEL, caffe.TEST) 82 | net.blobs['data'].reshape(BATCH_SIZE, 3, HEIGHT, WIDTH) 83 | 84 | count = 0 85 | t_render = Timer() 86 | t_net = Timer() 87 | with open(OUTFILE, 'wb') as tsvfile: 88 | writer = csv.DictWriter(tsvfile, delimiter = '\t', fieldnames = TSV_FIELDNAMES) 89 | 90 | # Loop all the viewpoints in the simulator 91 | viewpointIds = load_viewpointids() 92 | for scanId,viewpointId in viewpointIds: 93 | t_render.tic() 94 | # Loop all discretized views from this location 95 | blobs = [] 96 | features = np.empty([VIEWPOINT_SIZE, FEATURE_SIZE], dtype=np.float32) 97 | for ix in range(VIEWPOINT_SIZE): 98 | if ix == 0: 99 | sim.newEpisode([scanId], [viewpointId], [0], [math.radians(-30)]) 100 | elif ix % 12 == 0: 101 | sim.makeAction([0], [1.0], [1.0]) 102 | else: 103 | sim.makeAction([0], [1.0], [0]) 104 | 105 | state = sim.getState()[0] 106 | assert state.viewIndex == ix 107 | 108 | # Transform and save generated image 109 | blobs.append(transform_img(state.rgb)) 110 | 111 | t_render.toc() 112 | t_net.tic() 113 | # Run as many forward passes as necessary 114 | assert VIEWPOINT_SIZE % BATCH_SIZE == 0 115 | forward_passes = VIEWPOINT_SIZE / BATCH_SIZE 116 | ix = 0 117 | for f in range(forward_passes): 118 | for n in range(BATCH_SIZE): 119 | # Copy image blob to the net 120 
| net.blobs['data'].data[n, :, :, :] = blobs[ix] 121 | ix += 1 122 | # Forward pass 123 | output = net.forward() 124 | features[f*BATCH_SIZE:(f+1)*BATCH_SIZE, :] = net.blobs['pool5'].data[:,:,0,0] 125 | 126 | writer.writerow({ 127 | 'scanId': scanId, 128 | 'viewpointId': viewpointId, 129 | 'image_w': WIDTH, 130 | 'image_h': HEIGHT, 131 | 'vfov' : VFOV, 132 | 'features': base64.b64encode(features) 133 | }) 134 | count += 1 135 | t_net.toc() 136 | if count % 100 == 0: 137 | print 'Processed %d / %d viewpoints, %.1fs avg render time, %.1fs avg net time, projected %.1f hours' %\ 138 | (count,len(viewpointIds), t_render.average_time, t_net.average_time, 139 | (t_render.average_time+t_net.average_time)*len(viewpointIds)/3600) 140 | 141 | 142 | def read_tsv(infile): 143 | # Verify we can read a tsv 144 | in_data = [] 145 | with open(infile, "r+b") as tsv_in_file: 146 | reader = csv.DictReader(tsv_in_file, delimiter='\t', fieldnames = TSV_FIELDNAMES) 147 | for item in reader: 148 | item['image_h'] = int(item['image_h']) 149 | item['image_w'] = int(item['image_w']) 150 | item['vfov'] = int(item['vfov']) 151 | item['features'] = np.frombuffer(base64.decodestring(item['features']), 152 | dtype=np.float32).reshape((VIEWPOINT_SIZE, FEATURE_SIZE)) 153 | in_data.append(item) 154 | return in_data 155 | 156 | 157 | if __name__ == "__main__": 158 | 159 | build_tsv() 160 | data = read_tsv(OUTFILE) 161 | print 'Completed %d viewpoints' % len(data) 162 | 163 | -------------------------------------------------------------------------------- /scripts/precompute_optimal_policies.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | ''' Script for precomputing the optimal (shortest path) policy at each viewpoint. ''' 4 | 5 | from env import R2RBatch 6 | import json 7 | import os 8 | import argparse 9 | 10 | parser = argparse.ArgumentParser() 11 | parser.add_argument('--dir', default='./data/v1/scans') 12 | parser.add_argument('--split', default='train') 13 | args = parser.parse_args() 14 | 15 | r2r = R2RBatch(None, batch_size=1, splits=[args.split]) 16 | 17 | def mkdir_p(path): 18 | try: 19 | os.makedirs(path) 20 | except OSError as exc: 21 | if os.path.isdir(path): 22 | pass 23 | else: raise 24 | 25 | for scan in r2r.paths: 26 | for goal in r2r.paths[scan]: 27 | mkdir_p('{}/{}/policies'.format(args.dir, scan)) 28 | with open('{}/{}/policies/{}.json'.format(args.dir, scan, goal), 'w') as f: 29 | f.write(json.dumps(r2r.paths[scan][goal])) 30 | 31 | -------------------------------------------------------------------------------- /scripts/timer.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Fast R-CNN 3 | # Copyright (c) 2015 Microsoft 4 | # Licensed under The MIT License [see LICENSE for details] 5 | # Written by Ross Girshick 6 | # -------------------------------------------------------- 7 | 8 | import time 9 | 10 | class Timer(object): 11 | """A simple timer.""" 12 | def __init__(self): 13 | self.total_time = 0. 14 | self.calls = 0 15 | self.start_time = 0. 16 | self.diff = 0. 17 | self.average_time = 0. 
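    # A usage sketch: t = Timer(); t.tic(); <do work>; dt = t.toc(average=False).
    # toc() accumulates total_time and by default returns the running average
    # of all tic/toc intervals, in seconds.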
18 | 19 | def tic(self): 20 | # using time.time instead of time.clock because time time.clock 21 | # does not normalize for multithreading 22 | self.start_time = time.time() 23 | 24 | def toc(self, average=True): 25 | self.diff = time.time() - self.start_time 26 | self.total_time += self.diff 27 | self.calls += 1 28 | self.average_time = self.total_time / self.calls 29 | if average: 30 | return self.average_time 31 | else: 32 | return self.diff 33 | -------------------------------------------------------------------------------- /src/driver/driver.py: -------------------------------------------------------------------------------- 1 | import sys 2 | sys.path.append('build') 3 | import MatterSim 4 | import time 5 | import math 6 | import cv2 7 | import numpy as np 8 | 9 | WIDTH = 800 10 | HEIGHT = 600 11 | VFOV = math.radians(60) 12 | HFOV = VFOV*WIDTH/HEIGHT 13 | TEXT_COLOR = [230, 40, 40] 14 | 15 | cv2.namedWindow('Python RGB') 16 | cv2.namedWindow('Python Depth') 17 | 18 | sim = MatterSim.Simulator() 19 | sim.setCameraResolution(WIDTH, HEIGHT) 20 | sim.setCameraVFOV(VFOV) 21 | sim.setDepthEnabled(True) 22 | sim.initialize() 23 | #sim.newEpisode(['2t7WUuJeko7'], ['1e6b606b44df4a6086c0f97e826d4d15'], [0], [0]) 24 | #sim.newEpisode(['1LXtFkjw3qL'], ['0b22fa63d0f54a529c525afbf2e8bb25'], [0], [0]) 25 | sim.newRandomEpisode(['1LXtFkjw3qL']) 26 | 27 | heading = 0 28 | elevation = 0 29 | location = 0 30 | ANGLEDELTA = 5 * math.pi / 180 31 | 32 | print '\nPython Demo' 33 | print 'Use arrow keys to move the camera.' 34 | print 'Use number keys (not numpad) to move to nearby viewpoints indicated in the RGB view.\n' 35 | 36 | while True: 37 | sim.makeAction([location], [heading], [elevation]) 38 | location = 0 39 | heading = 0 40 | elevation = 0 41 | 42 | state = sim.getState()[0] 43 | locations = state.navigableLocations 44 | rgb = np.array(state.rgb, copy=False) 45 | for idx, loc in enumerate(locations[1:]): 46 | # Draw actions on the screen 47 | fontScale = 3.0/loc.rel_distance 48 | x = int(WIDTH/2 + loc.rel_heading/HFOV*WIDTH) 49 | y = int(HEIGHT/2 - loc.rel_elevation/VFOV*HEIGHT) 50 | cv2.putText(rgb, str(idx + 1), (x, y), cv2.FONT_HERSHEY_SIMPLEX, 51 | fontScale, TEXT_COLOR, thickness=3) 52 | cv2.imshow('Python RGB', rgb) 53 | 54 | depth = np.array(state.depth, copy=False) 55 | cv2.imshow('Python Depth', depth) 56 | k = cv2.waitKey(1) 57 | if k == -1: 58 | continue 59 | else: 60 | k = (k & 255) 61 | if k == ord('q'): 62 | break 63 | elif ord('1') <= k <= ord('9'): 64 | location = k - ord('0') 65 | if location >= len(locations): 66 | location = 0 67 | elif k == 81 or k == ord('a'): 68 | heading = -ANGLEDELTA 69 | elif k == 82 or k == ord('w'): 70 | elevation = ANGLEDELTA 71 | elif k == 83 or k == ord('d'): 72 | heading = ANGLEDELTA 73 | elif k == 84 or k == ord('s'): 74 | elevation = -ANGLEDELTA 75 | -------------------------------------------------------------------------------- /src/driver/mattersim_main.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | #include "MatterSim.hpp" 5 | 6 | using namespace mattersim; 7 | 8 | #define WIDTH 1280 9 | #define HEIGHT 720 10 | 11 | #ifndef M_PI 12 | #define M_PI (3.14159265358979323846) 13 | #endif 14 | 15 | int main(int argc, char *argv[]) { 16 | 17 | cv::namedWindow("C++ RGB"); 18 | cv::namedWindow("C++ Depth"); 19 | 20 | Simulator sim; 21 | 22 | // Sets resolution. 
Default is 320 x 240. 23 | sim.setCameraResolution(640,480); 24 | sim.setDepthEnabled(true); 25 | 26 | // Initialize the simulator. Further camera configuration won't take any effect from now on. 27 | sim.initialize(); 28 | 29 | std::cout << "\nC++ Demo" << std::endl; 30 | std::cout << "Showing some random viewpoints in one building." << std::endl; 31 | 32 | int i = 0; 33 | while(true) { 34 | i++; 35 | std::cout << "Episode #" << i << "\n"; 36 | 37 | // Starts a new episode. It is not needed right after initialize() but it doesn't cost much and the loop is nicer. 38 | sim.newRandomEpisode(std::vector<std::string>(1,"pa4otMbVnkk")); // Launches at a random location 39 | 40 | for (int k=0; k<500; k++) { 41 | 42 | // Get the state 43 | SimStatePtr state = sim.getState().at(0); // SimStatePtr is std::shared_ptr<SimState> 44 | 45 | // Which consists of: 46 | unsigned int n = state->step; 47 | cv::Mat rgb = state->rgb; // OpenCV CV_8UC3 type (i.e. 8bit color rgb) 48 | cv::Mat depth = state->depth; // OpenCV CV_16UC1 type (i.e. 16bit grayscale) 49 | ViewpointPtr location = state->location; // Viewpoint id, plus the x,y,z location of the viewpoint 50 | float heading = state->heading; 51 | float elevation = state->elevation; // camera parameters 52 | std::vector<ViewpointPtr> reachable = state->navigableLocations; // Where we can move to 53 | int locationIdx = 0; // Must be an index into reachable 54 | double headingChange = M_PI / 500; 55 | double elevationChange = 0; 56 | 57 | cv::imshow("C++ RGB", rgb); 58 | cv::imshow("C++ Depth", depth); 59 | cv::waitKey(10); 60 | 61 | sim.makeAction(std::vector<unsigned int>(1, locationIdx), 62 | std::vector<double>(1, headingChange), 63 | std::vector<double>(1, elevationChange)); 64 | 65 | } 66 | } 67 | 68 | // Closing happens automatically in the destructor, but after close() you can initialize again with different settings. 
69 | sim.close(); 70 | 71 | return 0; 72 | } 73 | -------------------------------------------------------------------------------- /src/lib/Benchmark.cpp: -------------------------------------------------------------------------------- 1 | #include <chrono> 2 | 3 | #include "Benchmark.hpp" 4 | 5 | namespace mattersim { 6 | 7 | Timer::Timer() 8 | : running_(false), 9 | elapsed_(0) {} 10 | 11 | void Timer::Start() { 12 | if (!running()) { 13 | start_ = std::chrono::steady_clock::now(); 14 | running_ = true; 15 | } 16 | } 17 | 18 | void Timer::Stop() { 19 | if (running()) { 20 | elapsed_ += std::chrono::steady_clock::now() - start_; 21 | running_ = false; 22 | } 23 | } 24 | 25 | void Timer::Reset() { 26 | if (running()) { 27 | running_ = false; 28 | } 29 | elapsed_ = std::chrono::steady_clock::duration(0); 30 | } 31 | 32 | float Timer::MicroSeconds() { 33 | if (running()) { 34 | elapsed_ += std::chrono::steady_clock::now() - start_; start_ = std::chrono::steady_clock::now(); // restart the interval so repeated queries don't double-count 35 | } 36 | return std::chrono::duration_cast<std::chrono::microseconds>(elapsed_).count(); 37 | } 38 | 39 | float Timer::MilliSeconds() { 40 | if (running()) { 41 | elapsed_ += std::chrono::steady_clock::now() - start_; start_ = std::chrono::steady_clock::now(); 42 | } 43 | return std::chrono::duration_cast<std::chrono::milliseconds>(elapsed_).count(); 44 | } 45 | 46 | float Timer::Seconds() { 47 | if (running()) { 48 | elapsed_ += std::chrono::steady_clock::now() - start_; start_ = std::chrono::steady_clock::now(); 49 | } 50 | return std::chrono::duration_cast<std::chrono::duration<float>>(elapsed_).count(); // fractional seconds 51 | } 52 | 53 | 54 | } 55 | -------------------------------------------------------------------------------- /src/lib/NavGraph.cpp: -------------------------------------------------------------------------------- 1 | #include <algorithm> 2 | #include <fstream> 3 | #include <iterator> 4 | #include <stdexcept> 5 | 6 | #include <json/json.h> 7 | #ifdef _OPENMP 8 | #include <omp.h> 9 | #endif 10 | #include "NavGraph.hpp" 11 | 12 | namespace mattersim { 13 | 14 | 15 | NavGraph::Location::Location(const Json::Value& viewpoint, const std::string& skyboxDir, 16 | bool preload, bool depth): skyboxDir(skyboxDir), im_loaded(false), 17 | includeDepth(depth), cubemap_texture(0), depth_texture(0) { 18 | 19 | viewpointId = viewpoint["image_id"].asString(); 20 | included = viewpoint["included"].asBool(); 21 | 22 | float posearr[16]; 23 | int i = 0; 24 | for (auto f : viewpoint["pose"]) { 25 | posearr[i++] = f.asFloat(); 26 | } 27 | // glm uses column-major order. Inputs are in row-major order. 
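// After the transpose, column 3 of rot holds the camera position: it is
// copied into pos and then reset below, leaving rot as the pure rotation
// component of the pose.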
28 | rot = glm::transpose(glm::make_mat4(posearr)); 29 | // glm access is col,row 30 | pos = glm::vec3{rot[3][0], rot[3][1], rot[3][2]}; 31 | rot[3] = {0,0,0,1}; // remove translation component 32 | 33 | for (auto u : viewpoint["unobstructed"]) { 34 | unobstructed.push_back(u.asBool()); 35 | } 36 | 37 | if (preload) { 38 | // Preload skybox images 39 | loadCubemapImages(); 40 | } 41 | }; 42 | 43 | 44 | void NavGraph::Location::loadCubemapImages() { 45 | cv::Mat rgb = cv::imread(skyboxDir + viewpointId + "_skybox_small.jpg"); 46 | int w = rgb.cols/6; 47 | int h = rgb.rows; 48 | xpos = rgb(cv::Rect(2*w, 0, w, h)); 49 | xneg = rgb(cv::Rect(4*w, 0, w, h)); 50 | ypos = rgb(cv::Rect(0*w, 0, w, h)); 51 | yneg = rgb(cv::Rect(5*w, 0, w, h)); 52 | zpos = rgb(cv::Rect(1*w, 0, w, h)); 53 | zneg = rgb(cv::Rect(3*w, 0, w, h)); 54 | if (xpos.empty() || xneg.empty() || ypos.empty() || yneg.empty() || zpos.empty() || zneg.empty()) { 55 | throw std::invalid_argument( "MatterSim: Could not open skybox RGB files at: " + skyboxDir + viewpointId + "_skybox_small.jpg"); 56 | } 57 | if (includeDepth) { 58 | // 16 bit grayscale images 59 | cv::Mat depth = cv::imread(skyboxDir + viewpointId + "_skybox_depth_small.png", CV_LOAD_IMAGE_ANYDEPTH); 60 | xposD = depth(cv::Rect(2*w, 0, w, h)); 61 | xnegD = depth(cv::Rect(4*w, 0, w, h)); 62 | yposD = depth(cv::Rect(0*w, 0, w, h)); 63 | ynegD = depth(cv::Rect(5*w, 0, w, h)); 64 | zposD = depth(cv::Rect(1*w, 0, w, h)); 65 | znegD = depth(cv::Rect(3*w, 0, w, h)); 66 | if (xposD.empty() || xnegD.empty() || yposD.empty() || ynegD.empty() || zposD.empty() || znegD.empty()) { 67 | throw std::invalid_argument( "MatterSim: Could not open skybox depth files at: " + skyboxDir + viewpointId + "_skybox_depth_small.png"); 68 | } 69 | } 70 | im_loaded = true; 71 | } 72 | 73 | 74 | void NavGraph::Location::loadCubemapTextures() { 75 | // RGB texture 76 | glActiveTexture(GL_TEXTURE0); 77 | glEnable(GL_TEXTURE_CUBE_MAP); 78 | glGenTextures(1, &cubemap_texture); 79 | glBindTexture(GL_TEXTURE_CUBE_MAP, cubemap_texture); 80 | glTexParameteri(GL_TEXTURE_CUBE_MAP, GL_TEXTURE_MAG_FILTER, GL_LINEAR); 81 | glTexParameteri(GL_TEXTURE_CUBE_MAP, GL_TEXTURE_MIN_FILTER, GL_LINEAR); 82 | glTexParameteri(GL_TEXTURE_CUBE_MAP, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE); 83 | glTexParameteri(GL_TEXTURE_CUBE_MAP, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE); 84 | glTexParameteri(GL_TEXTURE_CUBE_MAP, GL_TEXTURE_WRAP_R, GL_CLAMP_TO_EDGE); 85 | //use fast 4-byte alignment (default anyway) if possible 86 | glPixelStorei(GL_UNPACK_ALIGNMENT, (xneg.step & 3) ? 
1 : 4); 87 | //set length of one complete row in data (doesn't need to equal image.cols) 88 | glPixelStorei(GL_UNPACK_ROW_LENGTH, xneg.step/xneg.elemSize()); 89 | glTexImage2D(GL_TEXTURE_CUBE_MAP_POSITIVE_X, 0, GL_RGB, xpos.rows, xpos.cols, 0, GL_BGR, GL_UNSIGNED_BYTE, xpos.ptr()); 90 | glTexImage2D(GL_TEXTURE_CUBE_MAP_NEGATIVE_X, 0, GL_RGB, xneg.rows, xneg.cols, 0, GL_BGR, GL_UNSIGNED_BYTE, xneg.ptr()); 91 | glTexImage2D(GL_TEXTURE_CUBE_MAP_POSITIVE_Y, 0, GL_RGB, ypos.rows, ypos.cols, 0, GL_BGR, GL_UNSIGNED_BYTE, ypos.ptr()); 92 | glTexImage2D(GL_TEXTURE_CUBE_MAP_NEGATIVE_Y, 0, GL_RGB, yneg.rows, yneg.cols, 0, GL_BGR, GL_UNSIGNED_BYTE, yneg.ptr()); 93 | glTexImage2D(GL_TEXTURE_CUBE_MAP_POSITIVE_Z, 0, GL_RGB, zpos.rows, zpos.cols, 0, GL_BGR, GL_UNSIGNED_BYTE, zpos.ptr()); 94 | glTexImage2D(GL_TEXTURE_CUBE_MAP_NEGATIVE_Z, 0, GL_RGB, zneg.rows, zneg.cols, 0, GL_BGR, GL_UNSIGNED_BYTE, zneg.ptr()); 95 | assertOpenGLError("RGB texture"); 96 | if (includeDepth) { 97 | // Depth Texture 98 | glActiveTexture(GL_TEXTURE0); 99 | glEnable(GL_TEXTURE_CUBE_MAP); 100 | glGenTextures(1, &depth_texture); 101 | glBindTexture(GL_TEXTURE_CUBE_MAP, depth_texture); 102 | glTexParameteri(GL_TEXTURE_CUBE_MAP, GL_TEXTURE_MAG_FILTER, GL_NEAREST); 103 | glTexParameteri(GL_TEXTURE_CUBE_MAP, GL_TEXTURE_MIN_FILTER, GL_NEAREST); 104 | glTexParameteri(GL_TEXTURE_CUBE_MAP, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE); 105 | glTexParameteri(GL_TEXTURE_CUBE_MAP, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE); 106 | glTexParameteri(GL_TEXTURE_CUBE_MAP, GL_TEXTURE_WRAP_R, GL_CLAMP_TO_EDGE); 107 | //use fast 4-byte alignment (default anyway) if possible 108 | glPixelStorei(GL_UNPACK_ALIGNMENT, (xnegD.step & 3) ? 1 : 4); 109 | //set length of one complete row in data (doesn't need to equal image.cols) 110 | glPixelStorei(GL_UNPACK_ROW_LENGTH, xnegD.step/xnegD.elemSize()); 111 | glTexImage2D(GL_TEXTURE_CUBE_MAP_POSITIVE_X, 0, GL_RED, xposD.rows, xposD.cols, 0, GL_RED, GL_UNSIGNED_SHORT, xposD.ptr()); 112 | glTexImage2D(GL_TEXTURE_CUBE_MAP_NEGATIVE_X, 0, GL_RED, xnegD.rows, xnegD.cols, 0, GL_RED, GL_UNSIGNED_SHORT, xnegD.ptr()); 113 | glTexImage2D(GL_TEXTURE_CUBE_MAP_POSITIVE_Y, 0, GL_RED, yposD.rows, yposD.cols, 0, GL_RED, GL_UNSIGNED_SHORT, yposD.ptr()); 114 | glTexImage2D(GL_TEXTURE_CUBE_MAP_NEGATIVE_Y, 0, GL_RED, ynegD.rows, ynegD.cols, 0, GL_RED, GL_UNSIGNED_SHORT, ynegD.ptr()); 115 | glTexImage2D(GL_TEXTURE_CUBE_MAP_POSITIVE_Z, 0, GL_RED, zposD.rows, zposD.cols, 0, GL_RED, GL_UNSIGNED_SHORT, zposD.ptr()); 116 | glTexImage2D(GL_TEXTURE_CUBE_MAP_NEGATIVE_Z, 0, GL_RED, znegD.rows, znegD.cols, 0, GL_RED, GL_UNSIGNED_SHORT, znegD.ptr()); 117 | assertOpenGLError("Depth texture"); 118 | } 119 | } 120 | 121 | 122 | void NavGraph::Location::deleteCubemapTextures() { 123 | // no need to check existence, silently ignores errors 124 | glDeleteTextures(1, &cubemap_texture); 125 | glDeleteTextures(1, &depth_texture); 126 | cubemap_texture = 0; 127 | depth_texture = 0; 128 | } 129 | 130 | 131 | std::pair NavGraph::Location::cubemapTextures() { 132 | if (glIsTexture(cubemap_texture)){ 133 | return {cubemap_texture, depth_texture}; 134 | } 135 | if (!im_loaded) { 136 | loadCubemapImages(); 137 | } 138 | loadCubemapTextures(); 139 | return {cubemap_texture, depth_texture}; 140 | } 141 | 142 | 143 | NavGraph::NavGraph(const std::string& navGraphPath, const std::string& datasetPath, 144 | bool preloadImages, bool renderDepth, int randomSeed, unsigned int cacheSize) : cache(cacheSize) { 145 | 146 | generator.seed(randomSeed); 147 | 148 | auto textFile = 
NavGraph::NavGraph(const std::string& navGraphPath, const std::string& datasetPath,
144 |         bool preloadImages, bool renderDepth, int randomSeed, unsigned int cacheSize) : cache(cacheSize) {
145 | 
146 |     generator.seed(randomSeed);
147 | 
148 |     auto textFile = navGraphPath + "/scans.txt";
149 |     std::ifstream scansFile(textFile);
150 |     if (scansFile.fail()){
151 |         throw std::invalid_argument( "MatterSim: Could not open list of scans at: " +
152 |                 textFile + ", is path valid?" );
153 |     }
154 |     std::vector<std::string> scanIds;
155 |     std::copy(std::istream_iterator<std::string>(scansFile),
156 |             std::istream_iterator<std::string>(),
157 |             std::back_inserter(scanIds));
158 | 
159 |     #pragma omp parallel for
160 |     for (unsigned int i=0; i<scanIds.size(); ++i) {
161 |         std::string scanId = scanIds.at(i);
162 |         Json::Value root;
163 |         auto navGraphFile = navGraphPath + "/" + scanId + "_connectivity.json";
164 |         std::ifstream ifs(navGraphFile, std::ifstream::in);
165 |         if (ifs.fail()){
166 |             throw std::invalid_argument( "MatterSim: Could not open navigation graph file: " +
167 |                     navGraphFile + ", is path valid?" );
168 |         }
169 |         ifs >> root;
170 |         auto skyboxDir = datasetPath + "/" + scanId + "/matterport_skybox_images/";
171 |         #pragma omp critical
172 |         {
173 |             scanLocations.insert(std::pair<std::string, std::vector<LocationPtr> >
174 |                     (scanId, std::vector<LocationPtr>()));
175 |         }
176 |         for (auto viewpoint : root) {
177 |             Location l(viewpoint, skyboxDir, preloadImages, renderDepth);
178 |             #pragma omp critical
179 |             {
180 |                 scanLocations[scanId].push_back(std::make_shared<Location>(l));
181 |             }
182 |         }
183 |     }
184 | }
185 | 
186 | 
187 | NavGraph::~NavGraph() {
188 |     // free all remaining textures
189 |     for (auto scan : scanLocations) {
190 |         for (auto loc : scan.second) {
191 |             loc->deleteCubemapTextures();
192 |         }
193 |     }
194 | }
195 | 
196 | 
197 | NavGraph& NavGraph::getInstance(const std::string& navGraphPath, const std::string& datasetPath,
198 |         bool preloadImages, bool renderDepth, int randomSeed, unsigned int cacheSize){
199 |     // magic static
200 |     static NavGraph instance(navGraphPath, datasetPath, preloadImages, renderDepth, randomSeed, cacheSize);
201 |     return instance;
202 | }
203 | 
204 | 
205 | const std::string& NavGraph::randomViewpoint(const std::string& scanId) {
206 |     std::uniform_int_distribution<int> distribution(0,scanLocations.at(scanId).size()-1);
207 |     int start_ix = distribution(generator); // generates random starting index
208 |     int ix = start_ix;
209 |     while (!scanLocations.at(scanId).at(ix)->included) { // Don't start at an excluded viewpoint
210 |         ix++;
211 |         if (ix >= scanLocations.at(scanId).size()) ix = 0;
212 |         if (ix == start_ix) {
213 |             throw std::logic_error( "MatterSim: ScanId: " + scanId + " has no included viewpoints!");
214 |         }
215 |     }
216 |     return scanLocations.at(scanId).at(ix)->viewpointId;
217 | }
218 | 
219 | 
220 | unsigned int NavGraph::index(const std::string& scanId, const std::string& viewpointId) const {
221 |     int ix = -1;
222 |     for (int i = 0; i < scanLocations.at(scanId).size(); ++i) {
223 |         if (scanLocations.at(scanId).at(i)->viewpointId == viewpointId) {
224 |             if (!scanLocations.at(scanId).at(i)->included) {
225 |                 throw std::invalid_argument( "MatterSim: ViewpointId: " +
226 |                         viewpointId + ", is excluded from the connectivity graph." );
227 |             }
228 |             ix = i;
229 |             break;
230 |         }
231 |     }
232 |     if (ix < 0) {
233 |         throw std::invalid_argument( "MatterSim: Could not find viewpointId: " +
234 |                 viewpointId + ", is viewpoint id valid?" );
235 |     } else {
236 |         return ix;
237 |     }
238 | }
239 | 
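Each connectivity/*.json file parsed above is a list of viewpoint records with `included` and `unobstructed` fields. As an illustration (not repo code), the reachable neighbours of a viewpoint can be listed straight from the JSON, mirroring NavGraph::adjacentViewpointIndices defined below:

import json

def reachable_indices(connectivity_file, ix):
    # A neighbour j is reachable from viewpoint ix if the edge is
    # unobstructed and viewpoint j is itself included in the graph.
    with open(connectivity_file) as f:
        graph = json.load(f)
    me = graph[ix]
    return [j for j, ok in enumerate(me["unobstructed"])
            if ok and j != ix and graph[j]["included"]]

# print(reachable_indices("connectivity/17DRP5sb8fy_connectivity.json", 0))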
240 | const std::string& NavGraph::viewpoint(const std::string& scanId, unsigned int ix) const {
241 |     return scanLocations.at(scanId).at(ix)->viewpointId;
242 | }
243 | 
244 | 
245 | const glm::mat4& NavGraph::cameraRotation(const std::string& scanId, unsigned int ix) const {
246 |     return scanLocations.at(scanId).at(ix)->rot;
247 | }
248 | 
249 | 
250 | const glm::vec3& NavGraph::cameraPosition(const std::string& scanId, unsigned int ix) const {
251 |     return scanLocations.at(scanId).at(ix)->pos;
252 | }
253 | 
254 | 
255 | std::vector<unsigned int> NavGraph::adjacentViewpointIndices(const std::string& scanId, unsigned int ix) const {
256 |     std::vector<unsigned int> reachable;
257 |     for (unsigned int i = 0; i < scanLocations.at(scanId).size(); ++i) {
258 |         if (i == ix) {
259 |             // Skip option to stay at the same viewpoint
260 |             continue;
261 |         }
262 |         if (scanLocations.at(scanId).at(ix)->unobstructed[i] && scanLocations.at(scanId).at(i)->included) {
263 |             reachable.push_back(i);
264 |         }
265 |     }
266 |     return reachable;
267 | }
268 | 
269 | 
270 | std::pair<GLuint, GLuint> NavGraph::cubemapTextures(const std::string& scanId, unsigned int ix) {
271 |     LocationPtr loc = scanLocations.at(scanId).at(ix);
272 |     std::pair<GLuint, GLuint> textures = loc->cubemapTextures();
273 |     cache.add(loc);
274 |     return textures;
275 | }
276 | 
277 | 
278 | void NavGraph::deleteCubemapTextures(const std::string& scanId, unsigned int ix) {
279 |     scanLocations.at(scanId).at(ix)->deleteCubemapTextures();
280 | }
281 | 
282 | 
283 | }
284 | 
-------------------------------------------------------------------------------- /src/lib/cbf.cpp: --------------------------------------------------------------------------------
1 | // NYU Depth V2 Dataset Matlab Toolbox
2 | // Authors: Nathan Silberman, Pushmeet Kohli, Derek Hoiem, Rob Fergus
3 | 
4 | #include "cbf.h"
5 | 
6 | #include <stdio.h>  // TODO: remove this.
7 | #include <fstream>
8 | #include <math.h>
9 | #include <time.h>
10 | 
11 | // Uncomment this define for intermediate filtering results.
12 | // #define DEBUG
13 | 
14 | #define PI 3.14159
15 | 
16 | #define UCHAR_MAX 255
17 | #define FILTER_RAD 5
18 | 
19 | void toc(const char* message, clock_t start) {
20 | 
21 | #ifdef DEBUG
22 |     double d = clock() - start;
23 |     d = 1000 * d / CLOCKS_PER_SEC;
24 |     printf("[%s] %10.0f\n", message, d);
25 | #endif
26 | }
27 | 
28 | // Args:
29 | //   filter_size - the number of pixels in the filter.
30 | void create_offset_array(int filter_rad, int* offsets_h, int img_height) {
31 |     int filter_len = filter_rad * 2 + 1;
32 |     int filter_size = filter_len * filter_len;
33 | 
34 |     int kk = 0;
35 |     for (int yy = -filter_rad; yy <= filter_rad; ++yy) {
36 |         for (int xx = -filter_rad; xx <= filter_rad; ++xx, ++kk) {
37 |             offsets_h[kk] = yy + img_height * xx;
38 |         }
39 |     }
40 | }
41 | 
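Images in this filter are addressed column-major (absolute index = y + height * x), so create_offset_array encodes a (dx, dy) window step as dy + height * dx. A small Python check of that layout (illustrative only; toy sizes, the real code uses FILTER_RAD = 5):

H, W, R = 6, 5, 1   # toy column-major image and a 3x3 filter radius
offsets = [dy + H * dx for dy in range(-R, R + 1) for dx in range(-R, R + 1)]
y, x = 3, 2
center = y + H * x                           # column-major absolute index
window = [center + off for off in offsets]   # 3x3 neighbourhood, before bounds checks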
42 | void calc_pyr_sizes(int* heights, int* widths, int* pyr_offsets, int orig_height, int orig_width, int num_scales) {
43 |     int offset = 0;
44 |     for (int scale = 0; scale < num_scales; ++scale) {
45 |         pyr_offsets[scale] = offset;
46 | 
47 |         // Calculate the size of the downsampled images.
48 |         heights[scale] = static_cast<int>(orig_height / pow((float)2, scale));
49 |         widths[scale] = static_cast<int>(orig_width / pow((float)2, scale));
50 | 
51 |         offset += heights[scale] * widths[scale];
52 |     }
53 | 
54 | #ifdef DEBUG
55 |     for (int ii = 0; ii < num_scales; ++ii) {
56 |         printf("Scale %d: [%d x %d], offset=%d\n", ii, heights[ii], widths[ii], pyr_offsets[ii]);
57 |     }
58 | #endif
59 | }
60 | 
61 | int get_pyr_size(int* heights, int* widths, int num_scales) {
62 | 
63 |     int total_pixels = 0;
64 |     for (int ii = 0; ii < num_scales; ++ii) {
65 |         total_pixels += heights[ii] * widths[ii];
66 |     }
67 | 
68 |     return total_pixels;
69 | }
70 | 
71 | // We're upsampling from the result matrix (which is small) to the depth matrix,
72 | // which is larger.
73 | //
74 | // For example, dst could be 480x640 and src may be 240x320.
75 | //
76 | // Args:
77 | //   depth_dst - H1xW1 matrix where H1 and W1 are equal to height_dst and
78 | //               width_dst.
79 | void upsample_cpu(float* depth_dst,
80 |                   bool* mask_dst,
81 |                   bool* valid_dst,
82 |                   float* depth_src,
83 |                   float* result_src,
84 |                   bool* mask_src,
85 |                   bool* valid_src,
86 |                   int height_src,
87 |                   int width_src,
88 |                   int height_dst,
89 |                   int width_dst,
90 |                   int dst_img_ind) {
91 | 
92 |     int num_threads = height_dst * width_dst;
93 | 
94 |     // Don't bother if the upsampled pixel isn't missing.
95 |     if (!mask_dst[dst_img_ind]) {
96 |         return;
97 |     }
98 | 
99 |     int x_dst = floorf((float) dst_img_ind / height_dst);
100 |     int y_dst = fmodf(dst_img_ind, height_dst);
101 | 
102 |     int y_src = static_cast<int>((float) y_dst * height_src / height_dst);
103 |     int x_src = static_cast<int>((float) x_dst * width_src / width_dst);
104 | 
105 |     // Finally, convert to absolute coords.
106 |     int src_img_ind = y_src + height_src * x_src;
107 | 
108 |     if (!mask_src[src_img_ind]) {
109 |         depth_dst[dst_img_ind] = depth_src[src_img_ind];
110 |     } else {
111 |         depth_dst[dst_img_ind] = result_src[src_img_ind];
112 |     }
113 | 
114 |     valid_dst[dst_img_ind] = valid_src[src_img_ind];
115 | }
116 | 
117 | // Args:
118 | //   depth - the depth image, a HxW vector
119 | //   intensity - the intensity image, a HxW vector.
120 | //   is_missing - a binary mask specifying whether each pixel is missing
121 | //                (and needs to be filled in) or not.
122 | //   valid_in - a mask specifying which of the input values are allowed
123 | //              to be used for filtering.
124 | //   valid_out - a mask specifying which of the output values are allowed
125 | //               to be used for future filtering.
126 | //   result - the result of the filtering operation, a HxW matrix.
127 | //   abs_inds - the absolute indices (into depth, intensity, etc) which
128 | //              need filtering.
129 | //   offsets - vector of offsets from the current abs_ind to be used for
130 | //             filtering.
131 | //   gaussian - the values (weights) of the gaussian filter corresponding
132 | //              to the offset matrix.
133 | void cbf_cpu(const float* depth, const float* intensity, bool* is_missing, 134 | bool* valid_in, bool* valid_out, float* result, 135 | const int* abs_inds, 136 | const int* offsets, 137 | const float* gaussian_space, 138 | int height, 139 | int width, 140 | int filter_rad, 141 | float sigma_s, 142 | float sigma_r, 143 | int numThreads, 144 | int idx) { 145 | 146 | int abs_ind = abs_inds[idx]; 147 | 148 | int src_Y = abs_ind % height; 149 | int src_X = abs_ind / height; 150 | 151 | int filter_len = filter_rad * 2 + 1; 152 | int filter_size = filter_len * filter_len; 153 | 154 | float weight_sum = 0; 155 | float value_sum = 0; 156 | 157 | float weight_intensity_sum = 0; 158 | 159 | float gaussian_range[filter_size]; 160 | float gaussian_range_sum = 0; 161 | 162 | for (int ii = 0; ii < filter_size; ++ii) { 163 | // Unfortunately we need to double check that the radii are correct 164 | // unless we add better processing of borders. 165 | 166 | int abs_offset = abs_ind + offsets[ii]; // THESE ARE CALC TWICE. 167 | 168 | int dst_Y = abs_offset % height; 169 | int dst_X = abs_offset / height; 170 | 171 | if (abs_offset < 0 || abs_offset >= (height * width) 172 | || abs(src_Y-dst_Y) > FILTER_RAD || abs(src_X-dst_X) > FILTER_RAD) { 173 | continue; 174 | 175 | // The offsets are into ANY part of the image. So they MAY be accessing 176 | // a pixel that was originally missing. However, if that pixel has been 177 | // filled in, then we can still use it. 178 | } else if (is_missing[abs_offset] && !valid_in[abs_offset]) { 179 | continue; 180 | } 181 | 182 | float vv = intensity[abs_offset] - intensity[abs_ind]; 183 | 184 | 185 | gaussian_range[ii] = exp(-(vv * vv) / (2*sigma_r * sigma_r)); 186 | gaussian_range_sum += gaussian_range[ii]; 187 | } 188 | 189 | int count = 0; 190 | 191 | for (int ii = 0; ii < filter_size; ++ii) { 192 | // Get the Absolute offset into the image (1..N where N=H*W) 193 | int abs_offset = abs_ind + offsets[ii]; 194 | int dst_Y = abs_offset % height; 195 | int dst_X = abs_offset / height; 196 | if (abs_offset < 0 || abs_offset >= (height * width) 197 | || abs(src_Y-dst_Y) > FILTER_RAD || abs(src_X-dst_X) > FILTER_RAD) { 198 | continue; 199 | } else if (is_missing[abs_offset] && !valid_in[abs_offset]) { 200 | continue; 201 | } 202 | 203 | ++count; 204 | 205 | weight_sum += gaussian_space[ii] * gaussian_range[ii]; 206 | value_sum += depth[abs_offset] * gaussian_space[ii] * gaussian_range[ii]; 207 | } 208 | 209 | if (weight_sum == 0) { 210 | return; 211 | } 212 | 213 | if (isnan(weight_sum)) { 214 | printf("*******************\n"); 215 | printf(" Weight sum is NaN\n"); 216 | printf("*******************\n"); 217 | } 218 | 219 | value_sum /= weight_sum; 220 | 221 | result[abs_ind] = value_sum; 222 | 223 | valid_out[abs_ind] = 1; 224 | } 225 | 226 | // Args: 227 | // filter_size - the number of pixels in the filter. 228 | void create_spatial_gaussian(int filter_rad, float sigma_s, float* gaussian_h) { 229 | int filter_len = filter_rad * 2 + 1; 230 | int filter_size = filter_len * filter_len; 231 | 232 | float sum = 0; 233 | int kk = 0; 234 | for (int yy = -filter_rad; yy <= filter_rad; ++yy) { 235 | for (int xx = -filter_rad; xx <= filter_rad; ++xx, ++kk) { 236 | gaussian_h[kk] = exp(-(xx*xx + yy*yy) / (2*sigma_s * sigma_s)); 237 | sum += gaussian_h[kk]; 238 | } 239 | } 240 | 241 | for (int ff = 0; ff < filter_size; ++ff) { 242 | gaussian_h[ff] /= sum; 243 | } 244 | } 245 | 246 | // Counts the number of missing pixels in the given mask. 
Note that the mask
247 | // MUST already be in the appropriate offset location.
248 | //
249 | // Args:
250 | //   height - the height of the image at the current scale.
251 | //   width - the width of the image at the current scale.
252 | //   mask - pointer into the mask_ms_d matrix. The offset has already been
253 | //          calculated.
254 | //   abs_inds_h - pre-allocated host memory location.
255 | int get_missing_pixel_coords(int height, int width, bool* mask, int* abs_inds_to_filter_h) {
256 |     int num_pixels = height * width;
257 | 
258 |     int num_missing_pixels = 0;
259 |     for (int nn = 0; nn < num_pixels; ++nn) {
260 |         if (mask[nn]) {
261 |             abs_inds_to_filter_h[num_missing_pixels] = nn;
262 |             ++num_missing_pixels;
263 |         }
264 |     }
265 | 
266 |     return num_missing_pixels;
267 | }
268 | 
269 | static void savePGM(bool* imf, const char *name, int height, int width) {
270 |     int NN = height * width;
271 |     uint8_t im[NN];
272 | 
273 |     for (int nn = 0; nn < NN; ++nn) {
274 |         // First convert to X,Y
275 |         int y = nn % height;
276 |         int x = floor(nn / height);
277 | 
278 |         // Then back to Abs Inds
279 |         int mm = y * width + x;
280 | 
281 |         im[mm] = uint8_t(255*imf[nn]);
282 |     }
283 | 
284 |     std::ofstream file(name, std::ios::out | std::ios::binary);
285 | 
286 |     file << "P5\n" << width << " " << height << "\n" << UCHAR_MAX << "\n";
287 |     file.write((char *)&im, width * height * sizeof(uint8_t));
288 | }
289 | 
290 | static void savePGM(float* imf, const char *name, int height, int width) {
291 |     int NN = height * width;
292 |     uint8_t im[NN];
293 | 
294 |     for (int nn = 0; nn < NN; ++nn) {
295 |         // First convert to X,Y
296 |         int y = nn % height;
297 |         int x = floor(nn / height);
298 | 
299 |         // Then back to Abs Inds
300 |         int mm = y * width + x;
301 | 
302 |         im[mm] = uint8_t(255*imf[nn]);
303 |     }
304 | 
305 |     std::ofstream file(name, std::ios::out | std::ios::binary);
306 | 
307 |     file << "P5\n" << width << " " << height << "\n" << UCHAR_MAX << "\n";
308 |     file.write((char *)&im, width * height * sizeof(uint8_t));
309 | }
310 | 
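For intuition, the spatial kernel built by create_spatial_gaussian above is simply a normalized 2-D Gaussian over the (2r+1)^2 window, and the per-pixel range weights in cbf_cpu use the same exp(-d^2 / 2*sigma^2) form over intensity differences. A numpy sketch of the spatial part (illustrative only, not part of the toolbox):

import numpy as np

def spatial_gaussian(filter_rad, sigma_s):
    # Mirrors create_spatial_gaussian: weights over a (2r+1)^2 window,
    # normalized so the kernel sums to 1.
    ax = np.arange(-filter_rad, filter_rad + 1)
    yy, xx = np.meshgrid(ax, ax, indexing="ij")
    g = np.exp(-(xx**2 + yy**2) / (2 * sigma_s**2))
    return (g / g.sum()).ravel()

# spatial_gaussian(5, 12.0): FILTER_RAD plus the first sigma_s value
# hard-coded in the Python binding shown later in this listing.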
311 | void filter_at_scale(float* depth_h,
312 |                      float* intensity_h,
313 |                      bool* mask_h,
314 |                      bool* valid_h,
315 |                      float* result_h,
316 |                      int* abs_inds_to_filter_h,
317 |                      int height,
318 |                      int width,
319 |                      float sigma_s,
320 |                      float sigma_r) {
321 | 
322 |     int filter_rad = FILTER_RAD;
323 |     int filter_size = 2 * filter_rad + 1;
324 |     int F = filter_size * filter_size;
325 | 
326 |     // Create the offset array.
327 |     int* offsets_h = (int*) malloc(F * sizeof(int));
328 |     create_offset_array(filter_rad, offsets_h, height);
329 | 
330 |     // Create the gaussian.
331 |     float* gaussian_h = (float*) malloc(F * sizeof(float));
332 |     create_spatial_gaussian(filter_rad, sigma_s, gaussian_h);
333 | 
334 |     // ************************************************
335 |     // We need to be smart about how we do this, so rather
336 |     // than execute the filter for EVERY point in the image,
337 |     // we will only do it for the points missing depth information.
338 |     // ************************************************
339 | 
340 |     int num_missing_pixels = get_missing_pixel_coords(height, width, mask_h, abs_inds_to_filter_h);
341 | #ifdef DEBUG
342 |     printf("Num Missing Pixels: %d\n", num_missing_pixels);
343 | #endif
344 | 
345 |     clock_t start_filter = clock();
346 | 
347 |     // We should not be writing into the same value for 'valid' that we're passing in.
348 |     bool* valid_in = (bool*) malloc(height * width * sizeof(bool));
349 |     for (int i = 0; i < height * width; ++i) {
350 |         valid_in[i] = valid_h[i];
351 |     }
352 | 
353 |     for (int i = 0; i < num_missing_pixels; ++i) {
354 |         cbf_cpu(depth_h,
355 |                 intensity_h,
356 |                 mask_h,
357 |                 valid_in,
358 |                 valid_h,
359 |                 result_h,
360 |                 abs_inds_to_filter_h,
361 |                 offsets_h,
362 |                 gaussian_h,
363 |                 height,
364 |                 width,
365 |                 filter_rad,
366 |                 sigma_s,
367 |                 sigma_r,
368 |                 num_missing_pixels,
369 |                 i);
370 |     }
371 | 
372 |     toc("FILTER OP", start_filter);
373 | 
374 |     free(valid_in);
375 |     free(offsets_h);
376 |     free(gaussian_h);
377 | }
378 | 
379 | void cbf::cbf(int height, int width, uint8_t* depth, uint8_t* intensity,
380 |               uint8_t* mask_h, uint8_t* result, unsigned num_scales,
381 |               double* sigma_s, double* sigma_r) {
382 | 
383 |     clock_t start_func = clock();
384 | 
385 |     int pyr_heights[num_scales];
386 |     int pyr_widths[num_scales];
387 |     int pyr_offsets[num_scales];
388 |     calc_pyr_sizes(&pyr_heights[0], &pyr_widths[0], &pyr_offsets[0], height, width, num_scales);
389 | 
390 |     // Allocate the memory needed for the absolute missing pixel indices. We'll
391 |     // allocate the number of bytes required for the largest image, since the
392 |     // smaller ones obviously fit inside of it.
393 |     int N = height * width;
394 |     int* abs_inds_to_filter_h = (int*) malloc(N * sizeof(int));
395 | 
396 |     int pyr_size = get_pyr_size(&pyr_heights[0], &pyr_widths[0], num_scales);
397 | 
398 |     // ************************
399 |     // CREATING THE PYRAMID
400 |     // ************************
401 |     clock_t start_pyr = clock();
402 | 
403 |     // NEG TIME.
404 |     float* depth_ms_h = (float*) malloc(pyr_size * sizeof(float));
405 |     float* intensity_ms_h = (float*) malloc(pyr_size * sizeof(float));
406 |     bool* mask_ms_h = (bool*) malloc(pyr_size * sizeof(bool));
407 |     float* result_ms_h = (float*) malloc(pyr_size * sizeof(float));
408 |     bool* valid_ms_h = (bool*) malloc(pyr_size * sizeof(bool));
409 | 
410 |     for (int nn = 0; nn < N; ++nn) {
411 |         depth_ms_h[nn] = depth[nn] / 255.0;
412 |         intensity_ms_h[nn] = intensity[nn] / 255.0;
413 |         mask_ms_h[nn] = mask_h[nn];
414 |         valid_ms_h[nn] = !mask_h[nn];
415 |         result_ms_h[nn] = 0;
416 |     }
417 | 
418 |     float* depth_ms_h_p = depth_ms_h + pyr_offsets[1];
419 |     float* intensity_ms_h_p = intensity_ms_h + pyr_offsets[1];
420 |     bool* mask_ms_h_p = mask_ms_h + pyr_offsets[1];
421 |     bool* valid_ms_h_p = valid_ms_h + pyr_offsets[1];
422 |     float* result_ms_h_p = result_ms_h + pyr_offsets[1];
423 | 
424 |     for (int scale = 1; scale < num_scales; ++scale) {
425 |         for (int xx = 0; xx < pyr_widths[scale]; ++xx) {
426 |             for (int yy = 0; yy < pyr_heights[scale]; ++yy, ++depth_ms_h_p, ++intensity_ms_h_p, ++mask_ms_h_p, ++result_ms_h_p, ++valid_ms_h_p) {
427 |                 int abs_yy = static_cast<int>(((float)yy / pyr_heights[scale]) * height);
428 |                 int abs_xx = static_cast<int>(((float)xx / pyr_widths[scale]) * width);
429 |                 int img_offset = abs_yy + height * abs_xx;
430 |                 *depth_ms_h_p = depth_ms_h[img_offset];
431 |                 *intensity_ms_h_p = intensity_ms_h[img_offset];
432 |                 *mask_ms_h_p = mask_h[img_offset];
433 |                 *valid_ms_h_p = !mask_h[img_offset];
434 |                 *result_ms_h_p = 0;
435 |             }
436 |         }
437 |     }
438 | 
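The loop above builds a coarse-to-fine pyramid by nearest-neighbour sampling; the routine then filters the coarsest level first and upsamples filled values into the next level. A compact numpy sketch of that control flow (illustrative only, under simplified assumptions; fill_one_scale stands in for the masked cross bilateral filter, and it is not a faithful port):

import numpy as np

def multiscale_fill(depth, mask, num_scales, fill_one_scale):
    # depth: float image in [0, 1]; mask: True where depth is missing.
    levels = [(depth.copy(), mask)]
    for s in range(1, num_scales):
        levels.append((depth[::2**s, ::2**s].copy(), mask[::2**s, ::2**s].copy()))
    for s in range(num_scales - 1, 0, -1):
        d, m = levels[s]
        filled = fill_one_scale(d, m)                 # filter the coarse level
        d_up, m_up = levels[s - 1]
        # Upsample: still-missing finer pixels take the coarse filled value.
        coarse = np.kron(filled, np.ones((2, 2)))[:d_up.shape[0], :d_up.shape[1]]
        d_up[m_up] = coarse[m_up]
    d, m = levels[0]
    return fill_one_scale(d, m)                       # final pass at full resolution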
439 |     // *********************************
440 |     // RUN THE ACTUAL FILTERING CODE
441 |     // *********************************
442 | 
443 |     for (int scale = num_scales - 1; scale >= 0; --scale) {
444 | #ifdef DEBUG
445 |         printf("Filtering at scale %d, [%dx%d]\n", scale, pyr_heights[scale], pyr_widths[scale]);
446 | 
447 |         char filename1[50];
448 |         sprintf(filename1, "missing_pixels_before_filtering_scale%d.pgm", scale);
449 |         // Save the intermediate image before filtering.
450 |         savePGM(mask_ms_h + pyr_offsets[scale], filename1, pyr_heights[scale], pyr_widths[scale]);
451 | 
452 |         char filename2[50];
453 |         sprintf(filename2, "valid_pixels_before_filtering_scale%d.pgm", scale);
454 |         // Save the intermediate image before filtering.
455 |         savePGM(valid_ms_h + pyr_offsets[scale], filename2, pyr_heights[scale], pyr_widths[scale]);
456 | 
457 |         sprintf(filename2, "valid_intensity_before_filtering_scale%d.pgm", scale);
458 |         // Save the intermediate image before filtering.
459 |         savePGM(intensity_ms_h + pyr_offsets[scale], filename2, pyr_heights[scale], pyr_widths[scale]);
460 | 
461 |         sprintf(filename2, "depth_before_filtering_scale%d.pgm", scale);
462 |         // Save the intermediate image before filtering.
463 |         savePGM(depth_ms_h + pyr_offsets[scale], filename2, pyr_heights[scale], pyr_widths[scale]);
464 | #endif
465 | 
466 |         filter_at_scale(depth_ms_h + pyr_offsets[scale],
467 |                         intensity_ms_h + pyr_offsets[scale],
468 |                         mask_ms_h + pyr_offsets[scale],
469 |                         valid_ms_h + pyr_offsets[scale],
470 |                         result_ms_h + pyr_offsets[scale],
471 |                         abs_inds_to_filter_h,
472 |                         pyr_heights[scale],
473 |                         pyr_widths[scale],
474 |                         sigma_s[scale],
475 |                         sigma_r[scale]);
476 | 
477 | 
478 | #ifdef DEBUG
479 |         sprintf(filename2, "valid_pixels_after_filtering_scale%d.pgm", scale);
480 |         // Now that we've performed the filtering, save the intermediate image.
481 |         savePGM(valid_ms_h + pyr_offsets[scale], filename2, pyr_heights[scale], pyr_widths[scale]);
482 | #endif
483 | 
484 | #ifdef DEBUG
485 |         char filename[50];
486 |         sprintf(filename, "depth_after_filtering_scale%d.pgm", scale);
487 |         // Now that we've performed the filtering, save the intermediate image.
488 |         savePGM(result_ms_h + pyr_offsets[scale], filename, pyr_heights[scale], pyr_widths[scale]);
489 | #endif
490 | 
491 |         if (scale == 0) {
492 |             continue;
493 |         }
494 | 
495 |         // Now, we need to upsample the resulting depth and store it in the next
496 |         // highest location.
497 |         int num_missing_pixels = pyr_heights[scale-1] * pyr_widths[scale-1];
498 | 
499 | #ifdef DEBUG
500 |         printf("Upsampling %d\n", num_missing_pixels);
501 | #endif
502 |         for (int i = 0; i < num_missing_pixels; ++i) {
503 |             upsample_cpu(depth_ms_h + pyr_offsets[scale-1],
504 |                          mask_ms_h + pyr_offsets[scale-1],
505 |                          valid_ms_h + pyr_offsets[scale-1],
506 |                          depth_ms_h + pyr_offsets[scale],
507 |                          result_ms_h + pyr_offsets[scale],
508 |                          mask_ms_h + pyr_offsets[scale],
509 |                          valid_ms_h + pyr_offsets[scale],
510 |                          pyr_heights[scale],
511 |                          pyr_widths[scale],
512 |                          pyr_heights[scale-1],
513 |                          pyr_widths[scale-1],
514 |                          i);
515 |         }
516 | 
517 | 
518 | #ifdef DEBUG
519 |         sprintf(filename, "up_depth_after_filtering_scale%d.pgm", scale);
520 |         // Now that we've performed the filtering, save the intermediate image.
521 |         savePGM(depth_ms_h + pyr_offsets[scale-1], filename, pyr_heights[scale-1], pyr_widths[scale-1]);
522 | 
523 |         sprintf(filename, "up_valid_after_filtering_scale%d.pgm", scale);
524 |         // Now that we've performed the filtering, save the intermediate image.
525 |         savePGM(valid_ms_h + pyr_offsets[scale-1], filename, pyr_heights[scale-1], pyr_widths[scale-1]);
526 | #endif
527 |     }
528 | 
529 |     // Copy the final result into the output buffer.
530 |     for (int nn = 0; nn < N; ++nn) {
531 |         if (mask_ms_h[nn]) {
532 |             result[nn] = static_cast<uint8_t>(result_ms_h[nn] * 255);
533 |         } else {
534 |             result[nn] = depth[nn];
535 |         }
536 |     }
537 | 
538 |     free(depth_ms_h);
539 |     free(intensity_ms_h);
540 |     free(mask_ms_h);
541 |     free(result_ms_h);
542 |     free(valid_ms_h);
543 |     free(abs_inds_to_filter_h);
544 | 
545 |     toc("Entire Function", start_func);
546 | }
547 | 
548 | 
-------------------------------------------------------------------------------- /src/lib/fragment.sh: --------------------------------------------------------------------------------
1 | R""(
2 | #version 120
3 | 
4 | varying vec3 texCoord;
5 | varying vec4 camCoord;
6 | uniform samplerCube cubemap;
7 | const vec3 camlook = vec3( 0.0, 0.0, -1.0 );
8 | uniform bool isDepth;
9 | 
10 | void main (void) {
11 |     vec4 color = textureCube(cubemap, texCoord);
12 |     if (isDepth) {
13 |         float scale = dot(camCoord.xyz, camlook) / length(camCoord.xyz);
14 |         gl_FragColor = color*scale;
15 |     } else {
16 |         gl_FragColor = color;
17 |     }
18 | }
19 | )""
20 | 
-------------------------------------------------------------------------------- /src/lib/vertex.sh: --------------------------------------------------------------------------------
1 | R""(
2 | #version 120
3 | 
4 | attribute vec3 vertex;
5 | varying vec3 texCoord;
6 | varying vec4 camCoord;
7 | uniform mat4 ProjMat;
8 | uniform mat4 ModelViewMat;
9 | 
10 | void main() {
11 |     camCoord = ModelViewMat * vec4(vertex, 1.0);
12 |     gl_Position = ProjMat * camCoord;
13 |     texCoord = vertex;
14 | }
15 | )""
16 | 
-------------------------------------------------------------------------------- /src/lib_python/MatterSimPython.cpp: --------------------------------------------------------------------------------
1 | #include <pybind11/pybind11.h>
2 | #include <pybind11/stl.h>
3 | #include "MatterSim.hpp"
4 | #include "cbf.h"
5 | 
6 | namespace py = pybind11;
7 | 
8 | namespace mattersim {
9 | 
10 |     void cbf(py::buffer depth, py::buffer intensity, py::buffer mask, py::buffer result) {
11 |         double spaceSigmas[3] = {12, 5, 8};
12 |         double rangeSigmas[3] = {0.2, 0.08, 0.02};
13 |         py::buffer_info d_info = depth.request();
14 |         py::buffer_info i_info = intensity.request();
15 |         py::buffer_info m_info = mask.request();
16 |         py::buffer_info r_info = result.request();
17 |         cbf::cbf(d_info.shape[0], d_info.shape[1],
18 |             static_cast<uint8_t*>(d_info.ptr),
19 |             static_cast<uint8_t*>(i_info.ptr),
20 |             static_cast<uint8_t*>(m_info.ptr),
21 |             static_cast<uint8_t*>(r_info.ptr),
22 |             3, &spaceSigmas[0], &rangeSigmas[0]);
23 |     }
24 | 
25 | }
26 | 
27 | using namespace mattersim;
28 | 
29 | PYBIND11_MODULE(MatterSim, m) {
30 |     m.def("cbf", &mattersim::cbf, "Cross Bilateral Filter");
31 |     py::class_<Viewpoint, ViewpointPtr>(m, "ViewPoint")
32 |         .def_readonly("viewpointId", &Viewpoint::viewpointId)
33 |         .def_readonly("ix", &Viewpoint::ix)
34 |         .def_readonly("x", &Viewpoint::x)
35 |         .def_readonly("y", &Viewpoint::y)
36 |         .def_readonly("z", &Viewpoint::z)
37 |         .def_readonly("rel_heading", &Viewpoint::rel_heading)
38 |         .def_readonly("rel_elevation", &Viewpoint::rel_elevation)
39 |         .def_readonly("rel_distance", &Viewpoint::rel_distance);
40 |     py::class_<cv::Mat>(m, "Mat", pybind11::buffer_protocol())
41 |         .def_buffer([](cv::Mat& im) -> pybind11::buffer_info {
42 |             ssize_t item_size = im.elemSize() / im.channels();
43 |             std::string format = pybind11::format_descriptor<unsigned char>::format();
44 |             if (item_size == 2) { // handle 16bit data from depth maps
45 |                 format = pybind11::format_descriptor<unsigned short>::format();
46 |             }
47 |             return pybind11::buffer_info(
48 |                 im.data, // Pointer to buffer
49 |                 item_size, // Size of one scalar
50 |                 format,
51 |                 3, // Number of dimensions (row, cols, channels)
52 |                 { im.rows, im.cols, im.channels() }, // Buffer dimensions
53 |                 { // Strides (in bytes) for each index
54 |                     item_size * im.channels() * im.cols,
55 |                     item_size * im.channels(),
56 |                     item_size
57 |                 }
58 |             );
59 |         });
60 |     py::class_<SimState, SimStatePtr>(m, "SimState")
61 |         .def_readonly("scanId", &SimState::scanId)
62 |         .def_readonly("step", &SimState::step)
63 |         .def_readonly("rgb", &SimState::rgb)
64 |         .def_readonly("depth", &SimState::depth)
65 |         .def_readonly("location", &SimState::location)
66 |         .def_readonly("heading", &SimState::heading)
67 |         .def_readonly("elevation", &SimState::elevation)
68 |         .def_readonly("viewIndex", &SimState::viewIndex)
69 |         .def_readonly("navigableLocations", &SimState::navigableLocations);
70 |     py::class_<Simulator>(m, "Simulator")
71 |         .def(py::init<>())
72 |         .def("setDatasetPath", &Simulator::setDatasetPath)
73 |         .def("setNavGraphPath", &Simulator::setNavGraphPath)
74 |         .def("setRenderingEnabled", &Simulator::setRenderingEnabled)
75 |         .def("setCameraResolution", &Simulator::setCameraResolution)
76 |         .def("setCameraVFOV", &Simulator::setCameraVFOV)
77 |         .def("setElevationLimits", &Simulator::setElevationLimits)
78 |         .def("setDiscretizedViewingAngles", &Simulator::setDiscretizedViewingAngles)
79 |         .def("setPreloadingEnabled", &Simulator::setPreloadingEnabled)
80 |         .def("setDepthEnabled", &Simulator::setDepthEnabled)
81 |         .def("setBatchSize", &Simulator::setBatchSize)
82 |         .def("setCacheSize", &Simulator::setCacheSize)
83 |         .def("setSeed", &Simulator::setSeed)
84 |         .def("initialize", &Simulator::initialize)
85 |         .def("newEpisode", &Simulator::newEpisode)
86 |         .def("newRandomEpisode", &Simulator::newRandomEpisode)
87 |         .def("getState", &Simulator::getState, py::return_value_policy::take_ownership)
88 |         .def("makeAction", &Simulator::makeAction)
89 |         .def("close", &Simulator::close)
90 |         .def("resetTimers", &Simulator::resetTimers)
91 |         .def("timingInfo", &Simulator::timingInfo);
92 | }
93 | 
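The cbf wrapper above hard-codes a three-scale filter over raw buffers. A minimal usage sketch from Python (illustrative only: the array contents and the zero-means-missing convention are assumptions, not repo code):

import numpy as np
from MatterSim import cbf

# depth and intensity are single-channel uint8 images of the same size;
# mask is nonzero where depth is missing and should be filled.
depth = np.random.randint(0, 256, (480, 640), dtype=np.uint8)
intensity = np.random.randint(0, 256, (480, 640), dtype=np.uint8)
mask = (depth == 0).astype(np.uint8)
result = np.zeros_like(depth)

cbf(depth, intensity, mask, result)  # filled depth is written into result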
-------------------------------------------------------------------------------- /src/test/python_test.py: --------------------------------------------------------------------------------
1 | import sys
2 | sys.path.append('build')
3 | 
4 | from MatterSim import Simulator
5 | import math
6 | import cv2
7 | import json
8 | import numpy as np
9 | 
10 | 
11 | sim = Simulator()
12 | sim.setCameraResolution(500, 300)
13 | sim.setCameraVFOV(math.radians(60))
14 | sim.setElevationLimits(math.radians(-40),math.radians(50))
15 | sim.initialize()
16 | 
17 | with open("src/test/rendertest_spec.json") as f:
18 |     spec = json.load(f)
19 |     for tc in spec[0][:1]:  # the spec is a list of batches; take the first test case
20 |         sim.newEpisode(tc["scanId"], tc["viewpointId"], tc["heading"], tc["elevation"])
21 |         state = sim.getState()
22 |         im = np.array(state.rgb, copy=False)
23 |         imgfile = tc["reference_image"]
24 |         cv2.imwrite("sim_imgs/"+imgfile, im)
25 |         cv2.imshow('rendering', im)
26 |         cv2.waitKey(0)
27 | 
28 | 
29 | 
-------------------------------------------------------------------------------- /src/test/rendertest_spec.json: --------------------------------------------------------------------------------
1 | [
2 |   [
3 |     {
4 |       "scanId": "17DRP5sb8fy",
5 |       "viewpointId": "85c23efeaecd4d43a7dcd5b90137179e",
6 |       "elevation": 0.008557380839564054,
7 |       "heading": 2.551961945320492,
8 |       "reference_image": "17DRP5sb8fy_85c23efeaecd4d43a7dcd5b90137179e_2.551961945320492_0.008557380839564054.png"
9 |     },
10 |     {
11 |       "scanId": "1LXtFkjw3qL",
12 |       "viewpointId": "187589bb7d4644f2943079fb949c0be9",
13 |       "elevation":
0.0004921836022802584, 14 | "heading": 1.8699330579409539, 15 | "reference_image": "1LXtFkjw3qL_187589bb7d4644f2943079fb949c0be9_1.8699330579409539_0.0004921836022802584.png" 16 | }, 17 | { 18 | "scanId": "1pXnuDYAj8r", 19 | "viewpointId": "163d61ac7edb43fb958c5d9e69ae11ad", 20 | "elevation": -0.02444352614304746, 21 | "heading": 4.626331047551077, 22 | "reference_image": "1pXnuDYAj8r_163d61ac7edb43fb958c5d9e69ae11ad_4.626331047551077_-0.02444352614304746.png" 23 | }, 24 | { 25 | "scanId": "29hnd4uzFmX", 26 | "viewpointId": "1576d62e7bbb45e8a5ef9e7bb37b1839", 27 | "elevation": -0.0006838914039405167, 28 | "heading": 5.844119909926444, 29 | "reference_image": "29hnd4uzFmX_1576d62e7bbb45e8a5ef9e7bb37b1839_5.844119909926444_-0.0006838914039405167.png" 30 | } 31 | ], 32 | [ 33 | { 34 | "scanId": "2azQ1b91cZZ", 35 | "viewpointId": "3daad58ad53742038e50d62e91f84e7b", 36 | "elevation": 0.016732869758208434, 37 | "heading": 3.1736484087962933, 38 | "reference_image": "2azQ1b91cZZ_3daad58ad53742038e50d62e91f84e7b_3.1736484087962933_0.016732869758208434.png" 39 | }, 40 | { 41 | "scanId": "2n8kARJN3HM", 42 | "viewpointId": "94ac3cea52ec455993f8562f78da3be1", 43 | "elevation": -0.0009188787844489273, 44 | "heading": 2.604601935142565, 45 | "reference_image": "2n8kARJN3HM_94ac3cea52ec455993f8562f78da3be1_2.604601935142565_-0.0009188787844489273.png" 46 | }, 47 | { 48 | "scanId": "2t7WUuJeko7", 49 | "viewpointId": "529f006f8293406da0b506defd2891a5", 50 | "elevation": -0.013788837143969411, 51 | "heading": 0.032985516949381344, 52 | "reference_image": "2t7WUuJeko7_529f006f8293406da0b506defd2891a5_0.032985516949381344_-0.013788837143969411.png" 53 | }, 54 | { 55 | "scanId": "5LpN3gDmAk7", 56 | "viewpointId": "bda8025f20404048a77381e9e0dc0ccf", 57 | "elevation": -0.01083211073205187, 58 | "heading": 5.325207878739601, 59 | "reference_image": "5LpN3gDmAk7_bda8025f20404048a77381e9e0dc0ccf_5.325207878739601_-0.01083211073205187.png" 60 | } 61 | ], 62 | [ 63 | { 64 | "scanId": "5q7pvUzZiYa", 65 | "viewpointId": "397403366d784caf804d741f32fd68b9", 66 | "elevation": -0.0007063598518199811, 67 | "heading": 2.8746465006968234, 68 | "reference_image": "5q7pvUzZiYa_397403366d784caf804d741f32fd68b9_2.8746465006968234_-0.0007063598518199811.png" 69 | }, 70 | { 71 | "scanId": "5ZKStnWn8Zo", 72 | "viewpointId": "c76b52856e7c4f2a9a4419000c8e646a", 73 | "elevation": -0.02922217527541366, 74 | "heading": 4.13470589902238, 75 | "reference_image": "5ZKStnWn8Zo_c76b52856e7c4f2a9a4419000c8e646a_4.13470589902238_-0.02922217527541366.png" 76 | }, 77 | { 78 | "scanId": "759xd9YjKW5", 79 | "viewpointId": "2343ef3bf04a4433af62f0d527d7512a", 80 | "elevation": -0.016938006310169448, 81 | "heading": 3.5451019786019264, 82 | "reference_image": "759xd9YjKW5_2343ef3bf04a4433af62f0d527d7512a_3.5451019786019264_-0.016938006310169448.png" 83 | }, 84 | { 85 | "scanId": "7y3sRwLe3Va", 86 | "viewpointId": "9bbf903d50da4ffd9e5d1fb7c9f4d69b", 87 | "elevation": 0.008361841032265524, 88 | "heading": 1.7348660165523566, 89 | "reference_image": "7y3sRwLe3Va_9bbf903d50da4ffd9e5d1fb7c9f4d69b_1.7348660165523566_0.008361841032265524.png" 90 | } 91 | ], 92 | [ 93 | { 94 | "scanId": "8194nk5LbLH", 95 | "viewpointId": "c9e8dc09263e4d0da77d16de0ecddd39", 96 | "elevation": 0.008533161479170466, 97 | "heading": 4.05504292862083, 98 | "reference_image": "8194nk5LbLH_c9e8dc09263e4d0da77d16de0ecddd39_4.05504292862083_0.008533161479170466.png" 99 | }, 100 | { 101 | "scanId": "82sE5b5pLXE", 102 | "viewpointId": "056a491afa534b17bac36f4f5898462a", 103 | 
"elevation": -0.0037883068413356496, 104 | "heading": 1.689393931320027, 105 | "reference_image": "82sE5b5pLXE_056a491afa534b17bac36f4f5898462a_1.689393931320027_-0.0037883068413356496.png" 106 | }, 107 | { 108 | "scanId": "8WUmhLawc2A", 109 | "viewpointId": "d21aae0b5d944f27a0074525c803fc9f", 110 | "elevation": -0.04510889155759994, 111 | "heading": 3.047458184407221, 112 | "reference_image": "8WUmhLawc2A_d21aae0b5d944f27a0074525c803fc9f_3.047458184407221_-0.04510889155759994.png" 113 | }, 114 | { 115 | "scanId": "ac26ZMwG7aT", 116 | "viewpointId": "efeef7cc82c84690addb0bf415f075ea", 117 | "elevation": -0.013447513736072197, 118 | "heading": 0.07434352566701552, 119 | "reference_image": "ac26ZMwG7aT_efeef7cc82c84690addb0bf415f075ea_0.07434352566701552_-0.013447513736072197.png" 120 | } 121 | ], 122 | [ 123 | { 124 | "scanId": "ARNzJeq3xxb", 125 | "viewpointId": "9a671e6915de4eb897f45fee8bf2031d", 126 | "elevation": 0.02583868533558965, 127 | "heading": 5.616355886953764, 128 | "reference_image": "ARNzJeq3xxb_9a671e6915de4eb897f45fee8bf2031d_5.616355886953764_0.02583868533558965.png" 129 | }, 130 | { 131 | "scanId": "B6ByNegPMKs", 132 | "viewpointId": "e3a65955df26467581c32613c4e9f865", 133 | "elevation": 0.007265625492957138, 134 | "heading": 5.230794959607039, 135 | "reference_image": "B6ByNegPMKs_e3a65955df26467581c32613c4e9f865_5.230794959607039_0.007265625492957138.png" 136 | }, 137 | { 138 | "scanId": "b8cTxDM8gDG", 139 | "viewpointId": "f2944e0b66b9461994a7f757582f9bc3", 140 | "elevation": -0.007543204141144086, 141 | "heading": 0.0853092784395515, 142 | "reference_image": "b8cTxDM8gDG_f2944e0b66b9461994a7f757582f9bc3_0.0853092784395515_-0.007543204141144086.png" 143 | }, 144 | { 145 | "scanId": "cV4RVeZvu5T", 146 | "viewpointId": "1b321779a4374c2b952c51820daa9e6c", 147 | "elevation": 0.07914721704610106, 148 | "heading": 6.266463179566256, 149 | "reference_image": "cV4RVeZvu5T_1b321779a4374c2b952c51820daa9e6c_6.266463179566256_0.07914721704610106.png" 150 | } 151 | ] 152 | ] 153 | -------------------------------------------------------------------------------- /tasks/NDH/DAN_modules/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yeezhu/CMN.pytorch/b4e3c3ca34668cb8031d525132b013ced472ed87/tasks/NDH/DAN_modules/__init__.py -------------------------------------------------------------------------------- /tasks/NDH/DAN_modules/fc.py: -------------------------------------------------------------------------------- 1 | """ 2 | This code is modified from Hengyuan Hu's repository. 
3 | https://github.com/hengyuan-hu/bottom-up-attention-vqa
4 | """
5 | from __future__ import print_function
6 | import torch.nn as nn
7 | from torch.nn.utils.weight_norm import weight_norm
8 | 
9 | class FCNet(nn.Module):
10 |     """Simple class for a non-linear fully connected network
11 |     """
12 |     def __init__(self, dims):
13 |         super(FCNet, self).__init__()
14 | 
15 |         layers = []
16 |         for i in range(len(dims)-2):
17 |             in_dim = dims[i]
18 |             out_dim = dims[i+1]
19 |             layers.append(weight_norm(nn.Linear(in_dim, out_dim), dim=None))
20 |             layers.append(nn.ReLU())
21 |         layers.append(weight_norm(nn.Linear(dims[-2], dims[-1]), dim=None))
22 |         layers.append(nn.ReLU())
23 | 
24 |         self.main = nn.Sequential(*layers)
25 | 
26 |     def forward(self, x):
27 |         return self.main(x)
28 | 
29 | 
30 | if __name__ == '__main__':
31 |     fc1 = FCNet([10, 20, 10])
32 |     print(fc1)
33 | 
34 |     print('============')
35 |     fc2 = FCNet([10, 20])
36 |     print(fc2)
-------------------------------------------------------------------------------- /tasks/NDH/DAN_modules/language_model.py: --------------------------------------------------------------------------------
1 | import numpy as np
2 | import torch
3 | from torch import nn
4 | from torch.autograd import Variable
5 | from torch.nn import functional as F
6 | from torch.nn.utils.rnn import pack_padded_sequence, pad_packed_sequence
7 | 
8 | class DynamicRNN(nn.Module):
9 |     """
10 |     This code is modified from batra-mlp-lab's repository.
11 |     https://github.com/batra-mlp-lab/visdial-challenge-starter-pytorch
12 |     """
13 |     def __init__(self, rnn_model):
14 |         super(DynamicRNN, self).__init__()
15 |         self.rnn_model = rnn_model
16 | 
17 |     def forward(self, seq_input, seq_lens, initial_state=None):
18 |         """A wrapper over pytorch's rnn to handle sequences of variable length.
19 | 
20 |         Arguments
21 |         ---------
22 |         seq_input : torch.autograd.Variable
23 |             Input sequence tensor (padded) for RNN model. (b, max_seq_len, embed_size)
24 |         seq_lens : torch.LongTensor
25 |             Length of sequences (b, )
26 |         initial_state : torch.autograd.Variable
27 |             Initial (hidden, cell) states of RNN model.
28 | 
29 |         Returns
30 |         -------
31 |         A single tensor of shape (batch_size, rnn_hidden_size) corresponding
32 |         to the outputs of the RNN model at the last time step of each input
33 |         sequence.
34 | """ 35 | sorted_len, fwd_order, bwd_order = self._get_sorted_order(seq_lens) 36 | sorted_seq_input = seq_input.index_select(0, fwd_order) 37 | packed_seq_input = pack_padded_sequence( 38 | sorted_seq_input, lengths=sorted_len, batch_first=True) 39 | 40 | if initial_state is not None: 41 | hx = initial_state 42 | sorted_hx = [x.index_select(1, fwd_order) for x in hx] 43 | assert hx[0].size(0) == self.rnn_model.num_layers 44 | else: 45 | hx = None 46 | self.rnn_model.flatten_parameters() 47 | enc_h, (h_n, c_n) = self.rnn_model(packed_seq_input, hx) 48 | ctx, _ = pad_packed_sequence(enc_h, batch_first=True) 49 | 50 | c_t = c_n[-1].index_select(dim=0, index=bwd_order) 51 | rnn_output = h_n[-1].index_select(dim=0, index=bwd_order) 52 | return ctx, rnn_output, c_t 53 | 54 | @staticmethod 55 | def _get_sorted_order(lens): 56 | sorted_len, fwd_order = torch.sort(lens.contiguous().view(-1), 0, descending=True) 57 | _, bwd_order = torch.sort(fwd_order) 58 | if isinstance(sorted_len, Variable): 59 | sorted_len = sorted_len.data 60 | sorted_len = list(sorted_len) 61 | return sorted_len, fwd_order, bwd_order 62 | 63 | 64 | class WordEmbedding(nn.Module): 65 | """ 66 | code from @jnhwkim (Jin-Hwa Kim) 67 | https://github.com/jnhwkim/ban-vqa 68 | """ 69 | def __init__(self, ntoken, emb_dim, dropout, padding_idx): 70 | super(WordEmbedding, self).__init__() 71 | self.emb = nn.Embedding(ntoken+1, emb_dim, padding_idx) 72 | self.dropout = nn.Dropout(dropout) 73 | self.ntoken = ntoken 74 | self.emb_dim = emb_dim 75 | 76 | def init_embedding(self, np_file, tfidf=None, tfidf_weights=None): 77 | weight_init = torch.from_numpy(np.load(np_file)) 78 | assert weight_init.shape == (self.ntoken, self.emb_dim) 79 | self.emb.weight.data[:self.ntoken] = weight_init 80 | 81 | def forward(self, x): 82 | emb = self.emb(x) 83 | emb = self.dropout(emb) 84 | return emb 85 | -------------------------------------------------------------------------------- /tasks/NDH/DAN_modules/refer_find_modules.py: -------------------------------------------------------------------------------- 1 | """ 2 | Dual Attention Networks for Visual Reference Resolution in Visual Dialog 3 | Gi-Cheon Kang, Jaeseo Lim, Byoung-Tak Zhang 4 | https://arxiv.org/abs/1902.09368 5 | """ 6 | import torch 7 | import torch.nn as nn 8 | import torch.nn.functional as F 9 | from torch.autograd import Variable 10 | from torch.nn.utils.weight_norm import weight_norm 11 | from .submodules import MultiHeadAttention, PositionwiseFeedForward 12 | from .fc import FCNet 13 | 14 | class REFER(nn.Module): 15 | """ This code is modified from Yu-Hsiang Huang's repository 16 | https://github.com/jadore801120/attention-is-all-you-need-pytorch 17 | """ 18 | def __init__(self, d_model, d_inner, n_head, d_k, d_v, dropout=0.2): 19 | super(REFER, self).__init__() 20 | self.slf_attn = MultiHeadAttention(n_head, d_model, d_k, d_v, dropout=dropout) 21 | self.pos_ffn = PositionwiseFeedForward(d_model, d_inner, dropout=dropout) 22 | 23 | def forward(self, q, m): 24 | enc_output, enc_slf_attn = self.slf_attn(q, m, m) 25 | enc_output = self.pos_ffn(enc_output) 26 | return enc_output, enc_slf_attn 27 | 28 | class FIND(nn.Module): 29 | """ This code is modified from Hengyuan Hu's repository. 
30 | https://github.com/hengyuan-hu/bottom-up-attention-vqa 31 | """ 32 | def __init__(self, v_dim, q_dim, num_hid, dropout=0.2): 33 | super(FIND, self).__init__() 34 | 35 | self.v_proj = FCNet([v_dim, num_hid]) 36 | self.q_proj = FCNet([q_dim, num_hid]) 37 | self.dropout = nn.Dropout(dropout) 38 | self.linear = weight_norm(nn.Linear(num_hid, 1), dim=None) 39 | 40 | def forward(self, v, q): 41 | """ 42 | v: [batch, v, 2048] 43 | q: [10, batch, 1024] 44 | """ 45 | logits = self.logits(v, q) 46 | w = nn.functional.softmax(logits, 1) 47 | return w 48 | 49 | def logits(self, v, q): 50 | batch, k, _ = v.size() 51 | v_proj = self.v_proj(v) 52 | q_proj = self.q_proj(q).unsqueeze(1).repeat(1, k, 1) 53 | joint_repr = v_proj * q_proj 54 | joint_repr = self.dropout(joint_repr) 55 | logits = self.linear(joint_repr) 56 | return logits 57 | -------------------------------------------------------------------------------- /tasks/NDH/DAN_modules/submodules.py: -------------------------------------------------------------------------------- 1 | """ 2 | This code is modified from Yu-Hsiang Huang's repository 3 | https://github.com/jadore801120/attention-is-all-you-need-pytorch 4 | """ 5 | import numpy as np 6 | import torch 7 | import torch.nn as nn 8 | import torch.nn.functional as F 9 | from torch.autograd import Variable 10 | 11 | class MultiHeadAttention(nn.Module): 12 | def __init__(self, n_head, d_model, d_k, d_v, dropout=0.1): 13 | super(MultiHeadAttention, self).__init__() 14 | 15 | self.n_head = n_head 16 | self.d_k = d_k 17 | self.d_v = d_v 18 | 19 | self.w_qs = nn.Linear(d_model, n_head * d_k) 20 | self.w_ks = nn.Linear(d_model, n_head * d_k) 21 | self.w_vs = nn.Linear(d_model, n_head * d_v) 22 | nn.init.normal_(self.w_qs.weight, mean=0, std=np.sqrt(2.0 / (d_model + d_k))) 23 | nn.init.normal_(self.w_ks.weight, mean=0, std=np.sqrt(2.0 / (d_model + d_k))) 24 | nn.init.normal_(self.w_vs.weight, mean=0, std=np.sqrt(2.0 / (d_model + d_v))) 25 | 26 | self.attention = ScaledDotProductAttention(temperature=np.power(d_k, 0.5)) 27 | self.layer_norm = LayerNorm(d_model) 28 | 29 | self.fc = nn.Linear(n_head * d_v, d_model) 30 | nn.init.xavier_normal_(self.fc.weight) 31 | self.dropout = nn.Dropout(dropout) 32 | 33 | def forward(self, q, k, v, mask=None): 34 | ''' 35 | q: [batch, 1, 512] 36 | k, v: [batch, num_entry, 512] 37 | ''' 38 | d_k, d_v, n_head = self.d_k, self.d_v, self.n_head 39 | 40 | sz_b, len_q, _ = q.size() 41 | sz_b, len_k, _ = k.size() 42 | sz_b, len_v, _ = v.size() 43 | 44 | residual = q 45 | 46 | q = self.w_qs(q).view(sz_b, len_q, n_head, d_k) 47 | k = self.w_ks(k).view(sz_b, len_k, n_head, d_k) 48 | v = self.w_vs(v).view(sz_b, len_v, n_head, d_v) 49 | 50 | q = q.permute(2, 0, 1, 3).contiguous().view(-1, len_q, d_k) # (n*b) x lq x dk 51 | k = k.permute(2, 0, 1, 3).contiguous().view(-1, len_k, d_k) # (n*b) x lk x dk 52 | v = v.permute(2, 0, 1, 3).contiguous().view(-1, len_v, d_v) # (n*b) x lv x dv 53 | 54 | if mask is not None: 55 | mask = mask.repeat(n_head, 1, 1) # (n*b) x .. x .. 
56 | output, attn = self.attention(q, k, v, mask=mask) 57 | 58 | output = output.view(n_head, sz_b, len_q, d_v) 59 | output = output.permute(1, 2, 0, 3).contiguous().view(sz_b, len_q, -1) # b x lq x (n*dv) 60 | 61 | output = self.dropout(self.fc(output)) 62 | output = self.layer_norm(output + residual) 63 | 64 | return output, attn 65 | 66 | class ScaledDotProductAttention(nn.Module): 67 | ''' Scaled Dot-Product Attention ''' 68 | 69 | def __init__(self, temperature, attn_dropout=0.1): 70 | super(ScaledDotProductAttention, self).__init__() 71 | self.temperature = temperature 72 | self.dropout = nn.Dropout(attn_dropout) 73 | self.softmax = nn.Softmax(dim=2) 74 | 75 | def forward(self, q, k, v, mask=None): 76 | 77 | attn = torch.bmm(q, k.transpose(1, 2)) 78 | attn = attn / self.temperature 79 | 80 | if mask is not None: 81 | attn = attn.masked_fill(mask, -np.inf) 82 | 83 | attn = self.softmax(attn) 84 | attn = self.dropout(attn) 85 | output = torch.bmm(attn, v) 86 | 87 | return output, attn 88 | 89 | class PositionwiseFeedForward(nn.Module): 90 | ''' A two-feed-forward-layer module ''' 91 | 92 | def __init__(self, d_in, d_hid, dropout=0.1): 93 | super(PositionwiseFeedForward, self).__init__() 94 | self.w_1 = nn.Conv1d(d_in, d_hid, 1) # position-wise 95 | self.w_2 = nn.Conv1d(d_hid, d_in, 1) # position-wise 96 | self.layer_norm = LayerNorm(d_in) 97 | self.dropout = nn.Dropout(dropout) 98 | 99 | def forward(self, x): 100 | residual = x 101 | output = x.transpose(1, 2) 102 | output = self.w_2(F.relu(self.w_1(output))) 103 | output = output.transpose(1, 2) 104 | output = self.dropout(output) 105 | output = self.layer_norm(output + residual) 106 | return output 107 | 108 | class LayerNorm(nn.Module): 109 | """ 110 | Layer Normalization 111 | """ 112 | def __init__(self, features, eps=1e-6): 113 | super(LayerNorm, self).__init__() 114 | self.gamma = nn.Parameter(torch.ones(features)) 115 | self.beta = nn.Parameter(torch.zeros(features)) 116 | self.eps = eps 117 | 118 | def forward(self, x): 119 | mean = x.mean(-1, keepdim=True) 120 | std = x.std(-1, keepdim=True) 121 | return self.gamma * (x - mean) / (std + self.eps) + self.beta 122 | -------------------------------------------------------------------------------- /tasks/NDH/data/README.md: -------------------------------------------------------------------------------- 1 | # Navigation from Dialog History (NDH) Task Data 2 | 3 | A brief overview of the metadata available in each NDH instance: 4 | 5 | #### Always Available: 6 | | Metadata | Explanation | 7 | |---|---| 8 | | `inst_idx` | The unique index of this task instance. | 9 | | `scan` | The unique scan ID of the house in which this instance took place. | 10 | | `target` | The target object for the dialog this instance was drawn from. | 11 | | `start_pano` | The `heading`, `elevation`, and panorama id `pano` of the position from which the navigator asked the last question. | 12 | | `nav_camera` | A list of camera heading adjustments that occurred since the navigator moved to the most recent navigation node (i.e., looking around before asking a question). | 13 | | `dialog_history` | A list of turns. Each turn has a `nav_idx` (the `nav_history` list index where the utterance was transmitted), a `role` (either 'oracle' or 'navigator'), and a `message` (the utterance). | 14 | | `nav_history` | The navigation nodes traversed by the navigator before the latest question. 
| 15 | 16 | #### Only Available at Training Time: 17 | | Training Metadata | Explanation | 18 | |---|---| 19 | | `game_idx` | The unique index of the dialog from which this instance was drawn. | 20 | | `end_panos` | The navigation nodes that compose the end region. | 21 | | `player_path` | The navigation nodes traversed by the navigator in response to the latest answer. | 22 | | `planner_path` | The navigation nodes shown to the oracle in response to the most recent question (first 5 shortest path steps towards the `end_panos`, if there is no dialog history). | 23 | | `navigator_game_quality` | The 1-5 rating received by the navigator from the oracle in this game. | 24 | | `navigator_avg_quality` | The average 1-5 rating received by the navigator across all games in which they were involved. | 25 | | `oracle_game_quality` | The 1-5 rating received by the oracle from the navigator in this game. | 26 | | `oracle_avg_quality` | The average 1-5 rating received by the oracle across all games in which they were involved. | 27 | | `R2R_success` | The R2R success metric of the player path calculated against the last node in the planner path. | 28 | | `R2R_spl` | The R2R SPL metric of the player path against the end node of the planner path. | 29 | | `R2R_oracle_success` | The R2R success metric calculated as though the player path stopped within three meters of the last node in the planner path, or 0 if it never got close. | 30 | | `R2R_oracle_spl` | The R2R SPL metric calculated as though the player path stopped within three meters of the last node in the planner path, or 0 if it never got close. | 31 | -------------------------------------------------------------------------------- /tasks/NDH/data/download.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | 3 | wget https://cvdn.dev/dataset/NDH/train_val/train.json -P tasks/NDH/data/ 4 | wget https://cvdn.dev/dataset/NDH/train_val/val_seen.json -P tasks/NDH/data/ 5 | wget https://cvdn.dev/dataset/NDH/train_val/val_unseen.json -P tasks/NDH/data/ 6 | wget https://cvdn.dev/dataset/NDH/test_cleaned/test_cleaned.json -P tasks/NDH/data/ 7 | 8 | -------------------------------------------------------------------------------- /tasks/NDH/eval.py: -------------------------------------------------------------------------------- 1 | ''' Evaluation of agent trajectories ''' 2 | 3 | import json 4 | import os 5 | import sys 6 | from collections import defaultdict 7 | import networkx as nx 8 | import numpy as np 9 | import pprint 10 | pp = pprint.PrettyPrinter(indent=4) 11 | 12 | from env import R2RBatch 13 | from utils import load_datasets, load_nav_graphs 14 | from agent import BaseAgent, StopAgent, RandomAgent, ShortestAgent 15 | 16 | 17 | class Evaluation(object): 18 | ''' Results submission format: [{'instr_id': string, 'trajectory':[(viewpoint_id, heading_rads, elevation_rads),] } ] ''' 19 | 20 | def __init__(self, splits, path_type='planner_path'): 21 | self.error_margin = 3.0 22 | self.splits = splits 23 | self.gt = {} 24 | self.instr_ids = [] 25 | self.scans = [] 26 | for item in load_datasets(splits): 27 | self.gt[item['inst_idx']] = item 28 | self.instr_ids.append(item['inst_idx']) 29 | self.scans.append(item['scan']) 30 | 31 | # Add 'trusted_path' to gt metadata if necessary. 
32 | if path_type == 'trusted_path' and 'test' not in splits: 33 | planner_goal = item['planner_path'][-1] 34 | if planner_goal in item['player_path'][1:]: 35 | self.gt[item['inst_idx']]['trusted_path'] = item['player_path'][:] 36 | else: 37 | self.gt[item['inst_idx']]['trusted_path'] = item['planner_path'][:] 38 | 39 | self.scans = set(self.scans) 40 | self.instr_ids = set(self.instr_ids) 41 | self.graphs = load_nav_graphs(self.scans) 42 | self.distances = {} 43 | self.path_type = path_type 44 | for scan,G in self.graphs.iteritems(): # compute all shortest paths 45 | self.distances[scan] = dict(nx.all_pairs_dijkstra_path_length(G)) 46 | 47 | def _get_nearest(self, scan, goal_id, path): 48 | near_id = path[0][0] 49 | near_d = self.distances[scan][near_id][goal_id] 50 | for item in path: 51 | d = self.distances[scan][item[0]][goal_id] 52 | if d < near_d: 53 | near_id = item[0] 54 | near_d = d 55 | return near_id 56 | 57 | def _score_item(self, instr_id, path): 58 | ''' Calculate error based on the final position in trajectory, and also 59 | the closest position (oracle stopping rule). ''' 60 | gt = self.gt[int(instr_id)] 61 | start = gt[self.path_type][0] 62 | assert start == path[0][0], 'Result trajectories should include the start position' 63 | goal = gt[self.path_type][-1] 64 | planner_goal = gt['planner_path'][-1] # for calculating oracle planner success (e.g., passed over desc goal?) 65 | final_position = path[-1][0] 66 | nearest_position = self._get_nearest(gt['scan'], goal, path) 67 | nearest_planner_position = self._get_nearest(gt['scan'], planner_goal, path) 68 | dist_to_end_start = None 69 | dist_to_end_end = None 70 | for end_pano in gt['end_panos']: 71 | d = self.distances[gt['scan']][start][end_pano] 72 | if dist_to_end_start is None or d < dist_to_end_start: 73 | dist_to_end_start = d 74 | d = self.distances[gt['scan']][final_position][end_pano] 75 | if dist_to_end_end is None or d < dist_to_end_end: 76 | dist_to_end_end = d 77 | self.scores['nav_errors'].append(self.distances[gt['scan']][final_position][goal]) 78 | self.scores['oracle_errors'].append(self.distances[gt['scan']][nearest_position][goal]) 79 | self.scores['oracle_plan_errors'].append(self.distances[gt['scan']][nearest_planner_position][planner_goal]) 80 | self.scores['dist_to_end_reductions'].append(dist_to_end_start - dist_to_end_end) 81 | distance = 0 # Work out the length of the path in meters 82 | prev = path[0] 83 | for curr in path[1:]: 84 | if prev[0] != curr[0]: 85 | try: 86 | self.graphs[gt['scan']][prev[0]][curr[0]] 87 | except KeyError as err: 88 | print 'Error: The provided trajectory moves from %s to %s but the navigation graph contains no '\ 89 | 'edge between these viewpoints. Please ensure the provided navigation trajectories '\ 90 | 'are valid, so that trajectory length can be accurately calculated.' 
% (prev[0], curr[0])
91 |                     raise
92 |                 distance += self.distances[gt['scan']][prev[0]][curr[0]]
93 |             prev = curr
94 |         self.scores['trajectory_lengths'].append(distance)
95 |         self.scores['shortest_path_lengths'].append(self.distances[gt['scan']][start][goal])
96 | 
97 |     def score(self, output_file):
98 |         ''' Evaluate each agent trajectory based on how close it got to the goal location '''
99 |         self.scores = defaultdict(list)
100 |         instr_ids = set(self.instr_ids)
101 | 
102 |         item_list = []
103 |         with open(output_file) as f:
104 |             for item in json.load(f):
105 |                 # Check against expected ids
106 |                 if item['inst_idx'] in instr_ids:
107 |                     instr_ids.remove(item['inst_idx'])
108 |                     self._score_item(item['inst_idx'], item['trajectory'])
109 |                     item['dist_to_end_reductions'] = self.scores['dist_to_end_reductions'][-1]
110 |                     item_list.append(item)
111 | 
112 |         with open(output_file + '.a', 'w') as f:
113 |             json.dump(item_list, f)
114 | 
115 |         assert len(instr_ids) == 0, 'Trajectories not provided for %d instruction ids: %s' % (len(instr_ids), instr_ids)
116 |         assert len(self.scores['nav_errors']) == len(self.instr_ids)
117 | 
118 |         num_successes = len([i for i in self.scores['nav_errors'] if i < self.error_margin])
119 |         oracle_successes = len([i for i in self.scores['oracle_errors'] if i < self.error_margin])
120 |         oracle_plan_successes = len([i for i in self.scores['oracle_plan_errors'] if i < self.error_margin])
121 | 
122 |         spls = []
123 |         for err, length, sp in zip(self.scores['nav_errors'], self.scores['trajectory_lengths'], self.scores['shortest_path_lengths']):
124 |             if err < self.error_margin:
125 |                 if sp > 0:
126 |                     spls.append(sp / max(length, sp))
127 |                 else:  # In IF, some Q/A pairs happen when we're already in the goal region, so taking no action is correct.
128 |                     spls.append(1 if length == 0 else 0)
129 |             else:
130 |                 spls.append(0)
131 | 
132 |         score_summary = {
133 |             'length': np.average(self.scores['trajectory_lengths']),
134 |             'nav_error': np.average(self.scores['nav_errors']),
135 |             'oracle success_rate': float(oracle_successes)/float(len(self.scores['oracle_errors'])),
136 |             'success_rate': float(num_successes)/float(len(self.scores['nav_errors'])),
137 |             'spl': np.average(spls),
138 |             'oracle path_success_rate': float(oracle_plan_successes)/float(len(self.scores['oracle_plan_errors'])),
139 |             'dist_to_end_reduction': sum(self.scores['dist_to_end_reductions']) / float(len(self.scores['dist_to_end_reductions']))
140 |         }
141 | 
142 |         assert score_summary['spl'] <= score_summary['success_rate']
143 |         return score_summary, self.scores
144 | 
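For clarity, the 'spl' term computed above is success-weighted path length: SPL = S * sp / max(length, sp), where S is binary success, sp the shortest-path length, and length the executed trajectory length. A worked example with hypothetical numbers:

# Hypothetical episode: shortest path sp = 10.0 m, agent walked length = 12.5 m,
# and it stopped within the 3.0 m error margin (success).
sp, length, success = 10.0, 12.5, True
spl = (sp / max(length, sp)) if success else 0.0   # 10.0 / 12.5 = 0.8
# A failed episode scores 0 regardless of length, which is why the
# summary asserts spl <= success_rate.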
146 | RESULT_DIR = 'tasks/NDH/eval/results/'
147 | 
148 | 
149 | def eval_simple_agents():
150 |     # path_type = 'planner_path'
151 |     # path_type = 'player_path'
152 |     path_type = 'trusted_path'
153 | 
154 |     ''' Run simple baselines on each split. '''
155 |     for split in ['train', 'val_seen', 'val_unseen', 'test']:
156 |         env = R2RBatch(None, batch_size=1, splits=[split], path_type=path_type)
157 |         ev = Evaluation([split], path_type=path_type)
158 | 
159 |         for agent_type in ['Stop', 'Shortest', 'Random']:
160 |             outfile = '%s%s_%s_agent.json' % (RESULT_DIR, split, agent_type.lower())
161 |             agent = BaseAgent.get_agent(agent_type)(env, outfile)
162 |             agent.test()
163 |             agent.write_results()
164 |             score_summary, _ = ev.score(outfile)
165 |             print '\n%s' % agent_type
166 |             pp.pprint(score_summary)
167 | 
168 | 
169 | def eval_seq2seq():
170 |     ''' Eval sequence to sequence models on val splits (iteration selected from training error) '''
171 |     outfiles = [
172 |         RESULT_DIR + 'seq2seq_teacher_imagenet_%s_iter_5000.json',
173 |         RESULT_DIR + 'seq2seq_sample_imagenet_%s_iter_20000.json'
174 |     ]
175 |     for outfile in outfiles:
176 |         for split in ['val_seen', 'val_unseen']:
177 |             ev = Evaluation([split])
178 |             score_summary, _ = ev.score(outfile % split)
179 |             print '\n%s' % outfile
180 |             pp.pprint(score_summary)
181 | 
182 | 
183 | if __name__ == '__main__':
184 | 
185 |     eval_simple_agents()
186 |     #eval_seq2seq()
187 | 
-------------------------------------------------------------------------------- /tasks/NDH/model.py: --------------------------------------------------------------------------------
1 | 
2 | import torch
3 | import torch.nn as nn
4 | from torch.autograd import Variable
5 | import torch.nn.functional as F
6 | from torch.nn.utils.rnn import pack_padded_sequence, pad_packed_sequence
7 | from param import args
8 | from DAN_modules.refer_find_modules import REFER, FIND
9 | from DAN_modules.language_model import WordEmbedding, DynamicRNN
10 | 
11 | class EncoderLSTM(nn.Module):
12 |     ''' Encodes navigation instructions, returning hidden state context (for
13 |         attention methods) and a decoder initial state.
'''
14 | 
15 |     def __init__(self, vocab_size, embedding_size, hidden_size, padding_idx,
16 |                  dropout_ratio, bidirectional=False, num_layers=1):
17 |         super(EncoderLSTM, self).__init__()
18 |         self.embedding_size = embedding_size
19 |         self.hidden_size = hidden_size
20 |         self.drop = nn.Dropout(p=dropout_ratio)
21 |         self.num_directions = 2 if bidirectional else 1
22 |         self.num_layers = num_layers
23 | 
24 |         self.word_embed = nn.Embedding(vocab_size, embedding_size, padding_idx)
25 |         self.sent_embed = nn.LSTM(embedding_size, hidden_size, 2, dropout=dropout_ratio, batch_first=True)
26 |         self.sent_embed = DynamicRNN(self.sent_embed)
27 |         self.hist_embed = nn.LSTM(embedding_size, hidden_size, 2, dropout=dropout_ratio, batch_first=True)
28 |         self.hist_embed = DynamicRNN(self.hist_embed)
29 | 
30 |         self.encoder2decoder = nn.Linear(2 * hidden_size * self.num_directions,
31 |                                          hidden_size * self.num_directions)
32 | 
33 |         self.layer_stack = nn.ModuleList([
34 |             REFER(d_model=512, d_inner=1024, n_head=4, d_k=256, d_v=256, dropout=0.2)
35 |             for _ in range(2)])
36 | 
37 |     def init_state(self, inputs):
38 |         ''' Initialize to zero cell states and hidden states.'''
39 |         batch_size = inputs.size(0)
40 |         h0 = Variable(torch.zeros(
41 |             self.num_layers * self.num_directions,
42 |             batch_size,
43 |             self.hidden_size
44 |         ), requires_grad=False)
45 |         c0 = Variable(torch.zeros(
46 |             self.num_layers * self.num_directions,
47 |             batch_size,
48 |             self.hidden_size
49 |         ), requires_grad=False)
50 |         return h0.cuda(), c0.cuda()
51 | 
52 |     def add_entry(self, mem, hist, hl):
53 |         h_emb = self.word_embed(hist)
54 |         h_emb = self.drop(h_emb)
55 |         _, h_emb, _ = self.hist_embed(h_emb, hl)
56 |         h_emb = h_emb.unsqueeze(1)
57 | 
58 |         if mem is None: mem = h_emb
59 |         else: mem = torch.cat((mem, h_emb), 1)
60 |         return mem
61 | 
62 |     def refer_module(self, mem, q):
63 |         '''
64 |         q : [b, 512]
65 |         mem : [b, number of entries, 512]
66 |         '''
67 |         context = q.unsqueeze(1)
68 |         for enc_layer in self.layer_stack:
69 |             context, _ = enc_layer(context, mem)
70 |         return context.squeeze(1)
71 | 
72 |     def forward(self, inputs, lengths, Last_QA, Last_QA_lengths, hist, hist_lengths, tar, tar_lengths):
73 |         ''' Expects dialog inputs as (batch, seq_len) vocab indices with matching lengths for
74 |             dynamic batching; the encoder reads Last_QA, hist, and tar (inputs/lengths are unused here). '''
75 | 
76 |         q = Last_QA
77 |         ql = Last_QA_lengths
78 |         c = tar
79 |         cl = tar_lengths
80 |         h = hist
81 |         hl = hist_lengths
82 |         # write history embedding to memory
83 |         mem = self.add_entry(None, c, cl)
84 |         enc_outs = []
85 |         q_emb = self.word_embed(q)
86 |         q_emb = self.drop(q_emb)
87 | 
88 |         ctx, q_emb, c_t = self.sent_embed(q_emb, ql)
89 | 
90 |         for i in range(15):  # fixed refer-step budget over the dialog history
91 | 
92 |             his = self.refer_module(mem, q_emb)
93 |             ref_aware = torch.cat((q_emb, his), 1)
94 |             enc_outs.append(ref_aware)
95 | 
96 |             # write history embedding to memory
97 |             if i != 14:  # after the last step there is no further entry to write
98 |                 mem = self.add_entry(mem, h[:, i, :], hl[:, i])
99 | 
100 |         enc_out = torch.stack(enc_outs, 1)
101 |         # enc_out = self.linear(enc_out[:, -1, :])
102 |         decoder_init = nn.Tanh()(self.encoder2decoder(enc_out[:, -1, :]))
103 | 
104 |         mem = torch.cat((mem, q_emb.unsqueeze(1)), 1)
105 |         mem = self.drop(mem)
106 | 
107 |         return mem, decoder_init, c_t
108 | 
109 | 
110 | class SoftDotAttention(nn.Module):
111 |     '''Soft Dot Attention.
112 | 
113 |     Ref: http://www.aclweb.org/anthology/D15-1166
114 |     Adapted from PyTorch OPEN NMT.
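
    In outline, for a query h (batch x dim) and context C (batch x seq_len x dim):
        attn    = softmax(C @ W_in(h))     # batch x seq_len, optionally -inf masked
        c       = attn^T @ C               # weighted context, batch x dim
        h_tilde = tanh(W_out([c; h]))      # returned when output_tilde=True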
115 |     '''
116 | 
117 |     def __init__(self, query_dim, ctx_dim):
118 |         '''Initialize layer.'''
119 |         super(SoftDotAttention, self).__init__()
120 |         self.linear_in = nn.Linear(query_dim, ctx_dim, bias=False)
121 |         self.sm = nn.Softmax(dim=1)
122 |         self.linear_out = nn.Linear(query_dim + ctx_dim, query_dim, bias=False)
123 |         self.tanh = nn.Tanh()
124 | 
125 |     def forward(self, h, context, mask=None,
126 |                 output_tilde=True, output_prob=True):
127 |         '''Propagate h through the network.
128 | 
129 |         h: batch x dim
130 |         context: batch x seq_len x dim
131 |         mask: batch x seq_len indices to be masked
132 |         '''
133 |         target = self.linear_in(h).unsqueeze(2)  # batch x dim x 1
134 | 
135 |         # Get attention
136 |         attn = torch.bmm(context, target).squeeze(2)  # batch x seq_len
137 |         logit = attn
138 | 
139 |         if mask is not None:
140 |             # -Inf masking prior to the softmax
141 |             attn.masked_fill_(mask, -float('inf'))
142 |         # Softmax over seq_len; dim=1 is set in the constructor, so no dim argument is needed here.
143 |         attn = self.sm(attn)
144 |         attn3 = attn.view(attn.size(0), 1, attn.size(1))  # batch x 1 x seq_len
145 | 
146 |         weighted_context = torch.bmm(attn3, context).squeeze(1)  # batch x dim
147 |         if not output_prob:
148 |             attn = logit
149 |         if output_tilde:
150 |             h_tilde = torch.cat((weighted_context, h), 1)
151 |             h_tilde = self.tanh(self.linear_out(h_tilde))
152 |             return h_tilde, attn
153 |         else:
154 |             return weighted_context, attn
155 | 
156 | 
157 | class AttnDecoderLSTM(nn.Module):
158 |     ''' An unrolled LSTM with attention over instructions for decoding navigation actions. '''
159 | 
160 |     def __init__(self, embedding_size, hidden_size,
161 |                  dropout_ratio, feature_size=2048+4):
162 |         super(AttnDecoderLSTM, self).__init__()
163 |         self.embedding_size = embedding_size
164 |         self.feature_size = feature_size
165 |         self.hidden_size = hidden_size
166 |         self.embedding = nn.Sequential(
167 |             nn.Linear(args.angle_feat_size, self.embedding_size),
168 |             nn.Tanh()
169 |         )
170 |         self.drop = nn.Dropout(p=dropout_ratio)
171 |         self.drop_env = nn.Dropout(p=args.featdropout)
172 |         self.lstm = nn.LSTMCell(embedding_size+feature_size, hidden_size)
173 |         self.feat_att_layer = SoftDotAttention(hidden_size, feature_size)
174 |         self.attention_layer = SoftDotAttention(hidden_size, hidden_size)
175 |         self.candidate_att_layer = SoftDotAttention(hidden_size, feature_size)
176 | 
177 |     def forward(self, action, feature, cand_feat,
178 |                 h_0, prev_h1, c_0,
179 |                 ctx, ctx_mask=None,
180 |                 already_dropfeat=False):
181 |         '''
182 |         Takes a single step in the decoder LSTM (allowing sampling).
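
        One step, in order: embed the action's angle features, attend over the
        panoramic `feature` grid, run one LSTMCell step, attend over the encoder
        context `ctx`, then score the candidates in `cand_feat` (returned as `logit`).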
183 | action: batch x angle_feat_size 184 | feature: batch x 36 x (feature_size + angle_feat_size) 185 | cand_feat: batch x cand x (feature_size + angle_feat_size) 186 | h_0: batch x hidden_size 187 | prev_h1: batch x hidden_size 188 | c_0: batch x hidden_size 189 | ctx: batch x seq_len x dim 190 | ctx_mask: batch x seq_len - indices to be masked 191 | already_dropfeat: used in EnvDrop 192 | ''' 193 | action_embeds = self.embedding(action) 194 | 195 | # Adding Dropout 196 | action_embeds = self.drop(action_embeds) 197 | 198 | if not already_dropfeat: 199 | # Dropout the raw feature as a common regularization 200 | feature[..., :-args.angle_feat_size] = self.drop_env(feature[..., :-args.angle_feat_size]) # Do not drop the last args.angle_feat_size (position feat) 201 | 202 | prev_h1_drop = self.drop(prev_h1) 203 | attn_feat, _ = self.feat_att_layer(prev_h1_drop, feature, output_tilde=False) 204 | 205 | concat_input = torch.cat((action_embeds, attn_feat), 1) 206 | 207 | h_1, c_1 = self.lstm(concat_input, (prev_h1, c_0)) 208 | 209 | h_1_drop = self.drop(h_1) 210 | h_tilde, alpha = self.attention_layer(h_1_drop, ctx, ctx_mask) 211 | 212 | # Adding Dropout 213 | h_tilde_drop = self.drop(h_tilde) 214 | 215 | if not already_dropfeat: 216 | cand_feat[..., :-args.angle_feat_size] = self.drop_env(cand_feat[..., :-args.angle_feat_size]) 217 | 218 | _, logit = self.candidate_att_layer(h_tilde_drop, cand_feat, output_prob=False) 219 | 220 | return h_1, c_1, logit, h_tilde -------------------------------------------------------------------------------- /tasks/NDH/param.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | parser = argparse.ArgumentParser() 3 | parser.add_argument('--path_type', type=str, required=True, 4 | help='planner_path, player_path, or trusted_path') 5 | parser.add_argument('--history', type=str, required=True, 6 | help='none, target, oracle_ans, nav_q_oracle_ans, or all') 7 | parser.add_argument('--feedback', type=str, required=True, 8 | help='teacher or sample') 9 | parser.add_argument('--eval_type', type=str, required=True, 10 | help='val or test') 11 | parser.add_argument('--blind', action='store_true', required=False, 12 | help='whether to replace the ResNet encodings with zero vectors at inference time') 13 | parser.add_argument('--angle_feat_size', type=int, default=4) 14 | parser.add_argument('--num_view', type=int, default=36) 15 | parser.add_argument('--featdropout', type=float, default=0.3) 16 | parser.add_argument('--ignoreid', type=int, default=-100) 17 | parser.add_argument('--prefix', type=str, default="v1", required=True) 18 | args = parser.parse_args() 19 | -------------------------------------------------------------------------------- /tasks/NDH/requirements.txt: -------------------------------------------------------------------------------- 1 | backports.functools-lru-cache==1.4 2 | cycler==0.10.0 3 | decorator==4.1.2 4 | matplotlib==2.1.0 5 | networkx==2.0 6 | numpy==1.13.3 7 | olefile==0.44 8 | pandas==0.21.0 9 | Pillow==4.3.0 10 | pyparsing==2.2.0 11 | python-dateutil==2.6.1 12 | pytz==2017.3 13 | PyYAML==3.12 14 | six==1.11.0 15 | subprocess32==3.2.7 16 | torch==1.0.0 17 | torchvision==0.1.9 18 | -------------------------------------------------------------------------------- /tasks/NDH/summarize_perf.py: -------------------------------------------------------------------------------- 1 | ''' Plotting losses etc. 
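Reads the per-run CSV training logs that train.py writes, prints the best value of
each metric per run, and emits partial rows for pasting into a LaTeX results table.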
'''
2 | 
3 | 
4 | import numpy as np
5 | import pandas as pd
6 | import os
7 | 
8 | PLOT_DIR = 'tasks/NDH/plots/'
9 | dfs = {}
10 | # val-seq2seq-all-planner_path-sample-imagenet-log
11 | summary = {"val_seen": {}, "val_unseen": {}, "test": {}}
12 | for path_type, path_len in [['planner_path', 20], ['player_path', 80], ['trusted_path', 80]]:
13 |     print(path_type)
14 |     for eval_type in ['val', 'test']:
15 |         print('\t%s (%d)' % (eval_type, path_len))
16 |         for feedback in ['sample']:
17 |             print('\t\t%s' % feedback)
18 |             for history in ['none', 'target', 'oracle_ans', 'nav_q_oracle_ans', 'all']:
19 |                 for blind in [True, False]:
20 |                     print('\t\t\t%s (%s)' % (history, 'blind' if blind else 'vision'))
21 |                     if path_len is None:
22 |                         if blind:
23 |                             log = '%s-seq2seq-%s-%s-%s-imagenet-blind-log.csv' % (eval_type, history,
24 |                                                                                   path_type, feedback)
25 |                         else:
26 |                             log = '%s-seq2seq-%s-%s-%s-imagenet-log.csv' % (eval_type, history, path_type, feedback)
27 |                     else:
28 |                         if blind:
29 |                             log = '%s-seq2seq-%s-%s-%d-%s-imagenet-blind-log.csv' % (eval_type, history,
30 |                                                                                      path_type, path_len, feedback)
31 |                         else:
32 |                             log = '%s-seq2seq-%s-%s-%d-%s-imagenet-log.csv' % (eval_type, history,
33 |                                                                                path_type, path_len, feedback)
34 |                     fn = os.path.join(PLOT_DIR, log)
35 |                     if os.path.isfile(fn):
36 |                         dfs[log] = pd.read_csv(fn)
37 |                         print('\t\t\t\t%d' % len(dfs[log]))
38 |                         metrics = [
39 |                             'val_seen success_rate',
40 |                             'val_seen oracle path_success_rate',
41 |                             'val_seen dist_to_end_reduction',
42 |                             'val_unseen success_rate',
43 |                             'val_unseen oracle path_success_rate',
44 |                             'val_unseen dist_to_end_reduction'] if eval_type == 'val' else [
45 |                             'test success_rate',
46 |                             'test oracle path_success_rate',
47 |                             'test dist_to_end_reduction']
48 |                         for metric in metrics:
49 |                             v = max(dfs[log][metric])
50 |                             print('\t\t\t\t%s\t%.3f' % (metric, v))
51 | 
52 |                         # Populate summary.
53 |                         if len(dfs[log]) == 200:  # only fully finished runs (20000 iters / log_every 100 = 200 rows).
54 |                             for cond in ['val_seen', 'val_unseen', 'test']:
55 |                                 abl = history + "-%s" % ('blind' if blind else 'vis')
56 |                                 if abl not in summary[cond]:
57 |                                     summary[cond][abl] = {"if": {}, "gd": {}}
58 |                                 ifm = '%s oracle path_success_rate' % cond
59 |                                 if ifm in dfs[log]:
60 |                                     summary[cond][abl]["if"][path_type] = list(dfs[log][ifm])
61 |                                 gdm = '%s dist_to_end_reduction' % cond
62 |                                 if gdm in dfs[log]:
63 |                                     summary[cond][abl]["gd"][path_type] = list(dfs[log][gdm])
64 | 
65 | # Print partial table rows for easy copy/paste to latex.
66 | print('')
67 | for cond in ['val_seen', 'val_unseen', 'test']:
68 |     for history in ['none', 'target', 'oracle_ans', 'nav_q_oracle_ans', 'all']:
69 |         for blind in [True, False]:
70 |             abl = history + "-%s" % ('blind' if blind else 'vis')
71 |             if abl not in summary[cond]:
72 |                 continue
73 |             l = '%s\t%s\t' % (cond, abl)
74 |             ns = []
75 |             for metric in ['if', 'gd']:
76 |                 for sup in ['planner_path', 'player_path', 'trusted_path']:
77 |                     if sup in summary[cond][abl][metric]:
78 |                         if cond == 'test':  # test performance is reported at the epoch of best val_unseen GD performance.
79 |                             if sup not in summary["val_unseen"][abl]["gd"]:
80 |                                 print("val_unseen not yet finished for %s" % abl)
81 |                                 ns.append(-2)
82 |                             else:
83 |                                 b = max(summary["val_unseen"][abl]["gd"][sup])
84 |                                 best_idx = summary["val_unseen"][abl]["gd"][sup].index(b)
85 |                                 ns.append(summary[cond][abl][metric][sup][best_idx])
86 |                         else:
87 |                             ns.append(max(summary[cond][abl][metric][sup]))
88 |                     else:
89 |                         ns.append(-1)
90 |             l += ' & '.join(["$%.1f$" % (n*100) for n in ns[:3]]) + ' & ' + \
91 |                  ' & '.join(["$%.2f$" % n for n in ns[3:]])
92 |             print(l)
93 | 
-------------------------------------------------------------------------------- /tasks/NDH/train.py: --------------------------------------------------------------------------------
1 | import sys
2 | 
3 | import torch
4 | import torch.nn as nn
5 | from torch.autograd import Variable
6 | from torch import optim
7 | import torch.nn.functional as F
8 | 
9 | import os
10 | import time
11 | import numpy as np
12 | import pandas as pd
13 | from collections import defaultdict
14 | 
15 | from utils import read_vocab, write_vocab, build_vocab, Tokenizer, padding_idx, timeSince
16 | from env import R2RBatch
17 | from model import EncoderLSTM, AttnDecoderLSTM
18 | from agent import Seq2SeqAgent
19 | from eval import Evaluation
20 | from param import args
21 | 
22 | import warnings
23 | warnings.filterwarnings("ignore")
24 | 
25 | TRAIN_VOCAB = 'tasks/NDH/data/train_vocab.txt'
26 | TRAINVAL_VOCAB = 'tasks/NDH/data/trainval_vocab.txt'
27 | 
28 | prefix = args.prefix
29 | 
30 | RESULT_DIR = 'tasks/NDH/results/' + prefix + '/'  # trailing slash: paths below are built by concatenation
31 | SNAPSHOT_DIR = 'tasks/NDH/snapshots/' + prefix + '/'
32 | PLOT_DIR = 'tasks/NDH/plots/' + prefix + '/'
33 | 
34 | if not os.path.exists(RESULT_DIR):
35 |     os.makedirs(RESULT_DIR)
36 | if not os.path.exists(SNAPSHOT_DIR):
37 |     os.makedirs(SNAPSHOT_DIR)
38 | if not os.path.exists(PLOT_DIR):
39 |     os.makedirs(PLOT_DIR)
40 | 
41 | IMAGENET_FEATURES = 'img_features/ResNet-152-imagenet.tsv'
42 | 
43 | # Training settings.
44 | agent_type = 'seq2seq'
45 | 
46 | # Fixed params from MP.
47 | features = IMAGENET_FEATURES
48 | batch_size = 100
49 | # word_embedding_size = 256
50 | word_embedding_size = 256
51 | action_embedding_size = 32
52 | target_embedding_size = 32
53 | hidden_size = 512
54 | bidirectional = False
55 | dropout_ratio = 0.5
56 | learning_rate = 0.0001
57 | weight_decay = 0.0005
58 | 
59 | def train(train_env, encoder, decoder, n_iters, path_type, history, feedback_method, max_episode_len, MAX_INPUT_LENGTH, model_prefix,
60 |     log_every=100, val_envs=None):
61 |     ''' Train on training set, validating on both seen and unseen.
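
        Each log_every-sized interval: train, then for every val env measure (a) loss
        under training conditions (dropout on, same feedback) and (b) metrics under
        test conditions (argmax decoding); results are appended to a CSV under
        PLOT_DIR and encoder/decoder snapshots are written to SNAPSHOT_DIR.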
'''
62 |     if val_envs is None:
63 |         val_envs = {}
64 | 
65 |     if agent_type == 'seq2seq':
66 |         agent = Seq2SeqAgent(train_env, "", encoder, decoder, max_episode_len)
67 |     else:
68 |         sys.exit("Unrecognized agent_type '%s'" % agent_type)
69 |     print('Training a %s agent with %s feedback' % (agent_type, feedback_method))
70 |     encoder_optimizer = optim.Adam(encoder.parameters(), lr=learning_rate, weight_decay=weight_decay)
71 |     decoder_optimizer = optim.Adam(decoder.parameters(), lr=learning_rate, weight_decay=weight_decay)
72 | 
73 |     data_log = defaultdict(list)
74 |     start = time.time()
75 |     print('Start training')
76 |     for idx in range(0, n_iters, log_every):
77 | 
78 |         interval = min(log_every, n_iters - idx)
79 |         iter = idx + interval
80 |         data_log['iteration'].append(iter)
81 | 
82 |         # Train for log_every interval
83 |         agent.train(encoder_optimizer, decoder_optimizer, interval, feedback=feedback_method)
84 |         train_losses = np.array(agent.losses)
85 |         assert len(train_losses) == interval
86 |         train_loss_avg = np.average(train_losses)
87 |         data_log['train loss'].append(train_loss_avg)
88 |         loss_str = 'train loss: %.4f' % train_loss_avg
89 | 
90 |         # Run validation
91 |         for env_name, (env, evaluator) in val_envs.items():
92 |             agent.env = env
93 |             agent.results_path = '%s%s_%s_iter_%d.json' % (RESULT_DIR, model_prefix, env_name, iter)
94 |             # Get validation loss under the same conditions as training
95 |             agent.test(use_dropout=True, feedback=feedback_method, allow_cheat=True)
96 |             val_losses = np.array(agent.losses)
97 |             val_loss_avg = np.average(val_losses)
98 |             data_log['%s loss' % env_name].append(val_loss_avg)
99 |             # Get validation distance from goal under test evaluation conditions
100 |             agent.test(use_dropout=False, feedback='argmax')
101 |             agent.write_results()
102 |             score_summary, _ = evaluator.score(agent.results_path)
103 |             loss_str += ', %s loss: %.4f' % (env_name, val_loss_avg)
104 |             for metric, val in score_summary.items():
105 |                 data_log['%s %s' % (env_name, metric)].append(val)
106 |                 if metric in ['success_rate', 'oracle success_rate', 'oracle path_success_rate', 'dist_to_end_reduction']:
107 |                     loss_str += ', %s: %.3f' % (metric, val)
108 | 
109 |         agent.env = train_env
110 | 
111 |         print('%s (%d %d%%) %s' % (timeSince(start, float(iter)/n_iters),
112 |                                     iter, float(iter)/n_iters*100, loss_str))
113 |         df = pd.DataFrame(data_log)
114 |         df.set_index('iteration', inplace=True)
115 |         df_path = '%s%s-log.csv' % (PLOT_DIR, model_prefix)
116 |         df.to_csv(df_path)
117 | 
118 |         split_string = "-".join(train_env.splits)
119 |         enc_path = '%s%s_%s_enc_iter_%d' % (SNAPSHOT_DIR, model_prefix, split_string, iter)
120 |         dec_path = '%s%s_%s_dec_iter_%d' % (SNAPSHOT_DIR, model_prefix, split_string, iter)
121 |         agent.save(enc_path, dec_path)
122 | 
123 |     print('Finish training')
124 | def setup():
125 |     torch.manual_seed(1)
126 |     torch.cuda.manual_seed(1)
127 |     # Check for vocabs
128 |     if not os.path.exists(TRAIN_VOCAB):
129 |         write_vocab(build_vocab(splits=['train']), TRAIN_VOCAB)
130 |     if not os.path.exists(TRAINVAL_VOCAB):
131 |         write_vocab(build_vocab(splits=['train', 'val_seen', 'val_unseen']), TRAINVAL_VOCAB)
132 | 
133 | 
134 | def test_submission(path_type, max_episode_len, history, MAX_INPUT_LENGTH, feedback_method, n_iters, model_prefix, blind):
135 |     ''' Train on combined training and validation sets, and generate test submission.
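
        Builds the vocab from train + both val splits, trains for the full n_iters
        without validation envs, then decodes the test split with argmax and writes
        the submission JSON.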
'''
136 | 
137 |     setup()
138 | 
139 |     # Create a batch training environment that will also preprocess text
140 |     vocab = read_vocab(TRAINVAL_VOCAB)
141 |     tok = Tokenizer(vocab=vocab, encoding_length=MAX_INPUT_LENGTH)
142 |     train_env = R2RBatch(features, batch_size=batch_size, splits=['train', 'val_seen', 'val_unseen'], tokenizer=tok,
143 |                          path_type=path_type, history=history, blind=blind)
144 | 
145 |     # Build models and train
146 |     enc_hidden_size = hidden_size//2 if bidirectional else hidden_size
147 |     encoder = EncoderLSTM(len(vocab), word_embedding_size, enc_hidden_size, padding_idx,
148 |                           dropout_ratio, bidirectional=bidirectional).cuda()
149 |     decoder = AttnDecoderLSTM(action_embedding_size, hidden_size, dropout_ratio).cuda()
150 | 
151 |     train(train_env, encoder, decoder, n_iters, path_type, history, feedback_method, max_episode_len, MAX_INPUT_LENGTH, model_prefix)
152 | 
153 |     # Generate test submission
154 |     test_env = R2RBatch(features, batch_size=batch_size, splits=['test'], tokenizer=tok,
155 |                         path_type=path_type, history=history, blind=blind)
156 |     agent = Seq2SeqAgent(test_env, "", encoder, decoder, max_episode_len)
157 |     agent.results_path = '%s%s_%s_iter_%d.json' % (RESULT_DIR, model_prefix, 'test', 5000)
158 |     agent.test(use_dropout=False, feedback='argmax')
159 |     agent.write_results()
160 | 
161 | 
162 | # NOTE: only available to the authors for now, for writing the paper.
163 | def train_test(path_type, max_episode_len, history, MAX_INPUT_LENGTH, feedback_method, n_iters, model_prefix, blind):
164 |     ''' Train on the training set, and validate on the test split. '''
165 | 
166 |     setup()
167 |     # Create a batch training environment that will also preprocess text
168 |     vocab = read_vocab(TRAINVAL_VOCAB)
169 |     tok = Tokenizer(vocab=vocab, encoding_length=MAX_INPUT_LENGTH)
170 |     train_env = R2RBatch(features, batch_size=batch_size, splits=['train', 'val_seen', 'val_unseen'], tokenizer=tok,
171 |                          path_type=path_type, history=history, blind=blind)
172 | 
173 |     # Create validation environments
174 |     val_envs = {split: (R2RBatch(features, batch_size=batch_size, splits=[split],
175 |                                  tokenizer=tok, path_type=path_type, history=history, blind=blind),
176 |                         Evaluation([split], path_type=path_type)) for split in ['test']}
177 | 
178 |     # Build models and train
179 |     enc_hidden_size = hidden_size // 2 if bidirectional else hidden_size
180 |     encoder = EncoderLSTM(len(vocab), word_embedding_size, enc_hidden_size, padding_idx,
181 |                           dropout_ratio, bidirectional=bidirectional).cuda()
182 |     decoder = AttnDecoderLSTM(action_embedding_size, hidden_size, dropout_ratio).cuda()
183 |     train(train_env, encoder, decoder, n_iters, path_type, history, feedback_method, max_episode_len, MAX_INPUT_LENGTH,
184 |           model_prefix, val_envs=val_envs)
185 | 
186 | 
187 | def train_val(path_type, max_episode_len, history, MAX_INPUT_LENGTH, feedback_method, n_iters, model_prefix, blind):
188 |     ''' Train on the training set, and validate on seen and unseen splits.
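
        This is the setting selected by --eval_type val: train on the train split
        only, with val_seen and val_unseen evaluated every log interval.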
'''
189 | 
190 |     setup()
191 |     # Create a batch training environment that will also preprocess text
192 |     vocab = read_vocab(TRAIN_VOCAB)
193 |     tok = Tokenizer(vocab=vocab, encoding_length=MAX_INPUT_LENGTH)
194 |     train_env = R2RBatch(features, batch_size=batch_size, splits=['train'], tokenizer=tok,
195 |                          path_type=path_type, history=history, blind=blind)
196 | 
197 |     # Create validation environments
198 |     val_envs = {split: (R2RBatch(features, batch_size=batch_size, splits=[split],
199 |                                  tokenizer=tok, path_type=path_type, history=history, blind=blind),
200 |                         Evaluation([split], path_type=path_type)) for split in ['val_seen', 'val_unseen']}
201 | 
202 |     # Build models and train
203 |     enc_hidden_size = hidden_size//2 if bidirectional else hidden_size
204 |     encoder = EncoderLSTM(len(vocab), word_embedding_size, enc_hidden_size, padding_idx,
205 |                           dropout_ratio, bidirectional=bidirectional).cuda()
206 |     decoder = AttnDecoderLSTM(action_embedding_size, hidden_size, dropout_ratio).cuda()
207 |     train(train_env, encoder, decoder, n_iters,
208 |           path_type, history, feedback_method, max_episode_len, MAX_INPUT_LENGTH, model_prefix, val_envs=val_envs)
209 | 
210 | 
211 | if __name__ == "__main__":
212 | 
213 |     assert args.path_type in ['planner_path', 'player_path', 'trusted_path']
214 |     assert args.history in ['none', 'target', 'oracle_ans', 'nav_q_oracle_ans', 'all']
215 |     assert args.feedback in ['sample', 'teacher']
216 |     assert args.eval_type in ['val', 'test']
217 | 
218 |     blind = args.blind
219 | 
220 |     # Set default args.
221 |     path_type = args.path_type
222 |     # In MP, max_episode_len = 20 while the average hop range is [4, 7], i.e., ~3x the max.
223 |     # max_episode_len has to account for turns; this heuristically allowed for about 1 turn per hop.
224 |     if path_type == 'planner_path':
225 |         max_episode_len = 20  # [1, 6], i.e., ~3x max
226 |     else:
227 |         max_episode_len = 80  # [2, 41], i.e., ~2x max (120 ~3x) (80 ~2x) [for player/trusted paths]
228 | 
229 |     # Input settings.
230 |     history = args.history
231 |     # In MP, MAX_INPUT_LEN = 80 while the average utterance length is 29, i.e., a bit less than 3x the average.
232 |     if history == 'none':
233 |         MAX_INPUT_LENGTH = 1  # [<EOS>] fixed length.
234 |     elif history == 'target':
235 |         MAX_INPUT_LENGTH = 3  # [<TAR> target <EOS>] fixed length.
236 |     elif history == 'oracle_ans':
237 |         MAX_INPUT_LENGTH = 70  # 16.16+/-9.67 oracle utterance length; 35.5 at +2 stddevs. 71 is double that.
238 |     elif history == 'nav_q_oracle_ans':
239 |         MAX_INPUT_LENGTH = 120  # 11.24+/-6.43 nav question length (plus oracle average); 24.1 at +2 std. 71+48 ~ 120 doubles both per QA.
240 |     else:  # i.e., 'all'
241 |         MAX_INPUT_LENGTH = 120 * 6  # 4.93+/-3.21 turns -> 2.465+/-1.605 Q/A. 5.67 at +2 std. Call it 6 (real max 13).
242 | 
243 |     # Training settings.
244 |     feedback_method = args.feedback
245 |     n_iters = 20000
246 | 
247 |     # Model prefix to uniquely id this instance.
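    # e.g., 'val-seq2seq-all-trusted_path-80-sample-imagenet' (with '-blind' appended
    # below when --blind is set); summarize_perf.py looks for '<model_prefix>-log.csv'.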
248 |     model_prefix = '%s-seq2seq-%s-%s-%d-%s-imagenet' % (args.eval_type, history, path_type, max_episode_len, feedback_method)
249 |     if blind:
250 |         model_prefix += '-blind'
251 | 
252 |     if args.eval_type == 'val':
253 |         train_val(path_type, max_episode_len, history, MAX_INPUT_LENGTH, feedback_method, n_iters, model_prefix, blind)
254 |     else:
255 |         train_test(path_type, max_episode_len, history, MAX_INPUT_LENGTH, feedback_method, n_iters, model_prefix, blind)
256 | 
257 |     # test_submission(path_type, max_episode_len, history, MAX_INPUT_LENGTH, feedback_method, n_iters, model_prefix, blind)
-------------------------------------------------------------------------------- /tasks/NDH/utils.py: --------------------------------------------------------------------------------
1 | ''' Utils for io, language, connectivity graphs etc '''
2 | 
3 | import os
4 | import sys
5 | import re
6 | import string
7 | import json
8 | import time
9 | import math
10 | from collections import Counter
11 | import numpy as np
12 | import networkx as nx
13 | from param import args
14 | import torch
15 | 
16 | 
17 | def length2mask(length, size=None):
18 |     batch_size = len(length)
19 |     size = int(max(length)) if size is None else size
20 |     mask = (torch.arange(size, dtype=torch.int64).unsqueeze(0).repeat(batch_size, 1)
21 |             > (torch.LongTensor(length) - 1).unsqueeze(1)).cuda()
22 |     return mask
23 | 
24 | def new_simulator():
25 |     import MatterSim
26 |     # Simulator image parameters
27 |     WIDTH = 640
28 |     HEIGHT = 480
29 |     VFOV = 60
30 | 
31 |     sim = MatterSim.Simulator()
32 |     sim.setRenderingEnabled(False)
33 |     sim.setCameraResolution(WIDTH, HEIGHT)
34 |     sim.setCameraVFOV(math.radians(VFOV))
35 |     sim.setDiscretizedViewingAngles(True)
36 |     sim.initialize()
37 | 
38 |     return sim
39 | 
40 | def angle_feature(heading, elevation):
41 |     import math
42 |     # twopi = math.pi * 2
43 |     # heading = (heading + twopi) % twopi  # From 0 ~ 2pi
44 |     # It will be the same
45 |     return np.array([math.sin(heading), math.cos(heading),
46 |                      math.sin(elevation), math.cos(elevation)] * (args.angle_feat_size // 4),
47 |                     dtype=np.float32)
48 | 
49 | def get_point_angle_feature(baseViewId=0):
50 |     sim = new_simulator()
51 | 
52 |     feature = np.empty((36, args.angle_feat_size), np.float32)
53 |     base_heading = (baseViewId % 12) * math.radians(30)
54 |     for ix in range(36):
55 |         if ix == 0:
56 |             sim.newEpisode(['ZMojNkEp431'], ['2f4d90acd4024c269fb0efe49a8ac540'], [0], [math.radians(-30)])
57 |         elif ix % 12 == 0:
58 |             sim.makeAction([0], [1.0], [1.0])
59 |         else:
60 |             sim.makeAction([0], [1.0], [0])
61 | 
62 |         state = sim.getState()[0]
63 |         assert state.viewIndex == ix
64 | 
65 |         heading = state.heading - base_heading
66 | 
67 |         feature[ix, :] = angle_feature(heading, state.elevation)
68 |     return feature
69 | 
70 | def get_all_point_angle_feature():
71 |     return [get_point_angle_feature(baseViewId) for baseViewId in range(36)]
72 | 
73 | # padding, unknown word, end of sentence
74 | base_vocab = ['<PAD>', '<UNK>', '<EOS>', '