├── .gitmodules ├── CMakeLists.txt ├── Dockerfile ├── Doxyfile ├── LICENSE ├── README.md ├── cmake └── Modules │ └── FindNumPy.cmake ├── connectivity ├── 17DRP5sb8fy_connectivity.json ├── 1LXtFkjw3qL_connectivity.json ├── 1pXnuDYAj8r_connectivity.json ├── 29hnd4uzFmX_connectivity.json ├── 2azQ1b91cZZ_connectivity.json ├── 2n8kARJN3HM_connectivity.json ├── 2t7WUuJeko7_connectivity.json ├── 5LpN3gDmAk7_connectivity.json ├── 5ZKStnWn8Zo_connectivity.json ├── 5q7pvUzZiYa_connectivity.json ├── 759xd9YjKW5_connectivity.json ├── 7y3sRwLe3Va_connectivity.json ├── 8194nk5LbLH_connectivity.json ├── 82sE5b5pLXE_connectivity.json ├── 8WUmhLawc2A_connectivity.json ├── ARNzJeq3xxb_connectivity.json ├── B6ByNegPMKs_connectivity.json ├── D7G3Y4RVNrH_connectivity.json ├── D7N2EKCX4Sj_connectivity.json ├── E9uDoFAP3SH_connectivity.json ├── EDJbREhghzL_connectivity.json ├── EU6Fwq7SyZv_connectivity.json ├── GdvgFV5R1Z5_connectivity.json ├── HxpKQynjfin_connectivity.json ├── JF19kD82Mey_connectivity.json ├── JeFG25nYj2p_connectivity.json ├── JmbYfDe2QKZ_connectivity.json ├── PX4nDJXEHrG_connectivity.json ├── Pm6F8kyY3z2_connectivity.json ├── PuKPg4mmafe_connectivity.json ├── QUCTc6BB5sX_connectivity.json ├── README.md ├── RPmz2sHmrrY_connectivity.json ├── S9hNv5qa7GM_connectivity.json ├── SN83YJsR3w2_connectivity.json ├── TbHJrupSAjP_connectivity.json ├── ULsKaCPVFJR_connectivity.json ├── UwV83HsGsw3_connectivity.json ├── Uxmj2M2itWa_connectivity.json ├── V2XKFyX4ASd_connectivity.json ├── VFuaQ6m2Qom_connectivity.json ├── VLzqgDo317F_connectivity.json ├── VVfe2KiqLaN_connectivity.json ├── Vt2qJdWjCF2_connectivity.json ├── Vvot9Ly1tCj_connectivity.json ├── VzqfbhrpDEA_connectivity.json ├── WYY7iVyf5p8_connectivity.json ├── X7HyMhZNoso_connectivity.json ├── XcA2TqTSSAj_connectivity.json ├── YFuZgdQ5vWj_connectivity.json ├── YVUC4YcDtcY_connectivity.json ├── YmJkqBEsHnH_connectivity.json ├── Z6MFQCViBuw_connectivity.json ├── ZMojNkEp431_connectivity.json ├── aayBHfsNo7d_connectivity.json ├── ac26ZMwG7aT_connectivity.json ├── b8cTxDM8gDG_connectivity.json ├── cV4RVeZvu5T_connectivity.json ├── dhjEzFoUFzH_connectivity.json ├── e9zR4mvMWw7_connectivity.json ├── fzynW3qQPVF_connectivity.json ├── gTV8FGcVJC9_connectivity.json ├── gYvKGZ5eRqb_connectivity.json ├── gZ6f7yhEvPG_connectivity.json ├── gxdoqLR6rwA_connectivity.json ├── i5noydFURQK_connectivity.json ├── jh4fc5c5qoQ_connectivity.json ├── jtcxE69GiFV_connectivity.json ├── kEZ7cmS4wCh_connectivity.json ├── mJXqzFtmKg4_connectivity.json ├── oLBMNvg9in8_connectivity.json ├── p5wJjkQkbXX_connectivity.json ├── pLe4wQe7qrG_connectivity.json ├── pRbA3pwrgk9_connectivity.json ├── pa4otMbVnkk_connectivity.json ├── q9vSo1VnCiC_connectivity.json ├── qoiz87JEwZ2_connectivity.json ├── r1Q1Z4BcV1o_connectivity.json ├── r47D5H71a5s_connectivity.json ├── rPc6DW4iMge_connectivity.json ├── rqfALeAoiTq_connectivity.json ├── s8pcmisQ38h_connectivity.json ├── sKLMLpTHeUy_connectivity.json ├── sT4fr6TAbpF_connectivity.json ├── scans.txt ├── uNb9QFRL6hY_connectivity.json ├── ur6pFq6Qu1A_connectivity.json ├── vyrNrziPKCB_connectivity.json ├── wc2JMjhGNzB_connectivity.json ├── x8F5xyUWy9e_connectivity.json ├── yqstnuAEVhm_connectivity.json └── zsNo4HB9uLZ_connectivity.json ├── include ├── Benchmark.hpp ├── Catch.hpp ├── MatterSim.hpp ├── NavGraph.hpp └── cbf.h ├── models └── ResNet-152-deploy.prototxt ├── requirements.txt ├── scripts ├── depth_to_skybox.py ├── downsize_skybox.py ├── fill_depth.py ├── precompute_img_features.py └── timer.py ├── src ├── driver │ ├── driver.py │ └── 
mattersim_main.cpp ├── lib │ ├── Benchmark.cpp │ ├── MatterSim.cpp │ ├── NavGraph.cpp │ ├── cbf.cpp │ ├── fragment.sh │ └── vertex.sh ├── lib_python │ └── MatterSimPython.cpp └── test │ ├── main.cpp │ ├── python_test.py │ └── rendertest_spec.json ├── tasks └── R2R │ ├── Agents │ ├── __init__.py │ └── agent.py │ ├── Models │ ├── __init__.py │ └── dynamic.py │ ├── data │ ├── R2R_test.json │ ├── R2R_train.json │ ├── R2R_val_seen.json │ ├── R2R_val_unseen.json │ └── download.sh │ ├── env.py │ ├── eval.py │ ├── main.py │ ├── results │ ├── data_augmentation │ │ ├── decoder_weights_best │ │ └── encoder_weights_best │ └── normal_data │ │ ├── decoder_weights_best │ │ └── encoder_weights_best │ ├── trainer.py │ └── utils.py ├── web ├── README.md ├── app │ ├── collect-hit.html │ ├── eval-hit.html │ ├── index.html │ ├── js │ │ ├── Detector.js │ │ ├── Matterport3D.js │ │ ├── PTZCameraControls.js │ │ ├── RequestAnimationFrame.js │ │ └── Trajectory.js │ ├── sample_room_paths.json │ ├── trajectory.html │ └── val_unseen_shortest_agent.json ├── gulpfile.js ├── package-lock.json └── package.json └── webgl_imgs ├── 17DRP5sb8fy_85c23efeaecd4d43a7dcd5b90137179e_2.551961945320492_0.008557380839564054.png ├── 1LXtFkjw3qL_187589bb7d4644f2943079fb949c0be9_1.8699330579409539_0.0004921836022802584.png ├── 1pXnuDYAj8r_163d61ac7edb43fb958c5d9e69ae11ad_4.626331047551077_-0.02444352614304746.png ├── 29hnd4uzFmX_1576d62e7bbb45e8a5ef9e7bb37b1839_5.844119909926444_-0.0006838914039405167.png ├── 2azQ1b91cZZ_3daad58ad53742038e50d62e91f84e7b_3.1736484087962933_0.016732869758208434.png ├── 2n8kARJN3HM_94ac3cea52ec455993f8562f78da3be1_2.604601935142565_-0.0009188787844489273.png ├── 2t7WUuJeko7_529f006f8293406da0b506defd2891a5_0.032985516949381344_-0.013788837143969411.png ├── 5LpN3gDmAk7_bda8025f20404048a77381e9e0dc0ccf_5.325207878739601_-0.01083211073205187.png ├── 5ZKStnWn8Zo_c76b52856e7c4f2a9a4419000c8e646a_4.13470589902238_-0.02922217527541366.png ├── 5q7pvUzZiYa_397403366d784caf804d741f32fd68b9_2.8746465006968234_-0.0007063598518199811.png ├── 759xd9YjKW5_2343ef3bf04a4433af62f0d527d7512a_3.5451019786019264_-0.016938006310169448.png ├── 7y3sRwLe3Va_9bbf903d50da4ffd9e5d1fb7c9f4d69b_1.7348660165523566_0.008361841032265524.png ├── 8194nk5LbLH_c9e8dc09263e4d0da77d16de0ecddd39_4.05504292862083_0.008533161479170466.png ├── 82sE5b5pLXE_056a491afa534b17bac36f4f5898462a_1.689393931320027_-0.0037883068413356496.png ├── 8WUmhLawc2A_d21aae0b5d944f27a0074525c803fc9f_3.047458184407221_-0.04510889155759994.png ├── ARNzJeq3xxb_9a671e6915de4eb897f45fee8bf2031d_5.616355886953764_0.02583868533558965.png ├── B6ByNegPMKs_e3a65955df26467581c32613c4e9f865_5.230794959607039_0.007265625492957138.png ├── ac26ZMwG7aT_efeef7cc82c84690addb0bf415f075ea_0.07434352566701552_-0.013447513736072197.png ├── b8cTxDM8gDG_f2944e0b66b9461994a7f757582f9bc3_0.0853092784395515_-0.007543204141144086.png └── cV4RVeZvu5T_1b321779a4374c2b952c51820daa9e6c_6.266463179566256_0.07914721704610106.png /.gitmodules: -------------------------------------------------------------------------------- 1 | [submodule "pybind11"] 2 | path = pybind11 3 | url = https://github.com/pybind/pybind11.git 4 | [submodule "speaksee"] 5 | path = speaksee 6 | url = https://github.com/aimagelab/speaksee.git 7 | -------------------------------------------------------------------------------- /CMakeLists.txt: -------------------------------------------------------------------------------- 1 | project(Matterport_Simulator CXX) 2 | cmake_minimum_required(VERSION 2.8) 3 | 4 | option(OSMESA_RENDERING 
"Offscreen CPU rendering with OSMesa" OFF) 5 | option(EGL_RENDERING "Offscreen GPU rendering with EGL" OFF) 6 | 7 | set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++11") 8 | # Make custom find-modules available 9 | set(CMAKE_MODULE_PATH ${CMAKE_MODULE_PATH} "${PROJECT_SOURCE_DIR}/cmake/Modules") 10 | 11 | if(NOT CMAKE_BUILD_TYPE) 12 | set(CMAKE_BUILD_TYPE Release) 13 | endif() 14 | 15 | include_directories("${PROJECT_SOURCE_DIR}/include") 16 | 17 | find_package(OpenCV REQUIRED) 18 | find_package(PkgConfig REQUIRED) 19 | find_package(OpenMP) 20 | if (OPENMP_CXX_FOUND) 21 | set (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${OpenMP_CXX_FLAGS}") 22 | set (CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} ${OpenMP_EXE_LINKER_FLAGS}") 23 | endif() 24 | 25 | pkg_check_modules(JSONCPP REQUIRED jsoncpp) 26 | 27 | if(EGL_RENDERING) 28 | add_definitions(-DEGL_RENDERING) 29 | find_package(OpenGL REQUIRED COMPONENTS OpenGL EGL) 30 | pkg_check_modules(EPOXY REQUIRED epoxy) 31 | set(GL_LIBS OpenGL::OpenGL OpenGL::EGL ${EPOXY_LIBRARIES}) 32 | elseif(OSMESA_RENDERING) 33 | add_definitions(-DOSMESA_RENDERING) 34 | pkg_check_modules(OSMESA REQUIRED osmesa) 35 | set(GL_LIBS ${OSMESA_LIBRARIES}) 36 | else() 37 | cmake_policy(SET CMP0072 OLD) 38 | find_package(OpenGL REQUIRED) 39 | find_package(GLEW REQUIRED) 40 | set(GL_LIBS ${OPENGL_LIBRARIES} ${GLEW_LIBRARIES}) 41 | endif() 42 | 43 | add_library(MatterSim SHARED src/lib/MatterSim.cpp src/lib/NavGraph.cpp src/lib/Benchmark.cpp src/lib/cbf.cpp) 44 | if(OSMESA_RENDERING) 45 | target_compile_definitions(MatterSim PUBLIC "-DOSMESA_RENDERING") 46 | endif() 47 | target_include_directories(MatterSim PRIVATE ${JSONCPP_INCLUDE_DIRS}) 48 | target_link_libraries(MatterSim ${JSONCPP_LIBRARIES} ${OpenCV_LIBS} ${GL_LIBS}) 49 | 50 | add_executable(tests src/test/main.cpp) 51 | target_include_directories(tests PRIVATE ${JSONCPP_INCLUDE_DIRS}) 52 | target_link_libraries(tests MatterSim ${JSONCPP_LIBRARIES} ${OpenCV_LIBS}) 53 | 54 | add_executable(mattersim_main src/driver/mattersim_main.cpp) 55 | target_link_libraries(mattersim_main MatterSim) 56 | 57 | add_subdirectory(pybind11) 58 | 59 | find_package(PythonInterp 2.7) 60 | message(${PYTHON_EXECUTABLE}) 61 | 62 | # Need to search for python executable again to pick up an activated 63 | # virtualenv python, if any. 64 | unset(PYTHON_EXECUTABLE CACHE) 65 | find_program(PYTHON_EXECUTABLE python 66 | PATHS ENV PATH # look in the PATH environment variable 67 | NO_DEFAULT_PATH # do not look anywhere else... 
68 | ) 69 | 70 | find_package(NumPy REQUIRED) 71 | 72 | pybind11_add_module(MatterSimPython src/lib_python/MatterSimPython.cpp) 73 | target_include_directories(MatterSimPython PRIVATE ${NUMPY_INCLUDES}) 74 | target_link_libraries(MatterSimPython PRIVATE MatterSim) 75 | set_target_properties(MatterSimPython 76 | PROPERTIES 77 | OUTPUT_NAME MatterSim) 78 | -------------------------------------------------------------------------------- /Dockerfile: -------------------------------------------------------------------------------- 1 | # Matterport3DSimulator 2 | # Requires nvidia gpu with driver 384.xx or higher 3 | 4 | 5 | FROM nvidia/cudagl:9.0-devel-ubuntu16.04 6 | 7 | # Install a few libraries to support both EGL and OSMESA options 8 | RUN apt-get update && apt-get install -y wget doxygen curl libjsoncpp-dev libepoxy-dev libglm-dev libosmesa6 libosmesa6-dev libglew-dev libopencv-dev python-opencv python-setuptools python-dev 9 | RUN easy_install pip 10 | RUN pip install torch torchvision pandas networkx 11 | 12 | #install latest cmake 13 | ADD https://cmake.org/files/v3.12/cmake-3.12.2-Linux-x86_64.sh /cmake-3.12.2-Linux-x86_64.sh 14 | RUN mkdir /opt/cmake 15 | RUN sh /cmake-3.12.2-Linux-x86_64.sh --prefix=/opt/cmake --skip-license 16 | RUN ln -s /opt/cmake/bin/cmake /usr/local/bin/cmake 17 | RUN cmake --version 18 | 19 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2019 Federico Landi 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Embodied Vision-and-Language Navigation with Dynamic Convolutional Filters 2 | 3 | This is the PyTorch implementation for our paper: 4 | 5 | [**Embodied Vision-and-Language Navigation with Dynamic Convolutional Filters**](https://bmvc2019.org/wp-content/uploads/papers/0384-paper.pdf)
6 | __***Federico Landi***__, Lorenzo Baraldi, Massimiliano Corsini, Rita Cucchiara
7 | British Machine Vision Conference (BMVC), 2019
8 | **Oral Presentation**
9 | 10 | Visit the main [website](http://imagelab.ing.unimore.it/vln-dynamic-filters) for more details. 11 | 12 | ## Reference 13 | 14 | If you use our code for your research, please cite our paper (BMVC 2019 oral): 15 | 16 | ### Bibtex: 17 | ``` 18 | @inproceedings{landi2019embodied, 19 | title={Embodied Vision-and-Language Navigation with Dynamic Convolutional Filters}, 20 | author={Landi, Federico and Baraldi, Lorenzo and Corsini, Massimiliano and Cucchiara, Rita}, 21 | booktitle={Proceedings of the British Machine Vision Conference}, 22 | year={2019} 23 | } 24 | ``` 25 | 26 | ## Installation 27 | 28 | ### Clone Repo 29 | 30 | Clone the repository: 31 | ``` 32 | # Make sure to clone with --recursive 33 | git clone --recursive https://github.com/fdlandi/DynamicConv-agent.git 34 | cd DynamicConv-agent 35 | ``` 36 | 37 | If you didn't clone with the `--recursive` flag, then you'll need to manually initialize the pybind11 and speaksee submodules from the top-level directory: 38 | ``` 39 | git submodule update --init --recursive 40 | ``` 41 | 42 | ### Python setup 43 | 44 | Python 3.6 is required to run our code. You can install the other required modules via: 45 | ``` 46 | cd speaksee 47 | pip install -e . 48 | cd .. 49 | pip install -r requirements.txt 50 | ``` 51 | 52 | ### Building with Docker 53 | 54 | Please follow the instructions in the [Matterport3DSimulator](https://github.com/peteanderson80/Matterport3DSimulator) repository to install the simulator via Docker. 55 | 56 | ### Building without Docker 57 | 58 | The simulator can be built outside of a Docker container using the cmake build commands described below. However, this is not the recommended approach, as all dependencies will need to be installed locally and may conflict with existing libraries. The main requirements are: 59 | - Ubuntu >= 14.04 60 | - Nvidia driver with CUDA installed 61 | - C++ compiler with C++11 support 62 | - [CMake](https://cmake.org/) >= 3.10 63 | - [OpenCV](http://opencv.org/) >= 2.4, including 3.x 64 | - [OpenGL](https://www.opengl.org/) 65 | - [GLM](https://glm.g-truc.net/0.9.8/index.html) 66 | - [Numpy](http://www.numpy.org/) 67 | 68 | Optional dependencies (depending on the cmake rendering options): 69 | - [OSMesa](https://www.mesa3d.org/osmesa.html) for OSMesa backend support 70 | - [epoxy](https://github.com/anholt/libepoxy) for EGL backend support 71 | 72 | ### Build and Test 73 | 74 | Build the simulator and run the unit tests: 75 | ``` 76 | cd DynamicConv-agent 77 | mkdir build && cd build 78 | cmake -DEGL_RENDERING=ON .. 79 | make 80 | cd ../ 81 | ./build/tests ~Timing 82 | ``` 83 | 84 | If you use a conda environment for your experiments, you should specify the python path in the cmake options: 85 | ``` 86 | cmake -DEGL_RENDERING=ON -DPYTHON_EXECUTABLE:FILEPATH='path_to_your_python_bin' .. 
87 | ``` 88 | 89 | ### Precomputing ResNet Image Features 90 | 91 | Image features can be precomputed with `scripts/precompute_img_features.py`. Alternatively, skip the generation and just download and extract our tsv files into the `img_features` directory: 92 | - [ResNet-152-imagenet features [380K/2.9GB]](https://www.dropbox.com/s/715bbj8yjz32ekf/ResNet-152-imagenet.zip?dl=1) 93 | - [ResNet-152-places365 features [380K/2.9GB]](https://www.dropbox.com/s/gox1rbdebyaa98w/ResNet-152-places365.zip?dl=1) 94 | 95 | 96 | ## Training and Testing 97 | 98 | You can train our agent by running: 99 | ``` 100 | python tasks/R2R/main.py 101 | ``` 102 | The number of dynamic filters can be set with the `--num_heads` parameter: 103 | ``` 104 | python tasks/R2R/main.py --num_heads=4 105 | ``` 106 | 107 | ## Reproducibility Note 108 | 109 | Results in our paper were obtained with version v0.1 of the Matterport3DSimulator, while this repository builds on a newer version of the simulator. Due to this difference, results could vary from those reported in the paper. Using different GPUs for training, as well as different random seeds, may also affect results. 110 | 111 | We provide the weights obtained with our training. To reproduce the results from the paper, run: 112 | ``` 113 | python tasks/R2R/main.py --name=normal_data --num_heads=4 --eval_only 114 | ``` 115 | 116 | or: 117 | ``` 118 | python tasks/R2R/main.py --name=data_augmentation --num_heads=4 --eval_only 119 | ``` 120 | 121 | ## License 122 | 123 | The Matterport3D dataset, and data derived from it, is released under the [Matterport3D Terms of Use](http://dovahkiin.stanford.edu/matterport/public/MP_TOS.pdf). Our code is released under the MIT license. 124 | -------------------------------------------------------------------------------- /cmake/Modules/FindNumPy.cmake: -------------------------------------------------------------------------------- 1 | #------------------------------------------------------------------------------- 2 | # Copyright (c) 2013, Lars Baehren 3 | # All rights reserved. 4 | # 5 | # Redistribution and use in source and binary forms, with or without modification, 6 | # are permitted provided that the following conditions are met: 7 | # 8 | # * Redistributions of source code must retain the above copyright notice, this 9 | # list of conditions and the following disclaimer. 10 | # * Redistributions in binary form must reproduce the above copyright notice, 11 | # this list of conditions and the following disclaimer in the documentation 12 | # and/or other materials provided with the distribution. 13 | # 14 | # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 15 | # AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 16 | # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 17 | # DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE 18 | # FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 19 | # DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 20 | # SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 21 | # CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 22 | # OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 23 | # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
24 | #------------------------------------------------------------------------------- 25 | 26 | # - Check for the presence of NumPy 27 | # 28 | # The following variables are set when NumPy is found: 29 | # NUMPY_FOUND = Set to true, if all components of NUMPY have been found. 30 | # NUMPY_INCLUDES = Include path for the header files of NUMPY 31 | # NUMPY_LIBRARIES = Link these to use NUMPY 32 | # NUMPY_LFLAGS = Linker flags (optional) 33 | 34 | if (NOT NUMPY_FOUND) 35 | 36 | if (NOT NUMPY_ROOT_DIR) 37 | set (NUMPY_ROOT_DIR ${CMAKE_INSTALL_PREFIX}) 38 | endif (NOT NUMPY_ROOT_DIR) 39 | 40 | if (NOT PYTHONINTERP_FOUND) 41 | find_package (PythonInterp) 42 | endif (NOT PYTHONINTERP_FOUND) 43 | 44 | ##__________________________________________________________________________ 45 | ## Check for the header files 46 | 47 | ## Use Python to determine the include directory 48 | execute_process ( 49 | COMMAND ${PYTHON_EXECUTABLE} -c import\ numpy\;\ print\(numpy.get_include\(\)\)\; 50 | ERROR_VARIABLE NUMPY_FIND_ERROR 51 | RESULT_VARIABLE NUMPY_FIND_RESULT 52 | OUTPUT_VARIABLE NUMPY_FIND_OUTPUT 53 | OUTPUT_STRIP_TRAILING_WHITESPACE 54 | ) 55 | ## process the output from the execution of the command 56 | if (NOT NUMPY_FIND_RESULT) 57 | set (NUMPY_INCLUDES ${NUMPY_FIND_OUTPUT}) 58 | endif (NOT NUMPY_FIND_RESULT) 59 | 60 | ##__________________________________________________________________________ 61 | ## Check for the library 62 | 63 | unset (NUMPY_LIBRARIES) 64 | 65 | if (PYTHON_SITE_PACKAGES) 66 | find_library (NUMPY_NPYMATH_LIBRARY npymath 67 | HINTS ${PYTHON_SITE_PACKAGES}/numpy/core 68 | PATH_SUFFIXES lib 69 | ) 70 | if (NUMPY_NPYMATH_LIBRARY) 71 | list (APPEND NUMPY_LIBRARIES ${NUMPY_NPYMATH_LIBRARY}) 72 | endif (NUMPY_NPYMATH_LIBRARY) 73 | endif (PYTHON_SITE_PACKAGES) 74 | 75 | ##__________________________________________________________________________ 76 | ## Get API version of NumPy from 'numpy/numpyconfig.h' 77 | 78 | if (PYTHON_EXECUTABLE) 79 | execute_process ( 80 | COMMAND ${PYTHON_EXECUTABLE} -c import\ numpy\;\ print\(numpy.__version__\)\; 81 | ERROR_VARIABLE NUMPY_API_VERSION_ERROR 82 | RESULT_VARIABLE NUMPY_API_VERSION_RESULT 83 | OUTPUT_VARIABLE NUMPY_API_VERSION 84 | OUTPUT_STRIP_TRAILING_WHITESPACE 85 | ) 86 | else () 87 | ## Backup procedure: extract version number directly from the header file 88 | if (NUMPY_INCLUDES) 89 | find_file (HAVE_NUMPYCONFIG_H numpy/numpyconfig.h 90 | HINTS ${NUMPY_INCLUDES} 91 | ) 92 | endif (NUMPY_INCLUDES) 93 | endif () 94 | 95 | ## Dissect full version number into major, minor and patch version 96 | if (NUMPY_API_VERSION) 97 | string (REGEX REPLACE "\\." 
";" _tmp ${NUMPY_API_VERSION}) 98 | list (GET _tmp 0 NUMPY_API_VERSION_MAJOR) 99 | list (GET _tmp 1 NUMPY_API_VERSION_MINOR) 100 | list (GET _tmp 2 NUMPY_API_VERSION_PATCH) 101 | endif (NUMPY_API_VERSION) 102 | 103 | ##__________________________________________________________________________ 104 | ## Actions taken when all components have been found 105 | 106 | find_package_handle_standard_args (NUMPY DEFAULT_MSG NUMPY_INCLUDES) 107 | 108 | if (NUMPY_FOUND) 109 | if (NOT NUMPY_FIND_QUIETLY) 110 | message (STATUS "Found components for NumPy") 111 | message (STATUS "NUMPY_ROOT_DIR = ${NUMPY_ROOT_DIR}") 112 | message (STATUS "NUMPY_INCLUDES = ${NUMPY_INCLUDES}") 113 | message (STATUS "NUMPY_LIBRARIES = ${NUMPY_LIBRARIES}") 114 | message (STATUS "NUMPY_API_VERSION = ${NUMPY_API_VERSION}") 115 | endif (NOT NUMPY_FIND_QUIETLY) 116 | else (NUMPY_FOUND) 117 | if (NUMPY_FIND_REQUIRED) 118 | message (FATAL_ERROR "Could not find NUMPY!") 119 | endif (NUMPY_FIND_REQUIRED) 120 | endif (NUMPY_FOUND) 121 | 122 | ##__________________________________________________________________________ 123 | ## Mark advanced variables 124 | 125 | mark_as_advanced ( 126 | NUMPY_ROOT_DIR 127 | NUMPY_INCLUDES 128 | NUMPY_LIBRARIES 129 | ) 130 | 131 | endif (NOT NUMPY_FOUND) 132 | -------------------------------------------------------------------------------- /connectivity/8194nk5LbLH_connectivity.json: -------------------------------------------------------------------------------- 1 | [{"image_id":"c9e8dc09263e4d0da77d16de0ecddd39","pose":[-0.611043,-0.00396746,-0.791588,-0.213904,0.791585,-0.00882497,-0.610996,2.305,-0.00456166,-0.999953,0.00853306,1.56916,0,0,0,1],"included":true,"visible":[false,false,false,false,true,true,false,true,true,true,false,false,false,false,false,false,false,false,false,false],"unobstructed":[false,false,false,false,true,false,false,false,true,true,false,false,false,false,false,false,false,false,false,false],"height":1.5826326295962942},{"image_id":"286b0c2d9a46408ba80b6ccebb21e582","pose":[0.951596,0.00201098,0.307346,6.58012,-0.307351,0.00915895,0.951552,-2.96479,-0.000901435,-0.999956,0.00933374,4.36353,0,0,0,1],"included":true,"visible":[false,false,true,true,false,false,false,false,false,false,false,false,false,true,false,true,false,true,false,true],"unobstructed":[false,false,false,true,false,false,false,false,false,false,false,false,false,true,false,false,false,true,true,false],"height":1.5712253956498747},{"image_id":"6776097c17ed4b93aee61704eb32f06c","pose":[-0.711582,-0.00419131,-0.702591,-1.68941,0.702575,0.00464776,-0.711594,-5.37908,0.00624796,-0.99998,-0.000362505,1.58622,0,0,0,1],"included":true,"visible":[false,false,false,false,false,true,true,true,false,true,false,true,false,false,true,false,true,false,false,false],"unobstructed":[false,false,false,false,false,true,true,false,false,false,false,true,false,false,false,false,false,false,false,true],"height":1.5804941871490743},{"image_id":"8c7e8da7d4a44ab695e6b3195eac0cf1","pose":[0.709879,0.011247,0.704234,8.62929,-0.70424,-0.00407304,0.70995,-1.77115,0.0108531,-0.999928,0.00502926,4.38556,0,0,0,1],"included":true,"visible":[false,true,false,false,false,false,false,false,false,false,true,false,true,true,false,false,false,true,true,false],"unobstructed":[false,true,false,false,false,false,false,false,false,false,true,false,false,true,false,false,false,true,true,false],"height":1.585645804390483},{"image_id":"f33c718aaf2c41469389a87944442c62","pose":[0.619478,0.0166688,0.784837,-3.88437,-0.784902,-0.00375152,0.619609,-0.528748
,0.0132725,-0.999854,0.0107595,1.58368,0,0,0,1],"included":true,"visible":[true,false,false,false,false,false,false,false,false,true,false,false,false,false,false,false,false,false,false,true],"unobstructed":[true,false,false,false,false,false,false,false,false,true,false,false,false,false,false,false,false,false,false,true],"height":1.5829827809014503},{"image_id":"fcd90a404061413385286bef9662630e","pose":[-0.111393,0.00837906,0.993741,2.80245,-0.993773,-0.00348217,-0.111367,-3.78204,0.0025272,-0.999959,0.00871482,1.58057,0,0,0,1],"included":true,"visible":[true,false,true,false,false,false,false,true,false,false,false,false,false,false,false,true,false,false,false,false],"unobstructed":[false,false,true,false,false,false,true,true,true,false,false,false,false,false,false,false,false,false,false,false],"height":1.5763528408163245},{"image_id":"c07d4ae8330542a09cf8f8dddb9728ce","pose":[-0.985207,-0.0101267,0.171069,0.656519,-0.171094,0.00168538,-0.985253,-5.08928,0.00968898,-0.999947,-0.00339301,1.57611,0,0,0,1],"included":true,"visible":[true,false,true,false,false,true,false,true,false,false,false,false,false,false,true,false,true,false,false,true],"unobstructed":[false,false,true,false,false,true,false,true,false,false,false,true,false,false,false,false,false,false,false,false],"height":1.575276915205382},{"image_id":"2393bffb53fe4205bcc67796c6fb76e3","pose":[-0.241654,0.00228344,-0.97036,3.33582,0.970294,0.0124463,-0.241608,-5.90025,0.0115256,-0.99992,-0.00522325,1.57791,0,0,0,1],"included":true,"visible":[false,false,true,false,false,true,true,false,false,false,false,false,false,false,true,false,true,false,false,false],"unobstructed":[false,false,false,false,false,true,true,false,false,false,false,false,false,false,true,false,false,false,false,false],"height":1.5730354249357412},{"image_id":"71bf74df73cd4e24a191ef4f2338ca22","pose":[0.906931,-0.00688335,-0.421222,0.122562,0.421182,-0.00662188,0.906952,-0.00319673,-0.00903217,-0.999954,-0.00310641,1.57207,0,0,0,1],"included":true,"visible":[true,false,false,false,true,true,true,true,false,true,false,false,false,false,false,false,false,false,false,false],"unobstructed":[true,false,false,false,false,true,false,false,false,true,false,false,false,false,false,false,false,false,false,false],"height":1.570272020216938},{"image_id":"be8a2edacab34ec8887ba6a7b1e4945f","pose":[0.791463,0.0101015,0.611133,-3.50132,-0.611154,-0.00121731,0.791511,1.58103,0.00873934,-0.999948,0.00521015,1.56992,0,0,0,1],"included":true,"visible":[true,false,true,false,true,false,false,true,false,false,false,true,false,false,false,false,false,false,false,true],"unobstructed":[true,false,false,false,true,false,false,false,true,false,false,false,false,false,false,false,false,false,false,false],"height":1.577126892771864},{"image_id":"9bdde31adaa1443bb206b09bfa3c474c","pose":[0.799844,0.0047414,0.60019,8.67581,-0.600208,0.0075118,0.799809,-4.8108,-0.000716311,-0.99996,0.00885413,2.82261,0,0,0,1],"included":true,"visible":[false,false,false,true,false,false,false,false,false,false,false,false,false,true,false,false,true,true,false,false],"unobstructed":[false,false,false,true,false,false,false,false,false,false,false,false,false,false,false,false,true,false,false,false],"height":1.58264400638767},{"image_id":"66d4adb61b57494aa2c1ad141a0fad9b","pose":[-0.34536,-0.0108675,-0.938407,-2.27885,0.938436,0.00459882,-0.345423,-3.2282,0.00806945,-0.99993,0.00861029,1.58739,0,0,0,1],"included":true,"visible":[false,false,true,false,false,true,true,true,false,true,false,false,false,f
alse,true,true,false,false,false,true],"unobstructed":[false,false,true,false,false,false,true,false,false,false,false,false,false,false,false,false,false,false,false,true],"height":1.5705441219971223},{"image_id":"83ff709c0e3e46079836153ea5c7feac","pose":[0.68423,0.0137303,0.729137,3.42529,-0.729235,0.00364543,0.684254,1.65175,0.00673696,-0.999899,0.012507,4.37069,0,0,0,1],"included":true,"visible":[false,false,false,true,false,false,false,false,false,false,false,false,false,false,false,false,false,false,true,false],"unobstructed":[false,false,false,false,false,false,false,false,false,false,false,false,false,false,false,false,false,false,true,false],"height":1.578378655072358},{"image_id":"d9e325df2f3948679c78b93d8025e2da","pose":[0.826698,0.0192407,0.562317,8.49764,-0.562455,0.00220125,0.826825,-0.816805,0.0146709,-0.999812,0.0126418,4.38875,0,0,0,1],"included":true,"visible":[false,true,false,true,false,false,false,false,false,false,true,false,true,false,false,false,false,true,true,false],"unobstructed":[false,true,false,true,false,false,false,false,false,false,false,false,false,false,false,false,false,true,true,false],"height":1.5865892751674604},{"image_id":"423efb97f77f4e7995f19c66fe82afbc","pose":[0.958879,0.00141119,0.283813,5.51819,-0.283808,0.0124035,0.958801,-5.67527,-0.00216725,-0.999922,0.012294,1.58856,0,0,0,1],"included":true,"visible":[false,false,true,false,false,false,true,true,false,false,false,false,false,false,false,false,true,false,false,false],"unobstructed":[false,false,false,false,false,false,false,true,false,false,false,false,false,false,false,false,true,false,false,false],"height":1.5784339701720043},{"image_id":"6c49579a5cd34df8acb7f790b74e9eae","pose":[-0.95716,-0.00676032,-0.289482,-6.48379,0.289538,-0.00977451,-0.957117,-2.57899,0.00364085,-0.999929,0.0113132,1.59886,0,0,0,1],"included":true,"visible":[false,false,false,false,false,false,false,false,false,false,false,true,false,false,false,false,false,false,false,true],"unobstructed":[false,false,false,false,false,false,false,false,false,false,false,false,false,false,false,false,false,false,false,true],"height":1.5798282335589897},{"image_id":"aeed67040d744240b188f66f17d87d43","pose":[0.132175,0.0257204,0.990893,7.67989,-0.991226,0.00381825,0.132121,-5.81072,-0.000385302,-0.999662,0.0259995,2.29866,0,0,0,1],"included":true,"visible":[false,false,true,false,false,false,true,true,false,false,true,false,false,false,true,false,false,false,false,false],"unobstructed":[false,false,false,false,false,false,false,false,false,false,true,false,false,false,true,false,false,false,false,false],"height":1.6026680667792301},{"image_id":"aae01016bb354f78bd6db86e9d71af2b","pose":[0.0788252,0.00384462,0.996881,6.79041,-0.996887,0.00184069,0.0788186,-0.995862,-0.00153193,-0.999991,0.0039778,4.37219,0,0,0,1],"included":true,"visible":[false,true,false,true,false,false,false,false,false,false,false,false,false,true,false,false,false,false,true,false],"unobstructed":[false,true,false,true,false,false,false,false,false,false,false,false,false,true,false,false,false,false,true,false],"height":1.5770919536040346},{"image_id":"346b680ac5904359a1859c929ad312b6","pose":[-0.589008,0.00463239,0.808114,5.58585,-0.808123,0.00000695791,-0.589015,0.644327,-0.00273419,-0.999989,0.00373948,4.38174,0,0,0,1],"included":true,"visible":[false,true,false,true,false,false,false,false,false,false,false,false,true,true,false,false,false,true,false,false],"unobstructed":[false,true,false,true,false,false,false,false,false,false,false,false,true,true,false
,false,false,true,false,false],"height":1.5707587596461066},{"image_id":"ae91518ed77047b3bdeeca864cd04029","pose":[0.310985,0.0070688,0.950389,-4.60607,-0.950392,-0.00460962,0.31102,-2.5949,0.00657945,-0.999964,0.00528466,1.58581,0,0,0,1],"included":true,"visible":[false,false,true,false,true,true,false,true,false,true,false,true,false,false,false,true,false,false,false,false],"unobstructed":[false,false,true,false,true,false,false,false,false,false,false,true,false,false,false,true,false,false,false,false],"height":1.5747548700639524}] -------------------------------------------------------------------------------- /connectivity/GdvgFV5R1Z5_connectivity.json: -------------------------------------------------------------------------------- 1 | [{"image_id":"0b02e18654324edd8d74c078b66bfb20","pose":[-0.057695,-0.000357129,0.998334,-2.46692,-0.998304,-0.00769199,-0.0576965,-3.15814,0.00770012,-0.99997,0.0000884733,1.5171,0,0,0,1],"included":true,"visible":[false,true,false,false,false,true,true,true,true,false,true,false],"unobstructed":[false,false,false,false,false,true,false,true,true,false,true,false],"height":1.51470410293751},{"image_id":"1db1c0a09ecf40d188197efc05ced3bb","pose":[-0.442443,0.0138817,0.896688,-4.03893,-0.89679,-0.0101225,-0.442338,-3.05434,0.00293664,-0.999852,0.0169288,0.974424,0,0,0,1],"included":true,"visible":[true,false,false,false,true,true,false,false,true,false,false,true],"unobstructed":[false,false,false,false,false,true,false,false,true,false,false,true],"height":0.9701803380402906},{"image_id":"6178647ca8d14dc09370f6c1b7ed2fd6","pose":[-0.870025,0.0056275,0.492973,-3.69279,-0.493005,-0.0105975,-0.869962,1.95433,0.000328893,-0.999927,0.0119957,1.51516,0,0,0,1],"included":true,"visible":[false,false,false,true,false,false,true,true,false,false,true,false],"unobstructed":[false,false,false,true,false,false,true,true,false,true,true,false],"height":1.517582101716661},{"image_id":"565cc21cd28b4ee6bb5ba83c5270c032","pose":[0.0242634,0.000986587,-0.999704,-3.91782,0.999699,0.00333371,0.024267,0.178675,0.00335701,-0.999993,-0.0009042,1.50868,0,0,0,1],"included":true,"visible":[false,false,true,false,false,false,true,false,false,true,true,false],"unobstructed":[false,false,true,false,false,false,false,false,false,true,true,false],"height":1.5114421933143356},{"image_id":"ef638e508e054c4aabd49b38d1b88fc7","pose":[0.0820523,0.0151057,0.996513,-4.61631,-0.995947,-0.0356725,0.0825462,-2.18899,0.0367954,-0.999249,0.0121187,1.52757,0,0,0,1],"included":true,"visible":[false,true,false,false,false,true,false,false,true,false,false,true],"unobstructed":[false,false,false,false,false,true,false,false,true,false,false,true],"height":1.5162868543024455},{"image_id":"97ed68de989e44fdaf2d9b949898fab6","pose":[0.0900997,0.0149714,0.99582,-3.64126,-0.995713,-0.0195971,0.0903844,-3.16818,0.0208687,-0.999695,0.0131427,1.52081,0,0,0,1],"included":true,"visible":[true,true,false,false,true,false,false,false,true,false,false,true],"unobstructed":[true,true,false,false,true,false,false,false,true,false,false,true],"height":1.5211418713547455},{"image_id":"5fd70cff4992429a99a84fd3c117ccb5","pose":[-0.0539877,-0.000800861,-0.998541,0.0108044,0.998337,0.0201438,-0.0539926,0.00604319,0.020158,-0.999796,-0.000286778,1.51223,0,0,0,1],"included":true,"visible":[true,false,true,true,false,false,false,true,false,true,true,false],"unobstructed":[false,false,true,false,false,false,false,true,false,false,true,false],"height":1.5113248528175798},{"image_id":"86d342c576ff46a9828d2ba377cc8cd5","pose":[0.
998173,0.0151118,-0.0584746,-1.78347,0.0584707,0.000718574,0.998288,-1.89835,0.0151283,-0.999885,-0.000165129,1.52238,0,0,0,1],"included":true,"visible":[true,false,true,false,false,false,true,false,false,false,true,false],"unobstructed":[true,false,true,false,false,false,true,false,false,false,true,false],"height":1.5103397372923053},{"image_id":"8dba9ff900b14f9b84ead660f5f7f701","pose":[-0.999855,-0.0144511,0.00887107,-4.11579,-0.00895392,0.00564829,-0.999943,-2.90606,0.0144005,-0.999879,-0.00577567,1.51617,0,0,0,1],"included":true,"visible":[true,true,false,false,true,true,false,false,false,false,false,true],"unobstructed":[true,true,false,false,true,true,false,false,false,false,false,true],"height":1.5112098807574073},{"image_id":"0d8c5fbfd73f44e28d6da370520611e4","pose":[0.0769887,0.00664334,0.997009,-6.15424,-0.997016,-0.00490415,0.0770216,-0.0398163,0.00540151,-0.999965,0.00624716,1.50965,0,0,0,1],"included":true,"visible":[false,false,true,true,false,false,true,false,false,false,true,false],"unobstructed":[false,false,true,true,false,false,false,false,false,false,false,false],"height":1.5058928427471967},{"image_id":"aebb1de49d21485e8bef7633dfb58761","pose":[-0.0229751,-0.0058052,-0.999718,-1.94579,0.999719,0.00553997,-0.0230069,-0.026534,0.00567231,-0.999967,0.0056775,1.50582,0,0,0,1],"included":true,"visible":[true,false,true,true,false,false,true,true,false,true,false,false],"unobstructed":[true,false,true,true,false,false,true,true,false,false,false,false],"height":1.5101720791580233},{"image_id":"e34e51f3d6584ad09c510de5db84752f","pose":[-0.0418368,-0.0124855,0.999046,-3.99281,-0.993607,-0.104406,-0.0429142,-2.13265,0.104842,-0.994456,-0.00803644,0.980264,0,0,0,1],"included":true,"visible":[false,true,false,false,true,true,false,false,true,false,false,false],"unobstructed":[false,true,false,false,true,true,false,false,true,false,false,false],"height":0.969584316081611}] -------------------------------------------------------------------------------- /connectivity/README.md: -------------------------------------------------------------------------------- 1 | ## connectivity 2 | Connectivity graphs indicating the navigable paths between viewpoints in each scan. 3 | 4 | Each json file contains an array of annotations, one for each viewpoint in the scan. All annotations share the same basic structure as follows: 5 | 6 | ``` 7 | { 8 | "image_id": str, 9 | "pose": [float x 16], 10 | "included": boolean, 11 | "visible": [boolean x num_viewpoints], 12 | "unobstructed": [boolean x num_viewpoints], 13 | "height": float 14 | } 15 | ``` 16 | - `image_id`: matterport skybox prefix 17 | - `pose`: 4x4 matrix in row major order that transforms matterport skyboxes to global coordinates (z-up). Pose matrices are based on the assumption that the camera is facing skybox image 3. 18 | - `included`: whether viewpoint is included in the simulator. Some overlapping viewpoints are excluded. 19 | - `visible`: indicates other viewpoints that can be seen from this viewpoint. 20 | - `unobstructed`: indicates transitions to other viewpoints that are considered navigable for an agent. 21 | - `height`: estimated height of the viewpoint above the floor. Not required for the simulator. 22 | 23 | Units are in metres. 24 | 25 | `scans.txt` contains a list of all the scan ids in the dataset. 
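For example, the connectivity graph for a single scan can be loaded and inspected with a few lines of Python. This is a minimal sketch that relies only on the fields documented above; it assumes it is run from the repository root, and the scan id is just one of the entries in `scans.txt`:

```python
import json
import numpy as np

scan_id = "17DRP5sb8fy"  # any scan id listed in scans.txt
with open("connectivity/{}_connectivity.json".format(scan_id)) as f:
    annotations = json.load(f)

for a in annotations:
    if not a["included"]:
        continue  # skip viewpoints excluded from the simulator
    # 4x4 pose matrix in row-major order; the translation (metres, z-up) is in the last column
    pose = np.array(a["pose"]).reshape(4, 4)
    position = pose[:3, 3]
    # Navigable neighbours: the 'unobstructed' flags follow the order of the annotation array
    neighbours = [annotations[j]["image_id"]
                  for j, free in enumerate(a["unobstructed"])
                  if free and annotations[j]["included"]]
    print(a["image_id"], position, len(neighbours))
```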
26 | -------------------------------------------------------------------------------- /connectivity/YmJkqBEsHnH_connectivity.json: -------------------------------------------------------------------------------- 1 | [{"image_id":"006933a75f764c5485cf284bea0ded0b","pose":[0.210914,-0.00824746,-0.977469,-7.64722,0.977278,0.0232484,0.210677,-2.15553,0.0209873,-0.999695,0.0129646,1.56695,0,0,0,1],"included":true,"visible":[false,false,true,false,true,true,false,true,true,true,false],"unobstructed":[false,false,false,false,false,false,false,true,true,false,false],"height":1.524793092035509},{"image_id":"e4ede0695e4e4a77aae8537abb9f11d3","pose":[-0.0422212,-0.0176246,-0.998952,-0.133122,0.998904,0.0194092,-0.0425613,-0.0184591,0.0201393,-0.999656,0.016787,1.48352,0,0,0,1],"included":true,"visible":[false,false,true,true,false,false,false,false,false,false,false],"unobstructed":[false,false,true,false,false,true,false,false,false,false,false],"height":1.5227398475592409},{"image_id":"d471e89e00be49f49a7ecace814d60bf","pose":[0.426939,-0.00370058,-0.904272,-0.421886,0.904055,0.0239963,0.426739,-2.12366,0.0201203,-0.999705,0.0135916,1.49477,0,0,0,1],"included":true,"visible":[true,true,false,true,true,true,false,true,true,true,false],"unobstructed":[false,true,false,true,false,true,false,false,false,false,false],"height":1.5263900136377955},{"image_id":"b34af02ce9b642ebbd0c7e9e0ba3b553","pose":[0.960272,0.00870611,-0.278924,-0.0905727,0.278755,0.0168277,0.960214,-3.55265,0.0130537,-0.99982,0.0137334,1.49061,0,0,0,1],"included":true,"visible":[true,true,true,false,false,false,false,false,false,false,false],"unobstructed":[false,false,true,false,false,true,false,false,false,false,false],"height":1.5323637229797105},{"image_id":"01c80b5f8fbd4c969ee0bc03f1ec7a6c","pose":[0.359562,-0.0105291,-0.933061,-3.77309,0.932771,0.0313799,0.359097,-2.1838,0.0254987,-0.999452,0.0211054,1.53932,0,0,0,1],"included":true,"visible":[true,false,true,false,false,true,false,true,true,true,false],"unobstructed":[false,false,false,false,false,true,false,true,false,false,false],"height":1.5286629461398107},{"image_id":"82ea5baa30f945fe98f6cad3064af847","pose":[0.0376233,-0.0115611,-0.999224,-2.01669,0.998821,0.0310955,0.0372487,-2.16965,0.030641,-0.999449,0.0127185,1.50807,0,0,0,1],"included":true,"visible":[true,true,true,true,true,false,false,true,true,true,false],"unobstructed":[false,true,true,true,true,false,false,false,false,false,false],"height":1.5253207999550662},{"image_id":"aecbb791f30b452a9236c5a8c7030663","pose":[0.296076,-0.0242641,-0.954855,-13.5955,0.955111,0.0179483,0.2957,-2.22547,0.00996343,-0.999544,0.0284901,1.59272,0,0,0,1],"included":true,"visible":[true,false,true,false,true,true,false,true,true,true,true],"unobstructed":[false,false,false,false,false,false,false,false,false,true,true],"height":1.7557263982456066},{"image_id":"d841f7b710f9470796d55561f8f524db","pose":[0.270437,0.002913,-0.962732,-5.77716,0.962325,0.0284129,0.27041,-2.21321,0.028142,-0.999591,0.00488176,1.55947,0,0,0,1],"included":true,"visible":[true,false,true,false,true,true,false,false,true,true,false],"unobstructed":[true,false,false,false,true,false,false,false,false,false,false],"height":1.5357935019251416},{"image_id":"8e38fdd81c7949db9646968bafbbdcfc","pose":[-0.00277118,-0.0169575,-0.999852,-9.93905,0.999791,0.020127,-0.00311204,-2.17463,0.0201771,-0.999653,0.0168993,1.60592,0,0,0,1],"included":true,"visible":[true,false,true,false,true,true,false,true,false,true,true],"unobstructed":[true,false,false,false,false,false,false,
false,false,true,false],"height":1.5208970888736792},{"image_id":"20fd759be0b64fc9aa96d290f0a704ec","pose":[0.227815,0.0117555,-0.973633,-12.1161,0.973367,0.0235263,0.228037,-2.15724,0.025587,-0.999654,-0.00608172,1.59969,0,0,0,1],"included":true,"visible":[true,false,true,false,true,true,true,true,true,false,true],"unobstructed":[false,false,false,false,false,false,true,false,true,false,false],"height":1.5261379179165138},{"image_id":"d838acff82244c2da0cf2651e54966cb","pose":[0.310234,-0.0632421,-0.948553,-15.2317,0.950604,0.0313736,0.308813,-2.28133,0.0102298,-0.997504,0.0698525,0.902626,0,0,0,1],"included":true,"visible":[true,false,true,false,true,true,true,true,true,true,false],"unobstructed":[false,false,false,false,false,false,true,false,false,false,false],"height":1.558854711359605}] -------------------------------------------------------------------------------- /connectivity/gZ6f7yhEvPG_connectivity.json: -------------------------------------------------------------------------------- 1 | [{"image_id":"80929af5cf234ae38ac3a2a4e60e4342","pose":[0.983395,0.00450812,-0.181418,-2.79247,0.181442,-0.00570068,0.983385,-1.38801,0.00339928,-0.999973,-0.00642298,1.42676,0,0,0,1],"included":true,"visible":[false,true,true,false,false,true,false,false],"unobstructed":[false,true,false,true,false,true,false,false],"height":1.4191402375960298},{"image_id":"ba27da20782d4e1a825f0a133ad84da9","pose":[-0.7605,-0.0115739,-0.649234,-2.38988,0.648885,0.0237502,-0.760515,-0.0538717,0.0242219,-0.999651,-0.0105509,1.4341,0,0,0,1],"included":true,"visible":[true,false,true,true,false,true,false,true],"unobstructed":[true,false,false,false,false,true,false,true],"height":1.424939020658826},{"image_id":"46cecea0b30e4786b673f5e951bf82d4","pose":[0.593129,0.0137361,-0.80499,0.99933,0.804932,0.010707,0.59327,1.17558,0.0167685,-0.999848,-0.00470498,1.41684,0,0,0,1],"included":true,"visible":[false,false,false,true,true,false,true,true],"unobstructed":[false,false,false,true,true,false,true,true],"height":1.4252108727703763},{"image_id":"bda7a9e6d1d94b3aa8ff491beb158f3a","pose":[-0.378592,-0.0208239,0.925329,-0.182918,-0.925433,-0.00820128,-0.37882,-1.72967,0.0154776,-0.999749,-0.0161651,1.42205,0,0,0,1],"included":true,"visible":[true,false,true,false,true,false,true,true],"unobstructed":[true,false,true,false,true,false,false,true],"height":1.42983949725488},{"image_id":"dbb2f8000bc04b3ebcd0a55112786149","pose":[-0.595363,0.00457706,-0.803444,1.10196,0.803383,0.0168543,-0.595222,-1.10724,0.0108174,-0.999847,-0.0137106,1.41536,0,0,0,1],"included":true,"visible":[false,false,true,true,false,false,true,true],"unobstructed":[false,false,true,true,false,false,true,true],"height":1.4186255623107038},{"image_id":"29b20fa80dcd4771974303c1ccd8953f","pose":[0.292738,0.0164579,-0.956051,-2.77306,0.956096,0.0090939,0.292909,1.55377,0.0135152,-0.999823,-0.0130722,1.43367,0,0,0,1],"included":true,"visible":[true,true,true,false,true,false,false,false],"unobstructed":[true,true,false,false,false,false,false,false],"height":1.4237594118402337},{"image_id":"0ee20663dfa34b438d48750ddcd7366c","pose":[-0.75968,-0.0019971,-0.650293,-0.111567,0.650131,0.0201598,-0.759554,1.31337,0.014627,-0.999794,-0.0140156,1.42291,0,0,0,1],"included":true,"visible":[false,false,true,true,true,false,false,true],"unobstructed":[false,false,true,false,true,false,false,true],"height":1.4276556862049736},{"image_id":"47d8a8282c1c4a7fb3eeeacc45e9d959","pose":[-0.0254788,0.00643152,-0.999654,-0.0034508,0.999603,0.0120797,-0.0253995,0.0112371,0.0119124,-
0.999906,-0.00673574,1.42388,0,0,0,1],"included":true,"visible":[true,true,true,true,true,false,true,false],"unobstructed":[false,true,true,true,true,false,true,false],"height":1.4268855357216241}] -------------------------------------------------------------------------------- /connectivity/scans.txt: -------------------------------------------------------------------------------- 1 | 17DRP5sb8fy 2 | 1LXtFkjw3qL 3 | 1pXnuDYAj8r 4 | 29hnd4uzFmX 5 | 2azQ1b91cZZ 6 | 2n8kARJN3HM 7 | 2t7WUuJeko7 8 | 5LpN3gDmAk7 9 | 5q7pvUzZiYa 10 | 5ZKStnWn8Zo 11 | 759xd9YjKW5 12 | 7y3sRwLe3Va 13 | 8194nk5LbLH 14 | 82sE5b5pLXE 15 | 8WUmhLawc2A 16 | aayBHfsNo7d 17 | ac26ZMwG7aT 18 | ARNzJeq3xxb 19 | B6ByNegPMKs 20 | b8cTxDM8gDG 21 | cV4RVeZvu5T 22 | D7G3Y4RVNrH 23 | D7N2EKCX4Sj 24 | dhjEzFoUFzH 25 | E9uDoFAP3SH 26 | e9zR4mvMWw7 27 | EDJbREhghzL 28 | EU6Fwq7SyZv 29 | fzynW3qQPVF 30 | GdvgFV5R1Z5 31 | gTV8FGcVJC9 32 | gxdoqLR6rwA 33 | gYvKGZ5eRqb 34 | gZ6f7yhEvPG 35 | HxpKQynjfin 36 | i5noydFURQK 37 | JeFG25nYj2p 38 | JF19kD82Mey 39 | jh4fc5c5qoQ 40 | JmbYfDe2QKZ 41 | jtcxE69GiFV 42 | kEZ7cmS4wCh 43 | mJXqzFtmKg4 44 | oLBMNvg9in8 45 | p5wJjkQkbXX 46 | pa4otMbVnkk 47 | pLe4wQe7qrG 48 | Pm6F8kyY3z2 49 | pRbA3pwrgk9 50 | PuKPg4mmafe 51 | PX4nDJXEHrG 52 | q9vSo1VnCiC 53 | qoiz87JEwZ2 54 | QUCTc6BB5sX 55 | r1Q1Z4BcV1o 56 | r47D5H71a5s 57 | rPc6DW4iMge 58 | RPmz2sHmrrY 59 | rqfALeAoiTq 60 | s8pcmisQ38h 61 | S9hNv5qa7GM 62 | sKLMLpTHeUy 63 | SN83YJsR3w2 64 | sT4fr6TAbpF 65 | TbHJrupSAjP 66 | ULsKaCPVFJR 67 | uNb9QFRL6hY 68 | ur6pFq6Qu1A 69 | UwV83HsGsw3 70 | Uxmj2M2itWa 71 | V2XKFyX4ASd 72 | VFuaQ6m2Qom 73 | VLzqgDo317F 74 | Vt2qJdWjCF2 75 | VVfe2KiqLaN 76 | Vvot9Ly1tCj 77 | vyrNrziPKCB 78 | VzqfbhrpDEA 79 | wc2JMjhGNzB 80 | WYY7iVyf5p8 81 | X7HyMhZNoso 82 | x8F5xyUWy9e 83 | XcA2TqTSSAj 84 | YFuZgdQ5vWj 85 | YmJkqBEsHnH 86 | yqstnuAEVhm 87 | YVUC4YcDtcY 88 | Z6MFQCViBuw 89 | ZMojNkEp431 90 | zsNo4HB9uLZ 91 | -------------------------------------------------------------------------------- /include/Benchmark.hpp: -------------------------------------------------------------------------------- 1 | #ifndef MATTERSIM_BENCHMARK 2 | #define MATTERSIM_BENCHMARK 3 | 4 | #include 5 | 6 | namespace mattersim { 7 | 8 | class Timer { 9 | public: 10 | Timer(); 11 | virtual void Start(); 12 | virtual void Stop(); 13 | virtual void Reset(); 14 | virtual float MilliSeconds(); 15 | virtual float MicroSeconds(); 16 | virtual float Seconds(); 17 | inline bool running() { return running_; } 18 | 19 | protected: 20 | bool running_; 21 | std::chrono::steady_clock::time_point start_; 22 | std::chrono::steady_clock::duration elapsed_; 23 | }; 24 | } 25 | 26 | #endif // MATTERSIM_BENCHMARK 27 | -------------------------------------------------------------------------------- /include/MatterSim.hpp: -------------------------------------------------------------------------------- 1 | #ifndef MATTERSIM_HPP 2 | #define MATTERSIM_HPP 3 | 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | 10 | #include 11 | 12 | #ifdef OSMESA_RENDERING 13 | #define GL_GLEXT_PROTOTYPES 14 | #include 15 | #include 16 | #elif defined (EGL_RENDERING) 17 | #include 18 | #include 19 | #else 20 | #include 21 | #endif 22 | 23 | #define GLM_FORCE_RADIANS 24 | #include 25 | #include 26 | #include 27 | #include "glm/ext.hpp" 28 | 29 | #include "Benchmark.hpp" 30 | #include "NavGraph.hpp" 31 | 32 | namespace mattersim { 33 | 34 | struct Viewpoint: std::enable_shared_from_this { 35 | Viewpoint(std::string viewpointId, unsigned int ix, double x, double y, double z, 
36 | double rel_heading, double rel_elevation, double rel_distance) : 37 | viewpointId(viewpointId), ix(ix), x(x), y(y), z(z), rel_heading(rel_heading), 38 | rel_elevation(rel_elevation), rel_distance(rel_distance) 39 | {} 40 | 41 | //! Viewpoint identifier 42 | std::string viewpointId; 43 | //! Viewpoint index into connectivity graph 44 | unsigned int ix; 45 | //! 3D position in world coordinates 46 | double x; 47 | double y; 48 | double z; 49 | //! Heading relative to the camera 50 | double rel_heading; 51 | //! Elevation relative to the camera 52 | double rel_elevation; 53 | //! Distance from the agent 54 | double rel_distance; 55 | }; 56 | 57 | typedef std::shared_ptr ViewpointPtr; 58 | struct ViewpointPtrComp { 59 | inline bool operator() (const ViewpointPtr& l, const ViewpointPtr& r){ 60 | return sqrt(l->rel_heading*l->rel_heading+l->rel_elevation*l->rel_elevation) 61 | < sqrt(r->rel_heading*r->rel_heading+r->rel_elevation*r->rel_elevation); 62 | } 63 | }; 64 | 65 | /** 66 | * Simulator state class. 67 | */ 68 | struct SimState: std::enable_shared_from_this{ 69 | //! Building / scan environment identifier 70 | std::string scanId; 71 | //! Number of frames since the last newEpisode() call 72 | unsigned int step = 0; 73 | //! RGB image (in BGR channel order) from the agent's current viewpoint 74 | cv::Mat rgb; 75 | //! Depth image taken from the agent's current viewpoint 76 | cv::Mat depth; 77 | //! Agent's current 3D location 78 | ViewpointPtr location; 79 | //! Agent's current camera heading in radians 80 | double heading = 0; 81 | //! Agent's current camera elevation in radians 82 | double elevation = 0; 83 | //! Agent's current view [0-35] (set only when viewing angles are discretized) 84 | //! [0-11] looking down, [12-23] looking at horizon, [24-35] looking up 85 | unsigned int viewIndex = 0; 86 | //! Vector of nearby navigable locations representing state-dependent action candidates, i.e. 87 | //! viewpoints you can move to. Index 0 is always to remain at the current viewpoint. 88 | //! The remaining viewpoints are sorted by their angular distance from the centre of the image. 89 | std::vector navigableLocations; 90 | }; 91 | 92 | typedef std::shared_ptr SimStatePtr; 93 | 94 | 95 | /** 96 | * Main class for accessing an instance of the simulator environment. 97 | */ 98 | class Simulator { 99 | 100 | public: 101 | Simulator(); 102 | 103 | ~Simulator(); 104 | 105 | /** 106 | * Set a non-standard path to the Matterport3D dataset. 107 | * The provided directory must contain subdirectories of the form: 108 | * "/matterport_skybox_images/". Default is "./data/v1/scans/". 109 | */ 110 | void setDatasetPath(const std::string& path); 111 | 112 | /** 113 | * Set a non-standard path to the viewpoint connectivity graphs. The provided directory must contain files 114 | * of the form "/_connectivity.json". Default is "./connectivity" (the graphs provided 115 | * by this repo). 116 | */ 117 | void setNavGraphPath(const std::string& path); 118 | 119 | /** 120 | * Enable or disable rendering. Useful for testing. Default is true (enabled). 121 | */ 122 | void setRenderingEnabled(bool value); 123 | 124 | /** 125 | * Sets camera resolution. Default is 320 x 240. 126 | */ 127 | void setCameraResolution(int width, int height); 128 | 129 | /** 130 | * Sets camera vertical field-of-view in radians. Default is 0.8, approx 46 degrees. 131 | */ 132 | void setCameraVFOV(double vfov); 133 | 134 | /** 135 | * Set the camera elevation min and max limits in radians. Default is +-0.94 radians. 
136 | * @return true if successful. 137 | */ 138 | bool setElevationLimits(double min, double max); 139 | 140 | /** 141 | * Enable or disable discretized viewing angles. When enabled, heading and 142 | * elevation changes will be restricted to 30 degree increments from zero, 143 | * with left/right/up/down movement triggered by the sign of the makeAction 144 | * heading and elevation parameters. Default is false (disabled). 145 | */ 146 | void setDiscretizedViewingAngles(bool value); 147 | 148 | /** 149 | * Enable or disable preloading of images from disk to CPU memory. Default is false (disabled). 150 | * Enabled is better for training models, but will cause a delay when starting the simulator. 151 | */ 152 | void setPreloadingEnabled(bool value); 153 | 154 | /** 155 | * Enable or disable rendering of depth images. Default is false (disabled). 156 | */ 157 | void setDepthEnabled(bool value); 158 | 159 | /** 160 | * Set the number of environments in the batch. Default is 1. 161 | */ 162 | void setBatchSize(unsigned int size); 163 | 164 | /** 165 | * Set the cache size for storing pano images in gpu memory. Default is 200. Should be comfortably 166 | * larger than the batch size. 167 | */ 168 | void setCacheSize(unsigned int size); 169 | 170 | /** 171 | * Set the random seed for episodes where viewpoint is not provided. 172 | */ 173 | void setSeed(int seed); 174 | 175 | /** 176 | * Initialize the simulator. Further configuration won't take any effect from now on. 177 | */ 178 | void initialize(); 179 | 180 | /** 181 | * Starts a new episode. If a viewpoint is not provided initialization will be random. 182 | * @param scanId - sets which scene is used, e.g. "2t7WUuJeko7" 183 | * @param viewpointId - sets the initial viewpoint location, e.g. "cc34e9176bfe47ebb23c58c165203134" 184 | * @param heading - set the agent's initial camera heading in radians. With z-axis up, 185 | * heading is defined relative to the y-axis (turning right is positive). 186 | * @param elevation - set the initial camera elevation in radians, measured from the horizon 187 | * defined by the x-y plane (up is positive). 188 | */ 189 | void newEpisode(const std::vector& scanId, const std::vector& viewpointId, 190 | const std::vector& heading, const std::vector& elevation); 191 | 192 | /** 193 | * Starts a new episode at a random viewpoint. 194 | * @param scanId - sets which scene is used, e.g. "2t7WUuJeko7" 195 | */ 196 | void newRandomEpisode(const std::vector& scanId); 197 | 198 | /** 199 | * Returns the current batch of environment states including RGB images and available actions. 200 | */ 201 | const std::vector& getState(); 202 | 203 | /** @brief Select an action. 204 | * 205 | * An RL agent will sample an action here. A task-specific reward can be determined 206 | * based on the location, heading, elevation, etc. of the resulting state. 207 | * @param index - an index into the set of feasible actions defined by getState()->navigableLocations. 208 | * @param heading - desired heading change in radians. With z-axis up, heading is defined 209 | * relative to the y-axis (turning right is positive). 210 | * @param elevation - desired elevation change in radians, measured from the horizon defined 211 | * by the x-y plane (up is positive). 212 | */ 213 | void makeAction(const std::vector& index, const std::vector& heading, 214 | const std::vector& elevation); 215 | 216 | /** 217 | * Closes the environment and releases underlying texture resources, OpenGL contexts, etc. 
218 | */ 219 | void close(); 220 | 221 | /** 222 | * Reset the rendering timers that run automatically. 223 | */ 224 | void resetTimers(); 225 | 226 | /** 227 | * Return a formatted timing string. 228 | */ 229 | std::string timingInfo(); 230 | 231 | private: 232 | const int headingCount = 12; // 12 heading values in discretized views 233 | const double elevationIncrement = M_PI/6.0; // 30 degrees discretized up/down 234 | void populateNavigable(); 235 | void setHeadingElevation(const std::vector& heading, const std::vector& elevation); 236 | void renderScene(); 237 | #ifdef OSMESA_RENDERING 238 | void *buffer; 239 | OSMesaContext ctx; 240 | #elif defined (EGL_RENDERING) 241 | EGLDisplay eglDpy; 242 | GLuint FramebufferName; 243 | #else 244 | GLuint FramebufferName; 245 | #endif 246 | std::vector states; 247 | bool initialized; 248 | bool renderingEnabled; 249 | bool discretizeViews; 250 | bool preloadImages; 251 | bool renderDepth; 252 | int width; 253 | int height; 254 | int randomSeed; 255 | unsigned int cacheSize; 256 | unsigned int batchSize; 257 | double vfov; 258 | double minElevation; 259 | double maxElevation; 260 | glm::mat4 Projection; 261 | glm::mat4 View; 262 | glm::mat4 Model; 263 | glm::mat4 Scale; 264 | glm::mat4 RotateX; 265 | glm::mat4 RotateZ; 266 | GLint ProjMat; 267 | GLint ModelViewMat; 268 | GLint vertex; 269 | GLint isDepth; 270 | GLuint vao_cube; 271 | GLuint vbo_cube_vertices; 272 | GLuint glProgram; 273 | GLuint glShaderV; 274 | GLuint glShaderF; 275 | std::string datasetPath; 276 | std::string navGraphPath; 277 | Timer preloadTimer; // Preloading images from disk into cpu memory 278 | Timer loadTimer; // Loading textures from disk or cpu memory onto gpu 279 | Timer renderTimer; // Rendering time 280 | Timer gpuReadTimer; // Reading rendered images from gpu back to cpu memory 281 | Timer processTimer; // Total run time for simulator 282 | Timer wallTimer; // Wall clock timer 283 | unsigned int frames; 284 | }; 285 | } 286 | 287 | #endif 288 | -------------------------------------------------------------------------------- /include/NavGraph.hpp: -------------------------------------------------------------------------------- 1 | #ifndef NAVGRAPH_HPP 2 | #define NAVGRAPH_HPP 3 | 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include 10 | #include 11 | 12 | #include 13 | #include 14 | 15 | #ifdef OSMESA_RENDERING 16 | #define GL_GLEXT_PROTOTYPES 17 | #include 18 | #include 19 | #elif defined (EGL_RENDERING) 20 | #include 21 | #include 22 | #else 23 | #include 24 | #endif 25 | 26 | #define GLM_FORCE_RADIANS 27 | #include 28 | #include 29 | #include 30 | 31 | namespace mattersim { 32 | 33 | static void assertOpenGLError(const std::string& msg) { 34 | GLenum error = glGetError(); 35 | if (error != GL_NO_ERROR) { 36 | std::stringstream s; 37 | s << "OpenGL error 0x" << std::hex << error << " at " << msg; 38 | throw std::runtime_error(s.str()); 39 | } 40 | } 41 | #ifdef EGL_RENDERING 42 | static void assertEGLError(const std::string& msg) { 43 | EGLint error = eglGetError(); 44 | 45 | if (error != EGL_SUCCESS) { 46 | std::stringstream s; 47 | s << "EGL error 0x" << std::hex << error << " at " << msg; 48 | throw std::runtime_error(s.str()); 49 | } 50 | } 51 | #endif 52 | 53 | /** 54 | * Navigation graph indicating which panoramic viewpoints are adjacent, and also 55 | * containing (optionally pre-loaded) skybox / cubemap images and textures. 56 | * Class is a singleton to ensure images and textures are only loaded once. 
57 | */ 58 | class NavGraph final { 59 | 60 | private: 61 | 62 | NavGraph(const std::string& navGraphPath, const std::string& datasetPath, 63 | bool preloadImages, bool renderDepth, int randomSeed, unsigned int cacheSize); 64 | 65 | ~NavGraph(); 66 | 67 | public: 68 | // Delete the default, copy and move constructors 69 | NavGraph() = delete; 70 | NavGraph(const NavGraph&) = delete; 71 | NavGraph& operator=(const NavGraph&) = delete; 72 | NavGraph(NavGraph&&) = delete; 73 | NavGraph& operator=(NavGraph&&) = delete; 74 | 75 | /** 76 | * First call will load the navigation graph from disk and (optionally) preload the 77 | * cubemap images into memory. 78 | * @param navGraphPath - directory containing json viewpoint connectivity graphs 79 | * @param datasetPath - directory containing a data directory for each Matterport scan id 80 | * @param preloadImages - if true, all cubemap images will be loaded into CPU memory immediately 81 | * @param renderDepth - if true, depth map images are also required 82 | * @param randomSeed - only used for randomViewpoint function 83 | * @param cacheSize - number of pano textures to keep in GPU memory 84 | */ 85 | static NavGraph& getInstance(const std::string& navGraphPath, const std::string& datasetPath, 86 | bool preloadImages, bool renderDepth, int randomSeed, unsigned int cacheSize); 87 | 88 | /** 89 | * Select a random viewpoint from a scan 90 | */ 91 | const std::string& randomViewpoint(const std::string& scanId); 92 | 93 | /** 94 | * Find the index of a selected viewpointId 95 | */ 96 | unsigned int index(const std::string& scanId, const std::string& viewpointId) const; 97 | 98 | /** 99 | * ViewpointId of a selected viewpoint index 100 | */ 101 | const std::string& viewpoint(const std::string& scanId, unsigned int ix) const; 102 | 103 | /** 104 | * Camera rotation matrix for a selected viewpoint index 105 | */ 106 | const glm::mat4& cameraRotation(const std::string& scanId, unsigned int ix) const; 107 | 108 | /** 109 | * Camera position vector for a selected viewpoint index 110 | */ 111 | const glm::vec3& cameraPosition(const std::string& scanId, unsigned int ix) const; 112 | 113 | /** 114 | * Return a list of other viewpoint indices that are reachable from a selected viewpoint index 115 | */ 116 | std::vector adjacentViewpointIndices(const std::string& scanId, unsigned int ix) const; 117 | 118 | /** 119 | * Get cubemap RGB (and optionally, depth) textures for a selected viewpoint index 120 | */ 121 | std::pair cubemapTextures(const std::string& scanId, unsigned int ix); 122 | 123 | /** 124 | * Free GPU memory associated with this viewpoint's textures 125 | */ 126 | void deleteCubemapTextures(const std::string& scanId, unsigned int ix); 127 | 128 | 129 | protected: 130 | 131 | /** 132 | * Helper class representing nodes in the navigation graph and their cubemap textures. 
133 | */ 134 | class Location { 135 | 136 | public: 137 | /** 138 | * Construct a location object from a json struct 139 | * @param viewpoint - json struct 140 | * @param skyboxDir - directory containing a data directory for each Matterport scan id 141 | * @param preload - if true, all cubemap images will be loaded into CPU memory immediately 142 | * @param depth - if true, depth textures will also be provided 143 | */ 144 | Location(const Json::Value& viewpoint, const std::string& skyboxDir, bool preload, bool depth); 145 | 146 | Location() = delete; // no default constructor 147 | 148 | /** 149 | * Return the cubemap RGB (and optionally, depth) textures for this viewpoint, which will 150 | * be loaded from CPU memory or disk if necessary 151 | */ 152 | std::pair cubemapTextures(); 153 | 154 | /** 155 | * Free GPU memory associated with RGB and depth textures at this location 156 | */ 157 | void deleteCubemapTextures(); 158 | 159 | std::string viewpointId; //! Unique Matterport identifier for every pano 160 | bool included; //! Some duplicated viewpoints have been excluded 161 | glm::mat4 rot; //! Camera pose rotation component 162 | glm::vec3 pos; //! Camera pose translation component 163 | std::vector unobstructed; //! Connections to other graph locations 164 | 165 | protected: 166 | 167 | /** 168 | * Load RGB (and optionally, depth) cubemap images from disk into CPU memory 169 | */ 170 | void loadCubemapImages(); 171 | 172 | /** 173 | * Create RGB (and optionally, depth) textures from cubemap images (e.g., in GPU memory) 174 | */ 175 | void loadCubemapTextures(); 176 | 177 | GLuint cubemap_texture; 178 | GLuint depth_texture; 179 | cv::Mat xpos; //! RGB images for faces of the cubemap 180 | cv::Mat xneg; 181 | cv::Mat ypos; 182 | cv::Mat yneg; 183 | cv::Mat zpos; 184 | cv::Mat zneg; 185 | cv::Mat xposD; //! Depth images for faces of the cubemap 186 | cv::Mat xnegD; 187 | cv::Mat yposD; 188 | cv::Mat ynegD; 189 | cv::Mat zposD; 190 | cv::Mat znegD; 191 | bool im_loaded; 192 | bool includeDepth; 193 | std::string skyboxDir; //! Path to skybox images 194 | }; 195 | typedef std::shared_ptr LocationPtr; 196 | 197 | 198 | /** 199 | * Helper class implementing a LRU cache for cubemap textures. 
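* Once the cache reaches its configured size, removeEldest() frees the GPU textures of the least-recently-used location (the back of cacheList).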
200 | */ 201 | class TextureCache { 202 | 203 | public: 204 | TextureCache(unsigned int size) : size(size) { 205 | cacheMap.reserve(size+1); 206 | } 207 | 208 | TextureCache() = delete; // no default constructor 209 | 210 | void add(LocationPtr loc) { 211 | auto map_it = cacheMap.find(loc); 212 | if (map_it != cacheMap.end()) { 213 | // Remove entry from middle of list 214 | cacheList.erase(map_it->second); 215 | cacheMap.erase(map_it); 216 | } 217 | // Add element to list and save iterator on map 218 | auto list_it = cacheList.insert(cacheList.begin(), loc); 219 | cacheMap.emplace(loc, list_it); 220 | if (cacheMap.size() >= size) { 221 | removeEldest(); 222 | } 223 | } 224 | 225 | void removeEldest() { 226 | if (cacheMap.empty()) { 227 | throw std::runtime_error("MatterSim: TextureCache is empty"); 228 | } 229 | LocationPtr loc = cacheList.back(); 230 | loc->deleteCubemapTextures(); 231 | cacheMap.erase(loc); 232 | cacheList.pop_back(); 233 | } 234 | 235 | private: 236 | unsigned int size; 237 | std::unordered_map::iterator > cacheMap; 238 | std::list cacheList; 239 | }; 240 | 241 | 242 | std::map > scanLocations; 243 | std::default_random_engine generator; 244 | TextureCache cache; 245 | }; 246 | 247 | } 248 | 249 | #endif 250 | -------------------------------------------------------------------------------- /include/cbf.h: -------------------------------------------------------------------------------- 1 | // NYU Depth V2 Dataset Matlab Toolbox 2 | // Authors: Nathan Silberman, Pushmeet Kohli, Derek Hoiem, Rob Fergus 3 | 4 | #ifndef CBF_H_ 5 | #define CBF_H_ 6 | 7 | #include 8 | 9 | namespace cbf { 10 | 11 | // Filters the given depth image using a Cross Bilateral Filter. 12 | // 13 | // Args: 14 | // height - height of the images. 15 | // width - width of the images. 16 | // depth - HxW row-major ordered matrix. 17 | // intensity - HxW row-major ordered matrix. 18 | // mask - HxW row-major ordered matrix. 19 | // result - HxW row-major ordered matrix. 20 | // num_scales - the number of scales at which to perform the filtering. 21 | // sigma_s - the space sigma (in pixels) 22 | // sigma_r - the range sigma (in intensity values, 0-1) 23 | void cbf(int height, int width, uint8_t* depth, uint8_t* intensity, 24 | uint8_t* mask, uint8_t* result, unsigned num_scales, double* sigma_s, 25 | double* sigma_r); 26 | 27 | } // namespace 28 | 29 | #endif // CBF_H_ 30 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | torchvision==0.4.0 2 | requests==2.22.0 3 | h5py==2.9.0 4 | six==1.12.0 5 | nltk==3.4.4 6 | tqdm==4.32.1 7 | torch==1.2.0 8 | setuptools==41.0.1 9 | pycocotools==2.0.0 10 | numpy==1.16.4 11 | revtok==0.0.3 12 | spacy==2.1.8 13 | networkx==2.3 14 | -------------------------------------------------------------------------------- /scripts/depth_to_skybox.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | ''' Script for generating depth skyboxes based on undistorted depth images, 4 | in order to support depth output in the simulator. The current version 5 | assumes that undistorted depth images are aligned to matterport skyboxes, 6 | and uses simple blending. Images are downsized 50%. 
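The six 512x512 depth faces of each panorama are concatenated horizontally and saved as a single 16-bit PNG (<pano>_skybox_depth_small.png).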
''' 7 | 8 | import os 9 | import math 10 | import cv2 11 | import numpy as np 12 | from multiprocessing import Pool 13 | from numpy.linalg import inv,norm 14 | from StringIO import StringIO 15 | 16 | 17 | # Parameters 18 | DOWNSIZED_WIDTH = 512 19 | DOWNSIZED_HEIGHT = 512 20 | NUM_WORKER_PROCESSES = 20 21 | FILL_HOLES = True 22 | VISUALIZE_OUTPUT = False 23 | 24 | if FILL_HOLES: 25 | import sys 26 | sys.path.append('build') 27 | from MatterSim import cbf 28 | 29 | # Constants 30 | # Note: Matterport camera is really y=up, x=right, -z=look. 31 | SKYBOX_WIDTH = 1024 32 | SKYBOX_HEIGHT = 1024 33 | base_dir = 'data/v1/scans' 34 | skybox_template = '%s/%s/matterport_skybox_images/%s_skybox%d_sami.jpg' 35 | color_template = '%s/%s/undistorted_color_images/%s_i%s.jpg' 36 | depth_template = '%s/%s/undistorted_depth_images/%s_d%s.png' 37 | camera_template = '%s/%s/undistorted_camera_parameters/%s.conf' 38 | skybox_depth_template = '%s/%s/matterport_skybox_images/%s_skybox_depth_small.png' 39 | 40 | 41 | # camera transform for skybox images 0-5 relative to image 1 42 | skybox_transforms = [ 43 | np.array([[1,0,0],[0,0,-1],[0,1,0]], dtype=np.double), #up (down) 44 | np.eye(3, dtype=np.double), 45 | np.array([[0,0,-1],[0,1,0],[1,0,0]], dtype=np.double), # right 46 | np.array([[-1,0,0],[0,1,0],[0,0,-1]], dtype=np.double), # 180 47 | np.array([[0,0,1],[0,1,0],[-1,0,0]], dtype=np.double), # left 48 | np.array([[1,0,0],[0,0,1],[0,-1,0]], dtype=np.double) # down (up) 49 | ] 50 | 51 | 52 | def camera_parameters(scan): 53 | ''' Returns two dicts containing undistorted camera intrinsics (3x3) and extrinsics (4x4), 54 | respectively, for a given scan. Viewpoint IDs are used as dict keys. ''' 55 | intrinsics = {} 56 | extrinsics = {} 57 | with open(camera_template % (base_dir,scan,scan)) as f: 58 | pos = -1 59 | for line in f.readlines(): 60 | if 'intrinsics_matrix' in line: 61 | intr = line.split() 62 | C = np.zeros((3, 3), np.double) 63 | C[0,0] = intr[1] # fx 64 | C[1,1] = intr[5] # fy 65 | C[0,2] = intr[3] # cx 66 | C[1,2] = intr[6] # cy 67 | C[2,2] = 1.0 68 | pos = 0 69 | elif pos >= 0 and pos < 6: 70 | q = line.find('.jpg') 71 | camera = line[q-37:q] 72 | if pos == 0: 73 | intrinsics[camera[:-2]] = C 74 | T = np.loadtxt(StringIO(line.split('jpg ')[1])).reshape((4,4)) 75 | # T is camera-to-world transform, invert for world-to-camera 76 | extrinsics[camera] = (T,inv(T)) 77 | pos += 1 78 | return intrinsics,extrinsics 79 | 80 | 81 | def z_to_euclid(K_inv, depth): 82 | ''' Takes inverse intrinsics matrix and a depth image. Returns a new depth image with 83 | depth converted from z-distance into euclidean distance from the camera centre. ''' 84 | 85 | assert len(depth.shape) == 2 86 | h = depth.shape[0] 87 | w = depth.shape[1] 88 | 89 | y,x = np.indices((h,w)) 90 | homo_pixels = np.vstack((x.flatten(),y.flatten(),np.ones((x.size)))) 91 | rays = K_inv.dot(homo_pixels) 92 | cos_theta = np.array([0,0,1]).dot(rays) / norm(rays,axis=0) 93 | 94 | output = depth / cos_theta.reshape(h,w) 95 | return output 96 | 97 | 98 | def instrinsic_matrix(width, height): 99 | ''' Construct an ideal camera intrinsic matrix. 
''' 100 | K = np.zeros((3, 3), np.double) 101 | K[0,0] = width/2 #fx 102 | K[1,1] = height/2 #fy 103 | K[0,2] = width/2 #cx 104 | K[1,2] = height/2 #cy 105 | K[2,2] = 1.0 106 | return K 107 | 108 | 109 | 110 | def fill_joint_bilateral_filter(rgb, depth): 111 | ''' Fill holes in a 16bit depth image given corresponding rgb image ''' 112 | 113 | intensity = cv2.cvtColor(rgb, cv2.COLOR_BGR2GRAY) 114 | 115 | # Convert the depth image to uint8. 116 | maxDepth = np.max(depth)+1 117 | depth = (depth.astype(np.float64)/maxDepth) 118 | depth[depth > 1] = 1 119 | depth = (depth*255).astype(np.uint8) 120 | 121 | # Convert to col major order 122 | depth = np.asfortranarray(depth) 123 | intensity = np.asfortranarray(intensity) 124 | mask = (depth == 0) 125 | result = np.zeros_like(depth) 126 | 127 | # Fill holes 128 | cbf(depth, intensity, mask, result) 129 | result = (result.astype(np.float64)/255*maxDepth).astype(np.uint16) 130 | return result 131 | 132 | 133 | def depth_to_skybox(scan, visualize=VISUALIZE_OUTPUT, fill_holes=FILL_HOLES): 134 | 135 | # Load camera parameters 136 | intrinsics,extrinsics = camera_parameters(scan) 137 | # Skybox camera intrinsics 138 | K_skybox = instrinsic_matrix(SKYBOX_WIDTH, SKYBOX_HEIGHT) 139 | 140 | pano_ids = list(set([item.split('_')[0] for item in intrinsics.keys()])) 141 | print('Processing scan %s with %d panoramas' % (scan, len(pano_ids))) 142 | 143 | if visualize: 144 | cv2.namedWindow('RGB') 145 | cv2.namedWindow('Depth') 146 | cv2.namedWindow('Skybox') 147 | 148 | for pano in pano_ids: 149 | 150 | # Load undistorted depth and rgb images 151 | depth = {} 152 | rgb = {} 153 | for c in range(3): 154 | K_inv = inv(intrinsics['%s_i%d' % (pano,c)]) 155 | for i in range(6): 156 | name = '%d_%d' % (c,i) 157 | if visualize: 158 | rgb[name] = cv2.imread(color_template % (base_dir,scan,pano,name)) 159 | # Load 16bit grayscale image 160 | d_im = cv2.imread(depth_template % (base_dir,scan,pano,name), cv2.IMREAD_ANYDEPTH) 161 | depth[name] = z_to_euclid(K_inv, d_im) 162 | 163 | ims = [] 164 | for skybox_ix in range(6): 165 | 166 | # Load skybox image 167 | skybox = cv2.imread(skybox_template % (base_dir,scan,pano,skybox_ix)) 168 | 169 | # Skybox index 1 is the same orientation as camera image 1_5 170 | skybox_ctw,_ = extrinsics[pano + '_i1_5'] 171 | skybox_ctw = skybox_ctw[:3,:3].dot(skybox_transforms[skybox_ix]) 172 | skybox_wtc = inv(skybox_ctw) 173 | 174 | base_depth = np.zeros((SKYBOX_HEIGHT,SKYBOX_WIDTH), np.uint16) 175 | if visualize: 176 | base_rgb = np.zeros((SKYBOX_HEIGHT,SKYBOX_WIDTH,3), np.uint8) 177 | 178 | for camera in range(3): 179 | for angle in range(6): 180 | 181 | # Camera parameters 182 | im_name = '%d_%d' % (camera,angle) 183 | K_im = intrinsics[pano + '_i' + im_name[0]] 184 | T_ctw,T_wtc = extrinsics[pano + '_i' + im_name] 185 | R_ctw = T_ctw[:3,:3] 186 | 187 | # Check if this image can be skipped (facing away) 188 | z = np.array([0,0,1]) 189 | if R_ctw.dot(z).dot(skybox_ctw.dot(z)) < 0: 190 | continue 191 | 192 | # Compute homography 193 | H = K_skybox.dot(skybox_wtc.dot(R_ctw.dot(inv(K_im)))) 194 | 195 | # Warp and blend the depth image 196 | flip = cv2.flip(depth[im_name], 1) # flip around y-axis 197 | warp = cv2.warpPerspective(flip, H, (SKYBOX_HEIGHT,SKYBOX_WIDTH), flags=cv2.INTER_NEAREST) 198 | mask = cv2.warpPerspective(np.ones_like(flip), H, (SKYBOX_HEIGHT,SKYBOX_WIDTH), flags=cv2.INTER_LINEAR) 199 | mask[warp == 0] = 0 # Set mask to zero where we don't have any depth values 200 | mask = cv2.erode(mask,np.ones((3,3),np.uint8),iterations = 1) 
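# Copy warped depth into the skybox face wherever the eroded mask is valid; overlapping
# cameras simply overwrite earlier values (the "simple blending" noted in the module docstring)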
201 | locs = np.where(mask == 1) 202 | base_depth[locs[0], locs[1]] = warp[locs[0], locs[1]] 203 | 204 | if visualize: 205 | # Warp and blend the rgb image 206 | flip = cv2.flip(rgb[im_name], 1) # flip around y-axis 207 | warp = cv2.warpPerspective(flip, H, (SKYBOX_HEIGHT,SKYBOX_WIDTH), flags=cv2.INTER_LINEAR) 208 | mask = cv2.warpPerspective(np.ones_like(flip), H, (SKYBOX_HEIGHT,SKYBOX_WIDTH), flags=cv2.INTER_LINEAR) 209 | mask = cv2.erode(mask,np.ones((3,3),np.uint8),iterations = 1) 210 | locs = np.where(mask == 1) 211 | base_rgb[locs[0], locs[1]] = warp[locs[0], locs[1]] 212 | 213 | depth_small = cv2.resize(cv2.flip(base_depth, 1),(DOWNSIZED_WIDTH,DOWNSIZED_HEIGHT),interpolation=cv2.INTER_NEAREST) # flip around y-axis, downsize 214 | if fill_holes: 215 | depth_filled = fill_joint_bilateral_filter(skybox, depth_small) # Fill holes 216 | ims.append(depth_filled) 217 | else: 218 | ims.append(depth_small) 219 | 220 | if visualize and False: 221 | cv2.imshow('Skybox', skybox) 222 | cv2.imshow('Depth', cv2.applyColorMap((depth_small/256).astype(np.uint8), cv2.COLORMAP_JET)) 223 | rgb_output = cv2.flip(base_rgb, 1) # flip around y-axis 224 | cv2.imshow('RGB', rgb_output) 225 | cv2.waitKey(0) 226 | 227 | newimg = np.concatenate(ims, axis=1) 228 | 229 | if visualize: 230 | maxDepth = np.max(newimg)+1 231 | newimg = (newimg.astype(np.float64)/maxDepth) 232 | newimg = (newimg*255).astype(np.uint8) 233 | cv2.imshow('Depth pano', cv2.applyColorMap(newimg, cv2.COLORMAP_JET)) 234 | cv2.waitKey(0) 235 | else: 236 | # Save output 237 | outfile = skybox_depth_template % (base_dir,scan,pano) 238 | assert cv2.imwrite(outfile, newimg), ('Could not write to %s' % outfile) 239 | 240 | if visualize: 241 | cv2.destroyAllWindows() 242 | print ('Completed scan %s' % (scan)) 243 | 244 | 245 | 246 | if __name__ == '__main__': 247 | 248 | with open('connectivity/scans.txt') as f: 249 | scans = [scan.strip() for scan in f.readlines()] 250 | p = Pool(NUM_WORKER_PROCESSES) 251 | p.map(depth_to_skybox, scans) 252 | 253 | 254 | 255 | -------------------------------------------------------------------------------- /scripts/downsize_skybox.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | ''' Script for downsizing skybox images. 
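downsizeWithMerge() shrinks each pano's six 1024x1024 skybox faces to 512x512 and concatenates
them into one <pano>_skybox_small.jpg strip; downsize() saves the six downsized faces as separate files instead.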
''' 4 | 5 | import os 6 | import math 7 | import cv2 8 | import numpy as np 9 | from multiprocessing import Pool 10 | from depth_to_skybox import camera_parameters 11 | 12 | 13 | NUM_WORKER_PROCESSES = 20 14 | DOWNSIZED_WIDTH = 512 15 | DOWNSIZED_HEIGHT = 512 16 | 17 | # Constants 18 | SKYBOX_WIDTH = 1024 19 | SKYBOX_HEIGHT = 1024 20 | base_dir = 'data/v1/scans' 21 | skybox_template = '%s/%s/matterport_skybox_images/%s_skybox%d_sami.jpg' 22 | skybox_small_template = '%s/%s/matterport_skybox_images/%s_skybox%d_small.jpg' 23 | skybox_merge_template = '%s/%s/matterport_skybox_images/%s_skybox_small.jpg' 24 | 25 | 26 | 27 | def downsizeWithMerge(scan): 28 | # Load pano ids 29 | intrinsics,_ = camera_parameters(scan) 30 | pano_ids = list(set([item.split('_')[0] for item in intrinsics.keys()])) 31 | print('Processing scan %s with %d panoramas' % (scan, len(pano_ids))) 32 | 33 | for pano in pano_ids: 34 | 35 | ims = [] 36 | for skybox_ix in range(6): 37 | 38 | # Load and downsize skybox image 39 | skybox = cv2.imread(skybox_template % (base_dir,scan,pano,skybox_ix)) 40 | ims.append(cv2.resize(skybox,(DOWNSIZED_WIDTH,DOWNSIZED_HEIGHT),interpolation=cv2.INTER_AREA)) 41 | 42 | # Save output 43 | newimg = np.concatenate(ims, axis=1) 44 | assert cv2.imwrite(skybox_merge_template % (base_dir,scan,pano), newimg) 45 | 46 | 47 | def downsize(scan): 48 | 49 | # Load pano ids 50 | intrinsics,_ = camera_parameters(scan) 51 | pano_ids = list(set([item.split('_')[0] for item in intrinsics.keys()])) 52 | print('Processing scan %s with %d panoramas' % (scan, len(pano_ids))) 53 | 54 | for pano in pano_ids: 55 | 56 | for skybox_ix in range(6): 57 | 58 | # Load and downsize skybox image 59 | skybox = cv2.imread(skybox_template % (base_dir,scan,pano,skybox_ix)) 60 | newimg = cv2.resize(skybox,(DOWNSIZED_WIDTH,DOWNSIZED_HEIGHT),interpolation=cv2.INTER_AREA) 61 | 62 | # Save output 63 | assert cv2.imwrite(skybox_small_template % (base_dir,scan,pano,skybox_ix), newimg) 64 | 65 | 66 | if __name__ == '__main__': 67 | 68 | with open('connectivity/scans.txt') as f: 69 | scans = [scan.strip() for scan in f.readlines()] 70 | p = Pool(NUM_WORKER_PROCESSES) 71 | p.map(downsizeWithMerge, scans) 72 | 73 | 74 | -------------------------------------------------------------------------------- /scripts/fill_depth.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | ''' Script for filling missing values in undistorted depth images. 
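Zero-valued (missing) pixels are filled using the cross bilateral filter exposed as MatterSim.cbf,
guided by the matching grayscale color image, and written alongside the originals with a '_filled' suffix.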
''' 4 | 5 | import os 6 | import math 7 | import cv2 8 | import numpy as np 9 | from multiprocessing import Pool 10 | from depth_to_skybox import camera_parameters 11 | 12 | import sys 13 | sys.path.append('build') 14 | from MatterSim import cbf 15 | 16 | 17 | base_dir = 'data/v1/scans' 18 | color_template = '%s/%s/undistorted_color_images/%s_i%s.jpg' 19 | depth_template = '%s/%s/undistorted_depth_images/%s_d%s.png' 20 | filled_depth_template = '%s/%s/undistorted_depth_images/%s_d%s_filled.png' 21 | 22 | def fill_joint_bilateral_filter(scan): 23 | 24 | # Load camera parameters 25 | intrinsics,_ = camera_parameters(scan) 26 | pano_ids = list(set([item.split('_')[0] for item in intrinsics.keys()])) 27 | print('Processing scan %s with %d panoramas' % (scan, len(pano_ids))) 28 | 29 | for pano in pano_ids: 30 | 31 | # Load undistorted depth and rgb images 32 | for c in range(3): 33 | for i in range(6): 34 | name = '%d_%d' % (c,i) 35 | rgb = cv2.imread(color_template % (base_dir,scan,pano,name)) 36 | intensity = cv2.cvtColor(rgb, cv2.COLOR_BGR2GRAY) 37 | 38 | # Load 16bit depth image 39 | depth = cv2.imread(depth_template % (base_dir,scan,pano,name), cv2.IMREAD_ANYDEPTH) 40 | 41 | # Convert the depth image to uint8. 42 | maxDepth = np.max(depth)+1 43 | depth = (depth.astype(np.float64)/maxDepth) 44 | depth[depth > 1] = 1 45 | depth = (depth*255).astype(np.uint8) 46 | 47 | #cv2.imshow('input', cv2.applyColorMap(depth, cv2.COLORMAP_JET)) 48 | 49 | # Convert to col major order 50 | depth = np.asfortranarray(depth) 51 | intensity = np.asfortranarray(intensity) 52 | mask = (depth == 0) 53 | result = np.zeros_like(depth) 54 | 55 | # Fill holes 56 | cbf(depth, intensity, mask, result) 57 | 58 | #cv2.imshow('result', cv2.applyColorMap(result, cv2.COLORMAP_JET)) 59 | #cv2.waitKey(0) 60 | 61 | result = (result.astype(np.float64)/255*maxDepth).astype(np.uint16) 62 | assert cv2.imwrite(filled_depth_template % (base_dir,scan,pano,name), result) 63 | 64 | 65 | if __name__ == '__main__': 66 | 67 | with open('connectivity/scans.txt') as f: 68 | scans = [scan.strip() for scan in f.readlines()] 69 | p = Pool(10) 70 | p.map(fill_joint_bilateral_filter, scans) 71 | 72 | 73 | -------------------------------------------------------------------------------- /scripts/precompute_img_features.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | ''' Script to precompute image features using a Caffe ResNet CNN, using 36 discretized views 4 | at each viewpoint in 30 degree increments, and the provided camera WIDTH, HEIGHT 5 | and VFOV parameters. 
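For each viewpoint, the 36 discretized views are passed through the network and the 2048-d 'pool5'
features are written, base64-encoded, as one row of a TSV file.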
''' 6 | 7 | import numpy as np 8 | import cv2 9 | import json 10 | import math 11 | import base64 12 | import csv 13 | import sys 14 | 15 | csv.field_size_limit(sys.maxsize) 16 | 17 | 18 | # Caffe and MatterSim need to be on the Python path 19 | sys.path.insert(0, 'build') 20 | import MatterSim 21 | 22 | #caffe_root = '../' # your caffe build 23 | #sys.path.insert(0, caffe_root + 'python') 24 | import caffe 25 | 26 | from timer import Timer 27 | 28 | 29 | TSV_FIELDNAMES = ['scanId', 'viewpointId', 'image_w','image_h', 'vfov', 'features'] 30 | VIEWPOINT_SIZE = 36 # Number of discretized views from one viewpoint 31 | FEATURE_SIZE = 2048 32 | BATCH_SIZE = 4 # Some fraction of viewpoint size - batch size 4 equals 11GB memory 33 | GPU_ID = 0 34 | PROTO = 'models/ResNet-152-deploy.prototxt' 35 | MODEL = 'models/ResNet-152-model.caffemodel' # You need to download this, see README.md 36 | #MODEL = 'models/resnet152_places365.caffemodel' 37 | OUTFILE = 'img_features/ResNet-152-imagenet.tsv' 38 | GRAPHS = 'connectivity/' 39 | 40 | # Simulator image parameters 41 | WIDTH=640 42 | HEIGHT=480 43 | VFOV=60 44 | 45 | 46 | def load_viewpointids(): 47 | viewpointIds = [] 48 | with open(GRAPHS+'scans.txt') as f: 49 | scans = [scan.strip() for scan in f.readlines()] 50 | for scan in scans: 51 | with open(GRAPHS+scan+'_connectivity.json') as j: 52 | data = json.load(j) 53 | for item in data: 54 | if item['included']: 55 | viewpointIds.append((scan, item['image_id'])) 56 | print('Loaded %d viewpoints' % len(viewpointIds)) 57 | return viewpointIds 58 | 59 | 60 | def transform_img(im): 61 | ''' Prep opencv 3 channel image for the network ''' 62 | im_orig = im.astype(np.float32, copy=True) 63 | im_orig -= np.array([[[103.1, 115.9, 123.2]]]) # BGR pixel mean 64 | blob = np.zeros((1, im.shape[0], im.shape[1], 3), dtype=np.float32) 65 | blob[0, :, :, :] = im_orig 66 | blob = blob.transpose((0, 3, 1, 2)) 67 | return blob 68 | 69 | 70 | def build_tsv(): 71 | # Set up the simulator 72 | sim = MatterSim.Simulator() 73 | sim.setCameraResolution(WIDTH, HEIGHT) 74 | sim.setCameraVFOV(math.radians(VFOV)) 75 | sim.setDiscretizedViewingAngles(True) 76 | sim.init() 77 | 78 | # Set up Caffe resnet 79 | caffe.set_device(GPU_ID) 80 | caffe.set_mode_gpu() 81 | net = caffe.Net(PROTO, MODEL, caffe.TEST) 82 | net.blobs['data'].reshape(BATCH_SIZE, 3, HEIGHT, WIDTH) 83 | 84 | count = 0 85 | t_render = Timer() 86 | t_net = Timer() 87 | with open(OUTFILE, 'wb') as tsvfile: 88 | writer = csv.DictWriter(tsvfile, delimiter = '\t', fieldnames = TSV_FIELDNAMES) 89 | 90 | # Loop all the viewpoints in the simulator 91 | viewpointIds = load_viewpointids() 92 | for scanId,viewpointId in viewpointIds: 93 | t_render.tic() 94 | # Loop all discretized views from this location 95 | blobs = [] 96 | features = np.empty([VIEWPOINT_SIZE, FEATURE_SIZE], dtype=np.float32) 97 | for ix in range(VIEWPOINT_SIZE): 98 | if ix == 0: 99 | sim.newEpisode(scanId, viewpointId, 0, math.radians(-30)) 100 | elif ix % 12 == 0: 101 | sim.makeAction(0, 1.0, 1.0) 102 | else: 103 | sim.makeAction(0, 1.0, 0) 104 | 105 | state = sim.getState() 106 | assert state.viewIndex == ix 107 | 108 | # Transform and save generated image 109 | blobs.append(transform_img(state.rgb)) 110 | 111 | t_render.toc() 112 | t_net.tic() 113 | # Run as many forward passes as necessary 114 | assert VIEWPOINT_SIZE % BATCH_SIZE == 0 115 | forward_passes = VIEWPOINT_SIZE / BATCH_SIZE 116 | ix = 0 117 | for f in range(forward_passes): 118 | for n in range(BATCH_SIZE): 119 | # Copy image blob to the net 120 
| net.blobs['data'].data[n, :, :, :] = blobs[ix] 121 | ix += 1 122 | # Forward pass 123 | output = net.forward() 124 | features[f*BATCH_SIZE:(f+1)*BATCH_SIZE, :] = net.blobs['pool5'].data[:,:,0,0] 125 | 126 | writer.writerow({ 127 | 'scanId': scanId, 128 | 'viewpointId': viewpointId, 129 | 'image_w': WIDTH, 130 | 'image_h': HEIGHT, 131 | 'vfov' : VFOV, 132 | 'features': base64.b64encode(features) 133 | }) 134 | count += 1 135 | t_net.toc() 136 | if count % 100 == 0: 137 | print('Processed %d / %d viewpoints, %.1fs avg render time, %.1fs avg net time, projected %.1f hours' %\ 138 | (count,len(viewpointIds), t_render.average_time, t_net.average_time, 139 | (t_render.average_time+t_net.average_time)*len(viewpointIds)/3600)) 140 | 141 | 142 | def read_tsv(infile): 143 | # Verify we can read a tsv 144 | in_data = [] 145 | with open(infile, "r+b") as tsv_in_file: 146 | reader = csv.DictReader(tsv_in_file, delimiter='\t', fieldnames = TSV_FIELDNAMES) 147 | for item in reader: 148 | item['image_h'] = int(item['image_h']) 149 | item['image_w'] = int(item['image_w']) 150 | item['vfov'] = int(item['vfov']) 151 | item['features'] = np.frombuffer(base64.decodestring(item['features']), 152 | dtype=np.float32).reshape((VIEWPOINT_SIZE, FEATURE_SIZE)) 153 | in_data.append(item) 154 | return in_data 155 | 156 | 157 | if __name__ == "__main__": 158 | 159 | build_tsv() 160 | data = read_tsv(OUTFILE) 161 | print('Completed %d viewpoints' % len(data)) 162 | 163 | -------------------------------------------------------------------------------- /scripts/timer.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Fast R-CNN 3 | # Copyright (c) 2015 Microsoft 4 | # Licensed under The MIT License [see LICENSE for details] 5 | # Written by Ross Girshick 6 | # -------------------------------------------------------- 7 | 8 | import time 9 | 10 | class Timer(object): 11 | """A simple timer.""" 12 | def __init__(self): 13 | self.total_time = 0. 14 | self.calls = 0 15 | self.start_time = 0. 16 | self.diff = 0. 17 | self.average_time = 0. 
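# Typical usage (as in precompute_img_features.py): call tic() before the timed block and toc() after;
# the running average is kept in self.average_time.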
18 | 19 | def tic(self): 20 | # using time.time instead of time.clock because time time.clock 21 | # does not normalize for multithreading 22 | self.start_time = time.time() 23 | 24 | def toc(self, average=True): 25 | self.diff = time.time() - self.start_time 26 | self.total_time += self.diff 27 | self.calls += 1 28 | self.average_time = self.total_time / self.calls 29 | if average: 30 | return self.average_time 31 | else: 32 | return self.diff 33 | -------------------------------------------------------------------------------- /src/driver/driver.py: -------------------------------------------------------------------------------- 1 | import sys 2 | sys.path.append('build') 3 | import MatterSim 4 | import time 5 | import math 6 | import cv2 7 | import numpy as np 8 | 9 | WIDTH = 800 10 | HEIGHT = 600 11 | VFOV = math.radians(60) 12 | HFOV = VFOV*WIDTH/HEIGHT 13 | TEXT_COLOR = [230, 40, 40] 14 | 15 | cv2.namedWindow('Python RGB') 16 | cv2.namedWindow('Python Depth') 17 | 18 | sim = MatterSim.Simulator() 19 | sim.setCameraResolution(WIDTH, HEIGHT) 20 | sim.setCameraVFOV(VFOV) 21 | sim.setDepthEnabled(True) 22 | sim.initialize() 23 | #sim.newEpisode(['2t7WUuJeko7'], ['1e6b606b44df4a6086c0f97e826d4d15'], [0], [0]) 24 | #sim.newEpisode(['1LXtFkjw3qL'], ['0b22fa63d0f54a529c525afbf2e8bb25'], [0], [0]) 25 | sim.newRandomEpisode(['1LXtFkjw3qL']) 26 | 27 | heading = 0 28 | elevation = 0 29 | location = 0 30 | ANGLEDELTA = 5 * math.pi / 180 31 | 32 | print('\nPython Demo') 33 | print('Use arrow keys to move the camera.') 34 | print('Use number keys (not numpad) to move to nearby viewpoints indicated in the RGB view.\n') 35 | 36 | while True: 37 | sim.makeAction([location], [heading], [elevation]) 38 | location = 0 39 | heading = 0 40 | elevation = 0 41 | 42 | state = sim.getState()[0] 43 | locations = state.navigableLocations 44 | rgb = np.array(state.rgb, copy=False) 45 | for idx, loc in enumerate(locations[1:]): 46 | # Draw actions on the screen 47 | fontScale = 3.0/loc.rel_distance 48 | x = int(WIDTH/2 + loc.rel_heading/HFOV*WIDTH) 49 | y = int(HEIGHT/2 - loc.rel_elevation/VFOV*HEIGHT) 50 | cv2.putText(rgb, str(idx + 1), (x, y), cv2.FONT_HERSHEY_SIMPLEX, 51 | fontScale, TEXT_COLOR, thickness=3) 52 | cv2.imshow('Python RGB', rgb) 53 | 54 | depth = np.array(state.depth, copy=False) 55 | cv2.imshow('Python Depth', depth) 56 | k = cv2.waitKey(1) 57 | if k == -1: 58 | continue 59 | else: 60 | k = (k & 255) 61 | if k == ord('q'): 62 | break 63 | elif ord('1') <= k <= ord('9'): 64 | location = k - ord('0') 65 | if location >= len(locations): 66 | location = 0 67 | elif k == 81 or k == ord('a'): 68 | heading = -ANGLEDELTA 69 | elif k == 82 or k == ord('w'): 70 | elevation = ANGLEDELTA 71 | elif k == 83 or k == ord('d'): 72 | heading = ANGLEDELTA 73 | elif k == 84 or k == ord('s'): 74 | elevation = -ANGLEDELTA 75 | -------------------------------------------------------------------------------- /src/driver/mattersim_main.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | #include "MatterSim.hpp" 5 | 6 | using namespace mattersim; 7 | 8 | #define WIDTH 1280 9 | #define HEIGHT 720 10 | 11 | #ifndef M_PI 12 | #define M_PI (3.14159265358979323846) 13 | #endif 14 | 15 | int main(int argc, char *argv[]) { 16 | 17 | cv::namedWindow("C++ RGB"); 18 | cv::namedWindow("C++ Depth"); 19 | 20 | Simulator sim; 21 | 22 | // Sets resolution. 
Default is 320X240 23 | sim.setCameraResolution(640,480); 24 | sim.setDepthEnabled(true); 25 | 26 | // Initialize the simulator. Further camera configuration won't take any effect from now on. 27 | sim.initialize(); 28 | 29 | std::cout << "\nC++ Demo" << std::endl; 30 | std::cout << "Showing some random viewpoints in one building." << std::endl; 31 | 32 | int i = 0; 33 | while(true) { 34 | i++; 35 | std::cout << "Episode #" << i << "\n"; 36 | 37 | // Starts a new episode. It is not needed right after init() but it doesn't cost much and the loop is nicer. 38 | sim.newRandomEpisode(std::vector(1,"pa4otMbVnkk")); // Launches at a random location 39 | 40 | for (int k=0; k<500; k++) { 41 | 42 | // Get the state 43 | SimStatePtr state = sim.getState().at(0); // SimStatePtr is std::shared_ptr 44 | 45 | // Which consists of: 46 | unsigned int n = state->step; 47 | cv::Mat rgb = state->rgb; // OpenCV CV_8UC3 type (i.e. 8bit color rgb) 48 | cv::Mat depth = state->depth; // OpenCV CV_16UC1 type (i.e. 16bit grayscale) 49 | ViewpointPtr location = state->location; // Need a class to hold viewpoint id, and x,y,z location of a viewpoint 50 | float heading = state->heading; 51 | float elevation = state->elevation; // camera parameters 52 | std::vector reachable = state->navigableLocations; // Where we can move to, 53 | int locationIdx = 0; // Must be an index into reachable 54 | double headingChange = M_PI / 500; 55 | double elevationChange = 0; 56 | 57 | cv::imshow("C++ RGB", rgb); 58 | cv::imshow("C++ Depth", depth); 59 | cv::waitKey(10); 60 | 61 | sim.makeAction(std::vector(1, locationIdx), 62 | std::vector(1, headingChange), 63 | std::vector(1, elevationChange)); 64 | 65 | } 66 | } 67 | 68 | // It will be done automatically in destructor but after close you can init it again with different settings. 
69 | sim.close(); 70 | 71 | return 0; 72 | } 73 | -------------------------------------------------------------------------------- /src/lib/Benchmark.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | #include "Benchmark.hpp" 4 | 5 | namespace mattersim { 6 | 7 | Timer::Timer() 8 | : running_(false), 9 | elapsed_(0) {} 10 | 11 | void Timer::Start() { 12 | if (!running()) { 13 | start_ = std::chrono::steady_clock::now(); 14 | running_ = true; 15 | } 16 | } 17 | 18 | void Timer::Stop() { 19 | if (running()) { 20 | elapsed_ += std::chrono::steady_clock::now() - start_; 21 | running_ = false; 22 | } 23 | } 24 | 25 | void Timer::Reset() { 26 | if (running()) { 27 | running_ = false; 28 | } 29 | elapsed_ = std::chrono::steady_clock::duration(0); 30 | } 31 | 32 | float Timer::MicroSeconds() { 33 | if (running()) { 34 | elapsed_ += std::chrono::steady_clock::now() - start_; 35 | } 36 | return std::chrono::duration_cast(elapsed_).count(); 37 | } 38 | 39 | float Timer::MilliSeconds() { 40 | if (running()) { 41 | elapsed_ += std::chrono::steady_clock::now() - start_; 42 | } 43 | return std::chrono::duration_cast(elapsed_).count(); 44 | } 45 | 46 | float Timer::Seconds() { 47 | if (running()) { 48 | elapsed_ += std::chrono::steady_clock::now() - start_; 49 | } 50 | return std::chrono::duration_cast(elapsed_).count(); 51 | } 52 | 53 | 54 | } 55 | -------------------------------------------------------------------------------- /src/lib/NavGraph.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | 6 | #include 7 | #ifdef _OPENMP 8 | #include 9 | #endif 10 | #include "NavGraph.hpp" 11 | 12 | namespace mattersim { 13 | 14 | 15 | NavGraph::Location::Location(const Json::Value& viewpoint, const std::string& skyboxDir, 16 | bool preload, bool depth): skyboxDir(skyboxDir), im_loaded(false), 17 | includeDepth(depth), cubemap_texture(0), depth_texture(0) { 18 | 19 | viewpointId = viewpoint["image_id"].asString(); 20 | included = viewpoint["included"].asBool(); 21 | 22 | float posearr[16]; 23 | int i = 0; 24 | for (auto f : viewpoint["pose"]) { 25 | posearr[i++] = f.asFloat(); 26 | } 27 | // glm uses column-major order. Inputs are in row-major order. 
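// The JSON "pose" is a row-major 4x4 camera-to-world matrix, so transposing after glm::make_mat4
// recovers it in glm's column-major layout; column 3 then holds the camera position, which is
// saved before the translation is zeroed to leave a pure rotation.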
28 | rot = glm::transpose(glm::make_mat4(posearr)); 29 | // glm access is col,row 30 | pos = glm::vec3{rot[3][0], rot[3][1], rot[3][2]}; 31 | rot[3] = {0,0,0,1}; // remove translation component 32 | 33 | for (auto u : viewpoint["unobstructed"]) { 34 | unobstructed.push_back(u.asBool()); 35 | } 36 | 37 | if (preload) { 38 | // Preload skybox images 39 | loadCubemapImages(); 40 | } 41 | }; 42 | 43 | 44 | void NavGraph::Location::loadCubemapImages() { 45 | cv::Mat rgb = cv::imread(skyboxDir + viewpointId + "_skybox_small.jpg"); 46 | int w = rgb.cols/6; 47 | int h = rgb.rows; 48 | xpos = rgb(cv::Rect(2*w, 0, w, h)); 49 | xneg = rgb(cv::Rect(4*w, 0, w, h)); 50 | ypos = rgb(cv::Rect(0*w, 0, w, h)); 51 | yneg = rgb(cv::Rect(5*w, 0, w, h)); 52 | zpos = rgb(cv::Rect(1*w, 0, w, h)); 53 | zneg = rgb(cv::Rect(3*w, 0, w, h)); 54 | if (xpos.empty() || xneg.empty() || ypos.empty() || yneg.empty() || zpos.empty() || zneg.empty()) { 55 | throw std::invalid_argument( "MatterSim: Could not open skybox RGB files at: " + skyboxDir + viewpointId + "_skybox_small.jpg"); 56 | } 57 | if (includeDepth) { 58 | // 16 bit grayscale images 59 | cv::Mat depth = cv::imread(skyboxDir + viewpointId + "_skybox_depth_small.png", CV_LOAD_IMAGE_ANYDEPTH); 60 | xposD = depth(cv::Rect(2*w, 0, w, h)); 61 | xnegD = depth(cv::Rect(4*w, 0, w, h)); 62 | yposD = depth(cv::Rect(0*w, 0, w, h)); 63 | ynegD = depth(cv::Rect(5*w, 0, w, h)); 64 | zposD = depth(cv::Rect(1*w, 0, w, h)); 65 | znegD = depth(cv::Rect(3*w, 0, w, h)); 66 | if (xposD.empty() || xnegD.empty() || yposD.empty() || ynegD.empty() || zposD.empty() || znegD.empty()) { 67 | throw std::invalid_argument( "MatterSim: Could not open skybox depth files at: " + skyboxDir + viewpointId + "_skybox_depth_small.png"); 68 | } 69 | } 70 | im_loaded = true; 71 | } 72 | 73 | 74 | void NavGraph::Location::loadCubemapTextures() { 75 | // RGB texture 76 | glActiveTexture(GL_TEXTURE0); 77 | glEnable(GL_TEXTURE_CUBE_MAP); 78 | glGenTextures(1, &cubemap_texture); 79 | glBindTexture(GL_TEXTURE_CUBE_MAP, cubemap_texture); 80 | glTexParameteri(GL_TEXTURE_CUBE_MAP, GL_TEXTURE_MAG_FILTER, GL_LINEAR); 81 | glTexParameteri(GL_TEXTURE_CUBE_MAP, GL_TEXTURE_MIN_FILTER, GL_LINEAR); 82 | glTexParameteri(GL_TEXTURE_CUBE_MAP, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE); 83 | glTexParameteri(GL_TEXTURE_CUBE_MAP, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE); 84 | glTexParameteri(GL_TEXTURE_CUBE_MAP, GL_TEXTURE_WRAP_R, GL_CLAMP_TO_EDGE); 85 | //use fast 4-byte alignment (default anyway) if possible 86 | glPixelStorei(GL_UNPACK_ALIGNMENT, (xneg.step & 3) ? 
1 : 4); 87 | //set length of one complete row in data (doesn't need to equal image.cols) 88 | glPixelStorei(GL_UNPACK_ROW_LENGTH, xneg.step/xneg.elemSize()); 89 | glTexImage2D(GL_TEXTURE_CUBE_MAP_POSITIVE_X, 0, GL_RGB, xpos.rows, xpos.cols, 0, GL_BGR, GL_UNSIGNED_BYTE, xpos.ptr()); 90 | glTexImage2D(GL_TEXTURE_CUBE_MAP_NEGATIVE_X, 0, GL_RGB, xneg.rows, xneg.cols, 0, GL_BGR, GL_UNSIGNED_BYTE, xneg.ptr()); 91 | glTexImage2D(GL_TEXTURE_CUBE_MAP_POSITIVE_Y, 0, GL_RGB, ypos.rows, ypos.cols, 0, GL_BGR, GL_UNSIGNED_BYTE, ypos.ptr()); 92 | glTexImage2D(GL_TEXTURE_CUBE_MAP_NEGATIVE_Y, 0, GL_RGB, yneg.rows, yneg.cols, 0, GL_BGR, GL_UNSIGNED_BYTE, yneg.ptr()); 93 | glTexImage2D(GL_TEXTURE_CUBE_MAP_POSITIVE_Z, 0, GL_RGB, zpos.rows, zpos.cols, 0, GL_BGR, GL_UNSIGNED_BYTE, zpos.ptr()); 94 | glTexImage2D(GL_TEXTURE_CUBE_MAP_NEGATIVE_Z, 0, GL_RGB, zneg.rows, zneg.cols, 0, GL_BGR, GL_UNSIGNED_BYTE, zneg.ptr()); 95 | assertOpenGLError("RGB texture"); 96 | if (includeDepth) { 97 | // Depth Texture 98 | glActiveTexture(GL_TEXTURE0); 99 | glEnable(GL_TEXTURE_CUBE_MAP); 100 | glGenTextures(1, &depth_texture); 101 | glBindTexture(GL_TEXTURE_CUBE_MAP, depth_texture); 102 | glTexParameteri(GL_TEXTURE_CUBE_MAP, GL_TEXTURE_MAG_FILTER, GL_NEAREST); 103 | glTexParameteri(GL_TEXTURE_CUBE_MAP, GL_TEXTURE_MIN_FILTER, GL_NEAREST); 104 | glTexParameteri(GL_TEXTURE_CUBE_MAP, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE); 105 | glTexParameteri(GL_TEXTURE_CUBE_MAP, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE); 106 | glTexParameteri(GL_TEXTURE_CUBE_MAP, GL_TEXTURE_WRAP_R, GL_CLAMP_TO_EDGE); 107 | //use fast 4-byte alignment (default anyway) if possible 108 | glPixelStorei(GL_UNPACK_ALIGNMENT, (xnegD.step & 3) ? 1 : 4); 109 | //set length of one complete row in data (doesn't need to equal image.cols) 110 | glPixelStorei(GL_UNPACK_ROW_LENGTH, xnegD.step/xnegD.elemSize()); 111 | glTexImage2D(GL_TEXTURE_CUBE_MAP_POSITIVE_X, 0, GL_RED, xposD.rows, xposD.cols, 0, GL_RED, GL_UNSIGNED_SHORT, xposD.ptr()); 112 | glTexImage2D(GL_TEXTURE_CUBE_MAP_NEGATIVE_X, 0, GL_RED, xnegD.rows, xnegD.cols, 0, GL_RED, GL_UNSIGNED_SHORT, xnegD.ptr()); 113 | glTexImage2D(GL_TEXTURE_CUBE_MAP_POSITIVE_Y, 0, GL_RED, yposD.rows, yposD.cols, 0, GL_RED, GL_UNSIGNED_SHORT, yposD.ptr()); 114 | glTexImage2D(GL_TEXTURE_CUBE_MAP_NEGATIVE_Y, 0, GL_RED, ynegD.rows, ynegD.cols, 0, GL_RED, GL_UNSIGNED_SHORT, ynegD.ptr()); 115 | glTexImage2D(GL_TEXTURE_CUBE_MAP_POSITIVE_Z, 0, GL_RED, zposD.rows, zposD.cols, 0, GL_RED, GL_UNSIGNED_SHORT, zposD.ptr()); 116 | glTexImage2D(GL_TEXTURE_CUBE_MAP_NEGATIVE_Z, 0, GL_RED, znegD.rows, znegD.cols, 0, GL_RED, GL_UNSIGNED_SHORT, znegD.ptr()); 117 | assertOpenGLError("Depth texture"); 118 | } 119 | } 120 | 121 | 122 | void NavGraph::Location::deleteCubemapTextures() { 123 | // no need to check existence, silently ignores errors 124 | glDeleteTextures(1, &cubemap_texture); 125 | glDeleteTextures(1, &depth_texture); 126 | cubemap_texture = 0; 127 | depth_texture = 0; 128 | } 129 | 130 | 131 | std::pair NavGraph::Location::cubemapTextures() { 132 | if (glIsTexture(cubemap_texture)){ 133 | return {cubemap_texture, depth_texture}; 134 | } 135 | if (!im_loaded) { 136 | loadCubemapImages(); 137 | } 138 | loadCubemapTextures(); 139 | return {cubemap_texture, depth_texture}; 140 | } 141 | 142 | 143 | NavGraph::NavGraph(const std::string& navGraphPath, const std::string& datasetPath, 144 | bool preloadImages, bool renderDepth, int randomSeed, unsigned int cacheSize) : cache(cacheSize) { 145 | 146 | generator.seed(randomSeed); 147 | 148 | auto textFile = 
navGraphPath + "/scans.txt"; 149 | std::ifstream scansFile(textFile); 150 | if (scansFile.fail()){ 151 | throw std::invalid_argument( "MatterSim: Could not open list of scans at: " + 152 | textFile + ", is path valid?" ); 153 | } 154 | std::vector scanIds; 155 | std::copy(std::istream_iterator(scansFile), 156 | std::istream_iterator(), 157 | std::back_inserter(scanIds)); 158 | 159 | #pragma omp parallel for 160 | for (unsigned int i=0; i> root; 170 | auto skyboxDir = datasetPath + "/" + scanId + "/matterport_skybox_images/"; 171 | #pragma omp critical 172 | { 173 | scanLocations.insert(std::pair > (scanId, std::vector())); 175 | } 176 | for (auto viewpoint : root) { 177 | Location l(viewpoint, skyboxDir, preloadImages, renderDepth); 178 | #pragma omp critical 179 | { 180 | scanLocations[scanId].push_back(std::make_shared(l)); 181 | } 182 | } 183 | } 184 | } 185 | 186 | 187 | NavGraph::~NavGraph() { 188 | // free all remaining textures 189 | for (auto scan : scanLocations) { 190 | for (auto loc : scan.second) { 191 | loc->deleteCubemapTextures(); 192 | } 193 | } 194 | } 195 | 196 | 197 | NavGraph& NavGraph::getInstance(const std::string& navGraphPath, const std::string& datasetPath, 198 | bool preloadImages, bool renderDepth, int randomSeed, unsigned int cacheSize){ 199 | // magic static 200 | static NavGraph instance(navGraphPath, datasetPath, preloadImages, renderDepth, randomSeed, cacheSize); 201 | return instance; 202 | } 203 | 204 | 205 | const std::string& NavGraph::randomViewpoint(const std::string& scanId) { 206 | std::uniform_int_distribution distribution(0,scanLocations.at(scanId).size()-1); 207 | int start_ix = distribution(generator); // generates random starting index 208 | int ix = start_ix; 209 | while (!scanLocations.at(scanId).at(ix)->included) { // Don't start at an excluded viewpoint 210 | ix++; 211 | if (ix >= scanLocations.at(scanId).size()) ix = 0; 212 | if (ix == start_ix) { 213 | throw std::logic_error( "MatterSim: ScanId: " + scanId + " has no included viewpoints!"); 214 | } 215 | } 216 | return scanLocations.at(scanId).at(ix)->viewpointId; 217 | } 218 | 219 | 220 | unsigned int NavGraph::index(const std::string& scanId, const std::string& viewpointId) const { 221 | int ix = -1; 222 | for (int i = 0; i < scanLocations.at(scanId).size(); ++i) { 223 | if (scanLocations.at(scanId).at(i)->viewpointId == viewpointId) { 224 | if (!scanLocations.at(scanId).at(i)->included) { 225 | throw std::invalid_argument( "MatterSim: ViewpointId: " + 226 | viewpointId + ", is excluded from the connectivity graph." ); 227 | } 228 | ix = i; 229 | break; 230 | } 231 | } 232 | if (ix < 0) { 233 | throw std::invalid_argument( "MatterSim: Could not find viewpointId: " + 234 | viewpointId + ", is viewpoint id valid?" 
); 235 | } else { 236 | return ix; 237 | } 238 | } 239 | 240 | const std::string& NavGraph::viewpoint(const std::string& scanId, unsigned int ix) const { 241 | return scanLocations.at(scanId).at(ix)->viewpointId; 242 | } 243 | 244 | 245 | const glm::mat4& NavGraph::cameraRotation(const std::string& scanId, unsigned int ix) const { 246 | return scanLocations.at(scanId).at(ix)->rot; 247 | } 248 | 249 | 250 | const glm::vec3& NavGraph::cameraPosition(const std::string& scanId, unsigned int ix) const { 251 | return scanLocations.at(scanId).at(ix)->pos; 252 | } 253 | 254 | 255 | std::vector NavGraph::adjacentViewpointIndices(const std::string& scanId, unsigned int ix) const { 256 | std::vector reachable; 257 | for (unsigned int i = 0; i < scanLocations.at(scanId).size(); ++i) { 258 | if (i == ix) { 259 | // Skip option to stay at the same viewpoint 260 | continue; 261 | } 262 | if (scanLocations.at(scanId).at(ix)->unobstructed[i] && scanLocations.at(scanId).at(i)->included) { 263 | reachable.push_back(i); 264 | } 265 | } 266 | return reachable; 267 | } 268 | 269 | 270 | std::pair NavGraph::cubemapTextures(const std::string& scanId, unsigned int ix) { 271 | LocationPtr loc = scanLocations.at(scanId).at(ix); 272 | std::pair textures = loc->cubemapTextures(); 273 | cache.add(loc); 274 | return textures; 275 | } 276 | 277 | 278 | void NavGraph::deleteCubemapTextures(const std::string& scanId, unsigned int ix) { 279 | scanLocations.at(scanId).at(ix)->deleteCubemapTextures(); 280 | } 281 | 282 | 283 | } 284 | -------------------------------------------------------------------------------- /src/lib/fragment.sh: -------------------------------------------------------------------------------- 1 | R""( 2 | #version 120 3 | 4 | varying vec3 texCoord; 5 | varying vec4 camCoord; 6 | uniform samplerCube cubemap; 7 | const vec3 camlook = vec3( 0.0, 0.0, -1.0 ); 8 | uniform bool isDepth; 9 | 10 | void main (void) { 11 | vec4 color = textureCube(cubemap, texCoord); 12 | if (isDepth) { 13 | float scale = dot(camCoord.xyz, camlook) / length(camCoord.xyz); 14 | gl_FragColor = color*scale; 15 | } else { 16 | gl_FragColor = color; 17 | } 18 | } 19 | )"" 20 | -------------------------------------------------------------------------------- /src/lib/vertex.sh: -------------------------------------------------------------------------------- 1 | R""( 2 | #version 120 3 | 4 | attribute vec3 vertex; 5 | varying vec3 texCoord; 6 | varying vec4 camCoord; 7 | uniform mat4 ProjMat; 8 | uniform mat4 ModelViewMat; 9 | 10 | void main() { 11 | camCoord = ModelViewMat * vec4(vertex, 1.0); 12 | gl_Position = ProjMat * camCoord; 13 | texCoord = vertex; 14 | } 15 | )"" 16 | -------------------------------------------------------------------------------- /src/lib_python/MatterSimPython.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include "MatterSim.hpp" 4 | #include "cbf.h" 5 | 6 | namespace py = pybind11; 7 | 8 | namespace mattersim { 9 | 10 | void cbf(py::buffer depth, py::buffer intensity, py::buffer mask, py::buffer result) { 11 | double spaceSigmas[3] = {12, 5, 8}; 12 | double rangeSigmas[3] = {0.2, 0.08, 0.02}; 13 | py::buffer_info d_info = depth.request(); 14 | py::buffer_info i_info = intensity.request(); 15 | py::buffer_info m_info = mask.request(); 16 | py::buffer_info r_info = result.request(); 17 | cbf::cbf(d_info.shape[0], d_info.shape[1], 18 | static_cast(d_info.ptr), 19 | static_cast(i_info.ptr), 20 | static_cast(m_info.ptr), 21 | 
static_cast(r_info.ptr), 22 | 3, &spaceSigmas[0], &rangeSigmas[0]); 23 | } 24 | 25 | } 26 | 27 | using namespace mattersim; 28 | 29 | PYBIND11_MODULE(MatterSim, m) { 30 | m.def("cbf", &mattersim::cbf, "Cross Bilateral Filter"); 31 | py::class_(m, "ViewPoint") 32 | .def_readonly("viewpointId", &Viewpoint::viewpointId) 33 | .def_readonly("ix", &Viewpoint::ix) 34 | .def_readonly("x", &Viewpoint::x) 35 | .def_readonly("y", &Viewpoint::y) 36 | .def_readonly("z", &Viewpoint::z) 37 | .def_readonly("rel_heading", &Viewpoint::rel_heading) 38 | .def_readonly("rel_elevation", &Viewpoint::rel_elevation) 39 | .def_readonly("rel_distance", &Viewpoint::rel_distance); 40 | py::class_(m, "Mat", pybind11::buffer_protocol()) 41 | .def_buffer([](cv::Mat& im) -> pybind11::buffer_info { 42 | ssize_t item_size = im.elemSize() / im.channels(); 43 | std::string format = pybind11::format_descriptor::format(); 44 | if (item_size == 2) { // handle 16bit data from depth maps 45 | format = pybind11::format_descriptor::format(); 46 | } 47 | return pybind11::buffer_info( 48 | im.data, // Pointer to buffer 49 | item_size, // Size of one scalar 50 | format, 51 | 3, // Number of dimensions (row, cols, channels) 52 | { im.rows, im.cols, im.channels() }, // Buffer dimensions 53 | { // Strides (in bytes) for each index 54 | item_size * im.channels() * im.cols, 55 | item_size * im.channels(), 56 | item_size 57 | } 58 | ); 59 | }); 60 | py::class_(m, "SimState") 61 | .def_readonly("scanId", &SimState::scanId) 62 | .def_readonly("step", &SimState::step) 63 | .def_readonly("rgb", &SimState::rgb) 64 | .def_readonly("depth", &SimState::depth) 65 | .def_readonly("location", &SimState::location) 66 | .def_readonly("heading", &SimState::heading) 67 | .def_readonly("elevation", &SimState::elevation) 68 | .def_readonly("viewIndex", &SimState::viewIndex) 69 | .def_readonly("navigableLocations", &SimState::navigableLocations); 70 | py::class_(m, "Simulator") 71 | .def(py::init<>()) 72 | .def("setDatasetPath", &Simulator::setDatasetPath) 73 | .def("setNavGraphPath", &Simulator::setNavGraphPath) 74 | .def("setRenderingEnabled", &Simulator::setRenderingEnabled) 75 | .def("setCameraResolution", &Simulator::setCameraResolution) 76 | .def("setCameraVFOV", &Simulator::setCameraVFOV) 77 | .def("setElevationLimits", &Simulator::setElevationLimits) 78 | .def("setDiscretizedViewingAngles", &Simulator::setDiscretizedViewingAngles) 79 | .def("setPreloadingEnabled", &Simulator::setPreloadingEnabled) 80 | .def("setDepthEnabled", &Simulator::setDepthEnabled) 81 | .def("setBatchSize", &Simulator::setBatchSize) 82 | .def("setCacheSize", &Simulator::setCacheSize) 83 | .def("setSeed", &Simulator::setSeed) 84 | .def("initialize", &Simulator::initialize) 85 | .def("newEpisode", &Simulator::newEpisode) 86 | .def("newRandomEpisode", &Simulator::newRandomEpisode) 87 | .def("getState", &Simulator::getState, py::return_value_policy::take_ownership) 88 | .def("makeAction", &Simulator::makeAction) 89 | .def("close", &Simulator::close) 90 | .def("resetTimers", &Simulator::resetTimers) 91 | .def("timingInfo", &Simulator::timingInfo); 92 | } 93 | -------------------------------------------------------------------------------- /src/test/python_test.py: -------------------------------------------------------------------------------- 1 | import sys 2 | sys.path.append('build') 3 | 4 | from MatterSim import Simulator 5 | import math 6 | import cv2 7 | import json 8 | import numpy as np 9 | 10 | 11 | sim = Simulator() 12 | sim.setCameraResolution(500, 300) 13 | 
sim.setCameraVFOV(math.radians(60)) 14 | sim.setElevationLimits(math.radians(-40),math.radians(50)) 15 | sim.initialize() 16 | 17 | with open("src/test/rendertest_spec.json") as f: 18 | spec = json.load(f) 19 | for tc in spec[:1]: 20 | sim.newEpisode(tc["scanId"], tc["viewpointId"], tc["heading"], tc["elevation"]) 21 | state = sim.getState() 22 | im = np.array(state.rgb, copy=False) 23 | imgfile = tc["reference_image"] 24 | cv2.imwrite("sim_imgs/"+imgfile, im); 25 | cv2.imshow('rendering', im) 26 | cv2.waitKey(0) 27 | 28 | 29 | -------------------------------------------------------------------------------- /src/test/rendertest_spec.json: -------------------------------------------------------------------------------- 1 | [ 2 | [ 3 | { 4 | "scanId": "17DRP5sb8fy", 5 | "viewpointId": "85c23efeaecd4d43a7dcd5b90137179e", 6 | "elevation": 0.008557380839564054, 7 | "heading": 2.551961945320492, 8 | "reference_image": "17DRP5sb8fy_85c23efeaecd4d43a7dcd5b90137179e_2.551961945320492_0.008557380839564054.png" 9 | }, 10 | { 11 | "scanId": "1LXtFkjw3qL", 12 | "viewpointId": "187589bb7d4644f2943079fb949c0be9", 13 | "elevation": 0.0004921836022802584, 14 | "heading": 1.8699330579409539, 15 | "reference_image": "1LXtFkjw3qL_187589bb7d4644f2943079fb949c0be9_1.8699330579409539_0.0004921836022802584.png" 16 | }, 17 | { 18 | "scanId": "1pXnuDYAj8r", 19 | "viewpointId": "163d61ac7edb43fb958c5d9e69ae11ad", 20 | "elevation": -0.02444352614304746, 21 | "heading": 4.626331047551077, 22 | "reference_image": "1pXnuDYAj8r_163d61ac7edb43fb958c5d9e69ae11ad_4.626331047551077_-0.02444352614304746.png" 23 | }, 24 | { 25 | "scanId": "29hnd4uzFmX", 26 | "viewpointId": "1576d62e7bbb45e8a5ef9e7bb37b1839", 27 | "elevation": -0.0006838914039405167, 28 | "heading": 5.844119909926444, 29 | "reference_image": "29hnd4uzFmX_1576d62e7bbb45e8a5ef9e7bb37b1839_5.844119909926444_-0.0006838914039405167.png" 30 | } 31 | ], 32 | [ 33 | { 34 | "scanId": "2azQ1b91cZZ", 35 | "viewpointId": "3daad58ad53742038e50d62e91f84e7b", 36 | "elevation": 0.016732869758208434, 37 | "heading": 3.1736484087962933, 38 | "reference_image": "2azQ1b91cZZ_3daad58ad53742038e50d62e91f84e7b_3.1736484087962933_0.016732869758208434.png" 39 | }, 40 | { 41 | "scanId": "2n8kARJN3HM", 42 | "viewpointId": "94ac3cea52ec455993f8562f78da3be1", 43 | "elevation": -0.0009188787844489273, 44 | "heading": 2.604601935142565, 45 | "reference_image": "2n8kARJN3HM_94ac3cea52ec455993f8562f78da3be1_2.604601935142565_-0.0009188787844489273.png" 46 | }, 47 | { 48 | "scanId": "2t7WUuJeko7", 49 | "viewpointId": "529f006f8293406da0b506defd2891a5", 50 | "elevation": -0.013788837143969411, 51 | "heading": 0.032985516949381344, 52 | "reference_image": "2t7WUuJeko7_529f006f8293406da0b506defd2891a5_0.032985516949381344_-0.013788837143969411.png" 53 | }, 54 | { 55 | "scanId": "5LpN3gDmAk7", 56 | "viewpointId": "bda8025f20404048a77381e9e0dc0ccf", 57 | "elevation": -0.01083211073205187, 58 | "heading": 5.325207878739601, 59 | "reference_image": "5LpN3gDmAk7_bda8025f20404048a77381e9e0dc0ccf_5.325207878739601_-0.01083211073205187.png" 60 | } 61 | ], 62 | [ 63 | { 64 | "scanId": "5q7pvUzZiYa", 65 | "viewpointId": "397403366d784caf804d741f32fd68b9", 66 | "elevation": -0.0007063598518199811, 67 | "heading": 2.8746465006968234, 68 | "reference_image": "5q7pvUzZiYa_397403366d784caf804d741f32fd68b9_2.8746465006968234_-0.0007063598518199811.png" 69 | }, 70 | { 71 | "scanId": "5ZKStnWn8Zo", 72 | "viewpointId": "c76b52856e7c4f2a9a4419000c8e646a", 73 | "elevation": -0.02922217527541366, 74 | "heading": 
4.13470589902238, 75 | "reference_image": "5ZKStnWn8Zo_c76b52856e7c4f2a9a4419000c8e646a_4.13470589902238_-0.02922217527541366.png" 76 | }, 77 | { 78 | "scanId": "759xd9YjKW5", 79 | "viewpointId": "2343ef3bf04a4433af62f0d527d7512a", 80 | "elevation": -0.016938006310169448, 81 | "heading": 3.5451019786019264, 82 | "reference_image": "759xd9YjKW5_2343ef3bf04a4433af62f0d527d7512a_3.5451019786019264_-0.016938006310169448.png" 83 | }, 84 | { 85 | "scanId": "7y3sRwLe3Va", 86 | "viewpointId": "9bbf903d50da4ffd9e5d1fb7c9f4d69b", 87 | "elevation": 0.008361841032265524, 88 | "heading": 1.7348660165523566, 89 | "reference_image": "7y3sRwLe3Va_9bbf903d50da4ffd9e5d1fb7c9f4d69b_1.7348660165523566_0.008361841032265524.png" 90 | } 91 | ], 92 | [ 93 | { 94 | "scanId": "8194nk5LbLH", 95 | "viewpointId": "c9e8dc09263e4d0da77d16de0ecddd39", 96 | "elevation": 0.008533161479170466, 97 | "heading": 4.05504292862083, 98 | "reference_image": "8194nk5LbLH_c9e8dc09263e4d0da77d16de0ecddd39_4.05504292862083_0.008533161479170466.png" 99 | }, 100 | { 101 | "scanId": "82sE5b5pLXE", 102 | "viewpointId": "056a491afa534b17bac36f4f5898462a", 103 | "elevation": -0.0037883068413356496, 104 | "heading": 1.689393931320027, 105 | "reference_image": "82sE5b5pLXE_056a491afa534b17bac36f4f5898462a_1.689393931320027_-0.0037883068413356496.png" 106 | }, 107 | { 108 | "scanId": "8WUmhLawc2A", 109 | "viewpointId": "d21aae0b5d944f27a0074525c803fc9f", 110 | "elevation": -0.04510889155759994, 111 | "heading": 3.047458184407221, 112 | "reference_image": "8WUmhLawc2A_d21aae0b5d944f27a0074525c803fc9f_3.047458184407221_-0.04510889155759994.png" 113 | }, 114 | { 115 | "scanId": "ac26ZMwG7aT", 116 | "viewpointId": "efeef7cc82c84690addb0bf415f075ea", 117 | "elevation": -0.013447513736072197, 118 | "heading": 0.07434352566701552, 119 | "reference_image": "ac26ZMwG7aT_efeef7cc82c84690addb0bf415f075ea_0.07434352566701552_-0.013447513736072197.png" 120 | } 121 | ], 122 | [ 123 | { 124 | "scanId": "ARNzJeq3xxb", 125 | "viewpointId": "9a671e6915de4eb897f45fee8bf2031d", 126 | "elevation": 0.02583868533558965, 127 | "heading": 5.616355886953764, 128 | "reference_image": "ARNzJeq3xxb_9a671e6915de4eb897f45fee8bf2031d_5.616355886953764_0.02583868533558965.png" 129 | }, 130 | { 131 | "scanId": "B6ByNegPMKs", 132 | "viewpointId": "e3a65955df26467581c32613c4e9f865", 133 | "elevation": 0.007265625492957138, 134 | "heading": 5.230794959607039, 135 | "reference_image": "B6ByNegPMKs_e3a65955df26467581c32613c4e9f865_5.230794959607039_0.007265625492957138.png" 136 | }, 137 | { 138 | "scanId": "b8cTxDM8gDG", 139 | "viewpointId": "f2944e0b66b9461994a7f757582f9bc3", 140 | "elevation": -0.007543204141144086, 141 | "heading": 0.0853092784395515, 142 | "reference_image": "b8cTxDM8gDG_f2944e0b66b9461994a7f757582f9bc3_0.0853092784395515_-0.007543204141144086.png" 143 | }, 144 | { 145 | "scanId": "cV4RVeZvu5T", 146 | "viewpointId": "1b321779a4374c2b952c51820daa9e6c", 147 | "elevation": 0.07914721704610106, 148 | "heading": 6.266463179566256, 149 | "reference_image": "cV4RVeZvu5T_1b321779a4374c2b952c51820daa9e6c_6.266463179566256_0.07914721704610106.png" 150 | } 151 | ] 152 | ] 153 | -------------------------------------------------------------------------------- /tasks/R2R/Agents/__init__.py: -------------------------------------------------------------------------------- 1 | from tasks.R2R.Agents.agent import R2RAgent, Oracle, Stop, Random, Dynamic 2 | 3 | agents = {'Base': R2RAgent, 4 | 'Oracle': Oracle, 5 | 'Stop': Stop, 6 | 'Random': Random, 7 | 'Dynamic': Dynamic, 8 | } 9 
| 10 | 11 | def get_agent(name, config): 12 | assert name in agents.keys(), '%s is not valid agent name' % name 13 | return agents[name](config) 14 | -------------------------------------------------------------------------------- /tasks/R2R/Agents/agent.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import numpy as np 3 | import sys 4 | 5 | from tasks.R2R.Models import DynamicDecoder, InstructionEncoder 6 | from tasks.R2R.utils import append_coordinates, batched_sentence_embedding, to_one_hot 7 | 8 | sys.path.append('speaksee') 9 | import speaksee.vocab as ssvoc 10 | 11 | 12 | class R2RAgent: 13 | 14 | low_level_actions = [ 15 | (0, -1, 0), # left 16 | (0, 1, 0), # right 17 | (0, 0, 1), # up 18 | (0, 0, -1), # down 19 | (1, 0, 0), # forward 20 | (0, 0, 0), # 21 | ] 22 | 23 | def __init__(self, config): 24 | self.config = config 25 | self.name = 'Base' 26 | 27 | def get_name(self): 28 | return self.name 29 | 30 | def get_config(self): 31 | return self.config 32 | 33 | def rollout(self, env): 34 | raise NotImplementedError 35 | 36 | def train(self): 37 | """ Should call Module.train() on each torch.nn.Module, if present """ 38 | pass 39 | 40 | def eval(self): 41 | """ Should call Module.eval() on each torch.nn.Module, if present """ 42 | pass 43 | 44 | 45 | class Oracle(R2RAgent): 46 | def __init__(self, config): 47 | super(Oracle, self).__init__(config) 48 | self.name = 'Oracle' 49 | 50 | def rollout(self, env): 51 | obs = env.reset() 52 | traj = [{ 53 | 'instr_id': ob['instr_id'], 54 | 'path': [(ob['viewpoint'], ob['heading'], ob['elevation'])] 55 | } for ob in obs] 56 | ended = np.array([False] * len(obs)) 57 | 58 | while True: 59 | actions = [ob['teacher'] for ob in obs] 60 | obs = env.step(actions) 61 | for i, a in enumerate(actions): 62 | if a == (0, 0, 0): 63 | ended[i] = True 64 | for i, ob in enumerate(obs): 65 | if not ended[i]: 66 | traj[i]['path'].append((ob['viewpoint'], ob['heading'], ob['elevation'])) 67 | if ended.all(): 68 | break 69 | 70 | return traj 71 | 72 | 73 | class Stop(R2RAgent): 74 | def __init__(self, config): 75 | super(Stop, self).__init__(config) 76 | self.name = 'Stop' 77 | 78 | def rollout(self, env): 79 | obs = env.reset() 80 | traj = [{ 81 | 'instr_id': ob['instr_id'], 82 | 'path': [(ob['viewpoint'], ob['heading'], ob['elevation'])] 83 | } for ob in obs] 84 | return traj 85 | 86 | 87 | class Random(R2RAgent): 88 | def __init__(self, config): 89 | super(Random, self).__init__(config) 90 | self.name = 'Random' 91 | 92 | def rollout(self, env): 93 | obs = env.reset() 94 | traj = [{ 95 | 'instr_id': ob['instr_id'], 96 | 'path': [(ob['viewpoint'], ob['heading'], ob['elevation'])] 97 | } for ob in obs] 98 | ended = np.array([False] * len(obs)) 99 | 100 | for t in range(20): 101 | actions_idx = np.random.randint(0, len(R2RAgent.low_level_actions), len(obs)) 102 | actions = [(0, 1, 0) if len(obs[i]['navigableLocations']) <= 1 and idx == R2RAgent.low_level_actions.index((1, 0, 0)) 103 | else R2RAgent.low_level_actions[idx] for i, idx in enumerate(actions_idx)] 104 | obs = env.step(actions) 105 | for i, a in enumerate(actions): 106 | if a == (0, 0, 0): 107 | ended[i] = True 108 | for i, ob in enumerate(obs): 109 | if not ended[i]: 110 | traj[i]['path'].append((ob['viewpoint'], ob['heading'], ob['elevation'])) 111 | if ended.all(): 112 | break 113 | 114 | return traj 115 | 116 | 117 | class Dynamic(R2RAgent): 118 | 119 | env_actions = [ 120 | (0, -1, 0), # left 121 | (0, 1, 0), # right 122 | 123 | (0, 0, 1), 
# up 124 | (0, 0, -1), # down 125 | 126 | (1, 0, 0), # forward 127 | 128 | (0, 0, 0), # 129 | (0, 0, 0), # 130 | ] 131 | 132 | def __init__(self, config): 133 | super(Dynamic, self).__init__(config) 134 | self.name = 'Dynamic' 135 | self.mode = None 136 | 137 | self.device = config['device'] 138 | self.max_episode_len = config['max_episode_len'] 139 | self.criterion = torch.nn.CrossEntropyLoss() 140 | self.num_heads = config['num_heads'] 141 | self.glove = ssvoc.GloVe() 142 | self.lstm_input_size = 36 * self.num_heads + Dynamic.n_inputs() 143 | 144 | self.encoder = InstructionEncoder(input_size=300, 145 | hidden_size=512, 146 | use_bias=True).to(device=self.device) 147 | 148 | self.policy = DynamicDecoder(input_size=self.lstm_input_size, 149 | hidden_size=512, output_size=6, 150 | key_size=128, query_size=128, value_size=512, 151 | image_size=2051, filter_size=512, 152 | num_heads=self.num_heads, 153 | drop_prob=0.5, 154 | use_bias=True, 155 | filter_activation=torch.nn.Tanh(), 156 | policy_activation=torch.nn.Softmax(dim=-1)).to(device=self.device) 157 | 158 | @staticmethod 159 | def n_inputs(): 160 | return len(Dynamic.env_actions) 161 | 162 | def train(self): 163 | self.mode = 'train' 164 | self.encoder.train() 165 | self.policy.train() 166 | 167 | def eval(self): 168 | self.mode = 'eval' 169 | self.encoder.eval() 170 | self.policy.eval() 171 | 172 | def save(self, encoder_path, policy_path): 173 | torch.save(self.encoder.state_dict(), encoder_path) 174 | torch.save(self.policy.state_dict(), policy_path) 175 | 176 | def load(self, encoder_path, policy_path): 177 | pretrained_dict_encoder = torch.load(encoder_path) 178 | pretrained_dict_decoder = torch.load(policy_path) 179 | 180 | encoder_dict = self.encoder.state_dict() 181 | decoder_dict = self.policy.state_dict() 182 | 183 | # 1. filter out unnecessary keys 184 | pretrained_dict_encoder = {k: v for k, v in pretrained_dict_encoder.items() if k in encoder_dict} 185 | pretrained_dict_decoder = {k: v for k, v in pretrained_dict_decoder.items() if k in decoder_dict} 186 | 187 | # 2. overwrite entries in the existing state dict 188 | encoder_dict.update(pretrained_dict_encoder) 189 | decoder_dict.update(pretrained_dict_decoder) 190 | 191 | # 3. load the new state dict 192 | self.encoder.load_state_dict(pretrained_dict_encoder) 193 | self.policy.load_state_dict(pretrained_dict_decoder) 194 | 195 | def _get_targets_and_features(self, obs): 196 | target_actions = [] 197 | target_idx = [] 198 | features = [] 199 | 200 | for i, ob in enumerate(obs): 201 | target_actions.append( 202 | ob['teacher'] if ob['teacher'] in self.env_actions else (1, 0, 0) 203 | ) 204 | target_idx.append(self.env_actions.index( 205 | ob['teacher'] if ob['teacher'] in self.env_actions else (1, 0, 0) 206 | )) 207 | features.append(torch.from_numpy(ob['feature'])) 208 | 209 | return target_actions, torch.tensor(target_idx), features 210 | 211 | def _encode_instruction(self, instructions): 212 | instr_embedding, instr_len = batched_sentence_embedding(instructions, self.glove, device=self.device) 213 | value = self.encoder(instr_embedding) 214 | return value 215 | 216 | def get_trainable_params(self): 217 | return list(self.encoder.parameters()) + list(self.policy.parameters()) 218 | 219 | def rollout(self, env): 220 | 221 | assert self.mode is not None, "This agent contains trainable modules! 
Please call either agent.train() or agent.eval() before rollout" 222 | assert self.mode in ['train', 'eval'], "Agent.mode expected to be in ['train', 'eval'], found %s" % self.mode 223 | 224 | obs = env.reset() 225 | ended = np.array([False] * len(obs)) 226 | losses = [] 227 | 228 | traj = [{ 229 | 'instr_id': ob['instr_id'], 230 | 'path': [(ob['viewpoint'], ob['heading'], ob['elevation'])] 231 | } for ob in obs] 232 | 233 | instr = [ob['instructions'] for ob in obs] 234 | value = self._encode_instruction(instr) 235 | 236 | target_actions, target_idx, features = self._get_targets_and_features(obs) 237 | previous_action = to_one_hot([Dynamic.n_inputs() - 1] * len(obs), Dynamic.n_inputs()) # Action at t=0 is for every agent 238 | 239 | for t in range(self.max_episode_len): 240 | 241 | image_features = torch.stack( 242 | [append_coordinates(features[i], ob['heading'], ob['elevation']) for i, ob in enumerate(obs)] 243 | ).to(device=self.device) 244 | 245 | pred, logits, response_map = self.policy(image_features, value, previous_action, init_lstm_state=t == 0) 246 | 247 | """ Losses """ 248 | step_loss = self.criterion(pred, target_idx.to(device=self.device)) 249 | losses.append(step_loss) 250 | 251 | """ Performs steps """ 252 | # Mask outputs where agent can't move forward 253 | probs = logits.clone().detach().to(device=torch.device('cpu')) 254 | for i, ob in enumerate(obs): 255 | if len(ob['navigableLocations']) <= 1: 256 | probs[i, self.env_actions.index((1, 0, 0))] = 0. 257 | 258 | if self.mode == 'eval': 259 | _, a_t = probs.max(1) # argmax 260 | actions = [self.env_actions[idx] for idx in a_t] 261 | else: 262 | m = torch.distributions.Categorical(probs) # sampling from distribution 263 | a_t = m.sample() 264 | actions = [self.env_actions[idx] if target_actions[i] != (0, 0, 0) else (0, 0, 0) for i, idx in enumerate(a_t)] 265 | 266 | """ Next step """ 267 | obs = env.step(actions) 268 | 269 | for i, ob in enumerate(obs): 270 | if not ended[i]: 271 | if actions[i] == (0, 0, 0): 272 | ended[i] = True 273 | else: 274 | traj[i]['path'].append((ob['viewpoint'], ob['heading'], ob['elevation'])) 275 | 276 | if ended.all(): 277 | break 278 | 279 | target_actions, target_idx, features = self._get_targets_and_features(obs) 280 | previous_action = to_one_hot(a_t, self.n_inputs()) 281 | 282 | """ Compute the loss for the whole rollout """ 283 | losses = torch.stack(losses).to(device=self.device) 284 | rollout_loss = torch.mean(losses) 285 | 286 | return traj, rollout_loss 287 | -------------------------------------------------------------------------------- /tasks/R2R/Models/__init__.py: -------------------------------------------------------------------------------- 1 | from tasks.R2R.Models.dynamic import InstructionEncoder, DynamicDecoder 2 | -------------------------------------------------------------------------------- /tasks/R2R/Models/dynamic.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.nn.functional as F 4 | 5 | 6 | class InstructionEncoder(nn.Module): 7 | """ Encodes instruction via LSTM """ 8 | def __init__(self, input_size=300, hidden_size=512, use_bias=True): 9 | super(InstructionEncoder, self).__init__() 10 | self.input_size = input_size 11 | self.hidden_size = hidden_size 12 | self.use_bias = use_bias 13 | 14 | """ LSTM init""" 15 | self.lstm_cell = nn.LSTMCell(self.input_size, self.hidden_size, bias=self.use_bias) 16 | """ init weights""" 17 | for name, param in 
self.lstm_cell.named_parameters(): 18 | if 'bias' in name: 19 | nn.init.constant_(param, 0.0) 20 | elif 'weight' in name: 21 | nn.init.orthogonal_(param) 22 | 23 | def forward(self, x): 24 | """ Checking data shape """ 25 | forwd = x 26 | assert forwd.shape[1] == self.input_size, "Expected input with shape [batch, %s, seq_len], found %s" % (self.input_size, forwd.shape) 27 | batch_size = forwd.shape[0] 28 | 29 | """ init hidden and cell state """ 30 | hx = torch.zeros(batch_size, self.hidden_size).cuda() 31 | cx = torch.zeros(batch_size, self.hidden_size).cuda() 32 | history = [] 33 | 34 | """ forward through lstm """ 35 | for seq in range(forwd.shape[-1]): 36 | input_data = forwd[..., seq] 37 | hx, cx = self.lstm_cell(input_data, (hx, cx)) 38 | history.append(hx) 39 | 40 | stacked = torch.stack(history).transpose(0, 1) 41 | return stacked 42 | 43 | 44 | class DynamicDecoder(nn.Module): 45 | def __init__(self, input_size, hidden_size=512, output_size=6, 46 | key_size=128, query_size=128, value_size=512, 47 | image_size=2051, filter_size=512, 48 | num_heads=1, 49 | drop_prob=0.5, use_bias=True, 50 | filter_activation=nn.Tanh(), 51 | policy_activation=nn.Softmax(dim=-1)): 52 | super(DynamicDecoder, self).__init__() 53 | 54 | """ policy variables """ 55 | self.input_size = input_size 56 | self.hidden_size = hidden_size 57 | self.output_size = output_size 58 | self.drop_prob = drop_prob 59 | self.use_bias = use_bias 60 | self.hx = None 61 | self.cx = None 62 | 63 | """ attention variables """ 64 | self.key_size = key_size 65 | self.query_size = query_size 66 | self.value_size = value_size 67 | 68 | """ image feature pre-processing variables """ 69 | self.image_size = image_size 70 | self.filter_size = filter_size 71 | 72 | """ attention linear layers and activations """ 73 | self.fc_key = nn.Linear(self.value_size, self.key_size, bias=self.use_bias) 74 | self.fc_query = nn.Linear(self.hidden_size, self.query_size, bias=self.use_bias) 75 | self.softmax = nn.Softmax(dim=1) 76 | self.filter_activation = filter_activation 77 | self.num_heads = num_heads 78 | self.heads = [nn.Linear( 79 | self.value_size, self.filter_size 80 | ).to(device=torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu') 81 | ) for _ in range(self.num_heads)] 82 | 83 | """ policy layers and activation""" 84 | self.bottleneck = nn.Conv1d(self.image_size, self.filter_size, 1, stride=1, padding=0, bias=self.use_bias) 85 | self.fc_action = nn.Linear(7, 7, bias=True) 86 | self.lstm_cell = nn.LSTMCell(self.input_size, self.hidden_size, bias=self.use_bias) 87 | self.linear = nn.Linear(self.hidden_size, self.output_size, bias=self.use_bias) 88 | self.drop = nn.Dropout(p=self.drop_prob) 89 | self.drop_h = nn.Dropout(p=0.2) 90 | self.policy_activation = policy_activation 91 | 92 | """ init LSTM weights""" 93 | for name, param in self.lstm_cell.named_parameters(): 94 | if 'bias' in name: 95 | nn.init.constant_(param, 0.0) 96 | elif 'weight' in name: 97 | nn.init.orthogonal_(param) 98 | 99 | def init_lstm_cell(self, batch_size): 100 | self.hx = torch.zeros(batch_size, self.hidden_size).cuda() 101 | self.cx = torch.zeros(batch_size, self.hidden_size).cuda() 102 | 103 | def forward(self, x, value, action, init_lstm_state=True): 104 | assert x.shape[0] == value.shape[0] 105 | assert x.shape[0] == action.shape[0] 106 | batch_size = x.shape[0] 107 | 108 | if init_lstm_state: 109 | self.init_lstm_cell(batch_size) 110 | 111 | """ value shape: [B, T, 512] -> key shape: [B, T, 128] """ 112 | key = F.relu(self.fc_key(value)) 113 
| 114 | """ hx shape: [B, 512] -> query shape: [B, 128, 1]""" 115 | query = F.relu(self.fc_query(self.hx)) 116 | query = query.unsqueeze(dim=-1) 117 | 118 | """ scaled-dot-product attention """ 119 | scale_1 = torch.sqrt(torch.tensor(key.shape[-1], dtype=torch.double)) 120 | scaled_dot_product = torch.bmm(key, query) / scale_1 # shape: [B, T, 1] 121 | softmax = self.softmax(scaled_dot_product) # shape: [B, T, 1] 122 | element_wise_product = value*softmax # shape: [B, T, 512] 123 | current_instruction = torch.sum(element_wise_product, dim=1) # shape: [B, 512] 124 | 125 | """ dynamic convolutional filters """ 126 | dynamic_filter = torch.stack([head(self.drop_h(current_instruction)) for head in self.heads]).transpose(0, 1) 127 | dynamic_filter = self.filter_activation(dynamic_filter) 128 | dynamic_filter = F.normalize(dynamic_filter, p=2, dim=-1) 129 | 130 | """ Key must be in the format [Batch, Channels, L]; Channels == image_size """ 131 | if x.shape[1] != self.image_size: 132 | x = x.transpose(1, 2) 133 | 134 | x = self.bottleneck(x) 135 | 136 | """ [36, N] = T[512, 36] * T[N, 512] """ 137 | scale_2 = torch.sqrt(torch.tensor(x.shape[1], dtype=torch.double)) 138 | attention_map = torch.bmm(x.transpose(1, 2), dynamic_filter.transpose(-1, -2)) / scale_2 139 | b, c, f = attention_map.shape 140 | attention_map = attention_map.reshape(b, c*f) 141 | 142 | action_embedded = self.fc_action(action.cuda()) 143 | in_data = torch.cat((attention_map, action_embedded), 1) 144 | 145 | """ Shape of in_data must be [Batch, Input_size] """ 146 | self.hx, self.cx = self.lstm_cell(in_data, (self.hx, self.cx)) 147 | 148 | policy_data = self.hx 149 | 150 | drop = self.drop(policy_data) 151 | pred = self.linear(drop) 152 | logits = self.policy_activation(pred) 153 | 154 | return pred, logits, attention_map.reshape(b, c, f) 155 | -------------------------------------------------------------------------------- /tasks/R2R/data/download.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | 3 | wget https://www.dropbox.com/s/lztjsji51pr5ig2/R2R_train.json -P tasks/R2R/data/ 4 | wget https://www.dropbox.com/s/66nowglznzx1le9/R2R_val_seen.json -P tasks/R2R/data/ 5 | wget https://www.dropbox.com/s/it9zpexb97d6bes/R2R_val_unseen.json -P tasks/R2R/data/ 6 | wget https://www.dropbox.com/s/0huat2lc5iy5o8j/R2R_test.json -P tasks/R2R/data/ 7 | -------------------------------------------------------------------------------- /tasks/R2R/env.py: -------------------------------------------------------------------------------- 1 | import sys 2 | import csv 3 | import numpy as np 4 | import math 5 | import base64 6 | import random 7 | import networkx as nx 8 | 9 | from tasks.R2R.utils import load_datasets, load_nav_graphs, print_progress 10 | 11 | sys.path.append('build') 12 | import MatterSim 13 | 14 | 15 | csv.field_size_limit(sys.maxsize) 16 | 17 | 18 | def _make_id(scan_id, viewpoint_id): 19 | return scan_id + '_' + viewpoint_id 20 | 21 | 22 | def load_features(feature_store): 23 | image_w, image_h, vfov = 640, 480, 60 24 | 25 | # if the tsv file for image features is provided 26 | if feature_store: 27 | tsv_fieldnames = ['scanId', 'viewpointId', 'image_w', 'image_h', 'vfov', 'features'] 28 | features = {} 29 | with open(feature_store, "r") as tsv_in_file: 30 | print('Reading image features file %s' % feature_store) 31 | reader = list(csv.DictReader(tsv_in_file, delimiter='\t', fieldnames=tsv_fieldnames)) 32 | total_length = len(reader) 33 | 34 | print('Loading image features...') 
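                # Feature-store layout (inferred from the fields parsed below, not stated elsewhere in this file):
                # each TSV row describes one panorama, with 'features' holding a base64-encoded float32 buffer
                # that reshapes to (36, 2048) -- 36 discretized views x 2048-d ResNet-152 features --
                # and is keyed in the returned dict by '<scanId>_<viewpointId>'.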
35 | for i, item in enumerate(reader): 36 | image_h = int(item['image_h']) 37 | image_w = int(item['image_w']) 38 | vfov = int(item['vfov']) 39 | long_id = _make_id(item['scanId'], item['viewpointId']) 40 | features[long_id] = np.frombuffer(base64.b64decode(item['features']), 41 | dtype=np.float32).reshape((36, 2048)) 42 | print_progress(i + 1, total_length, prefix='Progress:', 43 | suffix='Complete', bar_length=50) 44 | else: 45 | print('Image features not provided') 46 | features = None 47 | 48 | return features, (image_w, image_h, vfov) 49 | 50 | 51 | class EnvBatch: 52 | """ A simple wrapper for a batch of MatterSim environments, 53 | using discretized viewpoints and pretrained features """ 54 | 55 | def __init__(self, features, img_spec, batch_size=100): 56 | self.features = features 57 | self.image_w, self.image_h, self.vfov = img_spec 58 | 59 | self.batch_size = batch_size 60 | self.sim = MatterSim.Simulator() 61 | self.sim.setRenderingEnabled(False) 62 | self.sim.setDiscretizedViewingAngles(True) 63 | self.sim.setBatchSize(self.batch_size) 64 | self.sim.setCameraResolution(self.image_w, self.image_h) 65 | self.sim.setCameraVFOV(math.radians(self.vfov)) 66 | self.sim.initialize() 67 | 68 | def new_episode(self, scan_ids, viewpoint_ids, headings): 69 | self.sim.newEpisode(scan_ids, viewpoint_ids, headings, [0] * self.batch_size) 70 | 71 | def get_states(self): 72 | """ Get list of states augmented with precomputed image features. rgb field will be empty. """ 73 | feature_states = [] 74 | for state in self.sim.getState(): 75 | long_id = _make_id(state.scanId, state.location.viewpointId) 76 | if self.features: 77 | feature = self.features[long_id] 78 | feature_states.append((feature, state)) 79 | else: 80 | feature_states.append((None, state)) 81 | return feature_states 82 | 83 | def make_actions(self, actions): 84 | """ Take an action using the full state dependent action interface (with batched input). 85 | Every action element should be an (index, heading, elevation) tuple. """ 86 | ix = [] 87 | heading = [] 88 | elevation = [] 89 | for i, h, e in actions: 90 | ix.append(int(i)) 91 | heading.append(float(h)) 92 | elevation.append(float(e)) 93 | self.sim.makeAction(ix, heading, elevation) 94 | 95 | def make_simple_actions(self, simple_indices): 96 | """ Take an action using a simple interface: 0-forward, 1-turn left, 2-turn right, 3-look up, 4-look down. 97 | All viewpoint changes are 30 degrees. Forward, look up and look down may not succeed - check state. 98 | WARNING - Very likely this simple interface restricts some edges in the graph. Parts of the 99 | environment may not longer be navigable. 
""" 100 | actions = [] 101 | for i, index in enumerate(simple_indices): 102 | if index == 0: 103 | actions.append((1, 0, 0)) 104 | elif index == 1: 105 | actions.append((0, -1, 0)) 106 | elif index == 2: 107 | actions.append((0, 1, 0)) 108 | elif index == 3: 109 | actions.append((0, 0, 1)) 110 | elif index == 4: 111 | actions.append((0, 0, -1)) 112 | else: 113 | sys.exit("Invalid simple action") 114 | self.make_actions(actions) 115 | 116 | 117 | class R2RBatch: 118 | """ Implements the Room to Room navigation task, using discretized viewpoints and pretrained features """ 119 | 120 | def __init__(self, features, img_spec, batch_size=100, seed=10, splits='train', tokenizer=None): 121 | self.env = EnvBatch(features, img_spec, batch_size=batch_size) 122 | self.data = [] 123 | self.scans = [] 124 | 125 | if isinstance(splits, str): 126 | splits = [splits] 127 | 128 | assert isinstance(splits, list), 'expected type list or str type for argument "splits", found %s' % type(splits) 129 | 130 | print('Loading {} dataset'.format(",".join(splits))) 131 | 132 | json_data = load_datasets(splits) 133 | total_length = len(json_data) 134 | 135 | for i, item in enumerate(json_data): 136 | # Split multiple instructions into separate entries 137 | for j, instr in enumerate(item['instructions']): 138 | self.scans.append(item['scan']) 139 | new_item = dict(item) 140 | new_item['instr_id'] = '%s_%d' % (item['path_id'], j) 141 | new_item['instructions'] = instr 142 | if tokenizer: 143 | new_item['instr_encoding'] = tokenizer.encode_sentence(instr) 144 | self.data.append(new_item) 145 | print_progress(i + 1, total_length, prefix='Progress:', suffix='Complete', bar_length=50) 146 | self.scans = set(self.scans) 147 | self.splits = splits 148 | self.seed = seed 149 | random.seed(self.seed) 150 | random.shuffle(self.data) 151 | self.ix = 0 152 | self.batch_size = batch_size 153 | self._load_nav_graphs() 154 | print('R2RBatch loaded with %d instructions, using splits: %s' % (len(self.data), ",".join(splits))) 155 | 156 | def _load_nav_graphs(self): 157 | """ Load connectivity graph for each scan, useful for reasoning about shortest paths """ 158 | print('Loading navigation graphs for %d scans' % len(self.scans)) 159 | self.graphs = load_nav_graphs(self.scans) 160 | self.paths = {} 161 | for scan, G in self.graphs.items(): # compute all shortest paths 162 | self.paths[scan] = dict(nx.all_pairs_dijkstra_path(G)) 163 | self.distances = {} 164 | for scan, G in self.graphs.items(): # compute all shortest paths 165 | self.distances[scan] = dict(nx.all_pairs_dijkstra_path_length(G)) 166 | 167 | def _next_minibatch(self): 168 | batch = self.data[self.ix:self.ix + self.batch_size] 169 | if len(batch) < self.batch_size: 170 | random.shuffle(self.data) 171 | self.ix = self.batch_size - len(batch) 172 | batch += self.data[:self.ix] 173 | else: 174 | self.ix += self.batch_size 175 | self.batch = batch 176 | 177 | def reset_epoch(self): 178 | """ Reset the data index to beginning of epoch. Primarily for testing. 179 | You must still call reset() for a new episode. 
""" 180 | self.ix = 0 181 | 182 | def _get_obs(self): 183 | raise NotImplementedError 184 | 185 | def reset(self): 186 | raise NotImplementedError 187 | 188 | def step(self, actions): 189 | raise NotImplementedError 190 | 191 | 192 | class LowLevelR2RBatch(R2RBatch): 193 | def __init__(self, features, img_spec, batch_size=100, seed=10, splits='train', tokenizer=None): 194 | super(LowLevelR2RBatch, self).__init__(features, img_spec, batch_size, seed, splits, tokenizer) 195 | 196 | def _shortest_path_action(self, state, goalviewpoint_id): 197 | """ Determine next action on the shortest path to goal, for supervised training. """ 198 | if state.location.viewpointId == goalviewpoint_id: 199 | return 0, 0, 0 # do nothing 200 | path = self.paths[state.scanId][state.location.viewpointId][goalviewpoint_id] 201 | nextviewpoint_id = path[1] 202 | # Can we see the next viewpoint? 203 | for i, loc in enumerate(state.navigableLocations): 204 | if loc.viewpointId == nextviewpoint_id: 205 | # Look directly at the viewpoint before moving 206 | if loc.rel_heading > math.pi / 6.0: 207 | return 0, 1, 0 # Turn right 208 | elif loc.rel_heading < -math.pi / 6.0: 209 | return 0, -1, 0 # Turn left 210 | elif loc.rel_elevation > math.pi / 6.0 and state.viewIndex // 12 < 2: 211 | return 0, 0, 1 # Look up 212 | elif loc.rel_elevation < -math.pi / 6.0 and state.viewIndex // 12 > 0: 213 | return 0, 0, -1 # Look down 214 | else: 215 | return i, 0, 0 # Move 216 | # Can't see it - first neutralize camera elevation 217 | if state.viewIndex // 12 == 0: 218 | return 0, 0, 1 # Look up 219 | elif state.viewIndex // 12 == 2: 220 | return 0, 0, -1 # Look down 221 | # Otherwise decide which way to turn 222 | pos = [state.location.x, state.location.y, state.location.z] 223 | target_rel = self.graphs[state.scanId].node[nextviewpoint_id]['position'] - pos 224 | target_heading = math.pi / 2.0 - math.atan2(target_rel[1], target_rel[0]) # convert to rel to y axis 225 | if target_heading < 0: 226 | target_heading += 2.0 * math.pi 227 | if state.heading > target_heading and state.heading - target_heading < math.pi: 228 | return 0, -1, 0 # Turn left 229 | if target_heading > state.heading and target_heading - state.heading > math.pi: 230 | return 0, -1, 0 # Turn left 231 | return 0, 1, 0 # Turn right 232 | 233 | def _get_obs(self): 234 | obs = [] 235 | for i, (feature, state) in enumerate(self.env.get_states()): 236 | item = self.batch[i] 237 | obs.append({ 238 | 'instr_id': item['instr_id'], 239 | 'scan': state.scanId, 240 | 'viewpoint': state.location.viewpointId, 241 | 'viewIndex': state.viewIndex, 242 | 'heading': state.heading, 243 | 'elevation': state.elevation, 244 | 'feature': feature, 245 | 'step': state.step, 246 | 'navigableLocations': state.navigableLocations, 247 | 'instructions': item['instructions'], 248 | 'teacher': self._shortest_path_action(state, item['path'][-1]), 249 | }) 250 | if 'instr_encoding' in item: 251 | obs[-1]['instr_encoding'] = item['instr_encoding'] 252 | return obs 253 | 254 | def reset(self): 255 | """ Load a new minibatch / episodes. 
""" 256 | self._next_minibatch() 257 | scan_ids = [item['scan'] for item in self.batch] 258 | viewpoint_ids = [item['path'][0] for item in self.batch] 259 | headings = [item['heading'] for item in self.batch] 260 | self.env.new_episode(scan_ids, viewpoint_ids, headings) 261 | return self._get_obs() 262 | 263 | def step(self, actions): 264 | """ Take action (same interface as make_actions) """ 265 | self.env.make_actions(actions) 266 | return self._get_obs() 267 | 268 | -------------------------------------------------------------------------------- /tasks/R2R/eval.py: -------------------------------------------------------------------------------- 1 | from tasks.R2R.utils import load_datasets, load_nav_graphs 2 | from tasks.R2R.env import LowLevelR2RBatch 3 | from tasks.R2R.utils import check_config_judge 4 | from collections import defaultdict 5 | 6 | import json 7 | import os 8 | import networkx as nx 9 | import numpy as np 10 | import pprint 11 | pp = pprint.PrettyPrinter(indent=4) 12 | 13 | 14 | class Evaluation(object): 15 | """ Results submission format: [{'instr_id': string, 'trajectory':[(viewpoint_id, heading_rads, elevation_rads),] } ] """ 16 | 17 | def __init__(self, splits): 18 | self.error_margin = 3.0 19 | self.splits = splits 20 | self.gt = {} 21 | self.instr_ids = [] 22 | self.scans = [] 23 | for item in load_datasets(splits): 24 | self.gt[item['path_id']] = item 25 | self.scans.append(item['scan']) 26 | self.instr_ids += ['%d_%d' % (item['path_id'], i) for i in range(3)] 27 | self.scans = set(self.scans) 28 | self.instr_ids = set(self.instr_ids) 29 | self.graphs = load_nav_graphs(self.scans) 30 | self.distances = {} 31 | self.scores = None 32 | for scan, G in self.graphs.items(): # compute all shortest paths 33 | self.distances[scan] = dict(nx.all_pairs_dijkstra_path_length(G)) 34 | 35 | def _get_nearest(self, scan, goal_id, path): 36 | near_id = path[0][0] 37 | near_d = self.distances[scan][near_id][goal_id] 38 | for item in path: 39 | d = self.distances[scan][item[0]][goal_id] 40 | if d < near_d: 41 | near_id = item[0] 42 | near_d = d 43 | return near_id 44 | 45 | def _score_item(self, instr_id, path): 46 | """ Calculate error based on the final position in trajectory, and also 47 | the closest position (oracle stopping rule). """ 48 | gt = self.gt[int(instr_id.split('_')[0])] 49 | start = gt['path'][0] 50 | assert start == path[0][0], 'Result trajectories should include the start position' 51 | goal = gt['path'][-1] 52 | final_position = path[-1][0] 53 | nearest_position = self._get_nearest(gt['scan'], goal, path) 54 | self.scores['nav_errors'].append(self.distances[gt['scan']][final_position][goal]) 55 | self.scores['oracle_errors'].append(self.distances[gt['scan']][nearest_position][goal]) 56 | self.scores['trajectory_steps'].append(len(path) - 1) 57 | distance = 0 # Work out the length of the path in meters 58 | prev = path[0] 59 | for curr in path[1:]: 60 | if prev[0] != curr[0]: 61 | try: 62 | self.graphs[gt['scan']][prev[0]][curr[0]] 63 | except KeyError: 64 | print('Error: The provided trajectory moves from %s to %s but the navigation graph contains no ' 65 | 'edge between these viewpoints. Please ensure the provided navigation trajectories ' 66 | 'are valid, so that trajectory length can be accurately calculated.' 
% (prev[0], curr[0])) 67 | raise 68 | distance += self.distances[gt['scan']][prev[0]][curr[0]] 69 | prev = curr 70 | self.scores['trajectory_lengths'].append(distance) 71 | self.scores['shortest_path_lengths'].append(self.distances[gt['scan']][start][goal]) 72 | 73 | def score(self, output_file): 74 | """ Evaluate each agent trajectory based on how close it got to the goal location """ 75 | self.scores = defaultdict(list) 76 | instr_ids = set(self.instr_ids) 77 | with open(output_file) as f: 78 | for item in json.load(f): 79 | # Check against expected ids 80 | if item['instr_id'] in instr_ids: 81 | instr_ids.remove(item['instr_id']) 82 | self._score_item(item['instr_id'], item['trajectory']) 83 | assert len(instr_ids) == 0, 'Trajectories not provided for %d instruction ids: %s' % (len(instr_ids), instr_ids) 84 | assert len(self.scores['nav_errors']) == len(self.instr_ids) 85 | num_successes = len([i for i in self.scores['nav_errors'] if i < self.error_margin]) 86 | 87 | oracle_successes = len([i for i in self.scores['oracle_errors'] if i < self.error_margin]) 88 | 89 | spls = [] 90 | for err, length, sp in zip(self.scores['nav_errors'], self.scores['trajectory_lengths'], self.scores['shortest_path_lengths']): 91 | if err < self.error_margin: 92 | spls.append(sp / max(length, sp)) 93 | else: 94 | spls.append(0) 95 | 96 | score_summary = { 97 | 'length': np.average(self.scores['trajectory_lengths']), 98 | 'steps': np.average(self.scores['trajectory_steps']), 99 | 'nav_error': np.average(self.scores['nav_errors']), 100 | 'oracle success_rate': float(oracle_successes) / float(len(self.scores['oracle_errors'])), 101 | 'success_rate': float(num_successes) / float(len(self.scores['nav_errors'])), 102 | 'spl': np.average(spls), 103 | } 104 | 105 | assert score_summary['spl'] <= score_summary['success_rate'] 106 | return score_summary, self.scores 107 | 108 | 109 | class Judge: 110 | def __init__(self, config): 111 | self.results = dict() 112 | self.config = check_config_judge(config) 113 | self.env = LowLevelR2RBatch(features=config['features'], 114 | img_spec=config['img_spec'], 115 | batch_size=config['batch_size'], 116 | seed=config['seed'], 117 | splits=config['splits'] 118 | ) 119 | 120 | self.results_path = os.path.join(self.config['results_path'], 'results.json') 121 | self.evaluations = [Evaluation([split]) for split in config['splits']] 122 | 123 | self.main_split = 'val_unseen' 124 | self.main_metric = 'spl' 125 | 126 | def test(self, agent): 127 | agent.eval() 128 | self.env.reset_epoch() 129 | 130 | # We rely on env showing the entire batch before repeating anything 131 | self.results = {} 132 | looped = False 133 | while True: 134 | if agent.get_name() == 'Dynamic': 135 | trajectories, _ = agent.rollout(self.env) 136 | else: 137 | trajectories = agent.rollout(self.env) 138 | 139 | for traj in trajectories: 140 | if traj['instr_id'] in self.results: 141 | looped = True 142 | else: 143 | self.results[traj['instr_id']] = traj['path'] 144 | 145 | if looped: 146 | break 147 | 148 | output = [{'instr_id': k, 'trajectory': v} for k, v in self.results.items()] 149 | 150 | with open(self.results_path, 'w') as f: 151 | json.dump(output, f) 152 | 153 | main_metric = None 154 | 155 | for split, evaluation in zip(self.config['splits'], self.evaluations): 156 | score_summary, scores = evaluation.score(self.results_path) 157 | print("Agent: %s -- Split: %s" % (agent.get_name(), ",".join(evaluation.splits))) 158 | pp.pprint(score_summary) 159 | if split == self.main_split: 160 | assert 
self.main_metric in score_summary, 'Field %s not found in score_summary' % self.main_metric 161 | main_metric = score_summary[self.main_metric] 162 | 163 | return main_metric 164 | -------------------------------------------------------------------------------- /tasks/R2R/main.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | 3 | import sys 4 | import os 5 | import torch 6 | import torch.optim as optim 7 | import numpy as np 8 | 9 | sys.path.append(os.getcwd()) 10 | 11 | from tasks.R2R.Agents import get_agent 12 | from tasks.R2R.env import load_features 13 | from tasks.R2R.trainer import Trainer 14 | from tasks.R2R.eval import Judge 15 | 16 | 17 | parser = argparse.ArgumentParser(description='PyTorch for Matterport3D Agent with Dynamic Convolutional Filters') 18 | 19 | # General options 20 | parser.add_argument('--name', type=str, default='custom_experiment', 21 | help='name for the experiment') 22 | parser.add_argument('--results_dir', type=str, default='tasks/R2R/results', 23 | help='home directory for results') 24 | parser.add_argument('--feature_store', type=str, default='img_features/ResNet-152-imagenet.tsv', 25 | help='feature store file') 26 | parser.add_argument('--eval_only', action="store_true", 27 | help='if true, does not train the model before evaluating') 28 | parser.add_argument('--seed', type=int, default=42, 29 | help='initial random seed') 30 | # Training options 31 | parser.add_argument('--num_epoch', type=int, default=100, 32 | help='number of epochs') 33 | parser.add_argument('--eval_every', type=int, default=5, 34 | help='number of training epochs between evaluations') 35 | parser.add_argument('--patience', type=int, default=30, 36 | help='number of epochs to wait before early stopping') 37 | parser.add_argument('--lr', type=float, default=0.001, 38 | help='base learning rate') 39 | parser.add_argument('--batch_size', type=int, default=128, 40 | help='batch size') 41 | # Agent options 42 | parser.add_argument('--num_heads', type=int, default=1, 43 | help='number of heads for multi-headed dynamic convolution') 44 | parser.add_argument('--max_episode_len', type=int, default=20, 45 | help='agent max number of steps before stopping') 46 | 47 | 48 | """ Device info """ 49 | if torch.cuda.is_available(): 50 | device = torch.device('cuda') 51 | else: 52 | device = torch.device('cpu') 53 | print('Running on device: {}'.format(device)) 54 | 55 | 56 | def main(opts): 57 | 58 | splits = 'train' 59 | results_path = os.path.join(opts.results_dir, opts.name) 60 | features, img_spec = load_features(opts.feature_store) 61 | 62 | agent_config = { 63 | 'max_episode_len': opts.max_episode_len, 64 | 'num_heads': opts.num_heads, 65 | 'device': device, 66 | } 67 | 68 | trainer_config = { 69 | 'features': features, 70 | 'img_spec': img_spec, 71 | 'splits': splits, 72 | 'batch_size': opts.batch_size, 73 | 'seed': opts.seed, 74 | 'results_path': results_path, 75 | } 76 | 77 | judge_config = { 78 | 'features': features, 79 | 'img_spec': img_spec, 80 | 'splits': ['val_seen', 'val_unseen'], 81 | 'batch_size': opts.batch_size, 82 | 'seed': opts.seed, 83 | 'results_path': results_path, 84 | } 85 | 86 | agent = get_agent('Dynamic', agent_config) 87 | judge = Judge(judge_config) 88 | 89 | if opts.eval_only: 90 | agent.load(os.path.join(results_path, 'encoder_weights_best'), 91 | os.path.join(results_path, 'decoder_weights_best')) 92 | metric = judge.test(agent) 93 | print('Main metric result for this test: {:.4f}'.format(metric)) 94 | else: 
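        # Training branch: build a Trainer on the 'train' split, optimize the encoder and decoder
        # parameters jointly with Adam, and periodically evaluate with the Judge; trainer.train()
        # returns the best value of the main metric (SPL on val_unseen) reached during training.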
95 | trainer = Trainer(trainer_config) 96 | optimizer = optim.Adam(agent.get_trainable_params(), lr=opts.lr) 97 | best = trainer.train(agent, optimizer, opts.num_epoch, patience=opts.patience, eval_every=opts.eval_every, judge=judge) 98 | print('Best metric result for this test: {:.4f}'.format(best)) 99 | 100 | print('----- End -----') 101 | 102 | 103 | if __name__ == '__main__': 104 | args = parser.parse_args() 105 | 106 | if os.path.exists(os.path.join(args.results_dir, args.name)): 107 | print('WARNING: Experiment with this name already exists! - {}'.format(args.name)) 108 | else: 109 | os.makedirs(os.path.join(args.results_dir, args.name)) 110 | 111 | torch.manual_seed(args.seed) 112 | np.random.seed(args.seed) 113 | 114 | main(args) 115 | -------------------------------------------------------------------------------- /tasks/R2R/results/data_augmentation/decoder_weights_best: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aimagelab/DynamicConv-agent/717149587761b228c4789660de5699ac6b964b61/tasks/R2R/results/data_augmentation/decoder_weights_best -------------------------------------------------------------------------------- /tasks/R2R/results/data_augmentation/encoder_weights_best: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aimagelab/DynamicConv-agent/717149587761b228c4789660de5699ac6b964b61/tasks/R2R/results/data_augmentation/encoder_weights_best -------------------------------------------------------------------------------- /tasks/R2R/results/normal_data/decoder_weights_best: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aimagelab/DynamicConv-agent/717149587761b228c4789660de5699ac6b964b61/tasks/R2R/results/normal_data/decoder_weights_best -------------------------------------------------------------------------------- /tasks/R2R/results/normal_data/encoder_weights_best: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aimagelab/DynamicConv-agent/717149587761b228c4789660de5699ac6b964b61/tasks/R2R/results/normal_data/encoder_weights_best -------------------------------------------------------------------------------- /tasks/R2R/trainer.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | from tasks.R2R.env import LowLevelR2RBatch 4 | from tasks.R2R.utils import check_config_trainer, print_progress 5 | 6 | 7 | class Trainer: 8 | def __init__(self, config): 9 | self.results = dict() 10 | self.config = check_config_trainer(config) 11 | self.env = LowLevelR2RBatch(features=config['features'], 12 | img_spec=config['img_spec'], 13 | batch_size=config['batch_size'], 14 | seed=config['seed'], 15 | splits=config['splits'] 16 | ) 17 | print('Success!') 18 | 19 | def _train_epoch(self, agent, optimizer, num_iter): 20 | epoch_loss = 0. 
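        # Each iteration below rolls the agent out on one minibatch from the environment and
        # backpropagates the returned rollout loss (mean per-step cross-entropy against the
        # teacher's shortest-path actions), accumulating it into the running epoch loss.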
21 | agent.train() 22 | self.env.reset_epoch() 23 | 24 | for it in range(num_iter): 25 | optimizer.zero_grad() 26 | _, loss = agent.rollout(self.env) 27 | loss.backward() 28 | optimizer.step() 29 | epoch_loss += loss.item() 30 | suffix_msg = 'Running Loss: {:.4f}'.format(epoch_loss / (it+1)) 31 | print_progress(it, num_iter, suffix=suffix_msg) 32 | else: 33 | suffix_msg = 'Running Loss: {:.4f}'.format(epoch_loss / num_iter) 34 | print_progress(num_iter, num_iter, suffix=suffix_msg) 35 | 36 | return epoch_loss / num_iter 37 | 38 | def train(self, agent, optimizer, num_epoch, num_iter_epoch=None, patience=None, eval_every=None, judge=None): 39 | best_metric = 0. 40 | 41 | if num_iter_epoch is None: 42 | num_iter_epoch = len(self.env.data) // self.env.batch_size + 1 43 | if eval_every is None: 44 | if judge is None: 45 | eval_every = num_epoch + 1 # Never tested 46 | else: 47 | eval_every = num_epoch # Test only on the last epoch 48 | if patience is None: 49 | patience = num_epoch 50 | reset_patience = patience 51 | 52 | for epoch in range(num_epoch): 53 | mean_loss = self._train_epoch(agent, optimizer, num_iter_epoch) 54 | print("Epoch {}/{} terminated: Epoch Loss = {:.4f}".format(epoch+1, num_epoch, mean_loss)) 55 | agent.save(os.path.join(self.config['results_path'], 'encoder_weights_last'), 56 | os.path.join(self.config['results_path'], 'decoder_weights_last')) 57 | 58 | if (epoch+1) % eval_every == 0: 59 | metric = judge.test(agent) 60 | if metric is not None: 61 | print('Main metric results for this test: {:.4f}'.format(metric)) 62 | if metric > best_metric: 63 | best_metric = metric 64 | patience = reset_patience 65 | print('New best! Saving weights...') 66 | agent.save(os.path.join(self.config['results_path'], 'encoder_weights_best'), 67 | os.path.join(self.config['results_path'], 'decoder_weights_best')) 68 | else: 69 | patience -= 1 70 | if patience == 0: 71 | print('{} epochs without improvement in main metric ({}) - patience is over!'.format(reset_patience, judge.main_metric)) 72 | break 73 | 74 | print("Finishing training") 75 | return best_metric 76 | -------------------------------------------------------------------------------- /tasks/R2R/utils.py: -------------------------------------------------------------------------------- 1 | """ Utils for io, language, connectivity graphs etc """ 2 | import sys 3 | import json 4 | import numpy as np 5 | import networkx as nx 6 | import torch 7 | 8 | import nltk 9 | nltk.download('stopwords') 10 | 11 | from nltk.tokenize import RegexpTokenizer 12 | from nltk.corpus import stopwords 13 | 14 | # padding, unknown word, end of sentence 15 | base_vocab = ['', '', ''] 16 | padding_idx = base_vocab.index('') 17 | 18 | 19 | def load_nav_graphs(scans): 20 | """ Load connectivity graph for each scan """ 21 | 22 | def distance(pose1, pose2): 23 | """ Euclidean distance between two graph poses """ 24 | return ((pose1['pose'][3] - pose2['pose'][3]) ** 2 25 | + (pose1['pose'][7] - pose2['pose'][7]) ** 2 26 | + (pose1['pose'][11] - pose2['pose'][11]) ** 2) ** 0.5 27 | 28 | graphs = {} 29 | for scan in scans: 30 | with open('connectivity/%s_connectivity.json' % scan) as f: 31 | g = nx.Graph() 32 | positions = {} 33 | data = json.load(f) 34 | for i, item in enumerate(data): 35 | if item['included']: 36 | for j, conn in enumerate(item['unobstructed']): 37 | if conn and data[j]['included']: 38 | positions[item['image_id']] = np.array([item['pose'][3], 39 | item['pose'][7], item['pose'][11]]); 40 | assert data[j]['unobstructed'][i], 'Graph should be 
undirected' 41 | g.add_edge(item['image_id'], data[j]['image_id'], weight=distance(item, data[j])) 42 | nx.set_node_attributes(g, values=positions, name='position') 43 | graphs[scan] = g 44 | return graphs 45 | 46 | 47 | def load_datasets(splits): 48 | data = [] 49 | for split in splits: 50 | assert split in ['train', 'val_seen', 'val_unseen', 'test'] 51 | with open('tasks/R2R/data/R2R_%s.json' % split) as f: 52 | data += json.load(f) 53 | return data 54 | 55 | 56 | def print_progress(iteration, total, prefix='', suffix='', decimals=1, bar_length=100): 57 | """ 58 | Call in a loop to create terminal progress bar 59 | @params: 60 | iteration - Required : current iteration (Int) 61 | total - Required : total iterations (Int) 62 | prefix - Optional : prefix string (Str) 63 | suffix - Optional : suffix string (Str) 64 | decimals - Optional : positive number of decimals in percent complete (Int) 65 | bar_length - Optional : character length of bar (Int) 66 | """ 67 | str_format = "{0:." + str(decimals) + "f}" 68 | percents = str_format.format(100 * (iteration / float(total))) 69 | filled_length = int(round(bar_length * iteration / float(total))) 70 | bar = '█' * filled_length + '-' * (bar_length - filled_length) 71 | 72 | sys.stdout.write('\r%s |%s| %s%s %s' % (prefix, bar, percents, '%', suffix)), 73 | 74 | if iteration == total: 75 | sys.stdout.write('\n') 76 | sys.stdout.flush() 77 | 78 | 79 | def check_config_trainer(config): 80 | field_list = ['features', 'img_spec', 'splits', 'batch_size', 'seed', 'results_path'] 81 | assert isinstance(config, dict), 'expected type dict for argument config, found %s' % type(config) 82 | for field in field_list: 83 | assert field in config, 'missing required field in config: %s' % field 84 | return config 85 | 86 | 87 | def check_config_judge(config): 88 | field_list = ['features', 'img_spec', 'splits', 'batch_size', 'seed', 'results_path'] 89 | assert isinstance(config, dict), 'expected type dict for argument config, found %s' % type(config) 90 | for field in field_list: 91 | assert field in config, 'missing required field in config: %s' % field 92 | 93 | if isinstance(config['splits'], str): 94 | config['splits'] = [config['splits']] 95 | assert isinstance(config['splits'], list), 'expected type list or str type for argument "splits", found %s' % type(config['splits']) 96 | 97 | return config 98 | 99 | 100 | def my_split_func(sentence): 101 | tokenizer = RegexpTokenizer(r'\w+') 102 | stop_words = set(stopwords.words('english')) 103 | include_words = ['up', 'down', 'out', 'into', 'through', 'above', 'before', 'below', 'to', 'over', 'under'] 104 | 105 | for word in include_words: 106 | stop_words.remove(word) 107 | 108 | word_tokens = tokenizer.tokenize(sentence) 109 | filtered_sentence = [w for w in word_tokens if w not in stop_words] 110 | filtered_sentence_reversed = [w for w in reversed(word_tokens) if w not in stop_words] 111 | 112 | return filtered_sentence, filtered_sentence_reversed 113 | 114 | 115 | def batched_sentence_embedding(batch, word_encoder, device=torch.device('cpu')): 116 | """ 117 | :param batch: batch of instructions of variable lengths --> suppose range [min_l, max_l] 118 | :param word_encoder: provides single-word embeddings --- must support __getitem__ method 119 | :param device: may be cpu or cuda -- default is cpu 120 | :return: tensor of shape [batch_len, max_l, embedding_size] where sentences are zero-padded to have same size 121 | """ 122 | split_batch = [] 123 | 124 | for sentence in batch: 125 | spl, spl_rev = 
my_split_func(sentence) 126 | split_batch.append(spl) 127 | 128 | lengths = [len(spl) for spl in split_batch] 129 | max_l = max(lengths) 130 | 131 | t = torch.zeros(len(batch), max_l, 300) 132 | 133 | for i, spl in enumerate(split_batch): 134 | e = torch.stack([word_encoder[word] for word in spl]) 135 | t[i, :e.shape[0], :] = e.squeeze(dim=1) 136 | 137 | t = t.transpose(1, 2) 138 | 139 | embeddings = t.to(device=device) 140 | return embeddings, lengths 141 | 142 | 143 | def append_coordinates(features, agent_heading, agent_elevation): 144 | """ Appends elevation and headings coordinates to attention heatmap """ 145 | 146 | """ 147 | Assume features is 36 x num_features: appends 36-dimensional maps with elevation and headings. 148 | Indexing is the following: 149 | _________________________________________________ 150 | | | 151 | up | 24 25 26 27 28 29 30 31 32 33 34 35 | 152 | | | 153 | center | 12 13 14 15 16 17 18 19 20 21 22 23 | 154 | | | 155 | down | 0 1 2 3 4 5 6 7 8 9 10 11 | 156 | |________________________________________________| 157 | 158 | left center right 159 | """ 160 | 161 | abs_elevations = torch.tensor([-0.5, 0, 0.5], dtype=torch.float) 162 | elevations = abs_elevations - agent_elevation 163 | elevations_map = elevations.repeat(12, 1).transpose(0, 1).contiguous().view(36, 1) 164 | 165 | abs_headings = torch.tensor(np.linspace(0, (11./6.)*np.pi, 12), dtype=torch.float) 166 | headings = abs_headings - agent_heading 167 | 168 | headings_cos_map = torch.cos(headings).repeat(3).view(36, 1) 169 | headings_sin_map = torch.sin(headings).repeat(3).view(36, 1) 170 | 171 | feature_map = torch.cat((features, elevations_map, headings_cos_map, headings_sin_map), dim=-1) 172 | 173 | return feature_map 174 | 175 | 176 | def to_one_hot(indexes, output_dim): 177 | """ 178 | :param indexes: list of numbers in the range [0, output_dim) 179 | :param output_dim: size of a single one-hot tensor 180 | :return: tensor containing one_hot representation of indexes 181 | """ 182 | assert output_dim >= 2 183 | assert output_dim > max(indexes) 184 | assert min(indexes) >= 0 185 | 186 | return torch.eye(output_dim)[indexes] 187 | 188 | -------------------------------------------------------------------------------- /web/README.md: -------------------------------------------------------------------------------- 1 | # Web 2 | 3 | This directory contains web-based applications for: 4 | - Viewing and saving first-person trajectories 5 | - Amazon Mechanical Turk (AMT) interfaces that were used to collect and evaluate navigation instructions 6 | 7 | Code is based on Javascript and the [three.js](https://threejs.org/) wrapper for OpenGL, as well as the [tween.js](https://github.com/tweenjs/tween.js/) library for animation. The [Gulp](https://gulpjs.com/) task runner (based on Node.js) is used for spinning up a web servers and optimizing and minifying javascript for deployment (e.g. to AMT). 8 | 9 | To get started, make sure you have [Node.js](https://nodejs.org/en/) >=6.0.0 installed, then install the remaining dependencies using the npm package manager: 10 | ``` 11 | npm install 12 | ``` 13 | 14 | You will also need to first install the Matterport data as described [here](../README.md). 
Then, set up symlinks to data (from the app directory) as follows: 15 | ``` 16 | cd app 17 | ln -s ../../tasks/R2R/data/ R2Rdata 18 | ln -s ../../connectivity connectivity 19 | ln -s ../../data data 20 | ``` 21 | 22 | Also, download the R2R trajectory data by running this script from the top level directory (if you haven't already done this): 23 | ``` 24 | ./tasks/R2R/data/download.sh 25 | ``` 26 | 27 | 28 | Now you can start a web server to check out the various visualizations and AMT user interfaces: 29 | ``` 30 | gulp 31 | ``` 32 | 33 | ## Trajectory Visualization 34 | 35 | `trajectory.html` is an application for viewing first-person trajectories and downloading them as videos: 36 | - Use `Choose File` to select a trajectory file in the leaderboard submission format. By default, the included file `val_unseen_shortest_agent.json` is selected (containing the shortest paths to goal in the unseen validation set). 37 | - `Play` visualizes the trajectory with the provided index. 38 | - `Download video` visualizes the trajectory then downloads it as a .webm video. 39 | - Camera parameters can be set with the `Width`, `Height` and `V-FOV` fields. 40 | - Change the `Index` field to view different trajectories from the file. 41 | 42 | 43 | ## AMT Interfaces 44 | 45 | `collect-hit.html` and `eval-hit.html` are the AMT interfaces used for collecting navigation instructions for the R2R data set, and benchmarking human performance on the R2R test set, respectively. Both interfaces appear as they would to a worker on AMT, except there is not 'Submit' button. Instead, both interfaces have a url parameter `?ix=0` that can be directly edited in your browser address bar to view different HITs. There are also instructions at the top of the UI that can be expanded. 46 | 47 | ### collect-hit 48 | 49 | The UI `collect-hit.html` shows workers a navigation trajectory that must be annotated with a navigation instruction. Workers can only move along the trajectory (either fly-through or by clicking through each step), but cannot move anywhere else. Trajectories are loaded from the file `sample_room_paths.json`. Navigation instructions are collected in the textarea with id `tag1`, which can be integrated with AMT. 50 | 51 | ### eval-hit 52 | 53 | The UI `eval-hit.html` situates workers in an environment and provides a navigation instruction sourced from `R2R_test.json`. Workers can move anywhere, and must submit when they are as close as possible to the goal location. The actual navigation trajectories are collected in a hidden input with id `traj`, in the form of comma-separated (viewpointID, heading_degrees, elevation_degrees) tuples. 54 | 55 | ### Integrating with AMT 56 | 57 | To actually use these interfaces to collect data they must be integrated with AMT. Please check the AMT docs. At high level, several additional steps are required to achieve this: 58 | - Run `gulp build` to generate optimized and minified javascript (`main.min.js`) in the `dist` directory. 59 | - Host online the minified javascript files, along with the Matterport skybox images (we suggest downsampling the originals to 50% or smaller to keep the HITs responsive), our connectivity graphs, and any other necessary files for the particular html template (e.g. your own version of `sample_room_paths.json` or `R2R_test.json`) so they are publicly accessible. 60 | - In the html template(s): 61 | - Review the HIT instructions and replace references to ACRV with your research group. 
62 | - Replace all local urls with urls linking to your own publicly hosted assets, and 63 | - Switch to AMT parameters instead of url parameters, i.e., replace `var ix = location.search.split('ix=')[1];` with `var ix = ${ix}` and provide these parameters to AMT (e.g., in an uploaded csv file) when creating a batch of HITs. Note that the `ix` parameter is just an index into `sample_room_paths.json` or `R2R_test.json`. 64 | - Follow the AMT instructions to create a batch of HITs using your modified html template(s), such that the data collected in the `tag1` and/or `traj` fields will be available through AMT. 65 | 66 | Disclaimer: We provide this code to assist others collecting AMT annotations on top of Matterport-style data, but this is academic code and not a supported library. We may have forgotten something or left out a step! Feel free to submit pull requests with fixes. 67 | -------------------------------------------------------------------------------- /web/app/index.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | Matterport3D Simulator Web Tools 7 | 8 | 9 | 12 | 13 | 14 | 15 |
16 |