├── .gitmodules
├── CMakeLists.txt
├── Dockerfile
├── Doxyfile
├── LICENSE
├── README.md
├── cmake
│   └── Modules
│       └── FindNumPy.cmake
├── connectivity
├── 17DRP5sb8fy_connectivity.json
├── 1LXtFkjw3qL_connectivity.json
├── 1pXnuDYAj8r_connectivity.json
├── 29hnd4uzFmX_connectivity.json
├── 2azQ1b91cZZ_connectivity.json
├── 2n8kARJN3HM_connectivity.json
├── 2t7WUuJeko7_connectivity.json
├── 5LpN3gDmAk7_connectivity.json
├── 5ZKStnWn8Zo_connectivity.json
├── 5q7pvUzZiYa_connectivity.json
├── 759xd9YjKW5_connectivity.json
├── 7y3sRwLe3Va_connectivity.json
├── 8194nk5LbLH_connectivity.json
├── 82sE5b5pLXE_connectivity.json
├── 8WUmhLawc2A_connectivity.json
├── ARNzJeq3xxb_connectivity.json
├── B6ByNegPMKs_connectivity.json
├── D7G3Y4RVNrH_connectivity.json
├── D7N2EKCX4Sj_connectivity.json
├── E9uDoFAP3SH_connectivity.json
├── EDJbREhghzL_connectivity.json
├── EU6Fwq7SyZv_connectivity.json
├── GdvgFV5R1Z5_connectivity.json
├── HxpKQynjfin_connectivity.json
├── JF19kD82Mey_connectivity.json
├── JeFG25nYj2p_connectivity.json
├── JmbYfDe2QKZ_connectivity.json
├── PX4nDJXEHrG_connectivity.json
├── Pm6F8kyY3z2_connectivity.json
├── PuKPg4mmafe_connectivity.json
├── QUCTc6BB5sX_connectivity.json
├── README.md
├── RPmz2sHmrrY_connectivity.json
├── S9hNv5qa7GM_connectivity.json
├── SN83YJsR3w2_connectivity.json
├── TbHJrupSAjP_connectivity.json
├── ULsKaCPVFJR_connectivity.json
├── UwV83HsGsw3_connectivity.json
├── Uxmj2M2itWa_connectivity.json
├── V2XKFyX4ASd_connectivity.json
├── VFuaQ6m2Qom_connectivity.json
├── VLzqgDo317F_connectivity.json
├── VVfe2KiqLaN_connectivity.json
├── Vt2qJdWjCF2_connectivity.json
├── Vvot9Ly1tCj_connectivity.json
├── VzqfbhrpDEA_connectivity.json
├── WYY7iVyf5p8_connectivity.json
├── X7HyMhZNoso_connectivity.json
├── XcA2TqTSSAj_connectivity.json
├── YFuZgdQ5vWj_connectivity.json
├── YVUC4YcDtcY_connectivity.json
├── YmJkqBEsHnH_connectivity.json
├── Z6MFQCViBuw_connectivity.json
├── ZMojNkEp431_connectivity.json
├── aayBHfsNo7d_connectivity.json
├── ac26ZMwG7aT_connectivity.json
├── b8cTxDM8gDG_connectivity.json
├── cV4RVeZvu5T_connectivity.json
├── dhjEzFoUFzH_connectivity.json
├── e9zR4mvMWw7_connectivity.json
├── fzynW3qQPVF_connectivity.json
├── gTV8FGcVJC9_connectivity.json
├── gYvKGZ5eRqb_connectivity.json
├── gZ6f7yhEvPG_connectivity.json
├── gxdoqLR6rwA_connectivity.json
├── i5noydFURQK_connectivity.json
├── jh4fc5c5qoQ_connectivity.json
├── jtcxE69GiFV_connectivity.json
├── kEZ7cmS4wCh_connectivity.json
├── mJXqzFtmKg4_connectivity.json
├── oLBMNvg9in8_connectivity.json
├── p5wJjkQkbXX_connectivity.json
├── pLe4wQe7qrG_connectivity.json
├── pRbA3pwrgk9_connectivity.json
├── pa4otMbVnkk_connectivity.json
├── q9vSo1VnCiC_connectivity.json
├── qoiz87JEwZ2_connectivity.json
├── r1Q1Z4BcV1o_connectivity.json
├── r47D5H71a5s_connectivity.json
├── rPc6DW4iMge_connectivity.json
├── rqfALeAoiTq_connectivity.json
├── s8pcmisQ38h_connectivity.json
├── sKLMLpTHeUy_connectivity.json
├── sT4fr6TAbpF_connectivity.json
├── scans.txt
├── uNb9QFRL6hY_connectivity.json
├── ur6pFq6Qu1A_connectivity.json
├── vyrNrziPKCB_connectivity.json
├── wc2JMjhGNzB_connectivity.json
├── x8F5xyUWy9e_connectivity.json
├── yqstnuAEVhm_connectivity.json
└── zsNo4HB9uLZ_connectivity.json
├── include
│   ├── Benchmark.hpp
│   ├── Catch.hpp
│   ├── MatterSim.hpp
│   ├── NavGraph.hpp
│   └── cbf.h
├── models
│   └── ResNet-152-deploy.prototxt
├── requirements.txt
├── scripts
│   ├── depth_to_skybox.py
│   ├── downsize_skybox.py
│   ├── fill_depth.py
│   ├── precompute_img_features.py
│   └── timer.py
├── src
│   ├── driver
│   │   ├── driver.py
│   │   └── mattersim_main.cpp
│   ├── lib
│   │   ├── Benchmark.cpp
│   │   ├── MatterSim.cpp
│   │   ├── NavGraph.cpp
│   │   ├── cbf.cpp
│   │   ├── fragment.sh
│   │   └── vertex.sh
│   ├── lib_python
│   │   └── MatterSimPython.cpp
│   └── test
│       ├── main.cpp
│       ├── python_test.py
│       └── rendertest_spec.json
├── tasks
│   └── R2R
│       ├── Agents
│       │   ├── __init__.py
│       │   └── agent.py
│       ├── Models
│       │   ├── __init__.py
│       │   └── dynamic.py
│       ├── data
│       │   ├── R2R_test.json
│       │   ├── R2R_train.json
│       │   ├── R2R_val_seen.json
│       │   ├── R2R_val_unseen.json
│       │   └── download.sh
│       ├── env.py
│       ├── eval.py
│       ├── main.py
│       ├── results
│       │   ├── data_augmentation
│       │   │   ├── decoder_weights_best
│       │   │   └── encoder_weights_best
│       │   └── normal_data
│       │       ├── decoder_weights_best
│       │       └── encoder_weights_best
│       ├── trainer.py
│       └── utils.py
├── web
│   ├── README.md
│   ├── app
│   │   ├── collect-hit.html
│   │   ├── eval-hit.html
│   │   ├── index.html
│   │   ├── js
│   │   │   ├── Detector.js
│   │   │   ├── Matterport3D.js
│   │   │   ├── PTZCameraControls.js
│   │   │   ├── RequestAnimationFrame.js
│   │   │   └── Trajectory.js
│   │   ├── sample_room_paths.json
│   │   ├── trajectory.html
│   │   └── val_unseen_shortest_agent.json
│   ├── gulpfile.js
│   ├── package-lock.json
│   └── package.json
└── webgl_imgs
├── 17DRP5sb8fy_85c23efeaecd4d43a7dcd5b90137179e_2.551961945320492_0.008557380839564054.png
├── 1LXtFkjw3qL_187589bb7d4644f2943079fb949c0be9_1.8699330579409539_0.0004921836022802584.png
├── 1pXnuDYAj8r_163d61ac7edb43fb958c5d9e69ae11ad_4.626331047551077_-0.02444352614304746.png
├── 29hnd4uzFmX_1576d62e7bbb45e8a5ef9e7bb37b1839_5.844119909926444_-0.0006838914039405167.png
├── 2azQ1b91cZZ_3daad58ad53742038e50d62e91f84e7b_3.1736484087962933_0.016732869758208434.png
├── 2n8kARJN3HM_94ac3cea52ec455993f8562f78da3be1_2.604601935142565_-0.0009188787844489273.png
├── 2t7WUuJeko7_529f006f8293406da0b506defd2891a5_0.032985516949381344_-0.013788837143969411.png
├── 5LpN3gDmAk7_bda8025f20404048a77381e9e0dc0ccf_5.325207878739601_-0.01083211073205187.png
├── 5ZKStnWn8Zo_c76b52856e7c4f2a9a4419000c8e646a_4.13470589902238_-0.02922217527541366.png
├── 5q7pvUzZiYa_397403366d784caf804d741f32fd68b9_2.8746465006968234_-0.0007063598518199811.png
├── 759xd9YjKW5_2343ef3bf04a4433af62f0d527d7512a_3.5451019786019264_-0.016938006310169448.png
├── 7y3sRwLe3Va_9bbf903d50da4ffd9e5d1fb7c9f4d69b_1.7348660165523566_0.008361841032265524.png
├── 8194nk5LbLH_c9e8dc09263e4d0da77d16de0ecddd39_4.05504292862083_0.008533161479170466.png
├── 82sE5b5pLXE_056a491afa534b17bac36f4f5898462a_1.689393931320027_-0.0037883068413356496.png
├── 8WUmhLawc2A_d21aae0b5d944f27a0074525c803fc9f_3.047458184407221_-0.04510889155759994.png
├── ARNzJeq3xxb_9a671e6915de4eb897f45fee8bf2031d_5.616355886953764_0.02583868533558965.png
├── B6ByNegPMKs_e3a65955df26467581c32613c4e9f865_5.230794959607039_0.007265625492957138.png
├── ac26ZMwG7aT_efeef7cc82c84690addb0bf415f075ea_0.07434352566701552_-0.013447513736072197.png
├── b8cTxDM8gDG_f2944e0b66b9461994a7f757582f9bc3_0.0853092784395515_-0.007543204141144086.png
└── cV4RVeZvu5T_1b321779a4374c2b952c51820daa9e6c_6.266463179566256_0.07914721704610106.png
/.gitmodules:
--------------------------------------------------------------------------------
1 | [submodule "pybind11"]
2 | path = pybind11
3 | url = https://github.com/pybind/pybind11.git
4 | [submodule "speaksee"]
5 | path = speaksee
6 | url = https://github.com/aimagelab/speaksee.git
7 |
--------------------------------------------------------------------------------
/CMakeLists.txt:
--------------------------------------------------------------------------------
1 | project(Matterport_Simulator CXX)
2 | cmake_minimum_required(VERSION 2.8)
3 |
4 | option(OSMESA_RENDERING "Offscreen CPU rendering with OSMesa" OFF)
5 | option(EGL_RENDERING "Offscreen GPU rendering with EGL" OFF)
6 |
7 | set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++11")
8 | # Make custom find-modules available
9 | set(CMAKE_MODULE_PATH ${CMAKE_MODULE_PATH} "${PROJECT_SOURCE_DIR}/cmake/Modules")
10 |
11 | if(NOT CMAKE_BUILD_TYPE)
12 | set(CMAKE_BUILD_TYPE Release)
13 | endif()
14 |
15 | include_directories("${PROJECT_SOURCE_DIR}/include")
16 |
17 | find_package(OpenCV REQUIRED)
18 | find_package(PkgConfig REQUIRED)
19 | find_package(OpenMP)
20 | if (OPENMP_CXX_FOUND)
21 | set (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${OpenMP_CXX_FLAGS}")
22 | set (CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} ${OpenMP_EXE_LINKER_FLAGS}")
23 | endif()
24 |
25 | pkg_check_modules(JSONCPP REQUIRED jsoncpp)
26 |
27 | if(EGL_RENDERING)
28 | add_definitions(-DEGL_RENDERING)
29 | find_package(OpenGL REQUIRED COMPONENTS OpenGL EGL)
30 | pkg_check_modules(EPOXY REQUIRED epoxy)
31 | set(GL_LIBS OpenGL::OpenGL OpenGL::EGL ${EPOXY_LIBRARIES})
32 | elseif(OSMESA_RENDERING)
33 | add_definitions(-DOSMESA_RENDERING)
34 | pkg_check_modules(OSMESA REQUIRED osmesa)
35 | set(GL_LIBS ${OSMESA_LIBRARIES})
36 | else()
37 | cmake_policy(SET CMP0072 OLD)
38 | find_package(OpenGL REQUIRED)
39 | find_package(GLEW REQUIRED)
40 | set(GL_LIBS ${OPENGL_LIBRARIES} ${GLEW_LIBRARIES})
41 | endif()
42 |
43 | add_library(MatterSim SHARED src/lib/MatterSim.cpp src/lib/NavGraph.cpp src/lib/Benchmark.cpp src/lib/cbf.cpp)
44 | if(OSMESA_RENDERING)
45 | target_compile_definitions(MatterSim PUBLIC "-DOSMESA_RENDERING")
46 | endif()
47 | target_include_directories(MatterSim PRIVATE ${JSONCPP_INCLUDE_DIRS})
48 | target_link_libraries(MatterSim ${JSONCPP_LIBRARIES} ${OpenCV_LIBS} ${GL_LIBS})
49 |
50 | add_executable(tests src/test/main.cpp)
51 | target_include_directories(tests PRIVATE ${JSONCPP_INCLUDE_DIRS})
52 | target_link_libraries(tests MatterSim ${JSONCPP_LIBRARIES} ${OpenCV_LIBS})
53 |
54 | add_executable(mattersim_main src/driver/mattersim_main.cpp)
55 | target_link_libraries(mattersim_main MatterSim)
56 |
57 | add_subdirectory(pybind11)
58 |
59 | find_package(PythonInterp 2.7)
60 | message(${PYTHON_EXECUTABLE})
61 |
62 | # Need to search for python executable again to pick up an activated
63 | # virtualenv python, if any.
64 | unset(PYTHON_EXECUTABLE CACHE)
65 | find_program(PYTHON_EXECUTABLE python
66 | PATHS ENV PATH # look in the PATH environment variable
67 | NO_DEFAULT_PATH # do not look anywhere else...
68 | )
69 |
70 | find_package(NumPy REQUIRED)
71 |
72 | pybind11_add_module(MatterSimPython src/lib_python/MatterSimPython.cpp)
73 | target_include_directories(MatterSimPython PRIVATE ${NUMPY_INCLUDES})
74 | target_link_libraries(MatterSimPython PRIVATE MatterSim)
75 | set_target_properties(MatterSimPython
76 | PROPERTIES
77 | OUTPUT_NAME MatterSim)
78 |
--------------------------------------------------------------------------------
/Dockerfile:
--------------------------------------------------------------------------------
1 | # Matterport3DSimulator
2 | # Requires nvidia gpu with driver 384.xx or higher
3 |
4 |
5 | FROM nvidia/cudagl:9.0-devel-ubuntu16.04
6 |
7 | # Install a few libraries to support both EGL and OSMESA options
8 | RUN apt-get update && apt-get install -y wget doxygen curl libjsoncpp-dev libepoxy-dev libglm-dev libosmesa6 libosmesa6-dev libglew-dev libopencv-dev python-opencv python-setuptools python-dev
9 | RUN easy_install pip
10 | RUN pip install torch torchvision pandas networkx
11 |
12 | #install latest cmake
13 | ADD https://cmake.org/files/v3.12/cmake-3.12.2-Linux-x86_64.sh /cmake-3.12.2-Linux-x86_64.sh
14 | RUN mkdir /opt/cmake
15 | RUN sh /cmake-3.12.2-Linux-x86_64.sh --prefix=/opt/cmake --skip-license
16 | RUN ln -s /opt/cmake/bin/cmake /usr/local/bin/cmake
17 | RUN cmake --version
18 |
19 |
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | MIT License
2 |
3 | Copyright (c) 2019 Federico Landi
4 |
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 |
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 |
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # Embodied Vision-and-Language Navigation with Dynamic Convolutional Filters
2 |
3 | This is the PyTorch implementation for our paper:
4 |
5 | [**Embodied Vision-and-Language Navigation with Dynamic Convolutional Filters**](https://bmvc2019.org/wp-content/uploads/papers/0384-paper.pdf)
6 | __***Federico Landi***__, Lorenzo Baraldi, Massimiliano Corsini, Rita Cucchiara
7 | British Machine Vision Conference (BMVC), 2019
8 | **Oral Presentation**
9 |
10 | Visit the main [website](http://imagelab.ing.unimore.it/vln-dynamic-filters) for more details.
11 |
12 | ## Reference
13 |
14 | If you use our code for your research, please cite our paper (BMVC 2019 oral):
15 |
16 | ### Bibtex:
17 | ```
18 | @inproceedings{landi2019embodied,
19 | title={Embodied Vision-and-Language Navigation with Dynamic Convolutional Filters},
20 | author={Landi, Federico and Baraldi, Lorenzo and Corsini, Massimiliano and Cucchiara, Rita},
21 | booktitle={Proceedings of the British Machine Vision Conference},
22 | year={2019}
23 | }
24 | ```
25 |
26 | ## Installation
27 |
28 | ### Clone Repo
29 |
30 | Clone the repository:
31 | ```
32 | # Make sure to clone with --recursive
33 | git clone --recursive https://github.com/fdlandi/DynamicConv-agent.git
34 | cd DynamicConv-agent
35 | ```
36 |
37 | If you didn't clone with the `--recursive` flag, you'll need to manually initialize the submodules (pybind11 and speaksee) from the top-level directory:
38 | ```
39 | git submodule update --init --recursive
40 | ```
41 |
42 | ### Python setup
43 |
44 | Python 3.6 is required to run our code. You can install the required Python modules via:
45 | ```
46 | cd speaksee
47 | pip install -e .
48 | cd ..
49 | pip install -r requirements.txt
50 | ```
51 |
52 | ### Building with Docker
53 |
54 | Please follow the instructions in the [Matterport3DSimulator](https://github.com/peteanderson80/Matterport3DSimulator) repository to install the simulator via Docker.
55 |
56 | ### Building without Docker
57 |
58 | The simulator can also be built outside of a Docker container using the cmake build commands described below. However, this is not the recommended approach, as all dependencies will need to be installed locally and may conflict with existing libraries. The main requirements are:
59 | - Ubuntu >= 14.04
60 | - Nvidia-driver with CUDA installed
61 | - C++ compiler with C++11 support
62 | - [CMake](https://cmake.org/) >= 3.10
63 | - [OpenCV](http://opencv.org/) >= 2.4 including 3.x
64 | - [OpenGL](https://www.opengl.org/)
65 | - [GLM](https://glm.g-truc.net/0.9.8/index.html)
66 | - [Numpy](http://www.numpy.org/)
67 |
68 | Optional dependencies (depending on the cmake rendering options):
69 | - [OSMesa](https://www.mesa3d.org/osmesa.html) for OSMesa backend support
70 | - [epoxy](https://github.com/anholt/libepoxy) for EGL backend support
71 |
72 | ### Build and Test
73 |
74 | Build the simulator and run the unit tests:
75 | ```
76 | cd DynamicConv-agent
77 | mkdir build && cd build
78 | cmake -DEGL_RENDERING=ON ..
79 | make
80 | cd ../
81 | ./build/tests ~Timing
82 | ```
83 |
84 | If you use a conda environment for your experiments, you should specify the python path in the cmake options:
85 | ```
86 | cmake -DEGL_RENDERING=ON -DPYTHON_EXECUTABLE:FILEPATH='path_to_your_python_bin' ..
87 | ```
88 |
89 | ### Precomputing ResNet Image Features
90 |
91 | Image features can be precomputed with `scripts/precompute_img_features.py`. Alternatively, skip the generation and just download and extract our tsv files into the `img_features` directory:
92 | - [ResNet-152-imagenet features [380K/2.9GB]](https://www.dropbox.com/s/715bbj8yjz32ekf/ResNet-152-imagenet.zip?dl=1)
93 | - [ResNet-152-places365 features [380K/2.9GB]](https://www.dropbox.com/s/gox1rbdebyaa98w/ResNet-152-places365.zip?dl=1)
94 |
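Once extracted, each tsv row holds the precomputed features for one panoramic viewpoint. Below is a minimal reading sketch, assuming the column layout produced by `scripts/precompute_img_features.py` (scanId, viewpointId, image_w, image_h, vfov, and base64-encoded float32 features of shape 36 x 2048); the file name is illustrative:

```
import base64
import csv
import sys

import numpy as np

csv.field_size_limit(sys.maxsize)  # the base64 feature strings are very long

TSV_FIELDNAMES = ['scanId', 'viewpointId', 'image_w', 'image_h', 'vfov', 'features']

features = {}
with open('img_features/ResNet-152-imagenet.tsv') as f:
    reader = csv.DictReader(f, delimiter='\t', fieldnames=TSV_FIELDNAMES)
    for item in reader:
        key = item['scanId'] + '_' + item['viewpointId']
        features[key] = np.frombuffer(
            base64.b64decode(item['features']), dtype=np.float32
        ).reshape((36, 2048))
```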
95 |
96 | ## Training and Testing
97 |
98 | You can train our agent by running:
99 | ```
100 | python tasks/R2R/main.py
101 | ```
102 | The number of dynamic filters can be set with the `--num_heads` parameter:
103 | ```
104 | python tasks/R2R/main.py --num_heads=4
105 | ```
106 |
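For intuition, here is an illustrative PyTorch sketch of the idea behind dynamic convolutional filters. It is a simplified stand-in, not the actual model in `tasks/R2R/Models/dynamic.py`: the encoded instruction is mapped to `num_heads` 1x1 kernels, which are convolved with the image features to produce one response map per head.

```
import torch
import torch.nn.functional as F

num_heads, feat_dim = 4, 2048
instruction_emb = torch.randn(1, 512)             # e.g. an LSTM-encoded instruction
kernel_gen = torch.nn.Linear(512, num_heads * feat_dim)

# Generate one 1x1 convolutional kernel per head from the instruction.
filters = kernel_gen(instruction_emb).view(num_heads, feat_dim, 1, 1)

img_feats = torch.randn(1, feat_dim, 7, 7)        # e.g. a ResNet-152 feature map
response = F.conv2d(img_feats, filters)           # shape: (1, num_heads, 7, 7)
```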
107 | ## Reproducibility Note
108 |
109 | Results in our paper were obtained with version v0.1 of the Matterport3DSimulator. Due to this difference, results obtained with this code may vary slightly from those reported in the paper. Using different GPUs for training, as well as different random seeds, may also affect results.
110 |
111 | We provide the weights obtained with our training. To reproduce results from the paper, run:
112 | ```
113 | python tasks/R2R/main.py --name=normal_data --num_heads=4 --eval_only
114 | ```
115 |
116 | or:
117 | ```
118 | python tasks/R2R/main.py --name=data_augmentation --num_heads=4 --eval_only
119 | ```
120 |
121 | ## License
122 |
123 | The Matterport3D dataset, and data derived from it, is released under the [Matterport3D Terms of Use](http://dovahkiin.stanford.edu/matterport/public/MP_TOS.pdf). Our code is released under the MIT license.
124 |
--------------------------------------------------------------------------------
/cmake/Modules/FindNumPy.cmake:
--------------------------------------------------------------------------------
1 | #-------------------------------------------------------------------------------
2 | # Copyright (c) 2013, Lars Baehren
3 | # All rights reserved.
4 | #
5 | # Redistribution and use in source and binary forms, with or without modification,
6 | # are permitted provided that the following conditions are met:
7 | #
8 | # * Redistributions of source code must retain the above copyright notice, this
9 | # list of conditions and the following disclaimer.
10 | # * Redistributions in binary form must reproduce the above copyright notice,
11 | # this list of conditions and the following disclaimer in the documentation
12 | # and/or other materials provided with the distribution.
13 | #
14 | # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
15 | # AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16 | # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
17 | # DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
18 | # FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
19 | # DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
20 | # SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
21 | # CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
22 | # OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
23 | # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
24 | #-------------------------------------------------------------------------------
25 |
26 | # - Check for the presence of NumPy
27 | #
28 | # The following variables are set when NumPy is found:
29 | # NUMPY_FOUND = Set to true, if all components of NUMPY have been found.
30 | # NUMPY_INCLUDES = Include path for the header files of NUMPY
31 | # NUMPY_LIBRARIES = Link these to use NUMPY
32 | # NUMPY_LFLAGS = Linker flags (optional)
33 |
34 | if (NOT NUMPY_FOUND)
35 |
36 | if (NOT NUMPY_ROOT_DIR)
37 | set (NUMPY_ROOT_DIR ${CMAKE_INSTALL_PREFIX})
38 | endif (NOT NUMPY_ROOT_DIR)
39 |
40 | if (NOT PYTHONINTERP_FOUND)
41 | find_package (PythonInterp)
42 | endif (NOT PYTHONINTERP_FOUND)
43 |
44 | ##__________________________________________________________________________
45 | ## Check for the header files
46 |
47 | ## Use Python to determine the include directory
48 | execute_process (
49 | COMMAND ${PYTHON_EXECUTABLE} -c import\ numpy\;\ print\(numpy.get_include\(\)\)\;
50 | ERROR_VARIABLE NUMPY_FIND_ERROR
51 | RESULT_VARIABLE NUMPY_FIND_RESULT
52 | OUTPUT_VARIABLE NUMPY_FIND_OUTPUT
53 | OUTPUT_STRIP_TRAILING_WHITESPACE
54 | )
55 | ## process the output from the execution of the command
56 | if (NOT NUMPY_FIND_RESULT)
57 | set (NUMPY_INCLUDES ${NUMPY_FIND_OUTPUT})
58 | endif (NOT NUMPY_FIND_RESULT)
59 |
60 | ##__________________________________________________________________________
61 | ## Check for the library
62 |
63 | unset (NUMPY_LIBRARIES)
64 |
65 | if (PYTHON_SITE_PACKAGES)
66 | find_library (NUMPY_NPYMATH_LIBRARY npymath
67 | HINTS ${PYTHON_SITE_PACKAGES}/numpy/core
68 | PATH_SUFFIXES lib
69 | )
70 | if (NUMPY_NPYMATH_LIBRARY)
71 | list (APPEND NUMPY_LIBRARIES ${NUMPY_NPYMATH_LIBRARY})
72 | endif (NUMPY_NPYMATH_LIBRARY)
73 | endif (PYTHON_SITE_PACKAGES)
74 |
75 | ##__________________________________________________________________________
76 | ## Get API version of NumPy from 'numpy/numpyconfig.h'
77 |
78 | if (PYTHON_EXECUTABLE)
79 | execute_process (
80 | COMMAND ${PYTHON_EXECUTABLE} -c import\ numpy\;\ print\(numpy.__version__\)\;
81 | ERROR_VARIABLE NUMPY_API_VERSION_ERROR
82 | RESULT_VARIABLE NUMPY_API_VERSION_RESULT
83 | OUTPUT_VARIABLE NUMPY_API_VERSION
84 | OUTPUT_STRIP_TRAILING_WHITESPACE
85 | )
86 | else ()
87 | ## Backup procedure: extract version number directly from the header file
88 | if (NUMPY_INCLUDES)
89 | find_file (HAVE_NUMPYCONFIG_H numpy/numpyconfig.h
90 | HINTS ${NUMPY_INCLUDES}
91 | )
92 | endif (NUMPY_INCLUDES)
93 | endif ()
94 |
95 | ## Dissect full version number into major, minor and patch version
96 | if (NUMPY_API_VERSION)
97 | string (REGEX REPLACE "\\." ";" _tmp ${NUMPY_API_VERSION})
98 | list (GET _tmp 0 NUMPY_API_VERSION_MAJOR)
99 | list (GET _tmp 1 NUMPY_API_VERSION_MINOR)
100 | list (GET _tmp 2 NUMPY_API_VERSION_PATCH)
101 | endif (NUMPY_API_VERSION)
102 |
103 | ##__________________________________________________________________________
104 | ## Actions taken when all components have been found
105 |
106 | find_package_handle_standard_args (NUMPY DEFAULT_MSG NUMPY_INCLUDES)
107 |
108 | if (NUMPY_FOUND)
109 | if (NOT NUMPY_FIND_QUIETLY)
110 | message (STATUS "Found components for NumPy")
111 | message (STATUS "NUMPY_ROOT_DIR = ${NUMPY_ROOT_DIR}")
112 | message (STATUS "NUMPY_INCLUDES = ${NUMPY_INCLUDES}")
113 | message (STATUS "NUMPY_LIBRARIES = ${NUMPY_LIBRARIES}")
114 | message (STATUS "NUMPY_API_VERSION = ${NUMPY_API_VERSION}")
115 | endif (NOT NUMPY_FIND_QUIETLY)
116 | else (NUMPY_FOUND)
117 | if (NUMPY_FIND_REQUIRED)
118 | message (FATAL_ERROR "Could not find NUMPY!")
119 | endif (NUMPY_FIND_REQUIRED)
120 | endif (NUMPY_FOUND)
121 |
122 | ##__________________________________________________________________________
123 | ## Mark advanced variables
124 |
125 | mark_as_advanced (
126 | NUMPY_ROOT_DIR
127 | NUMPY_INCLUDES
128 | NUMPY_LIBRARIES
129 | )
130 |
131 | endif (NOT NUMPY_FOUND)
132 |
--------------------------------------------------------------------------------
/connectivity/8194nk5LbLH_connectivity.json:
--------------------------------------------------------------------------------
1 | [{"image_id":"c9e8dc09263e4d0da77d16de0ecddd39","pose":[-0.611043,-0.00396746,-0.791588,-0.213904,0.791585,-0.00882497,-0.610996,2.305,-0.00456166,-0.999953,0.00853306,1.56916,0,0,0,1],"included":true,"visible":[false,false,false,false,true,true,false,true,true,true,false,false,false,false,false,false,false,false,false,false],"unobstructed":[false,false,false,false,true,false,false,false,true,true,false,false,false,false,false,false,false,false,false,false],"height":1.5826326295962942},{"image_id":"286b0c2d9a46408ba80b6ccebb21e582","pose":[0.951596,0.00201098,0.307346,6.58012,-0.307351,0.00915895,0.951552,-2.96479,-0.000901435,-0.999956,0.00933374,4.36353,0,0,0,1],"included":true,"visible":[false,false,true,true,false,false,false,false,false,false,false,false,false,true,false,true,false,true,false,true],"unobstructed":[false,false,false,true,false,false,false,false,false,false,false,false,false,true,false,false,false,true,true,false],"height":1.5712253956498747},{"image_id":"6776097c17ed4b93aee61704eb32f06c","pose":[-0.711582,-0.00419131,-0.702591,-1.68941,0.702575,0.00464776,-0.711594,-5.37908,0.00624796,-0.99998,-0.000362505,1.58622,0,0,0,1],"included":true,"visible":[false,false,false,false,false,true,true,true,false,true,false,true,false,false,true,false,true,false,false,false],"unobstructed":[false,false,false,false,false,true,true,false,false,false,false,true,false,false,false,false,false,false,false,true],"height":1.5804941871490743},{"image_id":"8c7e8da7d4a44ab695e6b3195eac0cf1","pose":[0.709879,0.011247,0.704234,8.62929,-0.70424,-0.00407304,0.70995,-1.77115,0.0108531,-0.999928,0.00502926,4.38556,0,0,0,1],"included":true,"visible":[false,true,false,false,false,false,false,false,false,false,true,false,true,true,false,false,false,true,true,false],"unobstructed":[false,true,false,false,false,false,false,false,false,false,true,false,false,true,false,false,false,true,true,false],"height":1.585645804390483},{"image_id":"f33c718aaf2c41469389a87944442c62","pose":[0.619478,0.0166688,0.784837,-3.88437,-0.784902,-0.00375152,0.619609,-0.528748,0.0132725,-0.999854,0.0107595,1.58368,0,0,0,1],"included":true,"visible":[true,false,false,false,false,false,false,false,false,true,false,false,false,false,false,false,false,false,false,true],"unobstructed":[true,false,false,false,false,false,false,false,false,true,false,false,false,false,false,false,false,false,false,true],"height":1.5829827809014503},{"image_id":"fcd90a404061413385286bef9662630e","pose":[-0.111393,0.00837906,0.993741,2.80245,-0.993773,-0.00348217,-0.111367,-3.78204,0.0025272,-0.999959,0.00871482,1.58057,0,0,0,1],"included":true,"visible":[true,false,true,false,false,false,false,true,false,false,false,false,false,false,false,true,false,false,false,false],"unobstructed":[false,false,true,false,false,false,true,true,true,false,false,false,false,false,false,false,false,false,false,false],"height":1.5763528408163245},{"image_id":"c07d4ae8330542a09cf8f8dddb9728ce","pose":[-0.985207,-0.0101267,0.171069,0.656519,-0.171094,0.00168538,-0.985253,-5.08928,0.00968898,-0.999947,-0.00339301,1.57611,0,0,0,1],"included":true,"visible":[true,false,true,false,false,true,false,true,false,false,false,false,false,false,true,false,true,false,false,true],"unobstructed":[false,false,true,false,false,true,false,true,false,false,false,true,false,false,false,false,false,false,false,false],"height":1.575276915205382},{"image_id":"2393bffb53fe4205bcc67796c6fb76e3","pose":[-0.241654,0.00228344,-0.97036,3.33582,0.970294,0.0124463,-0.241608,-5.90025,0.0115256,-0.99
992,-0.00522325,1.57791,0,0,0,1],"included":true,"visible":[false,false,true,false,false,true,true,false,false,false,false,false,false,false,true,false,true,false,false,false],"unobstructed":[false,false,false,false,false,true,true,false,false,false,false,false,false,false,true,false,false,false,false,false],"height":1.5730354249357412},{"image_id":"71bf74df73cd4e24a191ef4f2338ca22","pose":[0.906931,-0.00688335,-0.421222,0.122562,0.421182,-0.00662188,0.906952,-0.00319673,-0.00903217,-0.999954,-0.00310641,1.57207,0,0,0,1],"included":true,"visible":[true,false,false,false,true,true,true,true,false,true,false,false,false,false,false,false,false,false,false,false],"unobstructed":[true,false,false,false,false,true,false,false,false,true,false,false,false,false,false,false,false,false,false,false],"height":1.570272020216938},{"image_id":"be8a2edacab34ec8887ba6a7b1e4945f","pose":[0.791463,0.0101015,0.611133,-3.50132,-0.611154,-0.00121731,0.791511,1.58103,0.00873934,-0.999948,0.00521015,1.56992,0,0,0,1],"included":true,"visible":[true,false,true,false,true,false,false,true,false,false,false,true,false,false,false,false,false,false,false,true],"unobstructed":[true,false,false,false,true,false,false,false,true,false,false,false,false,false,false,false,false,false,false,false],"height":1.577126892771864},{"image_id":"9bdde31adaa1443bb206b09bfa3c474c","pose":[0.799844,0.0047414,0.60019,8.67581,-0.600208,0.0075118,0.799809,-4.8108,-0.000716311,-0.99996,0.00885413,2.82261,0,0,0,1],"included":true,"visible":[false,false,false,true,false,false,false,false,false,false,false,false,false,true,false,false,true,true,false,false],"unobstructed":[false,false,false,true,false,false,false,false,false,false,false,false,false,false,false,false,true,false,false,false],"height":1.58264400638767},{"image_id":"66d4adb61b57494aa2c1ad141a0fad9b","pose":[-0.34536,-0.0108675,-0.938407,-2.27885,0.938436,0.00459882,-0.345423,-3.2282,0.00806945,-0.99993,0.00861029,1.58739,0,0,0,1],"included":true,"visible":[false,false,true,false,false,true,true,true,false,true,false,false,false,false,true,true,false,false,false,true],"unobstructed":[false,false,true,false,false,false,true,false,false,false,false,false,false,false,false,false,false,false,false,true],"height":1.5705441219971223},{"image_id":"83ff709c0e3e46079836153ea5c7feac","pose":[0.68423,0.0137303,0.729137,3.42529,-0.729235,0.00364543,0.684254,1.65175,0.00673696,-0.999899,0.012507,4.37069,0,0,0,1],"included":true,"visible":[false,false,false,true,false,false,false,false,false,false,false,false,false,false,false,false,false,false,true,false],"unobstructed":[false,false,false,false,false,false,false,false,false,false,false,false,false,false,false,false,false,false,true,false],"height":1.578378655072358},{"image_id":"d9e325df2f3948679c78b93d8025e2da","pose":[0.826698,0.0192407,0.562317,8.49764,-0.562455,0.00220125,0.826825,-0.816805,0.0146709,-0.999812,0.0126418,4.38875,0,0,0,1],"included":true,"visible":[false,true,false,true,false,false,false,false,false,false,true,false,true,false,false,false,false,true,true,false],"unobstructed":[false,true,false,true,false,false,false,false,false,false,false,false,false,false,false,false,false,true,true,false],"height":1.5865892751674604},{"image_id":"423efb97f77f4e7995f19c66fe82afbc","pose":[0.958879,0.00141119,0.283813,5.51819,-0.283808,0.0124035,0.958801,-5.67527,-0.00216725,-0.999922,0.012294,1.58856,0,0,0,1],"included":true,"visible":[false,false,true,false,false,false,true,true,false,false,false,false,false,false,false,false,true,fals
e,false,false],"unobstructed":[false,false,false,false,false,false,false,true,false,false,false,false,false,false,false,false,true,false,false,false],"height":1.5784339701720043},{"image_id":"6c49579a5cd34df8acb7f790b74e9eae","pose":[-0.95716,-0.00676032,-0.289482,-6.48379,0.289538,-0.00977451,-0.957117,-2.57899,0.00364085,-0.999929,0.0113132,1.59886,0,0,0,1],"included":true,"visible":[false,false,false,false,false,false,false,false,false,false,false,true,false,false,false,false,false,false,false,true],"unobstructed":[false,false,false,false,false,false,false,false,false,false,false,false,false,false,false,false,false,false,false,true],"height":1.5798282335589897},{"image_id":"aeed67040d744240b188f66f17d87d43","pose":[0.132175,0.0257204,0.990893,7.67989,-0.991226,0.00381825,0.132121,-5.81072,-0.000385302,-0.999662,0.0259995,2.29866,0,0,0,1],"included":true,"visible":[false,false,true,false,false,false,true,true,false,false,true,false,false,false,true,false,false,false,false,false],"unobstructed":[false,false,false,false,false,false,false,false,false,false,true,false,false,false,true,false,false,false,false,false],"height":1.6026680667792301},{"image_id":"aae01016bb354f78bd6db86e9d71af2b","pose":[0.0788252,0.00384462,0.996881,6.79041,-0.996887,0.00184069,0.0788186,-0.995862,-0.00153193,-0.999991,0.0039778,4.37219,0,0,0,1],"included":true,"visible":[false,true,false,true,false,false,false,false,false,false,false,false,false,true,false,false,false,false,true,false],"unobstructed":[false,true,false,true,false,false,false,false,false,false,false,false,false,true,false,false,false,false,true,false],"height":1.5770919536040346},{"image_id":"346b680ac5904359a1859c929ad312b6","pose":[-0.589008,0.00463239,0.808114,5.58585,-0.808123,0.00000695791,-0.589015,0.644327,-0.00273419,-0.999989,0.00373948,4.38174,0,0,0,1],"included":true,"visible":[false,true,false,true,false,false,false,false,false,false,false,false,true,true,false,false,false,true,false,false],"unobstructed":[false,true,false,true,false,false,false,false,false,false,false,false,true,true,false,false,false,true,false,false],"height":1.5707587596461066},{"image_id":"ae91518ed77047b3bdeeca864cd04029","pose":[0.310985,0.0070688,0.950389,-4.60607,-0.950392,-0.00460962,0.31102,-2.5949,0.00657945,-0.999964,0.00528466,1.58581,0,0,0,1],"included":true,"visible":[false,false,true,false,true,true,false,true,false,true,false,true,false,false,false,true,false,false,false,false],"unobstructed":[false,false,true,false,true,false,false,false,false,false,false,true,false,false,false,true,false,false,false,false],"height":1.5747548700639524}]
--------------------------------------------------------------------------------
/connectivity/GdvgFV5R1Z5_connectivity.json:
--------------------------------------------------------------------------------
1 | [{"image_id":"0b02e18654324edd8d74c078b66bfb20","pose":[-0.057695,-0.000357129,0.998334,-2.46692,-0.998304,-0.00769199,-0.0576965,-3.15814,0.00770012,-0.99997,0.0000884733,1.5171,0,0,0,1],"included":true,"visible":[false,true,false,false,false,true,true,true,true,false,true,false],"unobstructed":[false,false,false,false,false,true,false,true,true,false,true,false],"height":1.51470410293751},{"image_id":"1db1c0a09ecf40d188197efc05ced3bb","pose":[-0.442443,0.0138817,0.896688,-4.03893,-0.89679,-0.0101225,-0.442338,-3.05434,0.00293664,-0.999852,0.0169288,0.974424,0,0,0,1],"included":true,"visible":[true,false,false,false,true,true,false,false,true,false,false,true],"unobstructed":[false,false,false,false,false,true,false,false,true,false,false,true],"height":0.9701803380402906},{"image_id":"6178647ca8d14dc09370f6c1b7ed2fd6","pose":[-0.870025,0.0056275,0.492973,-3.69279,-0.493005,-0.0105975,-0.869962,1.95433,0.000328893,-0.999927,0.0119957,1.51516,0,0,0,1],"included":true,"visible":[false,false,false,true,false,false,true,true,false,false,true,false],"unobstructed":[false,false,false,true,false,false,true,true,false,true,true,false],"height":1.517582101716661},{"image_id":"565cc21cd28b4ee6bb5ba83c5270c032","pose":[0.0242634,0.000986587,-0.999704,-3.91782,0.999699,0.00333371,0.024267,0.178675,0.00335701,-0.999993,-0.0009042,1.50868,0,0,0,1],"included":true,"visible":[false,false,true,false,false,false,true,false,false,true,true,false],"unobstructed":[false,false,true,false,false,false,false,false,false,true,true,false],"height":1.5114421933143356},{"image_id":"ef638e508e054c4aabd49b38d1b88fc7","pose":[0.0820523,0.0151057,0.996513,-4.61631,-0.995947,-0.0356725,0.0825462,-2.18899,0.0367954,-0.999249,0.0121187,1.52757,0,0,0,1],"included":true,"visible":[false,true,false,false,false,true,false,false,true,false,false,true],"unobstructed":[false,false,false,false,false,true,false,false,true,false,false,true],"height":1.5162868543024455},{"image_id":"97ed68de989e44fdaf2d9b949898fab6","pose":[0.0900997,0.0149714,0.99582,-3.64126,-0.995713,-0.0195971,0.0903844,-3.16818,0.0208687,-0.999695,0.0131427,1.52081,0,0,0,1],"included":true,"visible":[true,true,false,false,true,false,false,false,true,false,false,true],"unobstructed":[true,true,false,false,true,false,false,false,true,false,false,true],"height":1.5211418713547455},{"image_id":"5fd70cff4992429a99a84fd3c117ccb5","pose":[-0.0539877,-0.000800861,-0.998541,0.0108044,0.998337,0.0201438,-0.0539926,0.00604319,0.020158,-0.999796,-0.000286778,1.51223,0,0,0,1],"included":true,"visible":[true,false,true,true,false,false,false,true,false,true,true,false],"unobstructed":[false,false,true,false,false,false,false,true,false,false,true,false],"height":1.5113248528175798},{"image_id":"86d342c576ff46a9828d2ba377cc8cd5","pose":[0.998173,0.0151118,-0.0584746,-1.78347,0.0584707,0.000718574,0.998288,-1.89835,0.0151283,-0.999885,-0.000165129,1.52238,0,0,0,1],"included":true,"visible":[true,false,true,false,false,false,true,false,false,false,true,false],"unobstructed":[true,false,true,false,false,false,true,false,false,false,true,false],"height":1.5103397372923053},{"image_id":"8dba9ff900b14f9b84ead660f5f7f701","pose":[-0.999855,-0.0144511,0.00887107,-4.11579,-0.00895392,0.00564829,-0.999943,-2.90606,0.0144005,-0.999879,-0.00577567,1.51617,0,0,0,1],"included":true,"visible":[true,true,false,false,true,true,false,false,false,false,false,true],"unobstructed":[true,true,false,false,true,true,false,false,false,false,false,true],"height":1.5112098807574073},{"image_id":"0d8c5
fbfd73f44e28d6da370520611e4","pose":[0.0769887,0.00664334,0.997009,-6.15424,-0.997016,-0.00490415,0.0770216,-0.0398163,0.00540151,-0.999965,0.00624716,1.50965,0,0,0,1],"included":true,"visible":[false,false,true,true,false,false,true,false,false,false,true,false],"unobstructed":[false,false,true,true,false,false,false,false,false,false,false,false],"height":1.5058928427471967},{"image_id":"aebb1de49d21485e8bef7633dfb58761","pose":[-0.0229751,-0.0058052,-0.999718,-1.94579,0.999719,0.00553997,-0.0230069,-0.026534,0.00567231,-0.999967,0.0056775,1.50582,0,0,0,1],"included":true,"visible":[true,false,true,true,false,false,true,true,false,true,false,false],"unobstructed":[true,false,true,true,false,false,true,true,false,false,false,false],"height":1.5101720791580233},{"image_id":"e34e51f3d6584ad09c510de5db84752f","pose":[-0.0418368,-0.0124855,0.999046,-3.99281,-0.993607,-0.104406,-0.0429142,-2.13265,0.104842,-0.994456,-0.00803644,0.980264,0,0,0,1],"included":true,"visible":[false,true,false,false,true,true,false,false,true,false,false,false],"unobstructed":[false,true,false,false,true,true,false,false,true,false,false,false],"height":0.969584316081611}]
--------------------------------------------------------------------------------
/connectivity/README.md:
--------------------------------------------------------------------------------
1 | ## connectivity
2 | Connectivity graphs indicating the navigable paths between viewpoints in each scan.
3 |
4 | Each json file contains an array of annotations, one for each viewpoint in the scan. All annotations share the same basic structure as follows:
5 |
6 | ```
7 | {
8 | "image_id": str,
9 | "pose": [float x 16],
10 | "included": boolean,
11 | "visible": [boolean x num_viewpoints],
12 | "unobstructed": [boolean x num_viewpoints],
13 | "height": float
14 | }
15 | ```
16 | - `image_id`: matterport skybox prefix
17 | - `pose`: 4x4 matrix in row major order that transforms matterport skyboxes to global coordinates (z-up). Pose matrices are based on the assumption that the camera is facing skybox image 3.
18 | - `included`: whether viewpoint is included in the simulator. Some overlapping viewpoints are excluded.
19 | - `visible`: indicates other viewpoints that can be seen from this viewpoint.
20 | - `unobstructed`: indicates transitions to other viewpoints that are considered navigable for an agent.
21 | - `height`: estimated height of the viewpoint above the floor. Not required for the simulator.
22 |
23 | Units are in metres.
24 |
25 | `scans.txt` contains a list of all the scan ids in the dataset.
26 |
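As a quick illustration, a connectivity graph can be loaded with a few lines of Python (the scan id and the networkx dependency are just examples, not requirements of the format):

```
import json

import networkx as nx

# Build a navigation graph from one connectivity file, keeping only
# included viewpoints and unobstructed (navigable) transitions.
with open('connectivity/8194nk5LbLH_connectivity.json') as f:
    annotations = json.load(f)

graph = nx.Graph()
for item in annotations:
    if not item['included']:
        continue
    # pose is a row-major 4x4 matrix; the translation is in the last column
    x, y, z = item['pose'][3], item['pose'][7], item['pose'][11]
    graph.add_node(item['image_id'], position=(x, y, z), height=item['height'])
    for j, navigable in enumerate(item['unobstructed']):
        if navigable and annotations[j]['included']:
            graph.add_edge(item['image_id'], annotations[j]['image_id'])

print(graph.number_of_nodes(), 'viewpoints,', graph.number_of_edges(), 'edges')
```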
--------------------------------------------------------------------------------
/connectivity/YmJkqBEsHnH_connectivity.json:
--------------------------------------------------------------------------------
1 | [{"image_id":"006933a75f764c5485cf284bea0ded0b","pose":[0.210914,-0.00824746,-0.977469,-7.64722,0.977278,0.0232484,0.210677,-2.15553,0.0209873,-0.999695,0.0129646,1.56695,0,0,0,1],"included":true,"visible":[false,false,true,false,true,true,false,true,true,true,false],"unobstructed":[false,false,false,false,false,false,false,true,true,false,false],"height":1.524793092035509},{"image_id":"e4ede0695e4e4a77aae8537abb9f11d3","pose":[-0.0422212,-0.0176246,-0.998952,-0.133122,0.998904,0.0194092,-0.0425613,-0.0184591,0.0201393,-0.999656,0.016787,1.48352,0,0,0,1],"included":true,"visible":[false,false,true,true,false,false,false,false,false,false,false],"unobstructed":[false,false,true,false,false,true,false,false,false,false,false],"height":1.5227398475592409},{"image_id":"d471e89e00be49f49a7ecace814d60bf","pose":[0.426939,-0.00370058,-0.904272,-0.421886,0.904055,0.0239963,0.426739,-2.12366,0.0201203,-0.999705,0.0135916,1.49477,0,0,0,1],"included":true,"visible":[true,true,false,true,true,true,false,true,true,true,false],"unobstructed":[false,true,false,true,false,true,false,false,false,false,false],"height":1.5263900136377955},{"image_id":"b34af02ce9b642ebbd0c7e9e0ba3b553","pose":[0.960272,0.00870611,-0.278924,-0.0905727,0.278755,0.0168277,0.960214,-3.55265,0.0130537,-0.99982,0.0137334,1.49061,0,0,0,1],"included":true,"visible":[true,true,true,false,false,false,false,false,false,false,false],"unobstructed":[false,false,true,false,false,true,false,false,false,false,false],"height":1.5323637229797105},{"image_id":"01c80b5f8fbd4c969ee0bc03f1ec7a6c","pose":[0.359562,-0.0105291,-0.933061,-3.77309,0.932771,0.0313799,0.359097,-2.1838,0.0254987,-0.999452,0.0211054,1.53932,0,0,0,1],"included":true,"visible":[true,false,true,false,false,true,false,true,true,true,false],"unobstructed":[false,false,false,false,false,true,false,true,false,false,false],"height":1.5286629461398107},{"image_id":"82ea5baa30f945fe98f6cad3064af847","pose":[0.0376233,-0.0115611,-0.999224,-2.01669,0.998821,0.0310955,0.0372487,-2.16965,0.030641,-0.999449,0.0127185,1.50807,0,0,0,1],"included":true,"visible":[true,true,true,true,true,false,false,true,true,true,false],"unobstructed":[false,true,true,true,true,false,false,false,false,false,false],"height":1.5253207999550662},{"image_id":"aecbb791f30b452a9236c5a8c7030663","pose":[0.296076,-0.0242641,-0.954855,-13.5955,0.955111,0.0179483,0.2957,-2.22547,0.00996343,-0.999544,0.0284901,1.59272,0,0,0,1],"included":true,"visible":[true,false,true,false,true,true,false,true,true,true,true],"unobstructed":[false,false,false,false,false,false,false,false,false,true,true],"height":1.7557263982456066},{"image_id":"d841f7b710f9470796d55561f8f524db","pose":[0.270437,0.002913,-0.962732,-5.77716,0.962325,0.0284129,0.27041,-2.21321,0.028142,-0.999591,0.00488176,1.55947,0,0,0,1],"included":true,"visible":[true,false,true,false,true,true,false,false,true,true,false],"unobstructed":[true,false,false,false,true,false,false,false,false,false,false],"height":1.5357935019251416},{"image_id":"8e38fdd81c7949db9646968bafbbdcfc","pose":[-0.00277118,-0.0169575,-0.999852,-9.93905,0.999791,0.020127,-0.00311204,-2.17463,0.0201771,-0.999653,0.0168993,1.60592,0,0,0,1],"included":true,"visible":[true,false,true,false,true,true,false,true,false,true,true],"unobstructed":[true,false,false,false,false,false,false,false,false,true,false],"height":1.5208970888736792},{"image_id":"20fd759be0b64fc9aa96d290f0a704ec","pose":[0.227815,0.0117555,-0.973633,-12.1161,0.973367,0.0235263,0.228037,-2.15724,0.025587,-0.999654,-0.0060817
2,1.59969,0,0,0,1],"included":true,"visible":[true,false,true,false,true,true,true,true,true,false,true],"unobstructed":[false,false,false,false,false,false,true,false,true,false,false],"height":1.5261379179165138},{"image_id":"d838acff82244c2da0cf2651e54966cb","pose":[0.310234,-0.0632421,-0.948553,-15.2317,0.950604,0.0313736,0.308813,-2.28133,0.0102298,-0.997504,0.0698525,0.902626,0,0,0,1],"included":true,"visible":[true,false,true,false,true,true,true,true,true,true,false],"unobstructed":[false,false,false,false,false,false,true,false,false,false,false],"height":1.558854711359605}]
--------------------------------------------------------------------------------
/connectivity/gZ6f7yhEvPG_connectivity.json:
--------------------------------------------------------------------------------
1 | [{"image_id":"80929af5cf234ae38ac3a2a4e60e4342","pose":[0.983395,0.00450812,-0.181418,-2.79247,0.181442,-0.00570068,0.983385,-1.38801,0.00339928,-0.999973,-0.00642298,1.42676,0,0,0,1],"included":true,"visible":[false,true,true,false,false,true,false,false],"unobstructed":[false,true,false,true,false,true,false,false],"height":1.4191402375960298},{"image_id":"ba27da20782d4e1a825f0a133ad84da9","pose":[-0.7605,-0.0115739,-0.649234,-2.38988,0.648885,0.0237502,-0.760515,-0.0538717,0.0242219,-0.999651,-0.0105509,1.4341,0,0,0,1],"included":true,"visible":[true,false,true,true,false,true,false,true],"unobstructed":[true,false,false,false,false,true,false,true],"height":1.424939020658826},{"image_id":"46cecea0b30e4786b673f5e951bf82d4","pose":[0.593129,0.0137361,-0.80499,0.99933,0.804932,0.010707,0.59327,1.17558,0.0167685,-0.999848,-0.00470498,1.41684,0,0,0,1],"included":true,"visible":[false,false,false,true,true,false,true,true],"unobstructed":[false,false,false,true,true,false,true,true],"height":1.4252108727703763},{"image_id":"bda7a9e6d1d94b3aa8ff491beb158f3a","pose":[-0.378592,-0.0208239,0.925329,-0.182918,-0.925433,-0.00820128,-0.37882,-1.72967,0.0154776,-0.999749,-0.0161651,1.42205,0,0,0,1],"included":true,"visible":[true,false,true,false,true,false,true,true],"unobstructed":[true,false,true,false,true,false,false,true],"height":1.42983949725488},{"image_id":"dbb2f8000bc04b3ebcd0a55112786149","pose":[-0.595363,0.00457706,-0.803444,1.10196,0.803383,0.0168543,-0.595222,-1.10724,0.0108174,-0.999847,-0.0137106,1.41536,0,0,0,1],"included":true,"visible":[false,false,true,true,false,false,true,true],"unobstructed":[false,false,true,true,false,false,true,true],"height":1.4186255623107038},{"image_id":"29b20fa80dcd4771974303c1ccd8953f","pose":[0.292738,0.0164579,-0.956051,-2.77306,0.956096,0.0090939,0.292909,1.55377,0.0135152,-0.999823,-0.0130722,1.43367,0,0,0,1],"included":true,"visible":[true,true,true,false,true,false,false,false],"unobstructed":[true,true,false,false,false,false,false,false],"height":1.4237594118402337},{"image_id":"0ee20663dfa34b438d48750ddcd7366c","pose":[-0.75968,-0.0019971,-0.650293,-0.111567,0.650131,0.0201598,-0.759554,1.31337,0.014627,-0.999794,-0.0140156,1.42291,0,0,0,1],"included":true,"visible":[false,false,true,true,true,false,false,true],"unobstructed":[false,false,true,false,true,false,false,true],"height":1.4276556862049736},{"image_id":"47d8a8282c1c4a7fb3eeeacc45e9d959","pose":[-0.0254788,0.00643152,-0.999654,-0.0034508,0.999603,0.0120797,-0.0253995,0.0112371,0.0119124,-0.999906,-0.00673574,1.42388,0,0,0,1],"included":true,"visible":[true,true,true,true,true,false,true,false],"unobstructed":[false,true,true,true,true,false,true,false],"height":1.4268855357216241}]
--------------------------------------------------------------------------------
/connectivity/scans.txt:
--------------------------------------------------------------------------------
1 | 17DRP5sb8fy
2 | 1LXtFkjw3qL
3 | 1pXnuDYAj8r
4 | 29hnd4uzFmX
5 | 2azQ1b91cZZ
6 | 2n8kARJN3HM
7 | 2t7WUuJeko7
8 | 5LpN3gDmAk7
9 | 5q7pvUzZiYa
10 | 5ZKStnWn8Zo
11 | 759xd9YjKW5
12 | 7y3sRwLe3Va
13 | 8194nk5LbLH
14 | 82sE5b5pLXE
15 | 8WUmhLawc2A
16 | aayBHfsNo7d
17 | ac26ZMwG7aT
18 | ARNzJeq3xxb
19 | B6ByNegPMKs
20 | b8cTxDM8gDG
21 | cV4RVeZvu5T
22 | D7G3Y4RVNrH
23 | D7N2EKCX4Sj
24 | dhjEzFoUFzH
25 | E9uDoFAP3SH
26 | e9zR4mvMWw7
27 | EDJbREhghzL
28 | EU6Fwq7SyZv
29 | fzynW3qQPVF
30 | GdvgFV5R1Z5
31 | gTV8FGcVJC9
32 | gxdoqLR6rwA
33 | gYvKGZ5eRqb
34 | gZ6f7yhEvPG
35 | HxpKQynjfin
36 | i5noydFURQK
37 | JeFG25nYj2p
38 | JF19kD82Mey
39 | jh4fc5c5qoQ
40 | JmbYfDe2QKZ
41 | jtcxE69GiFV
42 | kEZ7cmS4wCh
43 | mJXqzFtmKg4
44 | oLBMNvg9in8
45 | p5wJjkQkbXX
46 | pa4otMbVnkk
47 | pLe4wQe7qrG
48 | Pm6F8kyY3z2
49 | pRbA3pwrgk9
50 | PuKPg4mmafe
51 | PX4nDJXEHrG
52 | q9vSo1VnCiC
53 | qoiz87JEwZ2
54 | QUCTc6BB5sX
55 | r1Q1Z4BcV1o
56 | r47D5H71a5s
57 | rPc6DW4iMge
58 | RPmz2sHmrrY
59 | rqfALeAoiTq
60 | s8pcmisQ38h
61 | S9hNv5qa7GM
62 | sKLMLpTHeUy
63 | SN83YJsR3w2
64 | sT4fr6TAbpF
65 | TbHJrupSAjP
66 | ULsKaCPVFJR
67 | uNb9QFRL6hY
68 | ur6pFq6Qu1A
69 | UwV83HsGsw3
70 | Uxmj2M2itWa
71 | V2XKFyX4ASd
72 | VFuaQ6m2Qom
73 | VLzqgDo317F
74 | Vt2qJdWjCF2
75 | VVfe2KiqLaN
76 | Vvot9Ly1tCj
77 | vyrNrziPKCB
78 | VzqfbhrpDEA
79 | wc2JMjhGNzB
80 | WYY7iVyf5p8
81 | X7HyMhZNoso
82 | x8F5xyUWy9e
83 | XcA2TqTSSAj
84 | YFuZgdQ5vWj
85 | YmJkqBEsHnH
86 | yqstnuAEVhm
87 | YVUC4YcDtcY
88 | Z6MFQCViBuw
89 | ZMojNkEp431
90 | zsNo4HB9uLZ
91 |
--------------------------------------------------------------------------------
/include/Benchmark.hpp:
--------------------------------------------------------------------------------
1 | #ifndef MATTERSIM_BENCHMARK
2 | #define MATTERSIM_BENCHMARK
3 |
4 | #include <chrono>
5 |
6 | namespace mattersim {
7 |
8 | class Timer {
9 | public:
10 | Timer();
11 | virtual void Start();
12 | virtual void Stop();
13 | virtual void Reset();
14 | virtual float MilliSeconds();
15 | virtual float MicroSeconds();
16 | virtual float Seconds();
17 | inline bool running() { return running_; }
18 |
19 | protected:
20 | bool running_;
21 | std::chrono::steady_clock::time_point start_;
22 | std::chrono::steady_clock::duration elapsed_;
23 | };
24 | }
25 |
26 | #endif // MATTERSIM_BENCHMARK
27 |
--------------------------------------------------------------------------------
/include/MatterSim.hpp:
--------------------------------------------------------------------------------
1 | #ifndef MATTERSIM_HPP
2 | #define MATTERSIM_HPP
3 |
4 | #include
5 | #include
6 | #include
7 | #include
8 | #include
9 |
10 | #include <opencv2/opencv.hpp>
11 |
12 | #ifdef OSMESA_RENDERING
13 | #define GL_GLEXT_PROTOTYPES
14 | #include <GL/gl.h>
15 | #include <GL/osmesa.h>
16 | #elif defined (EGL_RENDERING)
17 | #include <epoxy/gl.h>
18 | #include <epoxy/egl.h>
19 | #else
20 | #include <GL/glew.h>
21 | #endif
22 |
23 | #define GLM_FORCE_RADIANS
24 | #include <glm/glm.hpp>
25 | #include <glm/gtc/matrix_transform.hpp>
26 | #include <glm/gtc/type_ptr.hpp>
27 | #include "glm/ext.hpp"
28 |
29 | #include "Benchmark.hpp"
30 | #include "NavGraph.hpp"
31 |
32 | namespace mattersim {
33 |
34 | struct Viewpoint: std::enable_shared_from_this<Viewpoint> {
35 | Viewpoint(std::string viewpointId, unsigned int ix, double x, double y, double z,
36 | double rel_heading, double rel_elevation, double rel_distance) :
37 | viewpointId(viewpointId), ix(ix), x(x), y(y), z(z), rel_heading(rel_heading),
38 | rel_elevation(rel_elevation), rel_distance(rel_distance)
39 | {}
40 |
41 | //! Viewpoint identifier
42 | std::string viewpointId;
43 | //! Viewpoint index into connectivity graph
44 | unsigned int ix;
45 | //! 3D position in world coordinates
46 | double x;
47 | double y;
48 | double z;
49 | //! Heading relative to the camera
50 | double rel_heading;
51 | //! Elevation relative to the camera
52 | double rel_elevation;
53 | //! Distance from the agent
54 | double rel_distance;
55 | };
56 |
57 | typedef std::shared_ptr<Viewpoint> ViewpointPtr;
58 | struct ViewpointPtrComp {
59 | inline bool operator() (const ViewpointPtr& l, const ViewpointPtr& r){
60 | return sqrt(l->rel_heading*l->rel_heading+l->rel_elevation*l->rel_elevation)
61 | < sqrt(r->rel_heading*r->rel_heading+r->rel_elevation*r->rel_elevation);
62 | }
63 | };
64 |
65 | /**
66 | * Simulator state class.
67 | */
68 | struct SimState: std::enable_shared_from_this<SimState>{
69 | //! Building / scan environment identifier
70 | std::string scanId;
71 | //! Number of frames since the last newEpisode() call
72 | unsigned int step = 0;
73 | //! RGB image (in BGR channel order) from the agent's current viewpoint
74 | cv::Mat rgb;
75 | //! Depth image taken from the agent's current viewpoint
76 | cv::Mat depth;
77 | //! Agent's current 3D location
78 | ViewpointPtr location;
79 | //! Agent's current camera heading in radians
80 | double heading = 0;
81 | //! Agent's current camera elevation in radians
82 | double elevation = 0;
83 | //! Agent's current view [0-35] (set only when viewing angles are discretized)
84 | //! [0-11] looking down, [12-23] looking at horizon, [24-35] looking up
85 | unsigned int viewIndex = 0;
86 | //! Vector of nearby navigable locations representing state-dependent action candidates, i.e.
87 | //! viewpoints you can move to. Index 0 is always to remain at the current viewpoint.
88 | //! The remaining viewpoints are sorted by their angular distance from the centre of the image.
89 | std::vector<ViewpointPtr> navigableLocations;
90 | };
91 |
92 | typedef std::shared_ptr<SimState> SimStatePtr;
93 |
94 |
95 | /**
96 | * Main class for accessing an instance of the simulator environment.
97 | */
98 | class Simulator {
99 |
100 | public:
101 | Simulator();
102 |
103 | ~Simulator();
104 |
105 | /**
106 | * Set a non-standard path to the Matterport3D dataset.
107 | * The provided directory must contain subdirectories of the form:
108 | * "/matterport_skybox_images/". Default is "./data/v1/scans/".
109 | */
110 | void setDatasetPath(const std::string& path);
111 |
112 | /**
113 | * Set a non-standard path to the viewpoint connectivity graphs. The provided directory must contain files
114 | * of the form "<scanId>_connectivity.json". Default is "./connectivity" (the graphs provided
115 | * by this repo).
116 | */
117 | void setNavGraphPath(const std::string& path);
118 |
119 | /**
120 | * Enable or disable rendering. Useful for testing. Default is true (enabled).
121 | */
122 | void setRenderingEnabled(bool value);
123 |
124 | /**
125 | * Sets camera resolution. Default is 320 x 240.
126 | */
127 | void setCameraResolution(int width, int height);
128 |
129 | /**
130 | * Sets camera vertical field-of-view in radians. Default is 0.8, approx 46 degrees.
131 | */
132 | void setCameraVFOV(double vfov);
133 |
134 | /**
135 | * Set the camera elevation min and max limits in radians. Default is +-0.94 radians.
136 | * @return true if successful.
137 | */
138 | bool setElevationLimits(double min, double max);
139 |
140 | /**
141 | * Enable or disable discretized viewing angles. When enabled, heading and
142 | * elevation changes will be restricted to 30 degree increments from zero,
143 | * with left/right/up/down movement triggered by the sign of the makeAction
144 | * heading and elevation parameters. Default is false (disabled).
145 | */
146 | void setDiscretizedViewingAngles(bool value);
147 |
148 | /**
149 | * Enable or disable preloading of images from disk to CPU memory. Default is false (disabled).
150 | * Enabled is better for training models, but will cause a delay when starting the simulator.
151 | */
152 | void setPreloadingEnabled(bool value);
153 |
154 | /**
155 | * Enable or disable rendering of depth images. Default is false (disabled).
156 | */
157 | void setDepthEnabled(bool value);
158 |
159 | /**
160 | * Set the number of environments in the batch. Default is 1.
161 | */
162 | void setBatchSize(unsigned int size);
163 |
164 | /**
165 | * Set the cache size for storing pano images in gpu memory. Default is 200. Should be comfortably
166 | * larger than the batch size.
167 | */
168 | void setCacheSize(unsigned int size);
169 |
170 | /**
171 | * Set the random seed for episodes where viewpoint is not provided.
172 | */
173 | void setSeed(int seed);
174 |
175 | /**
176 | * Initialize the simulator. Further configuration will not take effect after this point.
177 | */
178 | void initialize();
179 |
180 | /**
181 | * Starts a new episode. If a viewpoint is not provided initialization will be random.
182 | * @param scanId - sets which scene is used, e.g. "2t7WUuJeko7"
183 | * @param viewpointId - sets the initial viewpoint location, e.g. "cc34e9176bfe47ebb23c58c165203134"
184 | * @param heading - set the agent's initial camera heading in radians. With z-axis up,
185 | * heading is defined relative to the y-axis (turning right is positive).
186 | * @param elevation - set the initial camera elevation in radians, measured from the horizon
187 | * defined by the x-y plane (up is positive).
188 | */
189 | void newEpisode(const std::vector<std::string>& scanId, const std::vector<std::string>& viewpointId,
190 | const std::vector<double>& heading, const std::vector<double>& elevation);
191 |
192 | /**
193 | * Starts a new episode at a random viewpoint.
194 | * @param scanId - sets which scene is used, e.g. "2t7WUuJeko7"
195 | */
196 | void newRandomEpisode(const std::vector<std::string>& scanId);
197 |
198 | /**
199 | * Returns the current batch of environment states including RGB images and available actions.
200 | */
201 | const std::vector<SimStatePtr>& getState();
202 |
203 | /** @brief Select an action.
204 | *
205 | * An RL agent will sample an action here. A task-specific reward can be determined
206 | * based on the location, heading, elevation, etc. of the resulting state.
207 | * @param index - an index into the set of feasible actions defined by getState()->navigableLocations.
208 | * @param heading - desired heading change in radians. With z-axis up, heading is defined
209 | * relative to the y-axis (turning right is positive).
210 | * @param elevation - desired elevation change in radians, measured from the horizon defined
211 | * by the x-y plane (up is positive).
212 | */
213 | void makeAction(const std::vector<unsigned int>& index, const std::vector<double>& heading,
214 | const std::vector<double>& elevation);
215 |
216 | /**
217 | * Closes the environment and releases underlying texture resources, OpenGL contexts, etc.
218 | */
219 | void close();
220 |
221 | /**
222 | * Reset the rendering timers that run automatically.
223 | */
224 | void resetTimers();
225 |
226 | /**
227 | * Return a formatted timing string.
228 | */
229 | std::string timingInfo();
230 |
231 | private:
232 | const int headingCount = 12; // 12 heading values in discretized views
233 | const double elevationIncrement = M_PI/6.0; // 30 degrees discretized up/down
234 | void populateNavigable();
235 | void setHeadingElevation(const std::vector<double>& heading, const std::vector<double>& elevation);
236 | void renderScene();
237 | #ifdef OSMESA_RENDERING
238 | void *buffer;
239 | OSMesaContext ctx;
240 | #elif defined (EGL_RENDERING)
241 | EGLDisplay eglDpy;
242 | GLuint FramebufferName;
243 | #else
244 | GLuint FramebufferName;
245 | #endif
246 | std::vector<SimStatePtr> states;
247 | bool initialized;
248 | bool renderingEnabled;
249 | bool discretizeViews;
250 | bool preloadImages;
251 | bool renderDepth;
252 | int width;
253 | int height;
254 | int randomSeed;
255 | unsigned int cacheSize;
256 | unsigned int batchSize;
257 | double vfov;
258 | double minElevation;
259 | double maxElevation;
260 | glm::mat4 Projection;
261 | glm::mat4 View;
262 | glm::mat4 Model;
263 | glm::mat4 Scale;
264 | glm::mat4 RotateX;
265 | glm::mat4 RotateZ;
266 | GLint ProjMat;
267 | GLint ModelViewMat;
268 | GLint vertex;
269 | GLint isDepth;
270 | GLuint vao_cube;
271 | GLuint vbo_cube_vertices;
272 | GLuint glProgram;
273 | GLuint glShaderV;
274 | GLuint glShaderF;
275 | std::string datasetPath;
276 | std::string navGraphPath;
277 | Timer preloadTimer; // Preloading images from disk into cpu memory
278 | Timer loadTimer; // Loading textures from disk or cpu memory onto gpu
279 | Timer renderTimer; // Rendering time
280 | Timer gpuReadTimer; // Reading rendered images from gpu back to cpu memory
281 | Timer processTimer; // Total run time for simulator
282 | Timer wallTimer; // Wall clock timer
283 | unsigned int frames;
284 | };
285 | }
286 |
287 | #endif
288 |
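// Illustrative usage sketch (added annotation, not part of the original header) for the
// batched API declared above; the scan id is a placeholder taken from the connectivity list:
//
//   mattersim::Simulator sim;
//   sim.setDepthEnabled(true);
//   sim.initialize();
//   sim.newRandomEpisode(std::vector<std::string>{"2t7WUuJeko7"});
//   mattersim::SimStatePtr state = sim.getState().at(0);
//   sim.makeAction(std::vector<unsigned int>{0},    // stay at the current viewpoint
//                  std::vector<double>{M_PI / 6.0}, // turn right 30 degrees
//                  std::vector<double>{0.0});       // keep elevation unchanged
//   sim.close();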
--------------------------------------------------------------------------------
/include/NavGraph.hpp:
--------------------------------------------------------------------------------
1 | #ifndef NAVGRAPH_HPP
2 | #define NAVGRAPH_HPP
3 |
4 | #include <list>
5 | #include <map>
6 | #include <memory>
7 | #include <random>
8 | #include <sstream>
9 | #include <stdexcept>
10 | #include <unordered_map>
11 |
12 | #include <jsoncpp/json/json.h>
13 | #include <opencv2/opencv.hpp>
14 |
15 | #ifdef OSMESA_RENDERING
16 | #define GL_GLEXT_PROTOTYPES
17 | #include <GL/gl.h>
18 | #include <GL/osmesa.h>
19 | #elif defined (EGL_RENDERING)
20 | #include <EGL/egl.h>
21 | #include <GL/glew.h>
22 | #else
23 | #include <GL/glew.h>
24 | #endif
25 |
26 | #define GLM_FORCE_RADIANS
27 | #include <glm/glm.hpp>
28 | #include <glm/gtc/matrix_transform.hpp>
29 | #include <glm/gtc/type_ptr.hpp>
30 |
31 | namespace mattersim {
32 |
33 | static void assertOpenGLError(const std::string& msg) {
34 | GLenum error = glGetError();
35 | if (error != GL_NO_ERROR) {
36 | std::stringstream s;
37 | s << "OpenGL error 0x" << std::hex << error << " at " << msg;
38 | throw std::runtime_error(s.str());
39 | }
40 | }
41 | #ifdef EGL_RENDERING
42 | static void assertEGLError(const std::string& msg) {
43 | EGLint error = eglGetError();
44 |
45 | if (error != EGL_SUCCESS) {
46 | std::stringstream s;
47 | s << "EGL error 0x" << std::hex << error << " at " << msg;
48 | throw std::runtime_error(s.str());
49 | }
50 | }
51 | #endif
52 |
53 | /**
54 | * Navigation graph indicating which panoramic viewpoints are adjacent, and also
55 | * containing (optionally pre-loaded) skybox / cubemap images and textures.
56 | * Class is a singleton to ensure images and textures are only loaded once.
57 | */
58 | class NavGraph final {
59 |
60 | private:
61 |
62 | NavGraph(const std::string& navGraphPath, const std::string& datasetPath,
63 | bool preloadImages, bool renderDepth, int randomSeed, unsigned int cacheSize);
64 |
65 | ~NavGraph();
66 |
67 | public:
68 | // Delete the default, copy and move constructors
69 | NavGraph() = delete;
70 | NavGraph(const NavGraph&) = delete;
71 | NavGraph& operator=(const NavGraph&) = delete;
72 | NavGraph(NavGraph&&) = delete;
73 | NavGraph& operator=(NavGraph&&) = delete;
74 |
75 | /**
76 | * First call will load the navigation graph from disk and (optionally) preload the
77 | * cubemap images into memory.
78 | * @param navGraphPath - directory containing json viewpoint connectivity graphs
79 | * @param datasetPath - directory containing a data directory for each Matterport scan id
80 | * @param preloadImages - if true, all cubemap images will be loaded into CPU memory immediately
81 | * @param renderDepth - if true, depth map images are also required
82 | * @param randomSeed - only used for randomViewpoint function
83 | * @param cacheSize - number of pano textures to keep in GPU memory
84 | */
85 | static NavGraph& getInstance(const std::string& navGraphPath, const std::string& datasetPath,
86 | bool preloadImages, bool renderDepth, int randomSeed, unsigned int cacheSize);
87 |
88 | /**
89 | * Select a random viewpoint from a scan
90 | */
91 | const std::string& randomViewpoint(const std::string& scanId);
92 |
93 | /**
94 | * Find the index of a selected viewpointId
95 | */
96 | unsigned int index(const std::string& scanId, const std::string& viewpointId) const;
97 |
98 | /**
99 | * ViewpointId of a selected viewpoint index
100 | */
101 | const std::string& viewpoint(const std::string& scanId, unsigned int ix) const;
102 |
103 | /**
104 | * Camera rotation matrix for a selected viewpoint index
105 | */
106 | const glm::mat4& cameraRotation(const std::string& scanId, unsigned int ix) const;
107 |
108 | /**
109 | * Camera position vector for a selected viewpoint index
110 | */
111 | const glm::vec3& cameraPosition(const std::string& scanId, unsigned int ix) const;
112 |
113 | /**
114 | * Return a list of other viewpoint indices that are reachable from a selected viewpoint index
115 | */
116 | std::vector<unsigned int> adjacentViewpointIndices(const std::string& scanId, unsigned int ix) const;
117 |
118 | /**
119 | * Get cubemap RGB (and optionally, depth) textures for a selected viewpoint index
120 | */
121 | std::pair<GLuint, GLuint> cubemapTextures(const std::string& scanId, unsigned int ix);
122 |
123 | /**
124 | * Free GPU memory associated with this viewpoint's textures
125 | */
126 | void deleteCubemapTextures(const std::string& scanId, unsigned int ix);
127 |
128 |
129 | protected:
130 |
131 | /**
132 | * Helper class representing nodes in the navigation graph and their cubemap textures.
133 | */
134 | class Location {
135 |
136 | public:
137 | /**
138 | * Construct a location object from a json struct
139 | * @param viewpoint - json struct
140 | * @param skyboxDir - directory containing a data directory for each Matterport scan id
141 | * @param preload - if true, all cubemap images will be loaded into CPU memory immediately
142 | * @param depth - if true, depth textures will also be provided
143 | */
144 | Location(const Json::Value& viewpoint, const std::string& skyboxDir, bool preload, bool depth);
145 |
146 | Location() = delete; // no default constructor
147 |
148 | /**
149 | * Return the cubemap RGB (and optionally, depth) textures for this viewpoint, which will
150 | * be loaded from CPU memory or disk if necessary
151 | */
152 | std::pair<GLuint, GLuint> cubemapTextures();
153 |
154 | /**
155 | * Free GPU memory associated with RGB and depth textures at this location
156 | */
157 | void deleteCubemapTextures();
158 |
159 | std::string viewpointId; //! Unique Matterport identifier for every pano
160 | bool included; //! Some duplicated viewpoints have been excluded
161 | glm::mat4 rot; //! Camera pose rotation component
162 | glm::vec3 pos; //! Camera pose translation component
163 | std::vector<bool> unobstructed; //! Connections to other graph locations
164 |
165 | protected:
166 |
167 | /**
168 | * Load RGB (and optionally, depth) cubemap images from disk into CPU memory
169 | */
170 | void loadCubemapImages();
171 |
172 | /**
173 | * Create RGB (and optionally, depth) textures from cubemap images (e.g., in GPU memory)
174 | */
175 | void loadCubemapTextures();
176 |
177 | GLuint cubemap_texture;
178 | GLuint depth_texture;
179 | cv::Mat xpos; //! RGB images for faces of the cubemap
180 | cv::Mat xneg;
181 | cv::Mat ypos;
182 | cv::Mat yneg;
183 | cv::Mat zpos;
184 | cv::Mat zneg;
185 | cv::Mat xposD; //! Depth images for faces of the cubemap
186 | cv::Mat xnegD;
187 | cv::Mat yposD;
188 | cv::Mat ynegD;
189 | cv::Mat zposD;
190 | cv::Mat znegD;
191 | bool im_loaded;
192 | bool includeDepth;
193 | std::string skyboxDir; //! Path to skybox images
194 | };
195 | typedef std::shared_ptr<Location> LocationPtr;
196 |
197 |
198 | /**
199 | * Helper class implementing a LRU cache for cubemap textures.
200 | */
201 | class TextureCache {
202 |
203 | public:
204 | TextureCache(unsigned int size) : size(size) {
205 | cacheMap.reserve(size+1);
206 | }
207 |
208 | TextureCache() = delete; // no default constructor
209 |
210 | void add(LocationPtr loc) {
211 | auto map_it = cacheMap.find(loc);
212 | if (map_it != cacheMap.end()) {
213 | // Remove entry from middle of list
214 | cacheList.erase(map_it->second);
215 | cacheMap.erase(map_it);
216 | }
217 | // Add element to list and save iterator on map
218 | auto list_it = cacheList.insert(cacheList.begin(), loc);
219 | cacheMap.emplace(loc, list_it);
220 | if (cacheMap.size() >= size) {
221 | removeEldest();
222 | }
223 | }
224 |
225 | void removeEldest() {
226 | if (cacheMap.empty()) {
227 | throw std::runtime_error("MatterSim: TextureCache is empty");
228 | }
229 | LocationPtr loc = cacheList.back();
230 | loc->deleteCubemapTextures();
231 | cacheMap.erase(loc);
232 | cacheList.pop_back();
233 | }
234 |
235 | private:
236 | unsigned int size;
237 | std::unordered_map<LocationPtr, std::list<LocationPtr>::iterator > cacheMap;
238 | std::list<LocationPtr> cacheList;
239 | };
240 |
241 |
242 | std::map<std::string, std::vector<LocationPtr> > scanLocations;
243 | std::default_random_engine generator;
244 | TextureCache cache;
245 | };
246 |
247 | }
248 |
249 | #endif
250 |
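// Illustrative sketch (added annotation, not part of the original header): the expected call
// pattern through the singleton, with placeholder path/id variables; the TextureCache above
// evicts the least recently used pano once more than cacheSize viewpoints are resident on the GPU.
//
//   NavGraph& graph = NavGraph::getInstance(navGraphPath, datasetPath,
//                                           /*preloadImages*/ false, /*renderDepth*/ true,
//                                           /*randomSeed*/ 1, /*cacheSize*/ 200);
//   unsigned int ix = graph.index(scanId, viewpointId);
//   std::pair<GLuint, GLuint> tex = graph.cubemapTextures(scanId, ix); // {rgb, depth}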
--------------------------------------------------------------------------------
/include/cbf.h:
--------------------------------------------------------------------------------
1 | // NYU Depth V2 Dataset Matlab Toolbox
2 | // Authors: Nathan Silberman, Pushmeet Kohli, Derek Hoiem, Rob Fergus
3 |
4 | #ifndef CBF_H_
5 | #define CBF_H_
6 |
7 | #include <stdint.h>
8 |
9 | namespace cbf {
10 |
11 | // Filters the given depth image using a Cross Bilateral Filter.
12 | //
13 | // Args:
14 | // height - height of the images.
15 | // width - width of the images.
16 | // depth - HxW row-major ordered matrix.
17 | // intensity - HxW row-major ordered matrix.
18 | // mask - HxW row-major ordered matrix.
19 | // result - HxW row-major ordered matrix.
20 | // num_scales - the number of scales at which to perform the filtering.
21 | // sigma_s - the space sigma (in pixels)
22 | // sigma_r - the range sigma (in intensity values, 0-1)
23 | void cbf(int height, int width, uint8_t* depth, uint8_t* intensity,
24 | uint8_t* mask, uint8_t* result, unsigned num_scales, double* sigma_s,
25 | double* sigma_r);
26 |
27 | } // namespace
28 |
29 | #endif // CBF_H_
30 |
--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
1 | torchvision==0.4.0
2 | requests==2.22.0
3 | h5py==2.9.0
4 | six==1.12.0
5 | nltk==3.4.4
6 | tqdm==4.32.1
7 | torch==1.2.0
8 | setuptools==41.0.1
9 | pycocotools==2.0.0
10 | numpy==1.16.4
11 | revtok==0.0.3
12 | spacy==2.1.8
13 | networkx==2.3
14 |
--------------------------------------------------------------------------------
/scripts/depth_to_skybox.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 |
3 | ''' Script for generating depth skyboxes based on undistorted depth images,
4 | in order to support depth output in the simulator. The current version
5 | assumes that undistorted depth images are aligned to matterport skyboxes,
6 | and uses simple blending. Images are downsized 50%. '''
7 |
8 | import os
9 | import math
10 | import cv2
11 | import numpy as np
12 | from multiprocessing import Pool
13 | from numpy.linalg import inv,norm
14 | from StringIO import StringIO
15 |
16 |
17 | # Parameters
18 | DOWNSIZED_WIDTH = 512
19 | DOWNSIZED_HEIGHT = 512
20 | NUM_WORKER_PROCESSES = 20
21 | FILL_HOLES = True
22 | VISUALIZE_OUTPUT = False
23 |
24 | if FILL_HOLES:
25 | import sys
26 | sys.path.append('build')
27 | from MatterSim import cbf
28 |
29 | # Constants
30 | # Note: Matterport camera is really y=up, x=right, -z=look.
31 | SKYBOX_WIDTH = 1024
32 | SKYBOX_HEIGHT = 1024
33 | base_dir = 'data/v1/scans'
34 | skybox_template = '%s/%s/matterport_skybox_images/%s_skybox%d_sami.jpg'
35 | color_template = '%s/%s/undistorted_color_images/%s_i%s.jpg'
36 | depth_template = '%s/%s/undistorted_depth_images/%s_d%s.png'
37 | camera_template = '%s/%s/undistorted_camera_parameters/%s.conf'
38 | skybox_depth_template = '%s/%s/matterport_skybox_images/%s_skybox_depth_small.png'
39 |
40 |
41 | # camera transform for skybox images 0-5 relative to image 1
42 | skybox_transforms = [
43 | np.array([[1,0,0],[0,0,-1],[0,1,0]], dtype=np.double), #up (down)
44 | np.eye(3, dtype=np.double),
45 | np.array([[0,0,-1],[0,1,0],[1,0,0]], dtype=np.double), # right
46 | np.array([[-1,0,0],[0,1,0],[0,0,-1]], dtype=np.double), # 180
47 | np.array([[0,0,1],[0,1,0],[-1,0,0]], dtype=np.double), # left
48 | np.array([[1,0,0],[0,0,1],[0,-1,0]], dtype=np.double) # down (up)
49 | ]
50 |
51 |
52 | def camera_parameters(scan):
53 | ''' Returns two dicts containing undistorted camera intrinsics (3x3) and extrinsics (4x4),
54 | respectively, for a given scan. Viewpoint IDs are used as dict keys. '''
55 | intrinsics = {}
56 | extrinsics = {}
57 | with open(camera_template % (base_dir,scan,scan)) as f:
58 | pos = -1
59 | for line in f.readlines():
60 | if 'intrinsics_matrix' in line:
61 | intr = line.split()
62 | C = np.zeros((3, 3), np.double)
63 | C[0,0] = intr[1] # fx
64 | C[1,1] = intr[5] # fy
65 | C[0,2] = intr[3] # cx
66 | C[1,2] = intr[6] # cy
67 | C[2,2] = 1.0
68 | pos = 0
69 | elif pos >= 0 and pos < 6:
70 | q = line.find('.jpg')
71 | camera = line[q-37:q]
72 | if pos == 0:
73 | intrinsics[camera[:-2]] = C
74 | T = np.loadtxt(StringIO(line.split('jpg ')[1])).reshape((4,4))
75 | # T is camera-to-world transform, invert for world-to-camera
76 | extrinsics[camera] = (T,inv(T))
77 | pos += 1
78 | return intrinsics,extrinsics
79 |
80 |
81 | def z_to_euclid(K_inv, depth):
82 | ''' Takes inverse intrinsics matrix and a depth image. Returns a new depth image with
83 | depth converted from z-distance into euclidean distance from the camera centre. '''
84 |
85 | assert len(depth.shape) == 2
86 | h = depth.shape[0]
87 | w = depth.shape[1]
88 |
89 | y,x = np.indices((h,w))
90 | homo_pixels = np.vstack((x.flatten(),y.flatten(),np.ones((x.size))))
91 | rays = K_inv.dot(homo_pixels)
92 | cos_theta = np.array([0,0,1]).dot(rays) / norm(rays,axis=0)
93 |
94 | output = depth / cos_theta.reshape(h,w)
95 | return output
96 |
97 |
98 | def instrinsic_matrix(width, height):
99 | ''' Construct an ideal camera intrinsic matrix. '''
100 | K = np.zeros((3, 3), np.double)
101 | K[0,0] = width/2 #fx
102 | K[1,1] = height/2 #fy
103 | K[0,2] = width/2 #cx
104 | K[1,2] = height/2 #cy
105 | K[2,2] = 1.0
106 | return K
107 |
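# Added note: each skybox face covers a 90 degree field of view, so the focal length in
# pixels is (width/2)/tan(45 deg) = width/2 (likewise for fy), with the principal point
# at the image centre -- which is what the ideal matrix above encodes.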
108 |
109 |
110 | def fill_joint_bilateral_filter(rgb, depth):
111 | ''' Fill holes in a 16bit depth image given corresponding rgb image '''
112 |
113 | intensity = cv2.cvtColor(rgb, cv2.COLOR_BGR2GRAY)
114 |
115 | # Convert the depth image to uint8.
116 | maxDepth = np.max(depth)+1
117 | depth = (depth.astype(np.float64)/maxDepth)
118 | depth[depth > 1] = 1
119 | depth = (depth*255).astype(np.uint8)
120 |
121 | # Convert to col major order
122 | depth = np.asfortranarray(depth)
123 | intensity = np.asfortranarray(intensity)
124 | mask = (depth == 0)
125 | result = np.zeros_like(depth)
126 |
127 | # Fill holes
128 | cbf(depth, intensity, mask, result)
129 | result = (result.astype(np.float64)/255*maxDepth).astype(np.uint16)
130 | return result
131 |
132 |
133 | def depth_to_skybox(scan, visualize=VISUALIZE_OUTPUT, fill_holes=FILL_HOLES):
134 |
135 | # Load camera parameters
136 | intrinsics,extrinsics = camera_parameters(scan)
137 | # Skybox camera intrinsics
138 | K_skybox = instrinsic_matrix(SKYBOX_WIDTH, SKYBOX_HEIGHT)
139 |
140 | pano_ids = list(set([item.split('_')[0] for item in intrinsics.keys()]))
141 | print('Processing scan %s with %d panoramas' % (scan, len(pano_ids)))
142 |
143 | if visualize:
144 | cv2.namedWindow('RGB')
145 | cv2.namedWindow('Depth')
146 | cv2.namedWindow('Skybox')
147 |
148 | for pano in pano_ids:
149 |
150 | # Load undistorted depth and rgb images
151 | depth = {}
152 | rgb = {}
153 | for c in range(3):
154 | K_inv = inv(intrinsics['%s_i%d' % (pano,c)])
155 | for i in range(6):
156 | name = '%d_%d' % (c,i)
157 | if visualize:
158 | rgb[name] = cv2.imread(color_template % (base_dir,scan,pano,name))
159 | # Load 16bit grayscale image
160 | d_im = cv2.imread(depth_template % (base_dir,scan,pano,name), cv2.IMREAD_ANYDEPTH)
161 | depth[name] = z_to_euclid(K_inv, d_im)
162 |
163 | ims = []
164 | for skybox_ix in range(6):
165 |
166 | # Load skybox image
167 | skybox = cv2.imread(skybox_template % (base_dir,scan,pano,skybox_ix))
168 |
169 | # Skybox index 1 is the same orientation as camera image 1_5
170 | skybox_ctw,_ = extrinsics[pano + '_i1_5']
171 | skybox_ctw = skybox_ctw[:3,:3].dot(skybox_transforms[skybox_ix])
172 | skybox_wtc = inv(skybox_ctw)
173 |
174 | base_depth = np.zeros((SKYBOX_HEIGHT,SKYBOX_WIDTH), np.uint16)
175 | if visualize:
176 | base_rgb = np.zeros((SKYBOX_HEIGHT,SKYBOX_WIDTH,3), np.uint8)
177 |
178 | for camera in range(3):
179 | for angle in range(6):
180 |
181 | # Camera parameters
182 | im_name = '%d_%d' % (camera,angle)
183 | K_im = intrinsics[pano + '_i' + im_name[0]]
184 | T_ctw,T_wtc = extrinsics[pano + '_i' + im_name]
185 | R_ctw = T_ctw[:3,:3]
186 |
187 | # Check if this image can be skipped (facing away)
188 | z = np.array([0,0,1])
189 | if R_ctw.dot(z).dot(skybox_ctw.dot(z)) < 0:
190 | continue
191 |
192 | # Compute homography
193 | H = K_skybox.dot(skybox_wtc.dot(R_ctw.dot(inv(K_im))))
194 |
195 | # Warp and blend the depth image
196 | flip = cv2.flip(depth[im_name], 1) # flip around y-axis
197 | warp = cv2.warpPerspective(flip, H, (SKYBOX_HEIGHT,SKYBOX_WIDTH), flags=cv2.INTER_NEAREST)
198 | mask = cv2.warpPerspective(np.ones_like(flip), H, (SKYBOX_HEIGHT,SKYBOX_WIDTH), flags=cv2.INTER_LINEAR)
199 | mask[warp == 0] = 0 # Set mask to zero where we don't have any depth values
200 | mask = cv2.erode(mask,np.ones((3,3),np.uint8),iterations = 1)
201 | locs = np.where(mask == 1)
202 | base_depth[locs[0], locs[1]] = warp[locs[0], locs[1]]
203 |
204 | if visualize:
205 | # Warp and blend the rgb image
206 | flip = cv2.flip(rgb[im_name], 1) # flip around y-axis
207 | warp = cv2.warpPerspective(flip, H, (SKYBOX_HEIGHT,SKYBOX_WIDTH), flags=cv2.INTER_LINEAR)
208 | mask = cv2.warpPerspective(np.ones_like(flip), H, (SKYBOX_HEIGHT,SKYBOX_WIDTH), flags=cv2.INTER_LINEAR)
209 | mask = cv2.erode(mask,np.ones((3,3),np.uint8),iterations = 1)
210 | locs = np.where(mask == 1)
211 | base_rgb[locs[0], locs[1]] = warp[locs[0], locs[1]]
212 |
213 | depth_small = cv2.resize(cv2.flip(base_depth, 1),(DOWNSIZED_WIDTH,DOWNSIZED_HEIGHT),interpolation=cv2.INTER_NEAREST) # flip around y-axis, downsize
214 | if fill_holes:
215 | depth_filled = fill_joint_bilateral_filter(skybox, depth_small) # Fill holes
216 | ims.append(depth_filled)
217 | else:
218 | ims.append(depth_small)
219 |
220 | if visualize and False:
221 | cv2.imshow('Skybox', skybox)
222 | cv2.imshow('Depth', cv2.applyColorMap((depth_small/256).astype(np.uint8), cv2.COLORMAP_JET))
223 | rgb_output = cv2.flip(base_rgb, 1) # flip around y-axis
224 | cv2.imshow('RGB', rgb_output)
225 | cv2.waitKey(0)
226 |
227 | newimg = np.concatenate(ims, axis=1)
228 |
229 | if visualize:
230 | maxDepth = np.max(newimg)+1
231 | newimg = (newimg.astype(np.float64)/maxDepth)
232 | newimg = (newimg*255).astype(np.uint8)
233 | cv2.imshow('Depth pano', cv2.applyColorMap(newimg, cv2.COLORMAP_JET))
234 | cv2.waitKey(0)
235 | else:
236 | # Save output
237 | outfile = skybox_depth_template % (base_dir,scan,pano)
238 | assert cv2.imwrite(outfile, newimg), ('Could not write to %s' % outfile)
239 |
240 | if visualize:
241 | cv2.destroyAllWindows()
242 | print ('Completed scan %s' % (scan))
243 |
244 |
245 |
246 | if __name__ == '__main__':
247 |
248 | with open('connectivity/scans.txt') as f:
249 | scans = [scan.strip() for scan in f.readlines()]
250 | p = Pool(NUM_WORKER_PROCESSES)
251 | p.map(depth_to_skybox, scans)
252 |
253 |
254 |
255 |
--------------------------------------------------------------------------------
/scripts/downsize_skybox.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 |
3 | ''' Script for downsizing skybox images. '''
4 |
5 | import os
6 | import math
7 | import cv2
8 | import numpy as np
9 | from multiprocessing import Pool
10 | from depth_to_skybox import camera_parameters
11 |
12 |
13 | NUM_WORKER_PROCESSES = 20
14 | DOWNSIZED_WIDTH = 512
15 | DOWNSIZED_HEIGHT = 512
16 |
17 | # Constants
18 | SKYBOX_WIDTH = 1024
19 | SKYBOX_HEIGHT = 1024
20 | base_dir = 'data/v1/scans'
21 | skybox_template = '%s/%s/matterport_skybox_images/%s_skybox%d_sami.jpg'
22 | skybox_small_template = '%s/%s/matterport_skybox_images/%s_skybox%d_small.jpg'
23 | skybox_merge_template = '%s/%s/matterport_skybox_images/%s_skybox_small.jpg'
24 |
25 |
26 |
27 | def downsizeWithMerge(scan):
28 | # Load pano ids
29 | intrinsics,_ = camera_parameters(scan)
30 | pano_ids = list(set([item.split('_')[0] for item in intrinsics.keys()]))
31 | print('Processing scan %s with %d panoramas' % (scan, len(pano_ids)))
32 |
33 | for pano in pano_ids:
34 |
35 | ims = []
36 | for skybox_ix in range(6):
37 |
38 | # Load and downsize skybox image
39 | skybox = cv2.imread(skybox_template % (base_dir,scan,pano,skybox_ix))
40 | ims.append(cv2.resize(skybox,(DOWNSIZED_WIDTH,DOWNSIZED_HEIGHT),interpolation=cv2.INTER_AREA))
41 |
42 | # Save output
43 | newimg = np.concatenate(ims, axis=1)
44 | assert cv2.imwrite(skybox_merge_template % (base_dir,scan,pano), newimg)
45 |
46 |
47 | def downsize(scan):
48 |
49 | # Load pano ids
50 | intrinsics,_ = camera_parameters(scan)
51 | pano_ids = list(set([item.split('_')[0] for item in intrinsics.keys()]))
52 | print('Processing scan %s with %d panoramas' % (scan, len(pano_ids)))
53 |
54 | for pano in pano_ids:
55 |
56 | for skybox_ix in range(6):
57 |
58 | # Load and downsize skybox image
59 | skybox = cv2.imread(skybox_template % (base_dir,scan,pano,skybox_ix))
60 | newimg = cv2.resize(skybox,(DOWNSIZED_WIDTH,DOWNSIZED_HEIGHT),interpolation=cv2.INTER_AREA)
61 |
62 | # Save output
63 | assert cv2.imwrite(skybox_small_template % (base_dir,scan,pano,skybox_ix), newimg)
64 |
65 |
66 | if __name__ == '__main__':
67 |
68 | with open('connectivity/scans.txt') as f:
69 | scans = [scan.strip() for scan in f.readlines()]
70 | p = Pool(NUM_WORKER_PROCESSES)
71 | p.map(downsizeWithMerge, scans)
72 |
73 |
74 |
--------------------------------------------------------------------------------
/scripts/fill_depth.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 |
3 | ''' Script for filling missing values in undistorted depth images. '''
4 |
5 | import os
6 | import math
7 | import cv2
8 | import numpy as np
9 | from multiprocessing import Pool
10 | from depth_to_skybox import camera_parameters
11 |
12 | import sys
13 | sys.path.append('build')
14 | from MatterSim import cbf
15 |
16 |
17 | base_dir = 'data/v1/scans'
18 | color_template = '%s/%s/undistorted_color_images/%s_i%s.jpg'
19 | depth_template = '%s/%s/undistorted_depth_images/%s_d%s.png'
20 | filled_depth_template = '%s/%s/undistorted_depth_images/%s_d%s_filled.png'
21 |
22 | def fill_joint_bilateral_filter(scan):
23 |
24 | # Load camera parameters
25 | intrinsics,_ = camera_parameters(scan)
26 | pano_ids = list(set([item.split('_')[0] for item in intrinsics.keys()]))
27 | print('Processing scan %s with %d panoramas' % (scan, len(pano_ids)))
28 |
29 | for pano in pano_ids:
30 |
31 | # Load undistorted depth and rgb images
32 | for c in range(3):
33 | for i in range(6):
34 | name = '%d_%d' % (c,i)
35 | rgb = cv2.imread(color_template % (base_dir,scan,pano,name))
36 | intensity = cv2.cvtColor(rgb, cv2.COLOR_BGR2GRAY)
37 |
38 | # Load 16bit depth image
39 | depth = cv2.imread(depth_template % (base_dir,scan,pano,name), cv2.IMREAD_ANYDEPTH)
40 |
41 | # Convert the depth image to uint8.
42 | maxDepth = np.max(depth)+1
43 | depth = (depth.astype(np.float64)/maxDepth)
44 | depth[depth > 1] = 1
45 | depth = (depth*255).astype(np.uint8)
46 |
47 | #cv2.imshow('input', cv2.applyColorMap(depth, cv2.COLORMAP_JET))
48 |
49 | # Convert to col major order
50 | depth = np.asfortranarray(depth)
51 | intensity = np.asfortranarray(intensity)
52 | mask = (depth == 0)
53 | result = np.zeros_like(depth)
54 |
55 | # Fill holes
56 | cbf(depth, intensity, mask, result)
57 |
58 | #cv2.imshow('result', cv2.applyColorMap(result, cv2.COLORMAP_JET))
59 | #cv2.waitKey(0)
60 |
61 | result = (result.astype(np.float64)/255*maxDepth).astype(np.uint16)
62 | assert cv2.imwrite(filled_depth_template % (base_dir,scan,pano,name), result)
63 |
64 |
65 | if __name__ == '__main__':
66 |
67 | with open('connectivity/scans.txt') as f:
68 | scans = [scan.strip() for scan in f.readlines()]
69 | p = Pool(10)
70 | p.map(fill_joint_bilateral_filter, scans)
71 |
72 |
73 |
--------------------------------------------------------------------------------
/scripts/precompute_img_features.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 |
3 | ''' Script to precompute image features using a Caffe ResNet CNN, using 36 discretized views
4 | at each viewpoint in 30 degree increments, and the provided camera WIDTH, HEIGHT
5 | and VFOV parameters. '''
6 |
7 | import numpy as np
8 | import cv2
9 | import json
10 | import math
11 | import base64
12 | import csv
13 | import sys
14 |
15 | csv.field_size_limit(sys.maxsize)
16 |
17 |
18 | # Caffe and MatterSim need to be on the Python path
19 | sys.path.insert(0, 'build')
20 | import MatterSim
21 |
22 | #caffe_root = '../' # your caffe build
23 | #sys.path.insert(0, caffe_root + 'python')
24 | import caffe
25 |
26 | from timer import Timer
27 |
28 |
29 | TSV_FIELDNAMES = ['scanId', 'viewpointId', 'image_w','image_h', 'vfov', 'features']
30 | VIEWPOINT_SIZE = 36 # Number of discretized views from one viewpoint
31 | FEATURE_SIZE = 2048
32 | BATCH_SIZE = 4 # Some fraction of viewpoint size - batch size 4 equals 11GB memory
33 | GPU_ID = 0
34 | PROTO = 'models/ResNet-152-deploy.prototxt'
35 | MODEL = 'models/ResNet-152-model.caffemodel' # You need to download this, see README.md
36 | #MODEL = 'models/resnet152_places365.caffemodel'
37 | OUTFILE = 'img_features/ResNet-152-imagenet.tsv'
38 | GRAPHS = 'connectivity/'
39 |
40 | # Simulator image parameters
41 | WIDTH=640
42 | HEIGHT=480
43 | VFOV=60
44 |
45 |
46 | def load_viewpointids():
47 | viewpointIds = []
48 | with open(GRAPHS+'scans.txt') as f:
49 | scans = [scan.strip() for scan in f.readlines()]
50 | for scan in scans:
51 | with open(GRAPHS+scan+'_connectivity.json') as j:
52 | data = json.load(j)
53 | for item in data:
54 | if item['included']:
55 | viewpointIds.append((scan, item['image_id']))
56 | print('Loaded %d viewpoints' % len(viewpointIds))
57 | return viewpointIds
58 |
59 |
60 | def transform_img(im):
61 | ''' Prep opencv 3 channel image for the network '''
62 | im_orig = im.astype(np.float32, copy=True)
63 | im_orig -= np.array([[[103.1, 115.9, 123.2]]]) # BGR pixel mean
64 | blob = np.zeros((1, im.shape[0], im.shape[1], 3), dtype=np.float32)
65 | blob[0, :, :, :] = im_orig
66 | blob = blob.transpose((0, 3, 1, 2))
67 | return blob
68 |
69 |
70 | def build_tsv():
71 | # Set up the simulator
72 | sim = MatterSim.Simulator()
73 | sim.setCameraResolution(WIDTH, HEIGHT)
74 | sim.setCameraVFOV(math.radians(VFOV))
75 | sim.setDiscretizedViewingAngles(True)
76 | sim.initialize()
77 |
78 | # Set up Caffe resnet
79 | caffe.set_device(GPU_ID)
80 | caffe.set_mode_gpu()
81 | net = caffe.Net(PROTO, MODEL, caffe.TEST)
82 | net.blobs['data'].reshape(BATCH_SIZE, 3, HEIGHT, WIDTH)
83 |
84 | count = 0
85 | t_render = Timer()
86 | t_net = Timer()
87 | with open(OUTFILE, 'wb') as tsvfile:
88 | writer = csv.DictWriter(tsvfile, delimiter = '\t', fieldnames = TSV_FIELDNAMES)
89 |
90 | # Loop all the viewpoints in the simulator
91 | viewpointIds = load_viewpointids()
92 | for scanId,viewpointId in viewpointIds:
93 | t_render.tic()
94 | # Loop all discretized views from this location
95 | blobs = []
96 | features = np.empty([VIEWPOINT_SIZE, FEATURE_SIZE], dtype=np.float32)
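# Added note: the 36 views are visited as three rings of 12 headings (30 degree steps):
# ix 0-11 at -30 degrees elevation, 12-23 at 0 degrees, 24-35 at +30 degrees. Each step
# turns 30 degrees right; every 12th step also looks up by one 30 degree increment.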
97 | for ix in range(VIEWPOINT_SIZE):
98 | if ix == 0:
99 | sim.newEpisode([scanId], [viewpointId], [0], [math.radians(-30)])
100 | elif ix % 12 == 0:
101 | sim.makeAction([0], [1.0], [1.0])
102 | else:
103 | sim.makeAction([0], [1.0], [0])
104 |
105 | state = sim.getState()[0]
106 | assert state.viewIndex == ix
107 |
108 | # Transform and save generated image
109 | blobs.append(transform_img(state.rgb))
110 |
111 | t_render.toc()
112 | t_net.tic()
113 | # Run as many forward passes as necessary
114 | assert VIEWPOINT_SIZE % BATCH_SIZE == 0
115 | forward_passes = VIEWPOINT_SIZE // BATCH_SIZE
116 | ix = 0
117 | for f in range(forward_passes):
118 | for n in range(BATCH_SIZE):
119 | # Copy image blob to the net
120 | net.blobs['data'].data[n, :, :, :] = blobs[ix]
121 | ix += 1
122 | # Forward pass
123 | output = net.forward()
124 | features[f*BATCH_SIZE:(f+1)*BATCH_SIZE, :] = net.blobs['pool5'].data[:,:,0,0]
125 |
126 | writer.writerow({
127 | 'scanId': scanId,
128 | 'viewpointId': viewpointId,
129 | 'image_w': WIDTH,
130 | 'image_h': HEIGHT,
131 | 'vfov' : VFOV,
132 | 'features': base64.b64encode(features)
133 | })
134 | count += 1
135 | t_net.toc()
136 | if count % 100 == 0:
137 | print('Processed %d / %d viewpoints, %.1fs avg render time, %.1fs avg net time, projected %.1f hours' %\
138 | (count,len(viewpointIds), t_render.average_time, t_net.average_time,
139 | (t_render.average_time+t_net.average_time)*len(viewpointIds)/3600))
140 |
141 |
142 | def read_tsv(infile):
143 | # Verify we can read a tsv
144 | in_data = []
145 | with open(infile, "r+b") as tsv_in_file:
146 | reader = csv.DictReader(tsv_in_file, delimiter='\t', fieldnames = TSV_FIELDNAMES)
147 | for item in reader:
148 | item['image_h'] = int(item['image_h'])
149 | item['image_w'] = int(item['image_w'])
150 | item['vfov'] = int(item['vfov'])
151 | item['features'] = np.frombuffer(base64.decodestring(item['features']),
152 | dtype=np.float32).reshape((VIEWPOINT_SIZE, FEATURE_SIZE))
153 | in_data.append(item)
154 | return in_data
155 |
156 |
157 | if __name__ == "__main__":
158 |
159 | build_tsv()
160 | data = read_tsv(OUTFILE)
161 | print('Completed %d viewpoints' % len(data))
162 |
163 |
--------------------------------------------------------------------------------
/scripts/timer.py:
--------------------------------------------------------------------------------
1 | # --------------------------------------------------------
2 | # Fast R-CNN
3 | # Copyright (c) 2015 Microsoft
4 | # Licensed under The MIT License [see LICENSE for details]
5 | # Written by Ross Girshick
6 | # --------------------------------------------------------
7 |
8 | import time
9 |
10 | class Timer(object):
11 | """A simple timer."""
12 | def __init__(self):
13 | self.total_time = 0.
14 | self.calls = 0
15 | self.start_time = 0.
16 | self.diff = 0.
17 | self.average_time = 0.
18 |
19 | def tic(self):
20 | # using time.time instead of time.clock because time.clock
21 | # does not normalize for multithreading
22 | self.start_time = time.time()
23 |
24 | def toc(self, average=True):
25 | self.diff = time.time() - self.start_time
26 | self.total_time += self.diff
27 | self.calls += 1
28 | self.average_time = self.total_time / self.calls
29 | if average:
30 | return self.average_time
31 | else:
32 | return self.diff
33 |
--------------------------------------------------------------------------------
/src/driver/driver.py:
--------------------------------------------------------------------------------
1 | import sys
2 | sys.path.append('build')
3 | import MatterSim
4 | import time
5 | import math
6 | import cv2
7 | import numpy as np
8 |
9 | WIDTH = 800
10 | HEIGHT = 600
11 | VFOV = math.radians(60)
12 | HFOV = VFOV*WIDTH/HEIGHT
13 | TEXT_COLOR = [230, 40, 40]
14 |
15 | cv2.namedWindow('Python RGB')
16 | cv2.namedWindow('Python Depth')
17 |
18 | sim = MatterSim.Simulator()
19 | sim.setCameraResolution(WIDTH, HEIGHT)
20 | sim.setCameraVFOV(VFOV)
21 | sim.setDepthEnabled(True)
22 | sim.initialize()
23 | #sim.newEpisode(['2t7WUuJeko7'], ['1e6b606b44df4a6086c0f97e826d4d15'], [0], [0])
24 | #sim.newEpisode(['1LXtFkjw3qL'], ['0b22fa63d0f54a529c525afbf2e8bb25'], [0], [0])
25 | sim.newRandomEpisode(['1LXtFkjw3qL'])
26 |
27 | heading = 0
28 | elevation = 0
29 | location = 0
30 | ANGLEDELTA = 5 * math.pi / 180
31 |
32 | print('\nPython Demo')
33 | print('Use arrow keys to move the camera.')
34 | print('Use number keys (not numpad) to move to nearby viewpoints indicated in the RGB view.\n')
35 |
36 | while True:
37 | sim.makeAction([location], [heading], [elevation])
38 | location = 0
39 | heading = 0
40 | elevation = 0
41 |
42 | state = sim.getState()[0]
43 | locations = state.navigableLocations
44 | rgb = np.array(state.rgb, copy=False)
45 | for idx, loc in enumerate(locations[1:]):
46 | # Draw actions on the screen
47 | fontScale = 3.0/loc.rel_distance
48 | x = int(WIDTH/2 + loc.rel_heading/HFOV*WIDTH)
49 | y = int(HEIGHT/2 - loc.rel_elevation/VFOV*HEIGHT)
50 | cv2.putText(rgb, str(idx + 1), (x, y), cv2.FONT_HERSHEY_SIMPLEX,
51 | fontScale, TEXT_COLOR, thickness=3)
52 | cv2.imshow('Python RGB', rgb)
53 |
54 | depth = np.array(state.depth, copy=False)
55 | cv2.imshow('Python Depth', depth)
56 | k = cv2.waitKey(1)
57 | if k == -1:
58 | continue
59 | else:
60 | k = (k & 255)
61 | if k == ord('q'):
62 | break
63 | elif ord('1') <= k <= ord('9'):
64 | location = k - ord('0')
65 | if location >= len(locations):
66 | location = 0
67 | elif k == 81 or k == ord('a'):
68 | heading = -ANGLEDELTA
69 | elif k == 82 or k == ord('w'):
70 | elevation = ANGLEDELTA
71 | elif k == 83 or k == ord('d'):
72 | heading = ANGLEDELTA
73 | elif k == 84 or k == ord('s'):
74 | elevation = -ANGLEDELTA
75 |
--------------------------------------------------------------------------------
/src/driver/mattersim_main.cpp:
--------------------------------------------------------------------------------
1 | #include <iostream>
2 | #include <opencv2/opencv.hpp>
3 |
4 | #include "MatterSim.hpp"
5 |
6 | using namespace mattersim;
7 |
8 | #define WIDTH 1280
9 | #define HEIGHT 720
10 |
11 | #ifndef M_PI
12 | #define M_PI (3.14159265358979323846)
13 | #endif
14 |
15 | int main(int argc, char *argv[]) {
16 |
17 | cv::namedWindow("C++ RGB");
18 | cv::namedWindow("C++ Depth");
19 |
20 | Simulator sim;
21 |
22 | // Sets resolution. Default is 320X240
23 | sim.setCameraResolution(640,480);
24 | sim.setDepthEnabled(true);
25 |
26 | // Initialize the simulator. Further camera configuration won't take any effect from now on.
27 | sim.initialize();
28 |
29 | std::cout << "\nC++ Demo" << std::endl;
30 | std::cout << "Showing some random viewpoints in one building." << std::endl;
31 |
32 | int i = 0;
33 | while(true) {
34 | i++;
35 | std::cout << "Episode #" << i << "\n";
36 |
37 | // Starts a new episode. It is not strictly needed right after initialize(), but it costs little and keeps the loop simple.
38 | sim.newRandomEpisode(std::vector<std::string>(1, "pa4otMbVnkk")); // Launches at a random location
39 |
40 | for (int k=0; k<500; k++) {
41 |
42 | // Get the state
43 | SimStatePtr state = sim.getState().at(0); // SimStatePtr is std::shared_ptr<SimState>
44 |
45 | // Which consists of:
46 | unsigned int n = state->step;
47 | cv::Mat rgb = state->rgb; // OpenCV CV_8UC3 type (i.e. 8bit color rgb)
48 | cv::Mat depth = state->depth; // OpenCV CV_16UC1 type (i.e. 16bit grayscale)
49 | ViewpointPtr location = state->location; // Need a class to hold viewpoint id, and x,y,z location of a viewpoint
50 | float heading = state->heading;
51 | float elevation = state->elevation; // camera parameters
52 | std::vector<ViewpointPtr> reachable = state->navigableLocations; // Where we can move to,
53 | int locationIdx = 0; // Must be an index into reachable
54 | double headingChange = M_PI / 500;
55 | double elevationChange = 0;
56 |
57 | cv::imshow("C++ RGB", rgb);
58 | cv::imshow("C++ Depth", depth);
59 | cv::waitKey(10);
60 |
61 | sim.makeAction(std::vector<unsigned int>(1, locationIdx),
62 | std::vector<double>(1, headingChange),
63 | std::vector<double>(1, elevationChange));
64 |
65 | }
66 | }
67 |
68 | // It will be done automatically in destructor but after close you can init it again with different settings.
69 | sim.close();
70 |
71 | return 0;
72 | }
73 |
--------------------------------------------------------------------------------
/src/lib/Benchmark.cpp:
--------------------------------------------------------------------------------
1 | #include <chrono>
2 |
3 | #include "Benchmark.hpp"
4 |
5 | namespace mattersim {
6 |
7 | Timer::Timer()
8 | : running_(false),
9 | elapsed_(0) {}
10 |
11 | void Timer::Start() {
12 | if (!running()) {
13 | start_ = std::chrono::steady_clock::now();
14 | running_ = true;
15 | }
16 | }
17 |
18 | void Timer::Stop() {
19 | if (running()) {
20 | elapsed_ += std::chrono::steady_clock::now() - start_;
21 | running_ = false;
22 | }
23 | }
24 |
25 | void Timer::Reset() {
26 | if (running()) {
27 | running_ = false;
28 | }
29 | elapsed_ = std::chrono::steady_clock::duration(0);
30 | }
31 |
32 | float Timer::MicroSeconds() {
33 | if (running()) {
34 | elapsed_ += std::chrono::steady_clock::now() - start_;
35 | }
36 | return std::chrono::duration_cast<std::chrono::microseconds>(elapsed_).count();
37 | }
38 |
39 | float Timer::MilliSeconds() {
40 | if (running()) {
41 | elapsed_ += std::chrono::steady_clock::now() - start_;
42 | }
43 | return std::chrono::duration_cast<std::chrono::milliseconds>(elapsed_).count();
44 | }
45 |
46 | float Timer::Seconds() {
47 | if (running()) {
48 | elapsed_ += std::chrono::steady_clock::now() - start_;
49 | }
50 | return std::chrono::duration_cast<std::chrono::seconds>(elapsed_).count();
51 | }
52 |
53 |
54 | }
55 |
--------------------------------------------------------------------------------
/src/lib/NavGraph.cpp:
--------------------------------------------------------------------------------
1 | #include <algorithm>
2 | #include <fstream>
3 | #include <iterator>
4 | #include <string>
5 |
6 | #include <opencv2/opencv.hpp>
7 | #ifdef _OPENMP
8 | #include <omp.h>
9 | #endif
10 | #include "NavGraph.hpp"
11 |
12 | namespace mattersim {
13 |
14 |
15 | NavGraph::Location::Location(const Json::Value& viewpoint, const std::string& skyboxDir,
16 | bool preload, bool depth): skyboxDir(skyboxDir), im_loaded(false),
17 | includeDepth(depth), cubemap_texture(0), depth_texture(0) {
18 |
19 | viewpointId = viewpoint["image_id"].asString();
20 | included = viewpoint["included"].asBool();
21 |
22 | float posearr[16];
23 | int i = 0;
24 | for (auto f : viewpoint["pose"]) {
25 | posearr[i++] = f.asFloat();
26 | }
27 | // glm uses column-major order. Inputs are in row-major order.
28 | rot = glm::transpose(glm::make_mat4(posearr));
29 | // glm access is col,row
30 | pos = glm::vec3{rot[3][0], rot[3][1], rot[3][2]};
31 | rot[3] = {0,0,0,1}; // remove translation component
32 |
33 | for (auto u : viewpoint["unobstructed"]) {
34 | unobstructed.push_back(u.asBool());
35 | }
36 |
37 | if (preload) {
38 | // Preload skybox images
39 | loadCubemapImages();
40 | }
41 | };
42 |
43 |
44 | void NavGraph::Location::loadCubemapImages() {
45 | cv::Mat rgb = cv::imread(skyboxDir + viewpointId + "_skybox_small.jpg");
46 | int w = rgb.cols/6;
47 | int h = rgb.rows;
48 | xpos = rgb(cv::Rect(2*w, 0, w, h));
49 | xneg = rgb(cv::Rect(4*w, 0, w, h));
50 | ypos = rgb(cv::Rect(0*w, 0, w, h));
51 | yneg = rgb(cv::Rect(5*w, 0, w, h));
52 | zpos = rgb(cv::Rect(1*w, 0, w, h));
53 | zneg = rgb(cv::Rect(3*w, 0, w, h));
54 | if (xpos.empty() || xneg.empty() || ypos.empty() || yneg.empty() || zpos.empty() || zneg.empty()) {
55 | throw std::invalid_argument( "MatterSim: Could not open skybox RGB files at: " + skyboxDir + viewpointId + "_skybox_small.jpg");
56 | }
57 | if (includeDepth) {
58 | // 16 bit grayscale images
59 | cv::Mat depth = cv::imread(skyboxDir + viewpointId + "_skybox_depth_small.png", CV_LOAD_IMAGE_ANYDEPTH);
60 | xposD = depth(cv::Rect(2*w, 0, w, h));
61 | xnegD = depth(cv::Rect(4*w, 0, w, h));
62 | yposD = depth(cv::Rect(0*w, 0, w, h));
63 | ynegD = depth(cv::Rect(5*w, 0, w, h));
64 | zposD = depth(cv::Rect(1*w, 0, w, h));
65 | znegD = depth(cv::Rect(3*w, 0, w, h));
66 | if (xposD.empty() || xnegD.empty() || yposD.empty() || ynegD.empty() || zposD.empty() || znegD.empty()) {
67 | throw std::invalid_argument( "MatterSim: Could not open skybox depth files at: " + skyboxDir + viewpointId + "_skybox_depth_small.png");
68 | }
69 | }
70 | im_loaded = true;
71 | }
72 |
73 |
74 | void NavGraph::Location::loadCubemapTextures() {
75 | // RGB texture
76 | glActiveTexture(GL_TEXTURE0);
77 | glEnable(GL_TEXTURE_CUBE_MAP);
78 | glGenTextures(1, &cubemap_texture);
79 | glBindTexture(GL_TEXTURE_CUBE_MAP, cubemap_texture);
80 | glTexParameteri(GL_TEXTURE_CUBE_MAP, GL_TEXTURE_MAG_FILTER, GL_LINEAR);
81 | glTexParameteri(GL_TEXTURE_CUBE_MAP, GL_TEXTURE_MIN_FILTER, GL_LINEAR);
82 | glTexParameteri(GL_TEXTURE_CUBE_MAP, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE);
83 | glTexParameteri(GL_TEXTURE_CUBE_MAP, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE);
84 | glTexParameteri(GL_TEXTURE_CUBE_MAP, GL_TEXTURE_WRAP_R, GL_CLAMP_TO_EDGE);
85 | //use fast 4-byte alignment (default anyway) if possible
86 | glPixelStorei(GL_UNPACK_ALIGNMENT, (xneg.step & 3) ? 1 : 4);
87 | //set length of one complete row in data (doesn't need to equal image.cols)
88 | glPixelStorei(GL_UNPACK_ROW_LENGTH, xneg.step/xneg.elemSize());
89 | glTexImage2D(GL_TEXTURE_CUBE_MAP_POSITIVE_X, 0, GL_RGB, xpos.rows, xpos.cols, 0, GL_BGR, GL_UNSIGNED_BYTE, xpos.ptr());
90 | glTexImage2D(GL_TEXTURE_CUBE_MAP_NEGATIVE_X, 0, GL_RGB, xneg.rows, xneg.cols, 0, GL_BGR, GL_UNSIGNED_BYTE, xneg.ptr());
91 | glTexImage2D(GL_TEXTURE_CUBE_MAP_POSITIVE_Y, 0, GL_RGB, ypos.rows, ypos.cols, 0, GL_BGR, GL_UNSIGNED_BYTE, ypos.ptr());
92 | glTexImage2D(GL_TEXTURE_CUBE_MAP_NEGATIVE_Y, 0, GL_RGB, yneg.rows, yneg.cols, 0, GL_BGR, GL_UNSIGNED_BYTE, yneg.ptr());
93 | glTexImage2D(GL_TEXTURE_CUBE_MAP_POSITIVE_Z, 0, GL_RGB, zpos.rows, zpos.cols, 0, GL_BGR, GL_UNSIGNED_BYTE, zpos.ptr());
94 | glTexImage2D(GL_TEXTURE_CUBE_MAP_NEGATIVE_Z, 0, GL_RGB, zneg.rows, zneg.cols, 0, GL_BGR, GL_UNSIGNED_BYTE, zneg.ptr());
95 | assertOpenGLError("RGB texture");
96 | if (includeDepth) {
97 | // Depth Texture
98 | glActiveTexture(GL_TEXTURE0);
99 | glEnable(GL_TEXTURE_CUBE_MAP);
100 | glGenTextures(1, &depth_texture);
101 | glBindTexture(GL_TEXTURE_CUBE_MAP, depth_texture);
102 | glTexParameteri(GL_TEXTURE_CUBE_MAP, GL_TEXTURE_MAG_FILTER, GL_NEAREST);
103 | glTexParameteri(GL_TEXTURE_CUBE_MAP, GL_TEXTURE_MIN_FILTER, GL_NEAREST);
104 | glTexParameteri(GL_TEXTURE_CUBE_MAP, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE);
105 | glTexParameteri(GL_TEXTURE_CUBE_MAP, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE);
106 | glTexParameteri(GL_TEXTURE_CUBE_MAP, GL_TEXTURE_WRAP_R, GL_CLAMP_TO_EDGE);
107 | //use fast 4-byte alignment (default anyway) if possible
108 | glPixelStorei(GL_UNPACK_ALIGNMENT, (xnegD.step & 3) ? 1 : 4);
109 | //set length of one complete row in data (doesn't need to equal image.cols)
110 | glPixelStorei(GL_UNPACK_ROW_LENGTH, xnegD.step/xnegD.elemSize());
111 | glTexImage2D(GL_TEXTURE_CUBE_MAP_POSITIVE_X, 0, GL_RED, xposD.rows, xposD.cols, 0, GL_RED, GL_UNSIGNED_SHORT, xposD.ptr());
112 | glTexImage2D(GL_TEXTURE_CUBE_MAP_NEGATIVE_X, 0, GL_RED, xnegD.rows, xnegD.cols, 0, GL_RED, GL_UNSIGNED_SHORT, xnegD.ptr());
113 | glTexImage2D(GL_TEXTURE_CUBE_MAP_POSITIVE_Y, 0, GL_RED, yposD.rows, yposD.cols, 0, GL_RED, GL_UNSIGNED_SHORT, yposD.ptr());
114 | glTexImage2D(GL_TEXTURE_CUBE_MAP_NEGATIVE_Y, 0, GL_RED, ynegD.rows, ynegD.cols, 0, GL_RED, GL_UNSIGNED_SHORT, ynegD.ptr());
115 | glTexImage2D(GL_TEXTURE_CUBE_MAP_POSITIVE_Z, 0, GL_RED, zposD.rows, zposD.cols, 0, GL_RED, GL_UNSIGNED_SHORT, zposD.ptr());
116 | glTexImage2D(GL_TEXTURE_CUBE_MAP_NEGATIVE_Z, 0, GL_RED, znegD.rows, znegD.cols, 0, GL_RED, GL_UNSIGNED_SHORT, znegD.ptr());
117 | assertOpenGLError("Depth texture");
118 | }
119 | }
120 |
121 |
122 | void NavGraph::Location::deleteCubemapTextures() {
123 | // no need to check existence, silently ignores errors
124 | glDeleteTextures(1, &cubemap_texture);
125 | glDeleteTextures(1, &depth_texture);
126 | cubemap_texture = 0;
127 | depth_texture = 0;
128 | }
129 |
130 |
131 | std::pair NavGraph::Location::cubemapTextures() {
132 | if (glIsTexture(cubemap_texture)){
133 | return {cubemap_texture, depth_texture};
134 | }
135 | if (!im_loaded) {
136 | loadCubemapImages();
137 | }
138 | loadCubemapTextures();
139 | return {cubemap_texture, depth_texture};
140 | }
141 |
142 |
143 | NavGraph::NavGraph(const std::string& navGraphPath, const std::string& datasetPath,
144 | bool preloadImages, bool renderDepth, int randomSeed, unsigned int cacheSize) : cache(cacheSize) {
145 |
146 | generator.seed(randomSeed);
147 |
148 | auto textFile = navGraphPath + "/scans.txt";
149 | std::ifstream scansFile(textFile);
150 | if (scansFile.fail()){
151 | throw std::invalid_argument( "MatterSim: Could not open list of scans at: " +
152 | textFile + ", is path valid?" );
153 | }
154 | std::vector<std::string> scanIds;
155 | std::copy(std::istream_iterator<std::string>(scansFile),
156 | std::istream_iterator<std::string>(),
157 | std::back_inserter(scanIds));
158 |
159 | #pragma omp parallel for
160 | for (unsigned int i=0; i<scanIds.size(); ++i) {
161 | std::string scanId = scanIds.at(i);
162 | auto navGraphFile = navGraphPath + "/" + scanId + "_connectivity.json";
163 | std::ifstream ifs(navGraphFile, std::ifstream::in);
164 | if (ifs.fail()){
165 | throw std::invalid_argument( "MatterSim: Could not open navigation graph file: " +
166 | navGraphFile + ", is path valid?" );
167 | }
168 | Json::Value root;
169 | ifs >> root;
170 | auto skyboxDir = datasetPath + "/" + scanId + "/matterport_skybox_images/";
171 | #pragma omp critical
172 | {
173 | scanLocations.insert(std::pair<std::string, std::vector<LocationPtr> > (scanId,
174 | std::vector<LocationPtr>()));
175 | }
176 | for (auto viewpoint : root) {
177 | Location l(viewpoint, skyboxDir, preloadImages, renderDepth);
178 | #pragma omp critical
179 | {
180 | scanLocations[scanId].push_back(std::make_shared<Location>(l));
181 | }
182 | }
183 | }
184 | }
185 |
186 |
187 | NavGraph::~NavGraph() {
188 | // free all remaining textures
189 | for (auto scan : scanLocations) {
190 | for (auto loc : scan.second) {
191 | loc->deleteCubemapTextures();
192 | }
193 | }
194 | }
195 |
196 |
197 | NavGraph& NavGraph::getInstance(const std::string& navGraphPath, const std::string& datasetPath,
198 | bool preloadImages, bool renderDepth, int randomSeed, unsigned int cacheSize){
199 | // magic static
200 | static NavGraph instance(navGraphPath, datasetPath, preloadImages, renderDepth, randomSeed, cacheSize);
201 | return instance;
202 | }
203 |
204 |
205 | const std::string& NavGraph::randomViewpoint(const std::string& scanId) {
206 | std::uniform_int_distribution<int> distribution(0,scanLocations.at(scanId).size()-1);
207 | int start_ix = distribution(generator); // generates random starting index
208 | int ix = start_ix;
209 | while (!scanLocations.at(scanId).at(ix)->included) { // Don't start at an excluded viewpoint
210 | ix++;
211 | if (ix >= scanLocations.at(scanId).size()) ix = 0;
212 | if (ix == start_ix) {
213 | throw std::logic_error( "MatterSim: ScanId: " + scanId + " has no included viewpoints!");
214 | }
215 | }
216 | return scanLocations.at(scanId).at(ix)->viewpointId;
217 | }
218 |
219 |
220 | unsigned int NavGraph::index(const std::string& scanId, const std::string& viewpointId) const {
221 | int ix = -1;
222 | for (int i = 0; i < scanLocations.at(scanId).size(); ++i) {
223 | if (scanLocations.at(scanId).at(i)->viewpointId == viewpointId) {
224 | if (!scanLocations.at(scanId).at(i)->included) {
225 | throw std::invalid_argument( "MatterSim: ViewpointId: " +
226 | viewpointId + ", is excluded from the connectivity graph." );
227 | }
228 | ix = i;
229 | break;
230 | }
231 | }
232 | if (ix < 0) {
233 | throw std::invalid_argument( "MatterSim: Could not find viewpointId: " +
234 | viewpointId + ", is viewpoint id valid?" );
235 | } else {
236 | return ix;
237 | }
238 | }
239 |
240 | const std::string& NavGraph::viewpoint(const std::string& scanId, unsigned int ix) const {
241 | return scanLocations.at(scanId).at(ix)->viewpointId;
242 | }
243 |
244 |
245 | const glm::mat4& NavGraph::cameraRotation(const std::string& scanId, unsigned int ix) const {
246 | return scanLocations.at(scanId).at(ix)->rot;
247 | }
248 |
249 |
250 | const glm::vec3& NavGraph::cameraPosition(const std::string& scanId, unsigned int ix) const {
251 | return scanLocations.at(scanId).at(ix)->pos;
252 | }
253 |
254 |
255 | std::vector<unsigned int> NavGraph::adjacentViewpointIndices(const std::string& scanId, unsigned int ix) const {
256 | std::vector<unsigned int> reachable;
257 | for (unsigned int i = 0; i < scanLocations.at(scanId).size(); ++i) {
258 | if (i == ix) {
259 | // Skip option to stay at the same viewpoint
260 | continue;
261 | }
262 | if (scanLocations.at(scanId).at(ix)->unobstructed[i] && scanLocations.at(scanId).at(i)->included) {
263 | reachable.push_back(i);
264 | }
265 | }
266 | return reachable;
267 | }
268 |
269 |
270 | std::pair<GLuint, GLuint> NavGraph::cubemapTextures(const std::string& scanId, unsigned int ix) {
271 | LocationPtr loc = scanLocations.at(scanId).at(ix);
272 | std::pair<GLuint, GLuint> textures = loc->cubemapTextures();
273 | cache.add(loc);
274 | return textures;
275 | }
276 |
277 |
278 | void NavGraph::deleteCubemapTextures(const std::string& scanId, unsigned int ix) {
279 | scanLocations.at(scanId).at(ix)->deleteCubemapTextures();
280 | }
281 |
282 |
283 | }
284 |
--------------------------------------------------------------------------------
/src/lib/fragment.sh:
--------------------------------------------------------------------------------
1 | R""(
2 | #version 120
3 |
4 | varying vec3 texCoord;
5 | varying vec4 camCoord;
6 | uniform samplerCube cubemap;
7 | const vec3 camlook = vec3( 0.0, 0.0, -1.0 );
8 | uniform bool isDepth;
9 |
10 | void main (void) {
11 | vec4 color = textureCube(cubemap, texCoord);
12 | if (isDepth) {
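// Added note: the cubemap depth texture stores Euclidean (radial) distance from the camera
// centre (see scripts/depth_to_skybox.py), so the cosine scale below converts it to planar z-depth.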
13 | float scale = dot(camCoord.xyz, camlook) / length(camCoord.xyz);
14 | gl_FragColor = color*scale;
15 | } else {
16 | gl_FragColor = color;
17 | }
18 | }
19 | )""
20 |
--------------------------------------------------------------------------------
/src/lib/vertex.sh:
--------------------------------------------------------------------------------
1 | R""(
2 | #version 120
3 |
4 | attribute vec3 vertex;
5 | varying vec3 texCoord;
6 | varying vec4 camCoord;
7 | uniform mat4 ProjMat;
8 | uniform mat4 ModelViewMat;
9 |
10 | void main() {
11 | camCoord = ModelViewMat * vec4(vertex, 1.0);
12 | gl_Position = ProjMat * camCoord;
13 | texCoord = vertex;
14 | }
15 | )""
16 |
--------------------------------------------------------------------------------
/src/lib_python/MatterSimPython.cpp:
--------------------------------------------------------------------------------
1 | #include <pybind11/pybind11.h>
2 | #include <pybind11/stl.h>
3 | #include "MatterSim.hpp"
4 | #include "cbf.h"
5 |
6 | namespace py = pybind11;
7 |
8 | namespace mattersim {
9 |
10 | void cbf(py::buffer depth, py::buffer intensity, py::buffer mask, py::buffer result) {
11 | double spaceSigmas[3] = {12, 5, 8};
12 | double rangeSigmas[3] = {0.2, 0.08, 0.02};
13 | py::buffer_info d_info = depth.request();
14 | py::buffer_info i_info = intensity.request();
15 | py::buffer_info m_info = mask.request();
16 | py::buffer_info r_info = result.request();
17 | cbf::cbf(d_info.shape[0], d_info.shape[1],
18 | static_cast<uint8_t*>(d_info.ptr),
19 | static_cast<uint8_t*>(i_info.ptr),
20 | static_cast<uint8_t*>(m_info.ptr),
21 | static_cast<uint8_t*>(r_info.ptr),
22 | 3, &spaceSigmas[0], &rangeSigmas[0]);
23 | }
24 |
25 | }
26 |
27 | using namespace mattersim;
28 |
29 | PYBIND11_MODULE(MatterSim, m) {
30 | m.def("cbf", &mattersim::cbf, "Cross Bilateral Filter");
31 | py::class_<Viewpoint, ViewpointPtr>(m, "ViewPoint")
32 | .def_readonly("viewpointId", &Viewpoint::viewpointId)
33 | .def_readonly("ix", &Viewpoint::ix)
34 | .def_readonly("x", &Viewpoint::x)
35 | .def_readonly("y", &Viewpoint::y)
36 | .def_readonly("z", &Viewpoint::z)
37 | .def_readonly("rel_heading", &Viewpoint::rel_heading)
38 | .def_readonly("rel_elevation", &Viewpoint::rel_elevation)
39 | .def_readonly("rel_distance", &Viewpoint::rel_distance);
40 | py::class_<cv::Mat>(m, "Mat", pybind11::buffer_protocol())
41 | .def_buffer([](cv::Mat& im) -> pybind11::buffer_info {
42 | ssize_t item_size = im.elemSize() / im.channels();
43 | std::string format = pybind11::format_descriptor<uint8_t>::format();
44 | if (item_size == 2) { // handle 16bit data from depth maps
45 | format = pybind11::format_descriptor<uint16_t>::format();
46 | }
47 | return pybind11::buffer_info(
48 | im.data, // Pointer to buffer
49 | item_size, // Size of one scalar
50 | format,
51 | 3, // Number of dimensions (row, cols, channels)
52 | { im.rows, im.cols, im.channels() }, // Buffer dimensions
53 | { // Strides (in bytes) for each index
54 | item_size * im.channels() * im.cols,
55 | item_size * im.channels(),
56 | item_size
57 | }
58 | );
59 | });
60 | py::class_<SimState, SimStatePtr>(m, "SimState")
61 | .def_readonly("scanId", &SimState::scanId)
62 | .def_readonly("step", &SimState::step)
63 | .def_readonly("rgb", &SimState::rgb)
64 | .def_readonly("depth", &SimState::depth)
65 | .def_readonly("location", &SimState::location)
66 | .def_readonly("heading", &SimState::heading)
67 | .def_readonly("elevation", &SimState::elevation)
68 | .def_readonly("viewIndex", &SimState::viewIndex)
69 | .def_readonly("navigableLocations", &SimState::navigableLocations);
70 | py::class_<Simulator>(m, "Simulator")
71 | .def(py::init<>())
72 | .def("setDatasetPath", &Simulator::setDatasetPath)
73 | .def("setNavGraphPath", &Simulator::setNavGraphPath)
74 | .def("setRenderingEnabled", &Simulator::setRenderingEnabled)
75 | .def("setCameraResolution", &Simulator::setCameraResolution)
76 | .def("setCameraVFOV", &Simulator::setCameraVFOV)
77 | .def("setElevationLimits", &Simulator::setElevationLimits)
78 | .def("setDiscretizedViewingAngles", &Simulator::setDiscretizedViewingAngles)
79 | .def("setPreloadingEnabled", &Simulator::setPreloadingEnabled)
80 | .def("setDepthEnabled", &Simulator::setDepthEnabled)
81 | .def("setBatchSize", &Simulator::setBatchSize)
82 | .def("setCacheSize", &Simulator::setCacheSize)
83 | .def("setSeed", &Simulator::setSeed)
84 | .def("initialize", &Simulator::initialize)
85 | .def("newEpisode", &Simulator::newEpisode)
86 | .def("newRandomEpisode", &Simulator::newRandomEpisode)
87 | .def("getState", &Simulator::getState, py::return_value_policy::take_ownership)
88 | .def("makeAction", &Simulator::makeAction)
89 | .def("close", &Simulator::close)
90 | .def("resetTimers", &Simulator::resetTimers)
91 | .def("timingInfo", &Simulator::timingInfo);
92 | }
93 |
--------------------------------------------------------------------------------
/src/test/python_test.py:
--------------------------------------------------------------------------------
1 | import sys
2 | sys.path.append('build')
3 |
4 | from MatterSim import Simulator
5 | import math
6 | import cv2
7 | import json
8 | import numpy as np
9 |
10 |
11 | sim = Simulator()
12 | sim.setCameraResolution(500, 300)
13 | sim.setCameraVFOV(math.radians(60))
14 | sim.setElevationLimits(math.radians(-40),math.radians(50))
15 | sim.initialize()
16 |
17 | with open("src/test/rendertest_spec.json") as f:
18 | spec = json.load(f)
19 | for tc in spec[0][:1]:
20 | sim.newEpisode([tc["scanId"]], [tc["viewpointId"]], [tc["heading"]], [tc["elevation"]])
21 | state = sim.getState()[0]
22 | im = np.array(state.rgb, copy=False)
23 | imgfile = tc["reference_image"]
24 | cv2.imwrite("sim_imgs/"+imgfile, im)
25 | cv2.imshow('rendering', im)
26 | cv2.waitKey(0)
27 |
28 |
29 |
--------------------------------------------------------------------------------
/src/test/rendertest_spec.json:
--------------------------------------------------------------------------------
1 | [
2 | [
3 | {
4 | "scanId": "17DRP5sb8fy",
5 | "viewpointId": "85c23efeaecd4d43a7dcd5b90137179e",
6 | "elevation": 0.008557380839564054,
7 | "heading": 2.551961945320492,
8 | "reference_image": "17DRP5sb8fy_85c23efeaecd4d43a7dcd5b90137179e_2.551961945320492_0.008557380839564054.png"
9 | },
10 | {
11 | "scanId": "1LXtFkjw3qL",
12 | "viewpointId": "187589bb7d4644f2943079fb949c0be9",
13 | "elevation": 0.0004921836022802584,
14 | "heading": 1.8699330579409539,
15 | "reference_image": "1LXtFkjw3qL_187589bb7d4644f2943079fb949c0be9_1.8699330579409539_0.0004921836022802584.png"
16 | },
17 | {
18 | "scanId": "1pXnuDYAj8r",
19 | "viewpointId": "163d61ac7edb43fb958c5d9e69ae11ad",
20 | "elevation": -0.02444352614304746,
21 | "heading": 4.626331047551077,
22 | "reference_image": "1pXnuDYAj8r_163d61ac7edb43fb958c5d9e69ae11ad_4.626331047551077_-0.02444352614304746.png"
23 | },
24 | {
25 | "scanId": "29hnd4uzFmX",
26 | "viewpointId": "1576d62e7bbb45e8a5ef9e7bb37b1839",
27 | "elevation": -0.0006838914039405167,
28 | "heading": 5.844119909926444,
29 | "reference_image": "29hnd4uzFmX_1576d62e7bbb45e8a5ef9e7bb37b1839_5.844119909926444_-0.0006838914039405167.png"
30 | }
31 | ],
32 | [
33 | {
34 | "scanId": "2azQ1b91cZZ",
35 | "viewpointId": "3daad58ad53742038e50d62e91f84e7b",
36 | "elevation": 0.016732869758208434,
37 | "heading": 3.1736484087962933,
38 | "reference_image": "2azQ1b91cZZ_3daad58ad53742038e50d62e91f84e7b_3.1736484087962933_0.016732869758208434.png"
39 | },
40 | {
41 | "scanId": "2n8kARJN3HM",
42 | "viewpointId": "94ac3cea52ec455993f8562f78da3be1",
43 | "elevation": -0.0009188787844489273,
44 | "heading": 2.604601935142565,
45 | "reference_image": "2n8kARJN3HM_94ac3cea52ec455993f8562f78da3be1_2.604601935142565_-0.0009188787844489273.png"
46 | },
47 | {
48 | "scanId": "2t7WUuJeko7",
49 | "viewpointId": "529f006f8293406da0b506defd2891a5",
50 | "elevation": -0.013788837143969411,
51 | "heading": 0.032985516949381344,
52 | "reference_image": "2t7WUuJeko7_529f006f8293406da0b506defd2891a5_0.032985516949381344_-0.013788837143969411.png"
53 | },
54 | {
55 | "scanId": "5LpN3gDmAk7",
56 | "viewpointId": "bda8025f20404048a77381e9e0dc0ccf",
57 | "elevation": -0.01083211073205187,
58 | "heading": 5.325207878739601,
59 | "reference_image": "5LpN3gDmAk7_bda8025f20404048a77381e9e0dc0ccf_5.325207878739601_-0.01083211073205187.png"
60 | }
61 | ],
62 | [
63 | {
64 | "scanId": "5q7pvUzZiYa",
65 | "viewpointId": "397403366d784caf804d741f32fd68b9",
66 | "elevation": -0.0007063598518199811,
67 | "heading": 2.8746465006968234,
68 | "reference_image": "5q7pvUzZiYa_397403366d784caf804d741f32fd68b9_2.8746465006968234_-0.0007063598518199811.png"
69 | },
70 | {
71 | "scanId": "5ZKStnWn8Zo",
72 | "viewpointId": "c76b52856e7c4f2a9a4419000c8e646a",
73 | "elevation": -0.02922217527541366,
74 | "heading": 4.13470589902238,
75 | "reference_image": "5ZKStnWn8Zo_c76b52856e7c4f2a9a4419000c8e646a_4.13470589902238_-0.02922217527541366.png"
76 | },
77 | {
78 | "scanId": "759xd9YjKW5",
79 | "viewpointId": "2343ef3bf04a4433af62f0d527d7512a",
80 | "elevation": -0.016938006310169448,
81 | "heading": 3.5451019786019264,
82 | "reference_image": "759xd9YjKW5_2343ef3bf04a4433af62f0d527d7512a_3.5451019786019264_-0.016938006310169448.png"
83 | },
84 | {
85 | "scanId": "7y3sRwLe3Va",
86 | "viewpointId": "9bbf903d50da4ffd9e5d1fb7c9f4d69b",
87 | "elevation": 0.008361841032265524,
88 | "heading": 1.7348660165523566,
89 | "reference_image": "7y3sRwLe3Va_9bbf903d50da4ffd9e5d1fb7c9f4d69b_1.7348660165523566_0.008361841032265524.png"
90 | }
91 | ],
92 | [
93 | {
94 | "scanId": "8194nk5LbLH",
95 | "viewpointId": "c9e8dc09263e4d0da77d16de0ecddd39",
96 | "elevation": 0.008533161479170466,
97 | "heading": 4.05504292862083,
98 | "reference_image": "8194nk5LbLH_c9e8dc09263e4d0da77d16de0ecddd39_4.05504292862083_0.008533161479170466.png"
99 | },
100 | {
101 | "scanId": "82sE5b5pLXE",
102 | "viewpointId": "056a491afa534b17bac36f4f5898462a",
103 | "elevation": -0.0037883068413356496,
104 | "heading": 1.689393931320027,
105 | "reference_image": "82sE5b5pLXE_056a491afa534b17bac36f4f5898462a_1.689393931320027_-0.0037883068413356496.png"
106 | },
107 | {
108 | "scanId": "8WUmhLawc2A",
109 | "viewpointId": "d21aae0b5d944f27a0074525c803fc9f",
110 | "elevation": -0.04510889155759994,
111 | "heading": 3.047458184407221,
112 | "reference_image": "8WUmhLawc2A_d21aae0b5d944f27a0074525c803fc9f_3.047458184407221_-0.04510889155759994.png"
113 | },
114 | {
115 | "scanId": "ac26ZMwG7aT",
116 | "viewpointId": "efeef7cc82c84690addb0bf415f075ea",
117 | "elevation": -0.013447513736072197,
118 | "heading": 0.07434352566701552,
119 | "reference_image": "ac26ZMwG7aT_efeef7cc82c84690addb0bf415f075ea_0.07434352566701552_-0.013447513736072197.png"
120 | }
121 | ],
122 | [
123 | {
124 | "scanId": "ARNzJeq3xxb",
125 | "viewpointId": "9a671e6915de4eb897f45fee8bf2031d",
126 | "elevation": 0.02583868533558965,
127 | "heading": 5.616355886953764,
128 | "reference_image": "ARNzJeq3xxb_9a671e6915de4eb897f45fee8bf2031d_5.616355886953764_0.02583868533558965.png"
129 | },
130 | {
131 | "scanId": "B6ByNegPMKs",
132 | "viewpointId": "e3a65955df26467581c32613c4e9f865",
133 | "elevation": 0.007265625492957138,
134 | "heading": 5.230794959607039,
135 | "reference_image": "B6ByNegPMKs_e3a65955df26467581c32613c4e9f865_5.230794959607039_0.007265625492957138.png"
136 | },
137 | {
138 | "scanId": "b8cTxDM8gDG",
139 | "viewpointId": "f2944e0b66b9461994a7f757582f9bc3",
140 | "elevation": -0.007543204141144086,
141 | "heading": 0.0853092784395515,
142 | "reference_image": "b8cTxDM8gDG_f2944e0b66b9461994a7f757582f9bc3_0.0853092784395515_-0.007543204141144086.png"
143 | },
144 | {
145 | "scanId": "cV4RVeZvu5T",
146 | "viewpointId": "1b321779a4374c2b952c51820daa9e6c",
147 | "elevation": 0.07914721704610106,
148 | "heading": 6.266463179566256,
149 | "reference_image": "cV4RVeZvu5T_1b321779a4374c2b952c51820daa9e6c_6.266463179566256_0.07914721704610106.png"
150 | }
151 | ]
152 | ]
153 |
--------------------------------------------------------------------------------
/tasks/R2R/Agents/__init__.py:
--------------------------------------------------------------------------------
1 | from tasks.R2R.Agents.agent import R2RAgent, Oracle, Stop, Random, Dynamic
2 |
3 | agents = {'Base': R2RAgent,
4 | 'Oracle': Oracle,
5 | 'Stop': Stop,
6 | 'Random': Random,
7 | 'Dynamic': Dynamic,
8 | }
9 |
10 |
11 | def get_agent(name, config):
12 | assert name in agents.keys(), '%s is not a valid agent name' % name
13 | return agents[name](config)
14 |
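A minimal usage sketch of the lookup above, assuming the repository root is the working directory (the rule-based agents simply store whatever config dict they receive):

```python
import sys
sys.path.append('.')  # assumes the repository root is the current directory

from tasks.R2R.Agents import get_agent

# Rule-based agents ('Stop', 'Random', 'Oracle') only store the config dict;
# 'Dynamic' additionally needs the keys read in Dynamic.__init__.
agent = get_agent('Stop', config={})
print(agent.get_name())  # -> 'Stop'
```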
--------------------------------------------------------------------------------
/tasks/R2R/Agents/agent.py:
--------------------------------------------------------------------------------
1 | import torch
2 | import numpy as np
3 | import sys
4 |
5 | from tasks.R2R.Models import DynamicDecoder, InstructionEncoder
6 | from tasks.R2R.utils import append_coordinates, batched_sentence_embedding, to_one_hot
7 |
8 | sys.path.append('speaksee')
9 | import speaksee.vocab as ssvoc
10 |
11 |
12 | class R2RAgent:
13 |
14 | low_level_actions = [
15 | (0, -1, 0), # left
16 | (0, 1, 0), # right
17 | (0, 0, 1), # up
18 | (0, 0, -1), # down
19 | (1, 0, 0), # forward
20 | (0, 0, 0), # <end>
21 | ]
22 |
23 | def __init__(self, config):
24 | self.config = config
25 | self.name = 'Base'
26 |
27 | def get_name(self):
28 | return self.name
29 |
30 | def get_config(self):
31 | return self.config
32 |
33 | def rollout(self, env):
34 | raise NotImplementedError
35 |
36 | def train(self):
37 | """ Should call Module.train() on each torch.nn.Module, if present """
38 | pass
39 |
40 | def eval(self):
41 | """ Should call Module.eval() on each torch.nn.Module, if present """
42 | pass
43 |
44 |
45 | class Oracle(R2RAgent):
46 | def __init__(self, config):
47 | super(Oracle, self).__init__(config)
48 | self.name = 'Oracle'
49 |
50 | def rollout(self, env):
51 | obs = env.reset()
52 | traj = [{
53 | 'instr_id': ob['instr_id'],
54 | 'path': [(ob['viewpoint'], ob['heading'], ob['elevation'])]
55 | } for ob in obs]
56 | ended = np.array([False] * len(obs))
57 |
58 | while True:
59 | actions = [ob['teacher'] for ob in obs]
60 | obs = env.step(actions)
61 | for i, a in enumerate(actions):
62 | if a == (0, 0, 0):
63 | ended[i] = True
64 | for i, ob in enumerate(obs):
65 | if not ended[i]:
66 | traj[i]['path'].append((ob['viewpoint'], ob['heading'], ob['elevation']))
67 | if ended.all():
68 | break
69 |
70 | return traj
71 |
72 |
73 | class Stop(R2RAgent):
74 | def __init__(self, config):
75 | super(Stop, self).__init__(config)
76 | self.name = 'Stop'
77 |
78 | def rollout(self, env):
79 | obs = env.reset()
80 | traj = [{
81 | 'instr_id': ob['instr_id'],
82 | 'path': [(ob['viewpoint'], ob['heading'], ob['elevation'])]
83 | } for ob in obs]
84 | return traj
85 |
86 |
87 | class Random(R2RAgent):
88 | def __init__(self, config):
89 | super(Random, self).__init__(config)
90 | self.name = 'Random'
91 |
92 | def rollout(self, env):
93 | obs = env.reset()
94 | traj = [{
95 | 'instr_id': ob['instr_id'],
96 | 'path': [(ob['viewpoint'], ob['heading'], ob['elevation'])]
97 | } for ob in obs]
98 | ended = np.array([False] * len(obs))
99 |
100 | for t in range(20):
101 | actions_idx = np.random.randint(0, len(R2RAgent.low_level_actions), len(obs))
102 | actions = [(0, 1, 0) if len(obs[i]['navigableLocations']) <= 1 and idx == R2RAgent.low_level_actions.index((1, 0, 0))
103 | else R2RAgent.low_level_actions[idx] for i, idx in enumerate(actions_idx)]
104 | obs = env.step(actions)
105 | for i, a in enumerate(actions):
106 | if a == (0, 0, 0):
107 | ended[i] = True
108 | for i, ob in enumerate(obs):
109 | if not ended[i]:
110 | traj[i]['path'].append((ob['viewpoint'], ob['heading'], ob['elevation']))
111 | if ended.all():
112 | break
113 |
114 | return traj
115 |
116 |
117 | class Dynamic(R2RAgent):
118 |
119 | env_actions = [
120 | (0, -1, 0), # left
121 | (0, 1, 0), # right
122 |
123 | (0, 0, 1), # up
124 | (0, 0, -1), # down
125 |
126 | (1, 0, 0), # forward
127 |
128 | (0, 0, 0), # <end>
129 | (0, 0, 0), # <start>
130 | ]
131 |
132 | def __init__(self, config):
133 | super(Dynamic, self).__init__(config)
134 | self.name = 'Dynamic'
135 | self.mode = None
136 |
137 | self.device = config['device']
138 | self.max_episode_len = config['max_episode_len']
139 | self.criterion = torch.nn.CrossEntropyLoss()
140 | self.num_heads = config['num_heads']
141 | self.glove = ssvoc.GloVe()
142 | self.lstm_input_size = 36 * self.num_heads + Dynamic.n_inputs()
143 |
144 | self.encoder = InstructionEncoder(input_size=300,
145 | hidden_size=512,
146 | use_bias=True).to(device=self.device)
147 |
148 | self.policy = DynamicDecoder(input_size=self.lstm_input_size,
149 | hidden_size=512, output_size=6,
150 | key_size=128, query_size=128, value_size=512,
151 | image_size=2051, filter_size=512,
152 | num_heads=self.num_heads,
153 | drop_prob=0.5,
154 | use_bias=True,
155 | filter_activation=torch.nn.Tanh(),
156 | policy_activation=torch.nn.Softmax(dim=-1)).to(device=self.device)
157 |
158 | @staticmethod
159 | def n_inputs():
160 | return len(Dynamic.env_actions)
161 |
162 | def train(self):
163 | self.mode = 'train'
164 | self.encoder.train()
165 | self.policy.train()
166 |
167 | def eval(self):
168 | self.mode = 'eval'
169 | self.encoder.eval()
170 | self.policy.eval()
171 |
172 | def save(self, encoder_path, policy_path):
173 | torch.save(self.encoder.state_dict(), encoder_path)
174 | torch.save(self.policy.state_dict(), policy_path)
175 |
176 | def load(self, encoder_path, policy_path):
177 | pretrained_dict_encoder = torch.load(encoder_path)
178 | pretrained_dict_decoder = torch.load(policy_path)
179 |
180 | encoder_dict = self.encoder.state_dict()
181 | decoder_dict = self.policy.state_dict()
182 |
183 | # 1. filter out unnecessary keys
184 | pretrained_dict_encoder = {k: v for k, v in pretrained_dict_encoder.items() if k in encoder_dict}
185 | pretrained_dict_decoder = {k: v for k, v in pretrained_dict_decoder.items() if k in decoder_dict}
186 |
187 | # 2. overwrite entries in the existing state dict
188 | encoder_dict.update(pretrained_dict_encoder)
189 | decoder_dict.update(pretrained_dict_decoder)
190 |
191 | # 3. load the new state dict
192 | self.encoder.load_state_dict(encoder_dict)
193 | self.policy.load_state_dict(decoder_dict)
194 |
195 | def _get_targets_and_features(self, obs):
196 | target_actions = []
197 | target_idx = []
198 | features = []
199 |
200 | for i, ob in enumerate(obs):
201 | target_actions.append(
202 | ob['teacher'] if ob['teacher'] in self.env_actions else (1, 0, 0)
203 | )
204 | target_idx.append(self.env_actions.index(
205 | ob['teacher'] if ob['teacher'] in self.env_actions else (1, 0, 0)
206 | ))
207 | features.append(torch.from_numpy(ob['feature']))
208 |
209 | return target_actions, torch.tensor(target_idx), features
210 |
211 | def _encode_instruction(self, instructions):
212 | instr_embedding, instr_len = batched_sentence_embedding(instructions, self.glove, device=self.device)
213 | value = self.encoder(instr_embedding)
214 | return value
215 |
216 | def get_trainable_params(self):
217 | return list(self.encoder.parameters()) + list(self.policy.parameters())
218 |
219 | def rollout(self, env):
220 |
221 | assert self.mode is not None, "This agent contains trainable modules! Please call either agent.train() or agent.eval() before rollout"
222 | assert self.mode in ['train', 'eval'], "Agent.mode expected to be in ['train', 'eval'], found %s" % self.mode
223 |
224 | obs = env.reset()
225 | ended = np.array([False] * len(obs))
226 | losses = []
227 |
228 | traj = [{
229 | 'instr_id': ob['instr_id'],
230 | 'path': [(ob['viewpoint'], ob['heading'], ob['elevation'])]
231 | } for ob in obs]
232 |
233 | instr = [ob['instructions'] for ob in obs]
234 | value = self._encode_instruction(instr)
235 |
236 | target_actions, target_idx, features = self._get_targets_and_features(obs)
237 | previous_action = to_one_hot([Dynamic.n_inputs() - 1] * len(obs), Dynamic.n_inputs()) # Action at t=0 is <start> for every agent
238 |
239 | for t in range(self.max_episode_len):
240 |
241 | image_features = torch.stack(
242 | [append_coordinates(features[i], ob['heading'], ob['elevation']) for i, ob in enumerate(obs)]
243 | ).to(device=self.device)
244 |
245 | pred, logits, response_map = self.policy(image_features, value, previous_action, init_lstm_state=t == 0)
246 |
247 | """ Losses """
248 | step_loss = self.criterion(pred, target_idx.to(device=self.device))
249 | losses.append(step_loss)
250 |
251 | """ Performs steps """
252 | # Mask outputs where agent can't move forward
253 | probs = logits.clone().detach().to(device=torch.device('cpu'))
254 | for i, ob in enumerate(obs):
255 | if len(ob['navigableLocations']) <= 1:
256 | probs[i, self.env_actions.index((1, 0, 0))] = 0.
257 |
258 | if self.mode == 'eval':
259 | _, a_t = probs.max(1) # argmax
260 | actions = [self.env_actions[idx] for idx in a_t]
261 | else:
262 | m = torch.distributions.Categorical(probs) # sampling from distribution
263 | a_t = m.sample()
264 | actions = [self.env_actions[idx] if target_actions[i] != (0, 0, 0) else (0, 0, 0) for i, idx in enumerate(a_t)]
265 |
266 | """ Next step """
267 | obs = env.step(actions)
268 |
269 | for i, ob in enumerate(obs):
270 | if not ended[i]:
271 | if actions[i] == (0, 0, 0):
272 | ended[i] = True
273 | else:
274 | traj[i]['path'].append((ob['viewpoint'], ob['heading'], ob['elevation']))
275 |
276 | if ended.all():
277 | break
278 |
279 | target_actions, target_idx, features = self._get_targets_and_features(obs)
280 | previous_action = to_one_hot(a_t, self.n_inputs())
281 |
282 | """ Compute the loss for the whole rollout """
283 | losses = torch.stack(losses).to(device=self.device)
284 | rollout_loss = torch.mean(losses)
285 |
286 | return traj, rollout_loss
287 |
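As a usage sketch (values are illustrative, not from the repository): the `Dynamic` agent reads exactly the keys below from its config, and `rollout()` asserts that `train()` or `eval()` has been called first:

```python
import torch
from tasks.R2R.Agents import get_agent

config = {
    'device': torch.device('cuda' if torch.cuda.is_available() else 'cpu'),
    'max_episode_len': 20,   # illustrative values; main.py exposes these as CLI flags
    'num_heads': 1,
}
agent = get_agent('Dynamic', config)
agent.eval()                 # required: rollout() asserts a mode has been set
# traj, loss = agent.rollout(env)   # env would be a LowLevelR2RBatch (tasks/R2R/env.py)
```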
--------------------------------------------------------------------------------
/tasks/R2R/Models/__init__.py:
--------------------------------------------------------------------------------
1 | from tasks.R2R.Models.dynamic import InstructionEncoder, DynamicDecoder
2 |
--------------------------------------------------------------------------------
/tasks/R2R/Models/dynamic.py:
--------------------------------------------------------------------------------
1 | import torch
2 | import torch.nn as nn
3 | import torch.nn.functional as F
4 |
5 |
6 | class InstructionEncoder(nn.Module):
7 | """ Encodes instruction via LSTM """
8 | def __init__(self, input_size=300, hidden_size=512, use_bias=True):
9 | super(InstructionEncoder, self).__init__()
10 | self.input_size = input_size
11 | self.hidden_size = hidden_size
12 | self.use_bias = use_bias
13 |
14 | """ LSTM init"""
15 | self.lstm_cell = nn.LSTMCell(self.input_size, self.hidden_size, bias=self.use_bias)
16 | """ init weights"""
17 | for name, param in self.lstm_cell.named_parameters():
18 | if 'bias' in name:
19 | nn.init.constant_(param, 0.0)
20 | elif 'weight' in name:
21 | nn.init.orthogonal_(param)
22 |
23 | def forward(self, x):
24 | """ Checking data shape """
25 | forwd = x
26 | assert forwd.shape[1] == self.input_size, "Expected input with shape [batch, %s, seq_len], found %s" % (self.input_size, forwd.shape)
27 | batch_size = forwd.shape[0]
28 |
29 | """ init hidden and cell state """
30 | hx = torch.zeros(batch_size, self.hidden_size).cuda()
31 | cx = torch.zeros(batch_size, self.hidden_size).cuda()
32 | history = []
33 |
34 | """ forward through lstm """
35 | for seq in range(forwd.shape[-1]):
36 | input_data = forwd[..., seq]
37 | hx, cx = self.lstm_cell(input_data, (hx, cx))
38 | history.append(hx)
39 |
40 | stacked = torch.stack(history).transpose(0, 1)
41 | return stacked
42 |
43 |
44 | class DynamicDecoder(nn.Module):
45 | def __init__(self, input_size, hidden_size=512, output_size=6,
46 | key_size=128, query_size=128, value_size=512,
47 | image_size=2051, filter_size=512,
48 | num_heads=1,
49 | drop_prob=0.5, use_bias=True,
50 | filter_activation=nn.Tanh(),
51 | policy_activation=nn.Softmax(dim=-1)):
52 | super(DynamicDecoder, self).__init__()
53 |
54 | """ policy variables """
55 | self.input_size = input_size
56 | self.hidden_size = hidden_size
57 | self.output_size = output_size
58 | self.drop_prob = drop_prob
59 | self.use_bias = use_bias
60 | self.hx = None
61 | self.cx = None
62 |
63 | """ attention variables """
64 | self.key_size = key_size
65 | self.query_size = query_size
66 | self.value_size = value_size
67 |
68 | """ image feature pre-processing variables """
69 | self.image_size = image_size
70 | self.filter_size = filter_size
71 |
72 | """ attention linear layers and activations """
73 | self.fc_key = nn.Linear(self.value_size, self.key_size, bias=self.use_bias)
74 | self.fc_query = nn.Linear(self.hidden_size, self.query_size, bias=self.use_bias)
75 | self.softmax = nn.Softmax(dim=1)
76 | self.filter_activation = filter_activation
77 | self.num_heads = num_heads
78 | self.heads = [nn.Linear(
79 | self.value_size, self.filter_size
80 | ).to(device=torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
81 | ) for _ in range(self.num_heads)]
82 |
83 | """ policy layers and activation"""
84 | self.bottleneck = nn.Conv1d(self.image_size, self.filter_size, 1, stride=1, padding=0, bias=self.use_bias)
85 | self.fc_action = nn.Linear(7, 7, bias=True)
86 | self.lstm_cell = nn.LSTMCell(self.input_size, self.hidden_size, bias=self.use_bias)
87 | self.linear = nn.Linear(self.hidden_size, self.output_size, bias=self.use_bias)
88 | self.drop = nn.Dropout(p=self.drop_prob)
89 | self.drop_h = nn.Dropout(p=0.2)
90 | self.policy_activation = policy_activation
91 |
92 | """ init LSTM weights"""
93 | for name, param in self.lstm_cell.named_parameters():
94 | if 'bias' in name:
95 | nn.init.constant_(param, 0.0)
96 | elif 'weight' in name:
97 | nn.init.orthogonal_(param)
98 |
99 | def init_lstm_cell(self, batch_size):
100 | self.hx = torch.zeros(batch_size, self.hidden_size).cuda()
101 | self.cx = torch.zeros(batch_size, self.hidden_size).cuda()
102 |
103 | def forward(self, x, value, action, init_lstm_state=True):
104 | assert x.shape[0] == value.shape[0]
105 | assert x.shape[0] == action.shape[0]
106 | batch_size = x.shape[0]
107 |
108 | if init_lstm_state:
109 | self.init_lstm_cell(batch_size)
110 |
111 | """ value shape: [B, T, 512] -> key shape: [B, T, 128] """
112 | key = F.relu(self.fc_key(value))
113 |
114 | """ hx shape: [B, 512] -> query shape: [B, 128, 1]"""
115 | query = F.relu(self.fc_query(self.hx))
116 | query = query.unsqueeze(dim=-1)
117 |
118 | """ scaled-dot-product attention """
119 | scale_1 = torch.sqrt(torch.tensor(key.shape[-1], dtype=torch.double))
120 | scaled_dot_product = torch.bmm(key, query) / scale_1 # shape: [B, T, 1]
121 | softmax = self.softmax(scaled_dot_product) # shape: [B, T, 1]
122 | element_wise_product = value*softmax # shape: [B, T, 512]
123 | current_instruction = torch.sum(element_wise_product, dim=1) # shape: [B, 512]
124 |
125 | """ dynamic convolutional filters """
126 | dynamic_filter = torch.stack([head(self.drop_h(current_instruction)) for head in self.heads]).transpose(0, 1)
127 | dynamic_filter = self.filter_activation(dynamic_filter)
128 | dynamic_filter = F.normalize(dynamic_filter, p=2, dim=-1)
129 |
130 | """ Key must be in the format [Batch, Channels, L]; Channels == image_size """
131 | if x.shape[1] != self.image_size:
132 | x = x.transpose(1, 2)
133 |
134 | x = self.bottleneck(x)
135 |
136 | """ [36, N] = T[512, 36] * T[N, 512] """
137 | scale_2 = torch.sqrt(torch.tensor(x.shape[1], dtype=torch.double))
138 | attention_map = torch.bmm(x.transpose(1, 2), dynamic_filter.transpose(-1, -2)) / scale_2
139 | b, c, f = attention_map.shape
140 | attention_map = attention_map.reshape(b, c*f)
141 |
142 | action_embedded = self.fc_action(action.cuda())
143 | in_data = torch.cat((attention_map, action_embedded), 1)
144 |
145 | """ Shape of in_data must be [Batch, Input_size] """
146 | self.hx, self.cx = self.lstm_cell(in_data, (self.hx, self.cx))
147 |
148 | policy_data = self.hx
149 |
150 | drop = self.drop(policy_data)
151 | pred = self.linear(drop)
152 | logits = self.policy_activation(pred)
153 |
154 | return pred, logits, attention_map.reshape(b, c, f)
155 |
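To make the tensor shapes concrete, here is a sketch of one decoder step with dummy inputs (it assumes a CUDA device, since the modules hard-code `.cuda()` for their recurrent states; shapes follow the comments in `forward()`):

```python
import torch
from tasks.R2R.Models import InstructionEncoder, DynamicDecoder

B, T, num_heads = 2, 10, 1                             # batch, instruction length, heads
encoder = InstructionEncoder(input_size=300, hidden_size=512).cuda()
decoder = DynamicDecoder(input_size=36 * num_heads + 7, num_heads=num_heads).cuda()

instr = torch.randn(B, 300, T).cuda()                  # word embeddings, channels-first
value = encoder(instr)                                 # [B, T, 512]
img = torch.randn(B, 36, 2051).cuda()                  # 36 views x (2048 features + 3 coords)
prev_action = torch.eye(7)[[6, 6]]                     # one-hot <start> action per sample

pred, logits, response_map = decoder(img, value, prev_action, init_lstm_state=True)
# pred: [B, 6] raw action scores; logits: softmax over actions;
# response_map: [B, 36, num_heads] attention over the 36 discretized views
```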
--------------------------------------------------------------------------------
/tasks/R2R/data/download.sh:
--------------------------------------------------------------------------------
1 | #!/bin/sh
2 |
3 | wget https://www.dropbox.com/s/lztjsji51pr5ig2/R2R_train.json -P tasks/R2R/data/
4 | wget https://www.dropbox.com/s/66nowglznzx1le9/R2R_val_seen.json -P tasks/R2R/data/
5 | wget https://www.dropbox.com/s/it9zpexb97d6bes/R2R_val_unseen.json -P tasks/R2R/data/
6 | wget https://www.dropbox.com/s/0huat2lc5iy5o8j/R2R_test.json -P tasks/R2R/data/
7 |
--------------------------------------------------------------------------------
/tasks/R2R/env.py:
--------------------------------------------------------------------------------
1 | import sys
2 | import csv
3 | import numpy as np
4 | import math
5 | import base64
6 | import random
7 | import networkx as nx
8 |
9 | from tasks.R2R.utils import load_datasets, load_nav_graphs, print_progress
10 |
11 | sys.path.append('build')
12 | import MatterSim
13 |
14 |
15 | csv.field_size_limit(sys.maxsize)
16 |
17 |
18 | def _make_id(scan_id, viewpoint_id):
19 | return scan_id + '_' + viewpoint_id
20 |
21 |
22 | def load_features(feature_store):
23 | image_w, image_h, vfov = 640, 480, 60
24 |
25 | # if the tsv file for image features is provided
26 | if feature_store:
27 | tsv_fieldnames = ['scanId', 'viewpointId', 'image_w', 'image_h', 'vfov', 'features']
28 | features = {}
29 | with open(feature_store, "r") as tsv_in_file:
30 | print('Reading image features file %s' % feature_store)
31 | reader = list(csv.DictReader(tsv_in_file, delimiter='\t', fieldnames=tsv_fieldnames))
32 | total_length = len(reader)
33 |
34 | print('Loading image features...')
35 | for i, item in enumerate(reader):
36 | image_h = int(item['image_h'])
37 | image_w = int(item['image_w'])
38 | vfov = int(item['vfov'])
39 | long_id = _make_id(item['scanId'], item['viewpointId'])
40 | features[long_id] = np.frombuffer(base64.b64decode(item['features']),
41 | dtype=np.float32).reshape((36, 2048))
42 | print_progress(i + 1, total_length, prefix='Progress:',
43 | suffix='Complete', bar_length=50)
44 | else:
45 | print('Image features not provided')
46 | features = None
47 |
48 | return features, (image_w, image_h, vfov)
49 |
50 |
51 | class EnvBatch:
52 | """ A simple wrapper for a batch of MatterSim environments,
53 | using discretized viewpoints and pretrained features """
54 |
55 | def __init__(self, features, img_spec, batch_size=100):
56 | self.features = features
57 | self.image_w, self.image_h, self.vfov = img_spec
58 |
59 | self.batch_size = batch_size
60 | self.sim = MatterSim.Simulator()
61 | self.sim.setRenderingEnabled(False)
62 | self.sim.setDiscretizedViewingAngles(True)
63 | self.sim.setBatchSize(self.batch_size)
64 | self.sim.setCameraResolution(self.image_w, self.image_h)
65 | self.sim.setCameraVFOV(math.radians(self.vfov))
66 | self.sim.initialize()
67 |
68 | def new_episode(self, scan_ids, viewpoint_ids, headings):
69 | self.sim.newEpisode(scan_ids, viewpoint_ids, headings, [0] * self.batch_size)
70 |
71 | def get_states(self):
72 | """ Get list of states augmented with precomputed image features. rgb field will be empty. """
73 | feature_states = []
74 | for state in self.sim.getState():
75 | long_id = _make_id(state.scanId, state.location.viewpointId)
76 | if self.features:
77 | feature = self.features[long_id]
78 | feature_states.append((feature, state))
79 | else:
80 | feature_states.append((None, state))
81 | return feature_states
82 |
83 | def make_actions(self, actions):
84 | """ Take an action using the full state dependent action interface (with batched input).
85 | Every action element should be an (index, heading, elevation) tuple. """
86 | ix = []
87 | heading = []
88 | elevation = []
89 | for i, h, e in actions:
90 | ix.append(int(i))
91 | heading.append(float(h))
92 | elevation.append(float(e))
93 | self.sim.makeAction(ix, heading, elevation)
94 |
95 | def make_simple_actions(self, simple_indices):
96 | """ Take an action using a simple interface: 0-forward, 1-turn left, 2-turn right, 3-look up, 4-look down.
97 | All viewpoint changes are 30 degrees. Forward, look up and look down may not succeed - check state.
98 | WARNING - Very likely this simple interface restricts some edges in the graph. Parts of the
99 | environment may no longer be navigable. """
100 | actions = []
101 | for i, index in enumerate(simple_indices):
102 | if index == 0:
103 | actions.append((1, 0, 0))
104 | elif index == 1:
105 | actions.append((0, -1, 0))
106 | elif index == 2:
107 | actions.append((0, 1, 0))
108 | elif index == 3:
109 | actions.append((0, 0, 1))
110 | elif index == 4:
111 | actions.append((0, 0, -1))
112 | else:
113 | sys.exit("Invalid simple action")
114 | self.make_actions(actions)
115 |
116 |
117 | class R2RBatch:
118 | """ Implements the Room to Room navigation task, using discretized viewpoints and pretrained features """
119 |
120 | def __init__(self, features, img_spec, batch_size=100, seed=10, splits='train', tokenizer=None):
121 | self.env = EnvBatch(features, img_spec, batch_size=batch_size)
122 | self.data = []
123 | self.scans = []
124 |
125 | if isinstance(splits, str):
126 | splits = [splits]
127 |
128 | assert isinstance(splits, list), 'expected type list or str type for argument "splits", found %s' % type(splits)
129 |
130 | print('Loading {} dataset'.format(",".join(splits)))
131 |
132 | json_data = load_datasets(splits)
133 | total_length = len(json_data)
134 |
135 | for i, item in enumerate(json_data):
136 | # Split multiple instructions into separate entries
137 | for j, instr in enumerate(item['instructions']):
138 | self.scans.append(item['scan'])
139 | new_item = dict(item)
140 | new_item['instr_id'] = '%s_%d' % (item['path_id'], j)
141 | new_item['instructions'] = instr
142 | if tokenizer:
143 | new_item['instr_encoding'] = tokenizer.encode_sentence(instr)
144 | self.data.append(new_item)
145 | print_progress(i + 1, total_length, prefix='Progress:', suffix='Complete', bar_length=50)
146 | self.scans = set(self.scans)
147 | self.splits = splits
148 | self.seed = seed
149 | random.seed(self.seed)
150 | random.shuffle(self.data)
151 | self.ix = 0
152 | self.batch_size = batch_size
153 | self._load_nav_graphs()
154 | print('R2RBatch loaded with %d instructions, using splits: %s' % (len(self.data), ",".join(splits)))
155 |
156 | def _load_nav_graphs(self):
157 | """ Load connectivity graph for each scan, useful for reasoning about shortest paths """
158 | print('Loading navigation graphs for %d scans' % len(self.scans))
159 | self.graphs = load_nav_graphs(self.scans)
160 | self.paths = {}
161 | for scan, G in self.graphs.items(): # compute all shortest paths
162 | self.paths[scan] = dict(nx.all_pairs_dijkstra_path(G))
163 | self.distances = {}
164 | for scan, G in self.graphs.items(): # compute all shortest paths
165 | self.distances[scan] = dict(nx.all_pairs_dijkstra_path_length(G))
166 |
167 | def _next_minibatch(self):
168 | batch = self.data[self.ix:self.ix + self.batch_size]
169 | if len(batch) < self.batch_size:
170 | random.shuffle(self.data)
171 | self.ix = self.batch_size - len(batch)
172 | batch += self.data[:self.ix]
173 | else:
174 | self.ix += self.batch_size
175 | self.batch = batch
176 |
177 | def reset_epoch(self):
178 | """ Reset the data index to beginning of epoch. Primarily for testing.
179 | You must still call reset() for a new episode. """
180 | self.ix = 0
181 |
182 | def _get_obs(self):
183 | raise NotImplementedError
184 |
185 | def reset(self):
186 | raise NotImplementedError
187 |
188 | def step(self, actions):
189 | raise NotImplementedError
190 |
191 |
192 | class LowLevelR2RBatch(R2RBatch):
193 | def __init__(self, features, img_spec, batch_size=100, seed=10, splits='train', tokenizer=None):
194 | super(LowLevelR2RBatch, self).__init__(features, img_spec, batch_size, seed, splits, tokenizer)
195 |
196 | def _shortest_path_action(self, state, goalviewpoint_id):
197 | """ Determine next action on the shortest path to goal, for supervised training. """
198 | if state.location.viewpointId == goalviewpoint_id:
199 | return 0, 0, 0 # do nothing
200 | path = self.paths[state.scanId][state.location.viewpointId][goalviewpoint_id]
201 | nextviewpoint_id = path[1]
202 | # Can we see the next viewpoint?
203 | for i, loc in enumerate(state.navigableLocations):
204 | if loc.viewpointId == nextviewpoint_id:
205 | # Look directly at the viewpoint before moving
206 | if loc.rel_heading > math.pi / 6.0:
207 | return 0, 1, 0 # Turn right
208 | elif loc.rel_heading < -math.pi / 6.0:
209 | return 0, -1, 0 # Turn left
210 | elif loc.rel_elevation > math.pi / 6.0 and state.viewIndex // 12 < 2:
211 | return 0, 0, 1 # Look up
212 | elif loc.rel_elevation < -math.pi / 6.0 and state.viewIndex // 12 > 0:
213 | return 0, 0, -1 # Look down
214 | else:
215 | return i, 0, 0 # Move
216 | # Can't see it - first neutralize camera elevation
217 | if state.viewIndex // 12 == 0:
218 | return 0, 0, 1 # Look up
219 | elif state.viewIndex // 12 == 2:
220 | return 0, 0, -1 # Look down
221 | # Otherwise decide which way to turn
222 | pos = [state.location.x, state.location.y, state.location.z]
223 | target_rel = self.graphs[state.scanId].node[nextviewpoint_id]['position'] - pos
224 | target_heading = math.pi / 2.0 - math.atan2(target_rel[1], target_rel[0]) # convert to rel to y axis
225 | if target_heading < 0:
226 | target_heading += 2.0 * math.pi
227 | if state.heading > target_heading and state.heading - target_heading < math.pi:
228 | return 0, -1, 0 # Turn left
229 | if target_heading > state.heading and target_heading - state.heading > math.pi:
230 | return 0, -1, 0 # Turn left
231 | return 0, 1, 0 # Turn right
232 |
233 | def _get_obs(self):
234 | obs = []
235 | for i, (feature, state) in enumerate(self.env.get_states()):
236 | item = self.batch[i]
237 | obs.append({
238 | 'instr_id': item['instr_id'],
239 | 'scan': state.scanId,
240 | 'viewpoint': state.location.viewpointId,
241 | 'viewIndex': state.viewIndex,
242 | 'heading': state.heading,
243 | 'elevation': state.elevation,
244 | 'feature': feature,
245 | 'step': state.step,
246 | 'navigableLocations': state.navigableLocations,
247 | 'instructions': item['instructions'],
248 | 'teacher': self._shortest_path_action(state, item['path'][-1]),
249 | })
250 | if 'instr_encoding' in item:
251 | obs[-1]['instr_encoding'] = item['instr_encoding']
252 | return obs
253 |
254 | def reset(self):
255 | """ Load a new minibatch / episodes. """
256 | self._next_minibatch()
257 | scan_ids = [item['scan'] for item in self.batch]
258 | viewpoint_ids = [item['path'][0] for item in self.batch]
259 | headings = [item['heading'] for item in self.batch]
260 | self.env.new_episode(scan_ids, viewpoint_ids, headings)
261 | return self._get_obs()
262 |
263 | def step(self, actions):
264 | """ Take action (same interface as make_actions) """
265 | self.env.make_actions(actions)
266 | return self._get_obs()
267 |
268 |
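The feature store consumed by `load_features()` is a headerless tab-separated file with base64-encoded float32 features. A hypothetical sketch of producing one row in that format (the helper name, output path, and default image parameters are illustrative):

```python
import base64
import csv
import numpy as np

TSV_FIELDNAMES = ['scanId', 'viewpointId', 'image_w', 'image_h', 'vfov', 'features']

def write_feature_row(writer, scan_id, viewpoint_id, feats):
    """Hypothetical helper: encode a (36, 2048) float32 matrix exactly the way
    load_features() decodes it (base64 over the raw bytes)."""
    assert feats.shape == (36, 2048) and feats.dtype == np.float32
    writer.writerow({
        'scanId': scan_id,
        'viewpointId': viewpoint_id,
        'image_w': 640, 'image_h': 480, 'vfov': 60,
        'features': base64.b64encode(feats.tobytes()).decode('ascii'),
    })

with open('img_features/example.tsv', 'w') as f:
    writer = csv.DictWriter(f, delimiter='\t', fieldnames=TSV_FIELDNAMES)
    write_feature_row(writer, '17DRP5sb8fy', '85c23efeaecd4d43a7dcd5b90137179e',
                      np.zeros((36, 2048), dtype=np.float32))
```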
--------------------------------------------------------------------------------
/tasks/R2R/eval.py:
--------------------------------------------------------------------------------
1 | from tasks.R2R.utils import load_datasets, load_nav_graphs
2 | from tasks.R2R.env import LowLevelR2RBatch
3 | from tasks.R2R.utils import check_config_judge
4 | from collections import defaultdict
5 |
6 | import json
7 | import os
8 | import networkx as nx
9 | import numpy as np
10 | import pprint
11 | pp = pprint.PrettyPrinter(indent=4)
12 |
13 |
14 | class Evaluation(object):
15 | """ Results submission format: [{'instr_id': string, 'trajectory':[(viewpoint_id, heading_rads, elevation_rads),] } ] """
16 |
17 | def __init__(self, splits):
18 | self.error_margin = 3.0
19 | self.splits = splits
20 | self.gt = {}
21 | self.instr_ids = []
22 | self.scans = []
23 | for item in load_datasets(splits):
24 | self.gt[item['path_id']] = item
25 | self.scans.append(item['scan'])
26 | self.instr_ids += ['%d_%d' % (item['path_id'], i) for i in range(3)]
27 | self.scans = set(self.scans)
28 | self.instr_ids = set(self.instr_ids)
29 | self.graphs = load_nav_graphs(self.scans)
30 | self.distances = {}
31 | self.scores = None
32 | for scan, G in self.graphs.items(): # compute all shortest paths
33 | self.distances[scan] = dict(nx.all_pairs_dijkstra_path_length(G))
34 |
35 | def _get_nearest(self, scan, goal_id, path):
36 | near_id = path[0][0]
37 | near_d = self.distances[scan][near_id][goal_id]
38 | for item in path:
39 | d = self.distances[scan][item[0]][goal_id]
40 | if d < near_d:
41 | near_id = item[0]
42 | near_d = d
43 | return near_id
44 |
45 | def _score_item(self, instr_id, path):
46 | """ Calculate error based on the final position in trajectory, and also
47 | the closest position (oracle stopping rule). """
48 | gt = self.gt[int(instr_id.split('_')[0])]
49 | start = gt['path'][0]
50 | assert start == path[0][0], 'Result trajectories should include the start position'
51 | goal = gt['path'][-1]
52 | final_position = path[-1][0]
53 | nearest_position = self._get_nearest(gt['scan'], goal, path)
54 | self.scores['nav_errors'].append(self.distances[gt['scan']][final_position][goal])
55 | self.scores['oracle_errors'].append(self.distances[gt['scan']][nearest_position][goal])
56 | self.scores['trajectory_steps'].append(len(path) - 1)
57 | distance = 0 # Work out the length of the path in meters
58 | prev = path[0]
59 | for curr in path[1:]:
60 | if prev[0] != curr[0]:
61 | try:
62 | self.graphs[gt['scan']][prev[0]][curr[0]]
63 | except KeyError:
64 | print('Error: The provided trajectory moves from %s to %s but the navigation graph contains no '
65 | 'edge between these viewpoints. Please ensure the provided navigation trajectories '
66 | 'are valid, so that trajectory length can be accurately calculated.' % (prev[0], curr[0]))
67 | raise
68 | distance += self.distances[gt['scan']][prev[0]][curr[0]]
69 | prev = curr
70 | self.scores['trajectory_lengths'].append(distance)
71 | self.scores['shortest_path_lengths'].append(self.distances[gt['scan']][start][goal])
72 |
73 | def score(self, output_file):
74 | """ Evaluate each agent trajectory based on how close it got to the goal location """
75 | self.scores = defaultdict(list)
76 | instr_ids = set(self.instr_ids)
77 | with open(output_file) as f:
78 | for item in json.load(f):
79 | # Check against expected ids
80 | if item['instr_id'] in instr_ids:
81 | instr_ids.remove(item['instr_id'])
82 | self._score_item(item['instr_id'], item['trajectory'])
83 | assert len(instr_ids) == 0, 'Trajectories not provided for %d instruction ids: %s' % (len(instr_ids), instr_ids)
84 | assert len(self.scores['nav_errors']) == len(self.instr_ids)
85 | num_successes = len([i for i in self.scores['nav_errors'] if i < self.error_margin])
86 |
87 | oracle_successes = len([i for i in self.scores['oracle_errors'] if i < self.error_margin])
88 |
89 | spls = []
90 | for err, length, sp in zip(self.scores['nav_errors'], self.scores['trajectory_lengths'], self.scores['shortest_path_lengths']):
91 | if err < self.error_margin:
92 | spls.append(sp / max(length, sp))
93 | else:
94 | spls.append(0)
95 |
96 | score_summary = {
97 | 'length': np.average(self.scores['trajectory_lengths']),
98 | 'steps': np.average(self.scores['trajectory_steps']),
99 | 'nav_error': np.average(self.scores['nav_errors']),
100 | 'oracle success_rate': float(oracle_successes) / float(len(self.scores['oracle_errors'])),
101 | 'success_rate': float(num_successes) / float(len(self.scores['nav_errors'])),
102 | 'spl': np.average(spls),
103 | }
104 |
105 | assert score_summary['spl'] <= score_summary['success_rate']
106 | return score_summary, self.scores
107 |
108 |
109 | class Judge:
110 | def __init__(self, config):
111 | self.results = dict()
112 | self.config = check_config_judge(config)
113 | self.env = LowLevelR2RBatch(features=config['features'],
114 | img_spec=config['img_spec'],
115 | batch_size=config['batch_size'],
116 | seed=config['seed'],
117 | splits=config['splits']
118 | )
119 |
120 | self.results_path = os.path.join(self.config['results_path'], 'results.json')
121 | self.evaluations = [Evaluation([split]) for split in config['splits']]
122 |
123 | self.main_split = 'val_unseen'
124 | self.main_metric = 'spl'
125 |
126 | def test(self, agent):
127 | agent.eval()
128 | self.env.reset_epoch()
129 |
130 | # We rely on env showing the entire batch before repeating anything
131 | self.results = {}
132 | looped = False
133 | while True:
134 | if agent.get_name() == 'Dynamic':
135 | trajectories, _ = agent.rollout(self.env)
136 | else:
137 | trajectories = agent.rollout(self.env)
138 |
139 | for traj in trajectories:
140 | if traj['instr_id'] in self.results:
141 | looped = True
142 | else:
143 | self.results[traj['instr_id']] = traj['path']
144 |
145 | if looped:
146 | break
147 |
148 | output = [{'instr_id': k, 'trajectory': v} for k, v in self.results.items()]
149 |
150 | with open(self.results_path, 'w') as f:
151 | json.dump(output, f)
152 |
153 | main_metric = None
154 |
155 | for split, evaluation in zip(self.config['splits'], self.evaluations):
156 | score_summary, scores = evaluation.score(self.results_path)
157 | print("Agent: %s -- Split: %s" % (agent.get_name(), ",".join(evaluation.splits)))
158 | pp.pprint(score_summary)
159 | if split == self.main_split:
160 | assert self.main_metric in score_summary, 'Field %s not found in score_summary' % self.main_metric
161 | main_metric = score_summary[self.main_metric]
162 |
163 | return main_metric
164 |
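A sketch of the submission file that `Evaluation.score()` consumes; the ids and viewpoints below are placeholders, and `score()` additionally requires one trajectory per instruction id in the chosen split, each starting at the ground-truth start viewpoint:

```python
import json
import os

# Format documented at the top of eval.py:
# [{'instr_id': string, 'trajectory': [(viewpoint_id, heading_rads, elevation_rads), ...]}]
results = [
    {
        'instr_id': '4370_0',                  # placeholder '<path_id>_<instruction_index>'
        'trajectory': [
            ('viewpoint_id_0', 2.55, 0.01),    # placeholder viewpoint ids
            ('viewpoint_id_1', 2.55, 0.01),
        ],
    },
]

os.makedirs('tasks/R2R/results/example', exist_ok=True)
with open('tasks/R2R/results/example/results.json', 'w') as f:
    json.dump(results, f)
```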
--------------------------------------------------------------------------------
/tasks/R2R/main.py:
--------------------------------------------------------------------------------
1 | import argparse
2 |
3 | import sys
4 | import os
5 | import torch
6 | import torch.optim as optim
7 | import numpy as np
8 |
9 | sys.path.append(os.getcwd())
10 |
11 | from tasks.R2R.Agents import get_agent
12 | from tasks.R2R.env import load_features
13 | from tasks.R2R.trainer import Trainer
14 | from tasks.R2R.eval import Judge
15 |
16 |
17 | parser = argparse.ArgumentParser(description='PyTorch for Matterport3D Agent with Dynamic Convolutional Filters')
18 |
19 | # General options
20 | parser.add_argument('--name', type=str, default='custom_experiment',
21 | help='name for the experiment')
22 | parser.add_argument('--results_dir', type=str, default='tasks/R2R/results',
23 | help='home directory for results')
24 | parser.add_argument('--feature_store', type=str, default='img_features/ResNet-152-imagenet.tsv',
25 | help='feature store file')
26 | parser.add_argument('--eval_only', action="store_true",
27 | help='if true, does not train the model before evaluating')
28 | parser.add_argument('--seed', type=int, default=42,
29 | help='initial random seed')
30 | # Training options
31 | parser.add_argument('--num_epoch', type=int, default=100,
32 | help='number of epochs')
33 | parser.add_argument('--eval_every', type=int, default=5,
34 | help='number of training epochs between evaluations')
35 | parser.add_argument('--patience', type=int, default=30,
36 | help='number of epochs to wait before early stopping')
37 | parser.add_argument('--lr', type=float, default=0.001,
38 | help='base learning rate')
39 | parser.add_argument('--batch_size', type=int, default=128,
40 | help='batch size')
41 | # Agent options
42 | parser.add_argument('--num_heads', type=int, default=1,
43 | help='number of heads for multi-headed dynamic convolution')
44 | parser.add_argument('--max_episode_len', type=int, default=20,
45 | help='agent max number of steps before stopping')
46 |
47 |
48 | """ Device info """
49 | if torch.cuda.is_available():
50 | device = torch.device('cuda')
51 | else:
52 | device = torch.device('cpu')
53 | print('Running on device: {}'.format(device))
54 |
55 |
56 | def main(opts):
57 |
58 | splits = 'train'
59 | results_path = os.path.join(opts.results_dir, opts.name)
60 | features, img_spec = load_features(opts.feature_store)
61 |
62 | agent_config = {
63 | 'max_episode_len': opts.max_episode_len,
64 | 'num_heads': opts.num_heads,
65 | 'device': device,
66 | }
67 |
68 | trainer_config = {
69 | 'features': features,
70 | 'img_spec': img_spec,
71 | 'splits': splits,
72 | 'batch_size': opts.batch_size,
73 | 'seed': opts.seed,
74 | 'results_path': results_path,
75 | }
76 |
77 | judge_config = {
78 | 'features': features,
79 | 'img_spec': img_spec,
80 | 'splits': ['val_seen', 'val_unseen'],
81 | 'batch_size': opts.batch_size,
82 | 'seed': opts.seed,
83 | 'results_path': results_path,
84 | }
85 |
86 | agent = get_agent('Dynamic', agent_config)
87 | judge = Judge(judge_config)
88 |
89 | if opts.eval_only:
90 | agent.load(os.path.join(results_path, 'encoder_weights_best'),
91 | os.path.join(results_path, 'decoder_weights_best'))
92 | metric = judge.test(agent)
93 | print('Main metric result for this test: {:.4f}'.format(metric))
94 | else:
95 | trainer = Trainer(trainer_config)
96 | optimizer = optim.Adam(agent.get_trainable_params(), lr=opts.lr)
97 | best = trainer.train(agent, optimizer, opts.num_epoch, patience=opts.patience, eval_every=opts.eval_every, judge=judge)
98 | print('Best metric result for this test: {:.4f}'.format(best))
99 |
100 | print('----- End -----')
101 |
102 |
103 | if __name__ == '__main__':
104 | args = parser.parse_args()
105 |
106 | if os.path.exists(os.path.join(args.results_dir, args.name)):
107 | print('WARNING: Experiment with this name already exists! - {}'.format(args.name))
108 | else:
109 | os.makedirs(os.path.join(args.results_dir, args.name))
110 |
111 | torch.manual_seed(args.seed)
112 | np.random.seed(args.seed)
113 |
114 | main(args)
115 |
--------------------------------------------------------------------------------
/tasks/R2R/results/data_augmentation/decoder_weights_best:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/aimagelab/DynamicConv-agent/717149587761b228c4789660de5699ac6b964b61/tasks/R2R/results/data_augmentation/decoder_weights_best
--------------------------------------------------------------------------------
/tasks/R2R/results/data_augmentation/encoder_weights_best:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/aimagelab/DynamicConv-agent/717149587761b228c4789660de5699ac6b964b61/tasks/R2R/results/data_augmentation/encoder_weights_best
--------------------------------------------------------------------------------
/tasks/R2R/results/normal_data/decoder_weights_best:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/aimagelab/DynamicConv-agent/717149587761b228c4789660de5699ac6b964b61/tasks/R2R/results/normal_data/decoder_weights_best
--------------------------------------------------------------------------------
/tasks/R2R/results/normal_data/encoder_weights_best:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/aimagelab/DynamicConv-agent/717149587761b228c4789660de5699ac6b964b61/tasks/R2R/results/normal_data/encoder_weights_best
--------------------------------------------------------------------------------
/tasks/R2R/trainer.py:
--------------------------------------------------------------------------------
1 | import os
2 |
3 | from tasks.R2R.env import LowLevelR2RBatch
4 | from tasks.R2R.utils import check_config_trainer, print_progress
5 |
6 |
7 | class Trainer:
8 | def __init__(self, config):
9 | self.results = dict()
10 | self.config = check_config_trainer(config)
11 | self.env = LowLevelR2RBatch(features=config['features'],
12 | img_spec=config['img_spec'],
13 | batch_size=config['batch_size'],
14 | seed=config['seed'],
15 | splits=config['splits']
16 | )
17 | print('Success!')
18 |
19 | def _train_epoch(self, agent, optimizer, num_iter):
20 | epoch_loss = 0.
21 | agent.train()
22 | self.env.reset_epoch()
23 |
24 | for it in range(num_iter):
25 | optimizer.zero_grad()
26 | _, loss = agent.rollout(self.env)
27 | loss.backward()
28 | optimizer.step()
29 | epoch_loss += loss.item()
30 | suffix_msg = 'Running Loss: {:.4f}'.format(epoch_loss / (it+1))
31 | print_progress(it, num_iter, suffix=suffix_msg)
32 | else:
33 | suffix_msg = 'Running Loss: {:.4f}'.format(epoch_loss / num_iter)
34 | print_progress(num_iter, num_iter, suffix=suffix_msg)
35 |
36 | return epoch_loss / num_iter
37 |
38 | def train(self, agent, optimizer, num_epoch, num_iter_epoch=None, patience=None, eval_every=None, judge=None):
39 | best_metric = 0.
40 |
41 | if num_iter_epoch is None:
42 | num_iter_epoch = len(self.env.data) // self.env.batch_size + 1
43 | if eval_every is None:
44 | if judge is None:
45 | eval_every = num_epoch + 1 # Never tested
46 | else:
47 | eval_every = num_epoch # Test only on the last epoch
48 | if patience is None:
49 | patience = num_epoch
50 | reset_patience = patience
51 |
52 | for epoch in range(num_epoch):
53 | mean_loss = self._train_epoch(agent, optimizer, num_iter_epoch)
54 | print("Epoch {}/{} terminated: Epoch Loss = {:.4f}".format(epoch+1, num_epoch, mean_loss))
55 | agent.save(os.path.join(self.config['results_path'], 'encoder_weights_last'),
56 | os.path.join(self.config['results_path'], 'decoder_weights_last'))
57 |
58 | if (epoch+1) % eval_every == 0:
59 | metric = judge.test(agent)
60 | if metric is not None:
61 | print('Main metric results for this test: {:.4f}'.format(metric))
62 | if metric > best_metric:
63 | best_metric = metric
64 | patience = reset_patience
65 | print('New best! Saving weights...')
66 | agent.save(os.path.join(self.config['results_path'], 'encoder_weights_best'),
67 | os.path.join(self.config['results_path'], 'decoder_weights_best'))
68 | else:
69 | patience -= 1
70 | if patience == 0:
71 | print('{} epochs without improvement in main metric ({}) - patience is over!'.format(reset_patience, judge.main_metric))
72 | break
73 |
74 | print("Finishing training")
75 | return best_metric
76 |
--------------------------------------------------------------------------------
/tasks/R2R/utils.py:
--------------------------------------------------------------------------------
1 | """ Utils for io, language, connectivity graphs etc """
2 | import sys
3 | import json
4 | import numpy as np
5 | import networkx as nx
6 | import torch
7 |
8 | import nltk
9 | nltk.download('stopwords')
10 |
11 | from nltk.tokenize import RegexpTokenizer
12 | from nltk.corpus import stopwords
13 |
14 | # padding, unknown word, end of sentence
15 | base_vocab = ['<PAD>', '<UNK>', '<EOS>']
16 | padding_idx = base_vocab.index('<PAD>')
17 |
18 |
19 | def load_nav_graphs(scans):
20 | """ Load connectivity graph for each scan """
21 |
22 | def distance(pose1, pose2):
23 | """ Euclidean distance between two graph poses """
24 | return ((pose1['pose'][3] - pose2['pose'][3]) ** 2
25 | + (pose1['pose'][7] - pose2['pose'][7]) ** 2
26 | + (pose1['pose'][11] - pose2['pose'][11]) ** 2) ** 0.5
27 |
28 | graphs = {}
29 | for scan in scans:
30 | with open('connectivity/%s_connectivity.json' % scan) as f:
31 | g = nx.Graph()
32 | positions = {}
33 | data = json.load(f)
34 | for i, item in enumerate(data):
35 | if item['included']:
36 | for j, conn in enumerate(item['unobstructed']):
37 | if conn and data[j]['included']:
38 | positions[item['image_id']] = np.array([item['pose'][3],
39 | item['pose'][7], item['pose'][11]])
40 | assert data[j]['unobstructed'][i], 'Graph should be undirected'
41 | g.add_edge(item['image_id'], data[j]['image_id'], weight=distance(item, data[j]))
42 | nx.set_node_attributes(g, values=positions, name='position')
43 | graphs[scan] = g
44 | return graphs
45 |
46 |
47 | def load_datasets(splits):
48 | data = []
49 | for split in splits:
50 | assert split in ['train', 'val_seen', 'val_unseen', 'test']
51 | with open('tasks/R2R/data/R2R_%s.json' % split) as f:
52 | data += json.load(f)
53 | return data
54 |
55 |
56 | def print_progress(iteration, total, prefix='', suffix='', decimals=1, bar_length=100):
57 | """
58 | Call in a loop to create terminal progress bar
59 | @params:
60 | iteration - Required : current iteration (Int)
61 | total - Required : total iterations (Int)
62 | prefix - Optional : prefix string (Str)
63 | suffix - Optional : suffix string (Str)
64 | decimals - Optional : positive number of decimals in percent complete (Int)
65 | bar_length - Optional : character length of bar (Int)
66 | """
67 | str_format = "{0:." + str(decimals) + "f}"
68 | percents = str_format.format(100 * (iteration / float(total)))
69 | filled_length = int(round(bar_length * iteration / float(total)))
70 | bar = '█' * filled_length + '-' * (bar_length - filled_length)
71 |
72 | sys.stdout.write('\r%s |%s| %s%s %s' % (prefix, bar, percents, '%', suffix))
73 |
74 | if iteration == total:
75 | sys.stdout.write('\n')
76 | sys.stdout.flush()
77 |
78 |
79 | def check_config_trainer(config):
80 | field_list = ['features', 'img_spec', 'splits', 'batch_size', 'seed', 'results_path']
81 | assert isinstance(config, dict), 'expected type dict for argument config, found %s' % type(config)
82 | for field in field_list:
83 | assert field in config, 'missing required field in config: %s' % field
84 | return config
85 |
86 |
87 | def check_config_judge(config):
88 | field_list = ['features', 'img_spec', 'splits', 'batch_size', 'seed', 'results_path']
89 | assert isinstance(config, dict), 'expected type dict for argument config, found %s' % type(config)
90 | for field in field_list:
91 | assert field in config, 'missing required field in config: %s' % field
92 |
93 | if isinstance(config['splits'], str):
94 | config['splits'] = [config['splits']]
95 | assert isinstance(config['splits'], list), 'expected type list or str type for argument "splits", found %s' % type(config['splits'])
96 |
97 | return config
98 |
99 |
100 | def my_split_func(sentence):
101 | tokenizer = RegexpTokenizer(r'\w+')
102 | stop_words = set(stopwords.words('english'))
103 | include_words = ['up', 'down', 'out', 'into', 'through', 'above', 'before', 'below', 'to', 'over', 'under']
104 |
105 | for word in include_words:
106 | stop_words.remove(word)
107 |
108 | word_tokens = tokenizer.tokenize(sentence)
109 | filtered_sentence = [w for w in word_tokens if w not in stop_words]
110 | filtered_sentence_reversed = [w for w in reversed(word_tokens) if w not in stop_words]
111 |
112 | return filtered_sentence, filtered_sentence_reversed
113 |
114 |
115 | def batched_sentence_embedding(batch, word_encoder, device=torch.device('cpu')):
116 | """
117 | :param batch: batch of instructions of variable lengths --> suppose range [min_l, max_l]
118 | :param word_encoder: provides single-word embeddings --- must support __getitem__ method
119 | :param device: may be cpu or cuda -- default is cpu
120 | :return: tuple (embeddings, lengths); embeddings has shape [batch_len, embedding_size, max_l], with sentences zero-padded to the same length
121 | """
122 | split_batch = []
123 |
124 | for sentence in batch:
125 | spl, spl_rev = my_split_func(sentence)
126 | split_batch.append(spl)
127 |
128 | lengths = [len(spl) for spl in split_batch]
129 | max_l = max(lengths)
130 |
131 | t = torch.zeros(len(batch), max_l, 300)
132 |
133 | for i, spl in enumerate(split_batch):
134 | e = torch.stack([word_encoder[word] for word in spl])
135 | t[i, :e.shape[0], :] = e.squeeze(dim=1)
136 |
137 | t = t.transpose(1, 2)
138 |
139 | embeddings = t.to(device=device)
140 | return embeddings, lengths
141 |
142 |
143 | def append_coordinates(features, agent_heading, agent_elevation):
144 | """ Appends elevation and headings coordinates to attention heatmap """
145 |
146 | """
147 | Assume features is 36 x num_features: appends 36-dimensional maps with elevation and headings.
148 | Indexing is the following:
149 | _________________________________________________
150 | | |
151 | up | 24 25 26 27 28 29 30 31 32 33 34 35 |
152 | | |
153 | center | 12 13 14 15 16 17 18 19 20 21 22 23 |
154 | | |
155 | down | 0 1 2 3 4 5 6 7 8 9 10 11 |
156 | |________________________________________________|
157 |
158 | left center right
159 | """
160 |
161 | abs_elevations = torch.tensor([-0.5, 0, 0.5], dtype=torch.float)
162 | elevations = abs_elevations - agent_elevation
163 | elevations_map = elevations.repeat(12, 1).transpose(0, 1).contiguous().view(36, 1)
164 |
165 | abs_headings = torch.tensor(np.linspace(0, (11./6.)*np.pi, 12), dtype=torch.float)
166 | headings = abs_headings - agent_heading
167 |
168 | headings_cos_map = torch.cos(headings).repeat(3).view(36, 1)
169 | headings_sin_map = torch.sin(headings).repeat(3).view(36, 1)
170 |
171 | feature_map = torch.cat((features, elevations_map, headings_cos_map, headings_sin_map), dim=-1)
172 |
173 | return feature_map
174 |
175 |
176 | def to_one_hot(indexes, output_dim):
177 | """
178 | :param indexes: list of numbers in the range [0, output_dim)
179 | :param output_dim: size of a single one-hot tensor
180 | :return: tensor containing one_hot representation of indexes
181 | """
182 | assert output_dim >= 2
183 | assert output_dim > max(indexes)
184 | assert min(indexes) >= 0
185 |
186 | return torch.eye(output_dim)[indexes]
187 |
188 |
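A small sketch (with dummy tensors) of the two helpers the Dynamic agent relies on most, showing their expected shapes:

```python
import torch
from tasks.R2R.utils import to_one_hot, append_coordinates

# to_one_hot: list of action indexes -> stacked one-hot rows
prev = to_one_hot([6, 2], 7)
assert prev.shape == (2, 7)

# append_coordinates: (36, num_features) view features gain three channels
# (relative elevation, cos/sin of relative heading) -> (36, num_features + 3)
feats = torch.zeros(36, 2048)
augmented = append_coordinates(feats, agent_heading=0.0, agent_elevation=0.0)
assert augmented.shape == (36, 2051)
```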
--------------------------------------------------------------------------------
/web/README.md:
--------------------------------------------------------------------------------
1 | # Web
2 |
3 | This directory contains web-based applications for:
4 | - Viewing and saving first-person trajectories
5 | - Amazon Mechanical Turk (AMT) interfaces that were used to collect and evaluate navigation instructions
6 |
7 | Code is based on JavaScript and the [three.js](https://threejs.org/) wrapper for OpenGL, as well as the [tween.js](https://github.com/tweenjs/tween.js/) library for animation. The [Gulp](https://gulpjs.com/) task runner (based on Node.js) is used for spinning up a web server and for optimizing and minifying JavaScript for deployment (e.g. to AMT).
8 |
9 | To get started, make sure you have [Node.js](https://nodejs.org/en/) >=6.0.0 installed, then install the remaining dependencies using the npm package manager:
10 | ```
11 | npm install
12 | ```
13 |
14 | You will also need to first install the Matterport data as described [here](../README.md). Then, set up symlinks to data (from the app directory) as follows:
15 | ```
16 | cd app
17 | ln -s ../../tasks/R2R/data/ R2Rdata
18 | ln -s ../../connectivity connectivity
19 | ln -s ../../data data
20 | ```
21 |
22 | Also, download the R2R trajectory data by running this script from the top level directory (if you haven't already done this):
23 | ```
24 | ./tasks/R2R/data/download.sh
25 | ```
26 |
27 |
28 | Now you can start a web server to check out the various visualizations and AMT user interfaces:
29 | ```
30 | gulp
31 | ```
32 |
33 | ## Trajectory Visualization
34 |
35 | `trajectory.html` is an application for viewing first-person trajectories and downloading them as videos:
36 | - Use `Choose File` to select a trajectory file in the leaderboard submission format. By default, the included file `val_unseen_shortest_agent.json` is selected (containing the shortest paths to goal in the unseen validation set).
37 | - `Play` visualizes the trajectory with the provided index.
38 | - `Download video` visualizes the trajectory then downloads it as a .webm video.
39 | - Camera parameters can be set with the `Width`, `Height` and `V-FOV` fields.
40 | - Change the `Index` field to view different trajectories from the file.
41 |
42 |
43 | ## AMT Interfaces
44 |
45 | `collect-hit.html` and `eval-hit.html` are the AMT interfaces used for collecting navigation instructions for the R2R data set, and benchmarking human performance on the R2R test set, respectively. Both interfaces appear as they would to a worker on AMT, except there is no 'Submit' button. Instead, both interfaces have a url parameter `?ix=0` that can be directly edited in your browser address bar to view different HITs. There are also instructions at the top of the UI that can be expanded.
46 |
47 | ### collect-hit
48 |
49 | The UI `collect-hit.html` shows workers a navigation trajectory that must be annotated with a navigation instruction. Workers can only move along the trajectory (either as a fly-through or by clicking through each step), but cannot move anywhere else. Trajectories are loaded from the file `sample_room_paths.json`. Navigation instructions are collected in the textarea with id `tag1`, which can be integrated with AMT.
50 |
51 | ### eval-hit
52 |
53 | The UI `eval-hit.html` situates workers in an environment and provides a navigation instruction sourced from `R2R_test.json`. Workers can move anywhere, and must submit when they are as close as possible to the goal location. The actual navigation trajectories are collected in a hidden input with id `traj`, in the form of comma-separated (viewpointID, heading_degrees, elevation_degrees) tuples.
54 |
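For reference, a small Python sketch (not part of the web code) of parsing the `traj` field back into tuples, assuming the values are flattened into a single comma-separated list:

```
def parse_traj(traj_field):
    """Parse 'viewpointId,heading_deg,elevation_deg,viewpointId,...' into tuples."""
    values = traj_field.split(',')
    assert len(values) % 3 == 0
    return [(values[i], float(values[i + 1]), float(values[i + 2]))
            for i in range(0, len(values), 3)]
```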
55 | ### Integrating with AMT
56 |
57 | To actually use these interfaces to collect data, they must be integrated with AMT. Please check the AMT docs. At a high level, several additional steps are required to achieve this:
58 | - Run `gulp build` to generate optimized and minified javascript (`main.min.js`) in the `dist` directory.
59 | - Host online the minified javascript files, along with the Matterport skybox images (we suggest downsampling the originals to 50% or smaller to keep the HITs responsive), our connectivity graphs, and any other necessary files for the particular html template (e.g. your own version of `sample_room_paths.json` or `R2R_test.json`) so they are publicly accessible.
60 | - In the html template(s):
61 | - Review the HIT instructions and replace references to ACRV with your research group.
62 | - Replace all local urls with urls linking to your own publicly hosted assets, and
63 | - Switch to AMT parameters instead of url parameters, i.e., replace `var ix = location.search.split('ix=')[1];` with `var ix = ${ix}` and provide these parameters to AMT (e.g., in an uploaded csv file) when creating a batch of HITs. Note that the `ix` parameter is just an index into `sample_room_paths.json` or `R2R_test.json`.
64 | - Follow the AMT instructions to create a batch of HITs using your modified html template(s), such that the data collected in the `tag1` and/or `traj` fields will be available through AMT.
65 |
66 | Disclaimer: We provide this code to assist others collecting AMT annotations on top of Matterport-style data, but this is academic code and not a supported library. We may have forgotten something or left out a step! Feel free to submit pull requests with fixes.
67 |
--------------------------------------------------------------------------------
/web/app/index.html:
--------------------------------------------------------------------------------
(HTML markup stripped in this dump; only the page title is recoverable)
6 | Matterport3D Simulator Web Tools