├── sim_imgs
    └── .gitkeep
├── tasks
    ├── agent
    │   ├── __init__.py
    │   ├── pomdp
    │   │   ├── __init__.py
    │   │   ├── instruction_attention.py
    │   │   ├── ast.py
    │   │   └── avast.py
    │   ├── observation
    │   │   ├── __init__.py
    │   │   ├── cv
    │   │   │   ├── __init__.py
    │   │   │   └── panorama_encoder.py
    │   │   ├── nlp
    │   │   │   ├── __init__.py
    │   │   │   ├── glove.py
    │   │   │   └── instruction_encoder.py
    │   │   ├── .DS_Store
    │   │   └── observation_encoder.py
    │   ├── model.py
    │   ├── agent_reinforce.py
    │   ├── replay_memory.py
    │   └── agent_seq2seq.py
    ├── env
    │   ├── __init__.py
    │   ├── adj_dict
    │   │   └── .gitkeep
    │   ├── img_features
    │   │   └── .gitkeep
    │   ├── nlp_features
    │   │   └── .gitkeep
    │   ├── r2r_dataset
    │   │   └── .gitkeep
    │   ├── mp_env_sim.py
    │   └── mp_env_dict.py
    ├── data
    │   ├── load_weight
    │   │   └── .gitkeep
    │   ├── results
    │   │   └── 0
    │   │   │   └── .gitkeep
    │   ├── save_weight
    │   │   └── .gitkeep
    │   └── scripts
    │   │   └── generate_adj_dict.py
    ├── config.json
    ├── utils.py
    ├── reinforce_fine_tune.py
    └── ast_pre_train.py
├── .dockerignore
├── web
    ├── .gitignore
    ├── app
    │   ├── js
    │   │   ├── RequestAnimationFrame.js
    │   │   ├── Detector.js
    │   │   └── Matterport3D.js
    │   ├── index.html
    │   └── trajectory.html
    ├── package.json
    ├── gulpfile.js
    └── README.md
├── .gitmodules
├── teaser.jpg
├── .gitignore
├── webgl_imgs
    ├── 5ZKStnWn8Zo_c76b52856e7c4f2a9a4419000c8e646a_4.13470589902238_-0.02922217527541366.png
    ├── 8194nk5LbLH_c9e8dc09263e4d0da77d16de0ecddd39_4.05504292862083_0.008533161479170466.png
    ├── ARNzJeq3xxb_9a671e6915de4eb897f45fee8bf2031d_5.616355886953764_0.02583868533558965.png
    ├── cV4RVeZvu5T_1b321779a4374c2b952c51820daa9e6c_6.266463179566256_0.07914721704610106.png
    ├── 17DRP5sb8fy_85c23efeaecd4d43a7dcd5b90137179e_2.551961945320492_0.008557380839564054.png
    ├── 1LXtFkjw3qL_187589bb7d4644f2943079fb949c0be9_1.8699330579409539_0.0004921836022802584.png
    ├── 1pXnuDYAj8r_163d61ac7edb43fb958c5d9e69ae11ad_4.626331047551077_-0.02444352614304746.png
    ├── 29hnd4uzFmX_1576d62e7bbb45e8a5ef9e7bb37b1839_5.844119909926444_-0.0006838914039405167.png
    ├── 2azQ1b91cZZ_3daad58ad53742038e50d62e91f84e7b_3.1736484087962933_0.016732869758208434.png
    ├── 2n8kARJN3HM_94ac3cea52ec455993f8562f78da3be1_2.604601935142565_-0.0009188787844489273.png
    ├── 5LpN3gDmAk7_bda8025f20404048a77381e9e0dc0ccf_5.325207878739601_-0.01083211073205187.png
    ├── 759xd9YjKW5_2343ef3bf04a4433af62f0d527d7512a_3.5451019786019264_-0.016938006310169448.png
    ├── 7y3sRwLe3Va_9bbf903d50da4ffd9e5d1fb7c9f4d69b_1.7348660165523566_0.008361841032265524.png
    ├── 82sE5b5pLXE_056a491afa534b17bac36f4f5898462a_1.689393931320027_-0.0037883068413356496.png
    ├── 8WUmhLawc2A_d21aae0b5d944f27a0074525c803fc9f_3.047458184407221_-0.04510889155759994.png
    ├── B6ByNegPMKs_e3a65955df26467581c32613c4e9f865_5.230794959607039_0.007265625492957138.png
    ├── b8cTxDM8gDG_f2944e0b66b9461994a7f757582f9bc3_0.0853092784395515_-0.007543204141144086.png
    ├── 2t7WUuJeko7_529f006f8293406da0b506defd2891a5_0.032985516949381344_-0.013788837143969411.png
    ├── 5q7pvUzZiYa_397403366d784caf804d741f32fd68b9_2.8746465006968234_-0.0007063598518199811.png
    └── ac26ZMwG7aT_efeef7cc82c84690addb0bf415f075ea_0.07434352566701552_-0.013447513736072197.png
├── src
    ├── lib
    │   ├── vertex.sh
    │   ├── fragment.sh
    │   └── Benchmark.cpp
    ├── test
    │   ├── python_test.py
    │   ├── python_vs_threejs_test.py
    │   └── rendertest_spec.json
    ├── driver
    │   ├── driver.py
    │   └── mattersim_main.cpp
    └── lib_python
    │   └── MatterSimPython.cpp
├── include
    ├── Benchmark.hpp
    ├── cbf.h
    └── NavGraph.hpp
├── download.sh
├── scripts
    ├── timer.py
    ├── downsize_skybox.py
    ├── fill_depth.py
    ├── precompute_img_features.py
    └── depth_to_skybox.py
├── connectivity
    ├── README.md
    ├── scans.txt
    ├── gZ6f7yhEvPG_connectivity.json
    ├── YmJkqBEsHnH_connectivity.json
    ├── GdvgFV5R1Z5_connectivity.json
    └── 8194nk5LbLH_connectivity.json
├── LICENSE
├── Dockerfile
├── CMakeLists.txt
├── README.md
└── cmake
    └── Modules
        └── FindNumPy.cmake


/sim_imgs/.gitkeep:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/tasks/agent/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/tasks/env/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/tasks/env/adj_dict/.gitkeep:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/.dockerignore:
--------------------------------------------------------------------------------
1 | build
2 | data
3 | 


--------------------------------------------------------------------------------
/tasks/agent/pomdp/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/tasks/data/load_weight/.gitkeep:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/tasks/data/results/0/.gitkeep:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/tasks/data/save_weight/.gitkeep:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/tasks/env/img_features/.gitkeep:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/tasks/env/nlp_features/.gitkeep:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/tasks/env/r2r_dataset/.gitkeep:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/tasks/agent/observation/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/tasks/agent/observation/cv/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/tasks/agent/observation/nlp/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/web/.gitignore:
--------------------------------------------------------------------------------
1 | dist
2 | node_modules
3 | app/R2Rdata
4 | app/connectivity
5 | app/data
6 | 


--------------------------------------------------------------------------------
/.gitmodules:
--------------------------------------------------------------------------------
1 | [submodule "pybind11"]
2 | 	path = pybind11
3 | 	url = https://github.com/pybind/pybind11.git
4 | 


--------------------------------------------------------------------------------
/teaser.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/NYCU-MLLab/AVAST_Attentive_Variational_State_Tracker_for_Vision-and-Language-Navigation/HEAD/teaser.jpg


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
 1 | *~
 2 | /data
 3 | /build
 4 | /doxygen
 5 | /sim_imgs
 6 | *.so
 7 | *kdev4*
 8 | *.caffemodel
 9 | *.caffemodel.h5
10 | *.pyc
11 | *.out
12 | *.zip
13 | 


--------------------------------------------------------------------------------
/tasks/agent/observation/.DS_Store:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/NYCU-MLLab/AVAST_Attentive_Variational_State_Tracker_for_Vision-and-Language-Navigation/HEAD/tasks/agent/observation/.DS_Store


--------------------------------------------------------------------------------
/webgl_imgs/5ZKStnWn8Zo_c76b52856e7c4f2a9a4419000c8e646a_4.13470589902238_-0.02922217527541366.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/NYCU-MLLab/AVAST_Attentive_Variational_State_Tracker_for_Vision-and-Language-Navigation/HEAD/webgl_imgs/5ZKStnWn8Zo_c76b52856e7c4f2a9a4419000c8e646a_4.13470589902238_-0.02922217527541366.png


--------------------------------------------------------------------------------
/webgl_imgs/8194nk5LbLH_c9e8dc09263e4d0da77d16de0ecddd39_4.05504292862083_0.008533161479170466.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/NYCU-MLLab/AVAST_Attentive_Variational_State_Tracker_for_Vision-and-Language-Navigation/HEAD/webgl_imgs/8194nk5LbLH_c9e8dc09263e4d0da77d16de0ecddd39_4.05504292862083_0.008533161479170466.png


--------------------------------------------------------------------------------
/webgl_imgs/ARNzJeq3xxb_9a671e6915de4eb897f45fee8bf2031d_5.616355886953764_0.02583868533558965.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/NYCU-MLLab/AVAST_Attentive_Variational_State_Tracker_for_Vision-and-Language-Navigation/HEAD/webgl_imgs/ARNzJeq3xxb_9a671e6915de4eb897f45fee8bf2031d_5.616355886953764_0.02583868533558965.png


--------------------------------------------------------------------------------
/webgl_imgs/cV4RVeZvu5T_1b321779a4374c2b952c51820daa9e6c_6.266463179566256_0.07914721704610106.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/NYCU-MLLab/AVAST_Attentive_Variational_State_Tracker_for_Vision-and-Language-Navigation/HEAD/webgl_imgs/cV4RVeZvu5T_1b321779a4374c2b952c51820daa9e6c_6.266463179566256_0.07914721704610106.png


--------------------------------------------------------------------------------
/webgl_imgs/17DRP5sb8fy_85c23efeaecd4d43a7dcd5b90137179e_2.551961945320492_0.008557380839564054.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/NYCU-MLLab/AVAST_Attentive_Variational_State_Tracker_for_Vision-and-Language-Navigation/HEAD/webgl_imgs/17DRP5sb8fy_85c23efeaecd4d43a7dcd5b90137179e_2.551961945320492_0.008557380839564054.png


--------------------------------------------------------------------------------
/webgl_imgs/1LXtFkjw3qL_187589bb7d4644f2943079fb949c0be9_1.8699330579409539_0.0004921836022802584.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/NYCU-MLLab/AVAST_Attentive_Variational_State_Tracker_for_Vision-and-Language-Navigation/HEAD/webgl_imgs/1LXtFkjw3qL_187589bb7d4644f2943079fb949c0be9_1.8699330579409539_0.0004921836022802584.png


--------------------------------------------------------------------------------
/webgl_imgs/1pXnuDYAj8r_163d61ac7edb43fb958c5d9e69ae11ad_4.626331047551077_-0.02444352614304746.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/NYCU-MLLab/AVAST_Attentive_Variational_State_Tracker_for_Vision-and-Language-Navigation/HEAD/webgl_imgs/1pXnuDYAj8r_163d61ac7edb43fb958c5d9e69ae11ad_4.626331047551077_-0.02444352614304746.png


--------------------------------------------------------------------------------
/webgl_imgs/29hnd4uzFmX_1576d62e7bbb45e8a5ef9e7bb37b1839_5.844119909926444_-0.0006838914039405167.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/NYCU-MLLab/AVAST_Attentive_Variational_State_Tracker_for_Vision-and-Language-Navigation/HEAD/webgl_imgs/29hnd4uzFmX_1576d62e7bbb45e8a5ef9e7bb37b1839_5.844119909926444_-0.0006838914039405167.png


--------------------------------------------------------------------------------
/webgl_imgs/2azQ1b91cZZ_3daad58ad53742038e50d62e91f84e7b_3.1736484087962933_0.016732869758208434.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/NYCU-MLLab/AVAST_Attentive_Variational_State_Tracker_for_Vision-and-Language-Navigation/HEAD/webgl_imgs/2azQ1b91cZZ_3daad58ad53742038e50d62e91f84e7b_3.1736484087962933_0.016732869758208434.png


--------------------------------------------------------------------------------
/webgl_imgs/2n8kARJN3HM_94ac3cea52ec455993f8562f78da3be1_2.604601935142565_-0.0009188787844489273.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/NYCU-MLLab/AVAST_Attentive_Variational_State_Tracker_for_Vision-and-Language-Navigation/HEAD/webgl_imgs/2n8kARJN3HM_94ac3cea52ec455993f8562f78da3be1_2.604601935142565_-0.0009188787844489273.png


--------------------------------------------------------------------------------
/webgl_imgs/5LpN3gDmAk7_bda8025f20404048a77381e9e0dc0ccf_5.325207878739601_-0.01083211073205187.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/NYCU-MLLab/AVAST_Attentive_Variational_State_Tracker_for_Vision-and-Language-Navigation/HEAD/webgl_imgs/5LpN3gDmAk7_bda8025f20404048a77381e9e0dc0ccf_5.325207878739601_-0.01083211073205187.png


--------------------------------------------------------------------------------
/webgl_imgs/759xd9YjKW5_2343ef3bf04a4433af62f0d527d7512a_3.5451019786019264_-0.016938006310169448.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/NYCU-MLLab/AVAST_Attentive_Variational_State_Tracker_for_Vision-and-Language-Navigation/HEAD/webgl_imgs/759xd9YjKW5_2343ef3bf04a4433af62f0d527d7512a_3.5451019786019264_-0.016938006310169448.png


--------------------------------------------------------------------------------
/webgl_imgs/7y3sRwLe3Va_9bbf903d50da4ffd9e5d1fb7c9f4d69b_1.7348660165523566_0.008361841032265524.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/NYCU-MLLab/AVAST_Attentive_Variational_State_Tracker_for_Vision-and-Language-Navigation/HEAD/webgl_imgs/7y3sRwLe3Va_9bbf903d50da4ffd9e5d1fb7c9f4d69b_1.7348660165523566_0.008361841032265524.png


--------------------------------------------------------------------------------
/webgl_imgs/82sE5b5pLXE_056a491afa534b17bac36f4f5898462a_1.689393931320027_-0.0037883068413356496.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/NYCU-MLLab/AVAST_Attentive_Variational_State_Tracker_for_Vision-and-Language-Navigation/HEAD/webgl_imgs/82sE5b5pLXE_056a491afa534b17bac36f4f5898462a_1.689393931320027_-0.0037883068413356496.png


--------------------------------------------------------------------------------
/webgl_imgs/8WUmhLawc2A_d21aae0b5d944f27a0074525c803fc9f_3.047458184407221_-0.04510889155759994.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/NYCU-MLLab/AVAST_Attentive_Variational_State_Tracker_for_Vision-and-Language-Navigation/HEAD/webgl_imgs/8WUmhLawc2A_d21aae0b5d944f27a0074525c803fc9f_3.047458184407221_-0.04510889155759994.png


--------------------------------------------------------------------------------
/webgl_imgs/B6ByNegPMKs_e3a65955df26467581c32613c4e9f865_5.230794959607039_0.007265625492957138.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/NYCU-MLLab/AVAST_Attentive_Variational_State_Tracker_for_Vision-and-Language-Navigation/HEAD/webgl_imgs/B6ByNegPMKs_e3a65955df26467581c32613c4e9f865_5.230794959607039_0.007265625492957138.png


--------------------------------------------------------------------------------
/webgl_imgs/b8cTxDM8gDG_f2944e0b66b9461994a7f757582f9bc3_0.0853092784395515_-0.007543204141144086.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/NYCU-MLLab/AVAST_Attentive_Variational_State_Tracker_for_Vision-and-Language-Navigation/HEAD/webgl_imgs/b8cTxDM8gDG_f2944e0b66b9461994a7f757582f9bc3_0.0853092784395515_-0.007543204141144086.png


--------------------------------------------------------------------------------
/webgl_imgs/2t7WUuJeko7_529f006f8293406da0b506defd2891a5_0.032985516949381344_-0.013788837143969411.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/NYCU-MLLab/AVAST_Attentive_Variational_State_Tracker_for_Vision-and-Language-Navigation/HEAD/webgl_imgs/2t7WUuJeko7_529f006f8293406da0b506defd2891a5_0.032985516949381344_-0.013788837143969411.png


--------------------------------------------------------------------------------
/webgl_imgs/5q7pvUzZiYa_397403366d784caf804d741f32fd68b9_2.8746465006968234_-0.0007063598518199811.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/NYCU-MLLab/AVAST_Attentive_Variational_State_Tracker_for_Vision-and-Language-Navigation/HEAD/webgl_imgs/5q7pvUzZiYa_397403366d784caf804d741f32fd68b9_2.8746465006968234_-0.0007063598518199811.png


--------------------------------------------------------------------------------
/webgl_imgs/ac26ZMwG7aT_efeef7cc82c84690addb0bf415f075ea_0.07434352566701552_-0.013447513736072197.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/NYCU-MLLab/AVAST_Attentive_Variational_State_Tracker_for_Vision-and-Language-Navigation/HEAD/webgl_imgs/ac26ZMwG7aT_efeef7cc82c84690addb0bf415f075ea_0.07434352566701552_-0.013447513736072197.png


--------------------------------------------------------------------------------
/src/lib/vertex.sh:
--------------------------------------------------------------------------------
 1 | R""(
 2 | #version 120
 3 | 
 4 | attribute vec3 vertex;
 5 | varying vec3 texCoord;
 6 | varying vec4 camCoord;
 7 | uniform mat4 ProjMat;
 8 | uniform mat4 ModelViewMat;
 9 | 
10 | void main() {
11 |   camCoord = ModelViewMat * vec4(vertex, 1.0);
12 |   gl_Position = ProjMat * camCoord;
13 |   texCoord = vertex;
14 | }
15 | )""
16 | 


--------------------------------------------------------------------------------
/src/lib/fragment.sh:
--------------------------------------------------------------------------------
 1 | R""(
 2 | #version 120
 3 | 
 4 | varying vec3 texCoord;
 5 | varying vec4 camCoord;
 6 | uniform samplerCube cubemap;
 7 | const vec3 camlook = vec3( 0.0, 0.0, -1.0 );
 8 | uniform bool isDepth;
 9 | 
10 | void main (void) {
11 |   vec4 color = textureCube(cubemap, texCoord);
12 |   if (isDepth) {
13 |     float scale = dot(camCoord.xyz, camlook) / length(camCoord.xyz);
14 |     gl_FragColor = color*scale;
15 |   } else {
16 |     gl_FragColor = color;
17 |   }
18 | }
19 | )""
20 | 


--------------------------------------------------------------------------------
/web/app/js/RequestAnimationFrame.js:
--------------------------------------------------------------------------------
 1 | /**
 2 |  * Provides requestAnimationFrame in a cross browser way.
 3 |  * http://paulirish.com/2011/requestanimationframe-for-smart-animating/
 4 |  */
 5 | 
 6 | if ( !window.requestAnimationFrame ) {
 7 | 
 8 | 	window.requestAnimationFrame = ( function() {
 9 | 
10 | 		return window.webkitRequestAnimationFrame ||
11 | 		window.mozRequestAnimationFrame ||
12 | 		window.oRequestAnimationFrame ||
13 | 		window.msRequestAnimationFrame ||
14 | 		function( /* function FrameRequestCallback */ callback, /* DOMElement Element */ element ) {
15 | 
16 | 			window.setTimeout( callback, 1000 / 60 );
17 | 
18 | 		};
19 | 
20 | 	} )();
21 | 
22 | }
23 | 


--------------------------------------------------------------------------------
/include/Benchmark.hpp:
--------------------------------------------------------------------------------
 1 | #ifndef MATTERSIM_BENCHMARK
 2 | #define MATTERSIM_BENCHMARK
 3 | 
 4 | #include <chrono>
 5 | 
 6 | namespace mattersim {
 7 | 
 8 |     class Timer {
 9 |     public:
10 |         Timer();
11 |         virtual void Start();
12 |         virtual void Stop();
13 |         virtual void Reset();
14 |         virtual float MilliSeconds();
15 |         virtual float MicroSeconds();
16 |         virtual float Seconds();
17 |         inline bool running() { return running_; }
18 | 
19 |     protected:
20 |         bool running_;
21 |         std::chrono::steady_clock::time_point start_;
22 |         std::chrono::steady_clock::duration elapsed_;
23 |     };
24 | }
25 | 
26 | #endif   // MATTERSIM_BENCHMARK
27 | 


--------------------------------------------------------------------------------
/src/test/python_test.py:
--------------------------------------------------------------------------------
 1 | import sys
 2 | sys.path.append('build')
 3 | 
 4 | from MatterSim import Simulator
 5 | import math
 6 | import cv2
 7 | import json
 8 | import numpy as np
 9 | 
10 | 
# Set up the simulator: 500x300 camera, 60 degree vertical field of view and
# elevation limits of -40 / +50 degrees (the API takes radians).
sim = Simulator()
sim.setCameraResolution(500, 300)
sim.setCameraVFOV(math.radians(60))
sim.setElevationLimits(math.radians(-40), math.radians(50))
sim.initialize()

# Load the render spec first so the file handle is released before the
# blocking cv2.waitKey(0) call below.
with open("src/test/rendertest_spec.json") as f:
    spec = json.load(f)

# Only the first entry of the spec is rendered (spec[:1]).
for tc in spec[:1]:
    sim.newEpisode(tc["scanId"], tc["viewpointId"], tc["heading"], tc["elevation"])
    state = sim.getState()
    # copy=False: request a view of the simulator's RGB buffer, not a copy.
    im = np.array(state.rgb, copy=False)
    imgfile = tc["reference_image"]
    # Save the rendering under sim_imgs/ using the reference image's file name.
    cv2.imwrite("sim_imgs/" + imgfile, im)
    cv2.imshow('rendering', im)
    # Block until a key is pressed in the preview window.
    cv2.waitKey(0)
27 | 
28 | 
29 | 


--------------------------------------------------------------------------------
/download.sh:
--------------------------------------------------------------------------------
 1 | # r2r dataset
 2 | wget https://www.dropbox.com/s/hh5qec8o5urcztn/R2R_train.json -P tasks/env/r2r_dataset/
 3 | wget https://www.dropbox.com/s/8ye4gqce7v8yzdm/R2R_val_seen.json -P tasks/env/r2r_dataset/
 4 | wget https://www.dropbox.com/s/p6hlckr70a07wka/R2R_val_unseen.json -P tasks/env/r2r_dataset/
 5 | wget https://www.dropbox.com/s/w4pnbwqamwzdwd1/R2R_test.json -P tasks/env/r2r_dataset/
 6 | pip3 install gdown
 7 | gdown --id 1Wlhp87sjUyhUuVSarrH22VXw7zOVvCfw
 8 | mv R2R_data_augmentation_paths.json tasks/env/r2r_dataset/R2R_train_aug.json
 9 | 
10 | # resnet
11 | wget https://www.dropbox.com/s/o57kxh2mn5rkx4o/ResNet-152-imagenet.zip -P tasks/env/img_features/
12 | unzip tasks/env/img_features/ResNet-152-imagenet.zip -d tasks/env/img_features/
13 | 
14 | # glove
15 | wget https://nlp.stanford.edu/data/glove.6B.zip -P tasks/env/nlp_features/
16 | unzip -j tasks/env/nlp_features/glove.6B.zip glove.6B.300d.txt -d tasks/env/nlp_features/
17 | 


--------------------------------------------------------------------------------
/web/package.json:
--------------------------------------------------------------------------------
 1 | {
 2 |   "name": "web",
 3 |   "version": "1.0.0",
 4 |   "description": "This directory contains Amazon Mechanical Turk interfaces for collecting and evaluating navigation instructions, plus code for visualizing and saving first-person trajectories and floorplans",
 5 |   "main": "index.js",
 6 |   "scripts": {
 7 |     "test": "echo \"Error: no test specified\" && exit 1"
 8 |   },
 9 |   "author": "Peter Anderson",
10 |   "license": "MIT",
11 |   "repository": {
12 |     "type": "git",
13 |     "url": "https://github.com/peteanderson80/Matterport3DSimulator"
14 |   },
15 |   "devDependencies": {
16 |     "browser-sync": "^2.26.3",
17 |     "gulp": "^3.9.1",
18 |     "gulp-babel-minify": "^0.5.0",
19 |     "gulp-cssnano": "^2.1.3",
20 |     "gulp-if": "^2.0.2",
21 |     "gulp-useref": "^3.1.5",
22 |     "run-sequence": "^2.2.1"
23 |   },
24 |   "dependencies": {
25 |     "del": "^3.0.0",
26 |     "gulp-cache": "^1.0.2"
27 |   }
28 | }
29 | 


--------------------------------------------------------------------------------
/include/cbf.h:
--------------------------------------------------------------------------------
 1 | // NYU Depth V2 Dataset Matlab Toolbox
 2 | // Authors: Nathan Silberman, Pushmeet Kohli, Derek Hoiem, Rob Fergus
 3 | 
 4 | #ifndef CBF_H_
 5 | #define CBF_H_
 6 | 
 7 | #include <stdint.h>
 8 | 
 9 | namespace cbf {
10 | 
11 | // Filters the given depth image using a Cross Bilateral Filter.
12 | //
13 | // Args:
14 | //   height - height of the images.
15 | //   width - width of the images.
16 | //   depth - HxW row-major ordered matrix.
17 | //   intensity - HxW row-major ordered matrix.
18 | //   mask - HxW row-major ordered matrix.
19 | //   result - HxW row-major ordered matrix.
20 | //   num_scales - the number of scales at which to perform the filtering.
21 | //   sigma_s - the space sigma (in pixels)
22 | //   sigma_r - the range sigma (in intensity values, 0-1)
23 | void cbf(int height, int width, uint8_t* depth, uint8_t* intensity,
24 |          uint8_t* mask, uint8_t* result, unsigned num_scales, double* sigma_s,
25 |          double* sigma_r);
26 | 
27 | }	 // namespace
28 | 
29 | #endif  // CBF_H_
30 | 


--------------------------------------------------------------------------------
/scripts/timer.py:
--------------------------------------------------------------------------------
 1 | # --------------------------------------------------------
 2 | # Fast R-CNN
 3 | # Copyright (c) 2015 Microsoft
 4 | # Licensed under The MIT License [see LICENSE for details]
 5 | # Written by Ross Girshick
 6 | # --------------------------------------------------------
 7 | 
 8 | import time
 9 | 
class Timer(object):
    """Stopwatch that accumulates wall-clock time over tic()/toc() pairs."""

    def __init__(self):
        # Timestamp captured by the most recent tic().
        self.start_time = 0.
        # Length of the most recently closed interval, in seconds.
        self.diff = 0.
        # Sum of all closed intervals and the number of toc() calls.
        self.total_time = 0.
        self.calls = 0
        # Mean interval length (total_time / calls), refreshed by toc().
        self.average_time = 0.

    def tic(self):
        """Record the start of an interval."""
        # time.time is used rather than time.clock because, per the original
        # author, time.clock does not normalize for multithreading.
        self.start_time = time.time()

    def toc(self, average=True):
        """Close the current interval and update the running statistics.

        Returns the mean interval length when ``average`` is truthy,
        otherwise the length of the interval that was just closed.
        """
        elapsed = time.time() - self.start_time
        self.diff = elapsed
        self.total_time = self.total_time + elapsed
        self.calls = self.calls + 1
        self.average_time = self.total_time / self.calls
        return self.average_time if average else self.diff
33 | 


--------------------------------------------------------------------------------
/connectivity/README.md:
--------------------------------------------------------------------------------
 1 | ## connectivity
 2 | Connectivity graphs indicating the navigable paths between viewpoints in each scan.
 3 | 
 4 | Each json file contains an array of annotations, one for each viewpoint in the scan. All annotations share the same basic structure as follows:
 5 | 
 6 | ```
 7 | {
 8 |   "image_id": str,
 9 |   "pose": [float x 16],
10 |   "included": boolean,
11 |   "visible": [boolean x num_viewpoints],
12 |   "unobstructed": [boolean x num_viewpoints],
13 |   "height": float
14 | }
15 | ```
16 | - `image_id`: matterport skybox prefix
17 | - `pose`: 4x4 matrix in row major order that transforms matterport skyboxes to global coordinates (z-up). Pose matrices are based on the assumption that the camera is facing skybox image 3.
18 | - `included`: whether viewpoint is included in the simulator. Some overlapping viewpoints are excluded.
19 | - `visible`: indicates other viewpoints that can be seen from this viewpoint.
20 | - `unobstructed`: indicates transitions to other viewpoints that are considered navigable for an agent.
21 | - `height`: estimated height of the viewpoint above the floor. Not required for the simulator.
22 | 
23 | Units are in metres.
24 | 
25 | `scans.txt` contains a list of all the scan ids in the dataset.
26 | 


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
 1 | MIT License
 2 | 
 3 | Copyright (c) 2017 Peter Anderson, Philip Roberts, Qi Wu, Damien Teney, Jake Bruce
 4 | Mark Johnson, Niko Sunderhauf, Ian Reid, Stephen Gould, Anton van den Hengel
 5 | 
 6 | Permission is hereby granted, free of charge, to any person obtaining a copy
 7 | of this software and associated documentation files (the "Software"), to deal
 8 | in the Software without restriction, including without limitation the rights
 9 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10 | copies of the Software, and to permit persons to whom the Software is
11 | furnished to do so, subject to the following conditions:
12 | 
13 | The above copyright notice and this permission notice shall be included in all
14 | copies or substantial portions of the Software.
15 | 
16 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22 | SOFTWARE.
23 | 


--------------------------------------------------------------------------------
/src/test/python_vs_threejs_test.py:
--------------------------------------------------------------------------------
 1 | import sys
 2 | sys.path.append('build')
 3 | 
 4 | from MatterSim import Simulator
 5 | import math
 6 | import cv2
 7 | import json
 8 | import numpy as np
 9 | 
10 | from os import listdir
11 | from os.path import isfile, join
12 | 
13 | # Download some images using web/app/trajectory.html, then recreate identical images with this script
14 | 
15 | 
# Set up the simulator: 1140x650 camera, 80 degree vertical field of view and
# elevation limits of -40 / +50 degrees (the API takes radians).
sim = Simulator()
sim.setCameraResolution(1140, 650)
sim.setCameraVFOV(math.radians(80))
sim.setElevationLimits(math.radians(-40), math.radians(50))
sim.initialize()


# Directory scanned for reference images; every regular file whose name
# contains 'threejs' is compared.
download_path = '.'
threejs_files = [f for f in listdir(download_path) if 'threejs' in f and isfile(join(download_path, f))]

for f in threejs_files:
    # print(...) with one argument behaves the same on Python 2 and 3; the
    # former `print f` statement is a SyntaxError on Python 3.
    print(f)
    # File names are parsed as <prefix>_<scanId>_<viewpointId>_<heading>_<elevation>.png
    s = f.split("_")
    scanId = s[1]
    viewpointId = s[2]
    heading = float(s[3])
    elevation = float(s[4].replace('.png',''))
    # Single-element lists are passed and state[0] is read back below --
    # presumably a batch of size one; confirm against the Simulator API.
    sim.newEpisode([scanId], [viewpointId], [heading], [elevation])
    state = sim.getState()
    pyim = np.array(state[0].rgb, copy=False)
    cv2.imwrite(f.replace('threejs','python-1'), pyim)
    # Read from the directory that was listed, so the script keeps working if
    # download_path is changed from '.'.
    jsim = cv2.imread(join(download_path, f))
    # 50/50 blend of the two renderings for visual comparison.
    im = cv2.addWeighted(jsim, 0.5, pyim, 0.5, 0)

    cv2.imshow('ThreeJS', jsim)
    cv2.imshow('Python', pyim)
    cv2.imshow('Blend', im)
    # Block until a key is pressed before moving on to the next image.
    cv2.waitKey(0)
44 | 
45 | 
46 | 


--------------------------------------------------------------------------------
/web/app/index.html:
--------------------------------------------------------------------------------
 1 | <!DOCTYPE html>
 2 | <html lang="en">
 3 | 
 4 | <head>
 5 |   <meta charset="UTF-8">
 6 |   <title>Matterport3D Simulator Web Tools</title>
 7 |   <link rel="stylesheet" href="https://maxcdn.bootstrapcdn.com/bootstrap/3.3.7/css/bootstrap.min.css" integrity="sha384-BVYiiSIFeK1dGmJRAkycuHAHRg32OmUcww7on3RYdg4Va+PmSTsz/K68vbdEjh4u" crossorigin="anonymous">
 8 |   <meta name="viewport" content="width=device-width, initial-scale=1">
 9 |   <style>
10 |     .btn{ width:150px; }
11 |   </style>
12 | </head>
13 | 
14 | <body>
15 |   <div class="container">
16 |     <div class="page-header">
17 |       <h1>Web tools for Matterport data</h1>
18 |     <div>
19 |     <p>
20 |       <a class="btn btn-primary" href="/trajectory.html">trajectory.html</a> Visualize first-person trajectories in Matterport environments.
21 |     </p>
22 |     <!--p>
23 |       <a class="btn btn-primary" href="/connectivity.html">connectivity.html</a> Visualize navigation graphs.
24 |     </p-->
25 |     <p>
26 |       <a class="btn btn-warning" href="/collect-hit.html?ix=0">collect-hit.html</a> AMT HIT for collecting navigation instructions for the R2R data set.
27 |     </p>
28 |     <p>
29 |       <a class="btn btn-warning" href="/eval-hit.html?ix=0">eval-hit.html</a> AMT HIT for benchmarking human performance on the R2R test set.
30 |     </p>
31 |   <div>
32 | </body>
33 | 
34 | </html>
35 | 


--------------------------------------------------------------------------------
/tasks/agent/pomdp/instruction_attention.py:
--------------------------------------------------------------------------------
 1 | import torch
 2 | import torch.nn as nn
 3 | 
 4 | 
 5 | class InstructionAttention(nn.Module):
 6 |     def __init__(
 7 |         self,
 8 |         embed_dim: int,
 9 |         dropout_ratio: float
10 |     ) -> None:
11 |         super(InstructionAttention, self).__init__()
12 |         self.drop = nn.Dropout(p=dropout_ratio)
13 |         self.h_fc = nn.Sequential(
14 |             nn.Linear(embed_dim, embed_dim, bias=False),
15 |             nn.Linear(embed_dim, embed_dim, bias=False)
16 |         )
17 |         self.softmax = nn.Softmax(dim=1)
18 |         return
19 | 
20 |     def forward(
21 |         self,
22 |         h_t: torch.Tensor,
23 |         instr_embed: torch.Tensor,
24 |         instr_mask: torch.Tensor or None
25 |     ) -> torch.Tensor:
26 |         h_latent = self.h_fc(h_t).unsqueeze(2)
27 | 
28 |         instr_attn_weight = torch.bmm(instr_embed, h_latent).squeeze(2)
29 |         if instr_mask is not None:
30 |             instr_attn_weight.data.masked_fill_(instr_mask, -float('inf'))
31 |         instr_attn_weight = self.softmax(instr_attn_weight)
32 |         weighted_instr_embed = torch.bmm(instr_attn_weight.unsqueeze(1), instr_embed).squeeze(1)
33 |         return self.drop(weighted_instr_embed), instr_attn_weight
34 | 
35 | 
def main() -> None:
    """Placeholder entry point; this module has nothing to run as a script."""
    return None


if __name__ == '__main__':
    main()
42 | 


--------------------------------------------------------------------------------
/src/lib/Benchmark.cpp:
--------------------------------------------------------------------------------
 1 | #include <chrono>
 2 | 
 3 | #include "Benchmark.hpp"
 4 | 
 5 | namespace mattersim {
 6 | 
 7 |     Timer::Timer()
 8 |         : running_(false),
 9 |           elapsed_(0) {}
10 | 
11 |     void Timer::Start() {
12 |       if (!running()) {
13 |         start_ = std::chrono::steady_clock::now();
14 |         running_ = true;
15 |       }
16 |     }
17 | 
18 |     void Timer::Stop() {
19 |       if (running()) {
20 |         elapsed_ += std::chrono::steady_clock::now() - start_;
21 |         running_ = false;
22 |       }
23 |     }
24 | 
25 |     void Timer::Reset() {
26 |       if (running()) {
27 |         running_ = false;
28 |       }
29 |       elapsed_ = std::chrono::steady_clock::duration(0);
30 |     }
31 | 
32 |     float Timer::MicroSeconds() {
33 |       if (running()) {
34 |         elapsed_ += std::chrono::steady_clock::now() - start_;
35 |       }
36 |       return std::chrono::duration_cast<std::chrono::microseconds>(elapsed_).count();
37 |     }
38 | 
39 |     float Timer::MilliSeconds() {
40 |       if (running()) {
41 |         elapsed_ += std::chrono::steady_clock::now() - start_;
42 |       }
43 |       return std::chrono::duration_cast<std::chrono::milliseconds>(elapsed_).count();
44 |     }
45 | 
46 |     float Timer::Seconds() {
47 |       if (running()) {
48 |         elapsed_ += std::chrono::steady_clock::now() - start_;
49 |       }
50 |       return std::chrono::duration_cast<std::chrono::seconds>(elapsed_).count();
51 |     }
52 | 
53 | 
54 | }
55 | 


--------------------------------------------------------------------------------
/connectivity/scans.txt:
--------------------------------------------------------------------------------
 1 | 17DRP5sb8fy
 2 | 1LXtFkjw3qL
 3 | 1pXnuDYAj8r
 4 | 29hnd4uzFmX
 5 | 2azQ1b91cZZ
 6 | 2n8kARJN3HM
 7 | 2t7WUuJeko7
 8 | 5LpN3gDmAk7
 9 | 5q7pvUzZiYa
10 | 5ZKStnWn8Zo
11 | 759xd9YjKW5
12 | 7y3sRwLe3Va
13 | 8194nk5LbLH
14 | 82sE5b5pLXE
15 | 8WUmhLawc2A
16 | aayBHfsNo7d
17 | ac26ZMwG7aT
18 | ARNzJeq3xxb
19 | B6ByNegPMKs
20 | b8cTxDM8gDG
21 | cV4RVeZvu5T
22 | D7G3Y4RVNrH
23 | D7N2EKCX4Sj
24 | dhjEzFoUFzH
25 | E9uDoFAP3SH
26 | e9zR4mvMWw7
27 | EDJbREhghzL
28 | EU6Fwq7SyZv
29 | fzynW3qQPVF
30 | GdvgFV5R1Z5
31 | gTV8FGcVJC9
32 | gxdoqLR6rwA
33 | gYvKGZ5eRqb
34 | gZ6f7yhEvPG
35 | HxpKQynjfin
36 | i5noydFURQK
37 | JeFG25nYj2p
38 | JF19kD82Mey
39 | jh4fc5c5qoQ
40 | JmbYfDe2QKZ
41 | jtcxE69GiFV
42 | kEZ7cmS4wCh
43 | mJXqzFtmKg4
44 | oLBMNvg9in8
45 | p5wJjkQkbXX
46 | pa4otMbVnkk
47 | pLe4wQe7qrG
48 | Pm6F8kyY3z2
49 | pRbA3pwrgk9
50 | PuKPg4mmafe
51 | PX4nDJXEHrG
52 | q9vSo1VnCiC
53 | qoiz87JEwZ2
54 | QUCTc6BB5sX
55 | r1Q1Z4BcV1o
56 | r47D5H71a5s
57 | rPc6DW4iMge
58 | RPmz2sHmrrY
59 | rqfALeAoiTq
60 | s8pcmisQ38h
61 | S9hNv5qa7GM
62 | sKLMLpTHeUy
63 | SN83YJsR3w2
64 | sT4fr6TAbpF
65 | TbHJrupSAjP
66 | ULsKaCPVFJR
67 | uNb9QFRL6hY
68 | ur6pFq6Qu1A
69 | UwV83HsGsw3
70 | Uxmj2M2itWa
71 | V2XKFyX4ASd
72 | VFuaQ6m2Qom
73 | VLzqgDo317F
74 | Vt2qJdWjCF2
75 | VVfe2KiqLaN
76 | Vvot9Ly1tCj
77 | vyrNrziPKCB
78 | VzqfbhrpDEA
79 | wc2JMjhGNzB
80 | WYY7iVyf5p8
81 | X7HyMhZNoso
82 | x8F5xyUWy9e
83 | XcA2TqTSSAj
84 | YFuZgdQ5vWj
85 | YmJkqBEsHnH
86 | yqstnuAEVhm
87 | YVUC4YcDtcY
88 | Z6MFQCViBuw
89 | ZMojNkEp431
90 | zsNo4HB9uLZ
91 | 


--------------------------------------------------------------------------------
/tasks/agent/observation/cv/panorama_encoder.py:
--------------------------------------------------------------------------------
 1 | import torch
 2 | import torch.nn as nn
 3 | 
 4 | 
 5 | class PanoramaEncoder(nn.Module):
 6 |     def __init__(
 7 |         self,
 8 |         config: dict,
 9 |         vision_feature_size: int
10 |     ) -> None:
11 |         super(PanoramaEncoder, self).__init__()
12 |         query_dim = config['state_tracker']['obs']['vision']['attn']['query_dim']
13 | 
14 |         self.query_layer = nn.Sequential(
15 |             nn.Linear(query_dim, vision_feature_size, bias=False),
16 |             nn.Linear(vision_feature_size, vision_feature_size, bias=False),
17 |             nn.Dropout(p=config['state_tracker']['dropout_ratio'])
18 |         )
19 |         self.softmax = nn.Softmax(dim=1)
20 |         self.encode = self.forward
21 |         return
22 | 
23 |     def forward(
24 |         self,
25 |         visions: torch.Tensor,
26 |         h_t: torch.Tensor
27 |     ) -> torch.Tensor:
28 |         panorama = visions.squeeze(0)                                       # batch x v_num x v_dim
29 |         query = self.query_layer(h_t).unsqueeze(2)                          # batch x v_dim x 1
30 | 
31 |         # Get attention
32 |         attn = torch.bmm(panorama, query).squeeze(2)                        # batch x v_num
33 |         attn = self.softmax(attn)
34 | 
35 |         vision_embed = torch.bmm(attn.unsqueeze(1), panorama).squeeze(1)    # batch x v_dim
36 |         return vision_embed
37 | 
38 | 
def main() -> None:
    """Placeholder entry point; this module has nothing to run as a script."""
    return None


if __name__ == '__main__':
    main()
45 | 


--------------------------------------------------------------------------------
/web/gulpfile.js:
--------------------------------------------------------------------------------
 1 | var gulp = require('gulp');
 2 | var browserSync = require('browser-sync');
 3 | var useref = require('gulp-useref');
 4 | var minify = require('gulp-babel-minify');
 5 | var gulpIf = require('gulp-if');
 6 | var cssnano = require('gulp-cssnano');
 7 | var cache = require('gulp-cache');
 8 | var del = require('del');
 9 | var runSequence = require('run-sequence');
10 | 
// Development Tasks 
// -----------------

// Start browserSync server, serving files from the app/ directory
gulp.task('browserSync', function() {
  browserSync({
    server: {
      baseDir: 'app'
    }
  })
})

// Watchers: reload the browser whenever an HTML page or script under app/ changes
gulp.task('watch', function() {
  gulp.watch('app/*.html', browserSync.reload);
  gulp.watch('app/js/**/*.js', browserSync.reload);
});

// Optimization Tasks 
// ------------------

// Optimizing CSS and JavaScript: concatenate the assets referenced by the page,
// minify them, and write the result to dist/
// NOTE(review): only app/collect-hit.html is processed; confirm that page
// exists in this checkout, otherwise this task produces no output.
gulp.task('useref', function() {
  return gulp.src('app/collect-hit.html')
    .pipe(useref())
    .pipe(gulpIf('*.js', minify({
      mangle: { keepClassName: true }
     })))
    .pipe(gulpIf('*.css', cssnano()))
    .pipe(gulp.dest('dist'));
});

// Cleaning: remove dist/ and clear the gulp-cache store.
// del.sync() returns the deleted paths synchronously (it is not a promise, so
// chaining .then() on it throws); completion is signalled through clearAll.
gulp.task('clean', function(callback) {
  del.sync('dist');
  return cache.clearAll(callback);
});

// Remove everything inside dist/ while keeping the directory itself
gulp.task('clean:dist', function() {
  return del.sync(['dist/**/*']);
});

// Build Sequences
// ---------------

// Default: start the dev server, then watch for changes
gulp.task('default', function(callback) {
  runSequence(['browserSync'], 'watch',
    callback
  )
})

// Build: empty dist/, then regenerate the optimized assets
gulp.task('build', function(callback) {
  runSequence(
    'clean:dist',
    ['useref'],
    callback
  )
})
70 | 
71 | 
72 | 


--------------------------------------------------------------------------------
/web/app/js/Detector.js:
--------------------------------------------------------------------------------
 1 | /**
 2 |  * @author alteredq / http://alteredqualia.com/
 3 |  * @author mr.doob / http://mrdoob.com/
 4 |  */
 5 | 
 6 | Detector = {
 7 | 
 8 | 	canvas : !! window.CanvasRenderingContext2D,
 9 | 	webgl : ( function () { try { return !! window.WebGLRenderingContext && !! document.createElement( 'canvas' ).getContext( 'experimental-webgl' ); } catch( e ) { return false; } } )(),
10 | 	workers : !! window.Worker,
11 | 	fileapi : window.File && window.FileReader && window.FileList && window.Blob,
12 | 
13 | 	getWebGLErrorMessage : function () {
14 | 
15 | 		var domElement = document.createElement( 'div' );
16 | 
17 | 		domElement.style.fontFamily = 'monospace';
18 | 		domElement.style.fontSize = '13px';
19 | 		domElement.style.textAlign = 'center';
20 | 		domElement.style.background = '#eee';
21 | 		domElement.style.color = '#000';
22 | 		domElement.style.padding = '1em';
23 | 		domElement.style.width = '475px';
24 | 		domElement.style.margin = '5em auto 0';
25 | 
26 | 		if ( ! this.webgl ) {
27 | 
28 | 			domElement.innerHTML = window.WebGLRenderingContext ? [
29 | 				'Your graphics card does not seem to support <a href="http://khronos.org/webgl/wiki/Getting_a_WebGL_Implementation">WebGL</a>.<br />',
30 | 				'Find out how to get it <a href="http://get.webgl.org/">here</a>.'
31 | 			].join( '\n' ) : [
32 | 				'Your browser does not seem to support <a href="http://khronos.org/webgl/wiki/Getting_a_WebGL_Implementation">WebGL</a>.<br/>',
33 | 				'Find out how to get it <a href="http://get.webgl.org/">here</a>.'
34 | 			].join( '\n' );
35 | 
36 | 		}
37 | 
38 | 		return domElement;
39 | 
40 | 	},
41 | 
42 | 	addGetWebGLMessage : function ( parameters ) {
43 | 
44 | 		var parent, id, domElement;
45 | 
46 | 		parameters = parameters || {};
47 | 
48 | 		parent = parameters.parent !== undefined ? parameters.parent : document.body;
49 | 		id = parameters.id !== undefined ? parameters.id : 'oldie';
50 | 
51 | 		domElement = Detector.getWebGLErrorMessage();
52 | 		domElement.id = id;
53 | 
54 | 		parent.appendChild( domElement );
55 | 
56 | 	}
57 | 
58 | };
59 | 


--------------------------------------------------------------------------------
/Dockerfile:
--------------------------------------------------------------------------------
# Matterport3DSimulator
# Requires nvidia gpu with driver 396.37 or higher
# NOTE(review): the driver version above may predate the CUDA 11.2.2 base image
# used below - confirm the minimum driver required by this image.
FROM nvidia/cudagl:11.2.2-devel-ubuntu18.04

# Install a few libraries to support both EGL and OSMESA options
# DEBIAN_FRONTEND=noninteractive keeps apt from prompting during the build
ENV DEBIAN_FRONTEND=noninteractive
RUN apt-get update && apt-get install -y wget doxygen curl apt-utils libjsoncpp-dev libepoxy-dev libglm-dev libosmesa6 libosmesa6-dev libglew-dev libopencv-dev python-opencv python3-setuptools python3-dev python3-pip
# Pinned Python 3 packages
RUN pip3 install opencv-python==4.1.0.25 numpy==1.13.3 pandas==0.24.1 networkx==2.2

# Install CMake 3.12.2 (pinned release) from the official installer script
# into /opt/cmake and expose it on the PATH via a symlink
ADD https://cmake.org/files/v3.12/cmake-3.12.2-Linux-x86_64.sh /cmake-3.12.2-Linux-x86_64.sh
RUN mkdir /opt/cmake
RUN sh /cmake-3.12.2-Linux-x86_64.sh --prefix=/opt/cmake --skip-license
RUN ln -s /opt/cmake/bin/cmake /usr/local/bin/cmake
RUN cmake --version


#############
# Customize #
#############

# set noninteractive installation
# NOTE: this RUN only prints the text during the build; it does not set the
# variable. DEBIAN_FRONTEND is already set by the ENV instruction above.
RUN echo export DEBIAN_FRONTEND=noninteractive
# Install the tzdata package and drop the apt lists to keep the layer small
RUN apt-get update && apt-get install -y \
    tzdata \
 && rm -rf /var/lib/apt/lists/*
# Set your timezone (Asia/Taipei here) and regenerate the tz configuration
RUN ln -fs /usr/share/zoneinfo/Asia/Taipei /etc/localtime
RUN dpkg-reconfigure -f noninteractive tzdata


# Extra tooling: X11 apps, locales, ssh server, editors and utilities
RUN apt-get update && apt-get install -qqy \
    x11-apps \
    locales \
    curl \
    ca-certificates \
    sudo \
    git \
    bzip2 \
    libx11-6 \
    openssh-server \
    vim	\
    ffmpeg \
    htop \
    python3-tk \
 && rm -rf /var/lib/apt/lists/*

# Appends "export VISIBLE=now" to /etc/profile so login shells export it
ENV NOTVISIBLE "in users profile"
RUN echo "export VISIBLE=now" >> /etc/profile

# Python packages used by the training code; torch is installed unpinned
RUN pip3 install matplotlib==3.3.4 nltk==3.6.2 tqdm==4.61.0 unidecode==1.2.0 tensorflow==1.14.0 tensorboardX==2.1 moviepy==1.0.3 flake8==3.9.2 flake8-unused-arguments==0.0.6
RUN pip3 install torch

# Locale: generate en_US and zh_TW, default to en_US.UTF-8
RUN locale-gen en_US.UTF-8
RUN locale-gen zh_TW.UTF-8
ENV LANG en_US.UTF-8
ENV LANGUAGE en_US:en
ENV LC_ALL en_US.UTF-8


# Put the simulator build directory on the Python import path.
# NOTE(review): the path presumably refers to a volume mounted at run time - confirm.
ENV PYTHONPATH=/root/mount/Matterport3DSimulator/build

# Restart the ssh service, then drop into an interactive shell
CMD /bin/sh -c 'service ssh restart && bash'
66 | 


--------------------------------------------------------------------------------
/scripts/downsize_skybox.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python
 2 | 
 3 | ''' Script for downsizing skybox images. '''
 4 | 
 5 | import os
 6 | import math
 7 | import cv2
 8 | import numpy as np
 9 | from multiprocessing import Pool
10 | from depth_to_skybox import camera_parameters
11 | 
12 | 
# Size of the multiprocessing pool used when the script is run directly
NUM_WORKER_PROCESSES = 20
# Target size (pixels) of each downsized skybox face
DOWNSIZED_WIDTH = 512
DOWNSIZED_HEIGHT = 512

# Constants
# NOTE(review): SKYBOX_WIDTH / SKYBOX_HEIGHT are not referenced in this script;
# presumably they record the source image size - confirm.
SKYBOX_WIDTH = 1024
SKYBOX_HEIGHT = 1024
base_dir = 'data/v1/scans'
# Path templates, filled with (base_dir, scan, pano, skybox index)
skybox_template = '%s/%s/matterport_skybox_images/%s_skybox%d_sami.jpg'
skybox_small_template = '%s/%s/matterport_skybox_images/%s_skybox%d_small.jpg'
# Merged output has no skybox index: filled with (base_dir, scan, pano)
skybox_merge_template = '%s/%s/matterport_skybox_images/%s_skybox_small.jpg'
24 | 
25 | 
26 | 
def downsizeWithMerge(scan):
  ''' Downsize the six skybox images of every panorama in a scan and save
      them concatenated side by side as a single image per panorama.

      Raises IOError if a skybox image cannot be read or the merged image
      cannot be written. '''
  # Load pano ids (the part of each intrinsics key before the first '_')
  intrinsics,_ = camera_parameters(scan)
  pano_ids = list(set([item.split('_')[0] for item in intrinsics.keys()]))
  print('Processing scan %s with %d panoramas' % (scan, len(pano_ids)))

  for pano in pano_ids:

    ims = []
    for skybox_ix in range(6):

      # Load and downsize skybox image
      skybox_path = skybox_template % (base_dir,scan,pano,skybox_ix)
      skybox = cv2.imread(skybox_path)
      if skybox is None:
        # cv2.imread returns None instead of raising on a missing/corrupt file
        raise IOError('Could not read skybox image: %s' % skybox_path)
      ims.append(cv2.resize(skybox,(DOWNSIZED_WIDTH,DOWNSIZED_HEIGHT),interpolation=cv2.INTER_AREA))

    # Save output. The write is kept out of an assert so it still happens
    # when Python runs with -O (which strips assert statements).
    newimg = np.concatenate(ims, axis=1)
    merged_path = skybox_merge_template % (base_dir,scan,pano)
    if not cv2.imwrite(merged_path, newimg):
      raise IOError('Could not write merged skybox image: %s' % merged_path)
45 | 
46 | 
def downsize(scan):
  ''' Downsize the six skybox images of every panorama in a scan, saving each
      one as a separate small image.

      Raises IOError if a skybox image cannot be read or a downsized image
      cannot be written. '''

  # Load pano ids (the part of each intrinsics key before the first '_')
  intrinsics,_ = camera_parameters(scan)
  pano_ids = list(set([item.split('_')[0] for item in intrinsics.keys()]))
  print('Processing scan %s with %d panoramas' % (scan, len(pano_ids)))

  for pano in pano_ids:

    for skybox_ix in range(6):

      # Load and downsize skybox image
      skybox_path = skybox_template % (base_dir,scan,pano,skybox_ix)
      skybox = cv2.imread(skybox_path)
      if skybox is None:
        # cv2.imread returns None instead of raising on a missing/corrupt file
        raise IOError('Could not read skybox image: %s' % skybox_path)
      newimg = cv2.resize(skybox,(DOWNSIZED_WIDTH,DOWNSIZED_HEIGHT),interpolation=cv2.INTER_AREA)

      # Save output. The write is kept out of an assert so it still happens
      # when Python runs with -O (which strips assert statements).
      small_path = skybox_small_template % (base_dir,scan,pano,skybox_ix)
      if not cv2.imwrite(small_path, newimg):
        raise IOError('Could not write downsized skybox image: %s' % small_path)
64 | 
65 | 
if __name__ == '__main__':

  # Read the scan ids up front so the file is closed before the long-running
  # work starts; blank lines are skipped so they do not become empty scan ids.
  with open('connectivity/scans.txt') as f:
    scans = [scan.strip() for scan in f if scan.strip()]

  # Process the scans in parallel, one scan per task, and always release the
  # worker processes afterwards.
  p = Pool(NUM_WORKER_PROCESSES)
  try:
    p.map(downsizeWithMerge, scans)
  finally:
    p.close()
    p.join()
72 | 
73 | 
74 | 


--------------------------------------------------------------------------------
/src/driver/driver.py:
--------------------------------------------------------------------------------
 1 | import sys
 2 | sys.path.append('build')
 3 | import MatterSim
 4 | import time
 5 | import math
 6 | import cv2
 7 | import numpy as np
 8 | 
 9 | WIDTH = 800
10 | HEIGHT = 600
11 | VFOV = math.radians(60)
12 | HFOV = VFOV*WIDTH/HEIGHT
13 | TEXT_COLOR = [230, 40, 40]
14 | 
15 | cv2.namedWindow('Python RGB')
16 | cv2.namedWindow('Python Depth')
17 | 
18 | sim = MatterSim.Simulator()
19 | sim.setCameraResolution(WIDTH, HEIGHT)
20 | sim.setCameraVFOV(VFOV)
21 | sim.setDepthEnabled(True)
22 | sim.initialize()
23 | #sim.newEpisode(['2t7WUuJeko7'], ['1e6b606b44df4a6086c0f97e826d4d15'], [0], [0])
24 | #sim.newEpisode(['1LXtFkjw3qL'], ['0b22fa63d0f54a529c525afbf2e8bb25'], [0], [0])
25 | sim.newRandomEpisode(['1LXtFkjw3qL'])
26 | 
27 | heading = 0
28 | elevation = 0
29 | location = 0
30 | ANGLEDELTA = 5 * math.pi / 180
31 | 
32 | print '\nPython Demo'
33 | print 'Use arrow keys to move the camera.'
34 | print 'Use number keys (not numpad) to move to nearby viewpoints indicated in the RGB view.\n'
35 | 
36 | while True:
37 |     sim.makeAction([location], [heading], [elevation])
38 |     location = 0
39 |     heading = 0
40 |     elevation = 0
41 | 
42 |     state = sim.getState()[0]
43 |     locations = state.navigableLocations
44 |     rgb = np.array(state.rgb, copy=False)
45 |     for idx, loc in enumerate(locations[1:]):
46 |         # Draw actions on the screen
47 |         fontScale = 3.0/loc.rel_distance
48 |         x = int(WIDTH/2 + loc.rel_heading/HFOV*WIDTH)
49 |         y = int(HEIGHT/2 - loc.rel_elevation/VFOV*HEIGHT)
50 |         cv2.putText(rgb, str(idx + 1), (x, y), cv2.FONT_HERSHEY_SIMPLEX, 
51 |             fontScale, TEXT_COLOR, thickness=3)
52 |     cv2.imshow('Python RGB', rgb)
53 | 
54 |     depth = np.array(state.depth, copy=False)
55 |     cv2.imshow('Python Depth', depth)
56 |     k = cv2.waitKey(1)
57 |     if k == -1:
58 |         continue
59 |     else:
60 |         k = (k & 255)
61 |     if k == ord('q'):
62 |         break
63 |     elif ord('1') <= k <= ord('9'):
64 |         location = k - ord('0')
65 |         if location >= len(locations):
66 |             location = 0
67 |     elif k == 81 or k == ord('a'):
68 |         heading = -ANGLEDELTA
69 |     elif k == 82 or k == ord('w'):
70 |         elevation = ANGLEDELTA
71 |     elif k == 83 or k == ord('d'):
72 |         heading = ANGLEDELTA
73 |     elif k == 84 or k == ord('s'):
74 |         elevation = -ANGLEDELTA
75 | 


--------------------------------------------------------------------------------
/scripts/fill_depth.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python
 2 | 
 3 | ''' Script for filling missing values in undistorted depth images. '''
 4 | 
 5 | import os
 6 | import math
 7 | import cv2
 8 | import numpy as np
 9 | from multiprocessing import Pool
10 | from depth_to_skybox import camera_parameters
11 | 
12 | import sys
13 | sys.path.append('build')
14 | from MatterSim import cbf
15 | 
16 | 
# Root directory of the scans and the per-image path templates, each filled
# with (base_dir, scan, pano, name) where name is '<c>_<i>'.
base_dir = 'data/v1/scans'
color_template = '%s/%s/undistorted_color_images/%s_i%s.jpg'
depth_template = '%s/%s/undistorted_depth_images/%s_d%s.png'
# Output is written next to the input depth image with a '_filled' suffix
filled_depth_template = '%s/%s/undistorted_depth_images/%s_d%s_filled.png'
21 | 
def fill_joint_bilateral_filter(scan):
  ''' Fill the missing (zero) values in every undistorted depth image of a
      scan using cbf, guided by the matching color image, and save each
      result as a 16 bit '_filled' depth image.

      Raises IOError if an input image cannot be read or an output image
      cannot be written. '''

  # Load camera parameters
  intrinsics,_ = camera_parameters(scan)
  pano_ids = list(set([item.split('_')[0] for item in intrinsics.keys()]))
  print('Processing scan %s with %d panoramas' % (scan, len(pano_ids)))

  for pano in pano_ids:

    # Load undistorted depth and rgb images (3 x 6 images per panorama)
    for c in range(3):
      for i in range(6):
        name = '%d_%d' % (c,i)
        color_path = color_template % (base_dir,scan,pano,name)
        rgb = cv2.imread(color_path)
        if rgb is None:
          # cv2.imread returns None instead of raising on a missing/corrupt file
          raise IOError('Could not read color image: %s' % color_path)
        intensity = cv2.cvtColor(rgb, cv2.COLOR_BGR2GRAY)

        # Load 16bit depth image
        depth_path = depth_template % (base_dir,scan,pano,name)
        depth = cv2.imread(depth_path, cv2.IMREAD_ANYDEPTH)
        if depth is None:
          raise IOError('Could not read depth image: %s' % depth_path)

        # Convert the depth image to uint8.
        # int() keeps the +1 in Python integers so it cannot wrap around in
        # the image's 16 bit dtype.
        maxDepth = int(np.max(depth))+1
        depth = (depth.astype(np.float64)/maxDepth)
        depth[depth > 1] = 1
        depth = (depth*255).astype(np.uint8)

        #cv2.imshow('input', cv2.applyColorMap(depth, cv2.COLORMAP_JET))

        # Convert to col major order
        depth = np.asfortranarray(depth)
        intensity = np.asfortranarray(intensity)
        # Pixels with zero depth are the holes to be filled
        mask = (depth == 0)
        result = np.zeros_like(depth)

        # Fill holes (result is written in place)
        cbf(depth, intensity, mask, result)

        #cv2.imshow('result', cv2.applyColorMap(result, cv2.COLORMAP_JET))
        #cv2.waitKey(0)

        # Scale back to the original depth range. The write is kept out of an
        # assert so it still happens when Python runs with -O.
        result = (result.astype(np.float64)/255*maxDepth).astype(np.uint16)
        filled_path = filled_depth_template % (base_dir,scan,pano,name)
        if not cv2.imwrite(filled_path, result):
          raise IOError('Could not write filled depth image: %s' % filled_path)
63 | 
64 | 
if __name__ == '__main__':

  # Read the scan ids up front so the file is closed before the long-running
  # work starts; blank lines are skipped so they do not become empty scan ids.
  with open('connectivity/scans.txt') as f:
    scans = [scan.strip() for scan in f if scan.strip()]

  # Process the scans in parallel, one scan per task, and always release the
  # worker processes afterwards.
  p = Pool(10)
  try:
    p.map(fill_joint_bilateral_filter, scans)
  finally:
    p.close()
    p.join()
71 | 
72 | 
73 | 


--------------------------------------------------------------------------------
/connectivity/gZ6f7yhEvPG_connectivity.json:
--------------------------------------------------------------------------------
1 | [{"image_id":"80929af5cf234ae38ac3a2a4e60e4342","pose":[0.983395,0.00450812,-0.181418,-2.79247,0.181442,-0.00570068,0.983385,-1.38801,0.00339928,-0.999973,-0.00642298,1.42676,0,0,0,1],"included":true,"visible":[false,true,true,false,false,true,false,false],"unobstructed":[false,true,false,true,false,true,false,false],"height":1.4191402375960298},{"image_id":"ba27da20782d4e1a825f0a133ad84da9","pose":[-0.7605,-0.0115739,-0.649234,-2.38988,0.648885,0.0237502,-0.760515,-0.0538717,0.0242219,-0.999651,-0.0105509,1.4341,0,0,0,1],"included":true,"visible":[true,false,true,true,false,true,false,true],"unobstructed":[true,false,false,false,false,true,false,true],"height":1.424939020658826},{"image_id":"46cecea0b30e4786b673f5e951bf82d4","pose":[0.593129,0.0137361,-0.80499,0.99933,0.804932,0.010707,0.59327,1.17558,0.0167685,-0.999848,-0.00470498,1.41684,0,0,0,1],"included":true,"visible":[false,false,false,true,true,false,true,true],"unobstructed":[false,false,false,true,true,false,true,true],"height":1.4252108727703763},{"image_id":"bda7a9e6d1d94b3aa8ff491beb158f3a","pose":[-0.378592,-0.0208239,0.925329,-0.182918,-0.925433,-0.00820128,-0.37882,-1.72967,0.0154776,-0.999749,-0.0161651,1.42205,0,0,0,1],"included":true,"visible":[true,false,true,false,true,false,true,true],"unobstructed":[true,false,true,false,true,false,false,true],"height":1.42983949725488},{"image_id":"dbb2f8000bc04b3ebcd0a55112786149","pose":[-0.595363,0.00457706,-0.803444,1.10196,0.803383,0.0168543,-0.595222,-1.10724,0.0108174,-0.999847,-0.0137106,1.41536,0,0,0,1],"included":true,"visible":[false,false,true,true,false,false,true,true],"unobstructed":[false,false,true,true,false,false,true,true],"height":1.4186255623107038},{"image_id":"29b20fa80dcd4771974303c1ccd8953f","pose":[0.292738,0.0164579,-0.956051,-2.77306,0.956096,0.0090939,0.292909,1.55377,0.0135152,-0.999823,-0.0130722,1.43367,0,0,0,1],"included":true,"visible":[true,true,true,false,true,false,false,false],"unobstructed":[true,true,false,false,
false,false,false,false],"height":1.4237594118402337},{"image_id":"0ee20663dfa34b438d48750ddcd7366c","pose":[-0.75968,-0.0019971,-0.650293,-0.111567,0.650131,0.0201598,-0.759554,1.31337,0.014627,-0.999794,-0.0140156,1.42291,0,0,0,1],"included":true,"visible":[false,false,true,true,true,false,false,true],"unobstructed":[false,false,true,false,true,false,false,true],"height":1.4276556862049736},{"image_id":"47d8a8282c1c4a7fb3eeeacc45e9d959","pose":[-0.0254788,0.00643152,-0.999654,-0.0034508,0.999603,0.0120797,-0.0253995,0.0112371,0.0119124,-0.999906,-0.00673574,1.42388,0,0,0,1],"included":true,"visible":[true,true,true,true,true,false,true,false],"unobstructed":[false,true,true,true,true,false,true,false],"height":1.4268855357216241}]


--------------------------------------------------------------------------------
/src/driver/mattersim_main.cpp:
--------------------------------------------------------------------------------
 1 | #include <iostream>
 2 | #include <opencv2/opencv.hpp>
 3 | 
 4 | #include "MatterSim.hpp"
 5 | 
 6 | using namespace mattersim;
 7 | 
 8 | #define WIDTH  1280
 9 | #define HEIGHT 720
10 | 
11 | #ifndef M_PI
12 | #define M_PI (3.14159265358979323846)
13 | #endif
14 | 
15 | int main(int argc, char *argv[]) {
16 | 
17 |     cv::namedWindow("C++ RGB");
18 |     cv::namedWindow("C++ Depth");
19 | 
20 |     Simulator sim;
21 | 
22 |     // Sets resolution. Default is 320X240
23 |     sim.setCameraResolution(640,480);
24 |     sim.setDepthEnabled(true);
25 | 
26 |     // Initialize the simulator. Further camera configuration won't take any effect from now on.
27 |     sim.initialize();
28 | 
29 |     std::cout << "\nC++ Demo" << std::endl;
30 |     std::cout << "Showing some random viewpoints in one building." << std::endl;
31 | 
32 |     int i = 0;
33 |     while(true) {
34 |         i++;
35 |         std::cout << "Episode #" << i << "\n";
36 | 
37 |         // Starts a new episode. It is not needed right after init() but it doesn't cost much and the loop is nicer.
38 |         sim.newRandomEpisode(std::vector<std::string>(1,"pa4otMbVnkk")); // Launches at a random location
39 | 
40 |         for (int k=0; k<500; k++) {
41 | 
42 |             // Get the state
43 |             SimStatePtr state = sim.getState().at(0); // SimStatePtr is std::shared_ptr<SimState>
44 | 
45 |             // Which consists of:
46 |             unsigned int n = state->step;
47 |             cv::Mat rgb  = state->rgb; // OpenCV CV_8UC3 type (i.e. 8bit color rgb)
48 |             cv::Mat depth  = state->depth; // OpenCV CV_16UC1 type (i.e. 16bit grayscale)
49 |             ViewpointPtr location = state->location; // Need a class to hold viewpoint id, and x,y,z location of a viewpoint
50 |             float heading = state->heading;
51 |             float elevation = state->elevation; // camera parameters
52 |             std::vector<ViewpointPtr> reachable = state->navigableLocations; // Where we can move to,
53 |             int locationIdx = 0; // Must be an index into reachable
54 |             double headingChange = M_PI / 500;
55 |             double elevationChange = 0;
56 | 
57 |             cv::imshow("C++ RGB", rgb);
58 |             cv::imshow("C++ Depth", depth);
59 |             cv::waitKey(10);
60 | 
61 |             sim.makeAction(std::vector<unsigned int>(1, locationIdx), 
62 |                            std::vector<double>(1, headingChange), 
63 |                            std::vector<double>(1, elevationChange));
64 | 
65 |         }
66 |     }
67 | 
68 |     // It will be done automatically in destructor but after close you can init it again with different settings.
69 |     sim.close();
70 | 
71 |     return 0;
72 | }
73 | 


--------------------------------------------------------------------------------
/tasks/agent/observation/nlp/glove.py:
--------------------------------------------------------------------------------
  1 | import numpy as np
  2 | from tqdm import tqdm
  3 | 
  4 | 
  5 | class Glove():
  6 |     def __init__(
  7 |         self,
  8 |         path: str
  9 |     ) -> None:
 10 |         super().__init__()
 11 |         self._words, self._word2idx, self._vectors, self._glove = [], {}, [], {}
 12 |         self._load(path)
 13 |         self.feature_dim = len(self._vectors[0])
 14 |         return
 15 | 
 16 |     def _load(
 17 |         self,
 18 |         path
 19 |     ) -> None:
 20 |         idx, words, word2idx, vectors = 0, [], {}, []
 21 |         with open(path, 'rb') as file_name:
 22 |             with tqdm(total=sum(1 for _ in file_name)) as pbar:
 23 |                 file_name.seek(0)
 24 |                 for line in file_name:
 25 |                     line_decode = line.decode().split()
 26 |                     word = line_decode[0]
 27 |                     words.append(word)
 28 |                     word2idx[word] = idx
 29 |                     idx += 1
 30 |                     vect = np.array(line_decode[1:]).astype(np.float)
 31 |                     vectors.append(vect)
 32 |                     pbar.update(1)
 33 |         self.set_words(words)
 34 |         self.set_word2idx(word2idx)
 35 |         self.set_vectors(vectors)
 36 |         self.set_glove({w: vectors[word2idx[w]] for w in words})
 37 |         return
 38 | 
 39 |     def w2v(
 40 |         self,
 41 |         word: str
 42 |     ) -> np.ndarray:
 43 |         return self._glove[word]
 44 | 
 45 |     @property
 46 |     def words(
 47 |         self
 48 |     ) -> list:
 49 |         return self._words
 50 | 
 51 |     def set_words(
 52 |         self,
 53 |         new_words: list
 54 |     ) -> None:
 55 |         self._words = new_words
 56 |         return
 57 | 
 58 |     @property
 59 |     def word2idx(
 60 |         self
 61 |     ) -> dict:
 62 |         return self._word2idx
 63 | 
 64 |     def set_word2idx(
 65 |         self,
 66 |         new_word2idx: dict
 67 |     ) -> None:
 68 |         self._word2idx = new_word2idx
 69 |         return
 70 | 
 71 |     @property
 72 |     def vectors(
 73 |         self
 74 |     ) -> list:
 75 |         return self._vectors
 76 | 
 77 |     def set_vectors(
 78 |         self,
 79 |         new_vectors: list
 80 |     ) -> None:
 81 |         self._vectors = new_vectors
 82 |         return
 83 | 
 84 |     @property
 85 |     def glove(
 86 |         self
 87 |     ) -> dict:
 88 |         return self._glove
 89 | 
 90 |     def set_glove(
 91 |         self,
 92 |         new_glove: dict
 93 |     ) -> None:
 94 |         self._glove = new_glove
 95 |         return
 96 | 
 97 | 
 98 | def main():
 99 |     return
100 | 
101 | 
102 | if __name__ == '__main__':
103 |     main()
104 | 


--------------------------------------------------------------------------------
/tasks/agent/observation/observation_encoder.py:
--------------------------------------------------------------------------------
 1 | import itertools
 2 | import torch
 3 | from agent.observation.cv.panorama_encoder import PanoramaEncoder
 4 | from agent.observation.nlp.instruction_encoder import InstructionEncoder
 5 | 
 6 | 
 7 | class ObservationEncoder():
 8 |     def __init__(
 9 |         self,
10 |         config: dict,
11 |         vocab: list
12 |     ) -> None:
13 |         # get vision feature size
14 |         self.vision_feature_size = self._get_vision_feature_size(config)
15 |         # get observation embedding size
16 |         self.vision_dim, self.instr_dim = self._get_observation_space(config)
17 | 
18 |         # vision
19 |         self.vision = PanoramaEncoder(config, self.vision_feature_size).to(config['device'])
20 |         self.vision_encode = self.vision.encode
21 | 
22 |         # instruction
23 |         self.instr = InstructionEncoder(config, self.instr_dim, vocab).to(config['device'])
24 |         self.instr_encode = lambda *args: args[0]
25 |         return
26 | 
27 |     def _get_vision_feature_size(
28 |         self,
29 |         config: dict
30 |     ) -> int:
31 |         vision_feature_size = config['r2r_env']['pano_feature_size'] + \
32 |             config['r2r_env']['pose_space'] * config['r2r_env']['pose_repeat']
33 |         return vision_feature_size
34 | 
35 |     def _get_observation_space(
36 |         self,
37 |         config: dict
38 |     ) -> (int, int):
39 |         # vision
40 |         vision_dim = self._get_vision_feature_size(config)
41 |         # instruction
42 |         instr_dim = config['state_tracker']['obs']['instr']['lstm']['hidden_dim'] * \
43 |             (2 if config['state_tracker']['obs']['instr']['lstm']['bidirectional'] else 1)
44 |         return vision_dim, instr_dim
45 | 
46 |     def parameters(
47 |         self
48 |     ) -> itertools.chain:
49 |         params = []
50 |         for encoder in [self.vision, self.instr]:
51 |             if encoder is not None:
52 |                 params.append(encoder.parameters())
53 |         return itertools.chain(*params)
54 | 
55 |     def encode(
56 |         self,
57 |         vision: torch.Tensor,
58 |         instr: torch.Tensor,
59 |         h_t: torch.Tensor
60 |     ) -> tuple:
61 |         """
62 |         vision.shape =  (1, batch_size, view_num, vision_dim)
63 |         instr.shape =   (batch_size, max_len, instr_dim)
64 |         h_t.shape =     (batch_size, h_dim)
65 |         ---
66 |         vision_embed.shape =    (batch_size, vision_dim)
67 |         instr_embed.shape =     (batch_size, max_len, instr_dim)
68 |         """
69 |         vision_embed = self.vision_encode(vision, h_t)
70 |         instr_embed = self.instr_encode(instr, h_t)
71 |         return (vision_embed, instr_embed)
72 | 
73 | 
def main() -> None:
    """Script entry point; currently does nothing and returns ``None``."""
    return None


# Run main() only when this module is executed as a script.
if __name__ == '__main__':
    main()
80 | 


--------------------------------------------------------------------------------
/web/app/js/Matterport3D.js:
--------------------------------------------------------------------------------
 1 | 
 2 | // Matterport3D utils for three.js
 3 | 
 4 | function Matterport3D(data_dir) {
 5 |   this.data_dir = (typeof data_dir !== 'undefined') ?  data_dir : "v1/scans/";
 6 | };
 7 | 
 8 | // Load a textured scene mesh
 9 | Matterport3D.prototype.load_mesh = function(scan_id, mesh_id, callback) { 
10 |   var base_url = this.data_dir+scan+"/matterport_mesh/"+mesh+"/"
11 |   var obj_url = base_url + mesh + ".obj";
12 |   var mat_url = base_url + mesh + ".mtl"; 
13 |   var mtlLoader = new THREE.MTLLoader();
14 |   mtlLoader.setTexturePath(this.data_dir+scan+"/matterport_mesh/"+mesh+"/");
15 |   mtlLoader.load(mat_url, function( materials ) {
16 |     materials.preload();
17 |     var objLoader = new THREE.OBJLoader();
18 |     objLoader.setMaterials( materials );
19 |     objLoader.load(obj_url, function ( object ) {
20 |       callback(object);
21 |     });
22 |   });
23 | };
24 | 
/**
 * Load a cube texture and return a promise.
 *
 * @param {Array} urls  Passed straight to THREE.CubeTextureLoader.load.
 * @returns {Promise} Resolves with the loaded texture, rejects with the
 *     loader's error event.
 */
Matterport3D.prototype.loadCubeTexture = function(urls) {
  return new Promise(function(resolve, reject) {
    var cubeLoader = new THREE.CubeTextureLoader();
    cubeLoader.setCrossOrigin('anonymous');
    // No progress handler is supplied (third argument).
    cubeLoader.load(
      urls,
      function(texture) { resolve(texture); },
      null,
      function(event) { reject(event); }
    );
  });
};
35 | 
/**
 * Load a json file and return a promise.
 *
 * @param {string} url  Location handed to d3.json.
 * @returns {Promise} Resolves with the parsed data, rejects with the error
 *     reported by d3.json.
 */
Matterport3D.prototype.loadJson = function(url) {
  return new Promise(function(resolve, reject) {
    d3.json(url, function(error, data) {
      if (error) {
        reject(error);
        return;
      }
      resolve(data);
    });
  });
};
45 | 
/**
 * Load small cylinders representing viewpoints (projected down to floor level).
 *
 * Each entry of `data` must provide `pose` (16 numbers, row major),
 * `height` and `image_id`; an optional `included` flag is copied onto the
 * cylinder. The input entries are not modified.
 *
 * @param {Array<Object>} data  Viewpoint records.
 * @param {Object} [options]
 * @param {number} [options.h=0.5]        Cylinder height.
 * @param {number} [options.opacity=0.7]  Material opacity.
 * @returns {THREE.Group} Group holding one cylinder mesh per viewpoint; each
 *     mesh carries `height`, `name` (the image id) and `included`.
 */
Matterport3D.prototype.load_viewpoints = function(data, {h=0.5, opacity=0.7} = {}) {
  var group = new THREE.Group();
  for (var i = 0; i < data.length; i++) {
    var viewpoint = data[i];
    // Work on a numeric copy: parsing and lowering the caller's array in
    // place would lower the viewpoint again on every repeated call.
    var source_pose = viewpoint['pose'];
    var pose = [];
    for (var k = 0; k < source_pose.length; k++) pose.push(parseFloat(source_pose[k]));
    var height = parseFloat(viewpoint['height']);
    pose[11] -= height; // drop to surface level
    var m = new THREE.Matrix4();
    m.fromArray(pose);
    m.transpose(); // switch row major to column major to suit three.js
    var geometry = new THREE.CylinderBufferGeometry(0.15, 0.15, h, 128);
    var material = new THREE.MeshLambertMaterial({color: 0x0000ff});
    material.transparent = true;
    material.opacity = opacity;
    var cylinder = new THREE.Mesh(geometry, material);
    cylinder.applyMatrix(m);
    cylinder.height = height; // kept so callers can get back to camera level
    cylinder.name = viewpoint['image_id'];
    group.add(cylinder);
    // Viewpoints count as included unless the record explicitly says otherwise.
    cylinder.included = true;
    if (viewpoint.hasOwnProperty('included') && viewpoint['included'] == false) {
      cylinder.included = false;
    }
  }
  return group;
};
75 | 
76 | 


--------------------------------------------------------------------------------
/CMakeLists.txt:
--------------------------------------------------------------------------------
 1 | project(Matterport_Simulator CXX)
 2 | cmake_minimum_required(VERSION 2.8)
 3 | 
 4 | option(OSMESA_RENDERING "Offscreen CPU rendering with OSMesa" OFF)
 5 | option(EGL_RENDERING "Offscreen GPU rendering with EGL" OFF)
 6 | 
 7 | set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++11")
 8 | # Make custom find-modules available
 9 | set(CMAKE_MODULE_PATH ${CMAKE_MODULE_PATH} "${PROJECT_SOURCE_DIR}/cmake/Modules")
10 | 
11 | if(NOT CMAKE_BUILD_TYPE)
12 |   set(CMAKE_BUILD_TYPE Release)
13 | endif()
14 | 
15 | include_directories("${PROJECT_SOURCE_DIR}/include")
16 | 
17 | find_package(OpenCV REQUIRED)
18 | find_package(PkgConfig REQUIRED)
19 | find_package(OpenMP)
20 | if (OPENMP_CXX_FOUND)
21 |     set (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${OpenMP_CXX_FLAGS}")
22 |     set (CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} ${OpenMP_EXE_LINKER_FLAGS}")
23 | endif()
24 | 
25 | pkg_check_modules(JSONCPP REQUIRED jsoncpp)
26 | 
27 | if(EGL_RENDERING)
28 |   add_definitions(-DEGL_RENDERING)
29 |   find_package(OpenGL REQUIRED COMPONENTS OpenGL EGL)
30 |   pkg_check_modules(EPOXY REQUIRED epoxy)
31 |   set(GL_LIBS OpenGL::OpenGL OpenGL::EGL ${EPOXY_LIBRARIES})
32 | elseif(OSMESA_RENDERING)
33 |   add_definitions(-DOSMESA_RENDERING)
34 |   pkg_check_modules(OSMESA REQUIRED osmesa)
35 |   set(GL_LIBS ${OSMESA_LIBRARIES})
36 | else()
37 |   cmake_policy(SET CMP0072 OLD)
38 |   find_package(OpenGL REQUIRED)
39 |   find_package(GLEW REQUIRED)
40 |   set(GL_LIBS ${OPENGL_LIBRARIES} ${GLEW_LIBRARIES})
41 | endif()
42 | 
43 | add_library(MatterSim SHARED src/lib/MatterSim.cpp src/lib/NavGraph.cpp src/lib/Benchmark.cpp src/lib/cbf.cpp)
44 | if(OSMESA_RENDERING)
45 |   target_compile_definitions(MatterSim PUBLIC "-DOSMESA_RENDERING")
46 | endif()
47 | target_include_directories(MatterSim PRIVATE ${JSONCPP_INCLUDE_DIRS})
48 | target_link_libraries(MatterSim ${JSONCPP_LIBRARIES} ${OpenCV_LIBS} ${GL_LIBS})
49 | 
50 | add_executable(tests src/test/main.cpp)
51 | target_include_directories(tests PRIVATE ${JSONCPP_INCLUDE_DIRS})
52 | target_link_libraries(tests MatterSim ${JSONCPP_LIBRARIES} ${OpenCV_LIBS})
53 | 
54 | add_executable(mattersim_main src/driver/mattersim_main.cpp)
55 | target_link_libraries(mattersim_main MatterSim)
56 | 
57 | add_subdirectory(pybind11)
58 | 
59 | find_package(PythonInterp 2.7)
60 | message(${PYTHON_EXECUTABLE})
61 | 
62 | # Need to search for python executable again to pick up an activated
63 | # virtualenv python, if any.
64 | unset(PYTHON_EXECUTABLE CACHE)
65 | find_program(PYTHON_EXECUTABLE python
66 |       PATHS ENV PATH         # look in the PATH environment variable
67 |       NO_DEFAULT_PATH        # do not look anywhere else...
68 |       )
69 | 
70 | find_package(NumPy REQUIRED)
71 | 
72 | pybind11_add_module(MatterSimPython src/lib_python/MatterSimPython.cpp)
73 | target_include_directories(MatterSimPython PRIVATE ${NUMPY_INCLUDES})
74 | target_link_libraries(MatterSimPython PRIVATE MatterSim)
75 | set_target_properties(MatterSimPython
76 |   PROPERTIES
77 |   OUTPUT_NAME MatterSim)
78 | 


--------------------------------------------------------------------------------
/tasks/config.json:
--------------------------------------------------------------------------------
 1 | {
 2 |     "r2r_env": {
 3 |         "mp": {
 4 |             "image_w": 800,
 5 |             "image_h": 600,
 6 |             "vfov": 60,
 7 |             "training_parallel": 256,
 8 |             "evaluate_parallel": 1024,
 9 |             "connectivity": "/root/mount/AVAST_R2R/connectivity/",
10 |             "skybox_dir": "/root/mount/AVAST_R2R/data/v1/scans/"
11 |         },
12 |         "word_embedding": "/root/mount/AVAST_R2R/tasks/env/nlp_features/glove.6B.300d",
13 |         "word_embedding_size": 300,
14 |         "pano_feature": "/root/mount/AVAST_R2R/tasks/env/img_features/ResNet-152-imagenet",
15 |         "pano_space": 36,
16 |         "pano_feature_size": 2048,
17 |         "pose_repeat": 32,
18 |         "pose_space": 4,
19 |         "adj_dict_file": "/root/mount/AVAST_R2R/tasks/env/adj_dict/total_adj_dict.json",
20 |         "dataset_dir": "/root/mount/AVAST_R2R/tasks/env/r2r_dataset/",
21 |         "expert_dir": "/root/mount/AVAST_R2R/tasks/env/expert/expert.tsv",
22 |         "max_iteration": 10,
23 |         "success_radius": 3,
24 |         "action_space": 14,
25 |         "finish_action_idx": 0,
26 |         "skip_action_idx": -1,
27 |         "reward_mode": {
28 |             "//shaping": ["goal", "fidelity"],
29 |             "shaping": "goal",
30 |             "scale": 1
31 |         }
32 |     },
33 |     "agent": {
34 |         "train": {
35 |             "replay_memory": {
36 |                 "max_epi_num": 100000,
37 |                 "min_epi_num": 0,
38 |                 "max_epi_len": 10,
39 |                 "demonstration":{
40 |                     "curriculum_progress": 10,
41 |                     "last_lecture": 7,
42 |                     "ratio": 0.2
43 |                 }
44 |             },
45 |             "learning": {
46 |                 "dropout_ratio": 0,
47 |                 "gamma": 0.95,
48 |                 "iteration": 20000,
49 |                 "batch_size": 256,
50 |                 "lr": 1e-5,
51 |                 "log_alpha_init": -3,
52 |                 "ema": 0.995,
53 |                 "target_replace_iteration": 1,
54 |                 "target_entropy_ratio": 0.05
55 |             }
56 |         },
57 |         "pre_train": {
58 |             "learning": {
59 |                 "dropout_ratio": 0.5,
60 |                 "gamma": 0.95,
61 |                 "iteration": 10000,
62 |                 "lr": 1e-4
63 |             }
64 |         }
65 |     },
66 |     "state_tracker": {
67 |         "dropout_ratio": 0.5,
68 |         "obs": {
69 |             "instr": {
70 |                 "lstm": {
71 |                     "hidden_dim": 512,
72 |                     "num_layers": 1,
73 |                     "bidirectional": false
74 |                 }
75 |             },
76 |             "vision": {
77 |                 "attn": {
78 |                     "query_dim": 512
79 |                 }
80 |             }
81 |         },
82 |         "ast": {
83 |             "hidden_dim": 512,
84 |             "num_layers": 1
85 |         },
86 |         "avast": {
87 |             "hidden_dim": 512,
88 |             "num_layers": 1,
89 |             "latent_dim": 64
90 |         }
91 |     },
92 |     "result_dir": "/root/mount/AVAST_R2R/tasks/data/results/0/",
93 |     "save_dir": "/root/mount/AVAST_R2R/tasks/data/save_weight/",
94 |     "device": "cuda:0"
95 | }
96 | 


--------------------------------------------------------------------------------
/tasks/env/mp_env_sim.py:
--------------------------------------------------------------------------------
  1 | import MatterSim
  2 | import numpy as np
  3 | 
  4 | 
  5 | class MatterEnvSim():
  6 |     sim = None
  7 |     gif = None
  8 | 
  9 |     def __init__(
 10 |         self,
 11 |         config: dict
 12 |     ) -> None:
 13 |         super().__init__()
 14 |         self.config = config
 15 | 
 16 |         # rendering init
 17 |         self.width = config['r2r_env']['mp']['image_w']
 18 |         self.height = config['r2r_env']['mp']['image_h']
 19 |         self.vfov = np.deg2rad(config['r2r_env']['mp']['vfov'])
 20 |         self.hfov = self.vfov * self.width / self.height
 21 |         self.nav_text_color = [230, 40, 40]
 22 |         self.goal_text_color = [40, 40, 230]
 23 |         self.verbose = config['args']['verbose']
 24 |         self.rendering_idx = config['args']['rendering_idx']
 25 |         return
 26 | 
 27 |     def _sim_init(
 28 |         self,
 29 |         parallel_num: int
 30 |     ) -> None:
 31 |         self.sim = MatterSim.Simulator()
 32 |         self.sim.setDatasetPath(self.config['r2r_env']['mp']['skybox_dir'])
 33 |         self.sim.setNavGraphPath(self.config['r2r_env']['mp']['connectivity'])
 34 |         self.sim.setCameraResolution(self.config['r2r_env']['mp']['image_w'], self.config['mp']['image_h'])
 35 |         self.sim.setCameraVFOV(np.deg2rad(self.config['r2r_env']['mp']['vfov']))
 36 |         self.sim.setDiscretizedViewingAngles(True)
 37 |         self.sim.setBatchSize(parallel_num)
 38 |         self.sim.setCacheSize(2 * parallel_num)
 39 |         self.sim.setDepthEnabled(False)
 40 |         self.sim.setRenderingEnabled(True)
 41 |         self.sim.initialize()
 42 |         return
 43 | 
 44 |     def get_states(
 45 |         self
 46 |     ) -> list:
 47 |         return self.sim.getState()
 48 | 
 49 |     def new_episodes(
 50 |         self,
 51 |         scan_ids: list,
 52 |         vp_ids: list,
 53 |         headings: list,
 54 |         gen_gif: bool
 55 |     ) -> None:
 56 |         self._sim_init(len(scan_ids))
 57 |         self.sim.newEpisode(scan_ids, vp_ids, headings, [0] * len(scan_ids))
 58 |         if gen_gif:
 59 |             # init gif (batch, time_step, height, width, channel)
 60 |             self.gif = np.zeros(
 61 |                 (
 62 |                     1,
 63 |                     int(self.config['r2r_env']['max_iteration'] * self.config['r2r_env']['action_space']),
 64 |                     self.config['r2r_env']['mp']['image_h'],
 65 |                     self.config['r2r_env']['mp']['image_w'],
 66 |                     3
 67 |                 ),
 68 |                 dtype=np.float32
 69 |             )
 70 |             self._add_frame_into_gif(self.sim.getState()[0])
 71 |         return
 72 | 
 73 |     def make_actions(
 74 |         self,
 75 |         forwards: list,
 76 |         headings: list,
 77 |         elevations: list
 78 |     ) -> None:
 79 |         self.sim.makeAction(forwards, headings, elevations)
 80 |         if not isinstance(self.gif, type(None)):
 81 |             self._add_frame_into_gif(self.sim.getState()[0])
 82 |         return
 83 | 
 84 |     def _add_frame_into_gif(
 85 |         self,
 86 |         state_info: MatterSim.SimState
 87 |     ) -> None:
 88 |         self.gif[self.rendering_idx, state_info.step] = np.array(
 89 |             state_info.rgb,
 90 |             copy=True,
 91 |             dtype=np.float32
 92 |         ) / 255
 93 |         return
 94 | 
 95 | 
 96 | def main():
 97 |     return
 98 | 
 99 | 
100 | if __name__ == '__main__':
101 |     main()
102 | 


--------------------------------------------------------------------------------
/tasks/agent/model.py:
--------------------------------------------------------------------------------
  1 | import torch
  2 | import torch.nn as nn
  3 | 
  4 | 
  5 | class DuelingQNetwork(nn.Module):
  6 |     def __init__(
  7 |         self,
  8 |         state_dim: int,
  9 |         action_dim: int,
 10 |         embed_dim: int = 512
 11 |     ) -> None:
 12 |         super(DuelingQNetwork, self).__init__()
 13 |         # state value
 14 |         self.state_fc = nn.Sequential(
 15 |             nn.Linear(state_dim * 2, embed_dim),
 16 |             nn.Linear(embed_dim, embed_dim)
 17 |         )
 18 |         self.action_fc = nn.Sequential(
 19 |             nn.Linear(action_dim, embed_dim),
 20 |             nn.Linear(embed_dim, embed_dim)
 21 |         )
 22 |         self.out_fc = nn.Sequential(
 23 |             nn.Linear(embed_dim, embed_dim),
 24 |             nn.Linear(embed_dim, 1)
 25 |         )
 26 |         return
 27 | 
 28 |     def forward(
 29 |         self,
 30 |         belief_states: torch.Tensor,
 31 |         candidate_action_features: torch.Tensor
 32 |     ) -> torch.Tensor:
 33 |         """
 34 |         belief_states:              (batch_size, state_dim * 2)
 35 |         candidate_action_features:  (batch_size, action_space, action_dim)
 36 |         """
 37 |         batch_size, action_space = candidate_action_features.shape[:2]
 38 | 
 39 |         # batch_size x 1 x embed_dim
 40 |         s_latent = self.state_fc(belief_states).unsqueeze(1)
 41 |         # batch_size x action_space x embed_dim
 42 |         a_latent = self.action_fc(candidate_action_features)
 43 | 
 44 |         # batch_size x action_space
 45 |         q_out = self.out_fc(s_latent + a_latent).squeeze(2)
 46 |         return q_out
 47 | 
 48 | 
 49 | class TwinnedQNetwork(nn.Module):
 50 |     def __init__(
 51 |         self,
 52 |         state_dim: int,
 53 |         action_dim: int,
 54 |         dueling: bool = True
 55 |     ) -> None:
 56 |         super(TwinnedQNetwork, self).__init__()
 57 |         if dueling:
 58 |             self.q_net1 = DuelingQNetwork(state_dim, action_dim)
 59 |             self.q_net2 = DuelingQNetwork(state_dim, action_dim)
 60 |         else:
 61 |             raise NotImplementedError
 62 |         return
 63 | 
 64 |     def forward(
 65 |         self,
 66 |         belief_states: torch.Tensor,
 67 |         candidate_action_features: torch.Tensor
 68 |     ) -> (torch.Tensor, torch.Tensor):
 69 |         return self.q_net1(belief_states, candidate_action_features), self.q_net2(belief_states, candidate_action_features)
 70 | 
 71 | 
 72 | class CategoricalPolicy(nn.Module):
 73 |     def __init__(
 74 |         self,
 75 |         state_dim: int,
 76 |         action_dim: int
 77 |     ) -> None:
 78 |         super(CategoricalPolicy, self).__init__()
 79 |         self.state_fc = nn.Sequential(
 80 |             nn.Linear(state_dim, action_dim),
 81 |             nn.Linear(action_dim, action_dim)
 82 |         )
 83 |         return
 84 | 
 85 |     def forward(
 86 |         self,
 87 |         belief_states: torch.Tensor,
 88 |         candidate_action_features: torch.Tensor
 89 |     ) -> torch.Tensor:
 90 |         """
 91 |         belief_states:              (batch_size, state_dim)
 92 |         candidate_action_features:  (batch_size, action_space, action_dim)
 93 |         """
 94 |         batch_size, action_space = candidate_action_features.shape[:2]
 95 |         # batch_size x action_dim x 1
 96 |         s_latent = self.state_fc(belief_states).unsqueeze(2)
 97 |         out = torch.bmm(
 98 |             candidate_action_features, s_latent
 99 |         ).squeeze(2)
100 |         return out
101 | 
102 | 
def main() -> None:
    """Script entry point; currently does nothing and returns ``None``."""
    return None


# Run main() only when this module is executed as a script.
if __name__ == '__main__':
    main()
109 | 


--------------------------------------------------------------------------------
/connectivity/YmJkqBEsHnH_connectivity.json:
--------------------------------------------------------------------------------
1 | [{"image_id":"006933a75f764c5485cf284bea0ded0b","pose":[0.210914,-0.00824746,-0.977469,-7.64722,0.977278,0.0232484,0.210677,-2.15553,0.0209873,-0.999695,0.0129646,1.56695,0,0,0,1],"included":true,"visible":[false,false,true,false,true,true,false,true,true,true,false],"unobstructed":[false,false,false,false,false,false,false,true,true,false,false],"height":1.524793092035509},{"image_id":"e4ede0695e4e4a77aae8537abb9f11d3","pose":[-0.0422212,-0.0176246,-0.998952,-0.133122,0.998904,0.0194092,-0.0425613,-0.0184591,0.0201393,-0.999656,0.016787,1.48352,0,0,0,1],"included":true,"visible":[false,false,true,true,false,false,false,false,false,false,false],"unobstructed":[false,false,true,false,false,true,false,false,false,false,false],"height":1.5227398475592409},{"image_id":"d471e89e00be49f49a7ecace814d60bf","pose":[0.426939,-0.00370058,-0.904272,-0.421886,0.904055,0.0239963,0.426739,-2.12366,0.0201203,-0.999705,0.0135916,1.49477,0,0,0,1],"included":true,"visible":[true,true,false,true,true,true,false,true,true,true,false],"unobstructed":[false,true,false,true,false,true,false,false,false,false,false],"height":1.5263900136377955},{"image_id":"b34af02ce9b642ebbd0c7e9e0ba3b553","pose":[0.960272,0.00870611,-0.278924,-0.0905727,0.278755,0.0168277,0.960214,-3.55265,0.0130537,-0.99982,0.0137334,1.49061,0,0,0,1],"included":true,"visible":[true,true,true,false,false,false,false,false,false,false,false],"unobstructed":[false,false,true,false,false,true,false,false,false,false,false],"height":1.5323637229797105},{"image_id":"01c80b5f8fbd4c969ee0bc03f1ec7a6c","pose":[0.359562,-0.0105291,-0.933061,-3.77309,0.932771,0.0313799,0.359097,-2.1838,0.0254987,-0.999452,0.0211054,1.53932,0,0,0,1],"included":true,"visible":[true,false,true,false,false,true,false,true,true,true,false],"unobstructed":[false,false,false,false,false,true,false,true,false,false,false],"height":1.5286629461398107},{"image_id":"82ea5baa30f945fe98f6cad3064af847","pose":[0.0376233,-0.0115611,-0.999224,-2.01669,0.998821
,0.0310955,0.0372487,-2.16965,0.030641,-0.999449,0.0127185,1.50807,0,0,0,1],"included":true,"visible":[true,true,true,true,true,false,false,true,true,true,false],"unobstructed":[false,true,true,true,true,false,false,false,false,false,false],"height":1.5253207999550662},{"image_id":"aecbb791f30b452a9236c5a8c7030663","pose":[0.296076,-0.0242641,-0.954855,-13.5955,0.955111,0.0179483,0.2957,-2.22547,0.00996343,-0.999544,0.0284901,1.59272,0,0,0,1],"included":true,"visible":[true,false,true,false,true,true,false,true,true,true,true],"unobstructed":[false,false,false,false,false,false,false,false,false,true,true],"height":1.7557263982456066},{"image_id":"d841f7b710f9470796d55561f8f524db","pose":[0.270437,0.002913,-0.962732,-5.77716,0.962325,0.0284129,0.27041,-2.21321,0.028142,-0.999591,0.00488176,1.55947,0,0,0,1],"included":true,"visible":[true,false,true,false,true,true,false,false,true,true,false],"unobstructed":[true,false,false,false,true,false,false,false,false,false,false],"height":1.5357935019251416},{"image_id":"8e38fdd81c7949db9646968bafbbdcfc","pose":[-0.00277118,-0.0169575,-0.999852,-9.93905,0.999791,0.020127,-0.00311204,-2.17463,0.0201771,-0.999653,0.0168993,1.60592,0,0,0,1],"included":true,"visible":[true,false,true,false,true,true,false,true,false,true,true],"unobstructed":[true,false,false,false,false,false,false,false,false,true,false],"height":1.5208970888736792},{"image_id":"20fd759be0b64fc9aa96d290f0a704ec","pose":[0.227815,0.0117555,-0.973633,-12.1161,0.973367,0.0235263,0.228037,-2.15724,0.025587,-0.999654,-0.00608172,1.59969,0,0,0,1],"included":true,"visible":[true,false,true,false,true,true,true,true,true,false,true],"unobstructed":[false,false,false,false,false,false,true,false,true,false,false],"height":1.5261379179165138},{"image_id":"d838acff82244c2da0cf2651e54966cb","pose":[0.310234,-0.0632421,-0.948553,-15.2317,0.950604,0.0313736,0.308813,-2.28133,0.0102298,-0.997504,0.0698525,0.902626,0,0,0,1],"included":true,"visible":[true,false,true,false,tr
ue,true,true,true,true,true,false],"unobstructed":[false,false,false,false,false,false,true,false,false,false,false],"height":1.558854711359605}]


--------------------------------------------------------------------------------
/tasks/agent/pomdp/ast.py:
--------------------------------------------------------------------------------
  1 | import torch
  2 | import torch.nn as nn
  3 | from agent.pomdp.instruction_attention import InstructionAttention
  4 | 
  5 | 
  6 | class AST(nn.Module):
  7 |     def __init__(
  8 |         self,
  9 |         config: dict,
 10 |         vision_dim: int,
 11 |         abs_pose_feature_size: int,
 12 |         action_feature_size: int,
 13 |         additional_track: str
 14 |     ) -> None:
 15 |         super(AST, self).__init__()
 16 |         self.config = config
 17 |         tracker_config = config['state_tracker'][config['args']['state_tracker']]
 18 |         self.h_dim = tracker_config['hidden_dim']
 19 |         self.n_layer = tracker_config['num_layers']  # number of layers of LSTM
 20 |         self.drop = nn.Dropout(p=config['state_tracker']['dropout_ratio'])
 21 | 
 22 |         # addtional information
 23 |         addtional_dim = self.h_dim
 24 |         self.additional_track = additional_track
 25 |         if additional_track == 'pose':
 26 |             self.angle_fc = nn.Sequential(
 27 |                 nn.Linear(abs_pose_feature_size, self.h_dim, bias=False),
 28 |                 nn.Linear(self.h_dim, addtional_dim, bias=False)
 29 |             )
 30 |         elif additional_track == 'action':
 31 |             self.angle_fc = nn.Sequential(
 32 |                 nn.Linear(action_feature_size, self.h_dim, bias=False),
 33 |                 nn.Linear(self.h_dim, addtional_dim, bias=False)
 34 |             )
 35 |         else:
 36 |             raise NotImplementedError
 37 | 
 38 |         # context belief state
 39 |         self.instr_attention = InstructionAttention(
 40 |             embed_dim=self.h_dim,
 41 |             dropout_ratio=config['state_tracker']['dropout_ratio']
 42 |         )
 43 | 
 44 |         # tracking belief state
 45 |         self.lstm = nn.LSTMCell(
 46 |             input_size=vision_dim + self.h_dim,
 47 |             hidden_size=self.h_dim
 48 |         )
 49 | 
 50 |         # belief state
 51 |         self.out_fc = nn.Sequential(
 52 |             nn.Linear(self.h_dim + self.h_dim, self.h_dim, bias=False),
 53 |             nn.Linear(self.h_dim, self.h_dim, bias=False)
 54 |         )
 55 | 
 56 |         self.state_dim = self.h_dim
 57 |         return
 58 | 
 59 |     def forward(
 60 |         self,
 61 |         vision_embed: torch.Tensor,
 62 |         instr_embed: torch.Tensor,
 63 |         instr_mask: torch.Tensor,
 64 |         abs_pose_features: torch.Tensor,
 65 |         action_features: torch.Tensor,
 66 |         hiddens: (torch.Tensor, torch.Tensor)
 67 |     ) -> (torch.Tensor, torch.Tensor, torch.Tensor, (torch.Tensor, torch.Tensor), dict):
 68 |         belief_states, context_belief_states, instr_attn_weight, (h_t, c_t) = self.inference(
 69 |             vision_embed, instr_embed, instr_mask, abs_pose_features, action_features, hiddens
 70 |         )
 71 |         return belief_states, context_belief_states, instr_attn_weight, (h_t, c_t), {}
 72 | 
 73 |     def inference(
 74 |         self,
 75 |         vision_embed: torch.Tensor,
 76 |         instr_embed: torch.Tensor,
 77 |         instr_mask: torch.Tensor,
 78 |         abs_pose_features: torch.Tensor,
 79 |         action_features: torch.Tensor,
 80 |         hiddens: (torch.Tensor, torch.Tensor)
 81 |     ) -> (torch.Tensor, torch.Tensor, torch.Tensor, (torch.Tensor, torch.Tensor)):
 82 |         if self.additional_track == 'pose':
 83 |             additional_embed = self.angle_fc(abs_pose_features)
 84 |         elif self.additional_track == 'action':
 85 |             additional_embed = self.angle_fc(action_features)
 86 |         else:
 87 |             raise NotImplementedError
 88 | 
 89 |         concat_input = torch.cat([vision_embed, additional_embed], 1)
 90 |         input_drop = self.drop(concat_input)
 91 |         h_t, c_t = self.lstm(input_drop, hiddens)
 92 |         h_t_drop = self.drop(h_t)
 93 |         context_belief_states, instr_attn_weight = self.instr_attention(h_t_drop, instr_embed, instr_mask)
 94 |         belief_states = torch.cat(
 95 |             [context_belief_states, h_t],
 96 |             dim=1
 97 |         )
 98 |         belief_states = self.out_fc(belief_states)
 99 |         return belief_states, context_belief_states, instr_attn_weight, (h_t, c_t)
100 | 
101 | 
def main() -> None:
    """Script entry point; currently does nothing and returns ``None``."""
    return None


# Run main() only when this module is executed as a script.
if __name__ == '__main__':
    main()
108 | 


--------------------------------------------------------------------------------
/web/app/trajectory.html:
--------------------------------------------------------------------------------
 1 | <!DOCTYPE html>
 2 | <html lang="en">
 3 |   <head>
 4 |     <meta charset=utf-8>
 5 |     <title>Trajectory Viewer</title>
 6 |     <link rel="stylesheet" href="https://maxcdn.bootstrapcdn.com/bootstrap/3.3.7/css/bootstrap.min.css" integrity="sha384-BVYiiSIFeK1dGmJRAkycuHAHRg32OmUcww7on3RYdg4Va+PmSTsz/K68vbdEjh4u" crossorigin="anonymous">
 7 |     <meta name="viewport" content="width=device-width, initial-scale=1">
 8 |   </head>
 9 |   <body>
10 |     <div class="container">
11 |       <div class="page-header">
12 |         <h1>First-Person Trajectory Visualization</h1>
13 |         <div class="row">
14 |           <div class="col-md-12">
15 |             <form class="form-inline">
16 |               <strong>Input json trajectories:</strong>
17 |               <label class="btn btn-default btn-file">
18 |                 Choose file <input id="trajFile" name="trajFile" type="file" style="display: none;">
19 |               </label>
20 |               <span id="fileName"> val_unseen_shortest_agent.json </span>
21 |               <div class="form-group" style="margin-left: 20px;">
22 |                 <input class="form-check-input" type="checkbox" value="" id="show-instructions">
23 |                 <label class="form-check-label" for="show-instructions">Show adjacent viewpoints</label>
24 |               </div>
25 |             </form>
26 |             <p>
27 |               <strong>Trajectory:</strong>
28 |               <button type="button" class="btn btn-default" onclick="left()">
29 |                 <span class="glyphicon glyphicon-triangle-left"></span>
30 |               </button>
31 |               <span>Index:</span><input type="text" id="ix">
32 |               <button type="button" class="btn btn-default" onclick="right()">
33 |                 <span class="glyphicon glyphicon-triangle-right"></span>
34 |               </button>
35 |               <span>Instruction ID:</span><input disabled=True type="text" id="instr_id">
36 |             </p>
37 |             <p>
38 |               <strong>Camera parameters:</strong>
39 |               <span>Width:</span><input type="text" id="width">
40 |               <span>Height:</span><input type="text" id="height">
41 |               <span>V-FOV:</span><input type="text" id="vfov">
42 |             </p>
43 |             <p>
44 |               <strong>Controls:</strong>
45 |               <button class="btn btn-primary" onclick="play()" id="play" disabled=true>Play</button>
46 |               <button class="btn btn-warning" onclick="download()" id="download" disabled=true>Download video</button>
47 |               <button class="btn btn-warning" onclick="download_image()" id="download_image">Download image</button>
48 |               <span>Download requires Firefox 43 or above, or Chrome 51 or above.</span>
49 |             </p>
50 |             <p> Left click and drag to look around. Mouse wheel to zoom. </p>
51 |           </div>
52 |         </div>
53 |       </div>
54 |       <div>
55 |         <p id='instruction' style="font-size: 200%;"></p>
56 |       <div>
57 |         <figure style="display: inline-block; width: 100%;">
58 |           <canvas id="skybox" style="width:auto; display: block; margin: 0 auto;"></canvas>
59 |         </figure>
60 |         <figure style="display: inline-block; width: 100%;">
61 |           <canvas id="floorplan" style="width:auto; display: block; margin: 0 auto;"></canvas>
62 |         </figure>
63 |         <p id="instr" style="text-align: center; margin-left: 60px;"></p>
64 |       </div>
65 |     </div>
66 |     <script type="text/javascript" src="https://cdnjs.cloudflare.com/ajax/libs/d3/4.10.2/d3.min.js"></script>
67 |     <script type="text/javascript" src="https://cdnjs.cloudflare.com/ajax/libs/three.js/87/three.min.js"></script>
68 |     <script type="text/javascript" src="https://cdnjs.cloudflare.com/ajax/libs/tween.js/16.3.5/Tween.min.js"></script>
69 |     <script type="text/javascript" src="https://cdnjs.cloudflare.com/ajax/libs/jquery/3.3.1/jquery.min.js"></script>
70 |     <script type="text/javascript" src="/js/RequestAnimationFrame.js"></script>
71 |     <script type="text/javascript" src="/js/Detector.js"></script>
72 |     <script type="text/javascript" src="/js/PTZCameraControls.js"></script>
73 |     <script type="text/javascript" src="/js/Matterport3D.js"></script>
74 |     <script type="text/javascript" src="/js/Trajectory.js"></script>
75 |   </body>
76 | </html>
77 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
  1 | # AVAST: Attentive VAriational State Tracker for Vision-and-Language Navigation
  2 | 
  3 | This is the PyTorch implementation for AVAST: Attentive VAriational State Tracker for Vision-and-Language Navigation from National Chiao Tung University, Taiwan.
  4 | 
  5 | ---
  6 | 
  7 | ## Installation
  8 | 
  9 | Clone the AVAST_R2R repository:
 10 | ```bash
 11 | git clone --recursive https://github.com/NCTUMLlab/Je-Wei-Jang-AVAST_Attentive_Variational_State_Tracker_for_Vision-and-Language-Navigation.git
 12 | mv Je-Wei-Jang-AVAST_Attentive_Variational_State_Tracker_for_Vision-and-Language-Navigation AVAST_R2R
 13 | cd AVAST_R2R
 14 | ```
 15 | 
 16 | ### Downloading pre-trained embedding and R2R datasets
 17 | ```bash
 18 | bash download.sh
 19 | ```
 20 | 
 21 | ### Downloading Matterport3D Dataset (optional)
 22 | To use original Matterport3D simulator you must first download the [Matterport3D Dataset](https://niessner.github.io/Matterport/) which is available after requesting access [here](https://niessner.github.io/Matterport/). The download script that will be provided allows for downloading of selected data types. At minimum you must download the `matterport_skybox_images`. If you wish to use depth outputs then also download `undistorted_depth_images` and `undistorted_camera_parameters`.
 23 | 
 24 | Set an environment variable to the location of the **unzipped** dataset, where `<PATH>` is the full absolute path (not a relative path or symlink) to the directory containing the individual matterport scan directories (17DRP5sb8fy, 2t7WUuJeko7, etc):
 25 | 
 26 | ```bash
 27 | export MATTERPORT_DATA_DIR=<PATH>
 28 | ```
 29 | 
 30 | ### Building using Docker
 31 | ```bash
 32 | docker build --rm -t r2r/avast:base .
 33 | docker run --name r2r -it --gpus all \
 34 |     -e DISPLAY -e="QT_X11_NO_MITSHM=1" -v /tmp/.X11-unix:/tmp/.X11-unix \
 35 |     --mount type=bind,source=$MATTERPORT_DATA_DIR,target=/root/mount/AVAST_R2R/data/v1/scans \
 36 |     --volume `pwd`:/root/mount/AVAST_R2R \
 37 |     --restart=unless-stopped --shm-size 32G \
 38 |     -p <ssh_port>:22 -p <tensorboard_port>:6006 r2r/avast:base
 39 | ```
 40 | 
 41 | ### Building Matterport3D simulator
 42 | Now (from inside the docker container), build the simulator code:
 43 | ```bash
 44 | cd /root/mount/AVAST_R2R
 45 | mkdir build && cd build
 46 | cmake -DEGL_RENDERING=ON ..
 47 | make
 48 | cd ../
 49 | echo export PYTHONPATH=$PYTHONPATH:/root/mount/AVAST_R2R/build >> ~/.bashrc
 50 | source ~/.bashrc
 51 | ```
 52 | 
 53 | ### Building Lookup Table for Location Connectivity
 54 | Now (from inside the docker container), build the lookup table for location connectivity:
 55 | ```bash
 56 | python3 tasks/data/scripts/generate_adj_dict.py > ./tasks/env/adj_dict/total_adj_dict.json
 57 | ```
 58 | 
 59 | ---
 60 | 
 61 | ## Training and evaluation
 62 | ```
 63 | cd tasks
 64 | ```
 65 | 
 66 | ### Pre-training state trackers
 67 | * Pre-training attentive state tracker:
 68 |     ```
 69 |     python3 ast_pre_train.py --mode pre_train --state_tracker ast --agent seq2seq
 70 |     ```
 71 | * Pre-training attentive variational state tracker:
 72 |     ```
 73 |     python3 avast_pre_train.py --mode pre_train --state_tracker avast --agent seq2seq
 74 |     ```
 75 |     
 76 | ### Fine-tuning an agent with RL algorithms
 77 | * Fine-tuning an agent with AST+REINFORCE:
 78 |     ```
 79 |     python3 reinforce_fine_tune.py --mode train --state_tracker ast --load_pre_trained_dir <ast_path> --agent reinforce
 80 |     ```   
 81 | * Fine-tuning an agent with AVAST+SACD+RECED:
 82 |     ```
 83 |     python3 sacd_fine_tune.py --mode train --state_tracker avast --load_pre_trained_dir <avast_path> --agent sacd --demo_activate --curriculum
 84 |     ```
 85 | 
 86 | ### Evaluating trained agents
 87 | * Evaluating AST+Seq2Seq:
 88 |     ```
 89 |     python3 ast_pre_train.py --mode test --state_tracker ast --agent seq2seq --load_dir <model_path>
 90 |     ```    
 91 | * Evaluating AVAST+Seq2Seq:
 92 |     ```
 93 |     python3 avast_pre_train.py --mode test --state_tracker avast --agent seq2seq --load_dir <model_path>
 94 |     ```
 95 | * Evaluating AST+REINFORCE:
 96 |     ```
 97 |     python3 reinforce_fine_tune.py --mode test --state_tracker ast --agent reinforce --load_dir <model_path>
 98 |     ```
 99 | * Evaluating AVAST+SACD:
100 |     ```
101 |     python3 sacd_fine_tune.py --mode test --state_tracker avast --agent sacd --load_dir <model_path>
102 |     ```
103 | 


--------------------------------------------------------------------------------
/src/lib_python/MatterSimPython.cpp:
--------------------------------------------------------------------------------
 1 | #include <pybind11/pybind11.h>
 2 | #include <pybind11/stl.h>
 3 | #include "MatterSim.hpp"
 4 | #include "cbf.h"
 5 | 
 6 | namespace py = pybind11;
 7 | 
 8 | namespace mattersim {
 9 | 
10 |     void cbf(py::buffer depth, py::buffer intensity, py::buffer mask, py::buffer result) {
11 |         double spaceSigmas[3] = {12, 5, 8};
12 |         double rangeSigmas[3] = {0.2, 0.08, 0.02};
13 |         py::buffer_info d_info = depth.request();
14 |         py::buffer_info i_info = intensity.request();
15 |         py::buffer_info m_info = mask.request();
16 |         py::buffer_info r_info = result.request();
17 |         cbf::cbf(d_info.shape[0], d_info.shape[1],
18 |             static_cast<uint8_t*>(d_info.ptr),
19 |             static_cast<uint8_t*>(i_info.ptr),
20 |             static_cast<uint8_t*>(m_info.ptr),
21 |             static_cast<uint8_t*>(r_info.ptr),
22 |             3, &spaceSigmas[0], &rangeSigmas[0]);
23 |     }
24 | 
25 | }
26 | 
27 | using namespace mattersim;
28 | 
29 | PYBIND11_MODULE(MatterSim, m) {
30 |     m.def("cbf", &mattersim::cbf, "Cross Bilateral Filter");
31 |     py::class_<Viewpoint, ViewpointPtr>(m, "ViewPoint")
32 |         .def_readonly("viewpointId", &Viewpoint::viewpointId)
33 |         .def_readonly("ix", &Viewpoint::ix)
34 |         .def_readonly("x", &Viewpoint::x)
35 |         .def_readonly("y", &Viewpoint::y)
36 |         .def_readonly("z", &Viewpoint::z)
37 |         .def_readonly("rel_heading", &Viewpoint::rel_heading)
38 |         .def_readonly("rel_elevation", &Viewpoint::rel_elevation)
39 |         .def_readonly("rel_distance", &Viewpoint::rel_distance);
40 |     py::class_<cv::Mat>(m, "Mat", pybind11::buffer_protocol())
41 |         .def_buffer([](cv::Mat& im) -> pybind11::buffer_info {
42 |             ssize_t item_size = im.elemSize() / im.channels();
43 |             std::string format = pybind11::format_descriptor<unsigned char>::format();
44 |             if (item_size == 2) { // handle 16bit data from depth maps
45 |                 format = pybind11::format_descriptor<unsigned short>::format();
46 |             }
47 |             return pybind11::buffer_info(
48 |                 im.data, // Pointer to buffer
49 |                 item_size, // Size of one scalar
50 |                 format,
51 |                 3, // Number of dimensions (row, cols, channels)
52 |                 { im.rows, im.cols, im.channels() }, // Buffer dimensions
53 |                 {   // Strides (in bytes) for each index
54 |                     item_size * im.channels() * im.cols,
55 |                     item_size * im.channels(),
56 |                     item_size
57 |                 }
58 |             );
59 |         });
60 |     py::class_<SimState, SimStatePtr>(m, "SimState")
61 |         .def_readonly("scanId", &SimState::scanId)
62 |         .def_readonly("step", &SimState::step)
63 |         .def_readonly("rgb", &SimState::rgb)
64 |         .def_readonly("depth", &SimState::depth)
65 |         .def_readonly("location", &SimState::location)
66 |         .def_readonly("heading", &SimState::heading)
67 |         .def_readonly("elevation", &SimState::elevation)
68 |         .def_readonly("viewIndex", &SimState::viewIndex)
69 |         .def_readonly("navigableLocations", &SimState::navigableLocations);
70 |     py::class_<Simulator>(m, "Simulator")
71 |         .def(py::init<>())
72 |         .def("setDatasetPath", &Simulator::setDatasetPath)
73 |         .def("setNavGraphPath", &Simulator::setNavGraphPath)
74 |         .def("setRenderingEnabled", &Simulator::setRenderingEnabled)
75 |         .def("setCameraResolution", &Simulator::setCameraResolution)
76 |         .def("setCameraVFOV", &Simulator::setCameraVFOV)
77 |         .def("setElevationLimits", &Simulator::setElevationLimits)
78 |         .def("setDiscretizedViewingAngles", &Simulator::setDiscretizedViewingAngles)
79 |         .def("setPreloadingEnabled", &Simulator::setPreloadingEnabled)
80 |         .def("setDepthEnabled", &Simulator::setDepthEnabled)
81 |         .def("setBatchSize", &Simulator::setBatchSize)
82 |         .def("setCacheSize", &Simulator::setCacheSize)
83 |         .def("setSeed", &Simulator::setSeed)
84 |         .def("initialize", &Simulator::initialize)
85 |         .def("newEpisode", &Simulator::newEpisode)
86 |         .def("newRandomEpisode", &Simulator::newRandomEpisode)
87 |         .def("getState", &Simulator::getState, py::return_value_policy::take_ownership)
88 |         .def("makeAction", &Simulator::makeAction)
89 |         .def("close", &Simulator::close)
90 |         .def("resetTimers", &Simulator::resetTimers)
91 |         .def("timingInfo", &Simulator::timingInfo);
92 | }
93 | 


--------------------------------------------------------------------------------
/connectivity/GdvgFV5R1Z5_connectivity.json:
--------------------------------------------------------------------------------
1 | [{"image_id":"0b02e18654324edd8d74c078b66bfb20","pose":[-0.057695,-0.000357129,0.998334,-2.46692,-0.998304,-0.00769199,-0.0576965,-3.15814,0.00770012,-0.99997,0.0000884733,1.5171,0,0,0,1],"included":true,"visible":[false,true,false,false,false,true,true,true,true,false,true,false],"unobstructed":[false,false,false,false,false,true,false,true,true,false,true,false],"height":1.51470410293751},{"image_id":"1db1c0a09ecf40d188197efc05ced3bb","pose":[-0.442443,0.0138817,0.896688,-4.03893,-0.89679,-0.0101225,-0.442338,-3.05434,0.00293664,-0.999852,0.0169288,0.974424,0,0,0,1],"included":true,"visible":[true,false,false,false,true,true,false,false,true,false,false,true],"unobstructed":[false,false,false,false,false,true,false,false,true,false,false,true],"height":0.9701803380402906},{"image_id":"6178647ca8d14dc09370f6c1b7ed2fd6","pose":[-0.870025,0.0056275,0.492973,-3.69279,-0.493005,-0.0105975,-0.869962,1.95433,0.000328893,-0.999927,0.0119957,1.51516,0,0,0,1],"included":true,"visible":[false,false,false,true,false,false,true,true,false,false,true,false],"unobstructed":[false,false,false,true,false,false,true,true,false,true,true,false],"height":1.517582101716661},{"image_id":"565cc21cd28b4ee6bb5ba83c5270c032","pose":[0.0242634,0.000986587,-0.999704,-3.91782,0.999699,0.00333371,0.024267,0.178675,0.00335701,-0.999993,-0.0009042,1.50868,0,0,0,1],"included":true,"visible":[false,false,true,false,false,false,true,false,false,true,true,false],"unobstructed":[false,false,true,false,false,false,false,false,false,true,true,false],"height":1.5114421933143356},{"image_id":"ef638e508e054c4aabd49b38d1b88fc7","pose":[0.0820523,0.0151057,0.996513,-4.61631,-0.995947,-0.0356725,0.0825462,-2.18899,0.0367954,-0.999249,0.0121187,1.52757,0,0,0,1],"included":true,"visible":[false,true,false,false,false,true,false,false,true,false,false,true],"unobstructed":[false,false,false,false,false,true,false,false,true,false,false,true],"height":1.5162868543024455},{"image_id":"97ed68de989e44fdaf2d9b94
9898fab6","pose":[0.0900997,0.0149714,0.99582,-3.64126,-0.995713,-0.0195971,0.0903844,-3.16818,0.0208687,-0.999695,0.0131427,1.52081,0,0,0,1],"included":true,"visible":[true,true,false,false,true,false,false,false,true,false,false,true],"unobstructed":[true,true,false,false,true,false,false,false,true,false,false,true],"height":1.5211418713547455},{"image_id":"5fd70cff4992429a99a84fd3c117ccb5","pose":[-0.0539877,-0.000800861,-0.998541,0.0108044,0.998337,0.0201438,-0.0539926,0.00604319,0.020158,-0.999796,-0.000286778,1.51223,0,0,0,1],"included":true,"visible":[true,false,true,true,false,false,false,true,false,true,true,false],"unobstructed":[false,false,true,false,false,false,false,true,false,false,true,false],"height":1.5113248528175798},{"image_id":"86d342c576ff46a9828d2ba377cc8cd5","pose":[0.998173,0.0151118,-0.0584746,-1.78347,0.0584707,0.000718574,0.998288,-1.89835,0.0151283,-0.999885,-0.000165129,1.52238,0,0,0,1],"included":true,"visible":[true,false,true,false,false,false,true,false,false,false,true,false],"unobstructed":[true,false,true,false,false,false,true,false,false,false,true,false],"height":1.5103397372923053},{"image_id":"8dba9ff900b14f9b84ead660f5f7f701","pose":[-0.999855,-0.0144511,0.00887107,-4.11579,-0.00895392,0.00564829,-0.999943,-2.90606,0.0144005,-0.999879,-0.00577567,1.51617,0,0,0,1],"included":true,"visible":[true,true,false,false,true,true,false,false,false,false,false,true],"unobstructed":[true,true,false,false,true,true,false,false,false,false,false,true],"height":1.5112098807574073},{"image_id":"0d8c5fbfd73f44e28d6da370520611e4","pose":[0.0769887,0.00664334,0.997009,-6.15424,-0.997016,-0.00490415,0.0770216,-0.0398163,0.00540151,-0.999965,0.00624716,1.50965,0,0,0,1],"included":true,"visible":[false,false,true,true,false,false,true,false,false,false,true,false],"unobstructed":[false,false,true,true,false,false,false,false,false,false,false,false],"height":1.5058928427471967},{"image_id":"aebb1de49d21485e8bef7633dfb58761","pose":[-0.0229751
,-0.0058052,-0.999718,-1.94579,0.999719,0.00553997,-0.0230069,-0.026534,0.00567231,-0.999967,0.0056775,1.50582,0,0,0,1],"included":true,"visible":[true,false,true,true,false,false,true,true,false,true,false,false],"unobstructed":[true,false,true,true,false,false,true,true,false,false,false,false],"height":1.5101720791580233},{"image_id":"e34e51f3d6584ad09c510de5db84752f","pose":[-0.0418368,-0.0124855,0.999046,-3.99281,-0.993607,-0.104406,-0.0429142,-2.13265,0.104842,-0.994456,-0.00803644,0.980264,0,0,0,1],"included":true,"visible":[false,true,false,false,true,true,false,false,true,false,false,false],"unobstructed":[false,true,false,false,true,true,false,false,true,false,false,false],"height":0.969584316081611}]


--------------------------------------------------------------------------------
/tasks/agent/observation/nlp/instruction_encoder.py:
--------------------------------------------------------------------------------
  1 | import torch
  2 | import torch.nn as nn
  3 | from torch.nn.utils.rnn import pad_sequence, pad_packed_sequence, pack_padded_sequence
  4 | import numpy as np
  5 | from agent.observation.nlp.glove import Glove
  6 | 
  7 | 
  8 | class InstructionEncoder(nn.Module):
  9 |     def __init__(
 10 |         self,
 11 |         config: dict,
 12 |         instr_dim: int,
 13 |         vocab: list
 14 |     ) -> None:
 15 |         super(InstructionEncoder, self).__init__()
 16 |         self.config = config
 17 |         self.padding_value = vocab.index('<pad>')
 18 | 
 19 |         # init embedding layer
 20 |         try:
 21 |             self.emb_layer = torch.load(config['r2r_env']['word_embedding'] + '.pt')
 22 |             print('Loading Glove embedding from %s.pt' % config['r2r_env']['word_embedding'])
 23 |         except FileNotFoundError:
 24 |             print('Loading Glove embedding from %s.txt' % config['r2r_env']['word_embedding'])
 25 |             # setup glove
 26 |             glove = Glove(config['r2r_env']['word_embedding'] + '.txt')
 27 |             weights_matrix = self.get_weights_matrix(vocab, glove)
 28 |             # build embedding layer
 29 |             vocab_size, feature_dim = weights_matrix.shape
 30 |             self.emb_layer = nn.Embedding(vocab_size, feature_dim)
 31 |             self.emb_layer.from_pretrained(torch.tensor(weights_matrix), freeze=True)
 32 |             # save vocab and embedding
 33 |             vocab_path = '/'.join(config['r2r_env']['word_embedding'].split('/')[:-1]) + '/vocab.txt'
 34 |             with open(vocab_path, 'w') as txt_file:
 35 |                 for word in vocab:
 36 |                     txt_file.write(word + '\n')
 37 |             torch.save(self.emb_layer, config['r2r_env']['word_embedding'] + '.pt')
 38 | 
 39 |         # init instruction encoder network
 40 |         self.drop = nn.Dropout(p=config['state_tracker']['dropout_ratio'])
 41 |         self.lstm = nn.LSTM(
 42 |             input_size=self.emb_layer.embedding_dim,
 43 |             hidden_size=config['state_tracker']['obs']['instr']['lstm']['hidden_dim'],
 44 |             num_layers=config['state_tracker']['obs']['instr']['lstm']['num_layers'],
 45 |             batch_first=True,
 46 |             bidirectional=config['state_tracker']['obs']['instr']['lstm']['bidirectional']
 47 |         )
 48 |         self.encoder2decoder = nn.Sequential(
 49 |             nn.Linear(instr_dim, instr_dim, bias=False),
 50 |             nn.Linear(instr_dim, instr_dim, bias=False)
 51 |         )
 52 |         self.encode = self.forward
 53 |         return
 54 | 
 55 |     def get_weights_matrix(
 56 |         self,
 57 |         vocab: list,
 58 |         glove: Glove
 59 |     ) -> np.ndarray:
 60 |         weights_matrix = np.zeros((len(vocab), glove.feature_dim))
 61 |         for i, word in enumerate(vocab):
 62 |             if word in glove.words:
 63 |                 weights_matrix[i] = glove.w2v(word)
 64 |             else:
 65 |                 weights_matrix[i] = np.random.normal(scale=0.6, size=(glove.feature_dim, ))
 66 |         return weights_matrix
 67 | 
 68 |     def forward(
 69 |         self,
 70 |         instrs: list
 71 |     ) -> (torch.Tensor, torch.Tensor, (torch.Tensor, torch.Tensor)):
 72 |         """
 73 |         instrs = [instr1, instr2, ...]
 74 |         instr: [token1_id, token2_id, ...]
 75 |         type(instr): torch.Tensor
 76 |         """
 77 |         instrs_id_len = [len(instr) for instr in instrs]
 78 |         instrs_id_pad = pad_sequence(instrs, batch_first=True, padding_value=self.padding_value).to(self.config['device'])
 79 | 
 80 |         instrs_embed = self.emb_layer(instrs_id_pad)
 81 |         instrs_embed_pack = pack_padded_sequence(
 82 |             instrs_embed,
 83 |             lengths=instrs_id_len,
 84 |             batch_first=True,
 85 |             enforce_sorted=False
 86 |         )
 87 |         enc_h, (enc_h_t, enc_c_t) = self.lstm(instrs_embed_pack)
 88 |         if self.lstm.bidirectional:
 89 |             h_t = torch.cat((enc_h_t[-1], enc_h_t[-2]), 1)
 90 |             c_t = torch.cat((enc_c_t[-1], enc_c_t[-2]), 1)
 91 |         else:
 92 |             h_t = enc_h_t[-1]
 93 |             c_t = enc_c_t[-1]
 94 |         decoder_init = self.encoder2decoder(h_t)
 95 |         instr_embed, lengths = pad_packed_sequence(enc_h, batch_first=True)
 96 |         instr_embed = self.drop(instr_embed)
 97 | 
 98 |         instr_mask = torch.ones(len(instrs), max(instrs_id_len), dtype=torch.bool, device=self.config['device'])
 99 |         for idx, instr in enumerate(instrs):
100 |             instr_mask[idx, :len(instr)] = torch.zeros_like(instr)
101 |         return instr_embed, instr_mask, (decoder_init, c_t)
102 | 
103 | 
def main() -> None:
    """Placeholder entry point: running this module as a script does nothing."""
    return None


if __name__ == '__main__':
    main()
110 | 


--------------------------------------------------------------------------------
/web/README.md:
--------------------------------------------------------------------------------
 1 | # Web
 2 | 
 3 | This directory contains web-based applications for:
 4 | - Viewing and saving first-person trajectories
 5 | - Amazon Mechanical Turk (AMT) interfaces that were used to collect and evaluate navigation instructions
 6 | 
 7 | Code is based on JavaScript and the [three.js](https://threejs.org/) wrapper for OpenGL, as well as the [tween.js](https://github.com/tweenjs/tween.js/) library for animation. The [Gulp](https://gulpjs.com/) task runner (based on Node.js) is used for spinning up a web server and for optimizing and minifying JavaScript for deployment (e.g. to AMT).
 8 | 
 9 | To get started, make sure you have [Node.js](https://nodejs.org/en/) >=6.0.0 installed, then install the remaining dependencies using the npm package manager:
10 | ```
11 | npm install
12 | ```
13 | 
14 | You will also need to first install the Matterport data as described [here](../README.md). Then, set up symlinks to data (from the app directory) as follows:
15 | ```
16 | cd app
17 | ln -s ../../tasks/R2R/data/ R2Rdata
18 | ln -s ../../connectivity connectivity
19 | ln -s ../../data data
20 | ```
21 | 
22 | Also, download the R2R trajectory data by running this script from the top level directory (if you haven't already done this):
23 | ```
24 | ./tasks/R2R/data/download.sh
25 | ```
26 | 
27 | 
28 | Now you can start a web server to check out the various visualizations and AMT user interfaces:
29 | ```
30 | gulp
31 | ```
32 | 
33 | ## Trajectory Visualization
34 | 
35 | `trajectory.html` is an application for viewing first-person trajectories and downloading them as videos:
36 | - Use `Choose File` to select a trajectory file in the leaderboard submission format. By default, the included file `val_unseen_shortest_agent.json` is selected (containing the shortest paths to goal in the unseen validation set).
37 | - `Play` visualizes the trajectory with the provided index.
38 | - `Download video` visualizes the trajectory then downloads it as a .webm video.
39 | - Camera parameters can be set with the `Width`, `Height` and `V-FOV` fields.
40 | - Change the `Index` field to view different trajectories from the file.
41 | 
42 | 
43 | ## AMT Interfaces
44 | 
45 | `collect-hit.html` and `eval-hit.html` are the AMT interfaces used for collecting navigation instructions for the R2R data set, and benchmarking human performance on the R2R test set, respectively. Both interfaces appear as they would to a worker on AMT, except there is no 'Submit' button. Instead, both interfaces have a url parameter `?ix=0` that can be directly edited in your browser address bar to view different HITs. There are also instructions at the top of the UI that can be expanded.
46 | 
47 | ### collect-hit
48 | 
49 | The UI `collect-hit.html` shows workers a navigation trajectory that must be annotated with a navigation instruction. Workers can only move along the trajectory (either fly-through or by clicking through each step), but cannot move anywhere else. Trajectories are loaded from the file `sample_room_paths.json`. Navigation instructions are collected in the textarea with id `tag1`, which can be integrated with AMT. 
50 | 
51 | ### eval-hit
52 | 
53 | The UI `eval-hit.html` situates workers in an environment and provides a navigation instruction sourced from `R2R_test.json`. Workers can move anywhere, and must submit when they are as close as possible to the goal location. The actual navigation trajectories are collected in a hidden input with id `traj`, in the form of comma-separated (viewpointID, heading_degrees, elevation_degrees) tuples.
54 | 
55 | ### Integrating with AMT
56 | 
57 | To actually use these interfaces to collect data they must be integrated with AMT. Please check the AMT docs. At a high level, several additional steps are required to achieve this:
58 | - Run `gulp build` to generate optimized and minified javascript (`main.min.js`) in the `dist` directory. 
59 | - Host online the minified javascript files, along with the Matterport skybox images (we suggest downsampling the originals to 50% or smaller to keep the HITs responsive), our connectivity graphs, and any other necessary files for the particular html template (e.g. your own version of `sample_room_paths.json` or `R2R_test.json`) so they are publicly accessible.
60 | - In the html template(s): 
61 |   - Review the HIT instructions and replace references to ACRV with your research group.
62 |   - Replace all local urls with urls linking to your own publicly hosted assets, and
63 |   - Switch to AMT parameters instead of url parameters, i.e., replace `var ix = location.search.split('ix=')[1];` with `var ix = ${ix}` and provide these parameters to AMT (e.g., in an uploaded csv file) when creating a batch of HITs. Note that the `ix` parameter is just an index into `sample_room_paths.json` or `R2R_test.json`.
64 | - Follow the AMT instructions to create a batch of HITs using your modified html template(s), such that the data collected in the `tag1` and/or `traj` fields will be available through AMT.
65 | 
66 | Disclaimer: We provide this code to assist others collecting AMT annotations on top of Matterport-style data, but this is academic code and not a supported library. We may have forgotten something or left out a step! Feel free to submit pull requests with fixes.
67 | 


--------------------------------------------------------------------------------
/cmake/Modules/FindNumPy.cmake:
--------------------------------------------------------------------------------
  1 | #-------------------------------------------------------------------------------
  2 | # Copyright (c) 2013, Lars Baehren <lbaehren@gmail.com>
  3 | # All rights reserved.
  4 | #
  5 | # Redistribution and use in source and binary forms, with or without modification,
  6 | # are permitted provided that the following conditions are met:
  7 | #
  8 | #  * Redistributions of source code must retain the above copyright notice, this
  9 | #    list of conditions and the following disclaimer.
 10 | #  * Redistributions in binary form must reproduce the above copyright notice,
 11 | #    this list of conditions and the following disclaimer in the documentation
 12 | #    and/or other materials provided with the distribution.
 13 | #
 14 | # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
 15 | # AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 16 | # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
 17 | # DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
 18 | # FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 19 | # DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
 20 | # SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
 21 | # CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
 22 | # OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 23 | # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 24 | #-------------------------------------------------------------------------------
 25 | 
 26 | # - Check for the presence of NumPy
 27 | #
 28 | # The following variables are set when NumPy is found:
 29 | #  NUMPY_FOUND      = Set to true, if all components of NUMPY have been found.
 30 | #  NUMPY_INCLUDES   = Include path for the header files of NUMPY
 31 | #  NUMPY_LIBRARIES  = Link these to use NUMPY
 32 | #  NUMPY_LFLAGS     = Linker flags (optional)
 33 | 
 34 | if (NOT NUMPY_FOUND)
 35 | 
 36 |     if (NOT NUMPY_ROOT_DIR)
 37 |         set (NUMPY_ROOT_DIR ${CMAKE_INSTALL_PREFIX})
 38 |     endif (NOT NUMPY_ROOT_DIR)
 39 | 
 40 |     if (NOT PYTHONINTERP_FOUND)
 41 |         find_package (PythonInterp)
 42 |     endif (NOT PYTHONINTERP_FOUND)
 43 | 
 44 |     ##__________________________________________________________________________
 45 |     ## Check for the header files
 46 | 
 47 |     ## Use Python to determine the include directory
 48 |     execute_process (
 49 |         COMMAND ${PYTHON_EXECUTABLE} -c import\ numpy\;\ print\(numpy.get_include\(\)\)\;
 50 |         ERROR_VARIABLE NUMPY_FIND_ERROR
 51 |         RESULT_VARIABLE NUMPY_FIND_RESULT
 52 |         OUTPUT_VARIABLE NUMPY_FIND_OUTPUT
 53 |         OUTPUT_STRIP_TRAILING_WHITESPACE
 54 |         )
 55 |     ## process the output from the execution of the command
 56 |     if (NOT NUMPY_FIND_RESULT)
 57 |         set (NUMPY_INCLUDES ${NUMPY_FIND_OUTPUT})
 58 |     endif (NOT NUMPY_FIND_RESULT)
 59 | 
 60 |     ##__________________________________________________________________________
 61 |     ## Check for the library
 62 | 
 63 |     unset (NUMPY_LIBRARIES)
 64 | 
 65 |     if (PYTHON_SITE_PACKAGES)
 66 |         find_library (NUMPY_NPYMATH_LIBRARY npymath
 67 |             HINTS ${PYTHON_SITE_PACKAGES}/numpy/core
 68 |             PATH_SUFFIXES lib
 69 |             )
 70 |         if (NUMPY_NPYMATH_LIBRARY)
 71 |             list (APPEND NUMPY_LIBRARIES ${NUMPY_NPYMATH_LIBRARY})
 72 |         endif (NUMPY_NPYMATH_LIBRARY)
 73 |     endif (PYTHON_SITE_PACKAGES)
 74 | 
 75 |     ##__________________________________________________________________________
 76 |     ## Get API version of NumPy from 'numpy/numpyconfig.h'
 77 | 
 78 |     if (PYTHON_EXECUTABLE)
 79 |         execute_process (
 80 |             COMMAND ${PYTHON_EXECUTABLE} -c import\ numpy\;\ print\(numpy.__version__\)\;
 81 |             ERROR_VARIABLE NUMPY_API_VERSION_ERROR
 82 |             RESULT_VARIABLE NUMPY_API_VERSION_RESULT
 83 |             OUTPUT_VARIABLE NUMPY_API_VERSION
 84 |             OUTPUT_STRIP_TRAILING_WHITESPACE
 85 |             )
 86 |     else ()
 87 |         ## Backup procedure: extract version number directly from the header file
 88 |         if (NUMPY_INCLUDES)
 89 |             find_file (HAVE_NUMPYCONFIG_H numpy/numpyconfig.h
 90 |                 HINTS ${NUMPY_INCLUDES}
 91 |                 )
 92 |         endif (NUMPY_INCLUDES)
 93 |     endif ()
 94 | 
 95 |     ## Dissect full version number into major, minor and patch version
 96 |     if (NUMPY_API_VERSION)
 97 |         string (REGEX REPLACE "\\." ";" _tmp ${NUMPY_API_VERSION})
 98 |         list (GET _tmp 0 NUMPY_API_VERSION_MAJOR)
 99 |         list (GET _tmp 1 NUMPY_API_VERSION_MINOR)
100 |         list (GET _tmp 2 NUMPY_API_VERSION_PATCH)
101 |     endif (NUMPY_API_VERSION)
102 | 
103 |     ##__________________________________________________________________________
104 |     ## Actions taken when all components have been found
105 | 
106 |     find_package_handle_standard_args (NUMPY DEFAULT_MSG NUMPY_INCLUDES)
107 | 
108 |     if (NUMPY_FOUND)
109 |         if (NOT NUMPY_FIND_QUIETLY)
110 |             message (STATUS "Found components for NumPy")
111 |             message (STATUS "NUMPY_ROOT_DIR    = ${NUMPY_ROOT_DIR}")
112 |             message (STATUS "NUMPY_INCLUDES    = ${NUMPY_INCLUDES}")
113 |             message (STATUS "NUMPY_LIBRARIES   = ${NUMPY_LIBRARIES}")
114 |             message (STATUS "NUMPY_API_VERSION = ${NUMPY_API_VERSION}")
115 |         endif (NOT NUMPY_FIND_QUIETLY)
116 |     else (NUMPY_FOUND)
117 |         if (NUMPY_FIND_REQUIRED)
118 |             message (FATAL_ERROR "Could not find NUMPY!")
119 |         endif (NUMPY_FIND_REQUIRED)
120 |     endif (NUMPY_FOUND)
121 | 
122 |     ##__________________________________________________________________________
123 |     ## Mark advanced variables
124 | 
125 |   mark_as_advanced (
126 |     NUMPY_ROOT_DIR
127 |     NUMPY_INCLUDES
128 |     NUMPY_LIBRARIES
129 |     )
130 | 
131 | endif (NOT NUMPY_FOUND)
132 | 


--------------------------------------------------------------------------------
/tasks/env/mp_env_dict.py:
--------------------------------------------------------------------------------
  1 | from collections import namedtuple
  2 | import numpy as np
  3 | 
  4 | 
  5 | WorldState = namedtuple(
  6 |     "WorldState",
  7 |     ["scanId", "location", "viewIndex", "heading", "elevation"]
  8 | )
  9 | 
 10 | 
 11 | Location = namedtuple(
 12 |     "Location",
 13 |     ['viewpointId']
 14 | )
 15 | 
 16 | 
 17 | NavigableLocation = namedtuple(
 18 |     "NavigableLocation",
 19 |     ["viewpointId", "ix", "rel_heading", "rel_elevation", "rel_distance"]
 20 | )
 21 | 
 22 | 
 23 | class MatterEnvDictBase():
 24 |     def __init__(
 25 |         self,
 26 |         config: dict
 27 |     ) -> None:
 28 |         super().__init__()
 29 |         self.config = config
 30 | 
 31 |         self.rad30, self.rad360 = np.deg2rad(30), np.deg2rad(30) * 12
 32 |         self._state_infos = []
 33 |         self._view_indices, self._headings, self._elevations = None, None, None
 34 |         self._scan_ids, self._vp_ids = [], []
 35 |         return
 36 | 
 37 |     @property
 38 |     def state_infos(
 39 |         self
 40 |     ) -> list:
 41 |         return self._state_infos
 42 | 
 43 |     def set_state_infos(
 44 |         self,
 45 |         new_state_infos: list
 46 |     ) -> None:
 47 |         self._state_infos = new_state_infos
 48 |         return
 49 | 
 50 |     @property
 51 |     def view_indices(
 52 |         self
 53 |     ) -> np.ndarray:
 54 |         return self._view_indices
 55 | 
 56 |     def set_pose_by_view_index(
 57 |         self,
 58 |         new_view_indices: np.ndarray
 59 |     ) -> None:
 60 |         self._view_indices = new_view_indices
 61 |         self.set_headings((new_view_indices % 12) * self.rad30)
 62 |         self.set_elevations((new_view_indices // 12 - 1) * self.rad30)
 63 |         return
 64 | 
 65 |     @property
 66 |     def scan_ids(
 67 |         self
 68 |     ) -> list:
 69 |         return self._scan_ids
 70 | 
 71 |     def set_scan_ids(
 72 |         self,
 73 |         new_scan_ids: list
 74 |     ) -> None:
 75 |         self._scan_ids = new_scan_ids
 76 |         return
 77 | 
 78 |     @property
 79 |     def vp_ids(
 80 |         self
 81 |     ) -> list:
 82 |         return self._vp_ids
 83 | 
 84 |     def set_vp_ids(
 85 |         self,
 86 |         new_vp_ids: list
 87 |     ) -> None:
 88 |         self._vp_ids = new_vp_ids
 89 |         return
 90 | 
 91 |     @property
 92 |     def headings(
 93 |         self
 94 |     ) -> np.ndarray:
 95 |         return self._headings
 96 | 
 97 |     def set_headings(
 98 |         self,
 99 |         new_headings: np.ndarray
100 |     ) -> None:
101 |         self._headings = new_headings
102 |         return
103 | 
104 |     @property
105 |     def elevations(
106 |         self
107 |     ) -> np.ndarray:
108 |         return self._elevations
109 | 
110 |     def set_elevations(
111 |         self,
112 |         new_elevations: np.ndarray
113 |     ) -> None:
114 |         self._elevations = new_elevations
115 |         return
116 | 
117 | 
class MatterEnvDict(MatterEnvDictBase):
    """Pose-only environment: tracks where each episode is, renders nothing.

    Poses are stored as view indices in ``0..35``: ``index % 12`` selects the
    30-degree heading step and ``index // 12`` the elevation row.
    """

    def __init__(
        self,
        config: dict
    ) -> None:
        """Initialise the base state and read the rendering options.

        Requires ``config['args']`` to contain ``verbose`` and
        ``rendering_idx``.
        """
        super().__init__(config)
        # rendering init
        cli_args = config['args']
        self.verbose = cli_args['verbose']
        self.rendering_idx = cli_args['rendering_idx']

    def get_states(
        self
    ) -> list:
        """Return the current list of ``WorldState`` records."""
        return self.state_infos

    def discretize_heading_rad(
        self,
        rad: float
    ) -> float:
        """Snap ``rad`` to the nearest multiple of ``rad30``, modulo ``rad360``."""
        ticks = round(rad / self.rad30)
        return (ticks * self.rad30) % self.rad360

    def pose_to_view_index(
        self,
        heading: float,
        elevation: float
    ) -> int:
        """Convert a heading/elevation pair (radians) to a view index."""
        elevation_row = round(elevation / self.rad30 + 1)
        heading_col = round(heading / self.rad30) % 12
        return int((12 * elevation_row + heading_col))

    def update_world_states(
        self
    ) -> None:
        """Rebuild one ``WorldState`` per (scan id, viewpoint id) pair in place."""
        for idx, id_pair in enumerate(zip(self.scan_ids, self.vp_ids)):
            scan_id, vp_id = id_pair
            # update state_infos
            self.state_infos[idx] = WorldState(
                scan_id,
                Location(vp_id),
                self.view_indices[idx],
                self.headings[idx],
                self.elevations[idx],
            )

    def new_episodes(
        self,
        scan_ids: list,
        vp_ids: list,
        headings: list,
        gen_gif: bool
    ) -> None:
        """Start a fresh batch of episodes at the given scans and viewpoints.

        Each heading is discretised and combined with elevation 0 to get the
        starting view index. ``gen_gif`` must be falsy (asserted).
        """
        assert not gen_gif
        start_indices = [
            self.pose_to_view_index(self.discretize_heading_rad(heading), 0)
            for heading in headings
        ]

        # store current scan_ids, vp_ids, view_indices
        self.set_scan_ids(scan_ids)
        self.set_vp_ids(vp_ids)
        self.set_pose_by_view_index(np.array(start_indices))
        self.set_state_infos([None] * len(scan_ids))
        # update world states
        self.update_world_states()

    def make_actions(
        self,
        h_times: np.ndarray,
        e_times: np.ndarray,
        next_viewpoint_ids: list
    ) -> None:
        """Apply heading/elevation steps and move to the next viewpoints.

        Elevation steps that would leave the index range (``-1`` while the
        view index is below 12, ``+1`` while it is above 23) are zeroed.
        NOTE: the zeroing is written into ``e_times`` itself, so the caller's
        array is modified.
        """
        # pose_adapt
        in_lowest_row = self.view_indices < 12
        in_highest_row = self.view_indices > 23
        blocked = ((e_times == -1) & in_lowest_row) | ((e_times == 1) & in_highest_row)
        e_times[blocked] = 0

        heading_col = (self.view_indices + h_times) % 12          # 0~11
        elevation_row = (self.view_indices + e_times * 12) // 12  # 0~2
        self.set_pose_by_view_index(heading_col + 12 * elevation_row)
        # update vp_ids
        self.set_vp_ids(next_viewpoint_ids)
        # update world states
        self.update_world_states()
206 | 
207 | 
def main():
    """Placeholder entry point; does nothing and returns ``None``."""
    return None


if __name__ == '__main__':
    main()
214 | 


--------------------------------------------------------------------------------
/scripts/precompute_img_features.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env python
  2 | 
  3 | ''' Script to precompute image features using a Caffe ResNet CNN, using 36 discretized views
  4 |     at each viewpoint in 30 degree increments, and the provided camera WIDTH, HEIGHT 
  5 |     and VFOV parameters. '''
  6 | 
  7 | import numpy as np
  8 | import cv2
  9 | import json
 10 | import math
 11 | import base64
 12 | import csv
 13 | import sys
 14 | 
 15 | csv.field_size_limit(sys.maxsize)
 16 | 
 17 | 
 18 | # Caffe and MatterSim need to be on the Python path
 19 | sys.path.insert(0, 'build')
 20 | import MatterSim
 21 | 
 22 | #caffe_root = '../'  # your caffe build
 23 | #sys.path.insert(0, caffe_root + 'python')
 24 | import caffe
 25 | 
 26 | from timer import Timer
 27 | 
 28 | 
 29 | TSV_FIELDNAMES = ['scanId', 'viewpointId', 'image_w','image_h', 'vfov', 'features']
 30 | VIEWPOINT_SIZE = 36 # Number of discretized views from one viewpoint
 31 | FEATURE_SIZE = 2048
 32 | BATCH_SIZE = 4  # Some fraction of viewpoint size - batch size 4 equals 11GB memory
 33 | GPU_ID = 0
 34 | PROTO = 'models/ResNet-152-deploy.prototxt'
 35 | MODEL = 'models/ResNet-152-model.caffemodel'  # You need to download this, see README.md
 36 | #MODEL = 'models/resnet152_places365.caffemodel'
 37 | OUTFILE = 'img_features/ResNet-152-imagenet.tsv'
 38 | GRAPHS = 'connectivity/'
 39 | 
 40 | # Simulator image parameters
 41 | WIDTH=640
 42 | HEIGHT=480
 43 | VFOV=60
 44 | 
 45 | 
 46 | def load_viewpointids():
 47 |     viewpointIds = []
 48 |     with open(GRAPHS+'scans.txt') as f:
 49 |         scans = [scan.strip() for scan in f.readlines()]
 50 |         for scan in scans:
 51 |             with open(GRAPHS+scan+'_connectivity.json')  as j:
 52 |                 data = json.load(j)
 53 |                 for item in data:
 54 |                     if item['included']:
 55 |                         viewpointIds.append((scan, item['image_id']))
 56 |     print 'Loaded %d viewpoints' % len(viewpointIds)
 57 |     return viewpointIds
 58 | 
 59 | 
 60 | def transform_img(im):
 61 |     ''' Prep opencv 3 channel image for the network '''
 62 |     im_orig = im.astype(np.float32, copy=True)
 63 |     im_orig -= np.array([[[103.1, 115.9, 123.2]]]) # BGR pixel mean
 64 |     blob = np.zeros((1, im.shape[0], im.shape[1], 3), dtype=np.float32)
 65 |     blob[0, :, :, :] = im_orig
 66 |     blob = blob.transpose((0, 3, 1, 2))
 67 |     return blob
 68 | 
 69 | 
 70 | def build_tsv():
 71 |     # Set up the simulator
 72 |     sim = MatterSim.Simulator()
 73 |     sim.setCameraResolution(WIDTH, HEIGHT)
 74 |     sim.setCameraVFOV(math.radians(VFOV))
 75 |     sim.setDiscretizedViewingAngles(True)
 76 |     sim.init()
 77 | 
 78 |     # Set up Caffe resnet
 79 |     caffe.set_device(GPU_ID)
 80 |     caffe.set_mode_gpu()
 81 |     net = caffe.Net(PROTO, MODEL, caffe.TEST)
 82 |     net.blobs['data'].reshape(BATCH_SIZE, 3, HEIGHT, WIDTH)
 83 | 
 84 |     count = 0
 85 |     t_render = Timer()
 86 |     t_net = Timer()
 87 |     with open(OUTFILE, 'wb') as tsvfile:
 88 |         writer = csv.DictWriter(tsvfile, delimiter = '\t', fieldnames = TSV_FIELDNAMES)          
 89 | 
 90 |         # Loop all the viewpoints in the simulator
 91 |         viewpointIds = load_viewpointids()
 92 |         for scanId,viewpointId in viewpointIds:
 93 |             t_render.tic()
 94 |             # Loop all discretized views from this location
 95 |             blobs = []
 96 |             features = np.empty([VIEWPOINT_SIZE, FEATURE_SIZE], dtype=np.float32)
 97 |             for ix in range(VIEWPOINT_SIZE):
 98 |                 if ix == 0:
 99 |                     sim.newEpisode(scanId, viewpointId, 0, math.radians(-30))
100 |                 elif ix % 12 == 0:
101 |                     sim.makeAction(0, 1.0, 1.0)
102 |                 else:
103 |                     sim.makeAction(0, 1.0, 0)
104 | 
105 |                 state = sim.getState()
106 |                 assert state.viewIndex == ix
107 |                 
108 |                 # Transform and save generated image
109 |                 blobs.append(transform_img(state.rgb))
110 | 
111 |             t_render.toc()
112 |             t_net.tic()
113 |             # Run as many forward passes as necessary
114 |             assert VIEWPOINT_SIZE % BATCH_SIZE == 0
115 |             forward_passes = VIEWPOINT_SIZE / BATCH_SIZE            
116 |             ix = 0
117 |             for f in range(forward_passes):
118 |                 for n in range(BATCH_SIZE):
119 |                     # Copy image blob to the net
120 |                     net.blobs['data'].data[n, :, :, :] = blobs[ix]
121 |                     ix += 1
122 |                 # Forward pass
123 |                 output = net.forward()
124 |                 features[f*BATCH_SIZE:(f+1)*BATCH_SIZE, :] = net.blobs['pool5'].data[:,:,0,0]
125 | 
126 |             writer.writerow({
127 |                 'scanId': scanId,
128 |                 'viewpointId': viewpointId,
129 |                 'image_w': WIDTH,
130 |                 'image_h': HEIGHT,
131 |                 'vfov' : VFOV,
132 |                 'features': base64.b64encode(features)
133 |             })
134 |             count += 1
135 |             t_net.toc()
136 |             if count % 100 == 0:
137 |                 print 'Processed %d / %d viewpoints, %.1fs avg render time, %.1fs avg net time, projected %.1f hours' %\
138 |                   (count,len(viewpointIds), t_render.average_time, t_net.average_time, 
139 |                   (t_render.average_time+t_net.average_time)*len(viewpointIds)/3600)
140 | 
141 | 
def read_tsv(infile):
    """Read a feature TSV with the TSV_FIELDNAMES columns back into memory.

    Args:
        infile: path of the TSV file to read.

    Returns:
        A list with one dict per row. ``image_h``, ``image_w`` and ``vfov``
        are converted to ``int``; ``features`` is base64-decoded into a
        float32 array of shape ``(VIEWPOINT_SIZE, FEATURE_SIZE)``.

    Written so that it runs on both Python 2 and Python 3.
    """
    # Read-only is enough (the old "r+b" needed write permission). csv wants
    # a binary file on Python 2 and a newline='' text file on Python 3.
    if sys.version_info[0] < 3:
        tsv_in_file = open(infile, 'rb')
    else:
        tsv_in_file = open(infile, 'r', newline='')

    in_data = []
    with tsv_in_file:
        reader = csv.DictReader(tsv_in_file, delimiter='\t', fieldnames=TSV_FIELDNAMES)
        for item in reader:
            item['image_h'] = int(item['image_h'])
            item['image_w'] = int(item['image_w'])
            item['vfov'] = int(item['vfov'])
            # b64decode replaces base64.decodestring, which was removed in
            # Python 3.9; it accepts the ASCII text read from the file.
            raw = base64.b64decode(item['features'])
            item['features'] = np.frombuffer(
                raw, dtype=np.float32
            ).reshape((VIEWPOINT_SIZE, FEATURE_SIZE))
            in_data.append(item)
    return in_data
155 | 
156 | 
if __name__ == "__main__":
    # Build the feature file, then read it back as a sanity check.
    build_tsv()
    data = read_tsv(OUTFILE)
    # Parenthesised single-argument print behaves the same on Python 2 and 3.
    print('Completed %d viewpoints' % len(data))
162 | 
163 | 


--------------------------------------------------------------------------------
/src/test/rendertest_spec.json:
--------------------------------------------------------------------------------
  1 | [
  2 |   [
  3 |     {
  4 |       "scanId": "17DRP5sb8fy", 
  5 |       "viewpointId": "85c23efeaecd4d43a7dcd5b90137179e", 
  6 |       "elevation": 0.008557380839564054, 
  7 |       "heading": 2.551961945320492, 
  8 |       "reference_image": "17DRP5sb8fy_85c23efeaecd4d43a7dcd5b90137179e_2.551961945320492_0.008557380839564054.png"
  9 |     }, 
 10 |     {
 11 |       "scanId": "1LXtFkjw3qL", 
 12 |       "viewpointId": "187589bb7d4644f2943079fb949c0be9", 
 13 |       "elevation": 0.0004921836022802584, 
 14 |       "heading": 1.8699330579409539, 
 15 |       "reference_image": "1LXtFkjw3qL_187589bb7d4644f2943079fb949c0be9_1.8699330579409539_0.0004921836022802584.png"
 16 |     }, 
 17 |     {
 18 |       "scanId": "1pXnuDYAj8r", 
 19 |       "viewpointId": "163d61ac7edb43fb958c5d9e69ae11ad", 
 20 |       "elevation": -0.02444352614304746, 
 21 |       "heading": 4.626331047551077, 
 22 |       "reference_image": "1pXnuDYAj8r_163d61ac7edb43fb958c5d9e69ae11ad_4.626331047551077_-0.02444352614304746.png"
 23 |     }, 
 24 |     {
 25 |       "scanId": "29hnd4uzFmX", 
 26 |       "viewpointId": "1576d62e7bbb45e8a5ef9e7bb37b1839", 
 27 |       "elevation": -0.0006838914039405167, 
 28 |       "heading": 5.844119909926444, 
 29 |       "reference_image": "29hnd4uzFmX_1576d62e7bbb45e8a5ef9e7bb37b1839_5.844119909926444_-0.0006838914039405167.png"
 30 |     }
 31 |   ],
 32 |   [
 33 |     {
 34 |       "scanId": "2azQ1b91cZZ", 
 35 |       "viewpointId": "3daad58ad53742038e50d62e91f84e7b", 
 36 |       "elevation": 0.016732869758208434, 
 37 |       "heading": 3.1736484087962933, 
 38 |       "reference_image": "2azQ1b91cZZ_3daad58ad53742038e50d62e91f84e7b_3.1736484087962933_0.016732869758208434.png"
 39 |     }, 
 40 |     {
 41 |       "scanId": "2n8kARJN3HM", 
 42 |       "viewpointId": "94ac3cea52ec455993f8562f78da3be1", 
 43 |       "elevation": -0.0009188787844489273, 
 44 |       "heading": 2.604601935142565, 
 45 |       "reference_image": "2n8kARJN3HM_94ac3cea52ec455993f8562f78da3be1_2.604601935142565_-0.0009188787844489273.png"
 46 |     }, 
 47 |     {
 48 |       "scanId": "2t7WUuJeko7", 
 49 |       "viewpointId": "529f006f8293406da0b506defd2891a5", 
 50 |       "elevation": -0.013788837143969411, 
 51 |       "heading": 0.032985516949381344, 
 52 |       "reference_image": "2t7WUuJeko7_529f006f8293406da0b506defd2891a5_0.032985516949381344_-0.013788837143969411.png"
 53 |     }, 
 54 |     {
 55 |       "scanId": "5LpN3gDmAk7", 
 56 |       "viewpointId": "bda8025f20404048a77381e9e0dc0ccf", 
 57 |       "elevation": -0.01083211073205187, 
 58 |       "heading": 5.325207878739601, 
 59 |       "reference_image": "5LpN3gDmAk7_bda8025f20404048a77381e9e0dc0ccf_5.325207878739601_-0.01083211073205187.png"
 60 |     }
 61 |   ],
 62 |   [
 63 |     {
 64 |       "scanId": "5q7pvUzZiYa", 
 65 |       "viewpointId": "397403366d784caf804d741f32fd68b9", 
 66 |       "elevation": -0.0007063598518199811, 
 67 |       "heading": 2.8746465006968234, 
 68 |       "reference_image": "5q7pvUzZiYa_397403366d784caf804d741f32fd68b9_2.8746465006968234_-0.0007063598518199811.png"
 69 |     }, 
 70 |     {
 71 |       "scanId": "5ZKStnWn8Zo", 
 72 |       "viewpointId": "c76b52856e7c4f2a9a4419000c8e646a", 
 73 |       "elevation": -0.02922217527541366, 
 74 |       "heading": 4.13470589902238, 
 75 |       "reference_image": "5ZKStnWn8Zo_c76b52856e7c4f2a9a4419000c8e646a_4.13470589902238_-0.02922217527541366.png"
 76 |     }, 
 77 |     {
 78 |       "scanId": "759xd9YjKW5", 
 79 |       "viewpointId": "2343ef3bf04a4433af62f0d527d7512a", 
 80 |       "elevation": -0.016938006310169448, 
 81 |       "heading": 3.5451019786019264, 
 82 |       "reference_image": "759xd9YjKW5_2343ef3bf04a4433af62f0d527d7512a_3.5451019786019264_-0.016938006310169448.png"
 83 |     }, 
 84 |     {
 85 |       "scanId": "7y3sRwLe3Va", 
 86 |       "viewpointId": "9bbf903d50da4ffd9e5d1fb7c9f4d69b", 
 87 |       "elevation": 0.008361841032265524, 
 88 |       "heading": 1.7348660165523566, 
 89 |       "reference_image": "7y3sRwLe3Va_9bbf903d50da4ffd9e5d1fb7c9f4d69b_1.7348660165523566_0.008361841032265524.png"
 90 |     }
 91 |   ],
 92 |   [
 93 |     {
 94 |       "scanId": "8194nk5LbLH", 
 95 |       "viewpointId": "c9e8dc09263e4d0da77d16de0ecddd39", 
 96 |       "elevation": 0.008533161479170466, 
 97 |       "heading": 4.05504292862083, 
 98 |       "reference_image": "8194nk5LbLH_c9e8dc09263e4d0da77d16de0ecddd39_4.05504292862083_0.008533161479170466.png"
 99 |     }, 
100 |     {
101 |       "scanId": "82sE5b5pLXE", 
102 |       "viewpointId": "056a491afa534b17bac36f4f5898462a", 
103 |       "elevation": -0.0037883068413356496, 
104 |       "heading": 1.689393931320027, 
105 |       "reference_image": "82sE5b5pLXE_056a491afa534b17bac36f4f5898462a_1.689393931320027_-0.0037883068413356496.png"
106 |     }, 
107 |     {
108 |       "scanId": "8WUmhLawc2A", 
109 |       "viewpointId": "d21aae0b5d944f27a0074525c803fc9f", 
110 |       "elevation": -0.04510889155759994, 
111 |       "heading": 3.047458184407221, 
112 |       "reference_image": "8WUmhLawc2A_d21aae0b5d944f27a0074525c803fc9f_3.047458184407221_-0.04510889155759994.png"
113 |     }, 
114 |     {
115 |       "scanId": "ac26ZMwG7aT", 
116 |       "viewpointId": "efeef7cc82c84690addb0bf415f075ea", 
117 |       "elevation": -0.013447513736072197, 
118 |       "heading": 0.07434352566701552, 
119 |       "reference_image": "ac26ZMwG7aT_efeef7cc82c84690addb0bf415f075ea_0.07434352566701552_-0.013447513736072197.png"
120 |     }
121 |   ],
122 |   [
123 |     {
124 |       "scanId": "ARNzJeq3xxb", 
125 |       "viewpointId": "9a671e6915de4eb897f45fee8bf2031d", 
126 |       "elevation": 0.02583868533558965, 
127 |       "heading": 5.616355886953764, 
128 |       "reference_image": "ARNzJeq3xxb_9a671e6915de4eb897f45fee8bf2031d_5.616355886953764_0.02583868533558965.png"
129 |     }, 
130 |     {
131 |       "scanId": "B6ByNegPMKs", 
132 |       "viewpointId": "e3a65955df26467581c32613c4e9f865", 
133 |       "elevation": 0.007265625492957138, 
134 |       "heading": 5.230794959607039, 
135 |       "reference_image": "B6ByNegPMKs_e3a65955df26467581c32613c4e9f865_5.230794959607039_0.007265625492957138.png"
136 |     }, 
137 |     {
138 |       "scanId": "b8cTxDM8gDG", 
139 |       "viewpointId": "f2944e0b66b9461994a7f757582f9bc3", 
140 |       "elevation": -0.007543204141144086, 
141 |       "heading": 0.0853092784395515, 
142 |       "reference_image": "b8cTxDM8gDG_f2944e0b66b9461994a7f757582f9bc3_0.0853092784395515_-0.007543204141144086.png"
143 |     }, 
144 |     {
145 |       "scanId": "cV4RVeZvu5T", 
146 |       "viewpointId": "1b321779a4374c2b952c51820daa9e6c", 
147 |       "elevation": 0.07914721704610106, 
148 |       "heading": 6.266463179566256, 
149 |       "reference_image": "cV4RVeZvu5T_1b321779a4374c2b952c51820daa9e6c_6.266463179566256_0.07914721704610106.png"
150 |     }
151 |   ]
152 | ]
153 | 


--------------------------------------------------------------------------------
/tasks/utils.py:
--------------------------------------------------------------------------------
  1 | import os
  2 | import sys
  3 | import json
  4 | import time
  5 | import random
  6 | import argparse
  7 | import torch
  8 | import numpy as np
  9 | from tensorboardX import SummaryWriter
 10 | from env.rxr_env import RxREnv
 11 | from env.env_utils import Statistic
 12 | from agent.agent_seq2seq import AgentSeq2Seq
 13 | from agent.agent_sacd import AgentSACD
 14 | from agent.agent_reinforce import AgentReinforce
 15 | 
 16 | 
 17 | def print_log(
 18 |     it_now: int,
 19 |     lecture: int,
 20 |     iterations: list,
 21 |     alpha: float,
 22 |     loss_list: np.ndarray,
 23 |     return_average: float,
 24 |     stat_average: Statistic
 25 | ) -> None:
 26 |     if it_now >= 0:
 27 |         log = 'it:%6d [lec:%2d] | it: %.1f, alpha: %.2f, L_q1: %.3f, L_q2: %.3f, L_pi: %.3f, L_ent: %.3f, KLD: %.3f, R: %4.2f, PL: %5.2f, NE: %5.2f, SR: %.2f, SPL: %.2f, STP: %.2f, CLS: %.2f'\
 28 |             % (it_now, lecture,
 29 |                 np.average(iterations), alpha,
 30 |                 loss_list[0], loss_list[1], loss_list[2], loss_list[3], loss_list[4],
 31 |                 return_average,
 32 |                 stat_average.path_len[0], stat_average.nav_error[0],
 33 |                 stat_average.succ_rate[0], stat_average.succ_w_path_len[0],
 34 |                 stat_average.self_stop_rate[0], stat_average.cov_w_len_score[0])
 35 |     else:
 36 |         log = 'R: %4.3f, PL: %6.3f, NE: %6.3f, SR: %.3f, SPL: %.3f, STP: %.3f, CLS: %.3f'\
 37 |             % (return_average,
 38 |                 stat_average.path_len[0], stat_average.nav_error[0],
 39 |                 stat_average.succ_rate[0], stat_average.succ_w_path_len[0],
 40 |                 stat_average.self_stop_rate[0], stat_average.cov_w_len_score[0])
 41 |     print(log)
 42 |     sys.stdout.flush()
 43 |     return
 44 | 
 45 | 
 46 | def load_config(
 47 |     config_dir: str,
 48 |     show_info: bool = True
 49 | ) -> (argparse.Namespace, dict):
 50 |     # parse argument
 51 |     parser = argparse.ArgumentParser()
 52 |     parser.add_argument('--mode')
 53 |     parser.add_argument('--state_tracker')
 54 |     parser.add_argument('--agent')
 55 |     parser.add_argument('--additional_track', default='pose')
 56 |     parser.add_argument('--max_len', default=-1, type=int)
 57 |     parser.add_argument('--demo_activate', default=False, action='store_true')
 58 |     parser.add_argument('--curriculum', default=False, action='store_true')
 59 |     parser.add_argument('--seed', default=0, type=int)
 60 |     parser.add_argument('--load_dir', default='')
 61 |     parser.add_argument('--load_pre_trained_dir', default='')
 62 |     parser.add_argument('--exp_name', default='tmp')
 63 |     parser.add_argument('--rendering_idx', default=-1, type=int)
 64 |     parser.add_argument('--verbose', default=False, action='store_true')
 65 |     parser.add_argument('--aug_data', default=False, action='store_true')
 66 |     parser.add_argument('--load_expert', default=True, action='store_true')
 67 |     args = parser.parse_args()
 68 | 
 69 |     # load config
 70 |     with open(config_dir) as file_name:
 71 |         config = json.load(file_name)
 72 | 
 73 |     if args.mode == 'train':
 74 |         assert args.agent == 'sacd' or args.agent == 'reinforce'
 75 |     elif args.mode == 'test':
 76 |         assert args.load_dir != ''
 77 |     elif args.mode == 'pre_train':
 78 |         assert args.agent == 'seq2seq'
 79 |     else:
 80 |         raise NotImplementedError
 81 | 
 82 |     if args.additional_track not in ['pose', 'action']:
 83 |         raise NotImplementedError
 84 | 
 85 |     config['args'] = {
 86 |         'mode': args.mode,
 87 |         'state_tracker': args.state_tracker,
 88 |         'agent': args.agent,
 89 |         'additional_track': args.additional_track,
 90 |         'max_len': args.max_len if args.max_len else None,
 91 |         'demo_activate': args.demo_activate,
 92 |         'curriculum': args.curriculum,
 93 |         'load_dir': args.load_dir,
 94 |         'load_pre_trained_dir': args.load_pre_trained_dir,
 95 |         'exp_name': args.exp_name,
 96 |         'rendering_idx': args.rendering_idx,
 97 |         'verbose': args.verbose,
 98 |         'aug_data': args.aug_data,
 99 |         'load_expert': args.load_expert
100 |     }
101 |     config['seed'] = args.seed
102 | 
103 |     # setting random seed
104 |     os.environ['PYTHONHASHSEED'] = str(config['seed'])
105 |     torch.manual_seed(config['seed'])
106 |     torch.cuda.manual_seed_all(config['seed'])
107 |     np.random.seed(config['seed'])
108 |     random.seed(config['seed'])
109 | 
110 |     # print info
111 |     if show_info and config['args']['mode'] != 'test':
112 |         print(json.dumps(config, indent=2) + '\n')
113 | 
114 |     # select device
115 |     if torch.cuda.is_available():
116 |         config['device'] = torch.device(config['device'])
117 |     else:
118 |         config['device'] = torch.device('cpu')
119 |     return config
120 | 
121 | 
def init_tb_writer(
    config: dict
) -> SummaryWriter:
    """Create a SummaryWriter for this experiment, clearing any old results.

    The log directory is ``config['result_dir']`` joined with
    ``config['args']['exp_name']``. If that directory already exists it is
    removed first, followed by a five-second pause.

    Args:
        config: configuration dict with ``result_dir`` and ``args.exp_name``.

    Returns:
        A ``SummaryWriter`` writing to the experiment directory.
    """
    # Local import: nothing else in this function's module needs shutil.
    import shutil

    exp_dir = os.path.join(config['result_dir'], config['args']['exp_name'])
    # remove tmp
    if os.path.isdir(exp_dir):
        # rmtree instead of a shell 'rm -r' string: exp_name comes from the
        # command line and may contain spaces or shell metacharacters.
        # ignore_errors keeps the old best-effort behaviour (a failed 'rm'
        # did not abort the run either).
        shutil.rmtree(exp_dir, ignore_errors=True)
        # NOTE(review): pause kept from the original; its purpose is not
        # visible here -- confirm whether it is still needed.
        time.sleep(5)
    # set result dir
    return SummaryWriter(exp_dir)
132 | 
133 | 
def init_agent(
    config: dict,
    env: RxREnv
) -> AgentSACD or AgentReinforce or AgentSeq2Seq:
    """Build the agent named by ``config['args']['agent']``.

    Supported names are ``sacd``, ``reinforce`` and ``seq2seq``; anything
    else raises ``NotImplementedError``. When
    ``config['args']['load_pre_trained_dir']`` is non-empty the agent's
    ``load_pre_train`` is called with it before returning.
    """
    agent_name = config['args']['agent']
    if agent_name == 'sacd':
        agent_cls = AgentSACD
    elif agent_name == 'reinforce':
        agent_cls = AgentReinforce
    elif agent_name == 'seq2seq':
        agent_cls = AgentSeq2Seq
    else:
        raise NotImplementedError
    agent = agent_cls(config, env)

    pre_trained_dir = config['args']['load_pre_trained_dir']
    if pre_trained_dir:
        agent.load_pre_train(pre_trained_dir)
    return agent
149 | 
150 | 
def get_lecture(
    evaluate: bool,
    it_now: int,
    config: dict
) -> int:
    """Return the curriculum lecture index for the current iteration.

    Returns 0 when evaluating or when ``config['args']['curriculum']`` is
    off. Otherwise the lecture grows with training progress as
    ``1 + int(curriculum_progress * it_now / iteration)``, capped at
    ``last_lecture``; a non-positive ``curriculum_progress`` yields
    ``last_lecture`` directly.
    """
    if evaluate or not config['args']['curriculum']:
        return 0

    agent_config = config['agent'][config['args']['mode']]
    total_iteration = agent_config['learning']['iteration']
    demo_config = agent_config['replay_memory']['demonstration']
    progress = demo_config['curriculum_progress']
    last_lecture = demo_config['last_lecture']

    if not progress > 0:
        return last_lecture
    scheduled = 1 + int(progress * (it_now / total_iteration))
    return min(scheduled, last_lecture)
167 | 
168 | 
def main():
    """Placeholder entry point; does nothing and returns ``None``."""
    return None


if __name__ == '__main__':
    main()
175 | 


--------------------------------------------------------------------------------
/tasks/agent/pomdp/avast.py:
--------------------------------------------------------------------------------
  1 | import torch
  2 | import torch.nn as nn
  3 | from agent.pomdp.instruction_attention import InstructionAttention
  4 | 
  5 | 
  6 | class AVAST(nn.Module):
  7 |     def __init__(
  8 |         self,
  9 |         config: dict,
 10 |         vision_dim: int,
 11 |         abs_pose_feature_size: int,
 12 |         action_feature_size: int,
 13 |         additional_track: str
 14 |     ) -> None:
 15 |         super(AVAST, self).__init__()
 16 |         self.config = config
 17 |         tracker_config = config['state_tracker'][config['args']['state_tracker']]
 18 |         self.h_dim = tracker_config['hidden_dim']
 19 |         self.z_dim = tracker_config['latent_dim']
 20 |         self.n_layer = tracker_config['num_layers']
 21 |         self.drop = nn.Dropout(p=config['state_tracker']['dropout_ratio'])
 22 | 
 23 |         # addtional information
 24 |         addtional_dim = self.h_dim
 25 |         self.additional_track = additional_track
 26 |         if additional_track == 'pose':
 27 |             self.angle_fc = nn.Sequential(
 28 |                 nn.Linear(abs_pose_feature_size, self.h_dim, bias=False),
 29 |                 nn.Linear(self.h_dim, addtional_dim, bias=False)
 30 |             )
 31 |         elif additional_track == 'action':
 32 |             self.angle_fc = nn.Sequential(
 33 |                 nn.Linear(action_feature_size, self.h_dim, bias=False),
 34 |                 nn.Linear(self.h_dim, addtional_dim, bias=False)
 35 |             )
 36 |         else:
 37 |             raise NotImplementedError
 38 | 
 39 |         # context belief state
 40 |         self.instr_attention = InstructionAttention(
 41 |             embed_dim=self.h_dim,
 42 |             dropout_ratio=config['state_tracker']['dropout_ratio']
 43 |         )
 44 | 
 45 |         # tracking belief state
 46 |         # transform
 47 |         self.phi_x = nn.Linear(vision_dim + addtional_dim, self.h_dim, bias=False)
 48 |         self.phi_z = nn.Linear(self.z_dim, self.h_dim, bias=False)
 49 |         # recurrent
 50 |         self.lstm = nn.LSTMCell(
 51 |             input_size=self.h_dim + self.h_dim,
 52 |             hidden_size=self.h_dim
 53 |         )
 54 |         # inference
 55 |         self.enc = nn.Linear(self.h_dim + self.h_dim, self.h_dim, bias=False)
 56 |         self.enc_mean = nn.Linear(self.h_dim, self.z_dim, bias=False)
 57 |         self.enc_std = nn.Sequential(
 58 |             nn.Linear(self.h_dim, self.z_dim, bias=False),
 59 |             nn.Softplus()
 60 |         )
 61 |         # prior
 62 |         self.prior = nn.Linear(self.h_dim, self.h_dim, bias=False)
 63 |         self.prior_mean = nn.Linear(self.h_dim, self.z_dim, bias=False)
 64 |         self.prior_std = nn.Sequential(
 65 |             nn.Linear(self.h_dim, self.z_dim, bias=False),
 66 |             nn.Softplus()
 67 |         )
 68 | 
 69 |         # belief state
 70 |         self.out_fc = nn.Sequential(
 71 |             nn.Linear(self.h_dim + self.z_dim, self.h_dim, bias=False),
 72 |             nn.Linear(self.h_dim, self.h_dim, bias=False)
 73 |         )
 74 | 
 75 |         self.state_dim = self.h_dim
 76 |         return
 77 | 
 78 |     def _reparameterized_sample(
 79 |         self,
 80 |         mean: torch.Tensor,
 81 |         std: torch.Tensor
 82 |     ) -> torch.Tensor:
 83 |         return mean + std * torch.randn_like(std)
 84 | 
 85 |     def forward(
 86 |         self,
 87 |         vision_embed: torch.Tensor,
 88 |         instr_embed: torch.Tensor,
 89 |         instr_mask: torch.Tensor,
 90 |         abs_pose_features: torch.Tensor,
 91 |         action_features: torch.Tensor,
 92 |         hiddens: (torch.Tensor, torch.Tensor)
 93 |     ) -> (torch.Tensor, torch.Tensor, (torch.Tensor, torch.Tensor), dict):
 94 |         if self.additional_track == 'pose':
 95 |             additional_embed = self.angle_fc(abs_pose_features)
 96 |         elif self.additional_track == 'action':
 97 |             additional_embed = self.angle_fc(action_features)
 98 |         else:
 99 |             raise NotImplementedError
100 | 
101 |         concat_input = torch.cat([vision_embed, additional_embed], 1)
102 |         input_drop = self.drop(concat_input)
103 | 
104 |         phi_x_t = self.phi_x(input_drop)
105 | 
106 |         # inference
107 |         enc_t = self.enc(torch.cat([phi_x_t, hiddens[0]], 1))
108 |         enc_mean_t = self.enc_mean(enc_t)
109 |         enc_std_t = self.enc_std(enc_t)
110 | 
111 |         # prior
112 |         prior_t = self.prior(hiddens[0])
113 |         prior_mean_t = self.prior_mean(prior_t)
114 |         prior_std_t = self.prior_std(prior_t)
115 | 
116 |         # sampling and reparameterization
117 |         z_t = self._reparameterized_sample(enc_mean_t, enc_std_t)
118 |         phi_z_t = self.phi_z(z_t)
119 | 
120 |         # recurrent
121 |         h_t, c_t = self.lstm(torch.cat([phi_x_t, phi_z_t], 1), hiddens)
122 |         h_t_drop = self.drop(h_t)
123 | 
124 |         context_belief_states, instr_attn_weight = self.instr_attention(h_t_drop, instr_embed, instr_mask)
125 |         belief_states = torch.cat(
126 |             [context_belief_states, z_t],
127 |             dim=1
128 |         )
129 |         belief_states = self.out_fc(belief_states)
130 |         dists = {
131 |             'enc_mean': enc_mean_t,
132 |             'enc_std': enc_std_t,
133 |             'prior_mean': prior_mean_t,
134 |             'prior_std': prior_std_t
135 |         }
136 |         return belief_states, context_belief_states, instr_attn_weight, (h_t, c_t), dists
137 | 
138 |     def inference(
139 |         self,
140 |         vision_embed: torch.Tensor,
141 |         instr_embed: torch.Tensor,
142 |         instr_mask: torch.Tensor,
143 |         abs_pose_features: torch.Tensor,
144 |         action_features: torch.Tensor,
145 |         hiddens: (torch.Tensor, torch.Tensor)
146 |     ) -> (torch.Tensor, torch.Tensor, torch.Tensor, (torch.Tensor, torch.Tensor)):
147 |         if self.additional_track == 'pose':
148 |             additional_embed = self.angle_fc(abs_pose_features)
149 |         elif self.additional_track == 'action':
150 |             additional_embed = self.angle_fc(action_features)
151 |         else:
152 |             raise NotImplementedError
153 | 
154 |         concat_input = torch.cat([vision_embed, additional_embed], 1)
155 |         input_drop = self.drop(concat_input)
156 | 
157 |         phi_x_t = self.phi_x(input_drop)
158 | 
159 |         # inference
160 |         enc_t = self.enc(torch.cat([phi_x_t, hiddens[0]], 1))
161 |         z_t = self.enc_mean(enc_t)
162 |         phi_z_t = self.phi_z(z_t)
163 | 
164 |         # recurrent
165 |         h_t, c_t = self.lstm(torch.cat([phi_x_t, phi_z_t], 1), hiddens)
166 |         h_t_drop = self.drop(h_t)
167 | 
168 |         context_belief_states, instr_attn_weight = self.instr_attention(h_t_drop, instr_embed, instr_mask)
169 |         belief_states = torch.cat(
170 |             [context_belief_states, z_t],
171 |             dim=1
172 |         )
173 |         belief_states = self.out_fc(belief_states)
174 |         return belief_states, context_belief_states, instr_attn_weight, (h_t, c_t)
175 | 
176 | 
def main():
    """Placeholder entry point; this module has nothing to run on its own."""
    return None


if __name__ == '__main__':
    main()
183 | 


--------------------------------------------------------------------------------
/tasks/agent/agent_reinforce.py:
--------------------------------------------------------------------------------
  1 | import os
  2 | import torch
  3 | from torch.nn import functional as F
  4 | import torch.distributions as D
  5 | import numpy as np
  6 | from agent.model import CategoricalPolicy
  7 | from agent.agent_base import AgentBase
  8 | 
  9 | 
 10 | class AgentReinforce(AgentBase):
 11 |     def __init__(
 12 |         self,
 13 |         config: dict,
 14 |         env: object,
 15 |         weight_decay: float = 0.0005
 16 |     ) -> None:
 17 |         super().__init__(config, env)
 18 |         self.agent_learning_config = config['agent'][config['args']['mode']]['learning']
 19 | 
 20 |         assert config['args']['agent'] == 'reinforce'
 21 |         input_size = self.state_tracker.state_dim
 22 |         self.gamma = self.agent_learning_config['gamma']
 23 | 
 24 |         # policy
 25 |         self.policy = CategoricalPolicy(input_size, self.cv_utils.action_feature_size).to(config['device'])
 26 | 
 27 |         # setup loss function and optimizer
 28 |         self.optimizer_policy = torch.optim.Adam(self.policy.parameters(), lr=self.agent_learning_config['lr'], weight_decay=weight_decay)
 29 | 
 30 |         self.update_networks()
 31 |         for net_type, nets in self.networks.items():
 32 |             for net_id, net in nets.items():
 33 |                 if isinstance(net, torch.nn.Module):
 34 |                     print('%5s: %-10s' % (net_type, net_id), "<class 'torch.nn.Module'>")
 35 |                 else:
 36 |                     print('%5s: %-10s' % (net_type, net_id), type(net))
 37 |         return
 38 | 
 39 |     def _get_actions_prob(
 40 |         self,
 41 |         belief_state: torch.Tensor,
 42 |         candidate_action_embed: torch.Tensor,
 43 |         illegal: torch.Tensor
 44 |     ) -> (torch.Tensor, torch.Tensor, torch.Tensor):
 45 |         policy_out = self.policy(belief_state, candidate_action_embed)
 46 |         policy_out[illegal] = -float('inf')
 47 |         actions_prob = F.softmax(policy_out, dim=1)
 48 |         tiny_prob = (actions_prob == 0.0).float() * 1e-8
 49 |         actions_log_prob = torch.log(actions_prob + tiny_prob)
 50 |         return policy_out, actions_prob, actions_log_prob
 51 | 
 52 |     def _get_policy_rl_loss(
 53 |         self,
 54 |         batch_mem: list
 55 |     ) -> torch.Tensor or None:
 56 |         belief_state, critic_belief_state, action, intent, candidate_action_feature, reward, cummulative_reward, agent_mask, done = batch_mem
 57 | 
 58 |         # get policy loss
 59 |         illegal = self.intent_to_mask(intents=intent, find_legal=False)
 60 |         logit = self.policy(belief_state, candidate_action_feature)
 61 |         logit[illegal] = -float('inf')
 62 | 
 63 |         logit = logit[agent_mask]
 64 |         action = action[agent_mask].squeeze()
 65 | 
 66 |         agent_actions_log_prob = F.cross_entropy(logit, action.clone(), reduction='none').unsqueeze(1)
 67 |         agent_cummulative_reward = cummulative_reward[agent_mask]
 68 | 
 69 |         assert agent_actions_log_prob.shape == agent_cummulative_reward.shape
 70 |         if len(agent_actions_log_prob) > 0:
 71 |             policy_rl_loss = agent_cummulative_reward * agent_actions_log_prob
 72 |             return policy_rl_loss.mean()
 73 |         else:
 74 |             return None
 75 | 
 76 |     def train(
 77 |         self,
 78 |         agent_mems: list
 79 |     ) -> (np.ndarray, float):
 80 |         batch_agent_mem = self._get_batch_mem(agent_mems)
 81 |         policy_loss = self._get_policy_rl_loss(batch_agent_mem)
 82 |         self._update_param(loss=policy_loss, optimizer=self.optimizer_policy, network=self.policy, clip_grad=1)
 83 |         policy_loss_float = self._get_float_loss(policy_loss)
 84 |         return np.array([0, 0, policy_loss_float, 0, 0]), 0
 85 | 
 86 |     def act(
 87 |         self,
 88 |         location_infos: list,
 89 |         instr_embed: torch.Tensor,
 90 |         instr_mask: torch.Tensor,
 91 |         last_action_features: torch.Tensor,
 92 |         hiddens: torch.Tensor,
 93 |         evaluate: bool
 94 |     ) -> (np.ndarray, torch.Tensor, torch.Tensor):
 95 |         # get belief state
 96 |         if evaluate:
 97 |             belief_states, _, instr_attn_weight, hiddens = self.get_belief_states(
 98 |                 vision_features=self.cv_utils.get_vision_features(location_infos),
 99 |                 abs_pose_features=self.cv_utils.get_abs_pose_features(location_infos).squeeze(0),
100 |                 action_features=last_action_features,
101 |                 instr_embed=instr_embed,
102 |                 instr_mask=instr_mask,
103 |                 hiddens=hiddens,
104 |                 enable_grad=False
105 |             )
106 |         else:
107 |             belief_states, _, instr_attn_weight, hiddens, _ = self.get_belief_states_with_dist(
108 |                 vision_features=self.cv_utils.get_vision_features(location_infos),
109 |                 abs_pose_features=self.cv_utils.get_abs_pose_features(location_infos).squeeze(0),
110 |                 action_features=last_action_features,
111 |                 instr_embed=instr_embed,
112 |                 instr_mask=instr_mask,
113 |                 hiddens=hiddens,
114 |                 enable_grad=False
115 |             )
116 | 
117 |         with torch.no_grad():
118 |             # get candidate action features
119 |             candidate_action_features = self.cv_utils.get_candidate_action_features(location_infos).squeeze(0)
120 | 
121 |             # generate actions
122 |             legals = self.intent_to_mask(
123 |                 intents=[location_info.action_info.intents for location_info in location_infos],
124 |                 find_legal=True
125 |             )
126 |             policy_out = self.policy(belief_states, candidate_action_features)
127 |             illegal = torch.logical_not(torch.tensor(legals, dtype=torch.bool))
128 |             policy_out[illegal] = -float('inf')
129 |             if evaluate:
130 |                 # select action by greedy
131 |                 actions = torch.argmax(policy_out, dim=1).cpu().view(-1).numpy()
132 |             else:
133 |                 # sample action from categorical distribution
134 |                 actions_prob = F.softmax(policy_out, dim=1)
135 |                 actions_dist = D.Categorical(actions_prob)
136 |                 actions = actions_dist.sample().cpu().view(-1).numpy()
137 |         return actions, candidate_action_features, hiddens
138 | 
139 |     def load_pre_train(
140 |         self,
141 |         load_dir: str
142 |     ) -> None:
143 |         models = {
144 |             'pomdp': [self.config['args']['state_tracker']],
145 |             'obs': ['vision', 'instr'],
146 |             'agent': ['policy']
147 |         }
148 | 
149 |         for net_type, net_ids in models.items():
150 |             for net_id in net_ids:
151 |                 net_dir = os.path.join(load_dir, '%s_%s.pt' % (net_type, net_id))
152 |                 self.networks[net_type][net_id].load_state_dict(torch.load(net_dir))
153 |                 if net_type != 'agent':
154 |                     self.networks[net_type][net_id].eval()
155 |                     for param in self.networks[net_type][net_id].parameters():
156 |                         param.requires_grad = False
157 |                 print('load %s_%s from %s' % (net_type, net_id, net_dir))
158 |         return
159 | 
160 | 
def main():
    """Placeholder entry point; this module has nothing to run on its own."""
    return None


if __name__ == '__main__':
    main()
167 | 


--------------------------------------------------------------------------------
/tasks/data/scripts/generate_adj_dict.py:
--------------------------------------------------------------------------------
  1 | import MatterSim
  2 | import os
  3 | import json
  4 | from collections import namedtuple
  5 | import numpy as np
  6 | from tqdm import tqdm
  7 | 
  8 | 
  9 | Location = namedtuple(
 10 |     "Location",
 11 |     ["vp_id", "abs_heading", "abs_elevation", "view_index", "rel_heading", "rel_elevation", "forward"]
 12 | )
 13 | rad30 = np.deg2rad(30)
 14 | skybox_dir = '/root/mount/AVAST_R2R/data/v1/scans/'
 15 | connectivity_dir = '/root/mount/AVAST_R2R/connectivity/'
 16 | 
 17 | 
 18 | def absolutize_rad(
 19 |     rad: float
 20 | ) -> float:
 21 |     """R -> (-pi,pi]"""
 22 |     return rad - 2 * np.pi * round(rad / (2 * np.pi))
 23 | 
 24 | 
 25 | def get_angular_distance(
 26 |     rel_heading: float,
 27 |     rel_elevation: float
 28 | ) -> float:
 29 |     return np.sqrt(rel_heading ** 2 + rel_elevation ** 2)
 30 | 
 31 | 
 32 | def get_loc_navigable_key(
 33 |     scan_id: str,
 34 |     viewpoint_id: str,
 35 |     view_index: int
 36 | ) -> str:
 37 |     return '%s_%s_%d' % (scan_id, viewpoint_id, view_index)
 38 | 
 39 | 
 40 | def new_episodes(
 41 |     scan_ids: list,
 42 |     vp_ids: list,
 43 |     headings: list
 44 | ) -> None:
 45 |     sim = MatterSim.Simulator()
 46 |     sim.setDatasetPath(skybox_dir)
 47 |     sim.setNavGraphPath(connectivity_dir)
 48 |     sim.setCameraResolution(800, 600)
 49 |     sim.setCameraVFOV(np.deg2rad(60))
 50 |     sim.setDiscretizedViewingAngles(True)
 51 |     sim.setBatchSize(len(scan_ids))
 52 |     sim.setCacheSize(2 * len(scan_ids))
 53 |     sim.setDepthEnabled(False)
 54 |     sim.setRenderingEnabled(False)
 55 |     sim.newEpisode(scan_ids, vp_ids, headings, [0] * len(scan_ids))
 56 |     return sim
 57 | 
 58 | 
def main():
    """Build the per-view navigable-location table and print it as JSON.

    For every scan found in ``connectivity_dir`` the simulator is stepped
    through 3 elevations x 12 headings for all included viewpoints at once.
    For each (start viewpoint, reachable viewpoint) pair the observation with
    the smallest angular distance is kept. The table is then expanded to one
    entry per ``<scan>_<viewpoint>_<view index>`` key and dumped to stdout.
    """
    # scan_ids = ['JF19kD82Mey']
    # scan id = part of the file name before the first underscore
    scan_ids = [tmp.split('_')[0] for tmp in os.listdir(connectivity_dir) if '.json' in tmp]

    # connect[scan_id][image_id] -> connectivity record, restricted to
    # viewpoints that are included and have at least one unobstructed link
    connect = {}
    for scan_id in scan_ids:
        with open(connectivity_dir + '%s_connectivity.json' % scan_id) as file_name:
            connect_json = json.load(file_name)
        connect_scan_id = {}
        for loc in connect_json:
            if loc['included'] and any(loc['unobstructed']):
                connect_scan_id.update({loc['image_id']: loc})
        connect.update({scan_id: connect_scan_id})

    # get loc navigable
    loc_navigable = {}
    # NOTE(review): max_action_num is updated below but never read afterwards
    max_action_num = 0
    for scan_id in tqdm(scan_ids):
        # get possible navigate point at 0, 30, ..., 330 degree
        start_vp_ids = list(connect[scan_id].keys())
        num_start_vp_id = len(start_vp_ids)
        # one simulator batch entry per start viewpoint of this scan
        sim = new_episodes([scan_id] * num_start_vp_id, start_vp_ids, [0] * num_start_vp_id)
        # navigable[start_vp_id][end_vp_id] -> best Location seen so far
        navigable = {start_vp_id: {} for start_vp_id in start_vp_ids}

        # change perspective
        # presumably tilts the camera down one step so the sweep starts at the
        # lowest elevation -- confirm against the MatterSim makeAction docs
        sim.makeAction([0] * num_start_vp_id, [0.0] * num_start_vp_id, [-1.0] * num_start_vp_id)
        for elevation_idx in range(3):
            for heading_idx in range(12):
                view_index = elevation_idx * 12 + heading_idx
                state_infos = sim.getState()
                for state_info in state_infos:
                    start_vp_id = state_info.location.viewpointId
                    # entry 0 is skipped; presumably it is the current viewpoint itself
                    for forward, loc_end_info in enumerate(state_info.navigableLocations[1:]):
                        end_vp_id = loc_end_info.viewpointId
                        distance = get_angular_distance(loc_end_info.rel_heading, loc_end_info.rel_elevation)

                        # keep the view in which the target has the smallest angular distance
                        if end_vp_id not in navigable[start_vp_id] or distance < get_angular_distance(navigable[start_vp_id][end_vp_id].rel_heading, navigable[start_vp_id][end_vp_id].rel_elevation):
                            abs_heading = absolutize_rad(loc_end_info.rel_heading + state_info.heading)
                            abs_elevation = absolutize_rad(loc_end_info.rel_elevation + state_info.elevation)
                            navigable[start_vp_id][end_vp_id] = Location(
                                vp_id=loc_end_info.viewpointId,
                                abs_heading=abs_heading,
                                abs_elevation=abs_elevation,
                                view_index=view_index,
                                rel_heading=loc_end_info.rel_heading,
                                rel_elevation=loc_end_info.rel_elevation,
                                # +1 because the slice above dropped entry 0
                                forward=forward + 1
                            )
                # next heading step
                sim.makeAction([0] * num_start_vp_id, [1.0] * num_start_vp_id, [0.0] * num_start_vp_id)
            # next elevation step
            sim.makeAction([0] * num_start_vp_id, [0.0] * num_start_vp_id, [1.0] * num_start_vp_id)
        sim.makeAction([0] * num_start_vp_id, [0.0] * num_start_vp_id, [-1.0] * num_start_vp_id)

        # order each viewpoint's targets by the magnitude of their absolute heading
        navigable_sorted = {}
        for start_vp_id, loc_ends in navigable.items():
            loc_ends_sorted = sorted(loc_ends.values(), key=lambda x: abs(x.abs_heading))
            if len(loc_ends_sorted) > max_action_num:
                max_action_num = len(loc_ends_sorted)
            navigable_sorted[start_vp_id] = loc_ends_sorted

        # expand to one table entry per (viewpoint, view index)
        for start_vp_id in start_vp_ids:
            for elevation_idx in range(3):
                for heading_idx in range(12):
                    view_index = elevation_idx * 12 + heading_idx
                    # camera pose of this view: 30-degree heading steps and
                    # elevations of -30, 0 and +30 degrees
                    heading = (view_index % 12) * rad30
                    elevation = (view_index // 12 - 1) * rad30

                    loc_start = get_loc_navigable_key(scan_id, start_vp_id, view_index)
                    # first entry always refers to the start viewpoint itself
                    loc_navigable[loc_start] = [
                        {
                            'absViewIndex': view_index,
                            'nextViewpointId': start_vp_id,
                            'rel_heading': 0,
                            'rel_elevation': 0,
                            'distance': 0,
                            'forward': 0
                        }
                    ]

                    for loc_end in navigable_sorted[start_vp_id]:
                        # target direction relative to this view's camera pose
                        rel_heading = absolutize_rad(loc_end.abs_heading - heading)
                        rel_elevation = absolutize_rad(loc_end.abs_elevation - elevation)

                        loc_navigable[loc_start].append(
                            {
                                'absViewIndex': loc_end.view_index,
                                'nextViewpointId': loc_end.vp_id,
                                'rel_heading': rel_heading,
                                'rel_elevation': rel_elevation,
                                'distance': get_angular_distance(rel_heading, rel_elevation),
                                'forward': loc_end.forward
                            }
                        )
                        # sanity check: in the view where the target was recorded the
                        # recomputed offsets must equal the simulator's (exact float
                        # comparison)
                        if view_index == loc_end.view_index:
                            assert loc_end.rel_heading == loc_navigable[loc_start][-1]['rel_heading']
                            assert loc_end.rel_elevation == loc_navigable[loc_start][-1]['rel_elevation']
    print(json.dumps(loc_navigable, indent=4))
    # pipe into new json file
    return


if __name__ == '__main__':
    main()
161 | 


--------------------------------------------------------------------------------
/tasks/agent/replay_memory.py:
--------------------------------------------------------------------------------
  1 | from collections import deque
  2 | import torch
  3 | import numpy as np
  4 | 
  5 | 
  6 | class ReplayMemory():
  7 |     def __init__(
  8 |         self,
  9 |         config: dict,
 10 |         env: object,
 11 |         expert: str = '',
 12 |         on_policy: bool = False
 13 |     ) -> None:
 14 |         super().__init__()
 15 |         self.replay_memory_config = config['agent'][config['args']['mode']]['replay_memory']
 16 |         self.device = config['device']
 17 |         assert self.replay_memory_config['max_epi_len'] == config['r2r_env']['max_iteration']
 18 |         self.cv_utils = env.cv_utils
 19 |         self.on_policy = on_policy
 20 | 
 21 |         if expert:
 22 |             self.split = expert
 23 |             self.max_epi_num = len(env.datasets[self.split])
 24 |             self.min_epi_num = len(env.datasets[self.split])
 25 |         else:
 26 |             self.max_epi_num = self.replay_memory_config['max_epi_num']
 27 |             self.min_epi_num = self.replay_memory_config['min_epi_num']
 28 | 
 29 |         self.max_epi_len = self.replay_memory_config['max_epi_len']
 30 | 
 31 |         # rl information
 32 |         self.memory = deque(maxlen=self.max_epi_num)
 33 |         self.instr = deque(maxlen=self.max_epi_num)
 34 |         return
 35 | 
 36 |     def assert_batch_is_valid(
 37 |         self,
 38 |         batch_size: int
 39 |     ) -> None:
 40 |         if self.on_policy:
 41 |             assert batch_size == len(self.memory) == len(self.instr)
 42 |             assert batch_size <= self.max_epi_num
 43 |         else:
 44 |             assert batch_size <= len(self.memory)
 45 |         assert len(self.memory) >= self.min_epi_num
 46 |         return
 47 | 
 48 |     def append(
 49 |         self,
 50 |         trajs: list,
 51 |         instrs: list
 52 |     ) -> None:
 53 |         assert len(trajs) == len(instrs)
 54 |         assert all([len(traj) <= self.max_epi_len for traj in trajs])
 55 |         # push trajectories, instructions into deques
 56 |         # -------------------------------------------------------
 57 |         # tran = (location_info, action, reward, expert_len)
 58 |         # traj = [tran1, tran2, ..., tranT]
 59 |         # trajs = [traj1, traj2, ...]
 60 |         # instrs = [instruction1, instruction2, ...]
 61 |         self.memory.extend(trajs)
 62 |         self.instr.extend(instrs)
 63 |         return
 64 | 
 65 |     def sample(
 66 |         self,
 67 |         batch_size: int,
 68 |         indices_select: np.ndarray = None
 69 |     ) -> list:
 70 |         self.assert_batch_is_valid(batch_size)
 71 | 
 72 |         # select memories by random select indices
 73 |         if indices_select is None:
 74 |             indices_select = np.random.choice(len(self.memory), replace=False, size=batch_size)
 75 |         assert batch_size == len(indices_select)
 76 |         traj_len_select = np.array([len(self.memory[idx]) for idx in indices_select], dtype=np.int)
 77 |         memory_select = np.array([self.memory[idx] for idx in indices_select], dtype=object)
 78 |         instr_select = np.array([self.instr[idx] for idx in indices_select], dtype=object)
 79 | 
 80 |         # divide selected-memory to serveral batch tensor by different length
 81 |         mems = []
 82 |         for traj_len in set(traj_len_select):
 83 |             # transform transitions
 84 |             mem_tmp = memory_select[(traj_len_select == traj_len)]
 85 |             # convert to list of tensor
 86 |             # -------------------------------------------------------
 87 |             # [(traj_len, 1, feature_size), (traj_len, 1, feature_size), ...]
 88 |             location_info_list, action_list, reward_list, expert_len_list = [], [], [], []
 89 |             for traj in mem_tmp:
 90 |                 location_info_list.append([tran[0] for tran in traj])
 91 |                 action_list.append(torch.tensor([tran[1] for tran in traj], dtype=torch.int64).view(traj_len, 1, 1))
 92 |                 reward_list.append(torch.tensor([(tran[2]) for tran in traj], dtype=torch.float).view(traj_len, 1, 1))
 93 |                 expert_len_list.append(traj[0][3])
 94 |             # convert to tensor
 95 |             # -------------------------------------------------------
 96 |             # (traj_len, batch_size_tmp, feature_size)
 97 |             action_batch = torch.cat(action_list, dim=1)
 98 |             reward_batch = torch.cat(reward_list, dim=1)
 99 |             expert_len_batch = torch.tensor(expert_len_list, dtype=torch.int64)
100 | 
101 |             # transform instructions
102 |             instr_tmp = instr_select[(traj_len_select == traj_len)]
103 |             instr_list_batch = [torch.tensor(instr, dtype=torch.long) for instr in instr_tmp]
104 | 
105 |             mems.append(
106 |                 self.preprocess_mem(
107 |                     (instr_list_batch, location_info_list, action_batch, reward_batch, expert_len_batch)
108 |                 )
109 |             )
110 | 
111 |         if self.on_policy:
112 |             self.memory.clear()
113 |             self.instr.clear()
114 |         return mems
115 | 
116 |     def __len__(
117 |         self
118 |     ) -> int:
119 |         return len(self.memory)
120 | 
121 |     def __str__(
122 |         self
123 |     ) -> str:
124 |         return str(self.memory)
125 | 
126 |     def get_intent_batch(
127 |         self,
128 |         location_info_list: list
129 |     ) -> torch.Tensor:
130 |         # traj_len, batch_size, action_space
131 |         intent_batch = np.zeros(
132 |             (
133 |                 len(location_info_list[0]),
134 |                 len(location_info_list),
135 |                 len(location_info_list[0][0].action_info.intents)
136 |             ),
137 |             dtype=np.float
138 |         )
139 |         for batch_idx, location_infos in enumerate(location_info_list):
140 |             for time_step, location_info in enumerate(location_infos):
141 |                 intent_batch[time_step, batch_idx] = location_info.action_info.intents
142 |         return torch.tensor(intent_batch, dtype=torch.int64, device=self.device)
143 | 
144 |     def get_candiate_action_embed_batch(
145 |         self,
146 |         location_info_list: list
147 |     ) -> torch.Tensor:
148 |         candiate_action_embed_batch = []
149 |         for location_infos in location_info_list:
150 |             candiate_action_embed_batch.append(
151 |                 self.cv_utils.get_candidate_action_features(location_infos, batch=False)
152 |             )
153 |         return torch.cat(candiate_action_embed_batch, dim=1)
154 | 
155 |     def get_vision_batch(
156 |         self,
157 |         location_info_list: list
158 |     ) -> torch.Tensor:
159 |         vision_batch = []
160 |         for location_infos in location_info_list:
161 |             vision_batch.append(
162 |                 self.cv_utils.get_vision_features(location_infos, batch=False)
163 |             )
164 |         return torch.cat(vision_batch, dim=1)
165 | 
166 |     def get_abs_pose_batch(
167 |         self,
168 |         location_info_list: list
169 |     ) -> torch.Tensor:
170 |         abs_pose_batch = []
171 |         for location_infos in location_info_list:
172 |             abs_pose_batch.append(
173 |                 self.cv_utils.get_abs_pose_features(location_infos, batch=False)
174 |             )
175 |         return torch.cat(abs_pose_batch, dim=1)
176 | 
177 |     def preprocess_mem(
178 |         self,
179 |         mem: tuple,
180 |     ) -> tuple:
181 |         instr_list_batch, location_info_list, action_batch, reward_batch, expert_len_batch = mem
182 |         new_mem = (
183 |             instr_list_batch,
184 |             self.get_vision_batch(location_info_list),
185 |             self.get_abs_pose_batch(location_info_list),
186 |             action_batch.to(self.device),
187 |             self.get_intent_batch(location_info_list),
188 |             self.get_candiate_action_embed_batch(location_info_list),
189 |             reward_batch.to(self.device),
190 |             expert_len_batch.to(self.device),
191 |         )
192 |         return new_mem
193 | 
194 | 
def main():
    """Placeholder entry point; this module has nothing to run on its own."""
    return None


if __name__ == '__main__':
    main()
201 | 


--------------------------------------------------------------------------------
/tasks/reinforce_fine_tune.py:
--------------------------------------------------------------------------------
  1 | import os
  2 | import torch
  3 | import numpy as np
  4 | from utils import load_config, print_log
  5 | from utils import init_tb_writer, init_agent
  6 | from env.rxr_env import RxREnv
  7 | from env.env_utils import Statistic
  8 | from agent.replay_memory import ReplayMemory
  9 | from agent.agent_reinforce import AgentReinforce
 10 | 
 11 | 
 12 | @torch.no_grad()
 13 | def run_data_parallel(
 14 |     data_indices: list,
 15 |     split: str,
 16 |     env: RxREnv,
 17 |     gen_gif: bool,
 18 |     agent: AgentReinforce or None,
 19 |     evaluate: bool,
 20 | ) -> (Statistic, np.ndarray, list, list):
 21 |     # setup recorder, expert
 22 |     trajs = [[] for _ in range(len(data_indices))]
 23 |     returns = np.zeros(len(data_indices))
 24 |     expert_lens = [0 for _ in range(len(data_indices))]
 25 | 
 26 |     # setup environment
 27 |     dones, location_infos = env.reset(data_indices, split, gen_gif)
 28 |     instr_embed, instr_mask, hiddens = agent.obs_encoder.instr.encode(
 29 |         [torch.tensor(instr, dtype=torch.long) for instr in env.instrs]
 30 |     )
 31 |     last_action_features = agent.get_init_action(len(data_indices))
 32 | 
 33 |     # run one episode
 34 |     while True:
 35 |         actions, candidate_action_features, hiddens = agent.act(
 36 |             location_infos=location_infos,
 37 |             instr_embed=instr_embed,
 38 |             instr_mask=instr_mask,
 39 |             last_action_features=last_action_features,
 40 |             hiddens=hiddens,
 41 |             evaluate=evaluate
 42 |         )
 43 | 
 44 |         # mask done action & interact with environment
 45 |         actions[dones >= 1] = -1
 46 |         rewards, dones, next_location_infos = env.step(actions)
 47 | 
 48 |         # update return
 49 |         returns += rewards
 50 | 
 51 |         # store trajectory
 52 |         if not evaluate:
 53 |             for parallel_idx, (done, location_info, action, reward, expert_len) in enumerate(zip(dones, location_infos, actions, rewards, expert_lens)):
 54 |                 if done <= 1:
 55 |                     trajs[parallel_idx].append([location_info, action, reward, expert_len])
 56 | 
 57 |         # terminate all trajectories
 58 |         if all(dones >= 1):
 59 |             stats = env.get_statistics()
 60 |             break
 61 | 
 62 |         # update states (the last thing in iteration)
 63 |         location_infos = next_location_infos
 64 |         last_action_features = torch.stack(
 65 |             [candidate_action_features[parallel_idx, action_idx, :] for parallel_idx, action_idx in enumerate(actions)],
 66 |             dim=0
 67 |         )
 68 |     return stats, returns, trajs, env.instrs
 69 | 
 70 | 
 71 | def rollout(
 72 |     config: dict,
 73 |     split: str,
 74 |     env: RxREnv,
 75 |     replay_memory_agent: ReplayMemory or None,
 76 |     agent: AgentReinforce,
 77 |     it_now: int,
 78 |     evaluate: bool,
 79 |     beam_size: int = 8
 80 | ) -> (np.ndarray, float, float, Statistic):
 81 |     env.set_env('dict')
 82 | 
 83 |     # rollout
 84 |     loss_list_all, alpha = np.zeros(5), 0
 85 |     if evaluate:
 86 |         # set parallel size
 87 |         parallel_size = config['r2r_env']['mp']['evaluate_parallel']
 88 | 
 89 |         # rollout all data
 90 |         returns, stats = [], Statistic([], [], [], [], [], [], [])
 91 |         for it_evaluate in range(env.get_it_num(split, parallel_size)):
 92 |             # get data_indices
 93 |             data_indices = env.get_data_indices(split, it_evaluate, parallel_size, evaluate)
 94 | 
 95 |             # rollout one parallel data
 96 |             stats_tmp, returns_tmp, _, _ = run_data_parallel(
 97 |                 data_indices=data_indices, split=split, env=env,
 98 |                 gen_gif=False, agent=agent, evaluate=evaluate
 99 |             )
100 | 
101 |             # record returns, statistics
102 |             returns += returns_tmp.tolist()
103 |             stats += stats_tmp
104 |     else:
105 |         assert split == 'train'
106 |         # set parallel size
107 |         parallel_size = config['r2r_env']['mp']['training_parallel'] // beam_size
108 | 
109 |         # get data_indices
110 |         data_indices = env.get_data_indices(split, it_now, parallel_size, evaluate)
111 | 
112 |         for _ in range(beam_size):
113 |             stats, returns, trajs, instrs = run_data_parallel(
114 |                 data_indices=data_indices, split=split, env=env,
115 |                 gen_gif=False, agent=agent, evaluate=evaluate
116 |             )
117 | 
118 |             # store in replay memory
119 |             replay_memory_agent.append(trajs, instrs)
120 | 
121 |         # update
122 |         agent_mems = replay_memory_agent.sample(config['agent']['train']['learning']['batch_size'])
123 |         loss_list_all, alpha = agent.train(agent_mems)
124 | 
125 |     # average the record of return, statistic
126 |     return_average = np.average(returns)
127 |     stat_average = stats.get_average()
128 | 
129 |     # log
130 |     print_log(it_now, 0, env.iterations, alpha, loss_list_all, return_average, stat_average)
131 |     return loss_list_all, alpha, return_average, stat_average
132 | 
133 | 
def train_test(
    config: dict
) -> None:
    """Train the agent and evaluate it on the validation splits every 50 iterations.

    Iteration 0 performs evaluation only; each later iteration runs one
    training ``rollout`` and writes its losses to tensorboard. Evaluation
    metrics for ``val_seen`` and ``val_unseen`` are written whenever the
    iteration number is a multiple of 50.

    Args:
        config: experiment configuration dictionary.
    """
    # tensorboard, environment, agent, replay memory
    writer = init_tb_writer(config)
    env = RxREnv(config)
    agent = init_agent(config, env)
    replay_memory_agent = ReplayMemory(config, env, on_policy=True)

    # tensorboard tags, in the order of the entries of loss_list_all
    loss_tags = (
        'loss/critic_1',
        'loss/critic_2',
        'loss/policy',
        'loss/entropy',
        'loss/KL divergence',
    )
    # (tag template, attribute of the averaged statistic) pairs
    eval_metrics = (
        ('%s/navigation error', 'nav_error'),
        ('%s/path length', 'path_len'),
        ('%s/success rate', 'succ_rate'),
        ('%s/success rate weighted by path length', 'succ_w_path_len'),
        ('%s/self stop rate', 'self_stop_rate'),
        ('%s/coverage weighted by length score', 'cov_w_len_score'),
    )

    last_iteration = config['agent']['train']['learning']['iteration']
    for it_now in range(last_iteration + 1):
        # training stage (skipped at iteration 0)
        if it_now > 0:
            loss_list_all, alpha, _, _ = rollout(
                config=config, split='train', env=env,
                replay_memory_agent=replay_memory_agent, agent=agent,
                it_now=it_now, evaluate=False
            )

            writer.add_scalar('loss/alpha', alpha, it_now)
            for loss_idx, tag in enumerate(loss_tags):
                writer.add_scalar(tag, loss_list_all[loss_idx], it_now)

        # evaluation stage
        if it_now % 50 != 0:
            continue

        agent.change_mode(is_train=False, ignore_state_tracker=True)
        with torch.no_grad():
            print('-----test begin-----')
            return_it_now = {}
            for split in ('val_seen', 'val_unseen'):
                _, _, return_average, stat_average = rollout(
                    config=config, split=split, env=env,
                    replay_memory_agent=None, agent=agent,
                    it_now=it_now, evaluate=True
                )
                return_it_now[split] = return_average

                for tag_template, field in eval_metrics:
                    writer.add_scalar(tag_template % split, getattr(stat_average, field)[0], it_now)
                writer.add_scalar('%s/reward' % split, return_average, it_now)
            print('-----test end  -----')
        # checkpoint saving is currently disabled:
        # agent.save(config['args']['exp_name'], it_now)
        # back to training mode
        agent.change_mode(is_train=True, ignore_state_tracker=True)
190 | 
191 | 
def main():
    """Entry point: load the configuration and run the selected mode.

    Only the ``'reinforce'`` agent is supported. In ``'train'`` mode an
    existing save directory is emptied before training starts; ``'test'`` mode
    is not implemented and any other mode only prints a message.

    Raises:
        NotImplementedError: if the configured mode is ``'test'``.
        OSError: if the old save directory cannot be removed or recreated.
    """
    import shutil

    # load args, config
    config = load_config('/root/mount/AVAST_R2R/tasks/config.json')
    assert config['args']['agent'] == 'reinforce'

    # select mode
    mode = config['args']['mode']
    if mode == 'train':
        # delete old weight; done without a shell so that paths containing
        # spaces or shell metacharacters are handled and failures are reported
        save_dir = config['save_dir']
        if os.path.isdir(save_dir):
            if os.path.islink(save_dir):
                # rmtree refuses symlinks; drop the link itself like `rm -r` did
                os.unlink(save_dir)
            else:
                shutil.rmtree(save_dir)
            os.mkdir(save_dir)
        train_test(config=config)
    elif mode == 'test':
        raise NotImplementedError
    else:
        print('Invalid mode')


if __name__ == '__main__':
    main()
213 | 


--------------------------------------------------------------------------------
/connectivity/8194nk5LbLH_connectivity.json:
--------------------------------------------------------------------------------
1 | [{"image_id":"c9e8dc09263e4d0da77d16de0ecddd39","pose":[-0.611043,-0.00396746,-0.791588,-0.213904,0.791585,-0.00882497,-0.610996,2.305,-0.00456166,-0.999953,0.00853306,1.56916,0,0,0,1],"included":true,"visible":[false,false,false,false,true,true,false,true,true,true,false,false,false,false,false,false,false,false,false,false],"unobstructed":[false,false,false,false,true,false,false,false,true,true,false,false,false,false,false,false,false,false,false,false],"height":1.5826326295962942},{"image_id":"286b0c2d9a46408ba80b6ccebb21e582","pose":[0.951596,0.00201098,0.307346,6.58012,-0.307351,0.00915895,0.951552,-2.96479,-0.000901435,-0.999956,0.00933374,4.36353,0,0,0,1],"included":true,"visible":[false,false,true,true,false,false,false,false,false,false,false,false,false,true,false,true,false,true,false,true],"unobstructed":[false,false,false,true,false,false,false,false,false,false,false,false,false,true,false,false,false,true,true,false],"height":1.5712253956498747},{"image_id":"6776097c17ed4b93aee61704eb32f06c","pose":[-0.711582,-0.00419131,-0.702591,-1.68941,0.702575,0.00464776,-0.711594,-5.37908,0.00624796,-0.99998,-0.000362505,1.58622,0,0,0,1],"included":true,"visible":[false,false,false,false,false,true,true,true,false,true,false,true,false,false,true,false,true,false,false,false],"unobstructed":[false,false,false,false,false,true,true,false,false,false,false,true,false,false,false,false,false,false,false,true],"height":1.5804941871490743},{"image_id":"8c7e8da7d4a44ab695e6b3195eac0cf1","pose":[0.709879,0.011247,0.704234,8.62929,-0.70424,-0.00407304,0.70995,-1.77115,0.0108531,-0.999928,0.00502926,4.38556,0,0,0,1],"included":true,"visible":[false,true,false,false,false,false,false,false,false,false,true,false,true,true,false,false,false,true,true,false],"unobstructed":[false,true,false,false,false,false,false,false,false,false,true,false,false,true,false,false,false,true,true,false],"height":1.585645804390483},{"image_id":"f33c718aaf2c41469389a87944442c62","pose"
:[0.619478,0.0166688,0.784837,-3.88437,-0.784902,-0.00375152,0.619609,-0.528748,0.0132725,-0.999854,0.0107595,1.58368,0,0,0,1],"included":true,"visible":[true,false,false,false,false,false,false,false,false,true,false,false,false,false,false,false,false,false,false,true],"unobstructed":[true,false,false,false,false,false,false,false,false,true,false,false,false,false,false,false,false,false,false,true],"height":1.5829827809014503},{"image_id":"fcd90a404061413385286bef9662630e","pose":[-0.111393,0.00837906,0.993741,2.80245,-0.993773,-0.00348217,-0.111367,-3.78204,0.0025272,-0.999959,0.00871482,1.58057,0,0,0,1],"included":true,"visible":[true,false,true,false,false,false,false,true,false,false,false,false,false,false,false,true,false,false,false,false],"unobstructed":[false,false,true,false,false,false,true,true,true,false,false,false,false,false,false,false,false,false,false,false],"height":1.5763528408163245},{"image_id":"c07d4ae8330542a09cf8f8dddb9728ce","pose":[-0.985207,-0.0101267,0.171069,0.656519,-0.171094,0.00168538,-0.985253,-5.08928,0.00968898,-0.999947,-0.00339301,1.57611,0,0,0,1],"included":true,"visible":[true,false,true,false,false,true,false,true,false,false,false,false,false,false,true,false,true,false,false,true],"unobstructed":[false,false,true,false,false,true,false,true,false,false,false,true,false,false,false,false,false,false,false,false],"height":1.575276915205382},{"image_id":"2393bffb53fe4205bcc67796c6fb76e3","pose":[-0.241654,0.00228344,-0.97036,3.33582,0.970294,0.0124463,-0.241608,-5.90025,0.0115256,-0.99992,-0.00522325,1.57791,0,0,0,1],"included":true,"visible":[false,false,true,false,false,true,true,false,false,false,false,false,false,false,true,false,true,false,false,false],"unobstructed":[false,false,false,false,false,true,true,false,false,false,false,false,false,false,true,false,false,false,false,false],"height":1.5730354249357412},{"image_id":"71bf74df73cd4e24a191ef4f2338ca22","pose":[0.906931,-0.00688335,-0.421222,0.122562,0.421182,-0
.00662188,0.906952,-0.00319673,-0.00903217,-0.999954,-0.00310641,1.57207,0,0,0,1],"included":true,"visible":[true,false,false,false,true,true,true,true,false,true,false,false,false,false,false,false,false,false,false,false],"unobstructed":[true,false,false,false,false,true,false,false,false,true,false,false,false,false,false,false,false,false,false,false],"height":1.570272020216938},{"image_id":"be8a2edacab34ec8887ba6a7b1e4945f","pose":[0.791463,0.0101015,0.611133,-3.50132,-0.611154,-0.00121731,0.791511,1.58103,0.00873934,-0.999948,0.00521015,1.56992,0,0,0,1],"included":true,"visible":[true,false,true,false,true,false,false,true,false,false,false,true,false,false,false,false,false,false,false,true],"unobstructed":[true,false,false,false,true,false,false,false,true,false,false,false,false,false,false,false,false,false,false,false],"height":1.577126892771864},{"image_id":"9bdde31adaa1443bb206b09bfa3c474c","pose":[0.799844,0.0047414,0.60019,8.67581,-0.600208,0.0075118,0.799809,-4.8108,-0.000716311,-0.99996,0.00885413,2.82261,0,0,0,1],"included":true,"visible":[false,false,false,true,false,false,false,false,false,false,false,false,false,true,false,false,true,true,false,false],"unobstructed":[false,false,false,true,false,false,false,false,false,false,false,false,false,false,false,false,true,false,false,false],"height":1.58264400638767},{"image_id":"66d4adb61b57494aa2c1ad141a0fad9b","pose":[-0.34536,-0.0108675,-0.938407,-2.27885,0.938436,0.00459882,-0.345423,-3.2282,0.00806945,-0.99993,0.00861029,1.58739,0,0,0,1],"included":true,"visible":[false,false,true,false,false,true,true,true,false,true,false,false,false,false,true,true,false,false,false,true],"unobstructed":[false,false,true,false,false,false,true,false,false,false,false,false,false,false,false,false,false,false,false,true],"height":1.5705441219971223},{"image_id":"83ff709c0e3e46079836153ea5c7feac","pose":[0.68423,0.0137303,0.729137,3.42529,-0.729235,0.00364543,0.684254,1.65175,0.00673696,-0.999899,0.012507,4.3706
9,0,0,0,1],"included":true,"visible":[false,false,false,true,false,false,false,false,false,false,false,false,false,false,false,false,false,false,true,false],"unobstructed":[false,false,false,false,false,false,false,false,false,false,false,false,false,false,false,false,false,false,true,false],"height":1.578378655072358},{"image_id":"d9e325df2f3948679c78b93d8025e2da","pose":[0.826698,0.0192407,0.562317,8.49764,-0.562455,0.00220125,0.826825,-0.816805,0.0146709,-0.999812,0.0126418,4.38875,0,0,0,1],"included":true,"visible":[false,true,false,true,false,false,false,false,false,false,true,false,true,false,false,false,false,true,true,false],"unobstructed":[false,true,false,true,false,false,false,false,false,false,false,false,false,false,false,false,false,true,true,false],"height":1.5865892751674604},{"image_id":"423efb97f77f4e7995f19c66fe82afbc","pose":[0.958879,0.00141119,0.283813,5.51819,-0.283808,0.0124035,0.958801,-5.67527,-0.00216725,-0.999922,0.012294,1.58856,0,0,0,1],"included":true,"visible":[false,false,true,false,false,false,true,true,false,false,false,false,false,false,false,false,true,false,false,false],"unobstructed":[false,false,false,false,false,false,false,true,false,false,false,false,false,false,false,false,true,false,false,false],"height":1.5784339701720043},{"image_id":"6c49579a5cd34df8acb7f790b74e9eae","pose":[-0.95716,-0.00676032,-0.289482,-6.48379,0.289538,-0.00977451,-0.957117,-2.57899,0.00364085,-0.999929,0.0113132,1.59886,0,0,0,1],"included":true,"visible":[false,false,false,false,false,false,false,false,false,false,false,true,false,false,false,false,false,false,false,true],"unobstructed":[false,false,false,false,false,false,false,false,false,false,false,false,false,false,false,false,false,false,false,true],"height":1.5798282335589897},{"image_id":"aeed67040d744240b188f66f17d87d43","pose":[0.132175,0.0257204,0.990893,7.67989,-0.991226,0.00381825,0.132121,-5.81072,-0.000385302,-0.999662,0.0259995,2.29866,0,0,0,1],"included":true,"visible":[false,fals
e,true,false,false,false,true,true,false,false,true,false,false,false,true,false,false,false,false,false],"unobstructed":[false,false,false,false,false,false,false,false,false,false,true,false,false,false,true,false,false,false,false,false],"height":1.6026680667792301},{"image_id":"aae01016bb354f78bd6db86e9d71af2b","pose":[0.0788252,0.00384462,0.996881,6.79041,-0.996887,0.00184069,0.0788186,-0.995862,-0.00153193,-0.999991,0.0039778,4.37219,0,0,0,1],"included":true,"visible":[false,true,false,true,false,false,false,false,false,false,false,false,false,true,false,false,false,false,true,false],"unobstructed":[false,true,false,true,false,false,false,false,false,false,false,false,false,true,false,false,false,false,true,false],"height":1.5770919536040346},{"image_id":"346b680ac5904359a1859c929ad312b6","pose":[-0.589008,0.00463239,0.808114,5.58585,-0.808123,0.00000695791,-0.589015,0.644327,-0.00273419,-0.999989,0.00373948,4.38174,0,0,0,1],"included":true,"visible":[false,true,false,true,false,false,false,false,false,false,false,false,true,true,false,false,false,true,false,false],"unobstructed":[false,true,false,true,false,false,false,false,false,false,false,false,true,true,false,false,false,true,false,false],"height":1.5707587596461066},{"image_id":"ae91518ed77047b3bdeeca864cd04029","pose":[0.310985,0.0070688,0.950389,-4.60607,-0.950392,-0.00460962,0.31102,-2.5949,0.00657945,-0.999964,0.00528466,1.58581,0,0,0,1],"included":true,"visible":[false,false,true,false,true,true,false,true,false,true,false,true,false,false,false,true,false,false,false,false],"unobstructed":[false,false,true,false,true,false,false,false,false,false,false,true,false,false,false,true,false,false,false,false],"height":1.5747548700639524}]


--------------------------------------------------------------------------------
/scripts/depth_to_skybox.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env python
  2 | 
  3 | ''' Script for generating depth skyboxes based on undistorted depth images, 
  4 |     in order to support depth output in the simulator. The current version 
  5 |     assumes that undistorted depth images are aligned to matterport skyboxes, 
  6 |     and uses simple blending. Images are downsized 50%. '''
  7 | 
  8 | import os
  9 | import math
 10 | import cv2
 11 | import numpy as np
 12 | from multiprocessing import Pool
 13 | from numpy.linalg import inv,norm
 14 | from StringIO import StringIO
 15 | 
 16 | 
# Parameters
# Size in pixels of each skybox depth face after downsizing
DOWNSIZED_WIDTH = 512
DOWNSIZED_HEIGHT = 512
# Size of the multiprocessing pool used when run as a script (one scan per task)
NUM_WORKER_PROCESSES = 20
# Default for depth_to_skybox(fill_holes=...); also decides whether cbf is imported below
FILL_HOLES = True
# Default for depth_to_skybox(visualize=...)
VISUALIZE_OUTPUT = False

if FILL_HOLES:
  # cbf is only needed for hole filling; it comes from the MatterSim module,
  # which is looked up in the 'build' directory (relative to the working directory)
  import sys
  sys.path.append('build')
  from MatterSim import cbf

# Constants
# Note: Matterport camera is really y=up, x=right, -z=look.
# Size in pixels of each skybox face that depth images are warped onto (before downsizing)
SKYBOX_WIDTH = 1024
SKYBOX_HEIGHT = 1024
base_dir = 'data/v1/scans'
# Path templates. Format arguments, as used in this file:
#   skybox_template        % (base_dir, scan, pano, skybox_index)
#   color_template         % (base_dir, scan, pano, '<camera>_<angle>')
#   depth_template         % (base_dir, scan, pano, '<camera>_<angle>')
#   camera_template        % (base_dir, scan, scan)
#   skybox_depth_template  % (base_dir, scan, pano)
skybox_template = '%s/%s/matterport_skybox_images/%s_skybox%d_sami.jpg'
color_template = '%s/%s/undistorted_color_images/%s_i%s.jpg'
depth_template = '%s/%s/undistorted_depth_images/%s_d%s.png'
camera_template = '%s/%s/undistorted_camera_parameters/%s.conf'
skybox_depth_template = '%s/%s/matterport_skybox_images/%s_skybox_depth_small.png'


# camera transform for skybox images 0-5 relative to image 1
# (3x3 rotations, indexed by skybox image number; entry 1 is the identity)
skybox_transforms = [
  np.array([[1,0,0],[0,0,-1],[0,1,0]], dtype=np.double), # 0: up (down)
  np.eye(3, dtype=np.double), # 1: reference orientation
  np.array([[0,0,-1],[0,1,0],[1,0,0]], dtype=np.double), # 2: right
  np.array([[-1,0,0],[0,1,0],[0,0,-1]], dtype=np.double), # 3: 180
  np.array([[0,0,1],[0,1,0],[-1,0,0]], dtype=np.double), # 4: left
  np.array([[1,0,0],[0,0,1],[0,-1,0]], dtype=np.double) # 5: down (up)
]
 50 | 
 51 | 
 52 | def camera_parameters(scan):
 53 |   ''' Returns two dicts containing undistorted camera intrinsics (3x3) and extrinsics (4x4),
 54 |       respectively, for a given scan. Viewpoint IDs are used as dict keys. '''
 55 |   intrinsics = {}
 56 |   extrinsics = {}
 57 |   with open(camera_template % (base_dir,scan,scan)) as f:
 58 |     pos = -1
 59 |     for line in f.readlines():
 60 |       if 'intrinsics_matrix' in line:
 61 |         intr = line.split()
 62 |         C = np.zeros((3, 3), np.double)
 63 |         C[0,0] = intr[1] # fx
 64 |         C[1,1] = intr[5] # fy
 65 |         C[0,2] = intr[3] # cx
 66 |         C[1,2] = intr[6] # cy
 67 |         C[2,2] = 1.0
 68 |         pos = 0
 69 |       elif pos >= 0 and pos < 6:
 70 |         q = line.find('.jpg')
 71 |         camera = line[q-37:q]
 72 |         if pos == 0:
 73 |           intrinsics[camera[:-2]] = C
 74 |         T = np.loadtxt(StringIO(line.split('jpg ')[1])).reshape((4,4))
 75 |         # T is camera-to-world transform, invert for world-to-camera
 76 |         extrinsics[camera] = (T,inv(T))
 77 |         pos += 1
 78 |   return intrinsics,extrinsics
 79 | 
 80 | 
 81 | def z_to_euclid(K_inv, depth):
 82 |   ''' Takes inverse intrinsics matrix and a depth image. Returns a new depth image with
 83 |       depth converted from z-distance into euclidean distance from the camera centre. '''
 84 | 
 85 |   assert len(depth.shape) == 2
 86 |   h = depth.shape[0]
 87 |   w = depth.shape[1]
 88 | 
 89 |   y,x = np.indices((h,w))
 90 |   homo_pixels = np.vstack((x.flatten(),y.flatten(),np.ones((x.size))))
 91 |   rays = K_inv.dot(homo_pixels)
 92 |   cos_theta = np.array([0,0,1]).dot(rays) / norm(rays,axis=0)
 93 | 
 94 |   output = depth / cos_theta.reshape(h,w)
 95 |   return output
 96 | 
 97 | 
 98 | def instrinsic_matrix(width, height):
 99 |   ''' Construct an ideal camera intrinsic matrix. '''
100 |   K = np.zeros((3, 3), np.double)
101 |   K[0,0] = width/2 #fx
102 |   K[1,1] = height/2 #fy
103 |   K[0,2] = width/2 #cx
104 |   K[1,2] = height/2 #cy
105 |   K[2,2] = 1.0
106 |   return K
107 | 
108 | 
109 | 
def fill_joint_bilateral_filter(rgb, depth):
  ''' Fill holes (zero pixels) in a 16bit depth image, guided by the
      corresponding colour image.

      rgb   -- colour image, converted to grayscale with COLOR_BGR2GRAY
      depth -- depth image; it is quantised to 8 bits for the filter

      Returns the filtered depth rescaled to the input range as uint16. '''

  gray = cv2.cvtColor(rgb, cv2.COLOR_BGR2GRAY)

  # Quantise depth to uint8; scaling by max+1 keeps every value below 1
  depth_scale = np.max(depth)+1
  normalised = depth.astype(np.float64)/depth_scale
  normalised[normalised > 1] = 1
  depth_8bit = (normalised*255).astype(np.uint8)

  # The filter is handed column-major arrays
  depth_f = np.asfortranarray(depth_8bit)
  gray_f = np.asfortranarray(gray)
  holes = (depth_f == 0)
  filled = np.zeros_like(depth_f)

  # Fill holes; the output is written into `filled`
  cbf(depth_f, gray_f, holes, filled)

  # Undo the 8 bit quantisation
  return (filled.astype(np.float64)/255*depth_scale).astype(np.uint16)
131 | 
132 | 
def depth_to_skybox(scan, visualize=VISUALIZE_OUTPUT, fill_holes=FILL_HOLES):
  ''' Generate a depth skybox image for every panorama of a scan.

      For each panorama the 18 undistorted depth images (3 cameras x 6 angles)
      are converted to euclidean distance, warped onto each of the 6 skybox
      faces and pasted into the face (later images overwrite earlier ones
      where their eroded mask is valid). Each face is flipped, downsized to
      DOWNSIZED_WIDTH x DOWNSIZED_HEIGHT, optionally hole-filled, and the 6
      faces are concatenated horizontally.

      scan       -- scan id (directory name under base_dir)
      visualize  -- if true, display the result in OpenCV windows instead of
                    writing it to disk
      fill_holes -- if true, fill holes in each face using the skybox colour image

      Raises AssertionError if an output image cannot be written. '''

  # Load camera parameters
  intrinsics,extrinsics = camera_parameters(scan)
  # Skybox camera intrinsics
  K_skybox = instrinsic_matrix(SKYBOX_WIDTH, SKYBOX_HEIGHT)
  # OpenCV expects the output size as (width, height)
  skybox_size = (SKYBOX_WIDTH, SKYBOX_HEIGHT)

  # Intrinsics keys are '<pano>_i<camera>': the part before the first '_' is the pano id
  pano_ids = list(set([item.split('_')[0] for item in intrinsics.keys()]))
  print('Processing scan %s with %d panoramas' % (scan, len(pano_ids)))

  if visualize:
    cv2.namedWindow('RGB')
    cv2.namedWindow('Depth')
    cv2.namedWindow('Skybox')

  for pano in pano_ids:

    # Load undistorted depth and rgb images
    depth = {}
    rgb = {}
    for c in range(3):
      K_inv = inv(intrinsics['%s_i%d' % (pano,c)])
      for i in range(6):
        name = '%d_%d' % (c,i)
        if visualize:
          rgb[name] = cv2.imread(color_template % (base_dir,scan,pano,name))
        # Load 16bit grayscale image
        d_im = cv2.imread(depth_template % (base_dir,scan,pano,name), cv2.IMREAD_ANYDEPTH)
        depth[name] = z_to_euclid(K_inv, d_im)

    ims = []
    for skybox_ix in range(6):

      # Load skybox image
      skybox = cv2.imread(skybox_template % (base_dir,scan,pano,skybox_ix))

      # Skybox index 1 is the same orientation as camera image 1_5
      skybox_ctw,_ = extrinsics[pano + '_i1_5']
      skybox_ctw = skybox_ctw[:3,:3].dot(skybox_transforms[skybox_ix])
      skybox_wtc = inv(skybox_ctw)

      base_depth = np.zeros((SKYBOX_HEIGHT,SKYBOX_WIDTH), np.uint16)
      if visualize:
        base_rgb = np.zeros((SKYBOX_HEIGHT,SKYBOX_WIDTH,3), np.uint8)

      for camera in range(3):
        for angle in range(6):

          # Camera parameters
          im_name = '%d_%d' % (camera,angle)
          K_im = intrinsics[pano + '_i' + im_name[0]]
          T_ctw,T_wtc = extrinsics[pano + '_i' + im_name]
          R_ctw = T_ctw[:3,:3]

          # Check if this image can be skipped (facing away)
          z = np.array([0,0,1])
          if R_ctw.dot(z).dot(skybox_ctw.dot(z)) < 0:
            continue

          # Compute homography
          H = K_skybox.dot(skybox_wtc.dot(R_ctw.dot(inv(K_im))))

          # Warp and blend the depth image
          flip = cv2.flip(depth[im_name], 1) # flip around y-axis
          warp = cv2.warpPerspective(flip, H, skybox_size, flags=cv2.INTER_NEAREST)
          mask = cv2.warpPerspective(np.ones_like(flip), H, skybox_size, flags=cv2.INTER_LINEAR)
          mask[warp == 0] = 0 # Set mask to zero where we don't have any depth values
          mask = cv2.erode(mask,np.ones((3,3),np.uint8),iterations = 1)
          locs = np.where(mask == 1)
          base_depth[locs[0], locs[1]] = warp[locs[0], locs[1]]

          if visualize:
            # Warp and blend the rgb image
            flip = cv2.flip(rgb[im_name], 1) # flip around y-axis
            warp = cv2.warpPerspective(flip, H, skybox_size, flags=cv2.INTER_LINEAR)
            mask = cv2.warpPerspective(np.ones_like(flip), H, skybox_size, flags=cv2.INTER_LINEAR)
            mask = cv2.erode(mask,np.ones((3,3),np.uint8),iterations = 1)
            locs = np.where(mask == 1)
            base_rgb[locs[0], locs[1]] = warp[locs[0], locs[1]]

      depth_small = cv2.resize(cv2.flip(base_depth, 1),(DOWNSIZED_WIDTH,DOWNSIZED_HEIGHT),interpolation=cv2.INTER_NEAREST) # flip around y-axis, downsize
      if fill_holes:
        depth_filled = fill_joint_bilateral_filter(skybox, depth_small) # Fill holes
        ims.append(depth_filled)
      else:
        ims.append(depth_small)

      # Per-face debug view, deliberately disabled by the constant False
      if visualize and False:
        cv2.imshow('Skybox', skybox)
        cv2.imshow('Depth', cv2.applyColorMap((depth_small/256).astype(np.uint8), cv2.COLORMAP_JET))
        rgb_output = cv2.flip(base_rgb, 1) # flip around y-axis
        cv2.imshow('RGB', rgb_output)
        cv2.waitKey(0)

    # Lay the 6 faces side by side
    newimg = np.concatenate(ims, axis=1)

    if visualize:
      maxDepth = np.max(newimg)+1
      newimg = (newimg.astype(np.float64)/maxDepth)
      newimg = (newimg*255).astype(np.uint8)
      cv2.imshow('Depth pano', cv2.applyColorMap(newimg, cv2.COLORMAP_JET))
      cv2.waitKey(0)
    else:
      # Save output. The write happens outside the assert so that it still
      # runs when assertions are stripped (python -O).
      outfile = skybox_depth_template % (base_dir,scan,pano)
      written = cv2.imwrite(outfile, newimg)
      assert written, ('Could not write to %s' % outfile)

  if visualize:
    cv2.destroyAllWindows()
  print('Completed scan %s' % (scan))
243 | 
244 | 
245 | 
if __name__ == '__main__':

  # Read the scan ids up front so the file is closed before the workers start;
  # blank lines are ignored rather than treated as an empty scan id
  with open('connectivity/scans.txt') as f:
    scans = [scan.strip() for scan in f if scan.strip()]

  # One task per scan; always release the worker processes afterwards
  p = Pool(NUM_WORKER_PROCESSES)
  try:
    p.map(depth_to_skybox, scans)
  finally:
    p.close()
    p.join()
252 | 
253 | 
254 | 
255 | 


--------------------------------------------------------------------------------
/include/NavGraph.hpp:
--------------------------------------------------------------------------------
  1 | #ifndef NAVGRAPH_HPP
  2 | #define NAVGRAPH_HPP
  3 | 
  4 | #include <memory>
  5 | #include <vector>
  6 | #include <unordered_map>
  7 | #include <random>
  8 | #include <cmath>
  9 | #include <sstream>
 10 | #include <stdexcept>
 11 | 
 12 | #include <jsoncpp/json/json.h>
 13 | #include <opencv2/opencv.hpp>
 14 | 
 15 | #ifdef OSMESA_RENDERING
 16 | #define GL_GLEXT_PROTOTYPES
 17 | #include <GL/gl.h>
 18 | #include <GL/osmesa.h>
 19 | #elif defined (EGL_RENDERING)
 20 | #include <epoxy/gl.h>
 21 | #include <EGL/egl.h>
 22 | #else
 23 | #include <GL/glew.h>
 24 | #endif
 25 | 
 26 | #define GLM_FORCE_RADIANS
 27 | #include <glm/glm.hpp>
 28 | #include <glm/gtc/matrix_transform.hpp>
 29 | #include <glm/gtc/type_ptr.hpp>
 30 | 
 31 | namespace mattersim {
 32 | 
 33 |     static void assertOpenGLError(const std::string& msg) {
 34 |       GLenum error = glGetError();
 35 |       if (error != GL_NO_ERROR) {
 36 |         std::stringstream s;
 37 |         s << "OpenGL error 0x" << std::hex << error << " at " << msg;
 38 |         throw std::runtime_error(s.str());
 39 |       }
 40 |     }
 41 | #ifdef EGL_RENDERING
 42 |     static void assertEGLError(const std::string& msg) {
 43 |       EGLint error = eglGetError();
 44 | 
 45 |       if (error != EGL_SUCCESS) {
 46 |         std::stringstream s;
 47 |         s << "EGL error 0x" << std::hex << error << " at " << msg;
 48 |         throw std::runtime_error(s.str());
 49 |       }
 50 |     }
 51 | #endif
 52 | 
 53 |     /**
 54 |      * Navigation graph indicating which panoramic viewpoints are adjacent, and also 
 55 |      * containing (optionally pre-loaded) skybox / cubemap images and textures.
 56 |      * Class is a singleton to ensure images and textures are only loaded once.
 57 |      */
 58 |     class NavGraph final {
 59 | 
 60 |     private:
 61 | 
 62 |         NavGraph(const std::string& navGraphPath, const std::string& datasetPath, 
 63 |                 bool preloadImages, bool renderDepth, int randomSeed, unsigned int cacheSize);
 64 | 
 65 |         ~NavGraph();
 66 | 
 67 |     public:
 68 |         // Delete the default, copy and move constructors
 69 |         NavGraph() = delete;
 70 |         NavGraph(const NavGraph&) = delete;
 71 |         NavGraph& operator=(const NavGraph&) = delete;
 72 |         NavGraph(NavGraph&&) = delete;
 73 |         NavGraph& operator=(NavGraph&&) = delete;
 74 | 
 75 |         /**
 76 |          * First call will load the navigation graph from disk and (optionally) preload the 
 77 |          * cubemap images into memory.
 78 |          * @param navGraphPath - directory containing json viewpoint connectivity graphs
 79 |          * @param datasetPath - directory containing a data directory for each Matterport scan id
 80 |          * @param preloadImages - if true, all cubemap images will be loaded into CPU memory immediately
 81 |          * @param renderDepth - if true, depth map images are also required
 82 |          * @param randomSeed - only used for randomViewpoint function
 83 |          * @param cacheSize - number of pano textures to keep in GPU memory
 84 |          */
 85 |         static NavGraph& getInstance(const std::string& navGraphPath, const std::string& datasetPath, 
 86 |                 bool preloadImages, bool renderDepth, int randomSeed, unsigned int cacheSize);
 87 |   
 88 |         /**
 89 |          * Select a random viewpoint from a scan
 90 |          */
 91 |         const std::string& randomViewpoint(const std::string& scanId);
 92 |                       
 93 |         /**
 94 |          * Find the index of a selected viewpointId
 95 |          */
 96 |         unsigned int index(const std::string& scanId, const std::string& viewpointId) const;
 97 | 
 98 |         /**
 99 |          * ViewpointId of a selected viewpoint index
100 |          */
101 |         const std::string& viewpoint(const std::string& scanId, unsigned int ix) const;
102 | 
103 |         /**
104 |          * Camera rotation matrix for a selected viewpoint index
105 |          */
106 |         const glm::mat4& cameraRotation(const std::string& scanId, unsigned int ix) const;
107 | 
108 |         /**
109 |          * Camera position vector for a selected viewpoint index
110 |          */
111 |         const glm::vec3& cameraPosition(const std::string& scanId, unsigned int ix) const;
112 | 
113 |         /**
114 |          * Return a list of other viewpoint indices that are reachable from a selected viewpoint index
115 |          */
116 |         std::vector<unsigned int> adjacentViewpointIndices(const std::string& scanId, unsigned int ix) const;
117 | 
118 |         /**
119 |          * Get cubemap RGB (and optionally, depth) textures for a selected viewpoint index
120 |          */
121 |         std::pair<GLuint, GLuint> cubemapTextures(const std::string& scanId, unsigned int ix);
122 | 
123 |         /**
124 |          * Free GPU memory associated with this viewpoint's textures
125 |          */
126 |         void deleteCubemapTextures(const std::string& scanId, unsigned int ix);
127 | 
128 | 
129 |     protected:
130 | 
131 |         /**
132 |          * Helper class representing nodes in the navigation graph and their cubemap textures.
133 |          */
        class Location {

        public:
            /**
             * Construct a location object from a json struct
             * @param viewpoint - json struct
             * @param skyboxDir - directory containing a data directory for each Matterport scan id
             * @param preload - if true, all cubemap images will be loaded into CPU memory immediately
             * @param depth - if true, depth textures will also be provided
             */
            Location(const Json::Value& viewpoint, const std::string& skyboxDir, bool preload, bool depth);

            Location() = delete; // no default constructor

            /**
             * Return the cubemap RGB (and optionally, depth) textures for this viewpoint, which will
             * be loaded from CPU memory or disk if necessary
             * @return pair of GL texture names
             * NOTE(review): presumably (cubemap_texture, depth_texture) in that order - confirm
             * in the implementation file.
             */
            std::pair<GLuint, GLuint> cubemapTextures();

            /**
             * Free GPU memory associated with RGB and depth textures at this location
             */
            void deleteCubemapTextures();

            std::string viewpointId;        //! Unique Matterport identifier for every pano
            bool included;                  //! Some duplicated viewpoints have been excluded
            glm::mat4 rot;                  //! Camera pose rotation component
            glm::vec3 pos;                  //! Camera pose translation component
            std::vector<bool> unobstructed; //! Connections to other graph locations

        protected:

            /**
             * Load RGB (and optionally, depth) cubemap images from disk into CPU memory
             */
            void loadCubemapImages();

            /**
             * Create RGB (and optionally, depth) textures from cubemap images (e.g., in GPU memory)
             */
            void loadCubemapTextures();

            GLuint cubemap_texture;         //! GL texture name for the RGB cubemap
            GLuint depth_texture;           //! GL texture name for the depth cubemap
            cv::Mat xpos;                   //! RGB images for faces of the cubemap
            cv::Mat xneg;
            cv::Mat ypos;
            cv::Mat yneg;
            cv::Mat zpos;
            cv::Mat zneg;
            cv::Mat xposD;                   //! Depth images for faces of the cubemap
            cv::Mat xnegD;
            cv::Mat yposD;
            cv::Mat ynegD;
            cv::Mat zposD;
            cv::Mat znegD;
            bool im_loaded;                 //! NOTE(review): presumably true once the face images are in CPU memory - confirm
            bool includeDepth;              //! NOTE(review): presumably the constructor's `depth` flag - confirm
            std::string skyboxDir;          //! Path to skybox images
        };
195 |         typedef std::shared_ptr<Location> LocationPtr;
196 | 
197 | 
198 |         /**
199 |          * Helper class implementing a LRU cache for cubemap textures.
200 |          */
201 |         class TextureCache {
202 | 
203 |         public:
204 |             TextureCache(unsigned int size) : size(size) {
205 |                 cacheMap.reserve(size+1);
206 |             }
207 | 
208 |             TextureCache() = delete; // no default constructor
209 | 
210 |             void add(LocationPtr loc) {
211 |                 auto map_it = cacheMap.find(loc);
212 |                 if (map_it != cacheMap.end()) {
213 |                     // Remove entry from middle of list
214 |                     cacheList.erase(map_it->second);
215 |                     cacheMap.erase(map_it);
216 |                 }
217 |                 // Add element to list and save iterator on map
218 |                 auto list_it = cacheList.insert(cacheList.begin(), loc);
219 |                 cacheMap.emplace(loc, list_it);
220 |                 if (cacheMap.size() >= size) {
221 |                     removeEldest();
222 |                 }
223 |             }
224 | 
225 |             void removeEldest() {
226 |                 if (cacheMap.empty()) {
227 |                     throw std::runtime_error("MatterSim: TextureCache is empty");
228 |                 }
229 |                 LocationPtr loc = cacheList.back();
230 |                 loc->deleteCubemapTextures();
231 |                 cacheMap.erase(loc);
232 |                 cacheList.pop_back();
233 |             }
234 | 
235 |         private:
236 |             unsigned int size;
237 |             std::unordered_map<LocationPtr, std::list<LocationPtr>::iterator > cacheMap;
238 |             std::list<LocationPtr> cacheList;
239 |         };
240 | 
241 |         
        //! Locations of every scan; NOTE(review): presumably keyed by scan id and ordered by viewpoint index - confirm
        std::map<std::string, std::vector<LocationPtr> > scanLocations;
        //! Random engine; NOTE(review): presumably seeded from getInstance's randomSeed and used by randomViewpoint - confirm
        std::default_random_engine generator;
        //! LRU cache of locations whose cubemap textures are currently kept
        TextureCache cache;
245 |     };
246 | 
247 | }
248 | 
249 | #endif
250 | 


--------------------------------------------------------------------------------
/tasks/agent/agent_seq2seq.py:
--------------------------------------------------------------------------------
  1 | import torch
  2 | import torch.nn as nn
  3 | import numpy as np
  4 | from agent.agent_base import AgentBase
  5 | from agent.model import CategoricalPolicy, TwinnedQNetwork
  6 | 
  7 | 
  8 | class AgentSeq2Seq(AgentBase):
  9 |     def __init__(
 10 |         self,
 11 |         config: dict,
 12 |         env: object,
 13 |         weight_decay: float = 0.0005
 14 |     ) -> None:
 15 |         super().__init__(config, env)
 16 |         assert config['args']['agent'] == 'seq2seq'
 17 | 
 18 |         if config['args']['mode'] != 'test':
 19 |             self.agent_learning_config = config['agent'][config['args']['mode']]['learning']
 20 |             self.optimizer_obs = torch.optim.Adam(self.obs_encoder.parameters(), lr=self.agent_learning_config['lr'], weight_decay=weight_decay)
 21 |             self.optimizer_pomdp = torch.optim.Adam(self.state_tracker.parameters(), lr=self.agent_learning_config['lr'], weight_decay=weight_decay)
 22 |             self.gamma = self.agent_learning_config['gamma']
 23 | 
 24 |         input_size = self.state_tracker.state_dim
 25 |         # critic
 26 |         self.q_behavior = TwinnedQNetwork(input_size, self.cv_utils.action_feature_size).to(config['device'])
 27 |         # policy
 28 |         self.policy = CategoricalPolicy(input_size, self.cv_utils.action_feature_size).to(config['device'])
 29 | 
 30 |         # setup loss function and optimizer
 31 |         if config['args']['mode'] != 'test':
 32 |             self.cross_entropy = nn.CrossEntropyLoss()
 33 |             self.optimizer_q1 = torch.optim.Adam(self.q_behavior.q_net1.parameters(), lr=self.agent_learning_config['lr'])
 34 |             self.optimizer_q2 = torch.optim.Adam(self.q_behavior.q_net2.parameters(), lr=self.agent_learning_config['lr'])
 35 |             self.optimizer_policy = torch.optim.Adam(self.policy.parameters(), lr=self.agent_learning_config['lr'], weight_decay=weight_decay)
 36 | 
 37 |         self.update_networks()
 38 |         for net_type, nets in self.networks.items():
 39 |             for net_id, net in nets.items():
 40 |                 if isinstance(net, torch.nn.Module):
 41 |                     print('%5s: %-10s' % (net_type, net_id), "<class 'torch.nn.Module'>")
 42 |                 else:
 43 |                     print('%5s: %-10s' % (net_type, net_id), type(net))
 44 |         return
 45 | 
 46 |     def _kld_gauss(
 47 |         self,
 48 |         mean_1: torch.Tensor,
 49 |         std_1: torch.Tensor,
 50 |         mean_2: torch.Tensor,
 51 |         std_2: torch.Tensor
 52 |     ) -> torch.Tensor:
 53 |         kld_element = (2 * torch.log(std_2) - 2 * torch.log(std_1) + (std_1.pow(2) + (mean_1 - mean_2).pow(2)) / std_2.pow(2) - 1)
 54 |         return 0.5 * torch.sum(kld_element) / mean_1.shape[0]
 55 | 
 56 |     def train(
 57 |         self,
 58 |         pair_datas: dict
 59 |     ) -> np.ndarray:
 60 |         labels = torch.tensor(pair_datas['labels'], device=self.config['device'])
 61 |         logits = torch.cat(pair_datas['logits'], dim=0)
 62 |         q1_s = torch.cat(pair_datas['q1s'], dim=0)
 63 |         q2_s = torch.cat(pair_datas['q2s'], dim=0)
 64 |         target_values = torch.zeros_like(q1_s)
 65 |         for batch_idx, (action, curriculum_reward) in enumerate(zip(pair_datas['labels'], pair_datas['curriculum_rewards'])):
 66 |             target_values[batch_idx, action] = curriculum_reward
 67 | 
 68 |         loss = torch.zeros(1, device=self.config['device'])
 69 |         # get critic loss
 70 |         legal = (q1_s != -float('inf'))
 71 |         q1_loss = torch.mean((q1_s[legal] - target_values[legal]).pow(2))
 72 |         q2_loss = torch.mean((q2_s[legal] - target_values[legal]).pow(2))
 73 |         loss += (q1_loss + q2_loss)
 74 |         # get policy loss
 75 |         policy_loss = self.cross_entropy(logits, labels)
 76 |         loss += policy_loss
 77 | 
 78 |         # get avast loss
 79 |         if self.config['args']['state_tracker'] == 'avast':
 80 |             # kld
 81 |             kld_loss = self._kld_gauss(
 82 |                 mean_1=torch.cat(pair_datas['posterior_means'], dim=0),
 83 |                 std_1=torch.cat(pair_datas['posterior_stds'], dim=0),
 84 |                 mean_2=torch.cat(pair_datas['prior_means'], dim=0),
 85 |                 std_2=torch.cat(pair_datas['prior_stds'], dim=0)
 86 |             )
 87 |             loss += kld_loss
 88 | 
 89 |         # zero grad
 90 |         if self.config['args']['state_tracker'] == 'avast' and not self.config['args']['aug_data']:
 91 |             pass
 92 |         else:
 93 |             self.optimizer_obs.zero_grad()
 94 |             self.optimizer_pomdp.zero_grad()
 95 |         self.optimizer_policy.zero_grad()
 96 |         self.optimizer_q1.zero_grad()
 97 |         self.optimizer_q2.zero_grad()
 98 | 
 99 |         # get gradient
100 |         loss.backward()
101 | 
102 |         # gradient clipping
103 |         nn.utils.clip_grad_norm_(self.obs_encoder.instr.parameters(), 100)
104 |         nn.utils.clip_grad_norm_(self.state_tracker.parameters(), 10)
105 |         nn.utils.clip_grad_norm_(self.q_behavior.parameters(), 1)
106 | 
107 |         # update
108 |         self.optimizer_obs.step()
109 |         self.optimizer_pomdp.step()
110 |         self.optimizer_policy.step()
111 |         self.optimizer_q1.step()
112 |         self.optimizer_q2.step()
113 |         return np.array([q1_loss.item(), q2_loss.item(), policy_loss.item(), 0, kld_loss.item() if self.config['args']['state_tracker'] == 'avast' else 0])
114 | 
115 |     def act(
116 |         self,
117 |         location_infos: list,
118 |         instr_embed: torch.Tensor,
119 |         instr_mask: torch.Tensor,
120 |         last_action_features: torch.Tensor,
121 |         hiddens: torch.Tensor,
122 |         evaluate: bool,
123 |         act_by: str = 'policy'
124 |     ) -> (np.ndarray, dict, torch.Tensor, torch.Tensor):
125 |         # get belief state
126 |         belief_states, context_belief_states, instr_attn_weight, hiddens = self.get_belief_states(
127 |             vision_features=self.cv_utils.get_vision_features(location_infos),
128 |             abs_pose_features=self.cv_utils.get_abs_pose_features(location_infos).squeeze(0),
129 |             action_features=last_action_features,
130 |             instr_embed=instr_embed,
131 |             instr_mask=instr_mask,
132 |             hiddens=hiddens,
133 |             enable_grad=not evaluate
134 |         )
135 | 
136 |         with torch.set_grad_enabled(not evaluate):
137 |             # get candidate action feature
138 |             candidate_action_features = self.cv_utils.get_candidate_action_features(location_infos).squeeze(0)
139 | 
140 |             # generate actions
141 |             legals = self.intent_to_mask(
142 |                 intents=[location_info.action_info.intents for location_info in location_infos],
143 |                 find_legal=True
144 |             )
145 |             illegal = torch.logical_not(torch.tensor(legals, dtype=torch.bool).view(-1, self.action_space))
146 | 
147 |             q1_s, q2_s = self.q_behavior(torch.cat([context_belief_states, hiddens[0]], dim=1), candidate_action_features)
148 |             q1_s[illegal] = -float('inf')
149 |             q2_s[illegal] = -float('inf')
150 | 
151 |             policy_out = self.policy(belief_states, candidate_action_features)
152 |             policy_out[illegal] = -float('inf')
153 | 
154 |             outputs = {
155 |                 'logits': policy_out,
156 |                 'q1s': q1_s,
157 |                 'q2s': q2_s
158 |             }
159 | 
160 |             # select action by greedy
161 |             if act_by == 'critic':
162 |                 q_s = q1_s if np.random.rand() < 0.5 else q2_s
163 |                 actions = torch.argmax(q_s, dim=1).cpu().view(-1).numpy()
164 |             elif act_by == 'policy':
165 |                 actions = torch.argmax(policy_out, dim=1).cpu().view(-1).numpy()
166 |             else:
167 |                 raise NotImplementedError
168 |         return actions, outputs, candidate_action_features, hiddens
169 | 
170 |     def act_with_dists(
171 |         self,
172 |         location_infos: list,
173 |         instr_embed: torch.Tensor,
174 |         instr_mask: torch.Tensor,
175 |         last_action_features: torch.Tensor,
176 |         hiddens: torch.Tensor,
177 |         evaluate: bool
178 |     ) -> (np.ndarray, dict, torch.Tensor, torch.Tensor, dict):
179 |         assert not evaluate
180 |         # get belief state
181 |         belief_states, context_belief_states, instr_attn_weight, hiddens, dists = self.get_belief_states_with_dist(
182 |             vision_features=self.cv_utils.get_vision_features(location_infos),
183 |             abs_pose_features=self.cv_utils.get_abs_pose_features(location_infos).squeeze(0),
184 |             action_features=last_action_features,
185 |             instr_embed=instr_embed,
186 |             instr_mask=instr_mask,
187 |             hiddens=hiddens,
188 |             enable_grad=not evaluate
189 |         )
190 | 
191 |         # get candidate action feature
192 |         candidate_action_features = self.cv_utils.get_candidate_action_features(location_infos).squeeze(0)
193 | 
194 |         # generate actions
195 |         legals = self.intent_to_mask(
196 |             intents=[location_info.action_info.intents for location_info in location_infos],
197 |             find_legal=True
198 |         )
199 |         illegal = torch.logical_not(torch.tensor(legals, dtype=torch.bool).view(-1, self.action_space))
200 | 
201 |         q1_s, q2_s = self.q_behavior(torch.cat([context_belief_states, hiddens[0]], dim=1), candidate_action_features)
202 |         q1_s[illegal] = -float('inf')
203 |         q2_s[illegal] = -float('inf')
204 | 
205 |         policy_out = self.policy(belief_states, candidate_action_features)
206 |         policy_out[illegal] = -float('inf')
207 | 
208 |         outputs = {'logits': policy_out, 'q1s': q1_s, 'q2s': q2_s}
209 | 
210 |         # select action by greedy
211 |         actions = torch.argmax(policy_out, dim=1).cpu().view(-1).numpy()
212 |         return actions, outputs, candidate_action_features, hiddens, dists
213 | 
214 | 
def main():
    """Placeholder entry point: this module has nothing to run on its own."""
    return None


if __name__ == "__main__":
    main()
221 | 
222 | 


--------------------------------------------------------------------------------
/tasks/ast_pre_train.py:
--------------------------------------------------------------------------------
  1 | import os
  2 | import torch
  3 | import numpy as np
  4 | from utils import load_config, print_log
  5 | from utils import init_tb_writer, init_agent
  6 | from env.rxr_env import RxREnv
  7 | from env.env_utils import Statistic
  8 | from agent.agent_seq2seq import AgentSeq2Seq
  9 | 
 10 | 
 11 | def run_data_parallel(
 12 |     data_indices: list,
 13 |     split: str,
 14 |     env: RxREnv,
 15 |     gen_gif: bool,
 16 |     agent: AgentSeq2Seq,
 17 |     evaluate: bool,
 18 |     act_by: str = 'policy'
 19 | ) -> (Statistic, np.ndarray, list, list):
 20 |     # setup recorder, expert
 21 |     pair_datas = {
 22 |         'logits': [], 'labels': [],
 23 |         'q1s': [], 'q2s': [], 'curriculum_rewards': []
 24 |     }
 25 |     returns = np.zeros(len(data_indices))
 26 |     expert_trajs = env.get_expert_trajs(data_indices, split)
 27 | 
 28 |     # setup environment
 29 |     dones, location_infos = env.reset(data_indices, split, gen_gif)
 30 |     instr_embed, instr_mask, hiddens = agent.obs_encoder.instr.encode(
 31 |         [torch.tensor(instr, dtype=torch.long) for instr in env.instrs]
 32 |     )
 33 |     last_action_features = agent.get_init_action(len(data_indices))
 34 | 
 35 |     # run one episode
 36 |     while True:
 37 |         actions, outputs, candidate_action_features, hiddens = agent.act(
 38 |             location_infos=location_infos,
 39 |             instr_embed=instr_embed,
 40 |             instr_mask=instr_mask,
 41 |             last_action_features=last_action_features,
 42 |             hiddens=hiddens,
 43 |             evaluate=evaluate,
 44 |             act_by=act_by
 45 |         )
 46 | 
 47 |         if not evaluate:
 48 |             actions = np.zeros(len(data_indices), dtype=np.int)
 49 |             for parallel_idx, expert_traj in enumerate(expert_trajs):
 50 |                 actions[parallel_idx] = expert_traj[env.iterations[parallel_idx]]
 51 | 
 52 |         # mask done action & interact with environment
 53 |         actions[dones >= 1] = -1
 54 |         rewards, dones, next_location_infos = env.step(actions)
 55 | 
 56 |         # update return
 57 |         returns += rewards
 58 | 
 59 |         # store trajectory
 60 |         if not evaluate:
 61 |             for parallel_idx, done in enumerate(dones):
 62 |                 if done <= 1:
 63 |                     iteration = env.iterations[parallel_idx]
 64 |                     expert_len = len(expert_trajs[parallel_idx])
 65 |                     pair_datas['logits'].append(
 66 |                         outputs['logits'][parallel_idx: parallel_idx + 1]
 67 |                     )
 68 |                     pair_datas['labels'].append(
 69 |                         actions[parallel_idx]
 70 |                     )
 71 |                     pair_datas['q1s'].append(
 72 |                         outputs['q1s'][parallel_idx: parallel_idx + 1]
 73 |                     )
 74 |                     pair_datas['q2s'].append(
 75 |                         outputs['q2s'][parallel_idx: parallel_idx + 1]
 76 |                     )
 77 |                     pair_datas['curriculum_rewards'].append(
 78 |                         2 * env.reward_scale * (agent.gamma ** (expert_len - iteration - 1))
 79 |                     )
 80 | 
 81 |         # terminate all trajectories
 82 |         if all(dones >= 1):
 83 |             stats = env.get_statistics()
 84 |             break
 85 | 
 86 |         # update states (the last thing in iteration)
 87 |         location_infos = next_location_infos
 88 |         last_action_features = torch.stack(
 89 |             [candidate_action_features[parallel_idx, action_idx, :] for parallel_idx, action_idx in enumerate(actions)],
 90 |             dim=0
 91 |         )
 92 |     return stats, returns, pair_datas
 93 | 
 94 | 
 95 | def rollout(
 96 |     config: dict,
 97 |     split: str,
 98 |     env: RxREnv,
 99 |     agent: AgentSeq2Seq,
100 |     it_now: int,
101 |     evaluate: bool,
102 |     act_by: str = 'policy'
103 | ) -> (np.ndarray, float, Statistic):
104 |     env.set_env('dict')
105 | 
106 |     # rollout
107 |     loss_list = np.zeros(5)  # critic1, critic2, policy, entropy, kld
108 |     if evaluate:
109 |         # set parallel size
110 |         parallel_size = config['r2r_env']['mp']['evaluate_parallel']
111 | 
112 |         # rollout all data
113 |         returns, stats = [], Statistic([], [], [], [], [], [], [])
114 |         for it_evaluate in range(env.get_it_num(split, parallel_size)):
115 |             # get data_indices
116 |             data_indices = env.get_data_indices(split, it_evaluate, parallel_size, evaluate)
117 | 
118 |             # rollout one parallel data
119 |             stats_tmp, returns_tmp, _ = run_data_parallel(
120 |                 data_indices=data_indices, split=split, env=env,
121 |                 gen_gif=False, agent=agent, evaluate=evaluate,
122 |                 act_by=act_by
123 |             )
124 | 
125 |             # record returns, statistics
126 |             returns += returns_tmp.tolist()
127 |             stats += stats_tmp
128 |     else:
129 |         assert split == 'train'
130 |         # set parallel size
131 |         parallel_size = config['r2r_env']['mp']['training_parallel']
132 | 
133 |         # get data_indices
134 |         data_indices = env.get_data_indices(split, it_now, parallel_size, evaluate)
135 | 
136 |         # rollout one parallel data
137 |         stats, returns, pair_datas = run_data_parallel(
138 |             data_indices=data_indices, split=split, env=env,
139 |             gen_gif=False, agent=agent, evaluate=evaluate
140 |         )
141 | 
142 |         # update
143 |         loss_list += agent.train(pair_datas)
144 | 
145 |     # average the record of loss, return, statistic
146 |     return_average = np.average(returns)
147 |     stat_average = stats.get_average()
148 | 
149 |     # log
150 |     print_log(it_now, 0, env.iterations, 0, loss_list, return_average, stat_average)
151 |     return loss_list, return_average, stat_average
152 | 
153 | 
def test(
    config: dict
) -> None:
    """Load saved weights and evaluate both validation splits.

    Each split is rolled out twice: once acting by the policy and once acting
    by the critic. Results are reported through `rollout`'s own logging.
    """
    # initialize environment, agent
    env = RxREnv(config)
    agent = init_agent(config, env)
    agent.load(config['args']['load_dir'])

    # switch to evaluation mode
    agent.change_mode(is_train=False)
    with torch.no_grad():
        print('-----test begin-----')
        for split in ('val_seen', 'val_unseen'):
            print(split)
            for act_by in ('policy', 'critic'):
                rollout(
                    config=config, split=split, env=env,
                    agent=agent, it_now=-1, evaluate=True,
                    act_by=act_by
                )
        print('-----test end  -----')
    return
180 | 
181 | 
def train_test(
    config: dict
) -> None:
    """Pre-train the agent, evaluating and saving it every 50 iterations.

    Iteration 0 only evaluates and saves; every later iteration first runs
    one training rollout and logs its losses to tensorboard. Evaluation runs
    both validation splits twice (acting by policy, then by critic) and logs
    the averaged statistics and return.
    """
    # initialize tensorboard, environment, agent
    writer = init_tb_writer(config)
    env = RxREnv(config)
    agent = init_agent(config, env)

    # tensorboard tags, in the order of the entries of the loss array
    loss_tags = (
        'loss/critic_1',
        'loss/critic_2',
        'loss/policy',
        'loss/entropy',
        'loss/KL divergence',
    )
    # statistic attributes, in the order of the tag templates below
    stat_fields = (
        'nav_error', 'path_len', 'succ_rate',
        'succ_w_path_len', 'self_stop_rate', 'cov_w_len_score',
    )
    # (act_by, statistic tag templates, reward tag template) per evaluation pass
    eval_passes = (
        (
            'policy',
            (
                '%s/navigation error',
                '%s/path length',
                '%s/success rate',
                '%s/success rate weighted by path length',
                '%s/self stop rate',
                '%s/coverage weighted by length score',
            ),
            '%s/reward',
        ),
        (
            'critic',
            (
                '%s_q/navigation error',
                '%s_q/path length',
                '%s_q/success rate',
                '%s_q/success rate weighted by path length',
                '%s_q/self stop rate',
                '%s_q/coverage weighted by length score',
            ),
            '%s_q/reward',
        ),
    )

    # bc training
    last_iteration = config['agent']['pre_train']['learning']['iteration']
    for it_now in range(last_iteration + 1):
        # training stage
        if it_now > 0:
            loss_list_all, _, _ = rollout(
                config=config, split='train', env=env,
                agent=agent, it_now=it_now, evaluate=False
            )
            for loss_idx, tag in enumerate(loss_tags):
                writer.add_scalar(tag, loss_list_all[loss_idx], it_now)

        # evaluation stage
        if it_now % 50 != 0:
            continue

        # switch to evaluation mode
        agent.change_mode(is_train=False)
        with torch.no_grad():
            print('-----test begin-----')
            for split in ('val_seen', 'val_unseen'):
                for act_by, stat_tags, reward_tag in eval_passes:
                    _, return_average, stat_average = rollout(
                        config=config, split=split, env=env,
                        agent=agent, it_now=it_now, evaluate=True,
                        act_by=act_by
                    )
                    for tag, field in zip(stat_tags, stat_fields):
                        writer.add_scalar(tag % split, getattr(stat_average, field)[0], it_now)
                    writer.add_scalar(reward_tag % split, return_average, it_now)
            print('-----test end  -----')

        # save
        agent.save(config['args']['exp_name'], it_now)
        # switch to training mode
        agent.change_mode(is_train=True)
    return
249 | 
250 | 
def _reset_save_dir(save_dir: str) -> None:
    """Replace an existing save directory by a fresh, empty one.

    Uses filesystem calls instead of a shell command line, so paths holding
    spaces or shell metacharacters are handled and failures raise OSError
    instead of being ignored.
    """
    import shutil

    if os.path.islink(save_dir):
        # like `rm -r` on a link: drop the link itself, keep its target
        os.unlink(save_dir)
    else:
        shutil.rmtree(save_dir)
    os.mkdir(save_dir)


def main(config_path: str = '/root/mount/AVAST_R2R/tasks/config.json'):
    """Load the configuration and run the mode it selects.

    config_path: location of the JSON configuration; defaults to the path
        that used to be hard-coded here.

    'pre_train' empties an existing save directory and starts `train_test`;
    'test' starts `test`; any other mode only prints a message.
    """
    # load args, config
    config = load_config(config_path)
    assert config['args']['agent'] == 'seq2seq'

    # select mode
    mode = config['args']['mode']
    if mode == 'pre_train':
        # delete old weight
        if os.path.isdir(config['save_dir']):
            _reset_save_dir(config['save_dir'])
        train_test(config=config)
    elif mode == 'test':
        test(config=config)
    else:
        print('Invalid mode')
    return


if __name__ == '__main__':
    main()
272 | 


--------------------------------------------------------------------------------