├── LICENSE ├── README.md ├── data ├── 7Scenes │ ├── chess │ │ ├── pose_avg_stats.txt │ │ ├── pose_avg_stats_colmap.txt │ │ ├── pose_avg_stats_colmap_opencv.txt │ │ ├── pose_stats.txt │ │ ├── stats.txt │ │ ├── world_setup.json │ │ └── world_setup_opencv.json │ ├── fire │ │ ├── pose_avg_stats.txt │ │ ├── pose_avg_stats_colmap.txt │ │ ├── pose_avg_stats_colmap_opencv.txt │ │ ├── pose_stats.txt │ │ ├── stats.txt │ │ ├── world_setup.json │ │ └── world_setup_opencv.json │ ├── heads │ │ ├── pose_avg_stats.txt │ │ ├── pose_avg_stats_colmap.txt │ │ ├── pose_avg_stats_colmap_opencv.txt │ │ ├── pose_stats.txt │ │ ├── stats.txt │ │ ├── world_setup.json │ │ └── world_setup_opencv.json │ ├── office │ │ ├── pose_avg_stats.txt │ │ ├── pose_avg_stats_colmap.txt │ │ ├── pose_avg_stats_colmap_opencv.txt │ │ ├── pose_stats.txt │ │ ├── stats.txt │ │ ├── world_setup.json │ │ └── world_setup_opencv.json │ ├── pumpkin │ │ ├── pose_avg_stats.txt │ │ ├── pose_avg_stats_colmap.txt │ │ ├── pose_avg_stats_colmap_opencv.txt │ │ ├── pose_avg_stats_old.txt │ │ ├── pose_stats.txt │ │ ├── stats.txt │ │ ├── world_setup.json │ │ └── world_setup_opencv.json │ ├── redkitchen │ │ ├── pose_avg_stats.txt │ │ ├── pose_avg_stats_colmap.txt │ │ ├── pose_avg_stats_colmap_opencv.txt │ │ ├── pose_stats.txt │ │ ├── stats.txt │ │ ├── world_setup.json │ │ └── world_setup_opencv.json │ └── stairs │ │ ├── pose_avg_stats.txt │ │ ├── pose_avg_stats_colmap.txt │ │ ├── pose_avg_stats_colmap_opencv.txt │ │ ├── pose_stats.txt │ │ ├── stats.txt │ │ ├── unique_view.txt │ │ ├── world_setup.json │ │ └── world_setup_opencv.json ├── 7Scenes_colmap_poses │ ├── chess │ │ ├── chess_test.txt │ │ └── chess_train.txt │ ├── fire │ │ ├── fire_test.txt │ │ └── fire_train.txt │ ├── heads │ │ ├── heads_test.txt │ │ └── heads_train.txt │ ├── office │ │ ├── office_test.txt │ │ └── office_train.txt │ ├── pumpkin │ │ ├── pumpkin_test.txt │ │ └── pumpkin_train.txt │ ├── redkitchen │ │ ├── redkitchen_test.txt │ │ └── redkitchen_train.txt │ └── stairs │ │ ├── stairs_test.txt │ │ └── stairs_train.txt ├── Cambridge_world_setup │ ├── GreatCourt │ │ ├── pose_avg_stats.txt │ │ └── world_setup.json │ ├── KingsCollege │ │ ├── pose_avg_stats.txt │ │ └── world_setup.json │ ├── OldHospital │ │ ├── pose_avg_stats.txt │ │ └── world_setup.json │ ├── ShopFacade │ │ ├── pose_avg_stats.txt │ │ └── world_setup.json │ └── StMarysChurch │ │ ├── pose_avg_stats.txt │ │ └── world_setup.json ├── setup_7scenes.py └── setup_cambridge.py ├── dataset_loaders ├── cambridge_scenes.py ├── load_7Scenes.py ├── load_7Scenes_colmap.py ├── load_Cambridge.py ├── seven_scenes.py ├── seven_scenes_colmap.py └── utils │ └── color.py ├── environment.yml ├── imgs ├── .DS_Store ├── nefes.png └── pipeline.png ├── paper_result ├── DFNet_NeFeS50_7Scenes_colmap │ ├── chess │ │ ├── DFNet_chess_NeFeS50_APR_pose_results.txt │ │ ├── DFNet_chess_results.txt │ │ ├── chess_test_gt.txt │ │ ├── chess_test_gt_filename.txt │ │ ├── chess_train_gt.txt │ │ └── chess_train_gt_filename.txt │ ├── fire │ │ ├── DFNet_fire_NeFeS50_APR_pose_results.txt │ │ ├── DFNet_fire_results.txt │ │ ├── fire_test_gt.txt │ │ ├── fire_test_gt_filename.txt │ │ ├── fire_train_gt.txt │ │ └── fire_train_gt_filename.txt │ ├── heads │ │ ├── DFNet_heads_NeFeS50_APR_pose_results.txt │ │ ├── DFNet_heads_results.txt │ │ ├── heads_test_gt.txt │ │ ├── heads_test_gt_filename.txt │ │ ├── heads_train_gt.txt │ │ └── heads_train_gt_filename.txt │ ├── office │ │ ├── DFNet_office_NeFeS50_APR_pose_results.txt │ │ ├── DFNet_office_results.txt │ │ ├── office_test_gt.txt │ 
│ ├── office_test_gt_filename.txt │ │ ├── office_train_gt.txt │ │ └── office_train_gt_filename.txt │ ├── pumpkin │ │ ├── DFNet_pumpkin_NeFeS50_APR_pose_results.txt │ │ ├── DFNet_pumpkin_results.txt │ │ ├── pumpkin_test_gt.txt │ │ ├── pumpkin_test_gt_filename.txt │ │ ├── pumpkin_train_gt.txt │ │ └── pumpkin_train_gt_filename.txt │ ├── redkitchen │ │ ├── DFNet_redkitchen_NeFeS50_APR_pose_results.txt │ │ ├── DFNet_redkitchen_results.txt │ │ ├── redkitchen_test_gt.txt │ │ ├── redkitchen_test_gt_filename.txt │ │ ├── redkitchen_train_gt.txt │ │ └── redkitchen_train_gt_filename.txt │ └── stairs │ │ ├── DFNet_stairs_NeFeS50_APR_pose_results.txt │ │ ├── DFNet_stairs_results.txt │ │ ├── stairs_test_gt.txt │ │ ├── stairs_test_gt_filename.txt │ │ ├── stairs_train_gt.txt │ │ └── stairs_train_gt_filename.txt └── DFNet_NeFeS50_Cambridge │ ├── KingsCollege │ ├── DFNet_KingsCollege_NeFeS50_APR_pose_results.txt │ ├── DFNet_KingsCollege_results.txt │ ├── KingsCollege_test_gt.txt │ ├── KingsCollege_test_gt_filename.txt │ ├── KingsCollege_train_gt.txt │ └── KingsCollege_train_gt_filename.txt │ ├── OldHospital │ ├── DFNet_OldHospital_NeFeS50_APR_pose_results.txt │ ├── DFNet_OldHospital_results.txt │ ├── OldHospital_test_gt.txt │ ├── OldHospital_test_gt_filename.txt │ ├── OldHospital_train_gt.txt │ └── OldHospital_train_gt_filename.txt │ ├── ShopFacade │ ├── DFNet_ShopFacade_NeFeS50_APR_pose_results.txt │ ├── DFNet_ShopFacade_results.txt │ ├── ShopFacade_test_gt.txt │ ├── ShopFacade_test_gt_filename.txt │ ├── ShopFacade_train_gt.txt │ └── ShopFacade_train_gt_filename.txt │ └── StMarysChurch │ ├── DFNet_StMarysChurch_NeFeS50_APR_pose_results.txt │ ├── DFNet_StMarysChurch_results.txt │ ├── StMarysChurch_test_gt.txt │ ├── StMarysChurch_test_gt_filename.txt │ ├── StMarysChurch_train_gt.txt │ └── StMarysChurch_train_gt_filename.txt └── script ├── config ├── 7Scenes │ ├── .DS_Store │ └── dfnet │ │ ├── config_chess_DFM.txt │ │ ├── config_chess_stage1.txt │ │ ├── config_chess_stage2.txt │ │ ├── config_fire_DFM.txt │ │ ├── config_fire_stage1.txt │ │ ├── config_fire_stage2.txt │ │ ├── config_heads_DFM.txt │ │ ├── config_heads_stage1.txt │ │ ├── config_heads_stage2.txt │ │ ├── config_kitchen_DFM.txt │ │ ├── config_kitchen_stage1.txt │ │ ├── config_kitchen_stage2.txt │ │ ├── config_office_DFM.txt │ │ ├── config_office_stage1.txt │ │ ├── config_office_stage2.txt │ │ ├── config_pumpkin_DFM.txt │ │ ├── config_pumpkin_stage1.txt │ │ ├── config_pumpkin_stage2.txt │ │ ├── config_stairs_DFM.txt │ │ ├── config_stairs_stage1.txt │ │ └── config_stairs_stage2.txt └── Cambridge │ ├── .DS_Store │ └── dfnet │ ├── config_church_DFM.txt │ ├── config_church_stage1.txt │ ├── config_church_stage2.txt │ ├── config_hospital_DFM.txt │ ├── config_hospital_stage1.txt │ ├── config_hospital_stage2.txt │ ├── config_kings_DFM.txt │ ├── config_kings_stage1.txt │ ├── config_kings_stage2.txt │ ├── config_shop_DFM.txt │ ├── config_shop_stage1.txt │ └── config_shop_stage2.txt ├── dm ├── DFM_APR_refine.py ├── DFM_pose_refine.py ├── __init__.py ├── callbacks.py ├── direct_pose_model.py ├── options.py ├── pose_model.py └── prepare_data.py ├── eval.py ├── eval.sh ├── feature ├── dfnet.py ├── misc.py └── model.py ├── models ├── __init__.py ├── activation.py ├── decoder.py ├── losses.py ├── nerf.py ├── nerfh.py ├── nerfh_nff.py ├── nerfh_tcnn.py ├── options.py ├── poses.py ├── ray_utils.py └── rendering.py ├── mstransformer ├── backbone.py ├── pencoder.py ├── transformer.py ├── transformer_encoder.py └── transposenet.py ├── run_nefes.py ├── 
test_apr_refinement.sh ├── test_refinement.py ├── train_nefes.sh └── utils ├── align_traj.py ├── colmap.py ├── comp_ate.py ├── lie_group_helper.py ├── set_sys_path.py ├── utils.py └── vis_cam_traj.py /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2024 Active Vision Laboratory 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Neural Refinement for Absolute Pose Regression with Feature Synthesis 2 | **[Shuai Chen](https://scholar.google.com/citations?user=c0xTh_YAAAAJ&hl=en), 3 | [Yash Bhalgat](https://scholar.google.com/citations?user=q0VSEHYAAAAJ&hl=en), 4 | [Xinghui Li](https://scholar.google.com/citations?user=XLlgbBoAAAAJ&hl=en), 5 | [Jiawang Bian](https://scholar.google.com/citations?user=zeGz5JcAAAAJ&hl=en&oi=sra), 6 | [Kejie Li](https://scholar.google.com/citations?hl=en&user=JBwsoCUAAAAJ), 7 | [Zirui Wang](https://scholar.google.com/citations?user=zCBKqa8AAAAJ&hl=en), 8 | and [Victor Prisacariu](https://scholar.google.com/citations?user=GmWA-LoAAAAJ&hl=en) (CVPR 2024)** 9 | 10 | **[Project Page](https://nefes.active.vision) | [Paper](https://arxiv.org/abs/2303.10087)** 11 | 12 | [![NeFeS1](imgs/pipeline.png)](https://arxiv.org/abs/2303.10087) 13 | [![NeFeS2](imgs/nefes.png)](https://arxiv.org/abs/2303.10087) 14 | 15 | 16 | ## Installation 17 | We tested our code based on CUDA11.3+, PyTorch 1.11.0+, and Python 3.7+ using [docker](https://docs.docker.com/engine/install/ubuntu/). 18 | 19 | We also provide a `conda` environment 20 | ```sh 21 | conda env create -f environment.yml 22 | conda activate nefes 23 | pip install git+https://github.com/princeton-vl/lietorch.git # if your lietorch doesn't work, you can set lietorch=False in poses.py 24 | pip install git+https://github.com/NVlabs/tiny-cuda-nn/#subdirectory=bindings/torch 25 | # install pytorch3d 26 | cd .. 27 | git clone https://github.com/facebookresearch/pytorch3d.git && cd pytorch3d && pip install -e . 
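# (optional) sanity-check that the key dependencies import after installation; this check is not part of the original instructions, and the module names below (lietorch, tinycudann, pytorch3d) are assumed from the packages installed above
python -c "import torch, lietorch, tinycudann, pytorch3d; print('torch', torch.__version__, '| CUDA available:', torch.cuda.is_available())"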
28 | ``` 29 | 30 | ## Datasets 31 | This paper uses two public datasets: 32 | - [Microsoft 7-Scenes](https://www.microsoft.com/en-us/research/project/rgb-d-dataset-7-scenes/) 33 | - [Cambridge Landmarks](https://www.repository.cam.ac.uk/handle/1810/251342/) 34 | 35 | - **7-Scenes** 36 | 37 | We use a data preparation similar to [MapNet](https://github.com/NVlabs/geomapnet). You can download the [7-Scenes](https://www.microsoft.com/en-us/research/project/rgb-d-dataset-7-scenes/) dataset to the `data/deepslam_data/7Scenes` directory using the script below. 38 | 39 | ```sh 40 | cd data 41 | python setup_7scenes.py 42 | ``` 43 | 44 | 1. We additionally computed pose averaging statistics (`pose_avg_stats.txt`) and manually tuned `world_setup.json` files in `data/7Scenes` to align the 7-Scenes coordinate system with NeRF's coordinate system (OpenGL). You can generate your own re-alignment to a new `pose_avg_stats.txt` using the `--save_pose_avg_stats` option. 45 | 46 | 2. Our `setup_7scenes.py` script also copies the 7-Scenes COLMAP poses to the `deepslam_data/7Scenes/{SCENE}/` folder, courtesy of [Brachmann21](https://github.com/tsattler/visloc_pseudo_gt_limitations). 47 | 48 | - **Cambridge Landmarks** 49 | 50 | To download Cambridge Landmarks, please use this script: 51 | ```sh 52 | cd data 53 | python setup_cambridge.py 54 | ``` 55 | The script also copies the `pose_avg_stats.txt` and `world_setup.json` files provided in this repository to `data/Cambridge/{CAMBRIDGE_SCENE}`. 56 | 57 | As described in the paper, we apply semantic filtering when training NeFeS to filter out transient objects, using [Cheng22](https://github.com/facebookresearch/Mask2Former). The script therefore downloads the semantic masks and puts them into `data/Cambridge/{CAMBRIDGE_SCENE}/train/semantic` and `data/Cambridge/{CAMBRIDGE_SCENE}/test/semantic`. 58 | 59 | ## Pre-trained Models 60 | We currently provide the pretrained NeFeS and DFNet models used in our paper. 61 | 62 | Download and decompress [paper_models.zip](https://www.robots.ox.ac.uk/~shuaic/NeFeS2024/paper_models.zip) to `{REPO_PATH}/logs/paper_models`: 63 | ```sh 64 | wget https://www.robots.ox.ac.uk/~shuaic/NeFeS2024/paper_models.zip 65 | unzip paper_models.zip 66 | mkdir logs 67 | mv paper_models/ logs/ 68 | ``` 69 | 70 | ### GPUs for Pre-trained Models and Verifying Paper Results 71 | Due to limited resources, our pre-trained models were trained on different GPUs, such as NVIDIA 3090, 3080 Ti, RTX 6000, or 1080 Ti GPUs. We noticed that a model's performance might jitter slightly (better or worse) when running inference on a different type of GPU. Therefore, all experiments in the paper are reported using the same GPU type that each model was trained on. To provide a reference, we also include the experimental results obtained on our machines. 72 | 73 | #### You can easily obtain our paper results (Table 1 and Table 2 DFNet + NeFeS50) by running: 74 | ```sh 75 | sh eval.sh 76 | ``` 77 | 78 | ## Training NeFeS 79 | We provide the NeFeS training script in `train_nefes.sh`: 80 | ```sh 81 | sh train_nefes.sh 82 | ``` 83 | In this script, we run a three-stage progressive training schedule, as described in the Supplementary Material of the paper. 84 | ```sh 85 | # Stage 1: train a color-only NeRF, initializing the 3D geometry to a reasonable extent. 86 | python run_nefes.py --config config/7Scenes/dfnet/config_stairs_stage1.txt 87 | 88 | # Stages 2 and 3: train the feature and fusion modules, obtaining the best neural feature field performance for NeFeS. 
89 | python run_nefes.py --config config/7Scenes/dfnet/config_stairs_stage2.txt 90 | ``` 91 | 92 | ## Evaluation 93 | After training NeFeS, you can test the APRs with NeFeS refinement. Note that we already provide the paper results [above](#you-can-easily-obtain-our-paper-results-table-1-and-table-2-dfnet--nefes50-by-running), obtained on our own machines. 94 | To evaluate your own trained model, you can use the following script. 95 | ```sh 96 | # this script is an example of running DFNet + NeFeS50 97 | sh test_apr_refinement.sh 98 | ``` 99 | 100 | The config files in this script use the paper models by default. You can replace them with your own models if you have trained ones. 101 | ```sh 102 | python test_refinement.py --config config/7Scenes/dfnet/config_stairs_DFM.txt --ft_path $YOUR_NeFeS 103 | ``` 104 | 105 | If your GPU runs out of memory, please consider reducing the `--netchunk` parameter. 106 | 107 | If you want to see whether NeFeS can refine your own APR model/pose estimator, you can add your network loader to `load_APR_and_FeatureNet()` in dm/direct_pose_model.py. 108 | Note that we recommend training your APR/pose estimator in the OpenGL coordinate system (the best way is through our dataloader, as we did for [PoseNet (pytorch)](https://github.com/ActiveVisionLab/direct-posenet/tree/main) and [MsTransformer](https://github.com/yolish/multi-scene-pose-transformer)). This is because our NeFeS is trained in the OpenGL convention; otherwise you will have to adjust the coordinate system yourself. 109 | 110 | ## Acknowledgement 111 | We thank Dr. Michael Hobley and Dr. Theo Costain for their generous discussions on this work, as well as their kind proofreading of our paper manuscripts. We also thank Changkun Liu for kindly providing assistance in ensuring conda environment consistency. 112 | 113 | ## Publications 114 | Please cite our paper and star this repo if you find our work helpful. Thanks! 115 | ``` 116 | @inproceedings{chen2024nefes, 117 | author = {Chen, Shuai and Bhalgat, Yash and Li, Xinghui and Bian, Jia-Wang and Li, Kejie and Wang, Zirui and Prisacariu, Victor Adrian}, 118 | title = {Neural Refinement for Absolute Pose Regression with Feature Synthesis}, 119 | booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, 120 | month = {June}, 121 | year = {2024}, 122 | pages = {20987-20996} 123 | } 124 | ``` 125 | This code builds on previous camera relocalization pipelines, namely Direct-PoseNet and DFNet. 
Please consider citing: 126 | ``` 127 | @inproceedings{chen2022dfnet, 128 | title={DFNet: Enhance Absolute Pose Regression with Direct Feature Matching}, 129 | author={Chen, Shuai and Li, Xinghui and Wang, Zirui and Prisacariu, Victor}, 130 | booktitle={Proceedings of the European Conference on Computer Vision (ECCV)}, 131 | year={2022} 132 | } 133 | @inproceedings{chen2021direct, 134 | title={Direct-PoseNet: Absolute pose regression with photometric consistency}, 135 | author={Chen, Shuai and Wang, Zirui and Prisacariu, Victor}, 136 | booktitle={2021 International Conference on 3D Vision (3DV)}, 137 | pages={1175--1185}, 138 | year={2021}, 139 | organization={IEEE} 140 | } 141 | ``` 142 | -------------------------------------------------------------------------------- /data/7Scenes/chess/pose_avg_stats.txt: -------------------------------------------------------------------------------- 1 | 9.350092088047137207e-01 1.857172640673737662e-01 -3.021040835171091565e-01 4.217527911023845610e-01 2 | -3.778758022057916027e-02 8.992282578255548220e-01 4.358447419771065423e-01 -7.391797421190476891e-01 3 | 3.526044217412146464e-01 -3.961030650668416753e-01 8.478045078986057304e-01 2.995973868145236363e-01 4 | -------------------------------------------------------------------------------- /data/7Scenes/chess/pose_avg_stats_colmap.txt: -------------------------------------------------------------------------------- 1 | 9.340561125396901199e-01 -6.659541752462687370e-02 3.508621224811283024e-01 -4.238275175852468379e-01 2 | 1.838859342648906381e-01 9.318965222298547513e-01 -3.126576962677076188e-01 5.803440046766772387e-01 3 | -3.061456218971128629e-01 3.565584415220349190e-01 8.826896034125515378e-01 4.341176385263388338e-01 4 | -------------------------------------------------------------------------------- /data/7Scenes/chess/pose_avg_stats_colmap_opencv.txt: -------------------------------------------------------------------------------- 1 | 9.362093372281413695e-01 1.896575216428261768e-01 -2.958751449026179037e-01 4.121464999857256117e-01 2 | -4.592623860048735546e-02 9.006904037115472939e-01 4.320272876450524935e-01 -7.403196055023827382e-01 3 | 3.484291283673844886e-01 -3.908795481298947827e-01 8.519450224978803998e-01 2.872277951098815008e-01 4 | -------------------------------------------------------------------------------- /data/7Scenes/chess/pose_stats.txt: -------------------------------------------------------------------------------- 1 | 0.0000000 0.0000000 0.0000000 2 | 1.0000000 1.0000000 1.0000000 3 | -------------------------------------------------------------------------------- /data/7Scenes/chess/stats.txt: -------------------------------------------------------------------------------- 1 | 5.009650708326967017e-01 4.413125411911532625e-01 4.458285283490354689e-01 2 | 4.329720281018845096e-02 5.278270383679337097e-02 4.760929057962018374e-02 3 | -------------------------------------------------------------------------------- /data/7Scenes/chess/world_setup.json: -------------------------------------------------------------------------------- 1 | { 2 | "near":0, 3 | "far":2, 4 | "pose_scale": 0.5, 5 | "pose_scale2": 1, 6 | "move_all_cam_vec": [0.0, 0.0, 1.0] 7 | } -------------------------------------------------------------------------------- /data/7Scenes/chess/world_setup_opencv.json: -------------------------------------------------------------------------------- 1 | { 2 | "near":0, 3 | "far":2, 4 | "pose_scale": 1, 5 | "pose_scale2": 1, 6 | "move_all_cam_vec": [0.0, 0.0, 
-0.3] 7 | } -------------------------------------------------------------------------------- /data/7Scenes/fire/pose_avg_stats.txt: -------------------------------------------------------------------------------- 1 | 9.682675339923704216e-01 1.308464847914461437e-01 -2.129252921427031431e-01 4.483261509061373107e-02 2 | -9.619441241477342738e-03 8.708690325271444266e-01 4.914209952122894909e-01 -4.201599145869548413e-01 3 | 2.497307529451176511e-01 -4.737787728496888895e-01 8.444928806274848432e-01 7.115903476590906829e-01 4 | -------------------------------------------------------------------------------- /data/7Scenes/fire/pose_avg_stats_colmap.txt: -------------------------------------------------------------------------------- 1 | 9.617410571705420352e-01 -9.295835830903931285e-04 2.739585275673132592e-01 -1.427134502335393462e-01 2 | 8.191226308769947173e-02 9.552253960585564974e-01 -2.843146564646740759e-01 2.798126969008590259e-01 3 | -2.614278487620241842e-01 2.958776412626276664e-01 9.187556265365258712e-01 8.342835799034218833e-03 4 | -------------------------------------------------------------------------------- /data/7Scenes/fire/pose_avg_stats_colmap_opencv.txt: -------------------------------------------------------------------------------- 1 | 9.661856274155838920e-01 1.401377916676138924e-01 -2.164410606194651643e-01 5.064882451031420957e-02 2 | -2.024852387191147796e-02 8.780593921182213712e-01 4.781231025520397138e-01 -4.113604743046889167e-01 3 | 2.570512218538615890e-01 -4.575730578383129799e-01 8.512059481018285911e-01 6.981849296396421822e-01 4 | -------------------------------------------------------------------------------- /data/7Scenes/fire/pose_stats.txt: -------------------------------------------------------------------------------- 1 | 0.0000000 0.0000000 0.0000000 2 | 1.0000000 1.0000000 1.0000000 3 | -------------------------------------------------------------------------------- /data/7Scenes/fire/stats.txt: -------------------------------------------------------------------------------- 1 | 5.222627479256024552e-01 4.620521564670138082e-01 4.212473626365915158e-01 2 | 5.550322239689903236e-02 5.943252514694064015e-02 5.525370066993806617e-02 3 | -------------------------------------------------------------------------------- /data/7Scenes/fire/world_setup.json: -------------------------------------------------------------------------------- 1 | { 2 | "near":0, 3 | "far":3, 4 | "pose_scale": 1, 5 | "pose_scale2": 1, 6 | "move_all_cam_vec": [0.0, 0.0, 1.0] 7 | } -------------------------------------------------------------------------------- /data/7Scenes/fire/world_setup_opencv.json: -------------------------------------------------------------------------------- 1 | { 2 | "near":0, 3 | "far":3, 4 | "pose_scale": 1, 5 | "pose_scale2": 1, 6 | "move_all_cam_vec": [0.0, 0.0, -0.5] 7 | } -------------------------------------------------------------------------------- /data/7Scenes/heads/pose_avg_stats.txt: -------------------------------------------------------------------------------- 1 | 9.821477519358328134e-01 -8.872136230840418913e-02 1.658743899386847798e-01 -1.157979895409090576e-02 2 | 3.111216594703735891e-02 9.462589180611266082e-01 3.219100699261678855e-01 -1.317583753459090623e-01 3 | -1.855204207020725304e-01 -3.110025399573565497e-01 9.321263828701550347e-01 9.636789777181822836e-02 4 | -------------------------------------------------------------------------------- /data/7Scenes/heads/pose_avg_stats_colmap.txt: 
-------------------------------------------------------------------------------- 1 | 9.808486931492518268e-01 1.189175714114518861e-02 -1.944078888818578654e-01 5.470921998983249018e-02 2 | -6.693113356009454396e-02 9.579286937181828998e-01 -2.790928897548922150e-01 1.511542448820546169e-01 3 | 1.829099901803221684e-01 2.867598365592218812e-01 9.403790361491278782e-01 -2.748217807296896367e-02 4 | -------------------------------------------------------------------------------- /data/7Scenes/heads/pose_avg_stats_colmap_opencv.txt: -------------------------------------------------------------------------------- 1 | 9.820398514337927987e-01 -7.857145934796547215e-02 1.715349992620316399e-01 -1.759783250161013959e-02 2 | 1.464067066239504132e-02 9.381558675195578179e-01 3.459034822044744217e-01 -1.390983989374055818e-01 3 | -1.881047074329865731e-01 -3.371796168432440455e-01 9.224567876204248229e-01 9.614693017983963474e-02 4 | -------------------------------------------------------------------------------- /data/7Scenes/heads/pose_stats.txt: -------------------------------------------------------------------------------- 1 | 0.0000000 0.0000000 0.0000000 2 | 1.0000000 1.0000000 1.0000000 3 | -------------------------------------------------------------------------------- /data/7Scenes/heads/stats.txt: -------------------------------------------------------------------------------- 1 | 4.570619554738562518e-01 4.504317877348855137e-01 4.586057516467524908e-01 2 | 7.874170624948270691e-02 7.747845434384653673e-02 7.183367877515742239e-02 3 | -------------------------------------------------------------------------------- /data/7Scenes/heads/world_setup.json: -------------------------------------------------------------------------------- 1 | { 2 | "near":0, 3 | "far":2.5, 4 | "pose_scale": 1, 5 | "pose_scale2": 1, 6 | "move_all_cam_vec": [0.0, 0.0, 1.0] 7 | } -------------------------------------------------------------------------------- /data/7Scenes/heads/world_setup_opencv.json: -------------------------------------------------------------------------------- 1 | { 2 | "near":0, 3 | "far":2.5, 4 | "pose_scale": 1, 5 | "pose_scale2": 1, 6 | "move_all_cam_vec": [0.0, 0.0, -0.4] 7 | } -------------------------------------------------------------------------------- /data/7Scenes/office/pose_avg_stats.txt: -------------------------------------------------------------------------------- 1 | 9.192216692444369341e-01 -2.380853847231172160e-01 3.136030490488191935e-01 -2.580896777083788868e-02 2 | 8.889970878427481960e-02 9.014018431780023155e-01 4.237588452095970570e-01 -8.784274026985845474e-01 3 | -3.835731541303978309e-01 -3.616490933163600818e-01 8.497538283137727744e-01 1.063082783627855354e+00 4 | -------------------------------------------------------------------------------- /data/7Scenes/office/pose_avg_stats_colmap.txt: -------------------------------------------------------------------------------- 1 | 9.833540773052701356e-01 9.261160633272706721e-02 -1.563260983315739872e-01 1.540049440648031120e-01 2 | -1.097706624107348539e-01 9.884009456335306476e-01 -1.049474742175865033e-01 9.132221896381339521e-01 3 | 1.447935092502748711e-01 1.203605460406959593e-01 9.821141372751824861e-01 -4.021252880857227319e-01 4 | -------------------------------------------------------------------------------- /data/7Scenes/office/pose_avg_stats_colmap_opencv.txt: -------------------------------------------------------------------------------- 1 | 9.838976565094801341e-01 -1.235605843340006060e-01 
1.291440417293547582e-01 -1.952415012257482235e-02 2 | 5.928329169880300342e-02 9.072573967990911203e-01 4.163766411299882897e-01 -8.929118594873651782e-01 3 | -1.686146281925374191e-01 -4.020159175360805692e-01 8.999735047250644326e-01 1.054244766762980623e+00 4 | -------------------------------------------------------------------------------- /data/7Scenes/office/pose_stats.txt: -------------------------------------------------------------------------------- 1 | 0.0000000 0.0000000 0.0000000 2 | 1.0000000 1.0000000 1.0000000 3 | -------------------------------------------------------------------------------- /data/7Scenes/office/stats.txt: -------------------------------------------------------------------------------- 1 | 4.703657901067226921e-01 4.414751487847252132e-01 4.351020758221028628e-01 2 | 7.105139804377599844e-02 7.191485421006868495e-02 6.783299267371162289e-02 3 | -------------------------------------------------------------------------------- /data/7Scenes/office/world_setup.json: -------------------------------------------------------------------------------- 1 | { 2 | "near":0, 3 | "far":2, 4 | "pose_scale": 0.5, 5 | "pose_scale2": 1, 6 | "move_all_cam_vec": [0.0, 0.0, 0.5] 7 | } -------------------------------------------------------------------------------- /data/7Scenes/office/world_setup_opencv.json: -------------------------------------------------------------------------------- 1 | { 2 | "near":0, 3 | "far":2, 4 | "pose_scale": 0.625, 5 | "pose_scale2": 1, 6 | "move_all_cam_vec": [0.0, 0.0, 0.0] 7 | } -------------------------------------------------------------------------------- /data/7Scenes/pumpkin/pose_avg_stats.txt: -------------------------------------------------------------------------------- 1 | 9.994189485731389544e-01 7.232278172020349324e-03 -3.330854823320933411e-02 -6.206658357107126822e-02 2 | -8.310756492857687694e-03 9.994418989496620664e-01 -3.235462795969396704e-02 -7.690604500476190264e-01 3 | 3.305596102789840757e-02 3.261264749044923833e-02 9.989212775109888032e-01 4.472261112787878634e-01 4 | -------------------------------------------------------------------------------- /data/7Scenes/pumpkin/pose_avg_stats_colmap.txt: -------------------------------------------------------------------------------- 1 | 9.999424407060272246e-01 -5.417876631659246899e-03 9.260771440725883846e-03 8.173387349957192405e-02 2 | 4.470846109605275918e-03 9.950387249703220860e-01 9.938786316497520701e-02 5.646830536484777063e-01 3 | -9.753297387933523160e-03 -9.934073898577426565e-02 9.950056737364981752e-01 -2.709970477240858377e-01 4 | -------------------------------------------------------------------------------- /data/7Scenes/pumpkin/pose_avg_stats_colmap_opencv.txt: -------------------------------------------------------------------------------- 1 | 9.999203343857488990e-01 1.229425610093747359e-02 -2.859396582589080870e-03 -9.013786538319475128e-02 2 | -1.233121915835506260e-02 9.998356453274146460e-01 -1.328997241460514595e-02 -7.529619143749367582e-01 3 | 2.695536302960407742e-03 1.332417350670991353e-02 9.999075959729492036e-01 4.329883231377848474e-01 4 | -------------------------------------------------------------------------------- /data/7Scenes/pumpkin/pose_avg_stats_old.txt: -------------------------------------------------------------------------------- 1 | 9.867033112503645897e-01 8.544426416488330733e-02 -1.382600929006191914e-01 7.374091044342952206e-02 2 | -9.057380802494104099e-02 9.953998641700174677e-01 -3.123292669878471872e-02 
-7.475368646794867677e-01 3 | 1.349554032539169723e-01 4.334037530562200036e-02 9.899033543740218821e-01 3.342737444938814195e-01 4 | -------------------------------------------------------------------------------- /data/7Scenes/pumpkin/pose_stats.txt: -------------------------------------------------------------------------------- 1 | 0.0000000 0.0000000 0.0000000 2 | 1.0000000 1.0000000 1.0000000 3 | -------------------------------------------------------------------------------- /data/7Scenes/pumpkin/stats.txt: -------------------------------------------------------------------------------- 1 | 5.503370888799515859e-01 4.492568432042766124e-01 4.579284152018213705e-01 2 | 4.053158612557544727e-02 4.899782680513672939e-02 3.385843494567825074e-02 3 | -------------------------------------------------------------------------------- /data/7Scenes/pumpkin/world_setup.json: -------------------------------------------------------------------------------- 1 | { 2 | "near":0, 3 | "far":2.5, 4 | "pose_scale": 0.5, 5 | "pose_scale2": 1, 6 | "move_all_cam_vec": [0.0, 0.0, 1.0] 7 | } -------------------------------------------------------------------------------- /data/7Scenes/pumpkin/world_setup_opencv.json: -------------------------------------------------------------------------------- 1 | { 2 | "near":0, 3 | "far":2.5, 4 | "pose_scale": 0.5, 5 | "pose_scale2": 1, 6 | "move_all_cam_vec": [0.0, 0.0, -0.5] 7 | } -------------------------------------------------------------------------------- /data/7Scenes/redkitchen/pose_avg_stats.txt: -------------------------------------------------------------------------------- 1 | 9.397923675833245172e-01 1.720816216688166589e-01 -2.952595829367096747e-01 1.068215764548530455e-01 2 | -1.553194639452672166e-01 9.846599603919916621e-01 7.950236801879838333e-02 -4.759053293419227559e-01 3 | 3.044111856550024142e-01 -2.885615852243439416e-02 9.521035406737251572e-01 9.771192826975949597e-01 4 | -------------------------------------------------------------------------------- /data/7Scenes/redkitchen/pose_avg_stats_colmap.txt: -------------------------------------------------------------------------------- 1 | 9.474122814757611755e-01 -2.045741177876075900e-01 2.460881940287161507e-01 -3.216257149000772420e-01 2 | 1.704998482045340935e-01 9.734357688627769400e-01 1.528155936439894169e-01 1.687619158575152101e-01 3 | -2.708131656163042522e-01 -1.028213704925008132e-01 9.571248586777588718e-01 -4.942885518981923743e-01 4 | -------------------------------------------------------------------------------- /data/7Scenes/redkitchen/pose_avg_stats_colmap_opencv.txt: -------------------------------------------------------------------------------- 1 | 9.498783504863566041e-01 1.944958274865911663e-01 -2.447498567264634051e-01 -1.031232663219011914e-01 2 | -1.522796381179143677e-01 9.716026602013511093e-01 1.811054458163388059e-01 -5.653208159647153952e-01 3 | 2.730238654257081676e-01 -1.347577225244029064e-01 9.525220864253179931e-01 1.164258334606231626e+00 4 | -------------------------------------------------------------------------------- /data/7Scenes/redkitchen/pose_stats.txt: -------------------------------------------------------------------------------- 1 | 0.0000000 0.0000000 0.0000000 2 | 1.0000000 1.0000000 1.0000000 3 | -------------------------------------------------------------------------------- /data/7Scenes/redkitchen/stats.txt: -------------------------------------------------------------------------------- 1 | 5.262172203420504291e-01 
4.400453064527823366e-01 4.320846191351511711e-01 2 | 4.872459633076364760e-02 6.484063059696282272e-02 5.724255797232574716e-02 3 | -------------------------------------------------------------------------------- /data/7Scenes/redkitchen/world_setup.json: -------------------------------------------------------------------------------- 1 | { 2 | "near":0, 3 | "far":2, 4 | "pose_scale": 0.5, 5 | "pose_scale2": 1, 6 | "move_all_cam_vec": [0.0, 0.0, 0.5] 7 | } -------------------------------------------------------------------------------- /data/7Scenes/redkitchen/world_setup_opencv.json: -------------------------------------------------------------------------------- 1 | { 2 | "near":0, 3 | "far":2, 4 | "pose_scale": 0.625, 5 | "pose_scale2": 1, 6 | "move_all_cam_vec": [0.0, 0.0, -0.3] 7 | } -------------------------------------------------------------------------------- /data/7Scenes/stairs/pose_avg_stats.txt: -------------------------------------------------------------------------------- 1 | 9.981044025604641767e-01 6.017846147761087006e-02 -1.289008780444797844e-02 4.026755230123485463e-02 2 | -4.703056619089211743e-02 8.809109637765394352e-01 4.709394862846526530e-01 -9.428126168765132986e-01 3 | 3.969543340464730397e-02 -4.694405464725816546e-01 8.820713383249345618e-01 3.852607943350118691e-01 4 | -------------------------------------------------------------------------------- /data/7Scenes/stairs/pose_avg_stats_colmap.txt: -------------------------------------------------------------------------------- 1 | 9.880547700366734132e-01 -1.476706754772779406e-01 4.405840455419323537e-02 -1.788777311062440256e-01 2 | 1.535687461077752392e-01 9.197402330142706584e-01 -3.612402856738723966e-01 8.469717937558889354e-01 3 | 1.282230972415489625e-02 3.636911813323767673e-01 9.314313248943253409e-01 1.529007980453316040e-01 4 | -------------------------------------------------------------------------------- /data/7Scenes/stairs/pose_avg_stats_colmap_opencv.txt: -------------------------------------------------------------------------------- 1 | 9.956526937247213160e-01 8.877633349914967886e-02 -2.818645222716121418e-02 5.874995192887124545e-02 2 | -6.526516103862914364e-02 8.808409822488185714e-01 4.688919094477301819e-01 -9.183240333621724982e-01 3 | 6.645428679406686912e-02 -4.650138993636473428e-01 8.828056984214066594e-01 3.673521410739095105e-01 4 | -------------------------------------------------------------------------------- /data/7Scenes/stairs/pose_stats.txt: -------------------------------------------------------------------------------- 1 | 0.0000000 0.0000000 0.0000000 2 | 1.0000000 1.0000000 1.0000000 3 | -------------------------------------------------------------------------------- /data/7Scenes/stairs/stats.txt: -------------------------------------------------------------------------------- 1 | 4.472714732115506964e-01 4.312183359438830910e-01 4.291487246732026972e-01 2 | 3.258580609153208241e-02 2.618736971489385446e-02 1.208855922484347589e-02 3 | -------------------------------------------------------------------------------- /data/7Scenes/stairs/unique_view.txt: -------------------------------------------------------------------------------- 1 | 0 2 | 344 3 | 401 4 | 489 5 | 500 6 | 617 7 | 1126 8 | 1213 9 | 1362 10 | -------------------------------------------------------------------------------- /data/7Scenes/stairs/world_setup.json: -------------------------------------------------------------------------------- 1 | { 2 | "near":0, 3 | "far":4, 4 | "pose_scale": 1, 5 | 
"pose_scale2": 1, 6 | "move_all_cam_vec": [0.0, 0.0, 0.0] 7 | } -------------------------------------------------------------------------------- /data/7Scenes/stairs/world_setup_opencv.json: -------------------------------------------------------------------------------- 1 | { 2 | "near":0, 3 | "far":4, 4 | "pose_scale": 0.5, 5 | "pose_scale2": 1, 6 | "move_all_cam_vec": [0.0, 0.0, -0.4] 7 | } -------------------------------------------------------------------------------- /data/Cambridge_world_setup/GreatCourt/pose_avg_stats.txt: -------------------------------------------------------------------------------- 1 | 3.405292754461299309e-01 4.953070871398960184e-01 7.991937825040464904e-01 4.704508373014760281e+01 2 | -9.402316354416121458e-01 1.812586354202151695e-01 2.882876667504056800e-01 3.467785281210451842e+01 3 | -2.069849976503225410e-03 -8.495976674371187309e-01 5.274272643753655787e-01 1.101080132352710184e+00 4 | -------------------------------------------------------------------------------- /data/Cambridge_world_setup/GreatCourt/world_setup.json: -------------------------------------------------------------------------------- 1 | { 2 | "near":0.0, 3 | "far":10.0, 4 | "pose_scale": 0.3027, 5 | "pose_scale2": 0.2, 6 | "move_all_cam_vec": [0.0, 0.0, 0.0] 7 | } -------------------------------------------------------------------------------- /data/Cambridge_world_setup/KingsCollege/pose_avg_stats.txt: -------------------------------------------------------------------------------- 1 | 9.995083419588323137e-01 -1.453974655309233331e-02 2.777895111190991154e-02 2.004095163645802913e+01 2 | -2.395968310872182219e-02 2.172811532927548528e-01 9.758149588979971867e-01 -2.354010655332784197e+01 3 | -2.022394471995193205e-02 -9.760007664924973403e-01 2.168259575466510436e-01 1.650110331018928678e+00 4 | -------------------------------------------------------------------------------- /data/Cambridge_world_setup/KingsCollege/world_setup.json: -------------------------------------------------------------------------------- 1 | { 2 | "near":0.0, 3 | "far":10.0, 4 | "pose_scale": 0.3027, 5 | "pose_scale2": 0.2, 6 | "move_all_cam_vec": [0.0, 0.0, 0.0] 7 | } -------------------------------------------------------------------------------- /data/Cambridge_world_setup/OldHospital/pose_avg_stats.txt: -------------------------------------------------------------------------------- 1 | 9.997941252129602940e-01 6.239930741698496326e-03 1.930726428032739084e-02 1.319547963328867723e+01 2 | -3.333807443587469103e-03 -8.880897259859261705e-01 4.596580515189216398e-01 -6.473184854291670343e-01 3 | 2.001481745059596404e-02 -4.596277862168271500e-01 -8.878860879751624413e-01 2.310333011616541654e+01 4 | -------------------------------------------------------------------------------- /data/Cambridge_world_setup/OldHospital/world_setup.json: -------------------------------------------------------------------------------- 1 | { 2 | "near":0.0, 3 | "far":10.0, 4 | "pose_scale": 0.3027, 5 | "pose_scale2": 0.2, 6 | "move_all_cam_vec": [0.0, 0.0, 5.0] 7 | } -------------------------------------------------------------------------------- /data/Cambridge_world_setup/ShopFacade/pose_avg_stats.txt: -------------------------------------------------------------------------------- 1 | 2.084004683986779016e-01 1.972095064159990266e-02 9.778447365901210553e-01 -4.512817941282106560e+00 2 | -9.780353328393808221e-01 8.307943784904847639e-03 2.082735359757174609e-01 1.914896116567694540e+00 3 | -4.016526979027209426e-03 
-9.997710048685441997e-01 2.101916590087021808e-02 1.768500113487243564e+00 4 | -------------------------------------------------------------------------------- /data/Cambridge_world_setup/ShopFacade/world_setup.json: -------------------------------------------------------------------------------- 1 | { 2 | "near":0.0, 3 | "far":20.0, 4 | "pose_scale": 0.3027, 5 | "pose_scale2": 0.32, 6 | "move_all_cam_vec": [0.0, 0.0, 2.5] 7 | } -------------------------------------------------------------------------------- /data/Cambridge_world_setup/StMarysChurch/pose_avg_stats.txt: -------------------------------------------------------------------------------- 1 | -6.692001528162709878e-01 7.430812642562667492e-01 1.179059789653581552e-03 1.114036505648812359e+01 2 | 3.891382817260490012e-02 3.662925707351961935e-02 -9.985709847092467673e-01 -5.441265972613005403e-02 3 | -7.420625778515127502e-01 -6.681979738352623599e-01 -5.342844106669619036e-02 1.708768320112491068e+01 4 | -------------------------------------------------------------------------------- /data/Cambridge_world_setup/StMarysChurch/world_setup.json: -------------------------------------------------------------------------------- 1 | { 2 | "near":0.0, 3 | "far":10.0, 4 | "pose_scale": 0.3027, 5 | "pose_scale2": 0.2, 6 | "move_all_cam_vec": [0.0, 0.0, 0.0] 7 | } -------------------------------------------------------------------------------- /data/setup_7scenes.py: -------------------------------------------------------------------------------- 1 | # modified from https://github.com/vislearn/dsacstar/blob/master/datasets/setup_7scenes.py 2 | import os 3 | 4 | # name of the folder where we download the original 7scenes dataset to 5 | # we restructure the dataset by creating symbolic links to that folder 6 | src_src_folder='deepslam_data' 7 | src_folder = '7Scenes' 8 | colmap_poses = '7Scenes_colmap_poses' 9 | # focallength = 525.0 10 | 11 | def mkdir(directory): 12 | """Checks whether the directory exists and creates it if necessacy.""" 13 | if not os.path.exists(directory): 14 | os.makedirs(directory) 15 | 16 | # download the original 7 scenes dataset for poses and images 17 | mkdir(src_src_folder) 18 | mkdir(src_src_folder+'/'+src_folder) 19 | os.chdir(src_src_folder+'/'+src_folder) 20 | 21 | for ds in ['chess', 'fire', 'heads', 'office', 'pumpkin', 'redkitchen', 'stairs']: 22 | print("=== Downloading 7scenes Data:", ds, "===============================") 23 | 24 | os.system('wget http://download.microsoft.com/download/2/8/5/28564B23-0828-408F-8631-23B1EFF1DAC8/' + ds + '.zip') 25 | os.system('unzip ' + ds + '.zip') 26 | os.system('rm ' + ds + '.zip') 27 | 28 | sequences = os.listdir(ds) 29 | 30 | for file in sequences: 31 | if file.endswith('.zip'): 32 | 33 | print("Unpacking", file) 34 | os.system('unzip ' + ds + '/' + file + ' -d ' + ds) 35 | os.system('rm ' + ds + '/' + file) 36 | 37 | print("Copying colmap pose files...") 38 | os.system(f'cp ../../7Scenes_colmap_poses/{ds}/*.txt {ds}/') 39 | -------------------------------------------------------------------------------- /data/setup_cambridge.py: -------------------------------------------------------------------------------- 1 | # courtsey to dsac* https://github.com/vislearn/dsacstar/ 2 | import os 3 | import math 4 | 5 | import numpy as np 6 | import cv2 as cv 7 | import torch 8 | from skimage import io 9 | 10 | # setup individual scene IDs and their download location 11 | scenes = [ 12 | 'https://www.repository.cam.ac.uk/bitstream/handle/1810/251342/KingsCollege.zip', 13 | 
'https://www.repository.cam.ac.uk/bitstream/handle/1810/251340/OldHospital.zip', 14 | 'https://www.repository.cam.ac.uk/bitstream/handle/1810/251336/ShopFacade.zip', 15 | 'https://www.repository.cam.ac.uk/bitstream/handle/1810/251294/StMarysChurch.zip', 16 | # 'https://www.repository.cam.ac.uk/bitstream/handle/1810/251291/GreatCourt.zip', 17 | ] 18 | 19 | semantic = ['https://www.robots.ox.ac.uk/~shuaic/NeFeS2024/Cambridge_semantic.zip'] 20 | semantic_file = semantic[0].split('/')[-1] 21 | 22 | target_height = 480 # rescale images 23 | nn_subsampling = 8 # sub sampling of our CNN architecture, for size of the initalization targets 24 | 25 | def mkdir(directory): 26 | """Checks whether the directory exists and creates it if necessacy.""" 27 | if not os.path.exists(directory): 28 | os.makedirs(directory) 29 | 30 | mkdir('Cambridge') 31 | for scene in scenes: 32 | 33 | scene_file = scene.split('/')[-1] 34 | scene_name = scene_file[:-4] 35 | 36 | print("===== Processing " + scene_name + " ===================") 37 | 38 | print("Downloading and unzipping data...") 39 | os.system('wget ' + scene) 40 | os.system('unzip ' + scene_file) 41 | os.system('rm ' + scene_file) 42 | os.system('mv ' + scene_name + ' Cambridge_' + scene_name) 43 | os.chdir('Cambridge_' + scene_name) 44 | 45 | modes = ['train', 'test'] 46 | input_file = 'reconstruction.nvm' 47 | 48 | print("Loading SfM reconstruction...") 49 | 50 | f = open(input_file) 51 | reconstruction = f.readlines() 52 | f.close() 53 | 54 | num_cams = int(reconstruction[2]) 55 | num_pts = int(reconstruction[num_cams + 4]) 56 | 57 | # read points 58 | pts_dict = {} 59 | for cam_idx in range(0, num_cams): 60 | pts_dict[cam_idx] = [] 61 | 62 | pt = pts_start = num_cams + 5 63 | pts_end = pts_start + num_pts 64 | 65 | while pt < pts_end: 66 | 67 | pt_list = reconstruction[pt].split() 68 | pt_3D = [float(x) for x in pt_list[0:3]] 69 | pt_3D.append(1.0) 70 | 71 | for pt_view in range(0, int(pt_list[6])): 72 | cam_view = int(pt_list[7 + pt_view * 4]) 73 | pts_dict[cam_view].append(pt_3D) 74 | 75 | pt += 1 76 | 77 | print("Reconstruction contains %d cameras and %d 3D points." % (num_cams, num_pts)) 78 | 79 | for mode in modes: 80 | 81 | print("Converting " + mode + " data...") 82 | 83 | img_output_folder = mode + '/rgb/' 84 | cal_output_folder = mode + '/calibration/' 85 | pose_output_folder = mode + '/poses/' 86 | target_output_folder = mode + '/init/' 87 | 88 | mkdir(img_output_folder) 89 | mkdir(cal_output_folder) 90 | mkdir(pose_output_folder) 91 | mkdir(target_output_folder) 92 | 93 | # get list of images for current mode (train vs. test) 94 | image_list = 'dataset_'+mode+'.txt' 95 | 96 | f = open(image_list) 97 | camera_list = f.readlines() 98 | f.close() 99 | camera_list = camera_list[3:] 100 | 101 | image_list = [camera.split()[0] for camera in camera_list] 102 | 103 | for cam_idx in range(num_cams): 104 | 105 | print("Processing camera %d of %d." % (cam_idx, num_cams)) 106 | image_file = reconstruction[3 + cam_idx].split()[0] 107 | image_file = image_file[:-3] + 'png' 108 | 109 | if image_file not in image_list: 110 | print("Skipping image " + image_file + ". 
Not part of set: " + mode + ".") 111 | continue 112 | 113 | image_idx = image_list.index(image_file) 114 | 115 | # read camera 116 | camera = camera_list[image_idx].split() 117 | cam_rot = [float(r) for r in camera[4:]] 118 | 119 | #quaternion to axis-angle 120 | angle = 2 * math.acos(cam_rot[0]) 121 | x = cam_rot[1] / math.sqrt(1 - cam_rot[0]**2) 122 | y = cam_rot[2] / math.sqrt(1 - cam_rot[0]**2) 123 | z = cam_rot[3] / math.sqrt(1 - cam_rot[0]**2) 124 | 125 | cam_rot = [x * angle, y * angle, z * angle] 126 | 127 | cam_rot = np.asarray(cam_rot) 128 | cam_rot, _ = cv.Rodrigues(cam_rot) 129 | 130 | cam_trans = [float(r) for r in camera[1:4]] 131 | cam_trans = np.asarray([cam_trans]) 132 | cam_trans = np.transpose(cam_trans) 133 | cam_trans = - np.matmul(cam_rot, cam_trans) 134 | 135 | if np.absolute(cam_trans).max() > 10000: 136 | print("Skipping image " + image_file + ". Extremely large translation. Outlier?") 137 | print(cam_trans) 138 | continue 139 | 140 | cam_pose = np.concatenate((cam_rot, cam_trans), axis = 1) 141 | cam_pose = np.concatenate((cam_pose, [[0, 0, 0, 1]]), axis = 0) 142 | cam_pose = torch.tensor(cam_pose).float() 143 | 144 | focal_length = float(reconstruction[3 + cam_idx].split()[1]) 145 | 146 | #load image 147 | image = io.imread(image_file) 148 | image_file = image_file.replace('/', '_') 149 | 150 | #load 3D points from reconstruction 151 | pts_3D = torch.tensor(pts_dict[cam_idx]) 152 | 153 | img_aspect = image.shape[0] / image.shape[1] 154 | 155 | if img_aspect > 1: 156 | #portrait 157 | img_w = target_height 158 | img_h = int(math.ceil(target_height * img_aspect)) 159 | else: 160 | #landscape 161 | img_w = int(math.ceil(target_height / img_aspect)) 162 | img_h = target_height 163 | 164 | out_w = int(math.ceil(img_w / nn_subsampling)) 165 | out_h = int(math.ceil(img_h / nn_subsampling)) 166 | 167 | out_scale = out_w / image.shape[1] 168 | img_scale = img_w / image.shape[1] 169 | 170 | out_tensor = torch.zeros((3, out_h, out_w)) 171 | out_zbuffer = torch.zeros((out_h, out_w)) 172 | 173 | image = cv.resize(image, (img_w, img_h)) 174 | io.imsave(img_output_folder + image_file, image) 175 | 176 | with open(cal_output_folder + image_file[:-3] + 'txt', 'w') as f: 177 | f.write(str(focal_length * img_scale)) 178 | 179 | inv_cam_pose = cam_pose.inverse() 180 | 181 | with open(pose_output_folder + image_file[:-3] + 'txt', 'w') as f: 182 | f.write(str(float(inv_cam_pose[0, 0])) + ' ' + str(float(inv_cam_pose[0, 1])) + ' ' + str(float(inv_cam_pose[0, 2])) + ' ' + str(float(inv_cam_pose[0, 3])) + '\n') 183 | f.write(str(float(inv_cam_pose[1, 0])) + ' ' + str(float(inv_cam_pose[1, 1])) + ' ' + str(float(inv_cam_pose[1, 2])) + ' ' + str(float(inv_cam_pose[1, 3])) + '\n') 184 | f.write(str(float(inv_cam_pose[2, 0])) + ' ' + str(float(inv_cam_pose[2, 1])) + ' ' + str(float(inv_cam_pose[2, 2])) + ' ' + str(float(inv_cam_pose[2, 3])) + '\n') 185 | f.write(str(float(inv_cam_pose[3, 0])) + ' ' + str(float(inv_cam_pose[3, 1])) + ' ' + str(float(inv_cam_pose[3, 2])) + ' ' + str(float(inv_cam_pose[3, 3])) + '\n') 186 | 187 | 188 | fine = 0 189 | conflict = 0 190 | 191 | for pt_idx in range(0, pts_3D.size(0)): 192 | 193 | scene_pt = pts_3D[pt_idx] 194 | scene_pt = scene_pt.unsqueeze(0) 195 | scene_pt = scene_pt.transpose(0, 1) 196 | 197 | # scene to camera coordinates 198 | cam_pt = torch.mm(cam_pose, scene_pt) 199 | # projection to image 200 | img_pt = cam_pt[0:2, 0] * focal_length / cam_pt[2, 0] * out_scale 201 | 202 | y = img_pt[1] + out_h / 2 203 | x = img_pt[0] + out_w / 2 204 | 205 | 
x = int(torch.clamp(x, min=0, max=out_tensor.size(2)-1)) 206 | y = int(torch.clamp(y, min=0, max=out_tensor.size(1)-1)) 207 | 208 | if cam_pt[2, 0] > 1000: #filter some outlier points (large depth) 209 | continue 210 | 211 | if out_zbuffer[y, x] == 0 or out_zbuffer[y, x] > cam_pt[2, 0]: 212 | out_zbuffer[y, x] = cam_pt[2, 0] 213 | out_tensor[:, y, x] = pts_3D[pt_idx, 0:3] 214 | 215 | torch.save(out_tensor, target_output_folder + image_file[:-4] + '.dat') 216 | 217 | os.chdir('..') 218 | os.system(f'mv Cambridge_{scene_name}/ Cambridge/{scene_name}') 219 | os.system(f'cp Cambridge_world_setup/{scene_name}/* Cambridge/{scene_name}/') 220 | 221 | # put semantic/ folders to Cambridge scenes 222 | os.system('wget ' + semantic[0]) 223 | os.system('unzip ' + semantic_file) 224 | os.system('rm ' + semantic_file) 225 | for scene in scenes: 226 | scene_file = scene.split('/')[-1] 227 | scene_name = scene_file[:-4] 228 | os.system(f'cp -d -r Cambridge_semantic/{scene_name}_semantic_train Cambridge/{scene_name}/train/semantic') 229 | os.system(f'cp -d -r Cambridge_semantic/{scene_name}_semantic_test Cambridge/{scene_name}/test/semantic') 230 | 231 | -------------------------------------------------------------------------------- /dataset_loaders/seven_scenes_colmap.py: -------------------------------------------------------------------------------- 1 | """ 2 | Copyright (C) 2018 NVIDIA Corporation. All rights reserved. 3 | Licensed under the CC BY-NC-SA 4.0 license (https://creativecommons.org/licenses/by-nc-sa/4.0/legalcode). 4 | """ 5 | 6 | """ 7 | pytorch data loader for the 7-scenes dataset 8 | """ 9 | import os 10 | import os.path as osp 11 | import numpy as np 12 | from PIL import Image 13 | import torch 14 | from torch.utils import data 15 | import sys 16 | # import pickle 17 | # import pdb,copy 18 | import cv2 19 | 20 | sys.path.insert(0, '../') 21 | import transforms3d.quaternions as txq 22 | 23 | # see for formulas: 24 | # https://ocw.mit.edu/courses/electrical-engineering-and-computer-science/6-801-machine-vision-fall-2004/readings/quaternions.pdf 25 | # and "Quaternion and Rotation" - Yan-Bin Jia, September 18, 2016 26 | from dataset_loaders.utils.color import rgb_to_yuv 27 | import json 28 | 29 | def RT2QT(poses_in, mean_t, std_t): 30 | """ 31 | processes the 1x12 raw pose from dataset by aligning and then normalizing 32 | :param poses_in: N x 12 33 | :param mean_t: 3 34 | :param std_t: 3 35 | :return: processed poses (translation + quaternion) N x 7 36 | """ 37 | poses_out = np.zeros((len(poses_in), 7)) 38 | poses_out[:, 0:3] = poses_in[:, [3, 7, 11]] 39 | 40 | # align 41 | for i in range(len(poses_out)): 42 | R = poses_in[i].reshape((3, 4))[:3, :3] 43 | q = txq.mat2quat(R) 44 | q = q/(np.linalg.norm(q) + 1e-12) # normalize 45 | q *= np.sign(q[0]) # constrain to hemisphere 46 | poses_out[i, 3:] = q 47 | 48 | # normalize translation 49 | poses_out[:, :3] -= mean_t 50 | poses_out[:, :3] /= std_t 51 | return poses_out 52 | 53 | import transforms3d.quaternions as txq # Warning: outdated package 54 | 55 | def process_poses_quat2mat(poses_in): 56 | """ 57 | processes the raw pose from dataset [Qw, Qx, Qy, Qz, Tx, Ty, Tz] to [R, T] 58 | produce logq 59 | :param poses_in: N x 7 60 | :return: processed poses N x 12 61 | """ 62 | poses_out = np.zeros((len(poses_in), 3, 4)) # (1000,12) 63 | for i in range(len(poses_out)): 64 | q = poses_in[i,:4] 65 | 66 | # use transforms3d ([Qw, Qx, Qy, Qz]) 67 | R = txq.quat2mat(q) 68 | 69 | # # use scipy, same result as transform3d 70 | # from 
scipy.spatial.transform import Rotation as R 71 | # # convert [Qw, Qx, Qy, Qz] -> [Qx, Qy, Qz, Qw] 72 | # q = np.array([q[1], q[2], q[3], q[0]]) 73 | # R2 = R.from_quat(q) # scipy takes (x, y, z, w) -> R 74 | # R2 = R2.as_matrix() 75 | 76 | poses_out[i,:3,:3] = R 77 | poses_out[i,:3,3] = poses_in[i,4:] 78 | poses_out = poses_out.reshape(poses_out.shape[0],12) 79 | return poses_out.reshape(poses_out.shape[0],12) 80 | 81 | from torchvision.datasets.folder import default_loader 82 | def load_image(filename, loader=default_loader): 83 | try: 84 | img = loader(filename) 85 | except IOError as e: 86 | print('Could not load image {:s}, IOError: {:s}'.format(filename, e)) 87 | return None 88 | except: 89 | print('Could not load image {:s}, unexpected error'.format(filename)) 90 | return None 91 | return img 92 | 93 | def load_depth_image(filename): 94 | try: 95 | img_depth = Image.fromarray(np.array(Image.open(filename)).astype("uint16")) 96 | except IOError as e: 97 | print('Could not load image {:s}, IOError: {:s}'.format(filename, e)) 98 | return None 99 | return img_depth 100 | 101 | def normalize(x): 102 | return x / np.linalg.norm(x) 103 | 104 | def viewmatrix(z, up, pos): 105 | vec2 = normalize(z) 106 | vec1_avg = up 107 | vec0 = normalize(np.cross(vec1_avg, vec2)) 108 | vec1 = normalize(np.cross(vec2, vec0)) 109 | m = np.stack([vec0, vec1, vec2, pos], 1) 110 | return m 111 | 112 | def normalize_recenter_pose(poses, sc, hwf): 113 | ''' normalize xyz into [-1, 1], and recenter pose ''' # BUG train and val should use same normalization!!! 114 | target_pose = poses.reshape(poses.shape[0],3,4) 115 | target_pose[:,:3,3] = target_pose[:,:3,3] * sc 116 | 117 | x_norm = target_pose[:,0,3] 118 | y_norm = target_pose[:,1,3] 119 | z_norm = target_pose[:,2,3] 120 | 121 | tpose_ = target_pose+0 122 | 123 | # find the center of pose 124 | center = np.array([x_norm.mean(), y_norm.mean(), z_norm.mean()]) 125 | bottom = np.reshape([0,0,0,1.], [1,4]) 126 | 127 | # pose avg 128 | vec2 = normalize(tpose_[:, :3, 2].sum(0)) 129 | up = tpose_[:, :3, 1].sum(0) 130 | hwf=np.array(hwf).transpose() 131 | c2w = np.concatenate([viewmatrix(vec2, up, center), hwf], 1) 132 | c2w = np.concatenate([c2w[:3,:4], bottom], -2) 133 | 134 | bottom = np.tile(np.reshape(bottom, [1,1,4]), [tpose_.shape[0],1,1]) 135 | poses = np.concatenate([tpose_[:,:3,:4], bottom], -2) 136 | poses = np.linalg.inv(c2w) @ poses 137 | return poses[:,:3,:].reshape(poses.shape[0],12) 138 | 139 | class SevenScenes_colmap(data.Dataset): 140 | def __init__(self, args, scene, data_path, train, transform=None, 141 | target_transform=None, mode=0, seed=7, 142 | df=1., trainskip=1, testskip=1, hwf=[480,640,585.], 143 | ret_idx=False, fix_idx=False, ret_hist=False, hist_bin=10): 144 | """ 145 | load 7scenes data with COLMAP poses from Brachmann21 "On the Limits of Pseudo Ground Truth in Visual Camera Re-localisation" 146 | :param scene: scene name ['chess', 'pumpkin', ...] 147 | :param data_path: root 7scenes data directory. 148 | Usually '../data/deepslam_data/7Scenes' 149 | :param train: if True, return the training images. If False, returns the 150 | testing images 151 | :param transform: transform to apply to the images 152 | :param target_transform: transform to apply to the poses 153 | :param mode: (Obsolete) 0: just color image, 1: color image in NeRF 0-1 and resized. 
154 | :param df: downscale factor 155 | :param trainskip: due to 7scenes are so big, now can use less training sets # of trainset = 1/trainskip 156 | :param testskip: skip part of testset, # of testset = 1/testskip 157 | :param hwf: H,W,Focal from COLMAP 158 | :param ret_idx: bool, currently only used by NeRF-W 159 | """ 160 | 161 | self.transform = transform 162 | self.target_transform = target_transform 163 | self.df = df 164 | 165 | self.H, self.W, self.focal = hwf 166 | self.H = int(self.H) 167 | self.W = int(self.W) 168 | np.random.seed(seed) 169 | 170 | self.train = train 171 | self.ret_idx = ret_idx 172 | self.fix_idx = fix_idx 173 | self.ret_hist = ret_hist 174 | self.hist_bin = hist_bin # histogram bin size 175 | 176 | # directories 177 | base_dir = osp.join(osp.expanduser(data_path), scene) # '../data/deepslam_data/7Scenes' 178 | deepslam_data_dir, _ = osp.split(data_path) 179 | data_dir, _ = osp.split(deepslam_data_dir) 180 | 181 | data_dir = osp.join(data_dir, '7Scenes', scene) # '../data/7Scenes/chess' 182 | world_setup_fn = data_dir + '/world_setup.json' 183 | 184 | # read json file 185 | with open(world_setup_fn, 'r') as myfile: 186 | data=myfile.read() 187 | 188 | # parse json file 189 | obj = json.loads(data) 190 | self.near = obj['near'] 191 | self.far = obj['far'] 192 | self.pose_scale = obj['pose_scale'] 193 | self.pose_scale2 = obj['pose_scale2'] 194 | self.move_all_cam_vec = obj['move_all_cam_vec'] 195 | 196 | # decide which sequences to use 197 | if train: 198 | split_file = osp.join(base_dir, 'TrainSplit.txt') 199 | else: 200 | split_file = osp.join(base_dir, 'TestSplit.txt') 201 | with open(split_file, 'r') as f: 202 | seqs = [int(l.split('sequence')[-1]) for l in f if not l.startswith('#')] # parsing 203 | 204 | # read poses and collect image names 205 | self.c_imgs = [] 206 | self.d_imgs = [] 207 | self.gt_idx = np.empty((0,), dtype=int) 208 | ps = {} 209 | vo_stats = {} 210 | gt_offset = int(0) 211 | 212 | for seq in seqs: 213 | seq_dir = osp.join(base_dir, 'seq-{:02d}'.format(seq)) 214 | seq_data_dir = osp.join(data_dir, 'seq-{:02d}'.format(seq)) 215 | 216 | p_filenames = [n for n in os.listdir(osp.join(seq_dir, '.')) if n.find('pose') >= 0] 217 | idxes = [int(n[6:12]) for n in p_filenames] 218 | 219 | frame_idx = np.array(sorted(idxes)) 220 | 221 | # trainskip and testskip 222 | if train and trainskip > 1: 223 | frame_idx = frame_idx[::trainskip] 224 | elif not train and testskip > 1: 225 | frame_idx = frame_idx[::testskip] 226 | 227 | 228 | self.gt_idx = np.hstack((self.gt_idx, gt_offset+frame_idx)) 229 | gt_offset += len(p_filenames) 230 | c_imgs = [osp.join(seq_dir, 'frame-{:06d}.color.png'.format(i)) for i in frame_idx] 231 | d_imgs = [osp.join(seq_dir, 'frame-{:06d}.depth.png'.format(i)) for i in frame_idx] 232 | self.c_imgs.extend(c_imgs) 233 | self.d_imgs.extend(d_imgs) 234 | 235 | ## parsing GT poses from colmap (for img size of 480x640x3) ### 236 | print("using colmap psudo poses") 237 | if train: 238 | colmap_pose_file = osp.join(base_dir, scene+'_train.txt') 239 | else: 240 | colmap_pose_file = osp.join(base_dir, scene+'_test.txt') 241 | 242 | colmap_poses = np.loadtxt(colmap_pose_file, dtype=str) 243 | 244 | # making a dictionary based on colmap GT files, i.e., {'seq-01/frame-000685.color.png': array(['0.941876', '0.087411', '-0.323503', '0.0239663', '0.525913', '1.011429', '-0.233122', '526.22']} 245 | colmap_pose_dict = {} 246 | for i in range(len(colmap_poses)): 247 | colmap_pose_dict[colmap_poses[i, 0]] = colmap_poses[i, 1:] 248 | 249 | # we 
fetch the quaternion rot, translation t, and focal length f from the dictionary based on c_imgs 250 | self.poses_qtf = [] # qw qx qy qz tx ty tz f 251 | for names in self.c_imgs: 252 | dir1, f_name = osp.split(names) 253 | dir2, seq_name = osp.split(dir1) 254 | c_imgs_tmp = osp.join(seq_name,f_name) 255 | try: 256 | qtf_data = colmap_pose_dict[c_imgs_tmp].astype(np.float32) 257 | except: 258 | print("Error: ", c_imgs_tmp, "check if the colmap file exists") 259 | breakpoint() 260 | self.poses_qtf.append(qtf_data) 261 | self.poses_qtf = np.asarray(self.poses_qtf) 262 | assert(self.poses_qtf.shape[0] == len(self.c_imgs)) 263 | 264 | self.focal = self.poses_qtf[0, -1] 265 | # convert quaternion to rotation matrix 266 | self.poses = process_poses_quat2mat(self.poses_qtf[:,:7]) 267 | 268 | # debug read one img and get the shape of the img 269 | img = load_image(self.c_imgs[0]) 270 | img_np = (np.array(img) / 255.).astype(np.float32) # (480,640,3) 271 | self.H, self.W = img_np.shape[:2] 272 | 273 | if self.df != 1.: 274 | self.H = int(self.H//self.df) 275 | self.W = int(self.W//self.df) 276 | self.focal = self.focal/self.df 277 | 278 | def __len__(self): 279 | return self.poses.shape[0] 280 | 281 | def __getitem__(self, index): 282 | # print("index:", index) 283 | img = load_image(self.c_imgs[index]) # chess img.size = (640,480) 284 | if self.df != 1.: 285 | img_np = (np.array(img) / 255.).astype(np.float32) 286 | dims = (self.W, self.H) 287 | img_half_res = cv2.resize(img_np, dims, interpolation=cv2.INTER_AREA) # (H, W, 3) 288 | img = img_half_res 289 | if self.transform is not None: 290 | img = self.transform(img) 291 | 292 | pose = self.poses[index] 293 | if self.target_transform is not None: 294 | pose = self.target_transform(pose) 295 | 296 | out = {} 297 | out['img'] = img 298 | out['pose'] = pose 299 | 300 | if self.ret_hist: 301 | yuv = rgb_to_yuv(img) 302 | y_img = yuv[0] # extract y channel only 303 | hist = torch.histc(y_img, bins=self.hist_bin, min=0., max=1.) 
# compute intensity histogram 304 | hist = hist/(hist.sum())*100 # convert to histogram density, in terms of percentage per bin 305 | hist = torch.round(hist) 306 | out['hist'] = hist 307 | 308 | if self.ret_idx: 309 | out['idx'] = index 310 | if self.fix_idx: 311 | out['idx'] = 0 312 | 313 | return out 314 | 315 | def main(): 316 | """ 317 | visualizes the dataset 318 | """ 319 | # from common.vis_utils import show_batch, show_stereo_batch 320 | from torchvision.utils import make_grid 321 | import torchvision.transforms as transforms 322 | import sys 323 | sys.path.append('../script/') 324 | 325 | # from script.models.options import config_parser 326 | from models.options import config_parser 327 | 328 | # use this to run the script 329 | # python seven_scenes_colmap.py --config ../script/config/7Scenes/config_chess.txt 330 | 331 | parser = config_parser() 332 | args = parser.parse_args() 333 | print(parser.format_values()) 334 | 335 | data_dir, scene = osp.split(args.datadir) # ../data/7Scenes, chess 336 | dataset_folder, dataset = osp.split(data_dir) # ../data, 7Scenes 337 | data_dir = osp.join(dataset_folder, 'deepslam_data', dataset) # ../data/deepslam_data/7Scenes 338 | 339 | num_workers = 6 340 | transform = transforms.Compose([ 341 | transforms.ToTensor()]) 342 | target_transform = transforms.Lambda(lambda x: torch.Tensor(x)) 343 | 344 | kwargs = dict(args=args, scene=scene, data_path=data_dir, 345 | transform=transform, target_transform=target_transform, 346 | df=2, ret_idx=True, fix_idx=False, ret_hist=True, hist_bin=10) 347 | dset = SevenScenes_colmap(train=True, trainskip=args.trainskip, **kwargs) 348 | print('Loaded 7Scenes sequence {:s}, length = {:d}'.format(scene, len(dset))) 349 | breakpoint() 350 | 351 | data_loader = data.DataLoader(dset, batch_size=4, shuffle=False, num_workers=num_workers) 352 | 353 | batch_count = 0 354 | N = 2 355 | for batch in data_loader: 356 | print('Minibatch {:d}'.format(batch_count)) 357 | 358 | batch_count += 1 359 | if batch_count >= N: 360 | break 361 | 362 | if __name__ == '__main__': 363 | main() 364 | -------------------------------------------------------------------------------- /dataset_loaders/utils/color.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | 4 | def rgb_to_yuv(image: torch.Tensor) -> torch.Tensor: 5 | r""" 6 | From Kornia. 7 | Convert an RGB image to YUV. 8 | 9 | .. image:: _static/img/rgb_to_yuv.png 10 | 11 | The image data is assumed to be in the range of (0, 1). 12 | 13 | Args: 14 | image: RGB Image to be converted to YUV with shape :math:`(*, 3, H, W)`. 15 | 16 | Returns: 17 | YUV version of the image with shape :math:`(*, 3, H, W)`. 18 | 19 | Example: 20 | >>> input = torch.rand(2, 3, 4, 5) 21 | >>> output = rgb_to_yuv(input) # 2x3x4x5 22 | """ 23 | if not isinstance(image, torch.Tensor): 24 | raise TypeError(f"Input type is not a torch.Tensor. Got {type(image)}") 25 | 26 | if len(image.shape) < 3 or image.shape[-3] != 3: 27 | raise ValueError(f"Input size must have a shape of (*, 3, H, W). 
Got {image.shape}") 28 | 29 | r: torch.Tensor = image[..., 0, :, :] 30 | g: torch.Tensor = image[..., 1, :, :] 31 | b: torch.Tensor = image[..., 2, :, :] 32 | 33 | y: torch.Tensor = 0.299 * r + 0.587 * g + 0.114 * b 34 | u: torch.Tensor = -0.147 * r - 0.289 * g + 0.436 * b 35 | v: torch.Tensor = 0.615 * r - 0.515 * g - 0.100 * b 36 | 37 | out: torch.Tensor = torch.stack([y, u, v], -3) 38 | 39 | return out 40 | 41 | def yuv_to_rgb(image: torch.Tensor) -> torch.Tensor: 42 | r"""Convert an YUV image to RGB. 43 | From Kornia 44 | The image data is assumed to be in the range of (0, 1) for luma and (-0.5, 0.5) for chroma. 45 | 46 | Args: 47 | image: YUV Image to be converted to RGB with shape :math:`(*, 3, H, W)`. 48 | 49 | Returns: 50 | RGB version of the image with shape :math:`(*, 3, H, W)`. 51 | 52 | Example: 53 | >>> input = torch.rand(2, 3, 4, 5) 54 | >>> output = yuv_to_rgb(input) # 2x3x4x5 55 | """ 56 | if not isinstance(image, torch.Tensor): 57 | raise TypeError(f"Input type is not a torch.Tensor. Got {type(image)}") 58 | 59 | if len(image.shape) < 3 or image.shape[-3] != 3: 60 | raise ValueError(f"Input size must have a shape of (*, 3, H, W). Got {image.shape}") 61 | 62 | y: torch.Tensor = image[..., 0, :, :] 63 | u: torch.Tensor = image[..., 1, :, :] 64 | v: torch.Tensor = image[..., 2, :, :] 65 | 66 | r: torch.Tensor = y + 1.14 * v # coefficient for g is 0 67 | g: torch.Tensor = y + -0.396 * u - 0.581 * v 68 | b: torch.Tensor = y + 2.029 * u # coefficient for b is 0 69 | 70 | out: torch.Tensor = torch.stack([r, g, b], -3) 71 | 72 | return out 73 | 74 | def rgb_to_yuv_pixels(image: torch.Tensor) -> torch.Tensor: 75 | r""" 76 | From Kornia. 77 | Convert an RGB pixels to YUV. 78 | 79 | .. image:: _static/img/rgb_to_yuv.png 80 | 81 | The image data is assumed to be in the range of (0, 1). 82 | 83 | Args: 84 | image: RGB Image to be converted to YUV with shape :math:`(*, 3, H, W)`. 85 | 86 | Returns: 87 | YUV version of the image with shape :math:`(*, 3, H, W)`. 88 | 89 | Example: 90 | >>> input rgb torch.tensor(N, 3) 91 | >>> output yuv torch.tensor(N, 3) 92 | """ 93 | if not isinstance(image, torch.Tensor): 94 | raise TypeError(f"Input type is not a torch.Tensor. Got {type(image)}") 95 | 96 | if len(image.shape) != 2: 97 | raise ValueError(f"Input size must have a shape of (*, 3). Got {image.shape}") 98 | 99 | r: torch.Tensor = image[..., 0,] 100 | g: torch.Tensor = image[..., 1,] 101 | b: torch.Tensor = image[..., 2,] 102 | 103 | y: torch.Tensor = 0.299 * r + 0.587 * g + 0.114 * b 104 | u: torch.Tensor = -0.147 * r - 0.289 * g + 0.436 * b 105 | v: torch.Tensor = 0.615 * r - 0.515 * g - 0.100 * b 106 | out: torch.Tensor = torch.stack([y, u, v], -1) 107 | 108 | return out 109 | 110 | def yuv_to_rgb_pixels(image: torch.Tensor) -> torch.Tensor: 111 | r"""Convert an YUV pixels to RGB. 112 | From Kornia 113 | The image data is assumed to be in the range of (0, 1) for luma and (-0.5, 0.5) for chroma. 114 | 115 | Args: 116 | image: YUV Image to be converted to RGB with shape :math:`(*, 3, H, W)`. 117 | 118 | Returns: 119 | RGB version of the image with shape :math:`(*, 3, H, W)`. 120 | 121 | Example: 122 | >>> input = torch.rand(2, 3, 4, 5) 123 | >>> output = yuv_to_rgb(input) # 2x3x4x5 124 | """ 125 | if not isinstance(image, torch.Tensor): 126 | raise TypeError(f"Input type is not a torch.Tensor. Got {type(image)}") 127 | 128 | if len(image.shape) != 2: 129 | raise ValueError(f"Input size must have a shape of (*, 3). 
Got {image.shape}") 130 | 131 | y: torch.Tensor = image[..., 0] 132 | u: torch.Tensor = image[..., 1] 133 | v: torch.Tensor = image[..., 2] 134 | 135 | r: torch.Tensor = y + 1.14 * v # coefficient for g is 0 136 | g: torch.Tensor = y + -0.396 * u - 0.581 * v 137 | b: torch.Tensor = y + 2.029 * u # coefficient for b is 0 138 | 139 | out: torch.Tensor = torch.stack([r, g, b], -1) 140 | 141 | return out 142 | -------------------------------------------------------------------------------- /environment.yml: -------------------------------------------------------------------------------- 1 | name: nefes 2 | channels: 3 | - pytorch 4 | - nvidia 5 | - defaults 6 | dependencies: 7 | - cuda-toolkit=12.1.0=0 8 | - ffmpeg=4.3=hf484d3e_0 9 | - numpy=1.26.4=py39h5f9d8c6_0 10 | - pip=23.3.1=py39h06a4308_0 11 | - python=3.9.18=h955ad1f_0 12 | - pytorch=2.2.1=py3.9_cuda12.1_cudnn8.9.2_0 13 | - torchvision=0.17.1=py39_cu121 14 | - yaml=0.2.5=h7b6447c_0 15 | - pip: 16 | - configargparse==1.7 17 | - efficientnet-pytorch==0.7.1 18 | - einops==0.7.0 19 | - imageio==2.34.1 20 | - kornia==0.7.2 21 | - matplotlib==3.8.4 22 | - ninja==1.11.1.1 23 | - opencv-python==4.9.0.80 24 | - packaging==24.0 25 | - pandas==2.2.1 26 | - scikit-image==0.22.0 27 | - scipy==1.12.0 28 | - torchsummary==1.5.1 29 | - torchviz==0.0.2 30 | - tqdm==4.66.2 31 | - transforms3d==0.4.1 32 | -------------------------------------------------------------------------------- /imgs/.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ActiveVisionLab/NeFeS/1ac72cb48af60d5bfa1fed1d1af9d0f1dc750b7a/imgs/.DS_Store -------------------------------------------------------------------------------- /imgs/nefes.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ActiveVisionLab/NeFeS/1ac72cb48af60d5bfa1fed1d1af9d0f1dc750b7a/imgs/nefes.png -------------------------------------------------------------------------------- /imgs/pipeline.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ActiveVisionLab/NeFeS/1ac72cb48af60d5bfa1fed1d1af9d0f1dc750b7a/imgs/pipeline.png -------------------------------------------------------------------------------- /paper_result/DFNet_NeFeS50_Cambridge/OldHospital/OldHospital_test_gt_filename.txt: -------------------------------------------------------------------------------- 1 | ../data/Cambridge/OldHospital/test/rgb/seq4_frame00001.png 2 | ../data/Cambridge/OldHospital/test/rgb/seq4_frame00002.png 3 | ../data/Cambridge/OldHospital/test/rgb/seq4_frame00003.png 4 | ../data/Cambridge/OldHospital/test/rgb/seq4_frame00004.png 5 | ../data/Cambridge/OldHospital/test/rgb/seq4_frame00005.png 6 | ../data/Cambridge/OldHospital/test/rgb/seq4_frame00006.png 7 | ../data/Cambridge/OldHospital/test/rgb/seq4_frame00007.png 8 | ../data/Cambridge/OldHospital/test/rgb/seq4_frame00008.png 9 | ../data/Cambridge/OldHospital/test/rgb/seq4_frame00009.png 10 | ../data/Cambridge/OldHospital/test/rgb/seq4_frame00010.png 11 | ../data/Cambridge/OldHospital/test/rgb/seq4_frame00011.png 12 | ../data/Cambridge/OldHospital/test/rgb/seq4_frame00012.png 13 | ../data/Cambridge/OldHospital/test/rgb/seq4_frame00013.png 14 | ../data/Cambridge/OldHospital/test/rgb/seq4_frame00014.png 15 | ../data/Cambridge/OldHospital/test/rgb/seq4_frame00015.png 16 | ../data/Cambridge/OldHospital/test/rgb/seq4_frame00016.png 17 | 
../data/Cambridge/OldHospital/test/rgb/seq4_frame00017.png 18 | ../data/Cambridge/OldHospital/test/rgb/seq4_frame00018.png 19 | ../data/Cambridge/OldHospital/test/rgb/seq4_frame00019.png 20 | ../data/Cambridge/OldHospital/test/rgb/seq4_frame00020.png 21 | ../data/Cambridge/OldHospital/test/rgb/seq4_frame00021.png 22 | ../data/Cambridge/OldHospital/test/rgb/seq4_frame00022.png 23 | ../data/Cambridge/OldHospital/test/rgb/seq4_frame00023.png 24 | ../data/Cambridge/OldHospital/test/rgb/seq4_frame00024.png 25 | ../data/Cambridge/OldHospital/test/rgb/seq4_frame00025.png 26 | ../data/Cambridge/OldHospital/test/rgb/seq4_frame00026.png 27 | ../data/Cambridge/OldHospital/test/rgb/seq4_frame00027.png 28 | ../data/Cambridge/OldHospital/test/rgb/seq4_frame00028.png 29 | ../data/Cambridge/OldHospital/test/rgb/seq4_frame00029.png 30 | ../data/Cambridge/OldHospital/test/rgb/seq4_frame00030.png 31 | ../data/Cambridge/OldHospital/test/rgb/seq4_frame00031.png 32 | ../data/Cambridge/OldHospital/test/rgb/seq4_frame00032.png 33 | ../data/Cambridge/OldHospital/test/rgb/seq4_frame00033.png 34 | ../data/Cambridge/OldHospital/test/rgb/seq4_frame00034.png 35 | ../data/Cambridge/OldHospital/test/rgb/seq4_frame00035.png 36 | ../data/Cambridge/OldHospital/test/rgb/seq4_frame00036.png 37 | ../data/Cambridge/OldHospital/test/rgb/seq4_frame00037.png 38 | ../data/Cambridge/OldHospital/test/rgb/seq4_frame00038.png 39 | ../data/Cambridge/OldHospital/test/rgb/seq4_frame00039.png 40 | ../data/Cambridge/OldHospital/test/rgb/seq4_frame00040.png 41 | ../data/Cambridge/OldHospital/test/rgb/seq4_frame00041.png 42 | ../data/Cambridge/OldHospital/test/rgb/seq4_frame00042.png 43 | ../data/Cambridge/OldHospital/test/rgb/seq4_frame00043.png 44 | ../data/Cambridge/OldHospital/test/rgb/seq4_frame00044.png 45 | ../data/Cambridge/OldHospital/test/rgb/seq4_frame00045.png 46 | ../data/Cambridge/OldHospital/test/rgb/seq4_frame00046.png 47 | ../data/Cambridge/OldHospital/test/rgb/seq4_frame00047.png 48 | ../data/Cambridge/OldHospital/test/rgb/seq4_frame00048.png 49 | ../data/Cambridge/OldHospital/test/rgb/seq4_frame00049.png 50 | ../data/Cambridge/OldHospital/test/rgb/seq4_frame00050.png 51 | ../data/Cambridge/OldHospital/test/rgb/seq4_frame00051.png 52 | ../data/Cambridge/OldHospital/test/rgb/seq4_frame00052.png 53 | ../data/Cambridge/OldHospital/test/rgb/seq4_frame00053.png 54 | ../data/Cambridge/OldHospital/test/rgb/seq4_frame00054.png 55 | ../data/Cambridge/OldHospital/test/rgb/seq4_frame00055.png 56 | ../data/Cambridge/OldHospital/test/rgb/seq4_frame00056.png 57 | ../data/Cambridge/OldHospital/test/rgb/seq8_frame00001.png 58 | ../data/Cambridge/OldHospital/test/rgb/seq8_frame00002.png 59 | ../data/Cambridge/OldHospital/test/rgb/seq8_frame00003.png 60 | ../data/Cambridge/OldHospital/test/rgb/seq8_frame00004.png 61 | ../data/Cambridge/OldHospital/test/rgb/seq8_frame00005.png 62 | ../data/Cambridge/OldHospital/test/rgb/seq8_frame00006.png 63 | ../data/Cambridge/OldHospital/test/rgb/seq8_frame00007.png 64 | ../data/Cambridge/OldHospital/test/rgb/seq8_frame00008.png 65 | ../data/Cambridge/OldHospital/test/rgb/seq8_frame00009.png 66 | ../data/Cambridge/OldHospital/test/rgb/seq8_frame00010.png 67 | ../data/Cambridge/OldHospital/test/rgb/seq8_frame00011.png 68 | ../data/Cambridge/OldHospital/test/rgb/seq8_frame00012.png 69 | ../data/Cambridge/OldHospital/test/rgb/seq8_frame00013.png 70 | ../data/Cambridge/OldHospital/test/rgb/seq8_frame00014.png 71 | ../data/Cambridge/OldHospital/test/rgb/seq8_frame00015.png 72 | 
../data/Cambridge/OldHospital/test/rgb/seq8_frame00016.png 73 | ../data/Cambridge/OldHospital/test/rgb/seq8_frame00017.png 74 | ../data/Cambridge/OldHospital/test/rgb/seq8_frame00018.png 75 | ../data/Cambridge/OldHospital/test/rgb/seq8_frame00019.png 76 | ../data/Cambridge/OldHospital/test/rgb/seq8_frame00020.png 77 | ../data/Cambridge/OldHospital/test/rgb/seq8_frame00021.png 78 | ../data/Cambridge/OldHospital/test/rgb/seq8_frame00022.png 79 | ../data/Cambridge/OldHospital/test/rgb/seq8_frame00023.png 80 | ../data/Cambridge/OldHospital/test/rgb/seq8_frame00024.png 81 | ../data/Cambridge/OldHospital/test/rgb/seq8_frame00025.png 82 | ../data/Cambridge/OldHospital/test/rgb/seq8_frame00026.png 83 | ../data/Cambridge/OldHospital/test/rgb/seq8_frame00027.png 84 | ../data/Cambridge/OldHospital/test/rgb/seq8_frame00028.png 85 | ../data/Cambridge/OldHospital/test/rgb/seq8_frame00029.png 86 | ../data/Cambridge/OldHospital/test/rgb/seq8_frame00030.png 87 | ../data/Cambridge/OldHospital/test/rgb/seq8_frame00031.png 88 | ../data/Cambridge/OldHospital/test/rgb/seq8_frame00032.png 89 | ../data/Cambridge/OldHospital/test/rgb/seq8_frame00033.png 90 | ../data/Cambridge/OldHospital/test/rgb/seq8_frame00034.png 91 | ../data/Cambridge/OldHospital/test/rgb/seq8_frame00035.png 92 | ../data/Cambridge/OldHospital/test/rgb/seq8_frame00036.png 93 | ../data/Cambridge/OldHospital/test/rgb/seq8_frame00037.png 94 | ../data/Cambridge/OldHospital/test/rgb/seq8_frame00038.png 95 | ../data/Cambridge/OldHospital/test/rgb/seq8_frame00039.png 96 | ../data/Cambridge/OldHospital/test/rgb/seq8_frame00040.png 97 | ../data/Cambridge/OldHospital/test/rgb/seq8_frame00041.png 98 | ../data/Cambridge/OldHospital/test/rgb/seq8_frame00042.png 99 | ../data/Cambridge/OldHospital/test/rgb/seq8_frame00043.png 100 | ../data/Cambridge/OldHospital/test/rgb/seq8_frame00044.png 101 | ../data/Cambridge/OldHospital/test/rgb/seq8_frame00045.png 102 | ../data/Cambridge/OldHospital/test/rgb/seq8_frame00046.png 103 | ../data/Cambridge/OldHospital/test/rgb/seq8_frame00047.png 104 | ../data/Cambridge/OldHospital/test/rgb/seq8_frame00048.png 105 | ../data/Cambridge/OldHospital/test/rgb/seq8_frame00049.png 106 | ../data/Cambridge/OldHospital/test/rgb/seq8_frame00050.png 107 | ../data/Cambridge/OldHospital/test/rgb/seq8_frame00051.png 108 | ../data/Cambridge/OldHospital/test/rgb/seq8_frame00052.png 109 | ../data/Cambridge/OldHospital/test/rgb/seq8_frame00053.png 110 | ../data/Cambridge/OldHospital/test/rgb/seq8_frame00054.png 111 | ../data/Cambridge/OldHospital/test/rgb/seq8_frame00055.png 112 | ../data/Cambridge/OldHospital/test/rgb/seq8_frame00056.png 113 | ../data/Cambridge/OldHospital/test/rgb/seq8_frame00057.png 114 | ../data/Cambridge/OldHospital/test/rgb/seq8_frame00058.png 115 | ../data/Cambridge/OldHospital/test/rgb/seq8_frame00059.png 116 | ../data/Cambridge/OldHospital/test/rgb/seq8_frame00060.png 117 | ../data/Cambridge/OldHospital/test/rgb/seq8_frame00061.png 118 | ../data/Cambridge/OldHospital/test/rgb/seq8_frame00062.png 119 | ../data/Cambridge/OldHospital/test/rgb/seq8_frame00063.png 120 | ../data/Cambridge/OldHospital/test/rgb/seq8_frame00064.png 121 | ../data/Cambridge/OldHospital/test/rgb/seq8_frame00065.png 122 | ../data/Cambridge/OldHospital/test/rgb/seq8_frame00066.png 123 | ../data/Cambridge/OldHospital/test/rgb/seq8_frame00067.png 124 | ../data/Cambridge/OldHospital/test/rgb/seq8_frame00068.png 125 | ../data/Cambridge/OldHospital/test/rgb/seq8_frame00069.png 126 | ../data/Cambridge/OldHospital/test/rgb/seq8_frame00070.png 127 | 
../data/Cambridge/OldHospital/test/rgb/seq8_frame00071.png 128 | ../data/Cambridge/OldHospital/test/rgb/seq8_frame00072.png 129 | ../data/Cambridge/OldHospital/test/rgb/seq8_frame00073.png 130 | ../data/Cambridge/OldHospital/test/rgb/seq8_frame00074.png 131 | ../data/Cambridge/OldHospital/test/rgb/seq8_frame00075.png 132 | ../data/Cambridge/OldHospital/test/rgb/seq8_frame00076.png 133 | ../data/Cambridge/OldHospital/test/rgb/seq8_frame00077.png 134 | ../data/Cambridge/OldHospital/test/rgb/seq8_frame00078.png 135 | ../data/Cambridge/OldHospital/test/rgb/seq8_frame00079.png 136 | ../data/Cambridge/OldHospital/test/rgb/seq8_frame00080.png 137 | ../data/Cambridge/OldHospital/test/rgb/seq8_frame00081.png 138 | ../data/Cambridge/OldHospital/test/rgb/seq8_frame00082.png 139 | ../data/Cambridge/OldHospital/test/rgb/seq8_frame00083.png 140 | ../data/Cambridge/OldHospital/test/rgb/seq8_frame00084.png 141 | ../data/Cambridge/OldHospital/test/rgb/seq8_frame00085.png 142 | ../data/Cambridge/OldHospital/test/rgb/seq8_frame00086.png 143 | ../data/Cambridge/OldHospital/test/rgb/seq8_frame00087.png 144 | ../data/Cambridge/OldHospital/test/rgb/seq8_frame00088.png 145 | ../data/Cambridge/OldHospital/test/rgb/seq8_frame00089.png 146 | ../data/Cambridge/OldHospital/test/rgb/seq8_frame00090.png 147 | ../data/Cambridge/OldHospital/test/rgb/seq8_frame00091.png 148 | ../data/Cambridge/OldHospital/test/rgb/seq8_frame00092.png 149 | ../data/Cambridge/OldHospital/test/rgb/seq8_frame00093.png 150 | ../data/Cambridge/OldHospital/test/rgb/seq8_frame00094.png 151 | ../data/Cambridge/OldHospital/test/rgb/seq8_frame00095.png 152 | ../data/Cambridge/OldHospital/test/rgb/seq8_frame00096.png 153 | ../data/Cambridge/OldHospital/test/rgb/seq8_frame00097.png 154 | ../data/Cambridge/OldHospital/test/rgb/seq8_frame00098.png 155 | ../data/Cambridge/OldHospital/test/rgb/seq8_frame00099.png 156 | ../data/Cambridge/OldHospital/test/rgb/seq8_frame00100.png 157 | ../data/Cambridge/OldHospital/test/rgb/seq8_frame00101.png 158 | ../data/Cambridge/OldHospital/test/rgb/seq8_frame00102.png 159 | ../data/Cambridge/OldHospital/test/rgb/seq8_frame00103.png 160 | ../data/Cambridge/OldHospital/test/rgb/seq8_frame00104.png 161 | ../data/Cambridge/OldHospital/test/rgb/seq8_frame00105.png 162 | ../data/Cambridge/OldHospital/test/rgb/seq8_frame00106.png 163 | ../data/Cambridge/OldHospital/test/rgb/seq8_frame00107.png 164 | ../data/Cambridge/OldHospital/test/rgb/seq8_frame00108.png 165 | ../data/Cambridge/OldHospital/test/rgb/seq8_frame00109.png 166 | ../data/Cambridge/OldHospital/test/rgb/seq8_frame00110.png 167 | ../data/Cambridge/OldHospital/test/rgb/seq8_frame00111.png 168 | ../data/Cambridge/OldHospital/test/rgb/seq8_frame00112.png 169 | ../data/Cambridge/OldHospital/test/rgb/seq8_frame00113.png 170 | ../data/Cambridge/OldHospital/test/rgb/seq8_frame00114.png 171 | ../data/Cambridge/OldHospital/test/rgb/seq8_frame00115.png 172 | ../data/Cambridge/OldHospital/test/rgb/seq8_frame00116.png 173 | ../data/Cambridge/OldHospital/test/rgb/seq8_frame00117.png 174 | ../data/Cambridge/OldHospital/test/rgb/seq8_frame00118.png 175 | ../data/Cambridge/OldHospital/test/rgb/seq8_frame00119.png 176 | ../data/Cambridge/OldHospital/test/rgb/seq8_frame00120.png 177 | ../data/Cambridge/OldHospital/test/rgb/seq8_frame00121.png 178 | ../data/Cambridge/OldHospital/test/rgb/seq8_frame00122.png 179 | ../data/Cambridge/OldHospital/test/rgb/seq8_frame00123.png 180 | ../data/Cambridge/OldHospital/test/rgb/seq8_frame00124.png 181 | 
../data/Cambridge/OldHospital/test/rgb/seq8_frame00125.png 182 | ../data/Cambridge/OldHospital/test/rgb/seq8_frame00126.png 183 | -------------------------------------------------------------------------------- /paper_result/DFNet_NeFeS50_Cambridge/ShopFacade/ShopFacade_test_gt_filename.txt: -------------------------------------------------------------------------------- 1 | ../data/Cambridge/ShopFacade/test/rgb/seq1_frame00001.png 2 | ../data/Cambridge/ShopFacade/test/rgb/seq1_frame00002.png 3 | ../data/Cambridge/ShopFacade/test/rgb/seq1_frame00003.png 4 | ../data/Cambridge/ShopFacade/test/rgb/seq1_frame00004.png 5 | ../data/Cambridge/ShopFacade/test/rgb/seq1_frame00005.png 6 | ../data/Cambridge/ShopFacade/test/rgb/seq1_frame00006.png 7 | ../data/Cambridge/ShopFacade/test/rgb/seq1_frame00007.png 8 | ../data/Cambridge/ShopFacade/test/rgb/seq1_frame00008.png 9 | ../data/Cambridge/ShopFacade/test/rgb/seq1_frame00009.png 10 | ../data/Cambridge/ShopFacade/test/rgb/seq1_frame00010.png 11 | ../data/Cambridge/ShopFacade/test/rgb/seq1_frame00011.png 12 | ../data/Cambridge/ShopFacade/test/rgb/seq1_frame00012.png 13 | ../data/Cambridge/ShopFacade/test/rgb/seq1_frame00013.png 14 | ../data/Cambridge/ShopFacade/test/rgb/seq1_frame00014.png 15 | ../data/Cambridge/ShopFacade/test/rgb/seq1_frame00015.png 16 | ../data/Cambridge/ShopFacade/test/rgb/seq1_frame00016.png 17 | ../data/Cambridge/ShopFacade/test/rgb/seq1_frame00017.png 18 | ../data/Cambridge/ShopFacade/test/rgb/seq1_frame00018.png 19 | ../data/Cambridge/ShopFacade/test/rgb/seq1_frame00019.png 20 | ../data/Cambridge/ShopFacade/test/rgb/seq1_frame00020.png 21 | ../data/Cambridge/ShopFacade/test/rgb/seq1_frame00021.png 22 | ../data/Cambridge/ShopFacade/test/rgb/seq1_frame00022.png 23 | ../data/Cambridge/ShopFacade/test/rgb/seq1_frame00023.png 24 | ../data/Cambridge/ShopFacade/test/rgb/seq1_frame00024.png 25 | ../data/Cambridge/ShopFacade/test/rgb/seq1_frame00025.png 26 | ../data/Cambridge/ShopFacade/test/rgb/seq1_frame00026.png 27 | ../data/Cambridge/ShopFacade/test/rgb/seq1_frame00027.png 28 | ../data/Cambridge/ShopFacade/test/rgb/seq1_frame00028.png 29 | ../data/Cambridge/ShopFacade/test/rgb/seq1_frame00029.png 30 | ../data/Cambridge/ShopFacade/test/rgb/seq1_frame00030.png 31 | ../data/Cambridge/ShopFacade/test/rgb/seq1_frame00031.png 32 | ../data/Cambridge/ShopFacade/test/rgb/seq1_frame00032.png 33 | ../data/Cambridge/ShopFacade/test/rgb/seq1_frame00033.png 34 | ../data/Cambridge/ShopFacade/test/rgb/seq1_frame00034.png 35 | ../data/Cambridge/ShopFacade/test/rgb/seq1_frame00035.png 36 | ../data/Cambridge/ShopFacade/test/rgb/seq1_frame00036.png 37 | ../data/Cambridge/ShopFacade/test/rgb/seq3_frame00001.png 38 | ../data/Cambridge/ShopFacade/test/rgb/seq3_frame00002.png 39 | ../data/Cambridge/ShopFacade/test/rgb/seq3_frame00003.png 40 | ../data/Cambridge/ShopFacade/test/rgb/seq3_frame00004.png 41 | ../data/Cambridge/ShopFacade/test/rgb/seq3_frame00005.png 42 | ../data/Cambridge/ShopFacade/test/rgb/seq3_frame00006.png 43 | ../data/Cambridge/ShopFacade/test/rgb/seq3_frame00007.png 44 | ../data/Cambridge/ShopFacade/test/rgb/seq3_frame00008.png 45 | ../data/Cambridge/ShopFacade/test/rgb/seq3_frame00009.png 46 | ../data/Cambridge/ShopFacade/test/rgb/seq3_frame00010.png 47 | ../data/Cambridge/ShopFacade/test/rgb/seq3_frame00011.png 48 | ../data/Cambridge/ShopFacade/test/rgb/seq3_frame00012.png 49 | ../data/Cambridge/ShopFacade/test/rgb/seq3_frame00013.png 50 | ../data/Cambridge/ShopFacade/test/rgb/seq3_frame00014.png 51 | 
../data/Cambridge/ShopFacade/test/rgb/seq3_frame00015.png 52 | ../data/Cambridge/ShopFacade/test/rgb/seq3_frame00016.png 53 | ../data/Cambridge/ShopFacade/test/rgb/seq3_frame00017.png 54 | ../data/Cambridge/ShopFacade/test/rgb/seq3_frame00018.png 55 | ../data/Cambridge/ShopFacade/test/rgb/seq3_frame00019.png 56 | ../data/Cambridge/ShopFacade/test/rgb/seq3_frame00020.png 57 | ../data/Cambridge/ShopFacade/test/rgb/seq3_frame00021.png 58 | ../data/Cambridge/ShopFacade/test/rgb/seq3_frame00022.png 59 | ../data/Cambridge/ShopFacade/test/rgb/seq3_frame00023.png 60 | ../data/Cambridge/ShopFacade/test/rgb/seq3_frame00024.png 61 | ../data/Cambridge/ShopFacade/test/rgb/seq3_frame00025.png 62 | ../data/Cambridge/ShopFacade/test/rgb/seq3_frame00026.png 63 | ../data/Cambridge/ShopFacade/test/rgb/seq3_frame00027.png 64 | ../data/Cambridge/ShopFacade/test/rgb/seq3_frame00028.png 65 | ../data/Cambridge/ShopFacade/test/rgb/seq3_frame00029.png 66 | ../data/Cambridge/ShopFacade/test/rgb/seq3_frame00030.png 67 | ../data/Cambridge/ShopFacade/test/rgb/seq3_frame00031.png 68 | ../data/Cambridge/ShopFacade/test/rgb/seq3_frame00032.png 69 | ../data/Cambridge/ShopFacade/test/rgb/seq3_frame00033.png 70 | ../data/Cambridge/ShopFacade/test/rgb/seq3_frame00034.png 71 | ../data/Cambridge/ShopFacade/test/rgb/seq3_frame00035.png 72 | ../data/Cambridge/ShopFacade/test/rgb/seq3_frame00036.png 73 | ../data/Cambridge/ShopFacade/test/rgb/seq3_frame00037.png 74 | ../data/Cambridge/ShopFacade/test/rgb/seq3_frame00038.png 75 | ../data/Cambridge/ShopFacade/test/rgb/seq3_frame00039.png 76 | ../data/Cambridge/ShopFacade/test/rgb/seq3_frame00040.png 77 | ../data/Cambridge/ShopFacade/test/rgb/seq3_frame00041.png 78 | ../data/Cambridge/ShopFacade/test/rgb/seq3_frame00042.png 79 | ../data/Cambridge/ShopFacade/test/rgb/seq3_frame00043.png 80 | ../data/Cambridge/ShopFacade/test/rgb/seq3_frame00044.png 81 | ../data/Cambridge/ShopFacade/test/rgb/seq3_frame00045.png 82 | ../data/Cambridge/ShopFacade/test/rgb/seq3_frame00046.png 83 | ../data/Cambridge/ShopFacade/test/rgb/seq3_frame00047.png 84 | ../data/Cambridge/ShopFacade/test/rgb/seq3_frame00048.png 85 | ../data/Cambridge/ShopFacade/test/rgb/seq3_frame00049.png 86 | ../data/Cambridge/ShopFacade/test/rgb/seq3_frame00050.png 87 | ../data/Cambridge/ShopFacade/test/rgb/seq3_frame00051.png 88 | ../data/Cambridge/ShopFacade/test/rgb/seq3_frame00052.png 89 | ../data/Cambridge/ShopFacade/test/rgb/seq3_frame00053.png 90 | ../data/Cambridge/ShopFacade/test/rgb/seq3_frame00054.png 91 | ../data/Cambridge/ShopFacade/test/rgb/seq3_frame00055.png 92 | ../data/Cambridge/ShopFacade/test/rgb/seq3_frame00056.png 93 | ../data/Cambridge/ShopFacade/test/rgb/seq3_frame00057.png 94 | ../data/Cambridge/ShopFacade/test/rgb/seq3_frame00058.png 95 | ../data/Cambridge/ShopFacade/test/rgb/seq3_frame00059.png 96 | ../data/Cambridge/ShopFacade/test/rgb/seq3_frame00060.png 97 | ../data/Cambridge/ShopFacade/test/rgb/seq3_frame00061.png 98 | ../data/Cambridge/ShopFacade/test/rgb/seq3_frame00062.png 99 | ../data/Cambridge/ShopFacade/test/rgb/seq3_frame00063.png 100 | ../data/Cambridge/ShopFacade/test/rgb/seq3_frame00064.png 101 | ../data/Cambridge/ShopFacade/test/rgb/seq3_frame00065.png 102 | ../data/Cambridge/ShopFacade/test/rgb/seq3_frame00066.png 103 | ../data/Cambridge/ShopFacade/test/rgb/seq3_frame00067.png 104 | -------------------------------------------------------------------------------- /script/config/7Scenes/.DS_Store: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/ActiveVisionLab/NeFeS/1ac72cb48af60d5bfa1fed1d1af9d0f1dc750b7a/script/config/7Scenes/.DS_Store -------------------------------------------------------------------------------- /script/config/7Scenes/dfnet/config_chess_DFM.txt: -------------------------------------------------------------------------------- 1 | # ########################################## FM EXP. NFF APR ########################################## 2 | # test on DFM post processing using NeFeS. optimize APR model. 3 | model_name=DFM_NFF_APR 4 | # NeRF/NFF Signature 5 | expname=DFNet_NeFeS50_7Scenes_chess_colmap 6 | ft_path=../logs/paper_models/nefes/NeFeS_chess_colmap/nefes.tar 7 | tinyscale=4 8 | datadir=../data/7Scenes/chess 9 | dataset_type=7Scenes_colmap 10 | # Feature CNN Signature 11 | pretrain_model_path=../logs/paper_models/apr/DFNet_chess_colmap/checkpoint-chess-colmap.pt 12 | pretrain_featurenet_path=../logs/paper_models/dfnet/DFNet_chess/checkpoint-chess.pt 13 | dfnet=True 14 | # Other Training Signature 15 | trainskip=1 16 | testskip=1 17 | df=2 # df=2 which use a input of 240x427 18 | load_pose_avg_stats=True 19 | freezeBN=True 20 | learning_rate=0.00001 21 | pose_only=2 # APR+NeFeS50 22 | svd_reg=True 23 | transient_at_test=True # exp. 24 | # netchunk=2097152 # default 2^21, high gpu utility/high memory usage, consider reduce this if GPU OOM -------------------------------------------------------------------------------- /script/config/7Scenes/dfnet/config_chess_stage1.txt: -------------------------------------------------------------------------------- 1 | # Train nefes color only stage 2 | expname=nefes_chess_color_only 3 | basedir=../logs 4 | datadir=../data/7Scenes/chess 5 | dataset_type=7Scenes_colmap 6 | trainskip=2 7 | testskip=10 8 | df=2 # train 9 | load_pose_avg_stats=True 10 | dfnet=True 11 | color_feat_fusion_nerfw_loss=True 12 | pretrain_featurenet_path=../logs/paper_models/dfnet/DFNet_chess/checkpoint-chess.pt 13 | transient_at_test=True 14 | tinyscale=4 # Train 15 | semantic=False # this is True for cambridge dataset 16 | # render_test=True # this is for testing the NeFeS rendering after training -------------------------------------------------------------------------------- /script/config/7Scenes/dfnet/config_chess_stage2.txt: -------------------------------------------------------------------------------- 1 | # Train nefes new schedule stage 2 | expname=nefes_chess 3 | basedir=../logs 4 | datadir=../data/7Scenes/chess 5 | dataset_type=7Scenes_colmap 6 | trainskip=2 7 | testskip=10 8 | df=2 # train 9 | load_pose_avg_stats=True 10 | dfnet=True 11 | color_feat_fusion_nerfw_loss=True 12 | pretrain_featurenet_path=../logs/paper_models/dfnet/DFNet_chess/checkpoint-chess.pt 13 | transient_at_test=True 14 | tinyscale=4 # Train 15 | semantic=False # this is True for cambridge dataset 16 | new_schedule=2 # new schedule, train color+feature+fusion 17 | ft_path=../logs/nefes_chess_color_only/000600.tar 18 | # render_test=True 19 | -------------------------------------------------------------------------------- /script/config/7Scenes/dfnet/config_fire_DFM.txt: -------------------------------------------------------------------------------- 1 | # ########################################## FM EXP. NFF APR ########################################## 2 | # test on DFM post processing using NeFeS. optimize APR model. 
3 | model_name=DFM_NFF_APR 4 | # NeRF/NFF Signature 5 | expname=DFNet_NeFeS50_7Scenes_fire_colmap 6 | ft_path=../logs/paper_models/nefes/NeFeS_fire_colmap/nefes.tar 7 | tinyscale=4 8 | datadir=../data/7Scenes/fire 9 | dataset_type=7Scenes_colmap 10 | # Feature CNN Signature 11 | pretrain_model_path=../logs/paper_models/apr/DFNet_fire_colmap/checkpoint-fire-colmap.pt 12 | pretrain_featurenet_path=../logs/paper_models/dfnet/DFNet_fire/checkpoint-fire.pt 13 | dfnet=True 14 | # Other Training Signature 15 | trainskip=1 16 | testskip=1 17 | df=2 # df=2 which use a input of 240x427 18 | load_pose_avg_stats=True 19 | freezeBN=True 20 | learning_rate=0.00001 21 | pose_only=2 # APR+NeFeS50 22 | svd_reg=True 23 | transient_at_test=True # exp. 24 | # netchunk=2097152 # default 2^21, high gpu utility/high memory usage, consider reduce this if GPU OOM -------------------------------------------------------------------------------- /script/config/7Scenes/dfnet/config_fire_stage1.txt: -------------------------------------------------------------------------------- 1 | # Train nefes color only stage 2 | expname=nefes_fire_color_only 3 | basedir=../logs 4 | datadir=../data/7Scenes/fire 5 | dataset_type=7Scenes_colmap 6 | trainskip=2 7 | testskip=10 8 | df=2 # train 9 | load_pose_avg_stats=True 10 | dfnet=True 11 | color_feat_fusion_nerfw_loss=True 12 | pretrain_featurenet_path=../logs/paper_models/dfnet/DFNet_fire/checkpoint-fire.pt 13 | transient_at_test=True 14 | tinyscale=4 # Train 15 | semantic=False # this is True for cambridge dataset 16 | # render_test=True # this is for testing the NeFeS rendering after training -------------------------------------------------------------------------------- /script/config/7Scenes/dfnet/config_fire_stage2.txt: -------------------------------------------------------------------------------- 1 | # Train nefes new schedule stage 2 | expname=nefes_fire 3 | basedir=../logs 4 | datadir=../data/7Scenes/fire 5 | dataset_type=7Scenes_colmap 6 | trainskip=2 7 | testskip=10 8 | df=2 # train 9 | load_pose_avg_stats=True 10 | dfnet=True 11 | color_feat_fusion_nerfw_loss=True 12 | pretrain_featurenet_path=../logs/paper_models/dfnet/DFNet_fire/checkpoint-fire.pt 13 | transient_at_test=True 14 | tinyscale=4 # Train 15 | semantic=False # this is True for cambridge dataset 16 | new_schedule=2 # new schedule, train color+feature+fusion 17 | ft_path=../logs/nefes_fire_color_only/000600.tar 18 | # render_test=True 19 | -------------------------------------------------------------------------------- /script/config/7Scenes/dfnet/config_heads_DFM.txt: -------------------------------------------------------------------------------- 1 | # ########################################## FM EXP. NFF APR ########################################## 2 | # test on DFM post processing using NeFeS. optimize APR model. 
3 | model_name=DFM_NFF_APR 4 | # NeRF/NFF Signature 5 | expname=DFNet_NeFeS50_7Scenes_heads_colmap 6 | ft_path=../logs/paper_models/nefes/NeFeS_heads_colmap/nefes.tar 7 | tinyscale=4 8 | datadir=../data/7Scenes/heads 9 | dataset_type=7Scenes_colmap 10 | # Feature CNN Signature 11 | pretrain_model_path=../logs/paper_models/apr/DFNet_heads_colmap/checkpoint-heads-colmap.pt 12 | pretrain_featurenet_path=../logs/paper_models/dfnet/DFNet_heads/checkpoint-heads.pt 13 | dfnet=True 14 | # Other Training Signature 15 | trainskip=1 16 | testskip=1 17 | df=2 # df=2 which use a input of 240x427 18 | load_pose_avg_stats=True 19 | freezeBN=True 20 | learning_rate=0.00001 21 | pose_only=2 # APR+NeFeS50 22 | svd_reg=True 23 | transient_at_test=True # exp. 24 | # netchunk=2097152 # default 2^21, high gpu utility/high memory usage, consider reduce this if GPU OOM -------------------------------------------------------------------------------- /script/config/7Scenes/dfnet/config_heads_stage1.txt: -------------------------------------------------------------------------------- 1 | # Train nefes color only stage 2 | expname=nefes_heads_color_only 3 | basedir=../logs 4 | datadir=../data/7Scenes/heads 5 | dataset_type=7Scenes_colmap 6 | trainskip=1 7 | testskip=10 8 | df=2 # train 9 | load_pose_avg_stats=True 10 | dfnet=True 11 | color_feat_fusion_nerfw_loss=True 12 | pretrain_featurenet_path=../logs/paper_models/dfnet/DFNet_heads/checkpoint-heads.pt 13 | transient_at_test=True 14 | tinyscale=4 # Train 15 | semantic=False # this is True for cambridge dataset 16 | # render_test=True # this is for testing the NeFeS rendering after training -------------------------------------------------------------------------------- /script/config/7Scenes/dfnet/config_heads_stage2.txt: -------------------------------------------------------------------------------- 1 | # Train nefes new schedule stage 2 | expname=nefes_heads 3 | basedir=../logs 4 | datadir=../data/7Scenes/heads 5 | dataset_type=7Scenes_colmap 6 | trainskip=1 7 | testskip=10 8 | df=2 # train 9 | load_pose_avg_stats=True 10 | dfnet=True 11 | color_feat_fusion_nerfw_loss=True 12 | pretrain_featurenet_path=../logs/paper_models/dfnet/DFNet_heads/checkpoint-heads.pt 13 | transient_at_test=True 14 | tinyscale=4 # Train 15 | semantic=False # this is True for cambridge dataset 16 | new_schedule=2 # new schedule, train color+feature+fusion 17 | ft_path=../logs/nefes_heads_color_only/000600.tar 18 | # render_test=True 19 | -------------------------------------------------------------------------------- /script/config/7Scenes/dfnet/config_kitchen_DFM.txt: -------------------------------------------------------------------------------- 1 | # ########################################## FM EXP. NFF APR ########################################## 2 | # test on DFM post processing using NeFeS. optimize APR model. 
3 | model_name=DFM_NFF_APR 4 | # NeRF/NFF Signature 5 | expname=DFNet_NeFeS50_7Scenes_kitchen_colmap 6 | ft_path=../logs/paper_models/nefes/NeFeS_kitchen_colmap/nefes.tar 7 | tinyscale=4 8 | datadir=../data/7Scenes/redkitchen 9 | dataset_type=7Scenes_colmap 10 | # Feature CNN Signature 11 | pretrain_model_path=../logs/paper_models/apr/DFNet_kitchen_colmap/checkpoint-kitchen-colmap.pt 12 | pretrain_featurenet_path=../logs/paper_models/dfnet/DFNet_kitchen/checkpoint-kitchen.pt 13 | dfnet=True 14 | # Other Training Signature 15 | trainskip=1 16 | testskip=1 17 | df=2 # df=2 which use a input of 240x427 18 | load_pose_avg_stats=True 19 | freezeBN=True 20 | learning_rate=0.00001 21 | pose_only=2 # APR+NeFeS50 22 | svd_reg=True 23 | transient_at_test=True # exp. -------------------------------------------------------------------------------- /script/config/7Scenes/dfnet/config_kitchen_stage1.txt: -------------------------------------------------------------------------------- 1 | # Train nefes color only stage 2 | expname=nefes_kitchen_color_only 3 | basedir=../logs 4 | datadir=../data/7Scenes/redkitchen 5 | dataset_type=7Scenes_colmap 6 | trainskip=2 7 | testskip=10 8 | df=2 # train 9 | load_pose_avg_stats=True 10 | dfnet=True 11 | color_feat_fusion_nerfw_loss=True 12 | pretrain_featurenet_path=../logs/paper_models/dfnet/DFNet_kitchen/checkpoint-kitchen.pt 13 | transient_at_test=True 14 | tinyscale=4 # Train 15 | semantic=False # this is True for cambridge dataset 16 | # render_test=True # this is for testing the NeFeS rendering after training -------------------------------------------------------------------------------- /script/config/7Scenes/dfnet/config_kitchen_stage2.txt: -------------------------------------------------------------------------------- 1 | # Train nefes new schedule stage 2 | expname=nefes_kitchen 3 | basedir=../logs 4 | datadir=../data/7Scenes/redkitchen 5 | dataset_type=7Scenes_colmap 6 | trainskip=2 7 | testskip=10 8 | df=2 # train 9 | load_pose_avg_stats=True 10 | dfnet=True 11 | color_feat_fusion_nerfw_loss=True 12 | pretrain_featurenet_path=../logs/paper_models/dfnet/DFNet_kitchen/checkpoint-kitchen.pt 13 | transient_at_test=True 14 | tinyscale=4 # Train 15 | semantic=False # this is True for cambridge dataset 16 | new_schedule=2 # new schedule, train color+feature+fusion 17 | ft_path=../logs/nefes_kitchen_color_only/000600.tar 18 | # render_test=True 19 | -------------------------------------------------------------------------------- /script/config/7Scenes/dfnet/config_office_DFM.txt: -------------------------------------------------------------------------------- 1 | # ########################################## FM EXP. NFF APR ########################################## 2 | # test on DFM post processing using NeFeS. optimize APR model. 
3 | model_name=DFM_NFF_APR 4 | # NeRF/NFF Signature 5 | expname=DFNet_NeFeS50_7Scenes_office_colmap 6 | ft_path=../logs/paper_models/nefes/NeFeS_office_colmap/nefes.tar 7 | tinyscale=4 8 | datadir=../data/7Scenes/office 9 | dataset_type=7Scenes_colmap 10 | # Feature CNN Signature 11 | pretrain_model_path=../logs/paper_models/apr/DFNet_office_colmap/checkpoint-office-colmap.pt 12 | pretrain_featurenet_path=../logs/paper_models/dfnet/DFNet_office/checkpoint-office.pt 13 | dfnet=True 14 | # Other Training Signature 15 | trainskip=1 16 | testskip=1 17 | df=2 # df=2 which use a input of 240x427 18 | load_pose_avg_stats=True 19 | freezeBN=True 20 | learning_rate=0.00001 21 | pose_only=2 # APR+NeFeS50 22 | svd_reg=True 23 | transient_at_test=True # exp. -------------------------------------------------------------------------------- /script/config/7Scenes/dfnet/config_office_stage1.txt: -------------------------------------------------------------------------------- 1 | # Train nefes color only stage 2 | expname=nefes_office_color_only 3 | basedir=../logs 4 | datadir=../data/7Scenes/office 5 | dataset_type=7Scenes_colmap 6 | trainskip=2 7 | testskip=10 8 | df=2 # train 9 | load_pose_avg_stats=True 10 | dfnet=True 11 | color_feat_fusion_nerfw_loss=True 12 | pretrain_featurenet_path=../logs/paper_models/dfnet/DFNet_office/checkpoint-office.pt 13 | transient_at_test=True 14 | tinyscale=4 # Train 15 | semantic=False # this is True for cambridge dataset 16 | # render_test=True # this is for testing the NeFeS rendering after training -------------------------------------------------------------------------------- /script/config/7Scenes/dfnet/config_office_stage2.txt: -------------------------------------------------------------------------------- 1 | # Train nefes new schedule stage 2 | expname=nefes_office 3 | basedir=../logs 4 | datadir=../data/7Scenes/office 5 | dataset_type=7Scenes_colmap 6 | trainskip=2 7 | testskip=10 8 | df=2 # train 9 | load_pose_avg_stats=True 10 | dfnet=True 11 | color_feat_fusion_nerfw_loss=True 12 | pretrain_featurenet_path=../logs/paper_models/dfnet/DFNet_office/checkpoint-office.pt 13 | transient_at_test=True 14 | tinyscale=4 # Train 15 | semantic=False # this is True for cambridge dataset 16 | new_schedule=2 # new schedule, train color+feature+fusion 17 | ft_path=../logs/nefes_office_color_only/000600.tar 18 | # render_test=True 19 | -------------------------------------------------------------------------------- /script/config/7Scenes/dfnet/config_pumpkin_DFM.txt: -------------------------------------------------------------------------------- 1 | # ########################################## FM EXP. NFF APR ########################################## 2 | # test on DFM post processing using NeFeS. optimize APR model. 3 | model_name=DFM_NFF_APR 4 | # NeRF/NFF Signature 5 | expname=DFNet_NeFeS50_7Scenes_pumpkin_colmap 6 | ft_path=../logs/paper_models/nefes/NeFeS_pumpkin_colmap/nefes.tar 7 | tinyscale=4 8 | datadir=../data/7Scenes/pumpkin 9 | dataset_type=7Scenes_colmap 10 | # Feature CNN Signature 11 | pretrain_model_path=../logs/paper_models/apr/DFNet_pumpkin_colmap/checkpoint-pumpkin-colmap.pt 12 | pretrain_featurenet_path=../logs/paper_models/dfnet/DFNet_pumpkin/checkpoint-pumpkin.pt 13 | dfnet=True 14 | # Other Training Signature 15 | trainskip=1 16 | testskip=1 17 | df=2 # df=2 which use a input of 240x427 18 | load_pose_avg_stats=True 19 | freezeBN=True 20 | learning_rate=0.00001 21 | pose_only=2 # APR+NeFeS50 22 | svd_reg=True 23 | transient_at_test=True # exp. 
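The key=value files above (and the ones that follow) are plain-text configs: the SevenScenes_colmap loader's main() earlier in this section reads them through config_parser() from models/options and prints parser.format_values(), which is ConfigArgParse behaviour (environment.yml pins configargparse==1.7), so each key corresponds to a command-line flag of the same name. The sketch below only illustrates that mapping; it is not a copy of the repository's script/models/options.py, and the option list simply mirrors keys visible in these configs.

import configargparse

def config_parser():
    # every key=value line in a config file becomes the corresponding --flag
    parser = configargparse.ArgumentParser()
    parser.add_argument('--config', is_config_file=True, help='path to a key=value config file')
    parser.add_argument('--model_name', type=str, default='')
    parser.add_argument('--expname', type=str, default='')
    parser.add_argument('--datadir', type=str, default='../data/7Scenes/chess')
    parser.add_argument('--dataset_type', type=str, default='7Scenes_colmap')
    parser.add_argument('--df', type=float, default=1., help='image downscale factor')
    parser.add_argument('--trainskip', type=int, default=1)
    parser.add_argument('--testskip', type=int, default=1)
    parser.add_argument('--learning_rate', type=float, default=1e-5)
    parser.add_argument('--svd_reg', action='store_true')
    return parser

if __name__ == '__main__':
    # e.g. python this_sketch.py --config config/7Scenes/dfnet/config_pumpkin_DFM.txt
    parser = config_parser()
    args = parser.parse_args()
    print(parser.format_values())  # reports whether each value came from the config file or the CLI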
-------------------------------------------------------------------------------- /script/config/7Scenes/dfnet/config_pumpkin_stage1.txt: -------------------------------------------------------------------------------- 1 | # Train nefes color only stage 2 | expname=nefes_pumpkin_color_only 3 | basedir=../logs 4 | datadir=../data/7Scenes/pumpkin 5 | dataset_type=7Scenes_colmap 6 | trainskip=2 7 | testskip=10 8 | df=2 # train 9 | load_pose_avg_stats=True 10 | dfnet=True 11 | color_feat_fusion_nerfw_loss=True 12 | pretrain_featurenet_path=../logs/paper_models/dfnet/DFNet_pumpkin/checkpoint-pumpkin.pt 13 | transient_at_test=True 14 | tinyscale=4 # Train 15 | semantic=False # this is True for cambridge dataset 16 | # render_test=True # this is for testing the NeFeS rendering after training -------------------------------------------------------------------------------- /script/config/7Scenes/dfnet/config_pumpkin_stage2.txt: -------------------------------------------------------------------------------- 1 | # Train nefes new schedule stage 2 | expname=nefes_pumpkin 3 | basedir=../logs 4 | datadir=../data/7Scenes/pumpkin 5 | dataset_type=7Scenes_colmap 6 | trainskip=2 7 | testskip=10 8 | df=2 # train 9 | load_pose_avg_stats=True 10 | dfnet=True 11 | color_feat_fusion_nerfw_loss=True 12 | pretrain_featurenet_path=../logs/paper_models/dfnet/DFNet_pumpkin/checkpoint-pumpkin.pt 13 | transient_at_test=True 14 | tinyscale=4 # Train 15 | semantic=False # this is True for cambridge dataset 16 | new_schedule=2 # new schedule, train color+feature+fusion 17 | ft_path=../logs/nefes_pumpkin_color_only/000600.tar 18 | # render_test=True 19 | -------------------------------------------------------------------------------- /script/config/7Scenes/dfnet/config_stairs_DFM.txt: -------------------------------------------------------------------------------- 1 | # ########################################## FM EXP. NFF APR ########################################## 2 | # test on DFM post processing using NeFeS. optimize APR model. 3 | model_name=DFM_NFF_APR 4 | # NeRF/NFF Signature 5 | expname=DFNet_NeFeS50_7Scenes_stairs_colmap 6 | ft_path=../logs/paper_models/nefes/NeFeS_stairs_colmap/nefes.tar 7 | tinyscale=4 8 | datadir=../data/7Scenes/stairs 9 | dataset_type=7Scenes_colmap 10 | # Feature CNN Signature 11 | pretrain_model_path=../logs/paper_models/apr/DFNet_stairs_colmap/checkpoint-stairs-colmap.pt 12 | pretrain_featurenet_path=../logs/paper_models/dfnet/DFNet_stairs/checkpoint-stairs.pt 13 | dfnet=True 14 | # Other Training Signature 15 | trainskip=1 16 | testskip=1 17 | df=2 # df=2 which use a input of 240x427 18 | load_pose_avg_stats=True 19 | freezeBN=True 20 | learning_rate=0.00001 21 | pose_only=2 # APR+NeFeS50 22 | svd_reg=True 23 | transient_at_test=True # exp. 
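svd_reg=True appears in every *_DFM config; judging from the flag name, it presumably enables an SVD-based constraint that keeps the refined rotation a valid element of SO(3) during pose optimisation. That reading is an assumption rather than something these config files state, but the underlying operation, projecting an approximately orthonormal 3x3 matrix onto the nearest rotation, is standard and is sketched below.

import torch

def project_to_so3(R: torch.Tensor) -> torch.Tensor:
    # Nearest rotation (in Frobenius norm) to R of shape (..., 3, 3);
    # assumed to be the kind of regularisation the svd_reg flag refers to.
    U, _, Vh = torch.linalg.svd(R)
    det = torch.det(U @ Vh)
    # flip the sign of the last singular direction when needed so det(R) = +1
    S = torch.diag_embed(torch.stack([torch.ones_like(det), torch.ones_like(det), det], dim=-1))
    return U @ S @ Vh

noisy = torch.eye(3) + 0.05 * torch.randn(3, 3)   # slightly non-orthonormal estimate
R = project_to_so3(noisy)
print(torch.det(R))    # ~= 1.0
print(R @ R.T)         # ~= identity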
-------------------------------------------------------------------------------- /script/config/7Scenes/dfnet/config_stairs_stage1.txt: -------------------------------------------------------------------------------- 1 | # Train nefes color only stage 2 | expname=nefes_stairs_color_only 3 | basedir=../logs 4 | datadir=../data/7Scenes/stairs 5 | dataset_type=7Scenes_colmap 6 | trainskip=2 7 | testskip=10 8 | df=2 # train 9 | load_pose_avg_stats=True 10 | dfnet=True 11 | color_feat_fusion_nerfw_loss=True 12 | pretrain_featurenet_path=../logs/paper_models/dfnet/DFNet_stairs/checkpoint-stairs.pt 13 | transient_at_test=True 14 | tinyscale=4 # Train 15 | semantic=False # this is True for cambridge dataset 16 | # epochs=600 # original 17 | # epochs=50 # for unit test 18 | # render_test=True # this is for testing the NeFeS rendering after training -------------------------------------------------------------------------------- /script/config/7Scenes/dfnet/config_stairs_stage2.txt: -------------------------------------------------------------------------------- 1 | # Train nefes new schedule stage 2 | expname=nefes_stairs 3 | basedir=../logs 4 | datadir=../data/7Scenes/stairs 5 | dataset_type=7Scenes_colmap 6 | trainskip=2 7 | testskip=10 8 | df=2 # train 9 | load_pose_avg_stats=True 10 | dfnet=True 11 | color_feat_fusion_nerfw_loss=True 12 | pretrain_featurenet_path=../logs/paper_models/dfnet/DFNet_stairs/checkpoint-stairs.pt 13 | transient_at_test=True 14 | tinyscale=4 # Train 15 | semantic=False # this is True for cambridge dataset 16 | new_schedule=2 # new schedule, train color+feature+fusion 17 | ft_path=../logs/nefes_stairs_color_only/000600.tar 18 | # render_test=True 19 | -------------------------------------------------------------------------------- /script/config/Cambridge/.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ActiveVisionLab/NeFeS/1ac72cb48af60d5bfa1fed1d1af9d0f1dc750b7a/script/config/Cambridge/.DS_Store -------------------------------------------------------------------------------- /script/config/Cambridge/dfnet/config_church_DFM.txt: -------------------------------------------------------------------------------- 1 | # ########################################## DFM EXP. NFF APR ########################################## 2 | # test on DFM post processing using neural feature field. optimize APR model. Using direct_feature_matching.py 3 | model_name=DFM_NFF_APR 4 | # NeRF/NFF Signature 5 | expname=DFNet_NeFeS50_7Scenes_church 6 | ft_path=../logs/paper_models/nefes/NeFeS_church/nefes.tar 7 | tinyscale=4 8 | datadir=../data/Cambridge/StMarysChurch 9 | dataset_type=Cambridge 10 | # Feature CNN Signature 11 | pretrain_model_path=../logs/paper_models/apr/DFNet_church/checkpoint-church.pt 12 | pretrain_featurenet_path=../logs/paper_models/dfnet/DFNet_church/checkpoint-church.pt 13 | dfnet=True 14 | # Other Training Signature 15 | trainskip=1 16 | testskip=1 17 | df=2 # df=2 which use a input of 240x427 18 | load_pose_avg_stats=True 19 | freezeBN=True 20 | learning_rate=0.00001 21 | pose_only=2 # APR+NeFeS50 22 | svd_reg=True 23 | transient_at_test=True # exp. 24 | no_verification_step=True # no need for verification since the initial errors were large. 25 | # netchunk=2097152 # 2^21, high gpu utility/high memory usage. Danger hot! 
26 | -------------------------------------------------------------------------------- /script/config/Cambridge/dfnet/config_church_stage1.txt: -------------------------------------------------------------------------------- 1 | # Train nefes color only stage 2 | expname=nefes_church_color_only 3 | basedir=../logs 4 | datadir=../data/Cambridge/StMarysChurch 5 | dataset_type=Cambridge 6 | trainskip=1 7 | testskip=10 8 | df=2 # train 9 | load_pose_avg_stats=True 10 | dfnet=True 11 | color_feat_fusion_nerfw_loss=True 12 | pretrain_featurenet_path=../logs/paper_models/dfnet/DFNet_church/checkpoint-church.pt 13 | transient_at_test=True 14 | tinyscale=4 # Train 15 | semantic=True # this is True for cambridge dataset 16 | # render_test=True # this is for testing the NeFeS rendering after training -------------------------------------------------------------------------------- /script/config/Cambridge/dfnet/config_church_stage2.txt: -------------------------------------------------------------------------------- 1 | # Train nefes new schedule stage 2 | expname=nefes_church 3 | basedir=../logs 4 | datadir=../data/Cambridge/StMarysChurch 5 | dataset_type=Cambridge 6 | trainskip=1 7 | testskip=10 8 | df=2 # train 9 | load_pose_avg_stats=True 10 | dfnet=True 11 | color_feat_fusion_nerfw_loss=True 12 | pretrain_featurenet_path=../logs/paper_models/dfnet/DFNet_church/checkpoint-church.pt 13 | transient_at_test=True 14 | tinyscale=4 # Train 15 | semantic=True # this is True for cambridge dataset 16 | new_schedule=2 # new schedule, train color+feature+fusion 17 | ft_path=../logs/nefes_church_color_only/000600.tar 18 | # render_test=True 19 | -------------------------------------------------------------------------------- /script/config/Cambridge/dfnet/config_hospital_DFM.txt: -------------------------------------------------------------------------------- 1 | # ########################################## DFM EXP. NFF APR ########################################## 2 | # test on DFM post processing using neural feature field. optimize APR model. Using direct_feature_matching.py 3 | model_name=DFM_NFF_APR 4 | # NeRF/NFF Signature 5 | expname=DFNet_NeFeS50_7Scenes_hospital 6 | ft_path=../logs/paper_models/nefes/NeFeS_hospital/nefes.tar 7 | tinyscale=4 8 | datadir=../data/Cambridge/OldHospital 9 | dataset_type=Cambridge 10 | # Feature CNN Signature 11 | pretrain_model_path=../logs/paper_models/apr/DFNet_hospital/checkpoint-hospital.pt 12 | pretrain_featurenet_path=../logs/paper_models/dfnet/DFNet_hospital/checkpoint-hospital.pt 13 | dfnet=True 14 | # Other Training Signature 15 | trainskip=1 16 | testskip=1 17 | df=2 # df=2 which use a input of 240x427 18 | load_pose_avg_stats=True 19 | freezeBN=True 20 | learning_rate=0.00001 21 | pose_only=2 # APR+NeFeS50 22 | svd_reg=True 23 | transient_at_test=True # exp. 24 | no_verification_step=True # no need for verification since the initial errors were large. 25 | # netchunk=2097152 # 2^21, high gpu utility/high memory usage. Danger hot! 
26 | -------------------------------------------------------------------------------- /script/config/Cambridge/dfnet/config_hospital_stage1.txt: -------------------------------------------------------------------------------- 1 | # Train nefes color only stage 2 | expname=nefes_hospital_color_only 3 | basedir=../logs 4 | datadir=../data/Cambridge/OldHospital 5 | dataset_type=Cambridge 6 | trainskip=1 7 | testskip=10 8 | df=2 # train 9 | load_pose_avg_stats=True 10 | dfnet=True 11 | color_feat_fusion_nerfw_loss=True 12 | pretrain_featurenet_path=../logs/paper_models/dfnet/DFNet_hospital/checkpoint-hospital.pt 13 | transient_at_test=True 14 | tinyscale=4 # Train 15 | semantic=True # this is True for cambridge dataset 16 | # render_test=True # this is for testing the NeFeS rendering after training -------------------------------------------------------------------------------- /script/config/Cambridge/dfnet/config_hospital_stage2.txt: -------------------------------------------------------------------------------- 1 | # Train nefes new schedule stage 2 | expname=nefes_hospital 3 | basedir=../logs 4 | datadir=../data/Cambridge/OldHospital 5 | dataset_type=Cambridge 6 | trainskip=1 7 | testskip=10 8 | df=2 # train 9 | load_pose_avg_stats=True 10 | dfnet=True 11 | color_feat_fusion_nerfw_loss=True 12 | pretrain_featurenet_path=../logs/paper_models/dfnet/DFNet_hospital/checkpoint-hospital.pt 13 | transient_at_test=True 14 | tinyscale=4 # Train 15 | semantic=True # this is True for cambridge dataset 16 | new_schedule=2 # new schedule, train color+feature+fusion 17 | ft_path=../logs/nefes_hospital_color_only/000600.tar 18 | # render_test=True 19 | -------------------------------------------------------------------------------- /script/config/Cambridge/dfnet/config_kings_DFM.txt: -------------------------------------------------------------------------------- 1 | # ########################################## DFM EXP. NFF APR ########################################## 2 | # test on DFM post processing using neural feature field. optimize APR model. Using direct_feature_matching.py 3 | model_name=DFM_NFF_APR 4 | # NeRF/NFF Signature 5 | expname=DFNet_NeFeS50_7Scenes_kings 6 | ft_path=../logs/paper_models/nefes/NeFeS_kings/nefes.tar 7 | tinyscale=4 8 | datadir=../data/Cambridge/KingsCollege 9 | dataset_type=Cambridge 10 | # Feature CNN Signature 11 | pretrain_model_path=../logs/paper_models/apr/DFNet_kings/checkpoint-kings.pt 12 | pretrain_featurenet_path=../logs/paper_models/dfnet/DFNet_kings/checkpoint-kings.pt 13 | dfnet=True 14 | # Other Training Signature 15 | trainskip=1 16 | testskip=1 17 | df=2 # df=2 which use a input of 240x427 18 | load_pose_avg_stats=True 19 | freezeBN=True 20 | learning_rate=0.00001 21 | pose_only=2 # APR+NeFeS50 22 | svd_reg=True 23 | transient_at_test=True # exp. 24 | no_verification_step=True # no need for verification since the initial errors were large. 25 | # netchunk=2097152 # 2^21, high gpu utility/high memory usage. Danger hot! 
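# Resolution bookkeeping for the settings above: df=2 gives a 240x427 network input, and with
# tinyscale=4 NeFeS renders at 240//4 x 427//4 = 60x106; the fused feature map is then bicubically
# upsampled back to 240x427 before the feature-matching loss (see train_on_batch in dm/DFM_APR_refine.py).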
26 | -------------------------------------------------------------------------------- /script/config/Cambridge/dfnet/config_kings_stage1.txt: -------------------------------------------------------------------------------- 1 | # Train nefes color only stage 2 | expname=nefes_kings_color_only 3 | basedir=../logs 4 | datadir=../data/Cambridge/KingsCollege 5 | dataset_type=Cambridge 6 | trainskip=1 7 | testskip=10 8 | df=2 # train 9 | load_pose_avg_stats=True 10 | dfnet=True 11 | color_feat_fusion_nerfw_loss=True 12 | pretrain_featurenet_path=../logs/paper_models/dfnet/DFNet_kings/checkpoint-kings.pt 13 | transient_at_test=True 14 | tinyscale=4 # Train 15 | semantic=True # this is True for cambridge dataset 16 | # render_test=True # this is for testing the NeFeS rendering after training -------------------------------------------------------------------------------- /script/config/Cambridge/dfnet/config_kings_stage2.txt: -------------------------------------------------------------------------------- 1 | # Train nefes new schedule stage 2 | expname=nefes_kings 3 | basedir=../logs 4 | datadir=../data/Cambridge/KingsCollege 5 | dataset_type=Cambridge 6 | trainskip=1 7 | testskip=10 8 | df=2 # train 9 | load_pose_avg_stats=True 10 | dfnet=True 11 | color_feat_fusion_nerfw_loss=True 12 | pretrain_featurenet_path=../logs/paper_models/dfnet/DFNet_kings/checkpoint-kings.pt 13 | transient_at_test=True 14 | tinyscale=4 # Train 15 | semantic=True # this is True for cambridge dataset 16 | new_schedule=2 # new schedule, train color+feature+fusion 17 | ft_path=../logs/nefes_kings_color_only/000600.tar 18 | # render_test=True 19 | -------------------------------------------------------------------------------- /script/config/Cambridge/dfnet/config_shop_DFM.txt: -------------------------------------------------------------------------------- 1 | # ########################################## DFM EXP. NFF APR ########################################## 2 | # test on DFM post processing using neural feature field. optimize APR model. Using direct_feature_matching.py 3 | model_name=DFM_NFF_APR 4 | # NeRF/NFF Signature 5 | expname=DFNet_NeFeS50_7Scenes_shop 6 | ft_path=../logs/paper_models/nefes/NeFeS_shop/nefes.tar 7 | tinyscale=4 8 | datadir=../data/Cambridge/ShopFacade 9 | dataset_type=Cambridge 10 | # Feature CNN Signature 11 | pretrain_model_path=../logs/paper_models/apr/DFNet_shop/checkpoint-shop.pt 12 | pretrain_featurenet_path=../logs/paper_models/dfnet/DFNet_shop/checkpoint-shop.pt 13 | dfnet=True 14 | # Other Training Signature 15 | trainskip=1 16 | testskip=1 17 | df=2 # df=2 which use a input of 240x427 18 | load_pose_avg_stats=True 19 | freezeBN=True 20 | learning_rate=0.00001 21 | pose_only=2 # APR+NeFeS50 22 | svd_reg=True 23 | transient_at_test=True # exp. 24 | no_verification_step=True # no need for verification since the initial errors were large. 25 | # netchunk=2097152 # 2^21, high gpu utility/high memory usage. Danger hot! 
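# svd_reg=True re-orthogonalises the predicted rotation after every APR forward pass: the 3x3 block
# is projected back onto a valid rotation via SVD, R <- U V^T (see svd_reg() in dm/direct_pose_model.py),
# which keeps the refined pose on SE(3) during gradient-based optimisation.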
26 | -------------------------------------------------------------------------------- /script/config/Cambridge/dfnet/config_shop_stage1.txt: -------------------------------------------------------------------------------- 1 | # Train nefes color only stage 2 | expname=nefes_shop_color_only 3 | basedir=../logs 4 | datadir=../data/Cambridge/ShopFacade 5 | dataset_type=Cambridge 6 | trainskip=1 7 | testskip=10 8 | df=2 # train 9 | load_pose_avg_stats=True 10 | dfnet=True 11 | color_feat_fusion_nerfw_loss=True 12 | pretrain_featurenet_path=../logs/paper_models/dfnet/DFNet_shop/checkpoint-shop.pt 13 | transient_at_test=True 14 | tinyscale=4 # Train 15 | semantic=True # this is True for cambridge dataset 16 | # render_test=True # this is for testing the NeFeS rendering after training -------------------------------------------------------------------------------- /script/config/Cambridge/dfnet/config_shop_stage2.txt: -------------------------------------------------------------------------------- 1 | # Train nefes new schedule stage 2 | expname=nefes_shop 3 | basedir=../logs 4 | datadir=../data/Cambridge/ShopFacade 5 | dataset_type=Cambridge 6 | trainskip=1 7 | testskip=10 8 | df=2 # train 9 | load_pose_avg_stats=True 10 | dfnet=True 11 | color_feat_fusion_nerfw_loss=True 12 | pretrain_featurenet_path=../logs/paper_models/dfnet/DFNet_shop/checkpoint-shop.pt 13 | transient_at_test=True 14 | tinyscale=4 # Train 15 | semantic=True # this is True for cambridge dataset 16 | new_schedule=2 # new schedule, train color+feature+fusion 17 | ft_path=../logs/nefes_shop_color_only/000600.tar 18 | # render_test=True 19 | -------------------------------------------------------------------------------- /script/dm/DFM_APR_refine.py: -------------------------------------------------------------------------------- 1 | import time, gc 2 | from copy import deepcopy 3 | import os, sys 4 | import os.path as osp 5 | import torch 6 | import torch.nn as nn 7 | import torch.nn.init 8 | import numpy as np 9 | 10 | from dm.pose_model import get_error_in_q, compute_pose_error_SE3 11 | from dm.DFM_pose_refine import inference_pose_regression, inference_pose_feature_extraction, load_NeRF_model, FeatureLoss 12 | from dm.direct_pose_model import fix_coord_supp 13 | from models.nerfh import img2mse, mse2psnr 14 | from models.rendering import render 15 | 16 | from torchvision.utils import save_image 17 | from utils.utils import save_image_saliancy, SSIM, set_default_to_cuda, set_default_to_cpu 18 | 19 | # # try to be deterministic 20 | # np.random.seed(0) 21 | # torch.manual_seed(0) 22 | # import random 23 | # random.seed(0) 24 | 25 | PROFILING_DFM = False 26 | def start_timer(): 27 | global start_time 28 | gc.collect() 29 | torch.cuda.empty_cache() 30 | torch.cuda.reset_peak_memory_stats() 31 | torch.cuda.synchronize() 32 | start_time = time.time() 33 | 34 | def end_timer_and_print(local_msg): 35 | torch.cuda.synchronize() 36 | end_time = time.time() 37 | print("\n" + local_msg) 38 | print("Total execution time = {:.3f} sec".format(end_time - start_time)) 39 | print("Max memory used by tensors = {} bytes".format(torch.cuda.max_memory_allocated())) 40 | 41 | def plot_rgb_n_batch_salient_feature(target_in, rgb_in, features_target, features_rgb, i=0): 42 | ''' 43 | print 1 pair of batch of salient feature map 44 | :param: target_in [B, 3, H, W] 45 | :param: rgb_in [B, 3, H, W] 46 | :param: features_target [B, C, H, W] 47 | :param: features_rgb [B, C, H, W] 48 | :param: frame index i of batch 49 | ''' 50 | print("for debug only...") 
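    # What follows saves the i-th target / rendered RGB image to ./tmp/ and reshapes the i-th
    # feature map from [C, H, W] to [C, 1, H, W], so save_image_saliancy writes one grayscale
    # saliency map per feature channel.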
51 | 52 | if target_in != None: 53 | save_image(target_in[i], './tmp/target_in.png') 54 | if rgb_in != None: 55 | save_image(rgb_in[i], './tmp/rgb_in.png') 56 | features_t = features_target[i].clone()[:, None, :, :] 57 | features_r = features_rgb[i].clone()[:, None, :, :] 58 | save_image_saliancy(features_t, './tmp/target', True) 59 | save_image_saliancy(features_r, './tmp/rgb', True) 60 | 61 | def plot_features(features_target, features_rgb, fn1, fn2, i=0): 62 | ''' 63 | print 1 pair of 1 sample of salient feature map 64 | :param: features_target [B, C, H, W] 65 | :param: features_rgb [B, C, H, W] 66 | :param: fn1 filename of target feature map 67 | :param: fn2 filename of rgb feature map 68 | :param: frame index i of batch 69 | ''' 70 | # print("for debug only...") 71 | features_t = features_target[i].clone()[:, None, :, :] 72 | features_r = features_rgb[i].clone()[:, None, :, :] 73 | save_image_saliancy(features_t[0], fn1, True) 74 | save_image_saliancy(features_r[0], fn2, True) 75 | 76 | def PoseLoss(args, pose_, pose, device): 77 | loss_func = nn.MSELoss() 78 | predict_pose = pose_.reshape(args.batch_size, 12).to(device) # maynot need reshape 79 | pose_loss = loss_func(predict_pose, pose) 80 | return pose_loss 81 | 82 | # scaler = torch.cuda.amp.GradScaler(enabled=True) 83 | 84 | def train_on_batch(args, data, model, feat_model, feature_target, pose, img_idx, hwf, optimizer, device, world_setup_dict, render_kwargs_test, feature_loss, iter_i=None): 85 | ''' Perform 1 step of training ''' 86 | 87 | H, W, focal = hwf 88 | data = data.to(device) # [1, 3, 240, 427] non_blocking=Truen 89 | 90 | # pose regression module 91 | pose_ = inference_pose_regression(args, data, device, model) # here returns predicted pose [1, 3, 4] # real img features and predicted pose # features: (1, [3, 1, 128, 240, 427]), predict_pose: [1, 3, 4] 92 | 93 | pose_nerf = pose_.clone() 94 | 95 | # direct matching module 96 | # rescale the predicted pose to nerf scales 97 | pose_nerf = fix_coord_supp(args, pose_nerf, world_setup_dict, device=device) 98 | 99 | pose = pose.to(device) 100 | img_idx = img_idx.to(device) 101 | 102 | # every new tensor from onward is in GPU, here memory cost is a bottleneck 103 | torch.set_default_device('cuda') 104 | torch.set_default_dtype(torch.float32) 105 | assert(args.nerfh_nff) 106 | 107 | rgb, _, _, extras = render(int(H//args.tinyscale), int(W//args.tinyscale), focal/args.tinyscale, chunk=args.chunk, c2w=pose_nerf[0,:3,:4], img_idx=img_idx, **render_kwargs_test) 108 | if args.encode_hist: 109 | affine_color_transform = render_kwargs_test['network_fn'].affine_color_transform 110 | rgb = affine_color_transform(args, rgb, img_idx, 1) 111 | # NeRF feature + RGB -> CNN Fusion -> Feature 112 | Fusion_Net = render_kwargs_test['network_fn'].run_fusion_net 113 | render_rgb, render_feature, feature_rgb = Fusion_Net(rgb, extras['feat_map'], int(H//args.tinyscale), int(W//args.tinyscale), 1) # (1,3,120,213), (1,16,120,213), (1,16,120,213) 114 | feature_rgb = torch.nn.Upsample(size=(H, W), mode='bicubic')(feature_rgb) 115 | 116 | # VERIFICATION_STEP exp. 117 | rgb = render_rgb.reshape(1, 3, int(H//args.tinyscale), int(W//args.tinyscale)) 118 | rgb = torch.nn.Upsample(size=(H, W), mode='bicubic')(rgb) 119 | # end of VERIFICATION_STEP exp. 
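    # The 10-pixel border crop below drops feature-map edges that are unreliable because of CNN
    # zero padding. Only the feature loss (rendered fused features vs. DFNet features of the query
    # image) drives the pose update; the cropped RGB is used solely to report PSNR/SSIM and to feed
    # the optional verification step.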
120 | 121 | 122 | # crop potential invalid region on the edge of feautre map, to compensate zero padding in CNN 123 | feature_target = feature_target[:, :, 10:-10, 10:-10] 124 | feature_rgb = feature_rgb[:, :, 10:-10, 10:-10] 125 | gt_img = data[:, :, 10:-10, 10:-10] 126 | rgb = rgb[:, :, 10:-10, 10:-10] 127 | 128 | # only use feature loss 129 | loss = feature_loss(feature_rgb[0], feature_target[0]) 130 | 131 | ### Loss Design End 132 | loss.backward() 133 | optimizer.step() 134 | optimizer.zero_grad() 135 | 136 | # end of every new tensor from onward is in GPU 137 | torch.set_default_device('cpu') 138 | torch.set_default_dtype(torch.float32) 139 | 140 | device_cpu = torch.device('cpu') 141 | iter_loss = loss.to(device_cpu).detach().numpy() 142 | iter_loss = np.array([iter_loss]) 143 | 144 | if args.nerfh_nff: 145 | try: 146 | psnr = mse2psnr(img2mse(rgb, gt_img)) 147 | iter_psnr = psnr.to(device_cpu).detach().numpy() 148 | compute_ssim = SSIM().to(device) 149 | ssim = compute_ssim(rgb, gt_img).mean() 150 | except: 151 | print('check if 1 paddings are removed') 152 | else: 153 | psnr = mse2psnr(img2mse(rgb.cpu(), data.cpu())) 154 | iter_psnr = psnr.to(device_cpu).detach().numpy() 155 | ssim = 0 156 | return iter_loss, iter_psnr, ssim 157 | 158 | def DFM_post_processing(args, model, feat_model, hwf, near, far, device, test_dl=None): 159 | ''' Use Direct Feature Matching as a Post-processing. We optimize APR model in this loop 160 | 161 | predict_poses: [N, 3, 4] 162 | feat_model: feature extractor 163 | render_kwargs_test: kwargs for the nefes model 164 | hwf: [H, W, focal] 165 | device: gpu device 166 | test_dl: test dataloader 167 | world_setup_dict: world setup dict 168 | ''' 169 | SAVE_DFM_RESULT = True 170 | VERIFICATION_STEP = not args.no_verification_step 171 | 172 | if SAVE_DFM_RESULT: 173 | pose_results = [] 174 | 175 | # load NeFeS model 176 | render_kwargs_test = load_NeRF_model(args, near, far) 177 | 178 | world_setup_dict = { 179 | 'pose_scale' : test_dl.dataset.pose_scale, 180 | 'pose_scale2' : test_dl.dataset.pose_scale2, 181 | 'move_all_cam_vec' : test_dl.dataset.move_all_cam_vec, 182 | } 183 | 184 | model.eval() 185 | 186 | # Benchmark inital pose precision 187 | print("Initial Precision:") 188 | get_error_in_q(args, test_dl, model, len(test_dl.dataset), device, batch_size=1) 189 | model.train() # # TODO: resume gradiant update 190 | 191 | # set nerf model requires_grad to False to accelerate the refinement? 192 | render_kwargs_test['network_fn'].requires_grad_(False) 193 | render_kwargs_test['network_fine'].requires_grad_(False) 194 | feat_model.eval().requires_grad_(False) 195 | 196 | import torch.optim as optim 197 | 198 | results = np.zeros((len(test_dl.dataset), 2)) 199 | 200 | ### Core optimization loop exp. per image per model ### 201 | init_iter = args.opt_iter 202 | # profiling 203 | start_timer() 204 | for batch_idx, batch_data in enumerate(test_dl): 205 | data, pose, img_idx = batch_data['img'], batch_data['pose'], batch_data['hist'] 206 | 207 | if batch_idx % 10 == 0: 208 | print("renders {}/total {}".format(batch_idx, len(test_dl.dataset)), flush=True) 209 | pp_model = deepcopy(model) 210 | 211 | # set optimizer for post processing model (BAD Implementation) 212 | optimizer_pp = optim.Adam(pp_model.parameters(), lr=args.learning_rate) #weight_decay=weight_decay, **kwargs 213 | feature_loss = FeatureLoss(per_pixel=args.per_pixel).to(device) 214 | 215 | # We move the query image feature extraction to here for acceleration. This only need once per image. 
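        # The query image's DFNet features do not change while the pose is refined, so they are
        # extracted once here and reused across all args.opt_iter refinement steps; only the NeFeS
        # rendering (and hence feature_rgb) is recomputed as the pose estimate is updated.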
216 | LARGE_FEATURE_SIZE = True # use 240x427 feature size, or 60x106 217 | if LARGE_FEATURE_SIZE: 218 | feature_list, _ = inference_pose_feature_extraction(args, data, device, feat_model, retFeature=True, isSingleStream=True, return_pose=False, H=int(hwf[0]), W=int(hwf[1])) 219 | else: 220 | feature_list, _ = inference_pose_feature_extraction(args, data, device, feat_model, retFeature=True, isSingleStream=True, return_pose=False, H=int(hwf[0]//args.tinyscale), W=int(hwf[1]//args.tinyscale)) # here returns GT img and nerf img features (2, [3, 1, 128, 240, 427]) 221 | feature_target = feature_list[0][0] 222 | 223 | for i in range(init_iter): 224 | loss, psnr, ssim = train_on_batch(args, data, pp_model, feat_model, feature_target, pose, img_idx, hwf, optimizer_pp, device, world_setup_dict, render_kwargs_test, feature_loss, i) 225 | 226 | if VERIFICATION_STEP: 227 | if i==0: 228 | init_psnr = psnr 229 | init_ssim = ssim 230 | 231 | elif i % (init_iter-1) == 0: 232 | end_psnr = psnr 233 | end_ssim = ssim 234 | 235 | ### inference pp_model 236 | data = data.to(device) # input 237 | pose = pose.reshape(1,3,4) # label 238 | 239 | predict_pose = inference_pose_regression(args, data, device, pp_model) 240 | predict_pose = predict_pose.reshape(1,3,4).cpu().detach() 241 | 242 | if VERIFICATION_STEP: # this is a fail safe mechanism to prevent degradation after refinement 243 | retreat=False 244 | if end_psnr < init_psnr: 245 | retreat=True 246 | if end_ssim < init_ssim: 247 | retreat=True 248 | if retreat: 249 | predict_pose = inference_pose_regression(args, data, device, model) 250 | predict_pose = predict_pose.reshape(1,3,4).cpu().detach() 251 | 252 | # compute pose error between the ground truth and the network predicted pose 253 | error_x, theta = compute_pose_error_SE3(pose, predict_pose) # we recently update this for better error precision 254 | results[batch_idx,:] = [error_x, theta] 255 | 256 | if SAVE_DFM_RESULT: 257 | pose_results.append(predict_pose.numpy()) 258 | 259 | end_timer_and_print("Mixed precision:") 260 | 261 | median_result = np.median(results,axis=0) 262 | mean_result = np.mean(results,axis=0) 263 | 264 | # standard log 265 | print ('Median error {}m and {} degrees.'.format(median_result[0], median_result[1])) 266 | print ('Mean error {}m and {} degrees.'.format(mean_result[0], mean_result[1])) 267 | 268 | if SAVE_DFM_RESULT: 269 | print('saving pose results...') 270 | pose_results = np.concatenate(pose_results).reshape(-1,12) 271 | scene = osp.split(args.datadir)[-1] 272 | save_folder = f'tmp/{args.PoseEstimatorType}_NeFeS{init_iter:01d}_{args.dataset_type}/{scene}/' 273 | 274 | if osp.exists(save_folder) is False: 275 | os.makedirs(save_folder) 276 | np.savetxt(os.path.join(f'{save_folder}/{args.PoseEstimatorType}_{scene}_NeFeS{init_iter:01d}_APR_pose_results.txt'), pose_results) 277 | sys.exit() 278 | # -------------------------------------------------------------------------------- /script/dm/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ActiveVisionLab/NeFeS/1ac72cb48af60d5bfa1fed1d1af9d0f1dc750b7a/script/dm/__init__.py -------------------------------------------------------------------------------- /script/dm/callbacks.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import torch 3 | from tqdm import tqdm 4 | import os 5 | 6 | class EarlyStopping: 7 | """Early stops the training if validation loss doesn't improve after a given 
patience.""" 8 | # source https://blog.csdn.net/qq_37430422/article/details/103638681 9 | def __init__(self, args, patience=50, verbose=False, delta=0): 10 | """ 11 | Args: 12 | patience (int): How long to wait after last time validation loss improved. 13 | Default: 50 14 | verbose (bool): If True, prints a message for each validation loss improvement. 15 | Default: False 16 | delta (float): Minimum change in the monitored quantity to qualify as an improvement. 17 | Default: 0 18 | """ 19 | self.val_on_psnr = args.val_on_psnr 20 | self.patience = patience 21 | self.verbose = verbose 22 | self.counter = 0 23 | self.best_score = None 24 | self.early_stop = False 25 | self.val_loss_min = np.Inf 26 | self.delta = delta 27 | 28 | self.basedir = args.basedir 29 | self.model_name = args.model_name 30 | 31 | self.out_folder = os.path.join(self.basedir, self.model_name) 32 | self.ckpt_save_path = os.path.join(self.out_folder, 'checkpoint.pt') 33 | if not os.path.isdir(self.out_folder): 34 | os.mkdir(self.out_folder) 35 | 36 | def __call__(self, val_loss, model, epoch=-1, save_multiple=False, save_all=False, val_psnr=None): 37 | 38 | # find maximum psnr 39 | if self.val_on_psnr: 40 | score = val_psnr 41 | if self.best_score is None: 42 | self.best_score = score 43 | self.save_checkpoint(val_psnr, model, epoch=epoch, save_multiple=save_multiple) 44 | elif score < self.best_score + self.delta: 45 | self.counter += 1 46 | 47 | if self.counter >= self.patience: 48 | self.early_stop = True 49 | 50 | if save_all: # save all ckpt 51 | self.save_checkpoint(val_psnr, model, epoch=epoch, save_multiple=True, update_best=False) 52 | else: # save best ckpt only 53 | self.best_score = score 54 | self.save_checkpoint(val_psnr, model, epoch=epoch, save_multiple=save_multiple) 55 | self.counter = 0 56 | 57 | # find minimum loss 58 | else: 59 | score = -val_loss 60 | if self.best_score is None: 61 | self.best_score = score 62 | self.save_checkpoint(val_loss, model, epoch=epoch, save_multiple=save_multiple) 63 | elif score < self.best_score + self.delta: 64 | self.counter += 1 65 | 66 | if self.counter >= self.patience: 67 | self.early_stop = True 68 | 69 | if save_all: # save all ckpt 70 | self.save_checkpoint(val_loss, model, epoch=epoch, save_multiple=True, update_best=False) 71 | else: # save best ckpt only 72 | self.best_score = score 73 | self.save_checkpoint(val_loss, model, epoch=epoch, save_multiple=save_multiple) 74 | self.counter = 0 75 | 76 | def save_checkpoint(self, val_loss, model, epoch=-1, save_multiple=False, update_best=True): 77 | '''Saves model when validation loss decrease.''' 78 | if self.verbose: 79 | tqdm.write(f'Validation loss decreased ({self.val_loss_min:.6f} --> {val_loss:.6f}). Saving model ...') 80 | ckpt_save_path = self.ckpt_save_path 81 | if save_multiple: 82 | ckpt_save_path = ckpt_save_path[:-3]+f'-{epoch:04d}-{val_loss:.4f}.pt' 83 | 84 | torch.save(model.state_dict(), ckpt_save_path) 85 | if update_best: 86 | self.val_loss_min = val_loss 87 | 88 | def isBestModel(self): 89 | ''' Check if current model the best one. 
90 | get early stop counter, if counter==0: it means current model has the best validation loss 91 | ''' 92 | return self.counter==0 -------------------------------------------------------------------------------- /script/dm/direct_pose_model.py: -------------------------------------------------------------------------------- 1 | import cv2 2 | import sys 3 | 4 | import torch 5 | import torch.nn as nn 6 | import torch.nn.init 7 | import numpy as np 8 | 9 | from models.nerfh import img2mse 10 | from models.ray_utils import get_rays 11 | ''' FeatureNet models ''' 12 | from feature.dfnet import DFNet as FeatureNet 13 | 14 | ''' APR Models ''' 15 | from feature.dfnet import DFNet 16 | from utils.utils import freeze_bn_layer 17 | 18 | def disable_model_grad(model): 19 | ''' set whole model to requires_grad=False, this is for nerf model ''' 20 | print("disable_model_grad...") 21 | for module in model.modules(): 22 | # print("this is a layer:", module) 23 | if hasattr(module, 'weight'): 24 | module.weight.requires_grad_(False) 25 | if hasattr(module, 'bias'): 26 | module.bias.requires_grad_(False) 27 | return model 28 | 29 | def svd_reg(pose): 30 | ''' 31 | Refer to Direct-PN supp. Orthogonalize the Rotation Matrix 32 | :param: pose [B, 3, 4] 33 | return pose [B, 3, 4] 34 | ''' 35 | R_torch = pose[:,:3,:3].clone() # debug 36 | u,s,v=torch.svd(R_torch) 37 | Rs = torch.matmul(u, v.transpose(-2,-1)) 38 | pose[:,:3,:3] = Rs 39 | return pose 40 | 41 | def inference_pose_regression(args, data, device, model): 42 | """ 43 | Inference the Pose Regression Network 44 | Inputs: 45 | args: parsed argument 46 | data: Input image in shape (batchsize, channels, H, W) 47 | device: gpu device 48 | model: PoseNet model 49 | Outputs: 50 | pose: Predicted Pose in shape (batchsize, 3, 4) 51 | """ 52 | inputs = data.to(device) 53 | predict_pose = model(inputs) 54 | pose = predict_pose.reshape(args.batch_size, 3, 4) 55 | 56 | pose = svd_reg(pose) if args.svd_reg else pose # only needed for models that predict SE(3) 57 | return pose 58 | 59 | def rgb_loss(rgb, target, extras): 60 | ''' Compute RGB MSE Loss, original from NeRF Paper ''' 61 | # Compute MSE loss between predicted and true RGB. 
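    # As in NeRF, the total photometric loss is the MSE of the fine prediction plus, when a coarse
    # prediction 'rgb0' is present in extras (hierarchical sampling), the MSE of the coarse prediction.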
62 | img_loss = img2mse(rgb, target) 63 | loss = img_loss 64 | 65 | # Add MSE loss for coarse-grained model 66 | if 'rgb0' in extras: 67 | img_loss0 = img2mse(extras['rgb0'], target) 68 | loss += img_loss0 69 | return loss 70 | 71 | def PoseLoss(args, pose_, pose, device): 72 | loss_func = nn.MSELoss() 73 | predict_pose = pose_.reshape(args.batch_size, 12).to(device) # maynot need reshape 74 | pose_loss = loss_func(predict_pose, pose) 75 | return pose_loss 76 | 77 | def load_PoseNet(args, device): 78 | from dm.pose_model import PoseNet_res34, EfficientNetB3, PoseNetV2 79 | # create model 80 | if args.resnet34: # for paper experiment table1 81 | model = PoseNet_res34(droprate=args.dropout) 82 | elif args.efficientnet: 83 | model = EfficientNetB3() # vanilla posenet 84 | else: # default mobilenetv2 backbone 85 | model = PoseNetV2() 86 | 87 | if args.pretrain_model_path: 88 | print("load pretrained model from: ", args.pretrain_model_path) 89 | model.load_state_dict(torch.load(args.pretrain_model_path)) 90 | 91 | # Freeze BN to not updating gamma and beta 92 | if args.freezeBN: 93 | model = freeze_bn_layer(model) 94 | model.to(device) 95 | return model 96 | 97 | def load_exisiting_model(args, isFeatureNet=False): 98 | ''' Load a pretrained PoseNet model ''' 99 | 100 | if isFeatureNet==False: # load apr 101 | model = DFNet() 102 | model.load_state_dict(torch.load(args.pretrain_model_path)) 103 | return model 104 | else: # load dfnet for feature extraction 105 | model=FeatureNet() 106 | model.load_state_dict(torch.load(args.pretrain_featurenet_path)) 107 | return model 108 | 109 | def load_FeatureNet(args, device): 110 | # load pretrained FeatureNet model 111 | if args.pretrain_featurenet_path == '': 112 | print('check FeatureNet model path') 113 | sys.exit() 114 | else: 115 | feat_model = load_exisiting_model(args, isFeatureNet=True) 116 | feat_model.eval() 117 | feat_model.to(device) 118 | return feat_model 119 | 120 | def load_MsTransformer(args, device): 121 | ''' load pretrained MsTransformer Models using the offical code ''' 122 | from mstransformer.transposenet import EMSTransPoseNet 123 | torch.nn.Module.dump_patches = True 124 | import json 125 | # load MSTransformer 126 | if '7Scenes' in args.dataset_type: 127 | config_path = '../logs/mstransformer/7scenes_config.json' 128 | elif 'Cambridge' in args.dataset_type: 129 | config_path = '../logs/mstransformer/CambridgeLandmarks_config.json' 130 | 131 | with open(config_path, "r") as read_file: 132 | config = json.load(read_file) 133 | 134 | model_params = config['ems-transposenet'] 135 | general_params = config['general'] 136 | config = {**model_params, **general_params} 137 | 138 | backbone_path = '../logs/mstransformer/efficient-net-b0.pth' 139 | ckpt_path = args.pretrain_model_path 140 | model = EMSTransPoseNet(config, backbone_path, args=args) 141 | model.load_state_dict(torch.load(ckpt_path)) 142 | 143 | model.to(device) 144 | 145 | return model 146 | 147 | 148 | def load_APR_and_FeatureNet(args, device): 149 | ''' Load both APR and FeatureNet models ''' 150 | 151 | ### pose regression module 152 | if args.pretrain_model_path == '': 153 | print('check pretrained model path') 154 | sys.exit() 155 | elif args.PoseEstimatorType == 'PoseNet': 156 | model = load_PoseNet(args, device) 157 | elif args.PoseEstimatorType == 'MsTransformer': 158 | model = load_MsTransformer(args, device) 159 | elif args.PoseEstimatorType == 'DFNet': 160 | model = load_exisiting_model(args) # load pretrained DFNet model 161 | if args.freezeBN: 162 | model = 
freeze_bn_layer(model) 163 | model.to(device) 164 | elif args.PoseEstimatorType == 'NetVlad': 165 | print('loading NetVlad') 166 | model = None 167 | else: 168 | NotImplementedError 169 | 170 | ### feature extraction module 171 | feat_model = load_FeatureNet(args, device) 172 | 173 | return model, feat_model 174 | 175 | def prepare_batch_render(args, pose, batch_size, target_, H, W, focal, half_res=True, rand=True): 176 | ''' Break batch of images into rays ''' 177 | target_ = target_.permute(0, 2, 3, 1).numpy()#.squeeze(0) # convert to numpy image 178 | if half_res: 179 | N_rand = batch_size * (H//2) * (W//2) 180 | target_half = np.stack([cv2.resize(target_[i], (H//2, W//2), interpolation=cv2.INTER_AREA) for i in range(batch_size)], 0) 181 | target_half = torch.Tensor(target_half) 182 | 183 | rays = torch.stack([torch.stack(get_rays(H//2, W//2, focal/2, pose[i]), 0) for i in range(batch_size)], 0) # [N, ro+rd, H, W, 3] (130, 2, 100, 100, 3) 184 | rays_rgb = torch.cat((rays, target_half[:, None, ...]), 1) 185 | 186 | else: 187 | # N_rand = batch_size * H * W 188 | N_rand = args.N_rand 189 | target_ = torch.Tensor(target_) 190 | rays = torch.stack([torch.stack(get_rays(H, W, focal, pose[i]), 0) for i in range(batch_size)], 0) # [N, ro+rd, H, W, 3] (130, 2, 200, 200, 3) 191 | # [N, ro+rd+rgb, H, W, 3] 192 | rays_rgb = torch.cat([rays, target_[:, None, ...]], 1) 193 | 194 | # [N, H, W, ro+rd+rgb, 3] 195 | rays_rgb = rays_rgb.permute(0, 2, 3, 1, 4) 196 | 197 | # [(N-1)*H*W, ro+rd+rgb, 3] 198 | rays_rgb = torch.reshape(rays_rgb, (-1, 3, 3)) 199 | 200 | if 1: 201 | #print('shuffle rays') 202 | rays_rgb = rays_rgb[torch.randperm(rays_rgb.shape[0])] 203 | 204 | # Random over all images 205 | batch = rays_rgb[:N_rand].permute(1, 0 , 2) # [B, 2+1, 3*?] # (4096, 3, 3) 206 | batch_rays, target_s = batch[:2], batch[2] # [2, 4096, 3], [4096, 3] 207 | 208 | return batch_rays, target_s 209 | 210 | def fix_coord_supp(args, pose, world_setup_dict, device=None): 211 | # this function needs to be fixed because it is taking args.pose_scale 212 | '''supplementary fix_coord() for direct matching 213 | Inputs: 214 | args: parsed argument 215 | pose: pose [N, 3, 4] 216 | device: cpu or gpu 217 | Outputs: 218 | pose: converted Pose in shape [N, 3, 4] 219 | ''' 220 | if not torch.is_tensor(pose): 221 | pose = torch.Tensor(pose).to(device) 222 | sc=world_setup_dict['pose_scale'] # manual tuned factor, align with colmap scale 223 | if device is None: 224 | move_all_cam_vec = torch.Tensor(world_setup_dict['move_all_cam_vec']) 225 | else: 226 | move_all_cam_vec = torch.Tensor(world_setup_dict['move_all_cam_vec']).to(device) 227 | sc2 = world_setup_dict['pose_scale2'] 228 | pose[:,:3,3] *= sc 229 | # move center of camera pose 230 | pose[:, :3, 3] += move_all_cam_vec 231 | pose[:,:3,3] *= sc2 232 | return pose 233 | 234 | -------------------------------------------------------------------------------- /script/dm/prepare_data.py: -------------------------------------------------------------------------------- 1 | import utils.set_sys_path 2 | import torch 3 | from torch.utils.data import TensorDataset, DataLoader 4 | import numpy as np 5 | 6 | from dataset_loaders.utils.color import rgb_to_yuv 7 | to8b = lambda x : (255*np.clip(x,0,1)).astype(np.uint8) 8 | 9 | def prepare_data(args, images, poses_train, i_split, hist): 10 | ''' prepare data for ready to train posenet, return dataloaders ''' 11 | 12 | i_train, i_val, i_test = i_split 13 | 14 | img_train = torch.Tensor(images[i_train]).permute(0, 3, 1, 2) # now shape is [N, 
CH, H, W] 15 | pose_train = torch.Tensor(poses_train[i_train]) 16 | hist_train = torch.Tensor(hist[i_train]) 17 | 18 | trainset = TensorDataset(img_train, pose_train, hist_train) 19 | if args.render_test == True: 20 | train_dl = DataLoader(trainset, batch_size=1, shuffle=False) 21 | else: 22 | train_dl = DataLoader(trainset, batch_size=1, shuffle=True) 23 | 24 | img_val = torch.Tensor(images[i_val]).permute(0, 3, 1, 2) # now shape is [N, CH, H, W] 25 | pose_val = torch.Tensor(poses_train[i_val]) 26 | hist_val = torch.Tensor(hist[i_val]) 27 | 28 | valset = TensorDataset(img_val, pose_val, hist_val) 29 | val_dl = DataLoader(valset, shuffle=False) 30 | 31 | img_test = torch.Tensor(images[i_test]).permute(0, 3, 1, 2) # now shape is [N, CH, H, W] 32 | pose_test = torch.Tensor(poses_train[i_test]) 33 | hist_test = torch.Tensor(hist[i_test]) 34 | 35 | testset = TensorDataset(img_test, pose_test, hist_test) 36 | test_dl = DataLoader(testset, shuffle=False) 37 | 38 | return train_dl, val_dl, test_dl 39 | 40 | def load_Colmap_dataset(args): 41 | ''' load training data in llff style, currently only support exp. for heads ''' 42 | if args.dataset_type == 'llff': 43 | images, poses, bds, render_poses, i_test = load_llff_data(args.datadir, factor=args.df, 44 | recenter=True, bd_factor=None, 45 | spherify=args.spherify, path_zflat=False) 46 | breakpoint() 47 | hwf = poses[0,:3,-1] 48 | poses = poses[:,:3,:4] 49 | print('Loaded llff', images.shape, render_poses.shape, hwf, args.datadir) 50 | 51 | # if not isinstance(i_test, list): 52 | # i_test = [i_test] 53 | 54 | # if args.llffhold > 0: 55 | # print('Auto LLFF holdout,', args.llffhold) 56 | # i_test = np.arange(images.shape[0])[::args.llffhold] 57 | 58 | i_test = np.arange(231, 334, 1, dtype=int) 59 | i_val = i_test 60 | i_train = np.array([i for i in np.arange(int(images.shape[0])) if 61 | (i not in i_test and i not in i_val)]) 62 | 63 | i_train = i_train[::args.trainskip] 64 | i_val = i_val[::args.testskip] 65 | i_test = i_test[::args.testskip] 66 | 67 | print('DEFINING BOUNDS') 68 | if args.no_ndc: 69 | near = np.ndarray.min(bds) * .9 70 | far = np.ndarray.max(bds) * 1. 71 | else: 72 | near = 0. 73 | far = 1. 74 | 75 | i_split = [i_train, i_val, i_test] 76 | else: 77 | print('Unknown dataset type', args.dataset_type, 'exiting') 78 | return 79 | 80 | poses_train = poses[:,:3,:].reshape((poses.shape[0],12)) # get rid of last row [0,0,0,1] 81 | print("images.shape {}, poses_train.shape {}".format(images.shape, poses_train.shape)) 82 | 83 | INPUT_SHAPE = images[0].shape 84 | H = images[0].shape[0] 85 | W = images[0].shape[1] 86 | print("=====================================================================") 87 | print("INPUT_SHAPE:", INPUT_SHAPE) 88 | 89 | hist=None 90 | if args.encode_hist: 91 | imgs = torch.Tensor(images).permute(0,3,1,2) 92 | yuv = rgb_to_yuv(imgs) 93 | y_img = yuv[:,0] # extract y channel only 94 | hist = [torch.histc(y_img[i], bins=args.hist_bin, min=0., max=1.) 
for i in np.arange(imgs.shape[0])] # basically same as other dataloaders but in batch 95 | hist = torch.stack(hist) 96 | hist = torch.round(hist/(H*W)*100) # convert to histogram density, in terms of percentage per bin 97 | hist = np.asarray(hist) 98 | return images, poses_train, render_poses, hwf, i_split, near, far, hist 99 | 100 | def load_dataset(args): 101 | ''' load posenet training data ''' 102 | if args.dataset_type == 'llff': 103 | if args.no_bd_factor: 104 | bd_factor = None 105 | else: 106 | bd_factor = 0.75 107 | images, poses, bds, render_poses, i_test = load_llff_data(args.datadir, args.factor, 108 | recenter=True, bd_factor=bd_factor, 109 | spherify=args.spherify) 110 | 111 | hwf = poses[0,:3,-1] 112 | poses = poses[:,:3,:4] 113 | print('Loaded llff', images.shape, render_poses.shape, hwf, args.datadir) 114 | if not isinstance(i_test, list): 115 | i_test = [i_test] 116 | 117 | if args.llffhold > 0: 118 | print('Auto LLFF holdout,', args.llffhold) 119 | i_test = np.arange(images.shape[0])[::args.llffhold] 120 | 121 | i_val = i_test 122 | i_train = np.array([i for i in np.arange(int(images.shape[0])) if 123 | (i not in i_test and i not in i_val)]) 124 | 125 | print('DEFINING BOUNDS') 126 | if args.no_ndc: 127 | near = np.ndarray.min(bds) * .9 128 | far = np.ndarray.max(bds) * 1. 129 | else: 130 | near = 0. 131 | far = 1. 132 | 133 | i_split = [i_train, i_val, i_test] 134 | elif args.dataset_type == 'blender': 135 | images, poses, render_poses, hwf, i_split = load_blender_data(args.datadir, True, args.testskip) 136 | print('Loaded blender', images.shape, render_poses.shape, hwf, args.datadir) 137 | i_train, i_val, i_test = i_split 138 | # breakpoint() 139 | near = 2. 140 | far = 6. 141 | 142 | if args.white_bkgd: 143 | images = images[...,:3]*images[...,-1:] + (1.-images[...,-1:]) # [400, 400, 400, 3] 144 | else: 145 | images = images[...,:3] # 146 | else: 147 | print('Unknown dataset type', args.dataset_type, 'exiting') 148 | return 149 | 150 | poses_train = poses[:,:3,:].reshape((poses.shape[0],12)) # get rid of last row [0,0,0,1] 151 | print("images.shape {}, poses_train.shape {}".format(images.shape, poses_train.shape)) 152 | 153 | INPUT_SHAPE = images[0].shape 154 | print("=====================================================================") 155 | print("INPUT_SHAPE:", INPUT_SHAPE) 156 | 157 | hist=None 158 | # if args.encode_hist: 159 | hist = np.zeros((images.shape[0],10)) 160 | return images, poses_train, render_poses, hwf, i_split, near, far, hist -------------------------------------------------------------------------------- /script/eval.py: -------------------------------------------------------------------------------- 1 | import utils.set_sys_path 2 | import os.path as osp 3 | import numpy as np 4 | import torch 5 | import math 6 | 7 | 8 | from dm.prepare_data import load_dataset 9 | from dm.options import config_parser 10 | from dataset_loaders.load_7Scenes import load_7Scenes_dataloader 11 | from dataset_loaders.load_7Scenes_colmap import load_7Scenes_dataloader_colmap 12 | from dataset_loaders.load_Cambridge import load_Cambridge_dataloader 13 | import cv2 14 | 15 | parser = config_parser() 16 | args = parser.parse_args() 17 | device = torch.device('cuda:0') # this is really controlled in train.sh 18 | 19 | scene = osp.split(args.datadir)[-1] 20 | 21 | # DFNet vs. 
DFNet+NeFeS 22 | if args.dataset_type == '7Scenes': 23 | APR_folder = '../paper_result/DFNet_NeFeS50_7scenes/' 24 | APR_filename=APR_folder+scene+f'/DFNet_{scene}_NeFeS50_APR_pose_results.txt' 25 | elif args.dataset_type == '7Scenes_colmap': 26 | APR_folder = '../paper_result/DFNet_NeFeS50_7Scenes_colmap/' 27 | APR_filename=APR_folder+scene+f'/DFNet_{scene}_NeFeS50_APR_pose_results.txt' 28 | elif args.dataset_type == 'Cambridge': 29 | APR_folder = '../paper_result/DFNet_NeFeS50_Cambridge/' 30 | APR_filename=APR_folder+scene+f'/DFNet_{scene}_NeFeS50_APR_pose_results.txt' 31 | else: 32 | NotImplementedError 33 | 34 | def compute_pose_error_SE3(pose, predict_pose): 35 | ''' 36 | compute pose error between two SE(3) pose 37 | pose: (4,4) or (3,4) 38 | predict_pose: (4,4) or (3,4) 39 | return: t_err, R_err 40 | ''' 41 | predict_pose = predict_pose.squeeze() 42 | pose = pose.squeeze() 43 | # torch.set_printoptions(precision=32) 44 | t_error = float(torch.norm(pose[0:3,3] - predict_pose[0:3,3])) 45 | 46 | pose_R = pose[0:3,0:3].numpy() 47 | predict_pose_R = predict_pose[0:3,0:3].numpy() 48 | 49 | r_error = np.matmul(predict_pose_R, np.transpose(pose_R)) 50 | r_error = np.linalg.norm(cv2.Rodrigues(r_error)[0])*180/math.pi 51 | return t_error, r_error 52 | 53 | def compute_accuracy_stats_on_errors(t_R_errors): 54 | ''' 55 | compute stats on errors 56 | t_R_errors: (N, 2) numpy array 57 | ''' 58 | pct500_10 = 0 # 500cm/10deg 59 | pct50_5 = 0 # 50cm/5deg 60 | pct25_2 = 0 # 25cm/2deg 61 | pct10_5 = 0 # 10cm/5deg 62 | pct5 = 0 # 5cm/5deg 63 | pct2 = 0 # 2cm/2deg 64 | pct1 = 0 # 1cm/1deg 65 | 66 | total_frames = t_R_errors.shape[0] 67 | for i in range(total_frames): 68 | if t_R_errors[i,0] < 5 and t_R_errors[i,1] < 10: 69 | pct500_10 += 1 70 | if t_R_errors[i,0] < 0.5 and t_R_errors[i,1] < 5: 71 | pct50_5 += 1 72 | if t_R_errors[i,0] < 0.25 and t_R_errors[i,1] < 2: 73 | pct25_2 += 1 74 | if t_R_errors[i,0] < 0.1 and t_R_errors[i,1] < 5: 75 | pct10_5 += 1 76 | if t_R_errors[i,0] < 0.05 and t_R_errors[i,1] < 5: 77 | pct5 += 1 78 | if t_R_errors[i,0] < 0.02 and t_R_errors[i,1] < 2: 79 | pct2 += 1 80 | if t_R_errors[i,0] < 0.01 and t_R_errors[i,1] < 1: 81 | pct1 += 1 82 | print("=============================================") 83 | print("Accuracy:") 84 | print(f"500cm/10deg: {pct500_10/total_frames*100:.1f}%", ) 85 | print(f"50cm/5deg: {pct50_5/total_frames*100:.1f}%", ) 86 | print(f"25cm/2deg: {pct25_2/total_frames*100:.1f}%", ) 87 | print(f"10cm/5deg: {pct10_5/total_frames*100:.1f}%", ) 88 | print(f"5cm/5deg: {pct5/total_frames*100:.1f}%", ) 89 | print(f"2cm/2deg: {pct2/total_frames*100:.1f}%", ) 90 | print(f"1cm/1deg: {pct1/total_frames*100:.1f}%", ) 91 | 92 | def compute_none_ATE_error(pose1, pose2): 93 | ''' 94 | plot and compute pose error from two trajectories, without ATE alignment 95 | :param pose1/refined_pose: (N0, 3/4, 4) torch tensor 96 | :param pose2/gt_pose: (N0, 3/4, 4) torch tensor 97 | ''' 98 | 99 | from dm.pose_model import vis_pose 100 | assert(pose1.shape == pose2.shape) 101 | t_R_errors = np.zeros((pose2.shape[0], 2)) 102 | ind2 = 0 103 | 104 | pose1_list = [] 105 | pose2_list = [] 106 | ang_error_list = [] 107 | 108 | for i in range(pose2.shape[0]): 109 | 110 | poses_gt = pose2[i:i+1] 111 | poses_pred = pose1[i:i+1] 112 | 113 | pose1_list.append(poses_pred[:,:3,3].squeeze()) 114 | pose2_list.append(poses_gt[:,:3,3].squeeze()) 115 | 116 | error_x, theta = compute_pose_error_SE3(torch.Tensor(poses_gt), torch.Tensor(poses_pred)) 117 | t_R_errors[ind2,:] = [error_x, theta] 118 | 119 | 
ang_error_list.append(theta) 120 | ind2 += 1 121 | median_result = np.median(t_R_errors,axis=0) 122 | mean_result = np.mean(t_R_errors,axis=0) 123 | # standard log 124 | print ('pose Median error {}m and {} degrees.'.format(median_result[0], median_result[1])) 125 | print ('pose Mean error {}m and {} degrees.'.format(mean_result[0], mean_result[1])) 126 | 127 | pose1_list = np.array(pose1_list) 128 | pose2_list = np.array(pose2_list) 129 | ang_error_list = np.array(ang_error_list) 130 | vis_info_ret = {"pose": pose1_list, "pose_gt": pose2_list, "theta": ang_error_list} 131 | # vis_pose(vis_info_ret) 132 | compute_accuracy_stats_on_errors(t_R_errors) 133 | return vis_info_ret 134 | 135 | 136 | print(parser.format_values()) 137 | MODE = args.pose_only 138 | # Load data 139 | if args.dataset_type == '7Scenes': 140 | train_dl, val_dl, test_dl, hwf, i_split, near, far = load_7Scenes_dataloader(args) 141 | if args.set_near_far: 142 | print('use customized near_far') 143 | near = args.near_far[0] 144 | far = args.near_far[1] 145 | elif args.dataset_type == '7Scenes_colmap': 146 | train_dl, val_dl, test_dl, hwf, i_split, near, far = load_7Scenes_dataloader_colmap(args) 147 | if args.set_near_far: 148 | print('use customized near_far') 149 | near = args.near_far[0] 150 | far = args.near_far[1] 151 | elif args.dataset_type == 'Cambridge': 152 | train_dl, val_dl, test_dl, hwf, i_split, near, far = load_Cambridge_dataloader(args) 153 | if args.set_near_far: 154 | print('use customized near_far') 155 | near = args.near_far[0] 156 | far = args.near_far[1] 157 | else: 158 | images, poses_train, render_poses, hwf, i_split, near, far = load_dataset(args) 159 | # Cast intrinsics to right types 160 | H, W, focal = hwf 161 | H, W = int(H), int(W) 162 | hwf = [H, W, focal] 163 | if args.set_near_far: 164 | print('use customized near_far') 165 | near = args.near_far[0] 166 | far = args.near_far[1] 167 | 168 | i_train, i_val, i_test = i_split 169 | print('TRAIN views are', i_train) 170 | print('TEST views are', i_test) 171 | print('VAL views are', i_val) 172 | 173 | # load GT pose results 174 | gt_pose = test_dl.dataset.poses[i_test] 175 | if torch.is_tensor(gt_pose): 176 | gt_pose = gt_pose.numpy().reshape(gt_pose.shape[0], 3, 4).astype(np.float32) 177 | else: 178 | gt_pose = gt_pose.reshape(gt_pose.shape[0], 3, 4).astype(np.float32) 179 | 180 | # load APR pose results 181 | apr_pose = np.loadtxt(APR_filename) 182 | apr_pose = apr_pose.reshape(apr_pose.shape[0], 3, 4).astype(np.float32) 183 | 184 | # # apply KS Filtering 185 | # from lck.ks_filter import ks_filter 186 | # ks_filter(apr_pose, gt_pose, KS_filename, th=0.95) 187 | 188 | vis_info_ret = compute_none_ATE_error(apr_pose, gt_pose) 189 | -------------------------------------------------------------------------------- /script/eval.sh: -------------------------------------------------------------------------------- 1 | # !/bin/bash 2 | 3 | ################################################### Evaluate paper Exp. 
result ######################################################################## 4 | ### 7Scenes sfm apr refinement 5 | python eval.py --config config/7Scenes/dfnet/config_heads_DFM.txt 6 | python eval.py --config config/7Scenes/dfnet/config_fire_DFM.txt 7 | python eval.py --config config/7Scenes/dfnet/config_chess_DFM.txt 8 | python eval.py --config config/7Scenes/dfnet/config_office_DFM.txt 9 | python eval.py --config config/7Scenes/dfnet/config_pumpkin_DFM.txt 10 | python eval.py --config config/7Scenes/dfnet/config_kitchen_DFM.txt 11 | python eval.py --config config/7Scenes/dfnet/config_stairs_DFM.txt 12 | 13 | ### Cambridge, dataloader is slower to initialize due to preload policy. 14 | python eval.py --config config/Cambridge/dfnet/config_shop_DFM.txt 15 | python eval.py --config config/Cambridge/dfnet/config_kings_DFM.txt 16 | python eval.py --config config/Cambridge/dfnet/config_hospital_DFM.txt 17 | python eval.py --config config/Cambridge/dfnet/config_church_DFM.txt 18 | -------------------------------------------------------------------------------- /script/feature/dfnet.py: -------------------------------------------------------------------------------- 1 | ''' 2 | Source code from DFNet: Enhance Absolute Pose Regression with Direct Feature Matching https://arxiv.org/abs/2204.00559 3 | ''' 4 | import torch 5 | from torch import nn 6 | import torch.nn.functional as F 7 | from torchvision import models 8 | from typing import List 9 | 10 | # VGG-16 Layer Names and Channels 11 | vgg16_layers = { 12 | "conv1_1": 64, 13 | "relu1_1": 64, 14 | "conv1_2": 64, 15 | "relu1_2": 64, 16 | "pool1": 64, 17 | "conv2_1": 128, 18 | "relu2_1": 128, 19 | "conv2_2": 128, 20 | "relu2_2": 128, 21 | "pool2": 128, 22 | "conv3_1": 256, 23 | "relu3_1": 256, 24 | "conv3_2": 256, 25 | "relu3_2": 256, 26 | "conv3_3": 256, 27 | "relu3_3": 256, 28 | "pool3": 256, 29 | "conv4_1": 512, 30 | "relu4_1": 512, 31 | "conv4_2": 512, 32 | "relu4_2": 512, 33 | "conv4_3": 512, 34 | "relu4_3": 512, 35 | "pool4": 512, 36 | "conv5_1": 512, 37 | "relu5_1": 512, 38 | "conv5_2": 512, 39 | "relu5_2": 512, 40 | "conv5_3": 512, 41 | "relu5_3": 512, 42 | "pool5": 512, 43 | } 44 | 45 | class AdaptLayers(nn.Module): 46 | """Small adaptation layers. 47 | """ 48 | 49 | def __init__(self, hypercolumn_layers: List[str], output_dim: int = 128): 50 | """Initialize one adaptation layer for every extraction point. 51 | 52 | Args: 53 | hypercolumn_layers: The list of the hypercolumn layer names. 54 | output_dim: The output channel dimension. 55 | """ 56 | super(AdaptLayers, self).__init__() 57 | self.layers = [] 58 | channel_sizes = [vgg16_layers[name] for name in hypercolumn_layers] 59 | for i, l in enumerate(channel_sizes): 60 | layer = nn.Sequential( 61 | nn.Conv2d(l, 64, kernel_size=1, stride=1, padding=0), 62 | nn.ReLU(), 63 | nn.Conv2d(64, output_dim, kernel_size=5, stride=1, padding=2), 64 | nn.BatchNorm2d(output_dim), 65 | ) 66 | self.layers.append(layer) 67 | self.add_module("adapt_layer_{}".format(i), layer) # ex: adapt_layer_0 68 | 69 | def forward(self, features: List[torch.tensor]): 70 | """Apply adaptation layers. 
# here is list of three levels of features 71 | """ 72 | 73 | for i, _ in enumerate(features): 74 | # breakpoint() 75 | features[i] = getattr(self, "adapt_layer_{}".format(i))(features[i]) 76 | return features 77 | 78 | class DFNet(nn.Module): 79 | ''' DFNet implementation ''' 80 | default_conf = { 81 | 'hypercolumn_layers': ["conv1_2", "conv3_3", "conv5_3"], 82 | 'output_dim': 128, 83 | } 84 | mean = [0.485, 0.456, 0.406] 85 | std = [0.229, 0.224, 0.225] 86 | 87 | def __init__(self, feat_dim=12, places365_model_path=''): 88 | super().__init__() 89 | 90 | self.layer_to_index = {k: v for v, k in enumerate(vgg16_layers.keys())} 91 | self.hypercolumn_indices = [self.layer_to_index[n] for n in self.default_conf['hypercolumn_layers']] # [2, 14, 28] 92 | 93 | # Initialize architecture 94 | # vgg16 = models.vgg16(pretrained=True) 95 | vgg16 = models.vgg16(weights='DEFAULT') 96 | self.encoder = nn.Sequential(*list(vgg16.features.children())) 97 | self.scales = [] 98 | current_scale = 0 99 | for i, layer in enumerate(self.encoder): 100 | if isinstance(layer, torch.nn.MaxPool2d): 101 | current_scale += 1 102 | if i in self.hypercolumn_indices: 103 | self.scales.append(2**current_scale) 104 | 105 | self.adaptation_layers = AdaptLayers(self.default_conf['hypercolumn_layers'], self.default_conf['output_dim']) 106 | 107 | # pose regression layers 108 | self.avgpool = nn.AdaptiveAvgPool2d(1) 109 | self.fc_pose = nn.Linear(512, feat_dim) 110 | 111 | def forward(self, x, return_feature=False, isSingleStream=False, return_pose=True, upsampleH=240, upsampleW=427): 112 | ''' 113 | inference DFNet. It can regress camera pose as well as extract intermediate layer features. 114 | :param x: image blob (2B x C x H x W) two stream or (B x C x H x W) single stream 115 | :param return_feature: whether to return features as output 116 | :param isSingleStream: whether it's an single stream inference or siamese network inference 117 | :param return_pose: whether to return predicted pose as output 118 | :param upsampleH: feature upsample size H 119 | :param upsampleW: feature upsample size W 120 | :return feature_maps: (2, [B, C, H, W]) or (1, [B, C, H, W]) or None 121 | :return predict: [2B, 12] or [B, 12] 122 | ''' 123 | # normalize input data 124 | mean, std = x.new_tensor(self.mean), x.new_tensor(self.std) 125 | x = (x - mean[:, None, None]) / std[:, None, None] 126 | 127 | ### encoder ### 128 | feature_maps = [] 129 | for i in range(len(self.encoder)): 130 | x = self.encoder[i](x) 131 | 132 | if i in self.hypercolumn_indices: 133 | feature = x.clone() 134 | feature_maps.append(feature) 135 | 136 | if i==self.hypercolumn_indices[-1]: 137 | if return_pose==False: 138 | predict = None 139 | break 140 | 141 | ### extract and process intermediate features ### 142 | if return_feature: 143 | feature_maps = self.adaptation_layers(feature_maps) # (3, [B, C, H', W']), H', W' are different in each layer 144 | 145 | if isSingleStream: # not siamese network style inference 146 | feature_stacks = [] 147 | for f in feature_maps: 148 | feature_stacks.append(torch.nn.UpsamplingBilinear2d(size=(upsampleH, upsampleW))(f)) 149 | feature_maps = [torch.stack(feature_stacks)] # (1, [3, B, C, H, W]) 150 | else: # siamese network style inference 151 | feature_stacks_t = [] 152 | feature_stacks_r = [] 153 | for f in feature_maps: 154 | # split real and nerf batches 155 | batch = f.shape[0] # should be target batch_size + rgb batch_size 156 | feature_t = f[:batch//2] 157 | feature_r = f[batch//2:] 158 | 159 | 
feature_stacks_t.append(torch.nn.UpsamplingBilinear2d(size=(upsampleH, upsampleW))(feature_t)) # GT img 160 | feature_stacks_r.append(torch.nn.UpsamplingBilinear2d(size=(upsampleH, upsampleW))(feature_r)) # render img 161 | feature_stacks_t = torch.stack(feature_stacks_t) # [3, B, C, H, W] 162 | feature_stacks_r = torch.stack(feature_stacks_r) # [3, B, C, H, W] 163 | feature_maps = [feature_stacks_t, feature_stacks_r] # (2, [3, B, C, H, W]) 164 | else: 165 | feature_maps = None 166 | 167 | if return_pose==False: 168 | return feature_maps, None 169 | 170 | ### pose regression head ### 171 | x = self.avgpool(x) 172 | x = x.reshape(x.size(0), -1) 173 | predict = self.fc_pose(x) 174 | 175 | return feature_maps, predict 176 | -------------------------------------------------------------------------------- /script/feature/model.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch import nn 3 | import torch.nn.functional as F 4 | from torchvision import models 5 | 6 | from efficientnet_pytorch import EfficientNet 7 | 8 | 9 | # PoseNet (SE(3)) w/ mobilev2 backbone 10 | class PoseNetV2(nn.Module): 11 | def __init__(self, feat_dim=12): 12 | super(PoseNetV2, self).__init__() 13 | self.backbone_net = models.mobilenet_v2(pretrained=True) 14 | self.feature_extractor = self.backbone_net.features 15 | self.avgpool = nn.AdaptiveAvgPool2d(1) 16 | self.fc_pose = nn.Linear(1280, feat_dim) 17 | 18 | def _aggregate_feature(self, x, upsampleH, upsampleW): 19 | ''' 20 | assume target and nerf rgb are inferenced at the same time, 21 | slice target batch and nerf batch and aggregate features 22 | :param x: image blob (2B x C x H x W) 23 | :param upsampleH: New H 24 | :param upsampleW: New W 25 | :return feature: (2 x B x H x W) 26 | ''' 27 | batch = x.shape[0] # should be target batch_size + rgb batch_size 28 | feature_t = torch.mean(torch.nn.UpsamplingBilinear2d(size=(upsampleH, upsampleW))(x[:batch//2]), dim=1) 29 | feature_r = torch.mean(torch.nn.UpsamplingBilinear2d(size=(upsampleH, upsampleW))(x[batch//2:]), dim=1) 30 | feature = torch.stack([feature_t, feature_r]) 31 | return feature 32 | 33 | def _aggregate_feature2(self, x): 34 | ''' 35 | assume target and nerf rgb are inferenced at the same time, 36 | slice target batch and nerf batch and output stacked features 37 | :param x: image blob (2B x C x H x W) 38 | :return feature: (2 x B x C x H x W) 39 | ''' 40 | batch = x.shape[0] # should be target batch_size + rgb batch_size 41 | feature_t = x[:batch//2] 42 | feature_r = x[batch//2:] 43 | feature = torch.stack([feature_t, feature_r]) 44 | return feature 45 | 46 | def forward(self, x, upsampleH=224, upsampleW=224, isTrain=False, isSingleStream=False): 47 | ''' 48 | Currently under dev. 49 | :param x: image blob () 50 | :param upsampleH: New H obsolete 51 | :param upsampleW: New W obsolete 52 | :param isTrain: True to extract features, False only return pose prediction. 
Really should be isExtractFeature 53 | :param isSingleStrea: True to inference single img, False to inference two imgs in siemese network fashion 54 | ''' 55 | feat_out = [] # we only use high level features 56 | for i in range(len(self.feature_extractor)): 57 | # print("layer {} encoder layer: {}".format(i, self.feature_extractor[i])) 58 | x = self.feature_extractor[i](x) 59 | 60 | if isTrain: # collect aggregate features 61 | if i >= 17 and i <= 17: # 17th block 62 | if isSingleStream: 63 | feature = torch.stack([x]) 64 | else: 65 | feature = self._aggregate_feature2(x) 66 | feat_out.append(feature) 67 | x = self.avgpool(x) 68 | x = x.reshape(x.size(0), -1) 69 | predict = self.fc_pose(x) 70 | return feat_out, predict 71 | 72 | class EfficientNetB3(nn.Module): 73 | ''' EfficientNet-B3 backbone, 74 | model ref: https://github.com/lukemelas/EfficientNet-PyTorch/blob/master/efficientnet_pytorch/model.py 75 | ''' 76 | def __init__(self, feat_dim=12, feature_block=6): 77 | super(EfficientNetB3, self).__init__() 78 | self.backbone_net = EfficientNet.from_pretrained('efficientnet-b3') 79 | self.feature_block = feature_block # determine which block's feature to use, max=6 80 | if self.feature_block == 6: 81 | self.feature_extractor = self.backbone_net.extract_features 82 | else: 83 | self.feature_extractor = self.backbone_net.extract_endpoints 84 | 85 | # self.feature_extractor = self.backbone_net.extract_endpoints # it can restore middle layer 86 | self.avgpool = nn.AdaptiveAvgPool2d(1) 87 | self.fc_pose = nn.Linear(1536, feat_dim) # 1280 for efficientnet-b0, 1536 for efficientnet-b3 88 | 89 | def _aggregate_feature2(self, x): 90 | ''' 91 | assume target and nerf rgb are inferenced at the same time, 92 | slice target batch and nerf batch and output stacked features 93 | :param x: image blob (2B x C x H x W) 94 | :return feature: (2 x B x C x H x W) 95 | ''' 96 | batch = x.shape[0] # should be target batch_size + rgb batch_size 97 | feature_t = x[:batch//2] 98 | feature_r = x[batch//2:] 99 | feature = torch.stack([feature_t, feature_r]) 100 | return feature 101 | 102 | def forward(self, x, return_feature=False, isSingleStream=False): 103 | ''' 104 | Currently under dev. 105 | :param x: image blob () 106 | :param return_feature: True to extract features, False only return pose prediction. 
Really should be isExtractFeature 107 | :param isSingleStream: True to inference single img, False to inference two imgs in siemese network fashion 108 | ''' 109 | # pdb.set_trace() 110 | feat_out = [] # we only use high level features 111 | if self.feature_block == 6: 112 | x = self.feature_extractor(x) 113 | fe = x.clone() # features to save 114 | else: 115 | list_x = self.feature_extractor(x) 116 | fe = list_x['reduction_'+str(self.feature_block)] 117 | x = list_x['reduction_6'] # features to save 118 | if return_feature: 119 | if isSingleStream: 120 | feature = torch.stack([fe]) 121 | else: 122 | feature = self._aggregate_feature2(fe) 123 | feat_out.append(feature) 124 | x = self.avgpool(x) 125 | x = x.reshape(x.size(0), -1) 126 | predict = self.fc_pose(x) 127 | return feat_out, predict -------------------------------------------------------------------------------- /script/models/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ActiveVisionLab/NeFeS/1ac72cb48af60d5bfa1fed1d1af9d0f1dc750b7a/script/models/__init__.py -------------------------------------------------------------------------------- /script/models/activation.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch.autograd import Function 3 | from torch.cuda.amp import custom_bwd, custom_fwd 4 | 5 | class _trunc_exp(Function): 6 | @staticmethod 7 | @custom_fwd(cast_inputs=torch.float) 8 | def forward(ctx, x): 9 | x=x.clamp(-9.7, 11.08) # safer, ref: https://en.wikipedia.org/wiki/Half-precision_floating-point_format 10 | ctx.save_for_backward(x) 11 | return torch.exp(x) 12 | 13 | @staticmethod 14 | @custom_bwd 15 | def backward(ctx, g): 16 | x = ctx.saved_tensors[0] 17 | # return g * torch.exp(x.clamp(-15, 15)) 18 | # print("g min: ", g.min(), "g max:", g.max()) 19 | return g * torch.exp(x.clamp(-9.7, 11.08)) # clamp to avoid overflow 20 | 21 | trunc_exp = _trunc_exp.apply 22 | 23 | def trunc_softplus(x): 24 | x= x.clamp(-9.7, 11.08) # safer 25 | return torch.nn.functional.softplus(x) -------------------------------------------------------------------------------- /script/models/decoder.py: -------------------------------------------------------------------------------- 1 | import torch.nn as nn 2 | import torch 3 | from torchsummary import summary 4 | from kornia.filters import filter2d 5 | 6 | class Blur(nn.Module): 7 | def __init__(self): 8 | super().__init__() 9 | f = torch.Tensor([1, 2, 1]) 10 | self.register_buffer('f', f) 11 | 12 | def forward(self, x): 13 | f = self.f 14 | f = f[None, None, :] * f[None, :, None] 15 | return filter2d(x, f, normalized=True) 16 | 17 | class Decoder(nn.Module): 18 | ''' from Giraffe Nueral Rendering class 19 | 20 | Args: 21 | n_feat (int): number of features 22 | input_dim (int): input dimension; if not equal to n_feat, 23 | it is projected to n_feat with a 1x1 convolution 24 | out_dim (int): output dimension 25 | final_actvn (bool): whether to apply a final activation (sigmoid) 26 | min_feat (int): minimum features 27 | n_blocks (int): n blocks for upsampling, 15x27 -> 240x427 => 4 blocks 28 | use_rgb_skip (bool): whether to use RGB skip connections 29 | upsample_feat (str): upsampling type for feature upsampling 30 | upsample_rgb (str): upsampling type for rgb upsampling 31 | use_norm (bool): whether to use normalization 32 | ''' 33 | 34 | def __init__( 35 | self, n_feat=128, input_dim=128, out_dim=3, final_actvn=True, 36 | min_feat=32, n_blocks=4, 
use_rgb_skip=True, 37 | upsample_feat="nn", upsample_rgb="bilinear", use_norm=False, 38 | **kwargs): 39 | super().__init__() 40 | self.final_actvn = final_actvn 41 | self.input_dim = input_dim 42 | self.use_rgb_skip = use_rgb_skip 43 | self.use_norm = use_norm 44 | self.h_dim = kwargs['h_dim'] # GT image size 45 | self.w_dim = kwargs['w_dim'] # GT image size 46 | self.n_blocks = n_blocks 47 | # n_blocks = int(log2(img_size) - 4) # 4 here represent 16x16 featuremap? 48 | 49 | assert(upsample_feat in ("nn",)) 50 | self.upsample_2 = nn.Upsample(scale_factor=2.) # feature upsampling 51 | assert(upsample_rgb in ("bilinear",)) 52 | self.upsample_rgb = nn.Sequential(nn.Upsample( 53 | scale_factor=2, mode='bilinear', align_corners=False), Blur()) # rgb upsampling 54 | 55 | # for the last layer, we upsample to specified resolution, same as GT image 56 | self.upsample_feat_final = nn.Upsample(size=[self.h_dim, self.w_dim]) 57 | self.upsample_rgb_final = nn.Sequential(nn.Upsample( 58 | size=[self.h_dim, self.w_dim], mode='bilinear', align_corners=False), Blur()) 59 | 60 | if n_feat == input_dim: 61 | self.conv_in = lambda x: x 62 | else: 63 | self.conv_in = nn.Conv2d(input_dim, n_feat, 1, 1, 0) 64 | 65 | self.conv_layers = nn.ModuleList( 66 | [nn.Conv2d(n_feat, max(n_feat // 2, min_feat), 3, 1, 1)] + 67 | [nn.Conv2d(max(n_feat // (2 ** (i + 1)), min_feat), 68 | max(n_feat // (2 ** (i + 2)), min_feat), 3, 1, 1) 69 | for i in range(0, n_blocks - 1)] 70 | ) 71 | if use_rgb_skip: 72 | self.conv_rgb = nn.ModuleList( 73 | [nn.Conv2d(input_dim, out_dim, 3, 1, 1)] + 74 | [nn.Conv2d(max(n_feat // (2 ** (i + 1)), min_feat), 75 | out_dim, 3, 1, 1) for i in range(0, n_blocks)] 76 | ) 77 | else: 78 | self.conv_rgb = nn.Conv2d( 79 | max(n_feat // (2 ** (n_blocks)), min_feat), 3, 1, 1) 80 | 81 | self.actvn = nn.LeakyReLU(0.2, inplace=True) 82 | 83 | def forward(self, x, return_hier_rgbs=False): 84 | ''' 85 | x: features 86 | return_hier_rgbs: return list of hidden levels of rgbs for photometric supervision if True 87 | ''' 88 | 89 | net = self.conv_in(x) 90 | 91 | if self.use_rgb_skip: # 1st time upsample to rgb, should be bilinear 92 | if self.n_blocks>1: 93 | rgb = self.upsample_rgb(self.conv_rgb[0](x)) 94 | else: 95 | rgb = self.upsample_rgb_final(self.conv_rgb[0](x)) 96 | 97 | if return_hier_rgbs==True: 98 | rgbs = [] 99 | 100 | for idx, layer in enumerate(self.conv_layers): 101 | # print("idx", idx) 102 | 103 | if idx < len(self.conv_layers) - 1: 104 | hid = layer(self.upsample_2(net)) 105 | else: 106 | hid = layer(self.upsample_feat_final(net)) 107 | 108 | net = self.actvn(hid) 109 | 110 | if self.use_rgb_skip: 111 | rgb = rgb + self.conv_rgb[idx + 1](net) 112 | 113 | if return_hier_rgbs==True: 114 | rgbs.append(rgb) 115 | 116 | if idx < len(self.conv_layers) - 2: 117 | rgb = self.upsample_rgb(rgb) 118 | elif idx == len(self.conv_layers) - 2: 119 | rgb = self.upsample_rgb_final(rgb) 120 | 121 | if not self.use_rgb_skip: 122 | rgb = self.conv_rgb(net) 123 | 124 | if return_hier_rgbs==True: 125 | # do not apply final activation 126 | rgbs[-1] = torch.clamp(rgbs[-1], 0., 1.)
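# rgbs holds one RGB prediction per decoder block, ordered coarse to fine; the last
# entry is produced at the GT resolution (h_dim, w_dim) and is clamped to [0, 1]
# rather than passed through the final sigmoid, so the whole hierarchy can be used
# for multi-scale photometric supervision.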
127 | return rgbs 128 | 129 | if self.final_actvn: 130 | rgb = torch.sigmoid(rgb) 131 | 132 | return rgb 133 | 134 | def main(): 135 | """ 136 | test decoders 137 | """ 138 | device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu") 139 | n_feat=128 140 | n_blocks=3 141 | kwargs = dict(h_dim=240, w_dim=427) 142 | 143 | decoder = Decoder(n_feat=128, input_dim=128, out_dim=3, final_actvn=True, 144 | min_feat=32, n_blocks=n_blocks, use_rgb_skip=True, 145 | upsample_feat="nn", upsample_rgb="bilinear", use_norm=False, **kwargs) 146 | decoder = decoder.to(device) 147 | # summary(decoder, (128, 15, 27)) 148 | summary(decoder, (128, 30, 54)) 149 | 150 | # f_in = torch.rand(1, 128, 15, 27).to(device) # B,C,H,W 151 | # pdb.set_trace() 152 | # out = decoder(f_in) 153 | 154 | 155 | if __name__ == '__main__': 156 | main() 157 | -------------------------------------------------------------------------------- /script/models/poses.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | from utils.lie_group_helper import make_c2w 4 | from lietorch import SE3 # SO3 5 | 6 | class LearnPose(nn.Module): 7 | def __init__(self, num_cams, learn_R, learn_t, init_c2w=None, lietorch=False): 8 | """ 9 | :param num_cams: # of frames to be optimized 10 | :param learn_R: True/False 11 | :param learn_t: True/False 12 | :param init_c2w: (N, 4, 4) torch tensor 13 | :param lietorch: True/False if True, use lietorch to compute the pose 14 | """ 15 | super(LearnPose, self).__init__() 16 | self.num_cams = num_cams 17 | self.init_c2w = None 18 | self.lietorch = lietorch 19 | if init_c2w is not None: 20 | self.init_c2w = nn.Parameter(init_c2w, requires_grad=False) 21 | 22 | self.r = nn.Parameter(torch.zeros(size=(num_cams, 3), dtype=torch.float32), requires_grad=learn_R) # (N, 3) delta r in se(3) 23 | self.t = nn.Parameter(torch.zeros(size=(num_cams, 3), dtype=torch.float32), requires_grad=learn_t) # (N, 3) delta t in se(3) 24 | 25 | def forward(self, cam_id): 26 | r = self.r[cam_id] # (3, ) axis-angle, lie algebra 27 | t = self.t[cam_id] # (3, ) 28 | 29 | if len(t.size()) == 2: # more than 1 query pose 30 | if self.lietorch: 31 | t_r = torch.cat([t, r], dim=1) 32 | c2w = SE3.exp(t_r).matrix() # input should be (6) or 1x6, [t,t,t,r,r,r] 33 | else: 34 | c2w = make_c2w(r, t) 35 | # learn a delta pose between init pose and target pose, if a init pose is provided 36 | if self.init_c2w is not None: 37 | 38 | c2w[:,:3,:3] = c2w[:,:3,:3] @ self.init_c2w[cam_id,:3,:3] 39 | c2w[:,:3,3] = c2w[:,:3,3] + self.init_c2w[cam_id,:3,3] 40 | 41 | elif len(t.size()) == 1: # only 1 query pose 42 | if self.lietorch: 43 | t_r = torch.cat([t, r], dim=0) 44 | c2w = SE3.exp(t_r).matrix() 45 | else: 46 | c2w = make_c2w(r, t) # (4, 4) 47 | if self.init_c2w is not None: 48 | c2w[:3,:3] = c2w[:3,:3] @ self.init_c2w[cam_id,:3,:3] 49 | c2w[:3,3] = c2w[:3,3] + self.init_c2w[cam_id,:3,3] 50 | return c2w 51 | -------------------------------------------------------------------------------- /script/models/ray_utils.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import numpy as np 3 | 4 | # Ray helpers 5 | def get_rays(H, W, focal, c2w): 6 | ''' Get rays for each pixel in the image (single image). 
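Minimal shape illustration (values arbitrary; c2w is any (3, 4) or (4, 4) camera-to-world matrix):
    rays_o, rays_d = get_rays(H=100, W=100, focal=50.0, c2w=torch.eye(4)[:3])
    # rays_o: (100, 100, 3), the camera centre repeated for every pixel
    # rays_d: (100, 100, 3), one (unnormalized) world-space viewing direction per pixel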
''' 7 | i, j = torch.meshgrid(torch.linspace(0, W-1, W), torch.linspace(0, H-1, H), indexing='ij') # pytorch's meshgrid has indexing='ij' 8 | i = i.t() 9 | j = j.t() 10 | dirs = torch.stack([(i-W*.5)/focal, -(j-H*.5)/focal, -torch.ones_like(i)], -1) 11 | 12 | # Rotate ray directions from camera frame to the world frame 13 | rays_d = torch.sum(dirs[..., np.newaxis, :] * c2w[:3,:3], -1) # dot product, equals to: [c2w.dot(dir) for dir in dirs] 14 | # Translate camera frame's origin to the world frame. It is the origin of all rays. 15 | rays_o = c2w[:3,-1].expand(rays_d.shape) 16 | return rays_o, rays_d # rays_o (100,100,3), rays_d (100,100,3) 17 | 18 | def get_rays_np(H, W, focal, c2w): 19 | i, j = np.meshgrid(np.arange(W, dtype=np.float32), np.arange(H, dtype=np.float32), indexing='xy') 20 | dirs = np.stack([(i-W*.5)/focal, -(j-H*.5)/focal, -np.ones_like(i)], -1) 21 | # Rotate ray directions from camera frame to the world frame 22 | rays_d = np.sum(dirs[..., np.newaxis, :] * c2w[:3,:3], -1) # dot product, equals to: [c2w.dot(dir) for dir in dirs] 23 | # Translate camera frame's origin to the world frame. It is the origin of all rays. 24 | rays_o = np.broadcast_to(c2w[:3,-1], np.shape(rays_d)) 25 | return rays_o, rays_d 26 | 27 | def ndc_rays(H, W, focal, near, rays_o, rays_d): 28 | # Shift ray origins to near plane 29 | t = -(near + rays_o[...,2]) / rays_d[...,2] # t_n = −(n + o_z)/d_z move o to the ray's intersection with near plane 30 | rays_o = rays_o + t[...,None] * rays_d 31 | 32 | # Projection Formular (20) 33 | o0 = -1./(W/(2.*focal)) * rays_o[...,0] / rays_o[...,2] 34 | o1 = -1./(H/(2.*focal)) * rays_o[...,1] / rays_o[...,2] 35 | o2 = 1. + 2. * near / rays_o[...,2] 36 | # Formular (21) 37 | d0 = -1./(W/(2.*focal)) * (rays_d[...,0]/rays_d[...,2] - rays_o[...,0]/rays_o[...,2]) 38 | d1 = -1./(H/(2.*focal)) * (rays_d[...,1]/rays_d[...,2] - rays_o[...,1]/rays_o[...,2]) 39 | d2 = -2. * near / rays_o[...,2] 40 | 41 | rays_o = torch.stack([o0,o1,o2], -1) # o' 42 | rays_d = torch.stack([d0,d1,d2], -1) # d' 43 | 44 | return rays_o, rays_d 45 | 46 | def get_rays_batch(H, W, focal, c2w): 47 | ''' Get rays for each pixel in the image (in batches). ''' 48 | 49 | assert(len(c2w.size()) == 3) 50 | i, j = torch.meshgrid(torch.linspace(0, W-1, W), torch.linspace(0, H-1, H), indexing='ij') # pytorch's meshgrid has indexing='ij' 51 | i = i.t() 52 | j = j.t() 53 | dirs = torch.stack([(i-W*.5)/focal, -(j-H*.5)/focal, -torch.ones_like(i)], -1) 54 | 55 | # Rotate ray directions from camera frame to the world frame 56 | rays_d = torch.stack([torch.sum(dirs[..., np.newaxis, :] * c2w[k,:3,:3], -1) for k in range(c2w.shape[0])]) # dot product, equals to: [c2w.dot(dir) for dir in dirs] 57 | # Translate camera frame's origin to the world frame. It is the origin of all rays. 
58 | rays_o = torch.stack([c2w[k,:3,-1].expand(rays_d.shape[1:]) for k in range(c2w.shape[0])]) 59 | return rays_o, rays_d # rays_o (B,100,100,3), rays_d (B,100,100,3) -------------------------------------------------------------------------------- /script/mstransformer/backbone.py: -------------------------------------------------------------------------------- 1 | """ 2 | Code for the backbone of TransPoseNet 3 | Backbone code is based on https://github.com/facebookresearch/detr/tree/master/models with the following modifications: 4 | - use efficient-net as backbone and extract different activation maps from different reduction maps 5 | - change learned encoding to have a learned token for the pose 6 | """ 7 | import torch.nn.functional as F 8 | from torch import nn 9 | from .pencoder import build_position_encoding, NestedTensor 10 | from typing import Dict, List 11 | import torch 12 | 13 | class BackboneBase(nn.Module): 14 | 15 | def __init__(self, backbone: nn.Module, reduction): 16 | super().__init__() 17 | self.body = backbone 18 | self.reductions = reduction 19 | self.reduction_map = {"reduction_3": 40, "reduction_4": 112} 20 | self.num_channels = [self.reduction_map[reduction] for reduction in self.reductions] 21 | 22 | def forward(self, tensor_list: NestedTensor): 23 | xs = self.body.extract_endpoints(tensor_list.tensors) 24 | out: Dict[str, NestedTensor] = {} 25 | for name in self.reductions: 26 | x = xs[name] 27 | m = tensor_list.mask 28 | assert m is not None 29 | mask = F.interpolate(m[None].float(), size=x.shape[-2:]).to(torch.bool)[0] 30 | out[name] = NestedTensor(x, mask) 31 | return out 32 | 33 | 34 | class Backbone(BackboneBase): 35 | def __init__(self, backbone_model_path: str, reduction): 36 | backbone = torch.load(backbone_model_path) 37 | super().__init__(backbone, reduction) 38 | 39 | 40 | class Joiner(nn.Sequential): 41 | def __init__(self, backbone, position_embedding): 42 | super().__init__(backbone, position_embedding) 43 | 44 | def forward(self, tensor_list: NestedTensor): 45 | xs = self[0](tensor_list) 46 | out: List[NestedTensor] = [] 47 | pos = [] 48 | for name, x in xs.items(): 49 | out.append(x) 50 | # position encoding 51 | ret = self[1](x) 52 | if isinstance(ret, tuple): 53 | p_emb, m_emb = ret 54 | pos.append([p_emb.to(x.tensors.dtype), m_emb.to(x.tensors.dtype)]) 55 | else: 56 | pos.append(ret.to(x.tensors.dtype)) 57 | 58 | return out, pos 59 | 60 | def build_backbone(config): 61 | position_embedding = build_position_encoding(config) 62 | backbone = Backbone(config.get("backbone"), config.get("reduction")) 63 | model = Joiner(backbone, position_embedding) 64 | model.num_channels = backbone.num_channels 65 | return model 66 | -------------------------------------------------------------------------------- /script/mstransformer/pencoder.py: -------------------------------------------------------------------------------- 1 | """ 2 | Code for the position encoding of TransPoseNet 3 | code is based on https://github.com/facebookresearch/detr/tree/master/models with the following modifications: 4 | - changed to learn also the position of a learned pose token 5 | """ 6 | import torch 7 | from torch import nn 8 | from typing import Optional 9 | from torch import Tensor 10 | 11 | import os 12 | import subprocess 13 | import time 14 | from collections import defaultdict, deque 15 | import datetime 16 | import pickle 17 | from typing import Optional, List 18 | 19 | import torch 20 | import torch.distributed as dist 21 | from torch import Tensor 22 | 23 | # needed 
due to empty tensor bug in pytorch and torchvision 0.5 24 | import torchvision 25 | # if float(torchvision.__version__[:3]) < 0.7: 26 | # from torchvision.ops import _new_empty_tensor 27 | # from torchvision.ops.misc import _output_size 28 | 29 | def _max_by_axis(the_list): 30 | # type: (List[List[int]]) -> List[int] 31 | maxes = the_list[0] 32 | for sublist in the_list[1:]: 33 | for index, item in enumerate(sublist): 34 | maxes[index] = max(maxes[index], item) 35 | return maxes 36 | 37 | class NestedTensor(object): 38 | def __init__(self, tensors, mask: Optional[Tensor]): 39 | self.tensors = tensors 40 | self.mask = mask 41 | 42 | def to(self, device): 43 | # type: (Device) -> NestedTensor # noqa 44 | cast_tensor = self.tensors.to(device) 45 | mask = self.mask 46 | if mask is not None: 47 | assert mask is not None 48 | cast_mask = mask.to(device) 49 | else: 50 | cast_mask = None 51 | return NestedTensor(cast_tensor, cast_mask) 52 | 53 | def decompose(self): 54 | return self.tensors, self.mask 55 | 56 | def __repr__(self): 57 | return str(self.tensors) 58 | 59 | def nested_tensor_from_tensor_list(tensor_list: List[Tensor]): 60 | # TODO make this more general 61 | if tensor_list[0].ndim == 3: 62 | if torchvision._is_tracing(): 63 | # nested_tensor_from_tensor_list() does not export well to ONNX 64 | # call _onnx_nested_tensor_from_tensor_list() instead 65 | return _onnx_nested_tensor_from_tensor_list(tensor_list) 66 | 67 | # TODO make it support different-sized images 68 | max_size = _max_by_axis([list(img.shape) for img in tensor_list]) 69 | # min_size = tuple(min(s) for s in zip(*[img.shape for img in tensor_list])) 70 | batch_shape = [len(tensor_list)] + max_size 71 | b, c, h, w = batch_shape 72 | dtype = tensor_list[0].dtype 73 | device = tensor_list[0].device 74 | tensor = torch.zeros(batch_shape, dtype=dtype, device=device) 75 | mask = torch.ones((b, h, w), dtype=torch.bool, device=device) 76 | for img, pad_img, m in zip(tensor_list, tensor, mask): 77 | pad_img[: img.shape[0], : img.shape[1], : img.shape[2]].copy_(img) 78 | m[: img.shape[1], :img.shape[2]] = False 79 | else: 80 | raise ValueError('not supported') 81 | return NestedTensor(tensor, mask) 82 | 83 | 84 | # _onnx_nested_tensor_from_tensor_list() is an implementation of 85 | # nested_tensor_from_tensor_list() that is supported by ONNX tracing. 
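# Minimal illustration of the padding behaviour above (hypothetical helper, not used
# elsewhere): two differently-sized images are zero-padded to a common
# (B, C, H_max, W_max) tensor, and the mask marks padded pixels with True and valid
# pixels with False.
def _demo_nested_tensor_padding():
    imgs = [torch.rand(3, 200, 300), torch.rand(3, 180, 320)]
    nt = nested_tensor_from_tensor_list(imgs)
    tensors, mask = nt.decompose()
    # tensors: (2, 3, 200, 320); mask: (2, 200, 320), True where padding was added
    return tensors.shape, mask.shape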
86 | @torch.jit.unused 87 | def _onnx_nested_tensor_from_tensor_list(tensor_list): 88 | max_size = [] 89 | for i in range(tensor_list[0].dim()): 90 | max_size_i = torch.max(torch.stack([img.shape[i] for img in tensor_list]).to(torch.float32)).to(torch.int64) 91 | max_size.append(max_size_i) 92 | max_size = tuple(max_size) 93 | 94 | # work around for 95 | # pad_img[: img.shape[0], : img.shape[1], : img.shape[2]].copy_(img) 96 | # m[: img.shape[1], :img.shape[2]] = False 97 | # which is not yet supported in onnx 98 | padded_imgs = [] 99 | padded_masks = [] 100 | for img in tensor_list: 101 | padding = [(s1 - s2) for s1, s2 in zip(max_size, tuple(img.shape))] 102 | padded_img = torch.nn.functional.pad(img, (0, padding[2], 0, padding[1], 0, padding[0])) 103 | padded_imgs.append(padded_img) 104 | 105 | m = torch.zeros_like(img[0], dtype=torch.int, device=img.device) 106 | padded_mask = torch.nn.functional.pad(m, (0, padding[2], 0, padding[1]), "constant", 1) 107 | padded_masks.append(padded_mask.to(torch.bool)) 108 | 109 | tensor = torch.stack(padded_imgs) 110 | mask = torch.stack(padded_masks) 111 | 112 | return NestedTensor(tensor, mask=mask) 113 | 114 | class PositionEmbeddingLearnedWithPoseToken(nn.Module): 115 | """ 116 | Absolute pos embedding, learned. 117 | """ 118 | def __init__(self, num_pos_feats=256): 119 | super().__init__() 120 | self.row_embed = nn.Embedding(60, num_pos_feats) 121 | self.col_embed = nn.Embedding(60, num_pos_feats) 122 | self.pose_token_embed = nn.Embedding(60, num_pos_feats) 123 | self.reset_parameters() 124 | 125 | def reset_parameters(self): 126 | nn.init.uniform_(self.row_embed.weight) 127 | nn.init.uniform_(self.col_embed.weight) 128 | nn.init.uniform_(self.pose_token_embed.weight) 129 | 130 | def forward(self, tensor_list: NestedTensor): 131 | x = tensor_list.tensors 132 | h, w = x.shape[-2:] 133 | i = torch.arange(w, device=x.device) + 1 134 | j = torch.arange(h, device=x.device) + 1 135 | p = i[0]-1 136 | x_emb = self.col_embed(i) 137 | y_emb = self.row_embed(j) 138 | 139 | p_emb = torch.cat([self.pose_token_embed(p),self.pose_token_embed(p)]).repeat(x.shape[0], 1) 140 | 141 | # embed of position in the activation map 142 | m_emb = torch.cat([ 143 | x_emb.unsqueeze(0).repeat(h, 1, 1), 144 | y_emb.unsqueeze(1).repeat(1, w, 1), 145 | ], dim=-1).permute(2, 0, 1).unsqueeze(0).repeat(x.shape[0], 1, 1, 1) 146 | return p_emb, m_emb 147 | 148 | class PositionEmbeddingLearned(nn.Module): 149 | """ 150 | Absolute pos embedding, learned. 
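Given a NestedTensor whose image tensor has shape (B, C, h, w), forward() returns a
position encoding of shape (B, 2 * num_pos_feats, h, w): a learned column embedding
indexed by x-position concatenated with a learned row embedding indexed by y-position.
With num_pos_feats = hidden_dim // 2 (see build_position_encoding below), the output
channel count matches the transformer's hidden_dim.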
151 | """ 152 | def __init__(self, num_pos_feats=256): 153 | super().__init__() 154 | self.row_embed = nn.Embedding(50, num_pos_feats) 155 | self.col_embed = nn.Embedding(50, num_pos_feats) 156 | self.reset_parameters() 157 | 158 | def reset_parameters(self): 159 | nn.init.uniform_(self.row_embed.weight) 160 | nn.init.uniform_(self.col_embed.weight) 161 | 162 | def forward(self, tensor_list: NestedTensor): 163 | x = tensor_list.tensors 164 | h, w = x.shape[-2:] 165 | i = torch.arange(w, device=x.device) 166 | j = torch.arange(h, device=x.device) 167 | x_emb = self.col_embed(i) 168 | y_emb = self.row_embed(j) 169 | pos = torch.cat([ 170 | x_emb.unsqueeze(0).repeat(h, 1, 1), 171 | y_emb.unsqueeze(1).repeat(1, w, 1), 172 | ], dim=-1).permute(2, 0, 1).unsqueeze(0).repeat(x.shape[0], 1, 1, 1) 173 | return pos 174 | 175 | 176 | def build_position_encoding(config): 177 | hidden_dim = config.get("hidden_dim") 178 | N_steps = hidden_dim // 2 179 | learn_embedding_with_pose_token = config.get("learn_embedding_with_pose_token") 180 | if learn_embedding_with_pose_token: 181 | position_embedding = PositionEmbeddingLearnedWithPoseToken(N_steps) 182 | else: 183 | position_embedding = PositionEmbeddingLearned(N_steps) 184 | 185 | 186 | return position_embedding 187 | -------------------------------------------------------------------------------- /script/mstransformer/transformer.py: -------------------------------------------------------------------------------- 1 | """ 2 | Transformer code taken from: https://github.com/facebookresearch/detr/tree/master/models 3 | with minor modifications 4 | Note: LN at the end of the encoder is not removed as in detr 5 | """ 6 | import copy 7 | from typing import Optional, List 8 | import torch 9 | import torch.nn.functional as F 10 | from torch import nn, Tensor 11 | from .transformer_encoder import TransformerEncoderLayer, TransformerEncoder,_get_clones, _get_activation_fn 12 | 13 | 14 | class Transformer(nn.Module): 15 | default_config = { 16 | "hidden_dim":512, 17 | "nhead":8, 18 | "num_encoder_layers": 6, 19 | "num_decoder_layers": 6, 20 | "dim_feedforward": 2048, 21 | "dropout":0.1, 22 | "activation": "gelu", 23 | "normalize_before": True, 24 | "return_intermediate_dec": False 25 | } 26 | 27 | def __init__(self, config = {}): 28 | super().__init__() 29 | config = {**self.default_config, **config} 30 | 31 | d_model = config.get("hidden_dim") # 256 32 | nhead = config.get("nhead") # 8 33 | dim_feedforward = config.get("dim_feedforward") # 256 34 | dropout = config.get("dropout") # 0.1 35 | activation = config.get("activation") # gelu 36 | normalize_before = config.get("normalize_before") # True 37 | num_encoder_layers = config.get("num_encoder_layers") # 6 38 | num_decoder_layers = config.get("num_decoder_layers") # 6 39 | encoder_layer = TransformerEncoderLayer(d_model, nhead, dim_feedforward, 40 | dropout, activation, normalize_before) 41 | encoder_norm = nn.LayerNorm(d_model) if normalize_before else None 42 | self.encoder = TransformerEncoder(encoder_layer, num_encoder_layers, encoder_norm) 43 | 44 | decoder_layer = TransformerDecoderLayer(d_model, nhead, dim_feedforward, 45 | dropout, activation, normalize_before) 46 | decoder_norm = nn.LayerNorm(d_model) 47 | self.decoder = TransformerDecoder(decoder_layer, num_decoder_layers, decoder_norm, 48 | return_intermediate=config.get("return_intermediate_dec")) 49 | 50 | self._reset_parameters() 51 | 52 | self.d_model = d_model 53 | self.nhead = nhead 54 | 55 | def _reset_parameters(self): 56 | for p in 
self.parameters(): 57 | if p.dim() > 1: 58 | nn.init.xavier_uniform_(p) 59 | 60 | def forward(self, src, mask, query_embed, pos_embed): 61 | # flatten NxCxHxW to HWxNxC 62 | bs, c, h, w = src.shape 63 | src = src.flatten(2).permute(2, 0, 1) 64 | pos_embed = pos_embed.flatten(2).permute(2, 0, 1) 65 | query_embed = query_embed.unsqueeze(1).repeat(1, bs, 1) 66 | mask = mask.flatten(1) 67 | tgt = torch.zeros_like(query_embed) 68 | memory = self.encoder(src, src_key_padding_mask=mask, pos=pos_embed) 69 | hs = self.decoder(tgt, memory, memory_key_padding_mask=mask, 70 | pos=pos_embed, query_pos=query_embed) 71 | return hs.transpose(1, 2), memory.permute(1, 2, 0).view(bs, c, h, w) 72 | 73 | class TransformerDecoder(nn.Module): 74 | 75 | def __init__(self, decoder_layer, num_layers, norm=None, return_intermediate=False): 76 | super().__init__() 77 | self.layers = _get_clones(decoder_layer, num_layers) 78 | self.num_layers = num_layers 79 | self.norm = norm 80 | self.return_intermediate = return_intermediate 81 | 82 | def forward(self, tgt, memory, 83 | tgt_mask: Optional[Tensor] = None, 84 | memory_mask: Optional[Tensor] = None, 85 | tgt_key_padding_mask: Optional[Tensor] = None, 86 | memory_key_padding_mask: Optional[Tensor] = None, 87 | pos: Optional[Tensor] = None, 88 | query_pos: Optional[Tensor] = None): 89 | output = tgt 90 | 91 | intermediate = [] 92 | 93 | for layer in self.layers: 94 | output = layer(output, memory, tgt_mask=tgt_mask, 95 | memory_mask=memory_mask, 96 | tgt_key_padding_mask=tgt_key_padding_mask, 97 | memory_key_padding_mask=memory_key_padding_mask, 98 | pos=pos, query_pos=query_pos) 99 | if self.return_intermediate: 100 | intermediate.append(self.norm(output)) 101 | 102 | if self.norm is not None: 103 | output = self.norm(output) 104 | if self.return_intermediate: 105 | intermediate.pop() 106 | intermediate.append(output) 107 | 108 | if self.return_intermediate: 109 | return torch.stack(intermediate) 110 | 111 | return output.unsqueeze(0) 112 | 113 | class TransformerDecoderLayer(nn.Module): 114 | 115 | def __init__(self, d_model, nhead, dim_feedforward=2048, dropout=0.1, 116 | activation="relu", normalize_before=False): 117 | super().__init__() 118 | self.self_attn = nn.MultiheadAttention(d_model, nhead, dropout=dropout) 119 | self.multihead_attn = nn.MultiheadAttention(d_model, nhead, dropout=dropout) 120 | # Implementation of Feedforward model 121 | self.linear1 = nn.Linear(d_model, dim_feedforward) 122 | self.dropout = nn.Dropout(dropout) 123 | self.linear2 = nn.Linear(dim_feedforward, d_model) 124 | 125 | self.norm1 = nn.LayerNorm(d_model) 126 | self.norm2 = nn.LayerNorm(d_model) 127 | self.norm3 = nn.LayerNorm(d_model) 128 | self.dropout1 = nn.Dropout(dropout) 129 | self.dropout2 = nn.Dropout(dropout) 130 | self.dropout3 = nn.Dropout(dropout) 131 | 132 | self.activation = _get_activation_fn(activation) 133 | self.normalize_before = normalize_before 134 | 135 | def with_pos_embed(self, tensor, pos: Optional[Tensor]): 136 | return tensor if pos is None else tensor + pos 137 | 138 | def forward_post(self, tgt, memory, 139 | tgt_mask: Optional[Tensor] = None, 140 | memory_mask: Optional[Tensor] = None, 141 | tgt_key_padding_mask: Optional[Tensor] = None, 142 | memory_key_padding_mask: Optional[Tensor] = None, 143 | pos: Optional[Tensor] = None, 144 | query_pos: Optional[Tensor] = None): 145 | q = k = self.with_pos_embed(tgt, query_pos) 146 | tgt2 = self.self_attn(q, k, value=tgt, attn_mask=tgt_mask, 147 | key_padding_mask=tgt_key_padding_mask)[0] 148 | tgt = tgt + 
self.dropout1(tgt2) 149 | tgt = self.norm1(tgt) 150 | tgt2 = self.multihead_attn(query=self.with_pos_embed(tgt, query_pos), 151 | key=self.with_pos_embed(memory, pos), 152 | value=memory, attn_mask=memory_mask, 153 | key_padding_mask=memory_key_padding_mask)[0] 154 | tgt = tgt + self.dropout2(tgt2) 155 | tgt = self.norm2(tgt) 156 | tgt2 = self.linear2(self.dropout(self.activation(self.linear1(tgt)))) 157 | tgt = tgt + self.dropout3(tgt2) 158 | tgt = self.norm3(tgt) 159 | return tgt 160 | 161 | def forward_pre(self, tgt, memory, 162 | tgt_mask: Optional[Tensor] = None, 163 | memory_mask: Optional[Tensor] = None, 164 | tgt_key_padding_mask: Optional[Tensor] = None, 165 | memory_key_padding_mask: Optional[Tensor] = None, 166 | pos: Optional[Tensor] = None, 167 | query_pos: Optional[Tensor] = None): 168 | tgt2 = self.norm1(tgt) 169 | q = k = self.with_pos_embed(tgt2, query_pos) 170 | tgt2 = self.self_attn(q, k, value=tgt2, attn_mask=tgt_mask, 171 | key_padding_mask=tgt_key_padding_mask)[0] 172 | tgt = tgt + self.dropout1(tgt2) 173 | tgt2 = self.norm2(tgt) 174 | tgt2 = self.multihead_attn(query=self.with_pos_embed(tgt2, query_pos), 175 | key=self.with_pos_embed(memory, pos), 176 | value=memory, attn_mask=memory_mask, 177 | key_padding_mask=memory_key_padding_mask)[0] 178 | tgt = tgt + self.dropout2(tgt2) 179 | tgt2 = self.norm3(tgt) 180 | tgt2 = self.linear2(self.dropout(self.activation(self.linear1(tgt2)))) 181 | tgt = tgt + self.dropout3(tgt2) 182 | return tgt 183 | 184 | def forward(self, tgt, memory, 185 | tgt_mask: Optional[Tensor] = None, 186 | memory_mask: Optional[Tensor] = None, 187 | tgt_key_padding_mask: Optional[Tensor] = None, 188 | memory_key_padding_mask: Optional[Tensor] = None, 189 | pos: Optional[Tensor] = None, 190 | query_pos: Optional[Tensor] = None): 191 | if self.normalize_before: 192 | return self.forward_pre(tgt, memory, tgt_mask, memory_mask, 193 | tgt_key_padding_mask, memory_key_padding_mask, pos, query_pos) 194 | return self.forward_post(tgt, memory, tgt_mask, memory_mask, 195 | tgt_key_padding_mask, memory_key_padding_mask, pos, query_pos) 196 | 197 | -------------------------------------------------------------------------------- /script/mstransformer/transformer_encoder.py: -------------------------------------------------------------------------------- 1 | """ 2 | Code for the encoder of TransPoseNet 3 | code is based on https://github.com/facebookresearch/detr/tree/master/models 4 | (transformer + position encoding. 
Note: LN at the end of the encoder is not removed) 5 | with the following modifications: 6 | - decoder is removed 7 | - encoder is changed to take the encoding of the pose token and to output just the token 8 | """ 9 | 10 | import copy 11 | from typing import Optional 12 | import torch 13 | import torch.nn.functional as F 14 | from torch import nn, Tensor 15 | 16 | class Transformer(nn.Module): 17 | default_config = { 18 | "hidden_dim":512, 19 | "nhead":8, 20 | "num_encoder_layers": 6, 21 | "dim_feedforward": 2048, 22 | "dropout":0.1, 23 | "activation": "gelu", 24 | "normalize_before": True, 25 | "return_intermediate_dec": False 26 | } 27 | 28 | def __init__(self, config = {}): 29 | super().__init__() 30 | config = {**self.default_config, **config} 31 | d_model = config.get("hidden_dim") 32 | nhead = config.get("nhead") 33 | dim_feedforward = config.get("dim_feedforward") 34 | dropout = config.get("dropout") 35 | activation = config.get("activation") 36 | normalize_before = config.get("normalize_before") 37 | num_encoder_layers = config.get("num_encoder_layers") 38 | encoder_layer = TransformerEncoderLayer(d_model, nhead, dim_feedforward, 39 | dropout, activation, normalize_before) 40 | encoder_norm = nn.LayerNorm(d_model) if normalize_before else None 41 | self.encoder = TransformerEncoder(encoder_layer, num_encoder_layers, encoder_norm) 42 | self._reset_parameters() 43 | 44 | self.d_model = d_model 45 | self.nhead = nhead 46 | 47 | def _reset_parameters(self): 48 | for p in self.parameters(): 49 | if p.dim() > 1: 50 | nn.init.xavier_uniform_(p) 51 | 52 | def forward(self, src, mask, pos_embed, pose_token_embed): 53 | # flatten NxCxHxW to HWxNxC 54 | bs, c, h, w = src.shape 55 | 56 | pose_pos_embed, activation_pos_embed = pos_embed 57 | activation_pos_embed = activation_pos_embed.flatten(2).permute(2, 0, 1) 58 | pose_pos_embed = pose_pos_embed.unsqueeze(2).permute(2, 0, 1) 59 | pos_embed = torch.cat([pose_pos_embed, activation_pos_embed]) 60 | 61 | src = src.flatten(2).permute(2, 0, 1) 62 | pose_token_embed = pose_token_embed.unsqueeze(1).repeat(1, bs, 1) 63 | src = torch.cat([pose_token_embed, src]) 64 | memory = self.encoder(src, src_key_padding_mask=None, pos=pos_embed) 65 | return memory.transpose(0,1) 66 | 67 | 68 | class TransformerEncoder(nn.Module): 69 | 70 | def __init__(self, encoder_layer, num_layers, norm=None): 71 | super().__init__() 72 | self.layers = _get_clones(encoder_layer, num_layers) 73 | self.num_layers = num_layers 74 | self.norm = norm 75 | 76 | def forward(self, src, 77 | mask: Optional[Tensor] = None, 78 | src_key_padding_mask: Optional[Tensor] = None, 79 | pos: Optional[Tensor] = None): 80 | output = src 81 | 82 | for layer in self.layers: 83 | output = layer(output, src_mask=mask, 84 | src_key_padding_mask=src_key_padding_mask, pos=pos) 85 | 86 | if self.norm is not None: 87 | output = self.norm(output) 88 | 89 | return output 90 | 91 | 92 | class TransformerEncoderLayer(nn.Module): 93 | 94 | def __init__(self, d_model, nhead, dim_feedforward=2048, dropout=0.1, 95 | activation="relu", normalize_before=False): 96 | super().__init__() 97 | self.self_attn = nn.MultiheadAttention(d_model, nhead, dropout=dropout) 98 | # Implementation of Feedforward model 99 | self.linear1 = nn.Linear(d_model, dim_feedforward) 100 | self.dropout = nn.Dropout(dropout) 101 | self.linear2 = nn.Linear(dim_feedforward, d_model) 102 | 103 | self.norm1 = nn.LayerNorm(d_model) 104 | self.norm2 = nn.LayerNorm(d_model) 105 | self.dropout1 = nn.Dropout(dropout) 106 | self.dropout2 = 
nn.Dropout(dropout) 107 | 108 | self.activation = _get_activation_fn(activation) 109 | self.normalize_before = normalize_before 110 | 111 | def with_pos_embed(self, tensor, pos: Optional[Tensor]): 112 | return tensor if pos is None else tensor + pos 113 | 114 | def forward_post(self, 115 | src, 116 | src_mask: Optional[Tensor] = None, 117 | src_key_padding_mask: Optional[Tensor] = None, 118 | pos: Optional[Tensor] = None): 119 | q = k = self.with_pos_embed(src, pos) 120 | src2 = self.self_attn(q, k, value=src, attn_mask=src_mask, 121 | key_padding_mask=src_key_padding_mask)[0] 122 | src = src + self.dropout1(src2) 123 | src = self.norm1(src) 124 | src2 = self.linear2(self.dropout(self.activation(self.linear1(src)))) 125 | src = src + self.dropout2(src2) 126 | src = self.norm2(src) 127 | return src 128 | 129 | def forward_pre(self, src, 130 | src_mask: Optional[Tensor] = None, 131 | src_key_padding_mask: Optional[Tensor] = None, 132 | pos: Optional[Tensor] = None): 133 | src2 = self.norm1(src) 134 | q = k = self.with_pos_embed(src2, pos) 135 | src2 = self.self_attn(q, k, value=src2, attn_mask=src_mask, 136 | key_padding_mask=src_key_padding_mask)[0] 137 | src = src + self.dropout1(src2) 138 | src2 = self.norm2(src) 139 | src2 = self.linear2(self.dropout(self.activation(self.linear1(src2)))) 140 | src = src + self.dropout2(src2) 141 | return src 142 | 143 | def forward(self, src, 144 | src_mask: Optional[Tensor] = None, 145 | src_key_padding_mask: Optional[Tensor] = None, 146 | pos: Optional[Tensor] = None): 147 | if self.normalize_before: 148 | return self.forward_pre(src, src_mask, src_key_padding_mask, pos) 149 | return self.forward_post(src, src_mask, src_key_padding_mask, pos) 150 | 151 | 152 | def _get_clones(module, N): 153 | return nn.ModuleList([copy.deepcopy(module) for i in range(N)]) 154 | 155 | 156 | def _get_activation_fn(activation): 157 | """Return an activation function given a string""" 158 | if activation == "relu": 159 | return F.relu 160 | if activation == "gelu": 161 | return F.gelu 162 | if activation == "glu": 163 | return F.glu 164 | raise RuntimeError(F"activation should be relu/gelu, not {activation}.") 165 | 166 | 167 | def build_transformer(config): 168 | return Transformer(config) 169 | -------------------------------------------------------------------------------- /script/test_apr_refinement.sh: -------------------------------------------------------------------------------- 1 | # !/bin/bash 2 | 3 | ## NeFeS + APR refinement ### 4 | python test_refinement.py --config config/7Scenes/dfnet/config_stairs_DFM.txt 5 | # python test_refinement.py --config config/7Scenes/dfnet/config_heads_DFM.txt 6 | # python test_refinement.py --config config/7Scenes/dfnet/config_chess_DFM.txt 7 | # python test_refinement.py --config config/7Scenes/dfnet/config_fire_DFM.txt 8 | # python test_refinement.py --config config/7Scenes/dfnet/config_kitchen_DFM.txt 9 | # python test_refinement.py --config config/7Scenes/dfnet/config_pumpkin_DFM.txt 10 | # python test_refinement.py --config config/7Scenes/dfnet/config_office_DFM.txt 11 | 12 | # python test_refinement.py --config config/Cambridge/dfnet/config_shop_DFM.txt 13 | # python test_refinement.py --config config/Cambridge/dfnet/config_hospital_DFM.txt 14 | # python test_refinement.py --config config/Cambridge/dfnet/config_kings_DFM.txt 15 | # python test_refinement.py --config config/Cambridge/dfnet/config_church_DFM.txt 16 | 17 | ## NeFeS + Pose refinement (Table 5.) 
### 18 | # python test_refinement.py --config config/7Scenes/dfnet/config_stairs_DFM.txt --pose_only 3 --lr_r 0.0087 --lr_t 0.01 19 | # python test_refinement.py --config config/7Scenes/dfnet/config_heads_DFM.txt --pose_only 3 --lr_r 0.0087 --lr_t 0.01 20 | # python test_refinement.py --config config/7Scenes/dfnet/config_chess_DFM.txt --pose_only 3 --lr_r 0.0087 --lr_t 0.01 21 | # python test_refinement.py --config config/7Scenes/dfnet/config_fire_DFM.txt --pose_only 3 --lr_r 0.0087 --lr_t 0.01 22 | # python test_refinement.py --config config/7Scenes/dfnet/config_kitchen_DFM.txt --pose_only 3 --lr_r 0.0087 --lr_t 0.01 23 | # python test_refinement.py --config config/7Scenes/dfnet/config_pumpkin_DFM.txt --pose_only 3 --lr_r 0.0087 --lr_t 0.01 24 | # python test_refinement.py --config config/7Scenes/dfnet/config_office_DFM.txt --pose_only 3 --lr_r 0.0087 --lr_t 0.01 25 | 26 | # python test_refinement.py --config config/Cambridge/dfnet/config_shop_DFM.txt --pose_only 3 27 | # python test_refinement.py --config config/Cambridge/dfnet/config_hospital_DFM.txt --pose_only 3 28 | # python test_refinement.py --config config/Cambridge/dfnet/config_kings_DFM.txt --pose_only 3 29 | # python test_refinement.py --config config/Cambridge/dfnet/config_church_DFM.txt --pose_only 3 -------------------------------------------------------------------------------- /script/test_refinement.py: -------------------------------------------------------------------------------- 1 | from ast import Not 2 | import utils.set_sys_path 3 | import numpy as np 4 | import random 5 | import torch 6 | import os 7 | from dm.pose_model import get_error_in_q 8 | from dm.direct_pose_model import load_APR_and_FeatureNet 9 | from dm.prepare_data import load_dataset 10 | from dm.options import config_parser 11 | from dm.DFM_APR_refine import DFM_post_processing 12 | from dm.DFM_pose_refine import DFM_post_processing2, load_NeRF_model 13 | from dataset_loaders.load_7Scenes import load_7Scenes_dataloader 14 | from dataset_loaders.load_7Scenes_colmap import load_7Scenes_dataloader_colmap 15 | from dataset_loaders.load_Cambridge import load_Cambridge_dataloader 16 | 17 | parser = config_parser() 18 | args = parser.parse_args() 19 | device = torch.device('cuda:0') # this is really controlled in train.sh 20 | 21 | # # try to be deterministic 22 | # np.random.seed(0) 23 | # torch.manual_seed(0) 24 | # import random 25 | # random.seed(0) 26 | 27 | # os.system("ulimit -n 8192") 28 | torch.multiprocessing.set_sharing_strategy('file_system') 29 | 30 | def train(): 31 | print(parser.format_values()) 32 | MODE = args.pose_only 33 | # Load data 34 | if args.dataset_type == '7Scenes': 35 | train_dl, val_dl, test_dl, hwf, i_split, near, far = load_7Scenes_dataloader(args) 36 | if args.set_near_far: 37 | print('use customized near_far') 38 | near = args.near_far[0] 39 | far = args.near_far[1] 40 | elif args.dataset_type == '7Scenes_colmap': 41 | train_dl, val_dl, test_dl, hwf, i_split, near, far = load_7Scenes_dataloader_colmap(args) 42 | if args.set_near_far: 43 | print('use customized near_far') 44 | near = args.near_far[0] 45 | far = args.near_far[1] 46 | elif args.dataset_type == 'Cambridge': 47 | train_dl, val_dl, test_dl, hwf, i_split, near, far = load_Cambridge_dataloader(args) 48 | if args.set_near_far: 49 | print('use customized near_far') 50 | near = args.near_far[0] 51 | far = args.near_far[1] 52 | else: 53 | images, poses_train, render_poses, hwf, i_split, near, far = load_dataset(args) 54 | # Cast intrinsics to right types 55 | H, W, 
focal = hwf 56 | H, W = int(H), int(W) 57 | hwf = [H, W, focal] 58 | if args.set_near_far: 59 | print('use customized near_far') 60 | near = args.near_far[0] 61 | far = args.near_far[1] 62 | 63 | i_train, i_val, i_test = i_split 64 | print('TRAIN views are', i_train) 65 | print('TEST views are', i_test) 66 | print('VAL views are', i_val) 67 | 68 | if MODE==2: # APR Refinement with NeFeS 69 | model, feat_model = load_APR_and_FeatureNet(args, device) 70 | 71 | # start training 72 | DFM_post_processing(args, model, feat_model, hwf, near, far, device, test_dl=test_dl) 73 | 74 | elif MODE==3: # Pose Refinement with NeFeS 75 | 76 | print ('Inital Pose Error...') 77 | ### load or inference the predicted camera poses and Feature Extraction model 78 | model, feat_model = load_APR_and_FeatureNet(args, device) 79 | 80 | # compute initial pose error 81 | vis_info = get_error_in_q(args, test_dl, model, len(i_test), device, batch_size=1, ret_vis_info=True) 82 | predict_poses = vis_info["pose_result_raw"] 83 | poses_gt = vis_info["pose_GT"] 84 | 85 | 86 | ### load NeRF 87 | world_setup_dict = { 88 | 'pose_scale' : test_dl.dataset.pose_scale, 89 | 'pose_scale2' : test_dl.dataset.pose_scale2, 90 | 'move_all_cam_vec' : test_dl.dataset.move_all_cam_vec, 91 | } 92 | 93 | render_kwargs_test = load_NeRF_model(args, near, far) 94 | 95 | ### Perform DFM post-processing 96 | pose_param_net = DFM_post_processing2(args, predict_poses, feat_model, render_kwargs_test, hwf, device, test_dl=test_dl, world_setup_dict=world_setup_dict) 97 | 98 | if __name__ == '__main__': 99 | if args.eval: 100 | torch.manual_seed(0) 101 | random.seed(0) 102 | np.random.seed(0) 103 | # eval() 104 | else: 105 | train() 106 | -------------------------------------------------------------------------------- /script/train_nefes.sh: -------------------------------------------------------------------------------- 1 | # !/bin/bash 2 | 3 | ### Train NeFeS stage1 photometric loss only ### 4 | # python run_nefes.py --config config/7Scenes/dfnet/config_stairs_stage1.txt 5 | 6 | ### Train NeFeS stage2 photometric+featuremetric loss ### 7 | # python run_nefes.py --config config/7Scenes/dfnet/config_stairs_stage2.txt 8 | 9 | ### Train NeFeS stage1 photometric loss only ### 10 | python run_nefes.py --config config/Cambridge/dfnet/config_shop_stage1.txt 11 | 12 | ### Train NeFeS stage2 photometric+featuremetric loss ### 13 | python run_nefes.py --config config/Cambridge/dfnet/config_shop_stage2.txt -------------------------------------------------------------------------------- /script/utils/align_traj.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import torch 3 | 4 | from third_party.ATE.align_utils import alignTrajectory 5 | from utils.lie_group_helper import SO3_to_quat, convert3x4_4x4 6 | 7 | def align_ate_c2b_use_a2b(traj_a, traj_b, traj_c=None): 8 | """Align c to b using the sim3 from a to b. 
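A similarity transform (scale s, rotation R, translation t with traj_b ≈ R @ (s * traj_a) + t)
is estimated between the two reference trajectories and then applied to every pose in traj_c,
mapping poses expressed in frame a into frame b.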
9 | :param traj_a: (N0, 3/4, 4) torch tensor 10 | :param traj_b: (N0, 3/4, 4) torch tensor 11 | :param traj_c: None or (N1, 3/4, 4) torch tensor 12 | :return: (N1, 4, 4) torch tensor 13 | """ 14 | device = traj_a.device 15 | if traj_c is None: 16 | traj_c = traj_a.clone() 17 | 18 | traj_a = traj_a.float().cpu().numpy() 19 | traj_b = traj_b.float().cpu().numpy() 20 | traj_c = traj_c.float().cpu().numpy() 21 | 22 | R_a = traj_a[:, :3, :3] # (N0, 3, 3) 23 | t_a = traj_a[:, :3, 3] # (N0, 3) 24 | quat_a = SO3_to_quat(R_a) # (N0, 4) 25 | 26 | R_b = traj_b[:, :3, :3] # (N0, 3, 3) 27 | t_b = traj_b[:, :3, 3] # (N0, 3) 28 | quat_b = SO3_to_quat(R_b) # (N0, 4) 29 | 30 | # This function works in quaternion. 31 | # scalar, (3, 3), (3, ) gt = R * s * est + t. 32 | s, R, t = alignTrajectory(t_a, t_b, quat_a, quat_b, method='sim3') 33 | # s, R, t = alignTrajectory(t_a, t_b, quat_a, quat_b, method='se3') 34 | 35 | # reshape tensors 36 | R = R[None, :, :].astype(np.float32) # (1, 3, 3) 37 | t = t[None, :, None].astype(np.float32) # (1, 3, 1) 38 | s = float(s) 39 | 40 | R_c = traj_c[:, :3, :3] # (N1, 3, 3) 41 | t_c = traj_c[:, :3, 3:4] # (N1, 3, 1) 42 | 43 | R_c_aligned = R @ R_c # (N1, 3, 3) 44 | t_c_aligned = s * (R @ t_c) + t # (N1, 3, 1) 45 | traj_c_aligned = np.concatenate([R_c_aligned, t_c_aligned], axis=2) # (N1, 3, 4) 46 | 47 | # append the last row 48 | traj_c_aligned = convert3x4_4x4(traj_c_aligned) # (N1, 4, 4) 49 | 50 | traj_c_aligned = torch.from_numpy(traj_c_aligned).to(device) 51 | return traj_c_aligned # (N1, 4, 4) 52 | -------------------------------------------------------------------------------- /script/utils/comp_ate.py: -------------------------------------------------------------------------------- 1 | from third_party.ATE.compute_trajectory_errors import compute_absolute_error 2 | from third_party.ATE.results_writer import compute_statistics 3 | from utils.lie_group_helper import SO3_to_quat 4 | from utils.align_traj import align_ate_c2b_use_a2b 5 | 6 | 7 | def compute_ate(c2ws_a, c2ws_b, align_a2b=None): 8 | """Compuate ate between a and b. 9 | :param c2ws_a: (N, 3/4, 4) torch 10 | :param c2ws_b: (N, 3/4, 4) torch 11 | :param align_a2b: None or 'sim3'. Set to None if a and b are pre-aligned. 
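:return: (stats_tran, stats_rot, stats_scale), dicts of summary statistics for the
    translation, rotation and scale errors.

Minimal usage sketch (c2ws_est / c2ws_gt are assumed (N, 4, 4) torch tensors):
    stats_tran, stats_rot, stats_scale = compute_ate(c2ws_est, c2ws_gt, align_a2b='sim3')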
12 | """ 13 | if align_a2b == 'sim3': 14 | c2ws_a_aligned = align_ate_c2b_use_a2b(c2ws_a, c2ws_b) 15 | R_a_aligned = c2ws_a_aligned[:, :3, :3].cpu().numpy() 16 | t_a_aligned = c2ws_a_aligned[:, :3, 3].cpu().numpy() 17 | else: 18 | R_a_aligned = c2ws_a[:, :3, :3].cpu().numpy() 19 | t_a_aligned = c2ws_a[:, :3, 3].cpu().numpy() 20 | R_b = c2ws_b[:, :3, :3].cpu().numpy() 21 | t_b = c2ws_b[:, :3, 3].cpu().numpy() 22 | 23 | quat_a_aligned = SO3_to_quat(R_a_aligned) 24 | quat_b = SO3_to_quat(R_b) 25 | 26 | e_trans, e_trans_vec, e_rot, e_ypr, e_scale_perc = compute_absolute_error(t_a_aligned,quat_a_aligned, 27 | t_b, quat_b) 28 | stats_tran = compute_statistics(e_trans) 29 | stats_rot = compute_statistics(e_rot) 30 | stats_scale = compute_statistics(e_scale_perc) 31 | 32 | return stats_tran, stats_rot, stats_scale # dicts 33 | -------------------------------------------------------------------------------- /script/utils/lie_group_helper.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import torch 3 | from scipy.spatial.transform import Rotation as RotLib 4 | 5 | 6 | def SO3_to_quat(R): 7 | """ 8 | :param R: (N, 3, 3) or (3, 3) np 9 | :return: (N, 4, ) or (4, ) np 10 | """ 11 | x = RotLib.from_matrix(R) 12 | quat = x.as_quat() 13 | return quat 14 | 15 | 16 | def quat_to_SO3(quat): 17 | """ 18 | :param quat: (N, 4, ) or (4, ) np 19 | :return: (N, 3, 3) or (3, 3) np 20 | """ 21 | x = RotLib.from_quat(quat) 22 | R = x.as_matrix() 23 | return R 24 | 25 | 26 | def convert3x4_4x4(input): 27 | """ 28 | :param input: (N, 3, 4) or (3, 4) torch or np 29 | :return: (N, 4, 4) or (4, 4) torch or np 30 | """ 31 | if torch.is_tensor(input): 32 | if len(input.shape) == 3: 33 | output = torch.cat([input, torch.zeros_like(input[:, 0:1])], dim=1) # (N, 4, 4) 34 | output[:, 3, 3] = 1.0 35 | else: 36 | output = torch.cat([input, torch.tensor([[0,0,0,1]], dtype=input.dtype, device=input.device)], dim=0) # (4, 4) 37 | else: 38 | if len(input.shape) == 3: 39 | output = np.concatenate([input, np.zeros_like(input[:, 0:1])], axis=1) # (N, 4, 4) 40 | output[:, 3, 3] = 1.0 41 | else: 42 | output = np.concatenate([input, np.array([[0,0,0,1]], dtype=input.dtype)], axis=0) # (4, 4) 43 | output[3, 3] = 1.0 44 | return output 45 | 46 | 47 | def vec2skew(v): 48 | """ 49 | :param v: (3, ) torch tensor 50 | :return: (3, 3) 51 | """ 52 | zero = torch.zeros(1, dtype=torch.float32, device=v.device) 53 | skew_v0 = torch.cat([ zero, -v[2:3], v[1:2]]) # (3, 1) 54 | skew_v1 = torch.cat([ v[2:3], zero, -v[0:1]]) 55 | skew_v2 = torch.cat([-v[1:2], v[0:1], zero]) 56 | skew_v = torch.stack([skew_v0, skew_v1, skew_v2], dim=0) # (3, 3) 57 | return skew_v # (3, 3) 58 | 59 | 60 | def Exp(r): 61 | """so(3) vector to SO(3) matrix 62 | :param r: (3, ) axis-angle, torch tensor 63 | :return: (3, 3) 64 | """ 65 | skew_r = vec2skew(r) # (3, 3) 66 | norm_r = r.norm() + 1e-15 67 | eye = torch.eye(3, dtype=torch.float32, device=r.device) 68 | R = eye + (torch.sin(norm_r) / norm_r) * skew_r + ((1 - torch.cos(norm_r)) / norm_r**2) * (skew_r @ skew_r) 69 | return R 70 | 71 | 72 | def make_c2w(r, t): 73 | """ 74 | :param r: (3, ) axis-angle torch tensor 75 | :param t: (3, ) translation vector torch tensor 76 | :return: (4, 4) 77 | """ 78 | R = Exp(r) # (3, 3) 79 | c2w = torch.cat([R, t.unsqueeze(1)], dim=1) # (3, 4) 80 | c2w = convert3x4_4x4(c2w) # (4, 4) 81 | return c2w 82 | 83 | ### from pixloc code 84 | def qvec2rotmat(qvec): 85 | return np.array([ 86 | [1 - 2 * qvec[2]**2 - 2 * qvec[3]**2, 87 | 2 
* qvec[1] * qvec[2] - 2 * qvec[0] * qvec[3], 88 | 2 * qvec[3] * qvec[1] + 2 * qvec[0] * qvec[2]], 89 | [2 * qvec[1] * qvec[2] + 2 * qvec[0] * qvec[3], 90 | 1 - 2 * qvec[1]**2 - 2 * qvec[3]**2, 91 | 2 * qvec[2] * qvec[3] - 2 * qvec[0] * qvec[1]], 92 | [2 * qvec[3] * qvec[1] - 2 * qvec[0] * qvec[2], 93 | 2 * qvec[2] * qvec[3] + 2 * qvec[0] * qvec[1], 94 | 1 - 2 * qvec[1]**2 - 2 * qvec[2]**2]]) 95 | 96 | 97 | def rotmat2qvec(R): 98 | Rxx, Ryx, Rzx, Rxy, Ryy, Rzy, Rxz, Ryz, Rzz = R.flat 99 | K = np.array([ 100 | [Rxx - Ryy - Rzz, 0, 0, 0], 101 | [Ryx + Rxy, Ryy - Rxx - Rzz, 0, 0], 102 | [Rzx + Rxz, Rzy + Ryz, Rzz - Rxx - Ryy, 0], 103 | [Ryz - Rzy, Rzx - Rxz, Rxy - Ryx, Rxx + Ryy + Rzz]]) / 3.0 104 | eigvals, eigvecs = np.linalg.eigh(K) 105 | qvec = eigvecs[[3, 0, 1, 2], np.argmax(eigvals)] 106 | if qvec[0] < 0: 107 | qvec *= -1 108 | return qvec 109 | -------------------------------------------------------------------------------- /script/utils/set_sys_path.py: -------------------------------------------------------------------------------- 1 | import sys 2 | sys.path.append('../') -------------------------------------------------------------------------------- /script/utils/vis_cam_traj.py: -------------------------------------------------------------------------------- 1 | # This file is modified from NeRF++: https://github.com/Kai-46/nerfplusplus 2 | 3 | import numpy as np 4 | 5 | try: 6 | import open3d as o3d 7 | except ImportError: 8 | pass 9 | 10 | 11 | def frustums2lineset(frustums): 12 | N = len(frustums) 13 | merged_points = np.zeros((N*5, 3)) # 5 vertices per frustum 14 | merged_lines = np.zeros((N*8, 2)) # 8 lines per frustum 15 | merged_colors = np.zeros((N*8, 3)) # each line gets a color 16 | 17 | for i, (frustum_points, frustum_lines, frustum_colors) in enumerate(frustums): 18 | merged_points[i*5:(i+1)*5, :] = frustum_points 19 | merged_lines[i*8:(i+1)*8, :] = frustum_lines + i*5 20 | merged_colors[i*8:(i+1)*8, :] = frustum_colors 21 | 22 | lineset = o3d.geometry.LineSet() 23 | lineset.points = o3d.utility.Vector3dVector(merged_points) 24 | lineset.lines = o3d.utility.Vector2iVector(merged_lines) 25 | lineset.colors = o3d.utility.Vector3dVector(merged_colors) 26 | 27 | return lineset 28 | 29 | 30 | def get_camera_frustum_opengl_coord(H, W, fx, fy, W2C, frustum_length=0.5, color=np.array([0., 1., 0.])): 31 | '''X right, Y up, Z backward to the observer. 32 | :param H, W: 33 | :param fx, fy: 34 | :param W2C: (4, 4) matrix 35 | :param frustum_length: scalar: scale the frustum 36 | :param color: (3,) list, frustum line color 37 | :return: 38 | frustum_points: (5, 3) frustum points in world coordinate 39 | frustum_lines: (8, 2) 8 lines connect 5 frustum points, specified in line start/end index. 40 | frustum_colors: (8, 3) colors for 8 lines. 41 | ''' 42 | hfov = np.rad2deg(np.arctan(W / 2. / fx) * 2.) 43 | vfov = np.rad2deg(np.arctan(H / 2. / fy) * 2.) 
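# Since hfov = 2 * arctan(W / (2 * fx)), tan(hfov / 2) = W / (2 * fx), so below
# half_w = frustum_length * W / (2 * fx) and half_h = frustum_length * H / (2 * fy):
# the image-plane half extents rescaled to the chosen frustum depth.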
44 | half_w = frustum_length * np.tan(np.deg2rad(hfov / 2.)) 45 | half_h = frustum_length * np.tan(np.deg2rad(vfov / 2.)) 46 | 47 | # build view frustum in camera space in homogenous coordinate (5, 4) 48 | frustum_points = np.array([[0., 0., 0., 1.0], # frustum origin 49 | [-half_w, half_h, -frustum_length, 1.0], # top-left image corner 50 | [half_w, half_h, -frustum_length, 1.0], # top-right image corner 51 | [half_w, -half_h, -frustum_length, 1.0], # bottom-right image corner 52 | [-half_w, -half_h, -frustum_length, 1.0]]) # bottom-left image corner 53 | frustum_lines = np.array([[0, i] for i in range(1, 5)] + [[i, (i+1)] for i in range(1, 4)] + [[4, 1]]) # (8, 2) 54 | frustum_colors = np.tile(color.reshape((1, 3)), (frustum_lines.shape[0], 1)) # (8, 3) 55 | 56 | # transform view frustum from camera space to world space 57 | C2W = np.linalg.inv(W2C) 58 | frustum_points = np.matmul(C2W, frustum_points.T).T # (5, 4) 59 | frustum_points = frustum_points[:, :3] / frustum_points[:, 3:4] # (5, 3) remove homogenous coordinate 60 | return frustum_points, frustum_lines, frustum_colors 61 | 62 | 63 | def draw_camera_frustum_geometry(c2ws, H, W, fx=600.0, fy=600.0, frustum_length=0.5, 64 | color=np.array([29.0, 53.0, 87.0])/255.0, draw_now=False, coord='opengl'): 65 | ''' 66 | :param c2ws: (N, 4, 4) np.array 67 | :param H: scalar 68 | :param W: scalar 69 | :param fx: scalar 70 | :param fy: scalar 71 | :param frustum_length: scalar 72 | :param color: None or (N, 3) or (3, ) or (1, 3) or (3, 1) np array 73 | :param draw_now: True/False call o3d vis now 74 | :return: 75 | ''' 76 | N = c2ws.shape[0] 77 | 78 | num_ele = color.flatten().shape[0] 79 | if num_ele == 3: 80 | color = color.reshape(1, 3) 81 | color = np.tile(color, (N, 1)) 82 | 83 | frustum_list = [] 84 | if coord == 'opengl': 85 | for i in range(N): 86 | frustum_list.append(get_camera_frustum_opengl_coord(H, W, fx, fy, 87 | W2C=np.linalg.inv(c2ws[i]), 88 | frustum_length=frustum_length, 89 | color=color[i])) 90 | else: 91 | print('Undefined coordinate system. Exit') 92 | exit() 93 | 94 | frustums_geometry = frustums2lineset(frustum_list) 95 | 96 | if draw_now: 97 | o3d.visualization.draw_geometries([frustums_geometry]) 98 | 99 | return frustums_geometry # this is an o3d geometry object. 100 | --------------------------------------------------------------------------------
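A minimal usage sketch for the frustum helper above (illustration only; image size, intrinsics and the dummy poses are arbitrary placeholders):

import numpy as np
import open3d as o3d
from utils.vis_cam_traj import draw_camera_frustum_geometry

# dummy stand-ins for predicted / ground-truth camera-to-world poses, (N, 4, 4)
c2ws_pred = np.tile(np.eye(4), (5, 1, 1))
c2ws_gt = np.tile(np.eye(4), (5, 1, 1))
c2ws_gt[:, 0, 3] = 0.05 * np.arange(5)  # offset the second track slightly along x

frustums_pred = draw_camera_frustum_geometry(c2ws_pred, H=480, W=640, fx=600.0, fy=600.0,
                                             frustum_length=0.3, color=np.array([1.0, 0.0, 0.0]))
frustums_gt = draw_camera_frustum_geometry(c2ws_gt, H=480, W=640, fx=600.0, fy=600.0,
                                           frustum_length=0.3, color=np.array([0.0, 1.0, 0.0]))
o3d.visualization.draw_geometries([frustums_pred, frustums_gt])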