├── .gitignore ├── LICENSE ├── README.md ├── config ├── dataset │ ├── icvl.json │ └── nyu.json ├── icvl │ ├── eval_25select1.yaml │ ├── eval_25select15.yaml │ ├── eval_25select15_light.yaml │ ├── eval_25select1_light.yaml │ ├── eval_25select3.yaml │ ├── eval_25select3_light.yaml │ ├── eval_25select9.yaml │ ├── eval_25select9_light.yaml │ ├── eval_uniform1.yaml │ ├── eval_uniform15.yaml │ ├── eval_uniform25.yaml │ ├── eval_uniform3.yaml │ ├── eval_uniform9.yaml │ ├── train_25select15.yaml │ ├── train_25select15_light.yaml │ ├── train_25select3.yaml │ ├── train_25select3_light.yaml │ ├── train_25select9.yaml │ ├── train_25select9_light.yaml │ └── train_uniform.yaml └── nyu │ ├── eval_25select1.yaml │ ├── eval_25select15.yaml │ ├── eval_25select15_light.yaml │ ├── eval_25select1_light.yaml │ ├── eval_25select3.yaml │ ├── eval_25select3_light.yaml │ ├── eval_25select9.yaml │ ├── eval_25select9_light.yaml │ ├── eval_uniform1.yaml │ ├── eval_uniform15.yaml │ ├── eval_uniform25.yaml │ ├── eval_uniform3.yaml │ ├── eval_uniform9.yaml │ ├── train_25select15.yaml │ ├── train_25select15_light.yaml │ ├── train_25select3.yaml │ ├── train_25select3_light.yaml │ ├── train_25select9.yaml │ ├── train_25select9_light.yaml │ └── train_uniform.yaml ├── feeders ├── __init__.py ├── hands2019_feeder.py ├── icvl_feeder.py └── nyu_feeder.py ├── fig └── pipeline.png ├── models ├── __init__.py ├── a2j.py ├── a2j_conf_net.py ├── attention.py ├── conf_net.py ├── layers.py ├── multiview_a2j.py ├── resnet.py └── view_selector_a2j.py ├── ops ├── __init__.py ├── cuda │ ├── __init__.py │ ├── depth_to_point_cloud_mask_cuda.cpp │ ├── depth_to_point_cloud_mask_cuda_kernel.cu │ ├── helper_cuda.h │ ├── helper_string.h │ ├── point_cloud_mask_to_depth_cuda.cpp │ ├── point_cloud_mask_to_depth_cuda_kernel.cu │ └── setup.py ├── image_ops.py ├── joint_ops.py ├── loss_ops.py ├── point_transform.py └── render.py ├── requirements.txt ├── result_nyu_icvl ├── icvl_select_15_views_light.txt ├── icvl_select_1_views_light.txt ├── icvl_select_3_views_light.txt ├── icvl_select_9_views_light.txt ├── icvl_uniform_25_views.txt ├── nyu_select_15_views_light.txt ├── nyu_select_1_view_light.txt ├── nyu_select_3_views_light.txt ├── nyu_select_9_views_light.txt └── nyu_uniform_25_views.txt ├── train_a2j.py ├── utils ├── __init__.py ├── camera_utils.py ├── hand_detector.py ├── image_utils.py ├── parser_utils.py ├── point_transform.py └── voxel_utils.py └── view_select_a2j.py /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | wheels/ 23 | share/python-wheels/ 24 | *.egg-info/ 25 | .installed.cfg 26 | *.egg 27 | MANIFEST 28 | 29 | # PyInstaller 30 | # Usually these files are written by a python script from a template 31 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 
32 | *.manifest 33 | *.spec 34 | 35 | # Installer logs 36 | pip-log.txt 37 | pip-delete-this-directory.txt 38 | 39 | # Unit test / coverage reports 40 | htmlcov/ 41 | .tox/ 42 | .nox/ 43 | .coverage 44 | .coverage.* 45 | .cache 46 | nosetests.xml 47 | coverage.xml 48 | *.cover 49 | *.py,cover 50 | .hypothesis/ 51 | .pytest_cache/ 52 | cover/ 53 | 54 | # Translations 55 | *.mo 56 | *.pot 57 | 58 | # Django stuff: 59 | *.log 60 | local_settings.py 61 | db.sqlite3 62 | db.sqlite3-journal 63 | 64 | # Flask stuff: 65 | instance/ 66 | .webassets-cache 67 | 68 | # Scrapy stuff: 69 | .scrapy 70 | 71 | # Sphinx documentation 72 | docs/_build/ 73 | 74 | # PyBuilder 75 | .pybuilder/ 76 | target/ 77 | 78 | # Jupyter Notebook 79 | .ipynb_checkpoints 80 | 81 | # IPython 82 | profile_default/ 83 | ipython_config.py 84 | 85 | # pyenv 86 | # For a library or package, you might want to ignore these files since the code is 87 | # intended to run in multiple environments; otherwise, check them in: 88 | # .python-version 89 | 90 | # pipenv 91 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 92 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 93 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 94 | # install all needed dependencies. 95 | #Pipfile.lock 96 | 97 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow 98 | __pypackages__/ 99 | 100 | # Celery stuff 101 | celerybeat-schedule 102 | celerybeat.pid 103 | 104 | # SageMath parsed files 105 | *.sage.py 106 | 107 | # Environments 108 | .env 109 | .venv 110 | env/ 111 | venv/ 112 | ENV/ 113 | env.bak/ 114 | venv.bak/ 115 | 116 | # Spyder project settings 117 | .spyderproject 118 | .spyproject 119 | 120 | # Rope project settings 121 | .ropeproject 122 | 123 | # mkdocs documentation 124 | /site 125 | 126 | # mypy 127 | .mypy_cache/ 128 | .dmypy.json 129 | dmypy.json 130 | 131 | # Pyre type checker 132 | .pyre/ 133 | 134 | # pytype static type analyzer 135 | .pytype/ 136 | 137 | # Cython debug symbols 138 | cython_debug/ 139 | 140 | .vscode/ 141 | .idea/ 142 | checkpoint/ 143 | logs/ -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Permission is hereby granted, free of charge, to any person or company obtaining a copy of this software and associated documentation files (the "Software") from the copyright holders to use the Software for any non-commercial purpose. Publication, redistribution and (re)selling of the software, of modifications, extensions, and derivates of it, and of other software containing portions of the licensed Software, are not permitted. The Copyright holder is permitted to publically disclose and advertise the use of the software by any licensee. 2 | 3 | Packaging or distributing parts or whole of the provided software (including code, models and data) as is or as part of other software is prohibited. Commercial use of parts or whole of the provided software (including code, models and data) is strictly prohibited. Using the provided software for promotion of a commercial entity or product, or in any other manner which directly or indirectly results in commercial gains is strictly prohibited. 
4 | 5 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 6 | 7 | The license is modified from this [template](https://github.com/r00tman/EventHands). 8 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Efficient Virtual View Selection for 3D Hand Pose Estimation 2 | 3 | This is the official implementation of the paper 4 | "Efficient Virtual View Selection for 3D Hand Pose Estimation", 5 | AAAI 2022. 6 | 7 | ![pipeline](fig/pipeline.png) 8 | 9 | [Project Webpage](https://me495.github.io/handpose-virtualview/)       [Paper](https://arxiv.org/pdf/2203.15458) 10 | 11 | ## Update (2022-3-30) 12 | We have uploaded prediction results in pixel coordinates (i.e., UVD format) for the NYU and ICVL datasets: https://github.com/iscas3dv/handpose-virtualview/tree/main/result_nyu_icvl. The evaluation code (https://github.com/xinghaochen/awesome-hand-pose-estimation/tree/master/evaluation) can be applied for performance comparison among SoTA methods; a minimal loading sketch is given at the end of this README. 13 | 14 | ## Update (2022-6-7) 15 | The models were damaged during upload to Google Drive. We have uploaded new models. 16 | 17 | ## Update (2022-06-27) 18 | Modified the training method for view selection with the "student" confidence network. 19 | 20 | ## Dependencies 21 | * `CUDA 11.1` 22 | 23 | Other versions of `CUDA` should also work, 24 | but please make sure that the version of `CUDA` used by `PyTorch` matches the version installed on the system, 25 | because our code needs to be compiled with `nvcc`. 26 | 27 | ## Installation 28 | * Clone this repository. 29 | * Install the required packages: 30 | ```angular2html 31 | pip install -r requirements.txt 32 | ``` 33 | * Compile and install the multi-view rendering code: 34 | ```angular2html 35 | cd ops/cuda/ 36 | python setup.py install 37 | ``` 38 | 39 | ## Data preparation 40 | We publish training and evaluation code for the NYU hand pose dataset and the ICVL hand posture dataset. 41 | The data preparation process for these two datasets is as follows. 42 | 43 | ### NYU Hand Pose Dataset 44 | * Download the [NYU Hand Pose Dataset](https://jonathantompson.github.io/NYU_Hand_Pose_Dataset.htm#download). Then arrange the files under the data directory: 45 | ```angular2html 46 | -directory/ 47 | -test/ 48 | -joint_data.mat 49 | ... 50 | -train/ 51 | -joint_data.mat 52 | ... 53 | ``` 54 | * Modify the `path` field of `config/dataset/nyu.json` to point to the data directory. 55 | 56 | ### ICVL Hand Posture Dataset 57 | * Download the [ICVL Hand Posture Dataset](https://labicvl.github.io/hand.html). Then arrange the files under the data directory: 58 | ```angular2html 59 | -directory/ 60 | -Testing/ 61 | -Depth/ 62 | ... 63 | -test_seq_1.txt 64 | -test_seq_2.txt 65 | -Training/ 66 | -Depth/ 67 | ... 68 | -labels.txt 69 | ``` 70 | * Modify the `path` field of `config/dataset/icvl.json` to point to the data directory. 71 | 72 | 73 | ## Evaluation 74 | We have already trained some models that you can [download](https://drive.google.com/file/d/1kfrfLUKynVNH5W8iD2UHLllGUG-aAJtv/view?usp=sharing) and evaluate.
75 | After downloading the models, extract the archive to the `checkpoint` folder in the project directory. 76 | 77 | ### NYU Hand Pose Dataset 78 | #### Uniform sampling 79 | In the output results, `error_3d_conf` shows the average joint error for fusion with confidence, 80 | and `error_3d_fused` shows the average joint error for fusion without confidence. 81 | * Uniformly sampling 25 views: 82 | ```angular2html 83 | python train_a2j.py --config config/nyu/eval_uniform25.yaml 84 | ``` 85 | * Uniformly sampling 15 views: 86 | ```angular2html 87 | python train_a2j.py --config config/nyu/eval_uniform15.yaml 88 | ``` 89 | * Uniformly sampling 9 views: 90 | ```angular2html 91 | python train_a2j.py --config config/nyu/eval_uniform9.yaml 92 | ``` 93 | * Uniformly sampling 3 views: 94 | ```angular2html 95 | python train_a2j.py --config config/nyu/eval_uniform3.yaml 96 | ``` 97 | * Uniformly sampling 1 view: 98 | ```angular2html 99 | python train_a2j.py --config config/nyu/eval_uniform1.yaml 100 | ``` 101 | 102 | #### View selection with the “teacher” confidence network 103 | In the output results, `error_3d_fused` shows the average joint error. 104 | * Select 15 views from 25 views: 105 | ```angular2html 106 | python train_a2j.py --config config/nyu/eval_25select15.yaml 107 | ``` 108 | * Select 9 views from 25 views: 109 | ```angular2html 110 | python train_a2j.py --config config/nyu/eval_25select9.yaml 111 | ``` 112 | * Select 3 views from 25 views: 113 | ```angular2html 114 | python train_a2j.py --config config/nyu/eval_25select3.yaml 115 | ``` 116 | * Select 1 view from 25 views: 117 | ```angular2html 118 | python train_a2j.py --config config/nyu/eval_25select1.yaml 119 | ``` 120 | 121 | #### View selection with the “student” confidence network 122 | In the output results, `epoch_error_3d_conf_select` shows the average joint error. 123 | * Select 15 views from 25 views: 124 | ```angular2html 125 | python view_select_a2j.py --config config/nyu/eval_25select15_light.yaml 126 | ``` 127 | * Select 9 views from 25 views: 128 | ```angular2html 129 | python view_select_a2j.py --config config/nyu/eval_25select9_light.yaml 130 | ``` 131 | * Select 3 views from 25 views: 132 | ```angular2html 133 | python view_select_a2j.py --config config/nyu/eval_25select3_light.yaml 134 | ``` 135 | * Select 1 view from 25 views: 136 | ```angular2html 137 | python view_select_a2j.py --config config/nyu/eval_25select1_light.yaml 138 | ``` 139 | 140 | ### ICVL Hand Posture Dataset 141 | We provide a trained model and configuration files for the ICVL hand posture dataset; 142 | you can follow the commands for the NYU hand pose dataset and use the corresponding configuration files to evaluate. 143 | 144 | 145 | ## Training 146 | You can also train models using the following commands. 147 | 148 | ### NYU Hand Pose Dataset 149 | #### Uniform sampling 150 | We train only a single model that uniformly samples 25 views; 151 | it is also suitable for uniformly sampling 15, 9, 3, and 1 views. 152 | ```angular2html 153 | python train_a2j.py --config config/nyu/train_uniform.yaml 154 | ``` 155 | 156 | #### View selection with the “teacher” confidence network 157 | The following commands train models using the "teacher" network to select 15, 9, and 3 views from 25 views, respectively. 158 | The model that selects 1 view from 25 views is the same as the model that selects 3 views from 25 views.
159 | ```angular2html 160 | python train_a2j.py --config config/nyu/train_25select15.yaml 161 | ``` 162 | ```angular2html 163 | python train_a2j.py --config config/nyu/train_25select9.yaml 164 | ``` 165 | ```angular2html 166 | python train_a2j.py --config config/nyu/train_25select3.yaml 167 | ``` 168 | 169 | #### View selection with the “student” confidence network 170 | The following commands train models using the "student" network to select 15, 9, and 3 views from 25 views, respectively. 171 | The model that selects 1 view from 25 views is the same as the model that selects 3 views from 25 views. 172 | This step requires the trained "teacher" confidence network; 173 | please set the `pre_a2j` field of the configuration file to the path of the previously trained model. 174 | ```angular2html 175 | python view_select_a2j.py --config config/nyu/train_25select15_light.yaml 176 | ``` 177 | ```angular2html 178 | python view_select_a2j.py --config config/nyu/train_25select9_light.yaml 179 | ``` 180 | ```angular2html 181 | python view_select_a2j.py --config config/nyu/train_25select3_light.yaml 182 | ``` 183 | 184 | ### ICVL Hand Posture Dataset 185 | We provide configuration files for the ICVL hand posture dataset; 186 | you can follow the commands for the NYU hand pose dataset and use the corresponding configuration files to train. 187 | 188 | ## Citation 189 | Please cite this paper if you use this work: 190 | ```angular2html 191 | @inproceedings{Cheng2022virtualview, 192 | title={Efficient Virtual View Selection for 3D Hand Pose Estimation}, 193 | author={Jian Cheng and Yanguang Wan and Dexin Zuo and Cuixia Ma and Jian Gu and Ping Tan and Hongan Wang and Xiaoming Deng and Yinda Zhang}, 194 | booktitle={AAAI Conference on Artificial Intelligence (AAAI)}, 195 | year={2022} 196 | } 197 | ``` 198 | 199 | ## Acknowledgements 200 | We use parts of the great code from [A2J](https://github.com/zhangboshen/A2J), 201 | [HandAugment](https://github.com/wozhangzhaohui/HandAugment) 202 | and [attention-is-all-you-need-pytorch](https://github.com/jadore801120/attention-is-all-you-need-pytorch).
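## Loading the released predictions
The files under `result_nyu_icvl/` (see the Update of 2022-3-30) store per-frame joint predictions in UVD pixel coordinates. The sketch below assumes the layout expected by the awesome-hand-pose-estimation evaluation scripts — one frame per line, whitespace-separated `u v d` values for every joint — so please check the files if your copy differs; the helper name `load_uvd_predictions` is illustrative and not part of this repository.
```
import numpy as np

def load_uvd_predictions(path, num_joints):
    """Read one result file into an array of shape (num_frames, num_joints, 3)."""
    with open(path) as f:
        rows = [list(map(float, line.split())) for line in f if line.strip()]
    return np.asarray(rows, dtype=np.float32).reshape(-1, num_joints, 3)

# Per the "selected" lists in config/dataset/*.json, NYU evaluates 14 joints and ICVL 16.
nyu_pred = load_uvd_predictions("result_nyu_icvl/nyu_uniform_25_views.txt", num_joints=14)
icvl_pred = load_uvd_predictions("result_nyu_icvl/icvl_uniform_25_views.txt", num_joints=16)
```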
203 | -------------------------------------------------------------------------------- /config/dataset/icvl.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "icvl", 3 | "path": "/home/dataset/ICVL", 4 | "camera": {"fx": 241.42, "fy": 241.42, "u0": 160.0, "v0": 120.0}, 5 | "height": 240, 6 | "width": 320, 7 | "crop_size": 176, 8 | "cube": [220, 220, 220], 9 | "selected": [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15] 10 | } -------------------------------------------------------------------------------- /config/dataset/nyu.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "nyu", 3 | "path": "/home/dataset/nyu/dataset", 4 | "camera": {"u0": 320.0, "v0": 240.0, "fx": 588.03, "fy": 587.07}, 5 | "height": 480, 6 | "width": 640, 7 | "crop_size": 176, 8 | "cube": [280, 280, 280], 9 | "selected": [0, 3, 6, 9, 12, 15, 18, 21, 24, 25, 27, 30, 31, 32], 10 | "fingers_indices": [[0,13,1], [2,13,3], [4,13,5], [6,13,7], [9,8,10]], 11 | "connections": [ 12 | [13, 1], 13 | [1, 0], 14 | [13, 3], 15 | [3, 2], 16 | [13, 5], 17 | [5, 4], 18 | [13, 7], 19 | [7, 6], 20 | [13, 10], 21 | [10, 9], 22 | [9, 8], 23 | [13, 11], 24 | [13, 12] 25 | ], 26 | "connection_colors": [ 27 | [0.83, 1, 0.7], 28 | [0.83, 1, 1], 29 | [0.66, 1, 0.7], 30 | [0.66, 1, 1], 31 | [0.50, 1, 0.7], 32 | [0.50, 1, 1], 33 | [0.33, 1, 0.7], 34 | [0.33, 1, 1], 35 | [0.00, 1, 0.6], 36 | [0.00, 1, 0.8], 37 | [0.00, 1, 1], 38 | [0.16, 1, 0.7], 39 | [0.16, 1, 1] 40 | ], 41 | "joint_colors": [ 42 | [0.83, 1, 0.7], 43 | [0.83, 1, 1], 44 | [0.66, 1, 0.7], 45 | [0.66, 1, 1], 46 | [0.50, 1, 0.7], 47 | [0.50, 1, 1], 48 | [0.33, 1, 0.7], 49 | [0.33, 1, 1], 50 | [0.00, 1, 0.6], 51 | [0.00, 1, 0.8], 52 | [0.00, 1, 1], 53 | [0.16, 1, 1], 54 | [0.16, 1, 1], 55 | [0.16, 1, 1] 56 | ] 57 | } -------------------------------------------------------------------------------- /config/icvl/eval_25select1.yaml: -------------------------------------------------------------------------------- 1 | phase: eval 2 | dataset: icvl 3 | batch_size: 1 4 | gpus: [0] 5 | log_dir: ./logs/icvl/eval_25select1 6 | pre_model_name: ./checkpoint/icvl/25select3.pth 7 | level: 4 8 | n_head: 1 9 | d_attn: 256 10 | d_k: 64 11 | d_v: 64 12 | d_inner: 256 13 | num_select: 1 14 | num_worker: 5 15 | save_result: True -------------------------------------------------------------------------------- /config/icvl/eval_25select15.yaml: -------------------------------------------------------------------------------- 1 | phase: eval 2 | dataset: icvl 3 | batch_size: 1 4 | gpus: [0] 5 | log_dir: ./logs/icvl/eval_25select15 6 | pre_model_name: ./checkpoint/icvl/25select15.pth 7 | level: 4 8 | n_head: 1 9 | d_attn: 256 10 | d_k: 64 11 | d_v: 64 12 | d_inner: 256 13 | num_select: 15 14 | num_worker: 5 15 | save_result: True -------------------------------------------------------------------------------- /config/icvl/eval_25select15_light.yaml: -------------------------------------------------------------------------------- 1 | dataset: icvl 2 | phase: eval 3 | batch_size: 1 4 | gpus: [0] 5 | log_dir: ./logs/icvl/eval_25select15_light 6 | pre_a2j: ./checkpoint/icvl/25select15.pth 7 | pre_model_path: ./checkpoint/icvl/25select15_light.pth 8 | level: 4 9 | n_head: 1 10 | d_attn: 256 11 | d_k: 64 12 | d_v: 64 13 | d_inner: 256 14 | num_select: 15 15 | num_worker: 8 16 | save_result: True -------------------------------------------------------------------------------- 
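The `camera` block in `config/dataset/icvl.json` and `config/dataset/nyu.json` above is what the feeders turn into a 3x3 pinhole intrinsic matrix (the same `[[fx, 0, u0], [0, fy, v0], [0, 0, 1]]` layout appears as `self.inter_matrix` in `feeders/icvl_feeder.py` and `feeders/nyu_feeder.py` further below). A minimal sketch of that mapping — the helper name `load_intrinsics` is illustrative, not part of the repo:
```
import json
import numpy as np

def load_intrinsics(config_path):
    """Build the pinhole intrinsic matrix from a dataset JSON such as config/dataset/icvl.json."""
    with open(config_path) as f:
        cam = json.load(f)["camera"]
    # Same layout as the feeders' self.inter_matrix.
    return np.array([[cam["fx"], 0., cam["u0"]],
                     [0., cam["fy"], cam["v0"]],
                     [0., 0., 1.]], dtype=np.float32)
```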
/config/icvl/eval_25select1_light.yaml: -------------------------------------------------------------------------------- 1 | dataset: icvl 2 | phase: eval 3 | batch_size: 1 4 | gpus: [0] 5 | log_dir: ./logs/icvl/eval_25select1_light 6 | pre_a2j: ./checkpoint/icvl/25select3.pth 7 | pre_model_path: ./checkpoint/icvl/25select3_light.pth 8 | level: 4 9 | n_head: 1 10 | d_attn: 256 11 | d_k: 64 12 | d_v: 64 13 | d_inner: 256 14 | num_select: 1 15 | num_worker: 8 16 | save_result: True -------------------------------------------------------------------------------- /config/icvl/eval_25select3.yaml: -------------------------------------------------------------------------------- 1 | phase: eval 2 | dataset: icvl 3 | batch_size: 1 4 | gpus: [0] 5 | log_dir: ./logs/icvl/eval_25select3 6 | pre_model_name: ./checkpoint/icvl/25select3.pth 7 | level: 4 8 | n_head: 1 9 | d_attn: 256 10 | d_k: 64 11 | d_v: 64 12 | d_inner: 256 13 | num_select: 3 14 | num_worker: 5 15 | save_result: True -------------------------------------------------------------------------------- /config/icvl/eval_25select3_light.yaml: -------------------------------------------------------------------------------- 1 | dataset: icvl 2 | phase: eval 3 | batch_size: 1 4 | gpus: [0] 5 | log_dir: ./logs/icvl/eval_25select3_light 6 | pre_a2j: ./checkpoint/icvl/25select3.pth 7 | pre_model_path: ./checkpoint/icvl/25select3_light.pth 8 | level: 4 9 | n_head: 1 10 | d_attn: 256 11 | d_k: 64 12 | d_v: 64 13 | d_inner: 256 14 | num_select: 3 15 | num_worker: 8 16 | save_result: True -------------------------------------------------------------------------------- /config/icvl/eval_25select9.yaml: -------------------------------------------------------------------------------- 1 | phase: eval 2 | dataset: icvl 3 | batch_size: 1 4 | gpus: [0] 5 | log_dir: ./logs/icvl/eval_25select9 6 | pre_model_name: ./checkpoint/icvl/25select9.pth 7 | level: 4 8 | n_head: 1 9 | d_attn: 256 10 | d_k: 64 11 | d_v: 64 12 | d_inner: 256 13 | num_select: 9 14 | num_worker: 5 15 | save_result: True -------------------------------------------------------------------------------- /config/icvl/eval_25select9_light.yaml: -------------------------------------------------------------------------------- 1 | dataset: icvl 2 | phase: eval 3 | batch_size: 1 4 | gpus: [0] 5 | log_dir: ./logs/icvl/eval_25select9_light 6 | pre_a2j: ./checkpoint/icvl/25select9.pth 7 | pre_model_path: ./checkpoint/icvl/25select9_light.pth 8 | level: 4 9 | n_head: 1 10 | d_attn: 256 11 | d_k: 64 12 | d_v: 64 13 | d_inner: 256 14 | num_select: 9 15 | num_worker: 8 16 | save_result: True -------------------------------------------------------------------------------- /config/icvl/eval_uniform1.yaml: -------------------------------------------------------------------------------- 1 | phase: eval 2 | dataset: icvl 3 | batch_size: 1 4 | gpus: [0] 5 | log_dir: ./logs/icvl/eval_uniform1 6 | pre_model_name: ./checkpoint/icvl/uniform.pth 7 | level: 0 8 | n_head: 1 9 | d_attn: 256 10 | d_k: 64 11 | d_v: 64 12 | d_inner: 256 13 | num_select: 1 14 | num_worker: 5 15 | save_result: True -------------------------------------------------------------------------------- /config/icvl/eval_uniform15.yaml: -------------------------------------------------------------------------------- 1 | phase: eval 2 | dataset: icvl 3 | batch_size: 1 4 | gpus: [0] 5 | log_dir: ./logs/icvl/eval_uniform15 6 | pre_model_name: ./checkpoint/icvl/uniform.pth 7 | level: 3 8 | n_head: 1 9 | d_attn: 256 10 | d_k: 64 11 | d_v: 64 12 | d_inner: 
256 13 | num_select: 15 14 | num_worker: 5 15 | save_result: True -------------------------------------------------------------------------------- /config/icvl/eval_uniform25.yaml: -------------------------------------------------------------------------------- 1 | phase: eval 2 | dataset: icvl 3 | batch_size: 1 4 | gpus: [0] 5 | log_dir: ./logs/icvl/eval_uniform25 6 | pre_model_name: ./checkpoint/icvl/uniform.pth 7 | level: 4 8 | n_head: 1 9 | d_attn: 256 10 | d_k: 64 11 | d_v: 64 12 | d_inner: 256 13 | num_select: 25 14 | num_worker: 5 15 | save_result: True -------------------------------------------------------------------------------- /config/icvl/eval_uniform3.yaml: -------------------------------------------------------------------------------- 1 | phase: eval 2 | dataset: icvl 3 | batch_size: 1 4 | gpus: [0] 5 | log_dir: ./logs/icvl/eval_uniform3 6 | pre_model_name: ./checkpoint/icvl/uniform.pth 7 | level: 1 8 | n_head: 1 9 | d_attn: 256 10 | d_k: 64 11 | d_v: 64 12 | d_inner: 256 13 | num_select: 3 14 | num_worker: 5 15 | save_result: True -------------------------------------------------------------------------------- /config/icvl/eval_uniform9.yaml: -------------------------------------------------------------------------------- 1 | phase: eval 2 | dataset: icvl 3 | batch_size: 1 4 | gpus: [0] 5 | log_dir: ./logs/icvl/eval_uniform9 6 | pre_model_name: ./checkpoint/icvl/uniform.pth 7 | level: 2 8 | n_head: 1 9 | d_attn: 256 10 | d_k: 64 11 | d_v: 64 12 | d_inner: 256 13 | num_select: 9 14 | num_worker: 5 15 | save_result: True -------------------------------------------------------------------------------- /config/icvl/train_25select15.yaml: -------------------------------------------------------------------------------- 1 | phase: train 2 | dataset: icvl 3 | split: 20 4 | batch_size: 10 5 | num_epoch: 60 6 | gpus: [0] 7 | learning_rate: 0.0005 8 | model_saved_path: ./checkpoint/icvl/train_25select15 9 | log_dir: ./logs/icvl/train_25select15 10 | learning_decay_rate: 0.95 11 | reg_weight: 1e-6 12 | level: 4 13 | n_head: 1 14 | d_attn: 256 15 | d_k: 64 16 | d_v: 64 17 | d_inner: 256 18 | dropout_rate: 0.5 19 | num_select: 15 20 | num_worker: 5 21 | max_jitter: 0. 22 | depth_sigma: 0. 23 | random_flip: False 24 | adjust_cube: False -------------------------------------------------------------------------------- /config/icvl/train_25select15_light.yaml: -------------------------------------------------------------------------------- 1 | dataset: icvl 2 | phase: train 3 | split: 20 4 | batch_size: 32 5 | num_epoch: 10 6 | gpus: [0] 7 | learning_rate: 0.0005 8 | model_saved_path: ./checkpoint/icvl/train_25select15_light 9 | log_dir: ./logs/icvl/train_25select15_light 10 | pre_a2j: ./checkpoint/icvl/train_25select15/model.pth 11 | learning_decay_rate: 0.8 12 | reg_weight: 1e-6 13 | level: 4 14 | n_head: 1 15 | d_attn: 256 16 | d_k: 64 17 | d_v: 64 18 | d_inner: 256 19 | dropout_rate: 0.5 20 | num_select: 15 21 | num_worker: 8 22 | max_jitter: 0. 23 | depth_sigma: 0. 
24 | random_flip: False 25 | adjust_cube: False -------------------------------------------------------------------------------- /config/icvl/train_25select3.yaml: -------------------------------------------------------------------------------- 1 | phase: train 2 | dataset: icvl 3 | split: 20 4 | batch_size: 10 5 | num_epoch: 60 6 | gpus: [0] 7 | learning_rate: 0.0005 8 | model_saved_path: ./checkpoint/icvl/train_25select3 9 | log_dir: ./logs/icvl/train_25select3 10 | learning_decay_rate: 0.95 11 | reg_weight: 1e-6 12 | level: 4 13 | n_head: 1 14 | d_attn: 256 15 | d_k: 64 16 | d_v: 64 17 | d_inner: 256 18 | dropout_rate: 0.5 19 | num_select: 3 20 | num_worker: 5 21 | max_jitter: 0. 22 | depth_sigma: 0. 23 | random_flip: False 24 | adjust_cube: False -------------------------------------------------------------------------------- /config/icvl/train_25select3_light.yaml: -------------------------------------------------------------------------------- 1 | dataset: icvl 2 | phase: train 3 | split: 20 4 | batch_size: 32 5 | num_epoch: 10 6 | gpus: [0] 7 | learning_rate: 0.0005 8 | model_saved_path: ./checkpoint/icvl/train_25select3_light 9 | log_dir: ./logs/icvl/train_25select3_light 10 | pre_a2j: ./checkpoint/icvl/train_25select3/model.pth 11 | learning_decay_rate: 0.8 12 | reg_weight: 1e-6 13 | level: 4 14 | n_head: 1 15 | d_attn: 256 16 | d_k: 64 17 | d_v: 64 18 | d_inner: 256 19 | dropout_rate: 0.5 20 | num_select: 3 21 | num_worker: 8 22 | max_jitter: 0. 23 | depth_sigma: 0. 24 | random_flip: False 25 | adjust_cube: False -------------------------------------------------------------------------------- /config/icvl/train_25select9.yaml: -------------------------------------------------------------------------------- 1 | phase: train 2 | dataset: icvl 3 | split: 20 4 | batch_size: 10 5 | num_epoch: 60 6 | gpus: [0] 7 | learning_rate: 0.0005 8 | model_saved_path: ./checkpoint/icvl/train_25select9 9 | log_dir: ./logs/icvl/train_25select9 10 | learning_decay_rate: 0.95 11 | reg_weight: 1e-6 12 | level: 4 13 | n_head: 1 14 | d_attn: 256 15 | d_k: 64 16 | d_v: 64 17 | d_inner: 256 18 | dropout_rate: 0.5 19 | num_select: 9 20 | num_worker: 5 21 | max_jitter: 0. 22 | depth_sigma: 0. 23 | random_flip: False 24 | adjust_cube: False -------------------------------------------------------------------------------- /config/icvl/train_25select9_light.yaml: -------------------------------------------------------------------------------- 1 | dataset: icvl 2 | phase: train 3 | split: 20 4 | batch_size: 32 5 | num_epoch: 10 6 | gpus: [0] 7 | learning_rate: 0.0005 8 | model_saved_path: ./checkpoint/icvl/train_25select9_light 9 | log_dir: ./logs/icvl/train_25select9_light 10 | pre_a2j: ./checkpoint/icvl/train_25select9/model.pth 11 | learning_decay_rate: 0.8 12 | reg_weight: 1e-6 13 | level: 4 14 | n_head: 1 15 | d_attn: 256 16 | d_k: 64 17 | d_v: 64 18 | d_inner: 256 19 | dropout_rate: 0.5 20 | num_select: 9 21 | num_worker: 8 22 | max_jitter: 0. 23 | depth_sigma: 0. 
24 | random_flip: False 25 | adjust_cube: False -------------------------------------------------------------------------------- /config/icvl/train_uniform.yaml: -------------------------------------------------------------------------------- 1 | phase: train 2 | dataset: icvl 3 | split: 20 4 | batch_size: 10 5 | num_epoch: 60 6 | gpus: [0] 7 | learning_rate: 0.0005 8 | model_saved_path: ./checkpoint/icvl/train_uniform 9 | log_dir: ./logs/icvl/train_uniform 10 | learning_decay_rate: 0.95 11 | reg_weight: 1e-6 12 | level: 4 13 | n_head: 1 14 | d_attn: 256 15 | d_k: 64 16 | d_v: 64 17 | d_inner: 256 18 | dropout_rate: 0.5 19 | num_select: 25 20 | num_worker: 5 21 | max_jitter: 0. 22 | depth_sigma: 0. 23 | random_flip: False 24 | adjust_cube: False -------------------------------------------------------------------------------- /config/nyu/eval_25select1.yaml: -------------------------------------------------------------------------------- 1 | dataset: nyu 2 | phase: eval 3 | split: 5 4 | batch_size: 1 5 | gpus: [0] 6 | log_dir: ./logs/nyu/eval_25select3 7 | pre_model_name: ./checkpoint/nyu/25select3.pth 8 | level: 4 9 | n_head: 1 10 | d_attn: 256 11 | d_k: 64 12 | d_v: 64 13 | d_inner: 256 14 | num_select: 1 15 | num_worker: 5 16 | save_result: True -------------------------------------------------------------------------------- /config/nyu/eval_25select15.yaml: -------------------------------------------------------------------------------- 1 | dataset: nyu 2 | phase: eval 3 | batch_size: 1 4 | gpus: [0] 5 | log_dir: ./logs/nyu/eval_25select15 6 | pre_model_name: ./checkpoint/nyu/25select15.pth 7 | level: 4 8 | n_head: 1 9 | d_attn: 256 10 | d_k: 64 11 | d_v: 64 12 | d_inner: 256 13 | num_select: 15 14 | num_worker: 5 15 | save_result: True -------------------------------------------------------------------------------- /config/nyu/eval_25select15_light.yaml: -------------------------------------------------------------------------------- 1 | dataset: nyu 2 | phase: eval 3 | batch_size: 1 4 | gpus: [0] 5 | log_dir: ./logs/nyu/eval_25select15_light 6 | pre_a2j: ./checkpoint/nyu/25select15.pth 7 | pre_model_path: ./checkpoint/nyu/25select15_light.pth 8 | level: 4 9 | n_head: 1 10 | d_attn: 256 11 | d_k: 64 12 | d_v: 64 13 | d_inner: 256 14 | num_select: 15 15 | num_worker: 4 16 | save_result: True -------------------------------------------------------------------------------- /config/nyu/eval_25select1_light.yaml: -------------------------------------------------------------------------------- 1 | dataset: nyu 2 | phase: eval 3 | batch_size: 1 4 | gpus: [0] 5 | log_dir: ./logs/nyu/eval_25select1_light 6 | pre_a2j: ./checkpoint/nyu/25select3.pth 7 | pre_model_path: ./checkpoint/nyu/25select3_light.pth 8 | level: 4 9 | n_head: 1 10 | d_attn: 256 11 | d_k: 64 12 | d_v: 64 13 | d_inner: 256 14 | num_select: 1 15 | num_worker: 4 16 | save_result: True -------------------------------------------------------------------------------- /config/nyu/eval_25select3.yaml: -------------------------------------------------------------------------------- 1 | dataset: nyu 2 | phase: eval 3 | batch_size: 1 4 | gpus: [0] 5 | log_dir: ./logs/nyu/eval_25select3 6 | pre_model_name: ./checkpoint/nyu/25select3.pth 7 | level: 4 8 | n_head: 1 9 | d_attn: 256 10 | d_k: 64 11 | d_v: 64 12 | d_inner: 256 13 | num_select: 3 14 | num_worker: 5 15 | save_result: True -------------------------------------------------------------------------------- /config/nyu/eval_25select3_light.yaml: 
-------------------------------------------------------------------------------- 1 | dataset: nyu 2 | phase: eval 3 | batch_size: 1 4 | gpus: [0] 5 | log_dir: ./logs/nyu/eval_25select3_light 6 | pre_a2j: ./checkpoint/nyu/25select3.pth 7 | pre_model_path: ./checkpoint/nyu/25select3_light.pth 8 | level: 4 9 | n_head: 1 10 | d_attn: 256 11 | d_k: 64 12 | d_v: 64 13 | d_inner: 256 14 | num_select: 3 15 | num_worker: 4 16 | save_result: True -------------------------------------------------------------------------------- /config/nyu/eval_25select9.yaml: -------------------------------------------------------------------------------- 1 | dataset: nyu 2 | phase: eval 3 | batch_size: 1 4 | gpus: [0] 5 | log_dir: ./logs/nyu/eval_25select9 6 | pre_model_name: ./checkpoint/nyu/25select9.pth 7 | level: 4 8 | n_head: 1 9 | d_attn: 256 10 | d_k: 64 11 | d_v: 64 12 | d_inner: 256 13 | num_select: 9 14 | num_worker: 5 15 | save_result: True -------------------------------------------------------------------------------- /config/nyu/eval_25select9_light.yaml: -------------------------------------------------------------------------------- 1 | dataset: nyu 2 | phase: eval 3 | batch_size: 1 4 | gpus: [0] 5 | log_dir: ./logs/nyu/eval_25select9_light 6 | pre_a2j: ./checkpoint/nyu/25select9.pth 7 | pre_model_path: ./checkpoint/nyu/25select9_light.pth 8 | level: 4 9 | n_head: 1 10 | d_attn: 256 11 | d_k: 64 12 | d_v: 64 13 | d_inner: 256 14 | num_select: 9 15 | num_worker: 4 16 | save_result: True -------------------------------------------------------------------------------- /config/nyu/eval_uniform1.yaml: -------------------------------------------------------------------------------- 1 | dataset: nyu 2 | phase: eval 3 | batch_size: 1 4 | gpus: [0] 5 | log_dir: ./logs/nyu/eval_uniform1 6 | pre_model_name: ./checkpoint/nyu/uniform.pth 7 | level: 0 8 | n_head: 1 9 | d_attn: 256 10 | d_k: 64 11 | d_v: 64 12 | d_inner: 256 13 | num_select: 1 14 | num_worker: 5 15 | save_result: True -------------------------------------------------------------------------------- /config/nyu/eval_uniform15.yaml: -------------------------------------------------------------------------------- 1 | dataset: nyu 2 | phase: eval 3 | batch_size: 1 4 | gpus: [0] 5 | log_dir: ./logs/nyu/eval_uniform15 6 | pre_model_name: ./checkpoint/nyu/uniform.pth 7 | level: 3 8 | n_head: 1 9 | d_attn: 256 10 | d_k: 64 11 | d_v: 64 12 | d_inner: 256 13 | num_select: 15 14 | num_worker: 5 15 | save_result: True -------------------------------------------------------------------------------- /config/nyu/eval_uniform25.yaml: -------------------------------------------------------------------------------- 1 | dataset: nyu 2 | phase: eval 3 | batch_size: 1 4 | gpus: [0] 5 | log_dir: ./logs/nyu/eval_uniform25 6 | pre_model_name: ./checkpoint/nyu/uniform.pth 7 | level: 4 8 | n_head: 1 9 | d_attn: 256 10 | d_k: 64 11 | d_v: 64 12 | d_inner: 256 13 | num_select: 25 14 | num_worker: 5 15 | save_result: True -------------------------------------------------------------------------------- /config/nyu/eval_uniform3.yaml: -------------------------------------------------------------------------------- 1 | dataset: nyu 2 | phase: eval 3 | batch_size: 1 4 | gpus: [0] 5 | log_dir: ./logs/nyu/eval_uniform3 6 | pre_model_name: ./checkpoint/nyu/uniform.pth 7 | level: 1 8 | n_head: 1 9 | d_attn: 256 10 | d_k: 64 11 | d_v: 64 12 | d_inner: 256 13 | num_select: 3 14 | num_worker: 5 15 | save_result: True 
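Across the `eval_uniform*.yaml` files, the `level` field and the number of uniformly sampled views move together (level 0 → 1 view, 1 → 3, 2 → 9, 3 → 15, 4 → 25). A small sketch of that apparent correspondence, read off the configs rather than taken from the repo's code — the names below are illustrative:
```
# Apparent mapping between the `level` field and the number of uniformly
# sampled virtual views, inferred from the eval_uniform*.yaml configs.
LEVEL_TO_NUM_VIEWS = {0: 1, 1: 3, 2: 9, 3: 15, 4: 25}

def num_views_for_level(level: int) -> int:
    return LEVEL_TO_NUM_VIEWS[level]
```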
-------------------------------------------------------------------------------- /config/nyu/eval_uniform9.yaml: -------------------------------------------------------------------------------- 1 | dataset: nyu 2 | phase: eval 3 | batch_size: 1 4 | gpus: [0] 5 | log_dir: ./logs/nyu/eval_uniform9 6 | pre_model_name: ./checkpoint/nyu/uniform.pth 7 | level: 2 8 | n_head: 1 9 | d_attn: 256 10 | d_k: 64 11 | d_v: 64 12 | d_inner: 256 13 | num_select: 9 14 | num_worker: 5 15 | save_result: True -------------------------------------------------------------------------------- /config/nyu/train_25select15.yaml: -------------------------------------------------------------------------------- 1 | dataset: nyu 2 | phase: train 3 | split: 5 4 | batch_size: 10 5 | num_epoch: 50 6 | gpus: [0] 7 | learning_rate: 0.0005 8 | model_saved_path: ./checkpoint/nyu/train_25select15 9 | log_dir: ./logs/nyu/train_25select15 10 | learning_decay_rate: 0.95 11 | reg_weight: 1e-6 12 | level: 4 13 | n_head: 1 14 | d_attn: 256 15 | d_k: 64 16 | d_v: 64 17 | d_inner: 256 18 | dropout_rate: 0.5 19 | num_select: 15 20 | num_worker: 5 21 | max_jitter: 0. 22 | offset: 20. 23 | depth_sigma: 0. 24 | random_flip: False 25 | adjust_cube: False -------------------------------------------------------------------------------- /config/nyu/train_25select15_light.yaml: -------------------------------------------------------------------------------- 1 | dataset: nyu 2 | phase: train 3 | split: 5 4 | batch_size: 32 5 | num_epoch: 30 6 | gpus: [0] 7 | learning_rate: 0.0005 8 | model_saved_path: ./checkpoint/nyu/train_25select15_light 9 | log_dir: ./logs/nyu/train_25select15_light 10 | pre_a2j: ./checkpoint/nyu/train_25select15/model.pth 11 | learning_decay_rate: 0.95 12 | reg_weight: 1e-6 13 | level: 4 14 | n_head: 1 15 | d_attn: 256 16 | d_k: 64 17 | d_v: 64 18 | d_inner: 256 19 | dropout_rate: 0.5 20 | num_select: 15 21 | num_worker: 4 22 | max_jitter: 0. 23 | offset: 20. 24 | depth_sigma: 0. 25 | random_flip: False 26 | adjust_cube: False -------------------------------------------------------------------------------- /config/nyu/train_25select3.yaml: -------------------------------------------------------------------------------- 1 | dataset: nyu 2 | phase: train 3 | split: 5 4 | batch_size: 10 5 | num_epoch: 50 6 | gpus: [0] 7 | learning_rate: 0.0005 8 | model_saved_path: ./checkpoint/nyu/train_25select3 9 | log_dir: ./logs/nyu/train_25select3 10 | learning_decay_rate: 0.95 11 | reg_weight: 1e-6 12 | level: 4 13 | n_head: 1 14 | d_attn: 256 15 | d_k: 64 16 | d_v: 64 17 | d_inner: 256 18 | dropout_rate: 0.5 19 | num_select: 3 20 | num_worker: 5 21 | max_jitter: 0. 22 | offset: 20. 23 | depth_sigma: 0. 24 | random_flip: False 25 | adjust_cube: False -------------------------------------------------------------------------------- /config/nyu/train_25select3_light.yaml: -------------------------------------------------------------------------------- 1 | dataset: nyu 2 | phase: train 3 | split: 5 4 | batch_size: 32 5 | num_epoch: 30 6 | gpus: [0] 7 | learning_rate: 0.0005 8 | model_saved_path: ./checkpoint/nyu/train_25select3_light 9 | log_dir: ./logs/nyu/train_25select3_light 10 | pre_a2j: ./checkpoint/nyu/train_25select3/model.pth 11 | learning_decay_rate: 0.95 12 | reg_weight: 1e-6 13 | level: 4 14 | n_head: 1 15 | d_attn: 256 16 | d_k: 64 17 | d_v: 64 18 | d_inner: 256 19 | dropout_rate: 0.5 20 | num_select: 3 21 | num_worker: 4 22 | max_jitter: 0. 23 | offset: 20. 24 | depth_sigma: 0. 
25 | random_flip: False 26 | adjust_cube: False -------------------------------------------------------------------------------- /config/nyu/train_25select9.yaml: -------------------------------------------------------------------------------- 1 | dataset: nyu 2 | phase: train 3 | split: 5 4 | batch_size: 10 5 | num_epoch: 50 6 | gpus: [0] 7 | learning_rate: 0.0005 8 | model_saved_path: ./checkpoint/nyu/train_25select9 9 | log_dir: ./logs/nyu/train_25select9 10 | learning_decay_rate: 0.95 11 | reg_weight: 1e-6 12 | level: 4 13 | n_head: 1 14 | d_attn: 256 15 | d_k: 64 16 | d_v: 64 17 | d_inner: 256 18 | dropout_rate: 0.5 19 | num_select: 9 20 | num_worker: 5 21 | max_jitter: 0. 22 | offset: 20. 23 | depth_sigma: 0. 24 | random_flip: False 25 | adjust_cube: False -------------------------------------------------------------------------------- /config/nyu/train_25select9_light.yaml: -------------------------------------------------------------------------------- 1 | dataset: nyu 2 | phase: train 3 | split: 5 4 | batch_size: 32 5 | num_epoch: 30 6 | gpus: [0] 7 | learning_rate: 0.0005 8 | model_saved_path: ./checkpoint/nyu/train_25select9_light 9 | log_dir: ./logs/nyu/train_25select9_light 10 | pre_a2j: ./checkpoint/nyu/train_25select9/model.pth 11 | learning_decay_rate: 0.95 12 | reg_weight: 1e-6 13 | level: 4 14 | n_head: 1 15 | d_attn: 256 16 | d_k: 64 17 | d_v: 64 18 | d_inner: 256 19 | dropout_rate: 0.5 20 | num_select: 9 21 | num_worker: 4 22 | max_jitter: 0. 23 | offset: 20. 24 | depth_sigma: 0. 25 | random_flip: False 26 | adjust_cube: False -------------------------------------------------------------------------------- /config/nyu/train_uniform.yaml: -------------------------------------------------------------------------------- 1 | dataset: nyu 2 | phase: train 3 | split: 5 4 | batch_size: 10 5 | num_epoch: 50 6 | gpus: [0] 7 | learning_rate: 0.0005 8 | model_saved_path: ./checkpoint/nyu/train_uniform 9 | log_dir: ./logs/nyu/train_uniform 10 | learning_decay_rate: 0.95 11 | reg_weight: 1e-6 12 | level: 4 13 | n_head: 1 14 | d_attn: 256 15 | d_k: 64 16 | d_v: 64 17 | d_inner: 256 18 | dropout_rate: 0.5 19 | num_worker: 5 20 | max_jitter: 0. 21 | offset: 20. 22 | depth_sigma: 0. 
23 | random_flip: False 24 | adjust_cube: False -------------------------------------------------------------------------------- /feeders/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/iscas3dv/handpose-virtualview/d220efa69ff031077381bc0d4cd58fae7049c329/feeders/__init__.py -------------------------------------------------------------------------------- /feeders/hands2019_feeder.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch.utils.data import Dataset, DataLoader 3 | import matplotlib.pyplot as plt 4 | import numpy as np 5 | import random 6 | import cv2 7 | import traceback 8 | from PIL import Image 9 | import scipy.io as sio 10 | import scipy.ndimage 11 | from glob import glob 12 | import json 13 | import logging 14 | import sys 15 | import os 16 | root = os.path.dirname(os.path.dirname(os.path.abspath(__file__))) 17 | sys.path.append(root) 18 | from utils.hand_detector import crop_area_3d 19 | from utils.image_utils import normlize_depth 20 | import matplotlib.patches as mpathes 21 | from utils.point_transform import transform_3D_to_2D, transform_2D_to_3D 22 | 23 | logging.basicConfig(level=logging.INFO, format="%(asctime)s: %(levelname)s %(name)s:%(lineno)d] %(message)s") 24 | logger = logging.getLogger(__file__) 25 | 26 | 27 | def get_center_from_bbx(path, img_w, img_h, fx, fy, bbx_rectify=True): 28 | # Reference: https://github.com/wozhangzhaohui/HandAugment 29 | cube_len = 150. 30 | lines = [line.split() for line in open(path).readlines()] 31 | bb_list = [[int(x) for x in line[1:]] for line in lines] 32 | center_uvd_list = [] 33 | for bb in bb_list: 34 | if bb[0]>bb[2] or bb[1]>bb[3]: 35 | center_uvd_list.append(None) 36 | continue 37 | w = bb[2] - bb[0] 38 | h = bb[3] - bb[1] 39 | ww = max(w, h) 40 | if bbx_rectify: 41 | if w < ww: 42 | if bb[0] == 0: 43 | bb[0] = bb[2] - ww 44 | elif bb[2] == img_w: 45 | bb[2] = bb[0] + ww 46 | if h < ww: 47 | if bb[1] == 0: 48 | bb[1] = bb[3] - ww 49 | elif bb[3] == img_h: 50 | bb[3] = bb[1] + ww 51 | 52 | center_uvd = np.array([(bb[0] + bb[2]) / 2, 53 | (bb[1] + bb[3]) / 2, 54 | cube_len*2 / ww * fx], dtype=np.float32) 55 | center_uvd_list.append(center_uvd) 56 | return center_uvd_list 57 | 58 | 59 | def load_joint_pred(path, fx, fy, u0, v0): 60 | # Reference: https://github.com/wozhangzhaohui/HandAugment 61 | joint_3d_list = [] 62 | joint_2d_list = [] 63 | with open(path, 'r') as f: 64 | for anno in f.readlines(): 65 | anno = anno.split('\t') 66 | if (anno[-1] == '\n'): 67 | anno = anno[:-1] 68 | if len(anno) == 2: 69 | joint_3d_list.append(None) 70 | joint_2d_list.append(None) 71 | else: 72 | joint_3d = np.array(anno[1:]).astype(np.float32) 73 | joint_3d = joint_3d.reshape(21, 3) 74 | joint_2d = transform_3D_to_2D(joint_3d, fx, fy, u0, v0) 75 | joint_3d_list.append(joint_3d) 76 | joint_2d_list.append(joint_2d) 77 | return joint_3d_list, joint_2d_list 78 | 79 | 80 | class Hands2019Feeder(Dataset): 81 | def __init__(self, phase='train', max_jitter=10., depth_sigma=0., cube_len=None, min_scale=1., max_scale=1., 82 | offset=30., hand_thickness=20., random_flip=False, use_joint=False): 83 | self.phase = phase 84 | self.max_jitter = max_jitter 85 | self.depth_sigma = depth_sigma 86 | self.cube_len = cube_len 87 | self.min_scale = min_scale 88 | self.max_scale = max_scale 89 | self.offset = offset 90 | self.hand_thickness = hand_thickness 91 | self.random_flip = random_flip 92 | self.use_joint = 
use_joint 93 | config_file = os.path.join(root, "config", "dataset", "hands2019.json") 94 | self.config = json.load(open(config_file, 'r')) 95 | self.fx = np.float32(self.config['camera']['fx']) 96 | self.fy = np.float32(self.config['camera']['fy']) 97 | self.u0 = np.float32(self.config['camera']['u0']) 98 | self.v0 = np.float32(self.config['camera']['v0']) 99 | if cube_len is None: 100 | self.cube = np.array(self.config['cube'], dtype=np.float32) 101 | else: 102 | self.cube = np.array([cube_len, cube_len, cube_len], dtype=np.float32) 103 | self.crop_size = self.config['crop_size'] 104 | self.inter_matrix = np.array([[self.fx, 0, self.u0], 105 | [0, self.fy, self.v0], 106 | [0, 0, 1]], dtype=np.float32) 107 | self.depth_name_list, self.joint_3d_list, self.joint_2d_list = self.load_annotation() 108 | 109 | self.com_2d_list = get_center_from_bbx( 110 | os.path.join(self.config['path'], 'training_bbs.txt'), self.config['width'],self.config['height'], 111 | self.fx, self.fy) 112 | if use_joint: 113 | self.joint_3d_pred_list, self.joint_2d_pred_list = load_joint_pred( 114 | os.path.join(self.config["path"], 'training_joint.txt'), self.fx, self.fy, self.u0, self.v0) 115 | num = len(self.depth_name_list) 116 | test_num = num // 10 117 | train_num = num - test_num 118 | if phase == 'train': 119 | self.depth_name_list = self.depth_name_list[:train_num] 120 | self.joint_3d_list = self.joint_3d_list[:train_num] 121 | self.joint_2d_list = self.joint_2d_list[:train_num] 122 | self.com_2d_list = self.com_2d_list[:train_num] 123 | if use_joint: 124 | self.joint_3d_pred_list = self.joint_3d_pred_list[:train_num] 125 | self.joint_2d_pred_list = self.joint_2d_pred_list[:train_num] 126 | else: 127 | self.depth_name_list = self.depth_name_list[train_num:] 128 | self.joint_3d_list = self.joint_3d_list[train_num:] 129 | self.joint_2d_list = self.joint_2d_list[train_num:] 130 | self.com_2d_list = self.com_2d_list[train_num:] 131 | if use_joint: 132 | self.joint_3d_pred_list = self.joint_3d_pred_list[train_num:] 133 | self.joint_2d_pred_list = self.joint_2d_pred_list[train_num:] 134 | self.index = np.arange(len(self.depth_name_list)) 135 | 136 | def load_annotation(self): 137 | joint_anno_path = os.path.join(self.config["path"], 'training_joint_annotation.txt') 138 | bbs_path = os.path.join(self.config["path"], 'training_bbs.txt') 139 | joint_3d_list = [] 140 | joint_2d_list = [] 141 | img_name_list = [] 142 | bbx_list = [] 143 | with open(joint_anno_path, 'r') as f: 144 | for anno in f.readlines(): 145 | anno = anno.split('\t') 146 | if (anno[-1] == '\n'): 147 | anno = anno[:-1] 148 | img_name = anno[0] 149 | joint_3d = np.array(anno[1:]).astype(np.float32) 150 | joint_3d = joint_3d.reshape(21, 3) 151 | joint_2d = transform_3D_to_2D(joint_3d, self.fx, self.fy, self.u0, self.v0) 152 | joint_3d_list.append(joint_3d) 153 | joint_2d_list.append(joint_2d) 154 | img_name_list.append(img_name) 155 | 156 | return img_name_list, joint_3d_list, joint_2d_list 157 | 158 | def show(self, cropped, joint_3d, crop_trans): 159 | joint_2d = self.inter_matrix @ np.transpose(joint_3d, (1, 0)) 160 | joint_2d = joint_2d / joint_2d[2, :] 161 | joint_2d = np.transpose(joint_2d, (1, 0)) 162 | crop_joint_2d = np.ones_like(joint_2d) 163 | crop_joint_2d[:, :2] = joint_2d[:, :2] 164 | crop_joint_2d = np.transpose(crop_joint_2d, (1, 0)) 165 | crop_joint_2d = np.array(crop_trans @ crop_joint_2d) 166 | crop_joint_2d = np.transpose(crop_joint_2d, (1, 0)) 167 | plt.clf() 168 | plt.imshow(cropped) 169 | plt.scatter(crop_joint_2d[:, 0], 
crop_joint_2d[:, 1], c='red') 170 | plt.show() 171 | 172 | def __getitem__(self, item): 173 | item = self.index[item] 174 | depth_path = os.path.join(self.config["path"], 'training_images', self.depth_name_list[item]) 175 | depth = cv2.imread(depth_path, 2).astype(np.float32) 176 | joint_3d, com_2d = self.joint_3d_list[item], self.com_2d_list[item] 177 | try: 178 | if com_2d is None: 179 | raise ValueError 180 | if self.max_jitter>0.: 181 | com_3d = transform_2D_to_3D(com_2d, self.fx, self.fy, self.u0, self.v0) 182 | com_offset = np.random.uniform(low=-1., high=1., size=(3,))*self.max_jitter 183 | com_offset = com_offset.astype(np.float32) 184 | com_3d = com_3d + com_offset 185 | com_2d = transform_3D_to_2D(com_3d, self.fx, self.fy, self.u0, self.v0) 186 | 187 | scale = np.random.uniform(low=self.min_scale, high=self.max_scale) 188 | cube = self.cube * scale 189 | if self.use_joint: 190 | joint_2d_pred, joint_3d_pred = self.joint_2d_pred_list[item], self.joint_3d_pred_list[item] 191 | left = np.min(joint_2d_pred[:, 0]) 192 | right = np.max(joint_2d_pred[:, 0]) 193 | up = np.min(joint_2d_pred[:, 1]) 194 | down = np.max(joint_2d_pred[:, 1]) 195 | front = np.min(joint_3d_pred[:, 2])-self.hand_thickness 196 | back = np.max(joint_3d_pred[:, 2]) 197 | bbx = [left, right, up, down, front, back] 198 | cropped, crop_trans, com_2d = crop_area_3d(depth, com_2d, self.fx, self.fy, bbx=bbx, offset=self.offset, 199 | size=cube, dsize=[self.crop_size, self.crop_size], docom=False) 200 | else: 201 | cropped, crop_trans, com_2d = crop_area_3d(depth, com_2d, self.fx, self.fy, size=cube, 202 | dsize=[self.crop_size, self.crop_size], docom=False) 203 | except Exception as e: 204 | # exc_type, exc_value, exc_obj = sys.exc_info() 205 | # traceback.print_tb(exc_obj) 206 | # print(com_2d) 207 | # print(self.depth_name_list[item]) 208 | # plt.imshow(depth) 209 | # plt.show() 210 | # height = down - up 211 | # width = right - left 212 | # rect = mpathes.Rectangle([left, up], width, height, color='r', fill=False, linewidth=2) 213 | # fig, ax = plt.subplots() 214 | # ax.imshow(depth) 215 | # ax.add_patch(rect) 216 | # plt.show() 217 | # plt.imshow(mask_) 218 | # plt.show() 219 | return item, None, None, None, None, None, None, None 220 | 221 | if self.random_flip: 222 | to_center = np.array([[1., 0., self.crop_size/2.], 223 | [0., 1., self.crop_size/2.], 224 | [0., 0., 1]], np.float32) 225 | to_origin = np.array([[1., 0., -self.crop_size/2.], 226 | [0., 1., -self.crop_size/2.], 227 | [0., 0., 1]], np.float32) 228 | if random.random()>0.5: 229 | # Horizontal flip 230 | cropped = cropped[:, ::-1] 231 | matrix = np.eye(3, dtype=np.float32) 232 | matrix[0, 0] = -1 233 | flip_matrix = to_center @ matrix @ to_origin 234 | crop_trans = flip_matrix @ crop_trans 235 | 236 | if random.random()>0.5: 237 | # Vertical flip 238 | cropped = cropped[::-1, :] 239 | matrix = np.eye(3, dtype=np.float32) 240 | matrix[1, 1] = -1 241 | flip_matrix = to_center @ matrix @ to_origin 242 | crop_trans = flip_matrix @ crop_trans 243 | 244 | cropped = np.array(cropped) 245 | 246 | if self.depth_sigma>0.: 247 | # noise = np.random.randn(self.crop_size, self.crop_size)*self.noise_sigma 248 | noise = np.random.normal(0, self.depth_sigma, size=(self.crop_size, self.crop_size)).astype(np.float32) 249 | cropped[cropped>1e-3] += noise[cropped>1e-3] 250 | 251 | # self.show(cropped, joint_3d, crop_trans) 252 | # plt.imshow(depth) 253 | # plt.show() 254 | # print(com_2d) 255 | return item, depth[None, ...], cropped[None, ...], joint_3d, 
np.array(crop_trans), com_2d, self.inter_matrix, \ 256 | cube 257 | 258 | def __len__(self): 259 | return len(self.index) 260 | 261 | 262 | def collate_fn(batch): 263 | batch_item = [] 264 | batch_depth = [] 265 | batch_cropped = [] 266 | batch_joint_3d = [] 267 | batch_crop_trans = [] 268 | batch_com_2d = [] 269 | batch_inter_matrix = [] 270 | batch_cube = [] 271 | for item, depth, cropped, joint_3d, crop_trans, com_2d, inter_matrix, cube in batch: 272 | if depth is not None: 273 | batch_item.append(item) 274 | batch_depth.append(depth) 275 | batch_cropped.append(cropped) 276 | batch_joint_3d.append(joint_3d) 277 | batch_crop_trans.append(crop_trans) 278 | batch_com_2d.append(com_2d) 279 | batch_inter_matrix.append(inter_matrix) 280 | batch_cube.append(cube) 281 | output = [torch.from_numpy(np.array(batch_item))] 282 | for arrays in [batch_depth, batch_cropped, batch_joint_3d, batch_crop_trans, batch_com_2d, batch_inter_matrix, 283 | batch_cube]: 284 | output.append(torch.from_numpy(np.stack(arrays, axis=0))) 285 | return output 286 | 287 | 288 | if __name__ == '__main__': 289 | from tqdm import tqdm 290 | train_dataset = Hands2019Feeder('train', max_jitter=0., depth_sigma=0., cube_len=270., min_scale=1., max_scale=1., 291 | offset=30., use_joint=True) 292 | item, depth, cropped, joint_3d, crop_trans, com_2d, inter_matrix, cube = train_dataset[0] 293 | plt.imshow(depth[0]) 294 | plt.show() 295 | print(depth[depth!=0].min()) 296 | print(depth[0, 300, 500:600]) 297 | # dataloader = DataLoader(train_dataset, shuffle=False, batch_size=4, num_workers=1, collate_fn=collate_fn) 298 | # for batch_idx, batch_data in enumerate(tqdm(dataloader)): 299 | # item, depth, cropped, joint_3d, crop_trans, com_2d, inter_matrix, cube = batch_data 300 | # break 301 | 302 | # test_dataset = Hands2019Feeder('test', max_jitter=0.) 
303 | # # item, depth, cropped, joint_3d, crop_trans, com_2d, inter_matrix, cube = train_dataset[4979] 304 | # dataloader = DataLoader(test_dataset, shuffle=False, batch_size=4, num_workers=4, collate_fn=collate_fn) 305 | # for batch_idx, batch_data in enumerate(tqdm(dataloader)): 306 | # item, depth, cropped, joint_3d, crop_trans, com_2d, inter_matrix, cube = batch_data 307 | -------------------------------------------------------------------------------- /feeders/icvl_feeder.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch.utils.data import Dataset, DataLoader 3 | import matplotlib.pyplot as plt 4 | import numpy as np 5 | import random 6 | import cv2 7 | import traceback 8 | from PIL import Image 9 | import scipy.io as sio 10 | import scipy.ndimage 11 | from glob import glob 12 | import json 13 | import logging 14 | import sys 15 | import os 16 | root = os.path.dirname(os.path.dirname(os.path.abspath(__file__))) 17 | sys.path.append(root) 18 | from utils.hand_detector import calculate_com_2d, crop_area_3d 19 | from utils.point_transform import transform_2D_to_3D 20 | 21 | logging.basicConfig(level=logging.INFO, format="%(asctime)s: %(levelname)s %(name)s:%(lineno)d] %(message)s") 22 | logger = logging.getLogger(__file__) 23 | 24 | 25 | class ICVLFeeder(Dataset): 26 | def __init__(self, phase='train', max_jitter=10., depth_sigma=1.): 27 | """ 28 | 29 | :param phase: train or test 30 | :param max_jitter: 31 | :param depth_sigma: 32 | """ 33 | self.phase = phase 34 | self.max_jitter = max_jitter 35 | self.depth_sigma = depth_sigma 36 | config_file = os.path.join(root, "config", "dataset", "icvl.json") 37 | self.config = json.load(open(config_file, 'r')) 38 | self.fx = self.config['camera']['fx'] 39 | self.fy = self.config['camera']['fy'] 40 | self.u0 = self.config['camera']['u0'] 41 | self.v0 = self.config['camera']['v0'] 42 | self.crop_size = self.config['crop_size'] 43 | self.inter_matrix = np.array([[self.fx, 0, self.u0], 44 | [0, self.fy, self.v0], 45 | [0, 0, 1]], dtype=np.float32) 46 | self.cube = np.array(self.config["cube"], dtype=np.float32) 47 | self.joint_2d, self.joint_3d, self.depth_path = self.load_annotation() 48 | self.index = np.arange(len(self.depth_path)) 49 | logger.info("{} num: {}".format(phase, len(self.index))) 50 | 51 | def load_annotation(self): 52 | if self.phase == 'train': 53 | label_path = [os.path.join(self.config['path'], 'Training', 'labels.txt')] 54 | depth_dir = os.path.join(self.config['path'], 'Training', 'Depth') 55 | else: 56 | label_path = [os.path.join(self.config['path'], 'Testing', 'test_seq_1.txt'), 57 | os.path.join(self.config['path'], 'Testing', 'test_seq_2.txt')] 58 | depth_dir = os.path.join(self.config['path'], 'Testing', 'Depth') 59 | 60 | joint_2d_list = [] 61 | depth_path_list = [] 62 | for path in label_path: 63 | with open(path, 'r') as f: 64 | for line in f.readlines(): 65 | line = line.strip() 66 | if len(line) == 0: 67 | continue 68 | sp = line.split() 69 | depth_path = os.path.join(depth_dir, sp[0]) 70 | joint_2d = np.array(list(map(float, sp[1:])), np.float32) 71 | joint_2d = joint_2d.reshape((-1, 3)) 72 | depth_path_list.append(depth_path) 73 | joint_2d_list.append(joint_2d) 74 | joint_2d = np.stack(joint_2d_list, axis=0) 75 | joint_3d = transform_2D_to_3D(joint_2d, self.fx, self.fy, self.u0, self.v0) 76 | return joint_2d, joint_3d, depth_path_list 77 | 78 | def __getitem__(self, item): 79 | item = self.index[item] 80 | joint_2d, joint_3d, depth_path = 
self.joint_2d[item], self.joint_3d[item], self.depth_path[item] 81 | 82 | try: 83 | depth = np.asarray(Image.open(depth_path), np.float32) 84 | except FileNotFoundError: 85 | return item, None, None, joint_3d, None, None, self.inter_matrix 86 | 87 | com_3d = np.mean(joint_3d[[0, 1, 3, 4, 6, 7, 9, 10, 12, 13, 15]], axis=0) 88 | 89 | if self.max_jitter>0.: 90 | com_offset = np.random.uniform(low=-1., high=1., size=(3,))*self.max_jitter 91 | com_3d = com_3d + com_offset 92 | com_2d = self.inter_matrix @ com_3d[:, None] 93 | com_2d = np.squeeze(com_2d) 94 | com_2d[:2] /= com_2d[2] 95 | com_2d = com_2d.astype(np.float32) 96 | 97 | cube = self.cube 98 | try: 99 | cropped, crop_trans, com_2d = crop_area_3d(depth, com_2d, self.fx, self.fy, size=cube, 100 | dsize=[self.crop_size, self.crop_size], docom=False) 101 | except UserWarning: 102 | return item, None, None, joint_3d, None, None, self.inter_matrix 103 | # plt.imshow(depth) 104 | # plt.scatter(com_2d[0], com_2d[1]) 105 | # plt.show() 106 | # plt.imshow(cropped) 107 | # plt.show() 108 | 109 | if self.depth_sigma>0.: 110 | # noise = np.random.randn(self.crop_size, self.crop_size)*self.noise_sigma 111 | noise = np.random.normal(0, self.depth_sigma, size=(self.crop_size, self.crop_size)).astype(np.float32) 112 | cropped[cropped>1e-3] += noise[cropped>1e-3] 113 | 114 | return item, depth[None, ...], cropped[None, ...], joint_3d, np.array(crop_trans), com_2d, self.inter_matrix, \ 115 | cube 116 | 117 | def __len__(self): 118 | return len(self.index) 119 | 120 | 121 | if __name__ == '__main__': 122 | from tqdm import tqdm 123 | from feeders.nyu_feeder import collate_fn 124 | train_dataset = ICVLFeeder('train', max_jitter=0., depth_sigma=0.) 125 | dataloader = DataLoader(train_dataset, shuffle=False, batch_size=4, collate_fn=collate_fn, num_workers=4) 126 | for batch_idx, batch_data in enumerate(tqdm(dataloader)): 127 | item, depth, cropped, joint_3d, crop_trans, com_2d, inter_matrix, cube = batch_data 128 | 129 | # print(item) 130 | # print(depth.shape) 131 | # print(cropped.shape) 132 | # print(joint_3d.shape) 133 | # print(crop_trans.shape) 134 | # print(com_2d.shape) 135 | # print(inter_matrix.shape) 136 | # print(cube.shape) 137 | -------------------------------------------------------------------------------- /feeders/nyu_feeder.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch.utils.data import Dataset, DataLoader 3 | import matplotlib.pyplot as plt 4 | import numpy as np 5 | import random 6 | import cv2 7 | import traceback 8 | from PIL import Image 9 | import scipy.io as sio 10 | import scipy.ndimage 11 | from glob import glob 12 | import json 13 | import logging 14 | import sys 15 | import os 16 | root = os.path.dirname(os.path.dirname(os.path.abspath(__file__))) 17 | sys.path.append(root) 18 | from utils.hand_detector import calculate_com_2d, crop_area_3d 19 | from utils.image_utils import normlize_depth 20 | 21 | logging.basicConfig(level=logging.INFO, format="%(asctime)s: %(levelname)s %(name)s:%(lineno)d] %(message)s") 22 | logger = logging.getLogger(__file__) 23 | 24 | 25 | class NyuFeeder(Dataset): 26 | def __init__(self, phase='train', max_jitter=10., depth_sigma=1., offset=20., random_flip=False, adjust_cube=False): 27 | """ 28 | 29 | :param phase: train or test 30 | :param max_jitter: 31 | :param depth_sigma: 32 | :param min_scale: 33 | :param max_scale: 34 | :param random_flip: 35 | :param random_rotate: 36 | """ 37 | self.phase = phase 38 | self.max_jitter = 
max_jitter 39 | self.depth_sigma = depth_sigma 40 | self.offset = offset 41 | self.random_flip = random_flip 42 | self.adjust_cube = adjust_cube 43 | config_file = os.path.join(root, "config", "dataset", "nyu.json") 44 | self.config = json.load(open(config_file, 'r')) 45 | self.joint_2d, self.joint_3d, self.depth_path = self.load_annotation() 46 | self.fx = self.config['camera']['fx'] 47 | self.fy = self.config['camera']['fy'] 48 | self.u0 = self.config['camera']['u0'] 49 | self.v0 = self.config['camera']['v0'] 50 | self.crop_size = self.config['crop_size'] 51 | self.inter_matrix = np.array([[self.fx, 0, self.u0], 52 | [0, self.fy, self.v0], 53 | [0, 0, 1]], dtype=np.float32) 54 | self.cube = np.array(self.config["cube"], dtype=np.float32) 55 | self.com_2d = [None] * len(self.depth_path) 56 | # self.index = [] 57 | # if self.phase == 'train': 58 | # self.index = [i for i in range(len(self.depth_path)) if i%6!=0] 59 | # if self.phase == 'test': 60 | # self.index = [i for i in range(len(self.depth_path)) if i%6==0] 61 | self.index = np.arange(len(self.depth_path)) 62 | logger.info("{} num: {}".format(phase, len(self.index))) 63 | 64 | def load_annotation(self): 65 | data_dir = os.path.join(self.config["path"], self.phase) 66 | joint_data = sio.loadmat(os.path.join(data_dir, 'joint_data.mat')) 67 | # if self.phase == 'test': 68 | # joint_2d = joint_data['joint_uvd'][0][:, self.config['selected']].astype(np.float32) 69 | # joint_3d = joint_data['joint_xyz'][0][:, self.config['selected']].astype(np.float32) 70 | # joint_3d[:, :, 1] = -joint_3d[:, :, 1] 71 | # depth_path = glob(os.path.join(data_dir, "depth_1_*.png")) 72 | # else: 73 | # joint_2d = joint_data['joint_uvd'][:, :, self.config['selected']].astype(np.float32) 74 | # joint_2d = np.reshape(joint_2d, [-1, len(self.config['selected']), 3]) 75 | # joint_3d = joint_data['joint_xyz'][:, :, self.config['selected']].astype(np.float32) 76 | # joint_3d = np.reshape(joint_3d, [-1, len(self.config['selected']), 3]) 77 | # joint_3d[:, :, 1] = -joint_3d[:, :, 1] 78 | # depth_path = glob(os.path.join(data_dir, "depth_*.png")) 79 | joint_2d = joint_data['joint_uvd'][0][:, self.config['selected']].astype(np.float32) 80 | joint_3d = joint_data['joint_xyz'][0][:, self.config['selected']].astype(np.float32) 81 | joint_3d[:, :, 1] = -joint_3d[:, :, 1] 82 | depth_path = glob(os.path.join(data_dir, "depth_1_*.png")) 83 | depth_path.sort() 84 | return joint_2d, joint_3d, depth_path 85 | 86 | def show(self, cropped, joint_3d, crop_trans): 87 | joint_2d = self.inter_matrix @ np.transpose(joint_3d, (1, 0)) 88 | joint_2d = joint_2d / joint_2d[2, :] 89 | joint_2d = np.transpose(joint_2d, (1, 0)) 90 | crop_joint_2d = np.ones_like(joint_2d) 91 | crop_joint_2d[:, :2] = joint_2d[:, :2] 92 | crop_joint_2d = np.transpose(crop_joint_2d, (1, 0)) 93 | crop_joint_2d = np.array(crop_trans @ crop_joint_2d) 94 | crop_joint_2d = np.transpose(crop_joint_2d, (1, 0)) 95 | plt.clf() 96 | plt.imshow(cropped) 97 | plt.scatter(crop_joint_2d[:, 0], crop_joint_2d[:, 1], c='red') 98 | plt.show() 99 | 100 | def __getitem__(self, item): 101 | item = self.index[item] 102 | joint_2d, joint_3d, depth_path = self.joint_2d[item], self.joint_3d[item], self.depth_path[item] 103 | depth = load_depth_map(depth_path) 104 | if depth is None: 105 | return item, None, None, joint_3d, None, None, self.inter_matrix 106 | # com_2d = joint_2d[13] 107 | # com_2d = np.mean(joint_2d, axis=0) 108 | com_3d = np.mean(joint_3d, axis=0) 109 | 110 | # scale = np.random.uniform(low=self.min_scale, 
high=self.max_scale) 111 | # cube = self.cube * scale 112 | if self.max_jitter>0.: 113 | com_offset = np.random.uniform(low=-1., high=1., size=(3,))*self.max_jitter 114 | com_3d = com_3d + com_offset 115 | com_2d = self.inter_matrix @ com_3d[:, None] 116 | com_2d = np.squeeze(com_2d) 117 | com_2d[:2] /= com_2d[2] 118 | com_2d = com_2d.astype(np.float32) 119 | if self.adjust_cube: 120 | distance = np.linalg.norm(joint_3d - com_3d, axis=-1) 121 | cube_size = (np.max(distance) + self.offset) * 2. 122 | cube = np.array([cube_size, cube_size, cube_size], dtype=np.float32) 123 | left = np.min(joint_2d[:, 0]) 124 | right = np.max(joint_2d[:, 0]) 125 | up = np.min(joint_2d[:, 1]) 126 | down = np.max(joint_2d[:, 1]) 127 | front = np.min(joint_3d[:, 2]) 128 | back = np.max(joint_3d[:, 2]) 129 | bbx = [left, right, up, down, front, back] 130 | cropped, crop_trans, com_2d = crop_area_3d(depth, com_2d, self.fx, self.fy, bbx, self.offset, size=cube, 131 | dsize=(self.crop_size, self.crop_size), docom=False) 132 | else: 133 | if self.phase != 'train' and item >= 2440: 134 | cube = self.cube * 5.0 / 6.0 135 | else: 136 | cube = self.cube 137 | cropped, crop_trans, com_2d = crop_area_3d(depth, com_2d, self.fx, self.fy, size=cube, 138 | dsize=[self.crop_size, self.crop_size], docom=False) 139 | # if self.random_rotate: 140 | # # plt.imshow(cropped) 141 | # # plt.show() 142 | # angle = np.random.rand()*360. 143 | # M = cv2.getRotationMatrix2D((self.crop_size/2., self.crop_size/2.), angle, 1.) 144 | # cropped = cv2.warpAffine(cropped, M, (self.crop_size, self.crop_size), flags=cv2.INTER_NEAREST) 145 | # rotate_trans = np.eye(3, dtype=np.float32) 146 | # rotate_trans[:2, :] = M 147 | # crop_trans = rotate_trans @ crop_trans 148 | # # plt.imshow(cropped) 149 | # # plt.show() 150 | 151 | if self.random_flip: 152 | to_center = np.array([[1., 0., self.crop_size/2.], 153 | [0., 1., self.crop_size/2.], 154 | [0., 0., 1]], np.float32) 155 | to_origin = np.array([[1., 0., -self.crop_size/2.], 156 | [0., 1., -self.crop_size/2.], 157 | [0., 0., 1]], np.float32) 158 | if random.random()>0.5: 159 | # Horizontal flip 160 | cropped = cropped[:, ::-1] 161 | matrix = np.eye(3, dtype=np.float32) 162 | matrix[0, 0] = -1 163 | flip_matrix = to_center @ matrix @ to_origin 164 | crop_trans = flip_matrix @ crop_trans 165 | 166 | if random.random()>0.5: 167 | # Vertical flip 168 | cropped = cropped[::-1, :] 169 | matrix = np.eye(3, dtype=np.float32) 170 | matrix[1, 1] = -1 171 | flip_matrix = to_center @ matrix @ to_origin 172 | crop_trans = flip_matrix @ crop_trans 173 | 174 | cropped = np.array(cropped) 175 | 176 | if self.depth_sigma>0.: 177 | # noise = np.random.randn(self.crop_size, self.crop_size)*self.noise_sigma 178 | noise = np.random.normal(0, self.depth_sigma, size=(self.crop_size, self.crop_size)).astype(np.float32) 179 | cropped[cropped>1e-3] += noise[cropped>1e-3] 180 | 181 | # self.show(cropped, joint_3d, crop_trans) 182 | # plt.imshow(depth) 183 | # plt.show() 184 | # print(com_2d) 185 | return item, depth[None, ...], cropped[None, ...], joint_3d, np.array(crop_trans), com_2d, self.inter_matrix, \ 186 | cube 187 | 188 | def __len__(self): 189 | return len(self.index) 190 | 191 | 192 | def load_depth_map(filename): 193 | """ 194 | Read a depth-map 195 | :param filename: file name to load 196 | :return: image data of depth image 197 | """ 198 | try: 199 | img = Image.open(filename) 200 | # top 8 bits of depth are packed into green channel and lower 8 bits into blue 201 | assert len(img.getbands()) == 3 202 | r, g, b = 
img.split() 203 | r = np.asarray(r, np.int32) 204 | g = np.asarray(g, np.int32) 205 | b = np.asarray(b, np.int32) 206 | dpt = np.bitwise_or(np.left_shift(g, 8), b) 207 | imgdata = np.asarray(dpt, np.float32) 208 | except IOError as e: 209 | imgdata = None 210 | # imgdata = np.zeros((480, 640), np.float32) 211 | logger.exception(filename+' file broken.') 212 | return imgdata 213 | 214 | 215 | def collate_fn(batch): 216 | # batch_item = [] 217 | # batch_depth = [] 218 | # batch_cropped = [] 219 | # batch_joint_3d = [] 220 | # batch_crop_trans = [] 221 | # batch_com_2d = [] 222 | # batch_inter_matrix = [] 223 | # batch_cube = [] 224 | batch_data = [] 225 | for i in range(len(batch)): 226 | if batch[i][1] is not None: 227 | batch_data.append(batch[i]) 228 | # for item, depth, cropped, joint_3d, crop_trans, com_2d, inter_matrix, cube in batch: 229 | # if depth is not None: 230 | # batch_item.append(item) 231 | # batch_depth.append(depth) 232 | # batch_cropped.append(cropped) 233 | # batch_joint_3d.append(joint_3d) 234 | # batch_crop_trans.append(crop_trans) 235 | # batch_com_2d.append(com_2d) 236 | # batch_inter_matrix.append(inter_matrix) 237 | # batch_cube.append(cube) 238 | batch_data = list(zip(*batch_data)) 239 | output = [torch.from_numpy(np.array(batch_data[0]))] 240 | for arrays in batch_data[1:]: 241 | output.append(torch.from_numpy(np.stack(arrays, axis=0))) 242 | return output 243 | 244 | 245 | if __name__ == '__main__': 246 | train_dataset = NyuFeeder('train', max_jitter=10., depth_sigma=0., offset=30, random_flip=False) 247 | item, depth, cropped, joint_3d, crop_trans, com_2d, inter_matrix, cube = train_dataset[0] 248 | dataloader = DataLoader(train_dataset, shuffle=False, batch_size=1, collate_fn=collate_fn) 249 | for batch_idx, batch_data in enumerate(dataloader): 250 | item, depth, cropped, joint_3d, crop_trans, com_2d, inter_matrix, cube = batch_data 251 | print(item) 252 | print(cube) 253 | break 254 | 255 | # test_dataset = NyuFeeder('test', max_jitter=0., depth_sigma=0., offset=30, random_flip=False) 256 | # dataloader = DataLoader(test_dataset, shuffle=True, batch_size=4) 257 | # for batch_idx, batch_data in enumerate(dataloader): 258 | # item, depth, cropped, joint_3d, crop_trans, com_2d, inter_matrix, cube = batch_data 259 | # print(item) 260 | # print(cube) 261 | # break 262 | 263 | # random.seed(0) 264 | # train_dataset = NyuFeeder('test', jitter_sigma=0., noise_sigma=0., scale_sigma=0., random_flip=True) 265 | # dataloader = DataLoader(train_dataset, shuffle=False, batch_size=4, collate_fn=collate_fn) 266 | # for batch_idx, batch_data in enumerate(dataloader): 267 | # item, depth, cropped, joint_3d, crop_trans, com_2d, inter_matrix, cube = batch_data 268 | # print(depth[2, 0, 300, 200]) 269 | # print(item) 270 | # print(cube) 271 | # break 272 | -------------------------------------------------------------------------------- /fig/pipeline.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/iscas3dv/handpose-virtualview/d220efa69ff031077381bc0d4cd58fae7049c329/fig/pipeline.png -------------------------------------------------------------------------------- /models/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/iscas3dv/handpose-virtualview/d220efa69ff031077381bc0d4cd58fae7049c329/models/__init__.py -------------------------------------------------------------------------------- /models/a2j.py: 
-------------------------------------------------------------------------------- 1 | """ 2 | MIT License 3 | 4 | Copyright (c) 2019 Boshen Zhang 5 | 6 | Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: 7 | 8 | The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. 9 | 10 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 11 | 12 | """ 13 | import torch.nn as nn 14 | from torch.nn import init 15 | import torch 16 | import torch.nn.functional as F 17 | import numpy as np 18 | 19 | import os 20 | import sys 21 | dir = os.path.dirname(os.path.abspath(__file__)) 22 | root = os.path.dirname(dir) 23 | from models import resnet 24 | 25 | 26 | class DepthRegressionModel(nn.Module): 27 | def __init__(self, num_features_in, num_anchors=16, num_classes=15, feature_size=256): 28 | super(DepthRegressionModel, self).__init__() 29 | self.num_classes = num_classes 30 | self.num_anchors = num_anchors 31 | 32 | self.conv1 = nn.Conv2d(num_features_in, feature_size, kernel_size=3, padding=1) 33 | self.bn1 = nn.BatchNorm2d(feature_size) 34 | self.act1 = nn.ReLU() 35 | self.conv2 = nn.Conv2d(feature_size, feature_size, kernel_size=3, padding=1) 36 | self.bn2 = nn.BatchNorm2d(feature_size) 37 | self.act2 = nn.ReLU() 38 | self.conv3 = nn.Conv2d(feature_size, feature_size, kernel_size=3, padding=1) 39 | self.bn3 = nn.BatchNorm2d(feature_size) 40 | self.act3 = nn.ReLU() 41 | self.conv4 = nn.Conv2d(feature_size, feature_size, kernel_size=3, padding=1) 42 | self.bn4 = nn.BatchNorm2d(feature_size) 43 | self.act4 = nn.ReLU() 44 | self.output = nn.Conv2d(feature_size, num_anchors * num_classes, kernel_size=3, padding=1) 45 | for m in self.modules(): 46 | if isinstance(m, nn.Conv2d): 47 | nn.init.xavier_normal_(m.weight.data) 48 | elif isinstance(m, nn.BatchNorm2d): 49 | m.weight.data.fill_(1) 50 | m.bias.data.zero_() 51 | 52 | def forward(self, x): 53 | out = self.conv1(x) 54 | out = self.bn1(out) 55 | out = self.act1(out) 56 | out = self.conv2(out) 57 | out = self.bn2(out) 58 | out = self.act2(out) 59 | out = self.conv3(out) 60 | out = self.bn3(out) 61 | out = self.act3(out) 62 | out = self.conv4(out) 63 | out = self.bn4(out) 64 | out = self.act4(out) 65 | out = self.output(out) 66 | 67 | # out is B x C x W x H, with C = 3*num_anchors 68 | out1 = out.permute(0, 3, 2, 1) 69 | batch_size, width, height, channels = out1.shape 70 | out2 = out1.view(batch_size, width, height, self.num_anchors, self.num_classes) 71 | return out2.contiguous().view(out2.shape[0], -1, self.num_classes) 72 | 73 | 74 | class RegressionModel(nn.Module): 75 | def __init__(self, num_features_in, num_anchors=16, num_classes=15, feature_size=256): 76 | super(RegressionModel, self).__init__() 77 | 
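# In-plane offset branch of A2J: four 3x3 conv+BN+ReLU blocks keep the spatial
# resolution, and the output conv emits num_anchors * num_classes * 2 channels,
# i.e. one (dx, dy) offset per anchor and per joint. forward() reshapes this to
# (B, w*h*A, num_joints, 2), where w and h are the feature-map dimensions.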
self.num_anchors = num_anchors 78 | self.num_classes = num_classes 79 | self.conv1 = nn.Conv2d(num_features_in, feature_size, kernel_size=3, padding=1) 80 | self.bn1 = nn.BatchNorm2d(feature_size) 81 | self.act1 = nn.ReLU() 82 | self.conv2 = nn.Conv2d(feature_size, feature_size, kernel_size=3, padding=1) 83 | self.bn2 = nn.BatchNorm2d(feature_size) 84 | self.act2 = nn.ReLU() 85 | self.conv3 = nn.Conv2d(feature_size, feature_size, kernel_size=3, padding=1) 86 | self.bn3 = nn.BatchNorm2d(feature_size) 87 | self.act3 = nn.ReLU() 88 | self.conv4 = nn.Conv2d(feature_size, feature_size, kernel_size=3, padding=1) 89 | self.bn4 = nn.BatchNorm2d(feature_size) 90 | self.act4 = nn.ReLU() 91 | self.output = nn.Conv2d(feature_size, num_anchors * num_classes * 2, kernel_size=3, padding=1) 92 | for m in self.modules(): 93 | if isinstance(m, nn.Conv2d): 94 | nn.init.xavier_normal_(m.weight.data) 95 | elif isinstance(m, nn.BatchNorm2d): 96 | m.weight.data.fill_(1) 97 | m.bias.data.zero_() 98 | 99 | def forward(self, x): 100 | out = self.conv1(x) 101 | out = self.bn1(out) 102 | out = self.act1(out) 103 | out = self.conv2(out) 104 | out = self.bn2(out) 105 | out = self.act2(out) 106 | out = self.conv3(out) 107 | out = self.bn3(out) 108 | out = self.act3(out) 109 | out = self.conv4(out) 110 | out = self.bn4(out) 111 | out = self.act4(out) 112 | out = self.output(out) 113 | 114 | # out is B x C x W x H, with C = 3*num_anchors 115 | out1 = out.permute(0, 3, 2, 1) 116 | batch_size, width, height, channels = out1.shape 117 | out2 = out1.view(batch_size, width, height, self.num_anchors, self.num_classes, 2) 118 | return out2.contiguous().view(out2.shape[0], -1, self.num_classes, 2) 119 | 120 | 121 | class ClassificationModel(nn.Module): 122 | def __init__(self, num_features_in, num_anchors=16, num_classes=15, prior=0.01, feature_size=256): 123 | super(ClassificationModel, self).__init__() 124 | self.num_classes = num_classes 125 | self.num_anchors = num_anchors 126 | self.conv1 = nn.Conv2d(num_features_in, feature_size, kernel_size=3, padding=1) 127 | self.bn1 = nn.BatchNorm2d(feature_size) 128 | self.act1 = nn.ReLU() 129 | self.conv2 = nn.Conv2d(feature_size, feature_size, kernel_size=3, padding=1) 130 | self.bn2 = nn.BatchNorm2d(feature_size) 131 | self.act2 = nn.ReLU() 132 | self.conv3 = nn.Conv2d(feature_size, feature_size, kernel_size=3, padding=1) 133 | self.bn3 = nn.BatchNorm2d(feature_size) 134 | self.act3 = nn.ReLU() 135 | self.conv4 = nn.Conv2d(feature_size, feature_size, kernel_size=3, padding=1) 136 | self.bn4 = nn.BatchNorm2d(feature_size) 137 | self.act4 = nn.ReLU() 138 | self.output = nn.Conv2d(feature_size, num_anchors * num_classes, kernel_size=3, padding=1) 139 | for m in self.modules(): 140 | if isinstance(m, nn.Conv2d): 141 | nn.init.xavier_normal_(m.weight.data) 142 | elif isinstance(m, nn.BatchNorm2d): 143 | m.weight.data.fill_(1) 144 | m.bias.data.zero_() 145 | 146 | def forward(self, x): 147 | out = self.conv1(x) 148 | out = self.bn1(out) 149 | out = self.act1(out) 150 | out = self.conv2(out) 151 | out = self.bn2(out) 152 | out = self.act2(out) 153 | out = self.conv3(out) 154 | out = self.bn3(out) 155 | out = self.act3(out) 156 | out = self.conv4(out) 157 | out = self.bn4(out) 158 | out = self.act4(out) 159 | out = self.output(out) 160 | 161 | # out is B x C x W x H, with C = n_classes + n_anchors 162 | out1 = out.permute(0, 3, 2, 1) 163 | batch_size, width, height, channels = out1.shape 164 | out2 = out1.view(batch_size, width, height, self.num_anchors, self.num_classes) 165 | return 
out2.contiguous().view(x.shape[0], -1, self.num_classes) 166 | 167 | 168 | class ResNetBackBone(nn.Module): 169 | def __init__(self, light): 170 | super(ResNetBackBone, self).__init__() 171 | if light: 172 | self.model = resnet.resnet18(pretrained=True) 173 | else: 174 | self.model = resnet.resnet50(pretrained=True) 175 | 176 | def forward(self, x): 177 | n, c, h, w = x.size() # x: [B, 1, H ,W] 178 | 179 | x = x[:, 0:1, :, :] # depth 180 | x = x.expand(n, 3, h, w) 181 | 182 | x = self.model.conv1(x) 183 | x = self.model.bn1(x) 184 | x = self.model.relu(x) 185 | x = self.model.maxpool(x) 186 | x1 = self.model.layer1(x) 187 | x2 = self.model.layer2(x1) 188 | x3 = self.model.layer3(x2) 189 | x4 = self.model.layer4(x3) 190 | 191 | return x3, x4 192 | 193 | 194 | def generate_anchors(P_h=None, P_w=None): 195 | if P_h is None: 196 | P_h = np.array([2,6,10,14]) 197 | 198 | if P_w is None: 199 | P_w = np.array([2,6,10,14]) 200 | 201 | num_anchors = len(P_h) * len(P_h) 202 | 203 | # initialize output anchors 204 | anchors = np.zeros((num_anchors, 2)) 205 | k = 0 206 | for i in range(len(P_w)): 207 | for j in range(len(P_h)): 208 | anchors[k,1] = P_w[j] 209 | anchors[k,0] = P_h[i] 210 | k += 1 211 | return anchors 212 | 213 | 214 | def shift(shape, stride, anchors): 215 | shift_h = np.arange(0, shape[0]) * stride 216 | shift_w = np.arange(0, shape[1]) * stride 217 | 218 | shift_h, shift_w = np.meshgrid(shift_h, shift_w) 219 | shifts = np.vstack((shift_h.ravel(), shift_w.ravel())).transpose() 220 | 221 | # add A anchors (1, A, 2) to 222 | # cell K shifts (K, 1, 2) to get 223 | # shift anchors (K, A, 2) 224 | # reshape to (K*A, 2) shifted anchors 225 | A = anchors.shape[0] 226 | K = shifts.shape[0] 227 | all_anchors = (anchors.reshape((1, A, 2)) + shifts.reshape((1, K, 2)).transpose((1, 0, 2))) 228 | all_anchors = all_anchors.reshape((K * A, 2)) 229 | # print(all_anchors.shape) 230 | # print(all_anchors[:32]) 231 | 232 | return all_anchors 233 | 234 | 235 | class A2J_model(nn.Module): 236 | def __init__(self, num_classes, P_h=None, P_w=None, shape=[11, 11], stride=16, dropout_rate=0., is_3D=True, 237 | light=False): 238 | super(A2J_model, self).__init__() 239 | self.dropout_rate = dropout_rate 240 | self.is_3D = is_3D 241 | self.light = light 242 | anchors = generate_anchors(P_h=P_h, P_w=P_w) 243 | self.all_anchors = torch.from_numpy(shift(shape, stride, anchors)).float() #(w*h*A)*2 244 | self.Backbone = ResNetBackBone(light) # 1 channel depth only 245 | if light: 246 | self.regressionModel = RegressionModel(512, num_classes=num_classes) 247 | self.classificationModel = ClassificationModel(256, num_classes=num_classes) 248 | self.dropout = nn.Dropout(dropout_rate) 249 | if is_3D: 250 | self.DepthRegressionModel = DepthRegressionModel(512, num_classes=num_classes) 251 | else: 252 | self.regressionModel = RegressionModel(2048, num_classes=num_classes) 253 | self.classificationModel = ClassificationModel(1024, num_classes=num_classes) 254 | self.dropout = nn.Dropout(dropout_rate) 255 | if is_3D: 256 | self.DepthRegressionModel = DepthRegressionModel(2048, num_classes=num_classes) 257 | 258 | def forward(self, x): 259 | anchor = self.all_anchors.to(x.device) 260 | x3, x4 = self.Backbone(x) 261 | x3 = self.dropout(x3) 262 | x4 = self.dropout(x4) 263 | classification = self.classificationModel(x3) # N*(w/16*h/16*A)*P 264 | regression = self.regressionModel(x4) # N*(w/16*h/16*A)*P*2 265 | reg_weight = F.softmax(classification, dim=1) # N*(w/16*h/16*A)*P 266 | reg_weight_xy = torch.unsqueeze(reg_weight, 
3).expand( 267 | reg_weight.shape[0], reg_weight.shape[1], reg_weight.shape[2], 2) # N*(w/16*h/16*A)*P*2 268 | anchor_joints_2d = (reg_weight_xy * torch.unsqueeze(anchor, 1)).sum(1) # N*P*2 269 | # anchor_joints_2d[..., 0], anchor_joints_2d[..., 1] = anchor_joints_2d[..., 1], anchor_joints_2d[..., 0] 270 | 271 | reg = torch.unsqueeze(anchor, 1) + regression # N*(w/16*h/16*A)*P*2 272 | regression_joints_2d = (reg_weight_xy*reg).sum(1) # N*P*2 273 | # regression_joints_2d[..., 0], regression_joints_2d[..., 1] = \ 274 | # regression_joints_2d[..., 1], regression_joints_2d[..., 0] 275 | 276 | if self.is_3D: 277 | depthregression = self.DepthRegressionModel(x4) # N*(w/16*h/16*A)*P 278 | depth_value = (reg_weight * depthregression).sum(1) 279 | return classification, regression, depthregression, anchor_joints_2d, regression_joints_2d, depth_value 280 | return classification, regression, anchor_joints_2d, regression_joints_2d 281 | 282 | 283 | if __name__ == "__main__": 284 | num_classes = 14 285 | w, h = 176, 176 286 | B = 10 287 | depth = torch.rand([B, 1, h, w], dtype=torch.float32).cuda() 288 | model = A2J_model(num_classes).cuda() 289 | anchor_joints_2d, regression_joints_2d, depth_value = model(depth) 290 | print(anchor_joints_2d.shape) 291 | print(regression_joints_2d.shape) 292 | print(depth_value.shape) 293 | -------------------------------------------------------------------------------- /models/a2j_conf_net.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.nn.functional as F 4 | import os 5 | import sys 6 | dir = os.path.dirname(os.path.abspath(__file__)) 7 | root = os.path.dirname(dir) 8 | sys.path.append(root) 9 | from models.attention import MultiHeadAttention, PositionwiseFeedForward 10 | 11 | class A2JConfNet(nn.Module): 12 | def __init__(self, n_head, d_attn, d_k, d_v, d_inner, dropout_rate, num_select, random=False): 13 | super(A2JConfNet, self).__init__() 14 | self.n_head = n_head 15 | self.d_attn = d_attn 16 | self.d_k = d_k 17 | self.d_v = d_v 18 | self.d_inner = d_inner 19 | self.dropout_rate = dropout_rate 20 | self.num_select = num_select 21 | self.random = random 22 | self.num_anchors = 11*11*16 23 | self.encode = nn.Sequential( 24 | # (B*N*J, 64, 11, 11) 25 | nn.Conv2d(64, d_attn//4, kernel_size=3, padding=1), 26 | nn.BatchNorm2d(d_attn//4), 27 | nn.ReLU(), 28 | nn.MaxPool2d(3, 2), # (B*N*J, d_attn//4, 5, 5) 29 | 30 | nn.Conv2d(d_attn//4, d_attn//2, kernel_size=3, padding=1), 31 | nn.BatchNorm2d(d_attn//2), 32 | nn.ReLU(), 33 | nn.MaxPool2d(3, 2), # (B*N*J, d_attn//2, 2, 2) 34 | 35 | nn.Conv2d(d_attn//2, d_attn, kernel_size=2) # (B*N*J, d_attn, 1, 1) 36 | ) 37 | self.attention = MultiHeadAttention(n_head, d_attn, d_k, d_v, dropout_rate) 38 | self.pos_ffn = PositionwiseFeedForward(d_attn, d_inner, dropout_rate) 39 | self.confidence_net = nn.Linear(d_attn, 1) 40 | 41 | def select(self, joint_3d, conf, k, random): 42 | """ 43 | 44 | :param joint_3d: Tensor(B, N, J, 3) 45 | :param conf: Tensor(B, N) 46 | :param k: int 47 | :return: 48 | conf_select: Tensor(B, k) 49 | id_select: Tensor(B, k) 50 | """ 51 | B, N, J, _ = joint_3d.shape 52 | if random: 53 | conf_select_list = [] 54 | id_select_list = [] 55 | for i in range(B): 56 | id = torch.arange(0, N, device=conf.device) 57 | id = id[torch.randperm(N)] 58 | id_select = id[:k] 59 | 60 | conf_select = conf[i, id_select] 61 | conf_select_list.append(conf_select) 62 | id_select_list.append(id_select) 63 | conf_select = 
torch.stack(conf_select_list, dim=0) 64 | id_select = torch.stack(id_select_list, dim=0) 65 | conf_select, id_select = torch.topk(conf, k, dim=-1) # (B, k) 66 | 67 | id_select_expand = id_select[:, :, None, None].repeat((1, 1, J, 3)) 68 | joint_3d_select = torch.gather(joint_3d, 1, id_select_expand) # (B, k, J, 3) 69 | 70 | return joint_3d_select, conf_select, id_select 71 | 72 | def forward(self, classification, regression, depthregression, joint_3d): 73 | """ 74 | 75 | :param classification: Tensor(B, num_views, num_anchors, num_joints) 76 | :param regression: Tensor(B, num_views, num_anchors, num_joints, 2) 77 | :param depthregression: Tensor(B, num_views, num_anchors, num_joints) 78 | :param joint_3d: Tensor(B, num_views, num_joints, 3) 79 | :return: 80 | """ 81 | B, N, J, _ = joint_3d.shape 82 | # (B, N, num_anchors, num_joints, 4) 83 | input = torch.cat([classification[..., None], regression, depthregression[..., None]], dim=-1) 84 | input = torch.transpose(input, 2, 3) # # (B, N, J, num_anchors, 4) 85 | input = torch.reshape(input, (B*N*J, 11, 11, 16*4)) 86 | input = input.transpose(1, 3).transpose(2, 3) # (B*N*J, 64, 11, 11) 87 | feature = self.encode(input).reshape([B, N, J, -1]) # (B, N, J, d_attn) 88 | v = feature.mean(dim=-2) # (B, N,d_attn) 89 | 90 | v = self.attention(v, v, v) 91 | v = self.pos_ffn(v) 92 | 93 | conf = self.confidence_net(v).reshape([B, N]) 94 | joint_3d_select, conf_select, id_select = self.select(joint_3d, conf, self.num_select, self.random) 95 | 96 | conf_select = torch.softmax(conf_select, dim=-1) # (B, k) 97 | 98 | joint_3d_conf = joint_3d_select * conf_select[:, :, None, None] 99 | joint_3d_conf = torch.sum(joint_3d_conf, 1) 100 | 101 | return conf, joint_3d_conf 102 | -------------------------------------------------------------------------------- /models/attention.py: -------------------------------------------------------------------------------- 1 | """ 2 | MIT License 3 | 4 | Copyright (c) 2017 Victor Huang 5 | 6 | Permission is hereby granted, free of charge, to any person obtaining a copy 7 | of this software and associated documentation files (the "Software"), to deal 8 | in the Software without restriction, including without limitation the rights 9 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 10 | copies of the Software, and to permit persons to whom the Software is 11 | furnished to do so, subject to the following conditions: 12 | 13 | The above copyright notice and this permission notice shall be included in all 14 | copies or substantial portions of the Software. 15 | 16 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 19 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 20 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 21 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 22 | SOFTWARE. 
23 | """ 24 | import torch 25 | from torch import nn as nn 26 | from torch.nn import functional as F 27 | 28 | 29 | class ScaledDotProductAttention(nn.Module): 30 | ''' Scaled Dot-Product Attention ''' 31 | 32 | def __init__(self, temperature, attn_dropout=0.1): 33 | super().__init__() 34 | self.temperature = temperature 35 | self.dropout = nn.Dropout(attn_dropout) 36 | 37 | def forward(self, q, k, v): 38 | 39 | attn = torch.matmul(q / self.temperature, k.transpose(2, 3)) 40 | 41 | attn = self.dropout(F.softmax(attn, dim=-1)) 42 | output = torch.matmul(attn, v) 43 | 44 | return output, attn 45 | 46 | 47 | class MultiHeadAttention(nn.Module): 48 | ''' Multi-Head Attention module ''' 49 | 50 | def __init__(self, n_head, d_attn, d_k, d_v, dropout=0.1): 51 | super().__init__() 52 | 53 | self.n_head = n_head 54 | self.d_k = d_k 55 | self.d_v = d_v 56 | 57 | self.w_qs = nn.Linear(d_attn, n_head * d_k, bias=False) 58 | self.w_ks = nn.Linear(d_attn, n_head * d_k, bias=False) 59 | self.w_vs = nn.Linear(d_attn, n_head * d_v, bias=False) 60 | self.fc = nn.Linear(n_head * d_v, d_attn, bias=False) 61 | 62 | self.attention = ScaledDotProductAttention(temperature=d_k ** 0.5, attn_dropout=dropout) 63 | 64 | self.dropout = nn.Dropout(dropout) 65 | self.layer_norm = nn.LayerNorm(d_attn, eps=1e-6) 66 | 67 | def forward(self, q, k, v): 68 | 69 | d_k, d_v, n_head = self.d_k, self.d_v, self.n_head 70 | B, num_views = q.size(0), q.size(1) 71 | 72 | residual = v 73 | 74 | # Pass through the pre-attention projection: b x num_views x (n*dv) 75 | # Separate different heads: B x num_views x n x dv 76 | q = self.w_qs(q).view(B, num_views, n_head, d_k) 77 | k = self.w_ks(k).view(B, num_views, n_head, d_k) 78 | v = self.w_vs(v).view(B, num_views, n_head, d_v) 79 | 80 | # Transpose for attention dot product: b x n x num_views x dv 81 | q, k, v = q.transpose(1, 2), k.transpose(1, 2), v.transpose(1, 2) 82 | 83 | v, attn = self.attention(q, k, v) 84 | 85 | # Transpose to move the head dimension back: b x num_views x n x dv 86 | # Combine the last two dimensions to concatenate all the heads together: b x num_views x (n*dv) 87 | v = v.transpose(1, 2).contiguous().view(B, num_views, -1) 88 | v = self.dropout(self.fc(v)) 89 | v += residual 90 | 91 | v = self.layer_norm(v) 92 | 93 | return v 94 | 95 | 96 | class PositionwiseFeedForward(nn.Module): 97 | ''' A two-feed-forward-layer module ''' 98 | 99 | def __init__(self, d_in, d_hid, dropout=0.1): 100 | super().__init__() 101 | self.w_1 = nn.Linear(d_in, d_hid) # position-wise 102 | self.w_2 = nn.Linear(d_hid, d_in) # position-wise 103 | self.layer_norm = nn.LayerNorm(d_in, eps=1e-6) 104 | self.dropout = nn.Dropout(dropout) 105 | 106 | def forward(self, x): 107 | 108 | residual = x 109 | 110 | x = self.w_2(F.relu(self.w_1(x))) 111 | x = self.dropout(x) 112 | x += residual 113 | 114 | x = self.layer_norm(x) 115 | 116 | return x -------------------------------------------------------------------------------- /models/conf_net.py: -------------------------------------------------------------------------------- 1 | import torch.nn as nn 2 | from torch.nn import init 3 | import torch 4 | import torch.nn.functional as F 5 | import numpy as np 6 | 7 | import os 8 | import sys 9 | dir = os.path.dirname(os.path.abspath(__file__)) 10 | root = os.path.dirname(dir) 11 | from models import resnet 12 | 13 | 14 | class ConfNet(nn.Module): 15 | def __init__(self, num_views, dropout_rate): 16 | super(ConfNet, self).__init__() 17 | self.resnet = resnet.resnet18(pretrained=False) 18 | self.dropout = 
nn.Dropout(dropout_rate) 19 | self.fc = nn.Linear(1000, num_views) 20 | 21 | def forward(self, x): 22 | """ 23 | 24 | :param x: Tensor(B, 1, 176, 176) 25 | :return: 26 | """ 27 | n, c, h, w = x.size() # x: [B, 1, H ,W] 28 | 29 | x = x[:, 0:1, :, :] # depth 30 | x = x.expand(n, 3, h, w) 31 | x = self.resnet(x) 32 | x = self.dropout(x) 33 | x = self.fc(x) 34 | return x 35 | 36 | 37 | if __name__ == '__main__': 38 | confnet = ConfNet(25, 0.5) 39 | input = torch.randn((4, 1, 176, 176), dtype=torch.float32) 40 | output = confnet(input) 41 | print(output.shape) -------------------------------------------------------------------------------- /models/layers.py: -------------------------------------------------------------------------------- 1 | from torch import nn 2 | 3 | Pool = nn.MaxPool2d 4 | 5 | 6 | def batchnorm(x): 7 | return nn.BatchNorm2d(x.size()[1])(x) 8 | 9 | 10 | class Conv(nn.Module): 11 | def __init__(self, inp_dim, out_dim, kernel_size=3, stride=1, bn=False, relu=True): 12 | super(Conv, self).__init__() 13 | self.inp_dim = inp_dim 14 | self.conv = nn.Conv2d(inp_dim, out_dim, kernel_size, stride, padding=(kernel_size - 1) // 2, bias=True) 15 | self.relu = None 16 | self.bn = None 17 | if relu: 18 | self.relu = nn.ReLU() 19 | if bn: 20 | self.bn = nn.BatchNorm2d(out_dim) 21 | 22 | def forward(self, x): 23 | assert x.size()[1] == self.inp_dim, "{} {}".format(x.size()[1], self.inp_dim) 24 | x = self.conv(x) 25 | if self.bn is not None: 26 | x = self.bn(x) 27 | if self.relu is not None: 28 | x = self.relu(x) 29 | return x 30 | 31 | 32 | class Residual(nn.Module): 33 | def __init__(self, inp_dim, out_dim): 34 | super(Residual, self).__init__() 35 | self.relu = nn.ReLU() 36 | self.bn1 = nn.BatchNorm2d(inp_dim) 37 | self.conv1 = Conv(inp_dim, int(out_dim / 2), 1, relu=False) 38 | self.bn2 = nn.BatchNorm2d(int(out_dim / 2)) 39 | self.conv2 = Conv(int(out_dim / 2), int(out_dim / 2), 3, relu=False) 40 | self.bn3 = nn.BatchNorm2d(int(out_dim / 2)) 41 | self.conv3 = Conv(int(out_dim / 2), out_dim, 1, relu=False) 42 | self.skip_layer = Conv(inp_dim, out_dim, 1, relu=False) 43 | if inp_dim == out_dim: 44 | self.need_skip = False 45 | else: 46 | self.need_skip = True 47 | 48 | def forward(self, x): 49 | if self.need_skip: 50 | residual = self.skip_layer(x) 51 | else: 52 | residual = x 53 | out = self.bn1(x) 54 | out = self.relu(out) 55 | out = self.conv1(out) 56 | out = self.bn2(out) 57 | out = self.relu(out) 58 | out = self.conv2(out) 59 | out = self.bn3(out) 60 | out = self.relu(out) 61 | out = self.conv3(out) 62 | out += residual 63 | return out 64 | 65 | 66 | class Hourglass(nn.Module): 67 | def __init__(self, n, f, bn=None, increase=0): 68 | super(Hourglass, self).__init__() 69 | nf = f + increase 70 | self.up1 = Residual(f, f) 71 | # Lower branch 72 | self.pool1 = Pool(2, 2) 73 | self.low1 = Residual(f, nf) 74 | self.n = n 75 | # Recursive hourglass 76 | if self.n > 1: 77 | self.low2 = Hourglass(n - 1, nf, bn=bn) 78 | else: 79 | self.low2 = Residual(nf, nf) 80 | self.low3 = Residual(nf, f) 81 | self.up2 = nn.Upsample(scale_factor=2, mode='nearest') 82 | 83 | def forward(self, x): 84 | up1 = self.up1(x) 85 | pool1 = self.pool1(x) 86 | low1 = self.low1(pool1) 87 | low2 = self.low2(low1) 88 | low3 = self.low3(low2) 89 | up2 = self.up2(low3) 90 | return up1 + up2 -------------------------------------------------------------------------------- /models/multiview_a2j.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | 
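# Pipeline of this module: depth_crop_expand() renders virtual views of the input
# crop, the shared A2J_model predicts per-view 2D joints and depths, the predictions
# are lifted to 3D and transformed back to the input camera, then fused either by a
# plain mean (joint_3d_fused) or by A2JConfNet's confidence-weighted sum (joint_3d_conf).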
import torch.nn.functional as F 4 | import numpy as np 5 | import os 6 | import sys 7 | dir = os.path.dirname(os.path.abspath(__file__)) 8 | root = os.path.dirname(dir) 9 | sys.path.append(root) 10 | from ops.render import depth_crop_expand 11 | from ops.image_ops import normalize_depth_expand, normalize_depth 12 | from ops.point_transform import transform_2D, transform_2D_to_3D, transform_3D 13 | from models.a2j import A2J_model 14 | from models.a2j_conf_net import A2JConfNet 15 | import logging 16 | logger = logging.getLogger(__file__) 17 | 18 | 19 | class MultiviewA2J(nn.Module): 20 | def __init__(self, camera, num_joints, n_head, d_attn, d_k, d_v, d_inner, dropout_rate, num_select, 21 | light=False, use_conf=True, random_select=False, random_sample=False): 22 | super(MultiviewA2J, self).__init__() 23 | self.camera = camera 24 | self.num_joints = num_joints 25 | self.n_head = n_head 26 | self.d_attn = d_attn 27 | self.d_k = d_k 28 | self.d_v = d_v 29 | self.d_inner = d_inner 30 | self.dropout_rate = dropout_rate 31 | self.num_select = num_select 32 | self.light = light 33 | self.use_conf = use_conf 34 | self.random_select = random_select 35 | self.random_sample = random_sample 36 | self.fx = camera["fx"] 37 | self.fy = camera["fy"] 38 | self.u0 = camera["u0"] 39 | self.v0 = camera["v0"] 40 | self.a2j = A2J_model(num_joints, dropout_rate=dropout_rate, light=light) 41 | self.conf_fuse_net = A2JConfNet(n_head, d_attn, d_k, d_v, d_inner, dropout_rate, num_select, random_select) 42 | 43 | def forward(self, cropped, crop_trans, com_2d, inter_matrix, cube, level, view_trans=None): 44 | """ 45 | :param cropped: Tensor(B, 1, 176, 176) or Tensor(B, N, 1, 176, 176) 46 | :param crop_trans: Tensor(B, 3, 3) 47 | :param com_2d: Tensor(B, 3) 48 | :param inter_matrix: Tensor(B, 3, 3) 49 | :param cube: Tensor(B, 3) 50 | :param level: int 51 | :return: 52 | crop_expand: Tensor(B, num_views, 1, H, W) 53 | anchor_joints_2d_crop: Tensor(B, num_views, num_joints, 2) 54 | regression_joints_2d_crop: Tensor(B, num_views, num_joints, 2) 55 | depth_value_norm: Tensor(B, num_views, num_joints) 56 | joints_3d: Tensor(B, num_views, num_joints, 3) 57 | view_trans: Tensor(B, num_views, 4, 4) 58 | joint_3d_fused: Tensor(B, num_joints, 3) 59 | classification: Tensor(B*num_views, w/16*h/16*A, num_joints) 60 | regression: Tensor(B*num_views, w/16*h/16*A, num_joints, 2) 61 | depthregression: Tensor(B*num_views, w/16*h/16*A, num_joints) 62 | """ 63 | if level==-1: 64 | assert view_trans is not None 65 | B, num_views, _, H, W = cropped.shape 66 | crop_expand = cropped 67 | else: 68 | B, _, H, W = cropped.shape 69 | if level>0: 70 | with torch.no_grad(): 71 | # crop_expand: Tensor(B, num_views, 1, H, W) 72 | # view_trans: Tensor(B, num_views, 4, 4) 73 | crop_expand, view_trans = depth_crop_expand(cropped, self.fx, self.fy, self.u0, self.v0, crop_trans, 74 | level, com_2d, self.random_sample, False) 75 | elif level==0: 76 | if self.random_sample: 77 | crop_expand, view_trans = depth_crop_expand(cropped, self.fx, self.fy, self.u0, self.v0, crop_trans, 78 | level, com_2d, self.random_sample, False) 79 | else: 80 | crop_expand = cropped[:, None, :, :, :] 81 | view_trans = torch.eye(4, dtype=torch.float32)[None, None, :, :] 82 | view_trans = view_trans.repeat((B, 1, 1, 1)).to(cropped.device) 83 | 84 | B, num_views, _, H, W = crop_expand.shape 85 | crop_expand = normalize_depth_expand(crop_expand, com_2d, cube) 86 | crop_expand = crop_expand.reshape((B * num_views, 1, H, W)) 87 | 88 | 89 | # classification: (B*num_views, 
w/16*h/16*A, num_joints) 90 | # regression: (B*num_views, w/16*h/16*A, num_joints, 2) 91 | # depthregression: (B*num_views, w/16*h/16*A, num_joints) 92 | # anchor_joints_2d: (B*num_views, num_joints, 2) 93 | # regression_joints_2d: (B*num_views, num_joints, 2) 94 | # depth_value: (B*num_views, num_joints) 95 | classification, regression, depthregression, anchor_joints_2d_crop, regression_joints_2d_crop, \ 96 | depth_value_norm = self.a2j(crop_expand) 97 | 98 | inv_corp_trans = torch.inverse(crop_trans) 99 | inv_corp_trans_expand = inv_corp_trans[:, None, :, :].repeat([1, num_views, 1, 1]) 100 | inv_corp_trans_expand = inv_corp_trans_expand.reshape([-1, 3, 3]) 101 | regression_joints_2d = transform_2D(regression_joints_2d_crop, inv_corp_trans_expand) 102 | com_z_expand = com_2d[:, 2][:, None].repeat([1, num_views]).reshape([B*num_views, 1]) 103 | cube_z_expand = cube[:, 2][:, None].repeat([1, num_views]).reshape([B*num_views, 1]) 104 | depth_value = depth_value_norm * cube_z_expand/2. + com_z_expand 105 | regression_joints_2d = regression_joints_2d.reshape([B, num_views, self.num_joints, 2]) 106 | depth_value = depth_value.reshape([B, num_views, self.num_joints]) 107 | # joints_3d_trans: (B, num_views, num_joints, 3) 108 | joints_3d_trans = torch.cat([regression_joints_2d, depth_value[..., None]], dim=-1) 109 | joints_3d_trans = transform_2D_to_3D(joints_3d_trans, self.fx, self.fy, self.u0, self.v0) 110 | joints_3d = transform_3D(joints_3d_trans, torch.inverse(view_trans)) 111 | joint_3d_fused = torch.mean(joints_3d, dim=1) 112 | 113 | crop_expand = crop_expand.reshape((B, num_views, 1, H, W)) 114 | anchor_joints_2d_crop = anchor_joints_2d_crop.reshape((B, num_views, self.num_joints, 2)) 115 | regression_joints_2d_crop = regression_joints_2d_crop.reshape((B, num_views, self.num_joints, 2)) 116 | depth_value_norm = depth_value_norm.reshape([B, num_views, self.num_joints]) 117 | 118 | num_anchors = classification.shape[1] 119 | classification = torch.reshape(classification, (B, num_views, num_anchors, self.num_joints)) 120 | regression = torch.reshape(regression, (B, num_views, num_anchors, self.num_joints, 2)) 121 | depthregression = torch.reshape(depthregression, (B, num_views, num_anchors, self.num_joints)) 122 | 123 | if self.use_conf: 124 | if level!=0: 125 | conf, joint_3d_conf = self.conf_fuse_net(classification, regression, depthregression, joints_3d) 126 | else: 127 | conf = torch.ones((B, 1), dtype=torch.float32) 128 | joint_3d_conf = joint_3d_fused 129 | else: 130 | joint_3d_conf = joint_3d_fused 131 | conf = None 132 | 133 | return crop_expand, anchor_joints_2d_crop, regression_joints_2d_crop, depth_value_norm, joints_3d, view_trans,\ 134 | joint_3d_fused, classification, regression, depthregression, conf, joint_3d_conf 135 | 136 | 137 | if __name__ == '__main__': 138 | from feeders.nyu_feeder import NyuFeeder, collate_fn 139 | from torch.utils.data.dataloader import DataLoader 140 | import json 141 | 142 | dataset_config = json.load(open("../config/dataset/nyu.json", 'r')) 143 | train_dataset = NyuFeeder('train') 144 | dataloader = DataLoader(train_dataset, batch_size=6) 145 | predictor = MultiviewA2J(dataset_config["camera"], 14).cuda() 146 | for batch_idx, batch_data in enumerate(dataloader): 147 | item, depth, cropped, joint_3d, crop_trans, com_2d, inter_matrix, cube = batch_data 148 | cropped = cropped.cuda() 149 | crop_trans = crop_trans.cuda() 150 | com_2d = com_2d.cuda() 151 | inter_matrix = inter_matrix.cuda() 152 | cube = cube.cuda() 153 | crop_expand, anchor_joints_2d, 
regression_joints_2d, depth_value, joints_3d, view_trans = \ 154 | predictor(cropped, crop_trans, com_2d, inter_matrix, cube, level=4) 155 | print(crop_expand.shape) 156 | print(anchor_joints_2d.shape) 157 | print(regression_joints_2d.shape) 158 | print(depth_value.shape) 159 | print(joints_3d.shape) 160 | print(view_trans.shape) 161 | break 162 | -------------------------------------------------------------------------------- /models/resnet.py: -------------------------------------------------------------------------------- 1 | import torch.nn as nn 2 | import torch.utils.model_zoo as model_zoo 3 | 4 | 5 | __all__ = ['ResNet', 'resnet18', 'resnet34', 'resnet50', 'resnet101', 6 | 'resnet152'] 7 | 8 | 9 | model_urls = { 10 | 'resnet18': 'https://download.pytorch.org/models/resnet18-5c106cde.pth', 11 | 'resnet34': 'https://download.pytorch.org/models/resnet34-333f7ec4.pth', 12 | 'resnet50': 'https://download.pytorch.org/models/resnet50-19c8e357.pth', 13 | 'resnet101': 'https://download.pytorch.org/models/resnet101-5d3b4d8f.pth', 14 | 'resnet152': 'https://download.pytorch.org/models/resnet152-b121ed2d.pth', 15 | } 16 | 17 | 18 | def conv3x3(in_planes, out_planes, stride=1, dilation=1): 19 | """3x3 convolution with padding""" 20 | return nn.Conv2d(in_planes, out_planes, kernel_size=3, stride=stride, dilation=dilation, 21 | padding=dilation, bias=False) 22 | 23 | 24 | def conv1x1(in_planes, out_planes, stride=1): 25 | """1x1 convolution""" 26 | return nn.Conv2d(in_planes, out_planes, kernel_size=1, stride=stride, bias=False) 27 | 28 | 29 | class BasicBlock(nn.Module): 30 | expansion = 1 31 | 32 | def __init__(self, inplanes, planes, stride=1, downsample=None, dilation=1): 33 | super(BasicBlock, self).__init__() 34 | self.conv1 = conv3x3(inplanes, planes, stride) 35 | self.bn1 = nn.BatchNorm2d(planes) 36 | self.relu = nn.ReLU(inplace=True) 37 | self.conv2 = conv3x3(planes, planes, dilation=dilation) 38 | self.bn2 = nn.BatchNorm2d(planes) 39 | self.downsample = downsample 40 | self.stride = stride 41 | 42 | def forward(self, x): 43 | identity = x 44 | 45 | out = self.conv1(x) 46 | out = self.bn1(out) 47 | out = self.relu(out) 48 | 49 | out = self.conv2(out) 50 | out = self.bn2(out) 51 | 52 | if self.downsample is not None: 53 | identity = self.downsample(x) 54 | 55 | out += identity 56 | out = self.relu(out) 57 | 58 | return out 59 | 60 | 61 | class Bottleneck(nn.Module): 62 | expansion = 4 63 | 64 | def __init__(self, inplanes, planes, stride=1, downsample=None, dilation=1): 65 | super(Bottleneck, self).__init__() 66 | self.conv1 = conv1x1(inplanes, planes) 67 | self.bn1 = nn.BatchNorm2d(planes) 68 | self.conv2 = conv3x3(planes, planes, stride, dilation=dilation) 69 | self.bn2 = nn.BatchNorm2d(planes) 70 | self.conv3 = conv1x1(planes, planes * self.expansion) 71 | self.bn3 = nn.BatchNorm2d(planes * self.expansion) 72 | self.relu = nn.ReLU(inplace=True) 73 | self.downsample = downsample 74 | self.stride = stride 75 | 76 | def forward(self, x): 77 | identity = x 78 | 79 | out = self.conv1(x) 80 | out = self.bn1(out) 81 | out = self.relu(out) 82 | 83 | out = self.conv2(out) 84 | out = self.bn2(out) 85 | out = self.relu(out) 86 | 87 | out = self.conv3(out) 88 | out = self.bn3(out) 89 | 90 | if self.downsample is not None: 91 | identity = self.downsample(x) 92 | 93 | out += identity 94 | out = self.relu(out) 95 | 96 | return out 97 | 98 | 99 | class ResNet(nn.Module): 100 | 101 | def __init__(self, block, layers, num_classes=1000, zero_init_residual=False): 102 | super(ResNet, self).__init__() 103 | 
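# Torchvision-style ResNet with one change: layer4 is built with stride=1 and
# dilation=2 (see the _make_layer call below), so the deepest feature map stays
# at 1/16 of the input resolution (11x11 for the 176x176 crops), which is what
# A2J_model's 11x11 anchor grid with stride 16 expects.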
self.inplanes = 64 104 | self.conv1 = nn.Conv2d(3, 64, kernel_size=7, stride=2, padding=3, 105 | bias=False) 106 | self.bn1 = nn.BatchNorm2d(64) 107 | self.relu = nn.ReLU(inplace=True) 108 | self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1) 109 | self.layer1 = self._make_layer(block, 64, layers[0]) 110 | self.layer2 = self._make_layer(block, 128, layers[1], stride=2) 111 | self.layer3 = self._make_layer(block, 256, layers[2], stride=2) 112 | self.layer4 = self._make_layer(block, 512, layers[3], stride=1,dilation=2) 113 | self.avgpool = nn.AdaptiveAvgPool2d((1, 1)) 114 | self.fc = nn.Linear(512 * block.expansion, num_classes) 115 | 116 | for m in self.modules(): 117 | if isinstance(m, nn.Conv2d): 118 | nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu') 119 | elif isinstance(m, nn.BatchNorm2d): 120 | nn.init.constant_(m.weight, 1) 121 | nn.init.constant_(m.bias, 0) 122 | 123 | # Zero-initialize the last BN in each residual branch, 124 | # so that the residual branch starts with zeros, and each residual block behaves like an identity. 125 | # This improves the model by 0.2~0.3% according to https://arxiv.org/abs/1706.02677 126 | if zero_init_residual: 127 | for m in self.modules(): 128 | if isinstance(m, Bottleneck): 129 | nn.init.constant_(m.bn3.weight, 0) 130 | elif isinstance(m, BasicBlock): 131 | nn.init.constant_(m.bn2.weight, 0) 132 | 133 | def _make_layer(self, block, planes, blocks, stride=1, dilation=1): 134 | downsample = None 135 | if stride != 1 or self.inplanes != planes * block.expansion: 136 | downsample = nn.Sequential( 137 | conv1x1(self.inplanes, planes * block.expansion, stride), 138 | nn.BatchNorm2d(planes * block.expansion), 139 | ) 140 | 141 | layers = [] 142 | layers.append(block(self.inplanes, planes, stride, downsample)) 143 | self.inplanes = planes * block.expansion 144 | for _ in range(1, blocks): 145 | layers.append(block(self.inplanes, planes, dilation=dilation)) 146 | 147 | return nn.Sequential(*layers) 148 | 149 | def forward(self, x): 150 | x = self.conv1(x) 151 | x = self.bn1(x) 152 | x = self.relu(x) 153 | x = self.maxpool(x) 154 | 155 | x = self.layer1(x) 156 | x = self.layer2(x) 157 | x = self.layer3(x) 158 | x = self.layer4(x) 159 | 160 | x = self.avgpool(x) 161 | x = x.view(x.size(0), -1) 162 | x = self.fc(x) 163 | 164 | return x 165 | 166 | 167 | def resnet18(pretrained=False, **kwargs): 168 | """Constructs a ResNet-18 model. 169 | Args: 170 | pretrained (bool): If True, returns a model pre-trained on ImageNet 171 | """ 172 | model = ResNet(BasicBlock, [2, 2, 2, 2], **kwargs) 173 | if pretrained: 174 | model.load_state_dict(model_zoo.load_url(model_urls['resnet18'])) 175 | return model 176 | 177 | 178 | def resnet34(pretrained=False, **kwargs): 179 | """Constructs a ResNet-34 model. 180 | Args: 181 | pretrained (bool): If True, returns a model pre-trained on ImageNet 182 | """ 183 | model = ResNet(BasicBlock, [3, 4, 6, 3], **kwargs) 184 | if pretrained: 185 | model.load_state_dict(model_zoo.load_url(model_urls['resnet34'])) 186 | return model 187 | 188 | 189 | def resnet50(pretrained=False, **kwargs): 190 | """Constructs a ResNet-50 model. 191 | Args: 192 | pretrained (bool): If True, returns a model pre-trained on ImageNet 193 | """ 194 | model = ResNet(Bottleneck, [3, 4, 6, 3], **kwargs) 195 | if pretrained: 196 | model.load_state_dict(model_zoo.load_url(model_urls['resnet50'])) 197 | return model 198 | 199 | 200 | def resnet101(pretrained=False, **kwargs): 201 | """Constructs a ResNet-101 model. 
202 | Args: 203 | pretrained (bool): If True, returns a model pre-trained on ImageNet 204 | """ 205 | model = ResNet(Bottleneck, [3, 4, 23, 3], **kwargs) 206 | if pretrained: 207 | model.load_state_dict(model_zoo.load_url(model_urls['resnet101'])) 208 | return model 209 | 210 | 211 | def resnet152(pretrained=False, **kwargs): 212 | """Constructs a ResNet-152 model. 213 | Args: 214 | pretrained (bool): If True, returns a model pre-trained on ImageNet 215 | """ 216 | model = ResNet(Bottleneck, [3, 8, 36, 3], **kwargs) 217 | if pretrained: 218 | model.load_state_dict(model_zoo.load_url(model_urls['resnet152'])) 219 | return model -------------------------------------------------------------------------------- /models/view_selector_a2j.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import os 4 | import sys 5 | dir = os.path.dirname(os.path.abspath(__file__)) 6 | root = os.path.dirname(dir) 7 | sys.path.append(root) 8 | from models.multiview_a2j import MultiviewA2J 9 | from ops.point_transform import transform_2D_to_3D 10 | from ops.render import uniform_view_matrix, render_view, depth_crop_expand 11 | from ops.image_ops import normalize_depth_expand 12 | 13 | 14 | class ViewSelector(nn.Module): 15 | def __init__(self, multiview_a2j, conf_net, random): 16 | super().__init__() 17 | self.multiview_a2j = multiview_a2j 18 | self.conf_net = conf_net 19 | self.random = random 20 | 21 | self.multiview_a2j.eval() 22 | self.num_joints = self.multiview_a2j.num_joints 23 | self.camera = self.multiview_a2j.camera 24 | self.fx = self.camera["fx"] 25 | self.fy = self.camera["fy"] 26 | self.u0 = self.camera["u0"] 27 | self.v0 = self.camera["v0"] 28 | 29 | def train(self, mode=True): 30 | self.training = mode 31 | for module in self.children(): 32 | if module==self.conf_net: 33 | module.train(mode) 34 | return self 35 | 36 | def select(self, conf, k, random=False): 37 | """ 38 | 39 | :param conf: Tensor(B, N) 40 | :param k: int 41 | :return: 42 | conf_select: Tensor(B, k) 43 | id_select: Tensor(B, k) 44 | """ 45 | B, N = conf.shape 46 | if random: 47 | conf_select_list = [] 48 | id_select_list = [] 49 | for i in range(B): 50 | id = torch.arange(0, N, device=conf.device) 51 | id = id[torch.randperm(N)] 52 | id_select = id[:k] 53 | 54 | conf_select = conf[i, id_select] 55 | conf_select_list.append(conf_select) 56 | id_select_list.append(id_select) 57 | conf_select = torch.stack(conf_select_list, dim=0) 58 | id_select = torch.stack(id_select_list, dim=0) 59 | else: 60 | conf_select, id_select = torch.topk(conf, k, dim=-1) # (B, k) 61 | 62 | return conf_select, id_select 63 | 64 | def select_crop(self, crop_expand, view_trans, conf, k): 65 | """ 66 | :param crop_expand: Tensor(B, N, 1, 176, 176) 67 | :param view_trans: Tensor(B, N, 4, 4) 68 | :param conf: Tensor(B, N) 69 | :param k: int 70 | :return: 71 | crop_select: Tensor(B, k, 1, 176, 176) 72 | joint_3d_select: Tensor(B, k, J, 3) 73 | conf_select: Tensor(B, k) 74 | id_select: Tensor(B, k) 75 | """ 76 | B, N, _, W, H = crop_expand.shape 77 | conf_select, id_select = torch.topk(conf, k, dim=-1) # (B, k) 78 | 79 | id_select_expand = id_select[:, :, None, None, None].repeat((1, 1, 1, W, H)) 80 | crop_select = torch.gather(crop_expand, 1, id_select_expand) # (B, k, 1, 176, 176) 81 | 82 | id_select_expand = id_select[:, :, None, None].repeat((1, 1, 4, 4)) 83 | view_trans_select = torch.gather(view_trans, 1, id_select_expand) # (B, k, 4, 4) 84 | 85 | return crop_select, 
view_trans_select, conf_select, id_select 86 | 87 | def forward(self, cropped, crop_trans, com_2d, inter_matrix, cube, level, k, inference): 88 | """ 89 | :param cropped: Tensor(B, 1, 176, 176) 90 | :param crop_trans: Tensor(B, 3, 3) 91 | :param com_2d: Tensor(B, 3) 92 | :param inter_matrix: Tensor(B, 3, 3) 93 | :param cube: Tensor(B, 3) 94 | :param level: int 95 | :param k: int 96 | :inference: bool 97 | :return: 98 | """ 99 | if level==1: 100 | self.shape = [1, 3] 101 | elif level==2: 102 | self.shape = [3, 3] 103 | elif level==3: 104 | self.shape = [3, 5] 105 | elif level==4: 106 | self.shape = [5, 5] 107 | elif level==5: 108 | self.shape = [9, 9] 109 | else: 110 | raise NotImplemented 111 | 112 | conf_light = self.conf_net(cropped) 113 | 114 | with torch.no_grad(): 115 | conf_select_light, id_select_light = self.select(conf_light, k, self.random) 116 | crop_select_light, view_trans_select_light = depth_crop_expand(cropped, self.fx, self.fy, self.u0, self.v0, 117 | crop_trans, level, com_2d, False, random_ratote=False, indices=id_select_light) 118 | crop_select_light = normalize_depth_expand(crop_select_light, com_2d, cube) 119 | _, _, _, _, joints_3d_pred_select_light, _, joint_3d_fused_select_light, _, _, _, _, _ = \ 120 | self.multiview_a2j(crop_select_light, crop_trans, com_2d, inter_matrix, cube, level=-1, 121 | view_trans=view_trans_select_light) 122 | conf_select_light = torch.softmax(conf_select_light, dim=-1) # (B, k) 123 | 124 | joint_3d_conf_select_light = joints_3d_pred_select_light * conf_select_light[:, :, None, None] 125 | joint_3d_conf_select_light = torch.sum(joint_3d_conf_select_light, 1) 126 | if inference: 127 | return joints_3d_pred_select_light, joint_3d_fused_select_light, joint_3d_conf_select_light 128 | else: 129 | crop_expand, anchor_joints_2d_crop, regression_joints_2d_crop, depth_value_norm, joints_3d_pred, \ 130 | view_trans, joint_3d_fused, classification, regression, depthregression, conf, joint_3d_conf_select = \ 131 | self.multiview_a2j(cropped, crop_trans, com_2d, inter_matrix, cube, level=level) 132 | 133 | return crop_expand, view_trans, anchor_joints_2d_crop, regression_joints_2d_crop, depth_value_norm, \ 134 | joints_3d_pred, joint_3d_fused, conf, joint_3d_conf_select, joints_3d_pred_select_light, \ 135 | joint_3d_fused_select_light, joint_3d_conf_select_light, conf_light 136 | -------------------------------------------------------------------------------- /ops/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/iscas3dv/handpose-virtualview/d220efa69ff031077381bc0d4cd58fae7049c329/ops/__init__.py -------------------------------------------------------------------------------- /ops/cuda/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/iscas3dv/handpose-virtualview/d220efa69ff031077381bc0d4cd58fae7049c329/ops/cuda/__init__.py -------------------------------------------------------------------------------- /ops/cuda/depth_to_point_cloud_mask_cuda.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | #include 4 | 5 | // CUDA forward declarations 6 | 7 | std::vector depth_to_point_cloud_mask_cuda_forward(torch::Tensor depthmap); 8 | 9 | // C++ interface 10 | 11 | #define CHECK_CUDA(x) AT_ASSERTM(x.type().is_cuda(), #x " must be a CUDA tensor") 12 | #define CHECK_CONTIGUOUS(x) AT_ASSERTM(x.is_contiguous(), #x " must be contiguous") 13 | #define 
CHECK_INPUT(x) CHECK_CUDA(x); CHECK_CONTIGUOUS(x) 14 | 15 | std::vector depth_to_point_cloud_mask_forward(torch::Tensor depthmap) { 16 | CHECK_INPUT(depthmap); 17 | 18 | return depth_to_point_cloud_mask_cuda_forward(depthmap); 19 | } 20 | 21 | PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) { 22 | m.def("forward", &depth_to_point_cloud_mask_forward, "depth to point cloud mask forward"); 23 | } -------------------------------------------------------------------------------- /ops/cuda/depth_to_point_cloud_mask_cuda_kernel.cu: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | #include 4 | #include 5 | #include "helper_cuda.h" 6 | 7 | #include 8 | 9 | namespace { 10 | // input: depthmap(b, h*w) 11 | // output: point_cloud (b, h*w, 3), mask (b, h*w) 12 | __global__ void depth_to_point_cloud_mask_forward_kernel( 13 | const torch::PackedTensorAccessor32 depthmap, 14 | torch::PackedTensorAccessor32 point_cloud, 15 | torch::PackedTensorAccessor32 mask, 16 | const int h, const int w){ 17 | //batch index 18 | const int n = blockIdx.y; 19 | // column index 20 | const int c = blockIdx.x * blockDim.x + threadIdx.x; 21 | if(c < depthmap.size(1)) { 22 | int d = depthmap[n][c]; 23 | point_cloud[n][c][0] = c%w; 24 | point_cloud[n][c][1] = c/w; 25 | point_cloud[n][c][2] = d==0?1:d; // avoid dividing 0 in 3D to 2D transform 26 | mask[n][c] = (d!=0); 27 | } 28 | } 29 | } // namespace 30 | 31 | // input: depthmap: (b, h, w, 1) 32 | // output: point_cloud: (b, h*w, 3), mask: (b, h*w) 33 | std::vector depth_to_point_cloud_mask_cuda_forward(torch::Tensor depthmap) { 34 | const int b = depthmap.size(0); 35 | const int h = depthmap.size(1); 36 | const int w = depthmap.size(2); 37 | depthmap = depthmap.reshape({b, h*w}); 38 | auto point_cloud = torch::zeros({b, h*w, 3}, 39 | torch::TensorOptions().dtype(depthmap.scalar_type()).device(depthmap.device())); 40 | auto mask = torch::zeros({b, h*w}, torch::TensorOptions().dtype(torch::kInt32).device(depthmap.device())); 41 | 42 | const int threads = 1024; 43 | const dim3 blocks((h*w + threads - 1) / threads, b); 44 | 45 | AT_DISPATCH_INTEGRAL_TYPES(depthmap.scalar_type(), "depth_to_point_cloud_mask_forward_cuda", ([&]() { 46 | depth_to_point_cloud_mask_forward_kernel<<>>( 47 | depthmap.packed_accessor32(), 48 | point_cloud.packed_accessor32(), 49 | mask.packed_accessor32(), 50 | h, w); 51 | })); 52 | getLastCudaError("depth_to_point_cloud_mask_forward_kernel() execution failed."); 53 | checkCudaErrors(cudaDeviceSynchronize()); 54 | return {point_cloud, mask}; 55 | } 56 | 57 | -------------------------------------------------------------------------------- /ops/cuda/point_cloud_mask_to_depth_cuda.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | #include 4 | 5 | // CUDA forward declarations 6 | 7 | torch::Tensor point_cloud_mask_to_depth_cuda_forward(torch::Tensor point_cloud, torch::Tensor mask, 8 | const int h, const int w); 9 | 10 | // C++ interface 11 | 12 | #define CHECK_CUDA(x) AT_ASSERTM(x.type().is_cuda(), #x " must be a CUDA tensor") 13 | #define CHECK_CONTIGUOUS(x) AT_ASSERTM(x.is_contiguous(), #x " must be contiguous") 14 | #define CHECK_INPUT(x) CHECK_CUDA(x); CHECK_CONTIGUOUS(x) 15 | 16 | torch::Tensor point_cloud_mask_to_depth_forward(torch::Tensor point_cloud, torch::Tensor mask, 17 | const int h, const int w) { 18 | CHECK_INPUT(point_cloud); 19 | 20 | return point_cloud_mask_to_depth_cuda_forward(point_cloud, mask, h, w); 21 | } 22 | 23 | 
PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) { 24 | m.def("forward", &point_cloud_mask_to_depth_forward, "point cloud mask to depth forward"); 25 | } -------------------------------------------------------------------------------- /ops/cuda/point_cloud_mask_to_depth_cuda_kernel.cu: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | #include 4 | #include 5 | #include "helper_cuda.h" 6 | 7 | #include 8 | 9 | namespace { 10 | // input: point_cloud (b,h*w,3) 11 | // output: depthmap(b,h*w), mask (b, h*w) 12 | __global__ void point_cloud_mask_to_depth_forward_kernel( 13 | torch::PackedTensorAccessor32 depthmap, 14 | const torch::PackedTensorAccessor32 point_cloud, 15 | const torch::PackedTensorAccessor32 mask, 16 | const int h, const int w) { 17 | //batch index 18 | const int n = blockIdx.y; 19 | // column index 20 | const int c = blockIdx.x * blockDim.x + threadIdx.x; 21 | if(c < depthmap.size(1) && mask[n][c]) { 22 | int u = point_cloud[n][c][0], v = point_cloud[n][c][1], d = point_cloud[n][c][2]; 23 | if(0<=u && u depthmap, 33 | const int h, const int w, const int bg_val) { 34 | //batch index 35 | const int n = blockIdx.y; 36 | // column index 37 | const int c = blockIdx.x * blockDim.x + threadIdx.x; 38 | if(c < depthmap.size(1) && depthmap[n][c] == INT_MAX) { 39 | depthmap[n][c] = bg_val; 40 | } 41 | } 42 | } // namespace 43 | 44 | // input: point_cloud: (b, h*w, 3), mask: (b, h*w) 45 | // output: depthmap: (b, h, w, 1) 46 | torch::Tensor point_cloud_mask_to_depth_cuda_forward(torch::Tensor point_cloud, torch::Tensor mask, 47 | const int h, const int w) { 48 | const int b = point_cloud.size(0); 49 | auto depthmap = torch::full({b, h*w}, INT_MAX, 50 | torch::TensorOptions().dtype(point_cloud.scalar_type()).device(point_cloud.device())); 51 | 52 | const int threads = 1024; 53 | const dim3 blocks((h*w + threads - 1) / threads, b); 54 | 55 | AT_DISPATCH_INTEGRAL_TYPES(depthmap.scalar_type(), "point_cloud_mask_to_depth_forward_cuda", ([&]() { 56 | point_cloud_mask_to_depth_forward_kernel<<>>( 57 | depthmap.packed_accessor32(), 58 | point_cloud.packed_accessor32(), 59 | mask.packed_accessor32(), 60 | h, w); 61 | })); 62 | getLastCudaError("point_cloud_mask_to_depth_forward_kernel() execution failed."); 63 | checkCudaErrors(cudaDeviceSynchronize()); 64 | 65 | int bg_val = 0; 66 | AT_DISPATCH_INTEGRAL_TYPES(depthmap.scalar_type(), "set_background_forward_kernel", ([&]() { 67 | set_background_forward_kernel<<>>( 68 | depthmap.packed_accessor32(), 69 | h, w, bg_val); 70 | })); 71 | getLastCudaError("set_background_forward_kernel() execution failed."); 72 | checkCudaErrors(cudaDeviceSynchronize()); 73 | depthmap = depthmap.reshape({b, h, w, 1}); 74 | return depthmap; 75 | } -------------------------------------------------------------------------------- /ops/cuda/setup.py: -------------------------------------------------------------------------------- 1 | from setuptools import setup 2 | from torch.utils.cpp_extension import BuildExtension, CUDAExtension 3 | import torch 4 | import os 5 | 6 | arch_list = [] 7 | for i in range(torch.cuda.device_count()): 8 | arch = '{}.{}'.format(*torch.cuda.get_device_capability(i)) 9 | if arch not in arch_list: 10 | arch_list.append(arch) 11 | arch_list = ';'.join(arch_list) 12 | os.environ["TORCH_CUDA_ARCH_LIST"] = arch_list 13 | 14 | setup( 15 | name='render_cuda', 16 | ext_modules=[ 17 | CUDAExtension('depth_to_point_cloud_mask_cuda', [ 18 | 'depth_to_point_cloud_mask_cuda.cpp', 19 | 
'depth_to_point_cloud_mask_cuda_kernel.cu', 20 | ]), 21 | CUDAExtension('point_cloud_mask_to_depth_cuda', [ 22 | 'point_cloud_mask_to_depth_cuda.cpp', 23 | 'point_cloud_mask_to_depth_cuda_kernel.cu', 24 | ]) 25 | ], 26 | cmdclass={ 27 | 'build_ext': BuildExtension 28 | }) -------------------------------------------------------------------------------- /ops/image_ops.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn.functional as F 3 | import numpy as np 4 | 5 | 6 | def normalize_depth(depth, com_2d, cube): 7 | """Normalize depth to [-1, 1] 8 | 9 | :param depth: (B, 1, H, W) 10 | :param com_2d: (B, 3) 11 | :param cube_z: float 12 | :return: 13 | """ 14 | B, _, H, W = depth.shape 15 | background = (depth<1e-3).float() 16 | com_z = com_2d[:, 2] 17 | com_z = com_z[:, None, None, None].repeat((1, 1, H, W)) 18 | cube_z = cube[:, 2] 19 | cube_z = cube_z[:, None, None, None].repeat((1, 1, H, W)) 20 | norm_depth = depth + background * (com_z + (cube_z / 2.)) 21 | norm_depth = (norm_depth-com_z) / (cube_z/2.) 22 | return norm_depth 23 | 24 | 25 | def normalize_depth_expand(depth_expand, com_2d, cube): 26 | """Normalize depth expand to [-1, 1] 27 | 28 | :param depth: (B, num_views, 1, H, W) 29 | :param com_2d: (B, 3) 30 | :param cube_z: (B, 3) 31 | :return: 32 | """ 33 | B, N, _, H, W = depth_expand.shape 34 | background = (depth_expand<1e-3).float() 35 | com_z = com_2d[:, 2] 36 | com_z = com_z[:, None, None, None, None].repeat((1, N, 1, H, W)) 37 | cube_z = cube[:, 2] 38 | cube_z = cube_z[:, None, None, None, None].repeat((1, N, 1, H, W)) 39 | norm_depth_expand = depth_expand + background * (com_z + (cube_z / 2.)) 40 | norm_depth_expand = (norm_depth_expand - com_z) / (cube_z / 2.) 41 | return norm_depth_expand 42 | 43 | 44 | def normalize_image(img): 45 | """ 46 | 47 | :param img: Tensor(B, 1, H, W) 48 | :return: Tensor(B, 1, H, W) 49 | """ 50 | B, _, H, W = img.shape 51 | t_min, _ = torch.min(img.reshape([B, -1]), dim=-1) 52 | t_max, _ = torch.max(img.reshape([B, -1]), dim=-1) 53 | t_min = t_min[:, None].repeat(1, H*W).reshape([B, 1, H, W]) 54 | t_max = t_max[:, None].repeat(1, H*W).reshape([B, 1, H, W]) 55 | img = (img-t_min)/(t_max-t_min) 56 | return img 57 | 58 | 59 | sobel_x = np.array([[-1, 0, 1], [-2, 0, 2], [-1, 0, 1]], dtype='float32') 60 | sobel_x = sobel_x.reshape((1, 1, 3, 3)) 61 | sobel_y = np.array([[-1, -2, -1], [0, 0, 0], [1, 2, 1]], dtype='float32') 62 | sobel_y = sobel_y.reshape((1, 1, 3, 3)) 63 | pad = torch.nn.ReplicationPad2d(1) 64 | 65 | def sobel_edge(img): 66 | """ 67 | 68 | :param img: Tensor(B, 1, H, W) 69 | :return: 70 | """ 71 | weight_x = torch.tensor(sobel_x, device=img.device, requires_grad=False) 72 | weight_y = torch.tensor(sobel_y, device=img.device, requires_grad=False) 73 | img = pad(img) 74 | edge_x = F.conv2d(img, weight_x) 75 | edge_y = F.conv2d(img, weight_y) 76 | edge = torch.abs(edge_x) + torch.abs(edge_y) 77 | # edge = torch.sqrt(edge_x*edge_x+edge_y*edge_y) 78 | return edge 79 | 80 | 81 | def normalize_edge(edge: torch.Tensor): 82 | """ 83 | 84 | :param edge: Tensor(B, 1, H, W) 85 | :return: 86 | """ 87 | B, _, H, W = edge.size() 88 | edge = edge.reshape([B, -1]) 89 | torch.min(edge, 1, keepdim=True) 90 | t_min = torch.min(edge, 1, keepdim=True)[0].repeat([1, H*W]) 91 | t_max = torch.max(edge, 1, keepdim=True)[0].repeat([1, H*W]) 92 | edge = (edge-t_min) / (t_max-t_min) 93 | edge = edge.reshape([B, 1, H, W]) 94 | return edge 95 | 96 | 97 | if __name__ == '__main__': 98 | B, N, H, W = 
4, 12, 128, 128 99 | depth_expand = torch.randn((B, N, 1, H, W)).cuda() 100 | com_2d = torch.randn((B, 3)).cuda() 101 | cube_z = 125. 102 | output = normalize_depth_expand(depth_expand, com_2d, cube_z) 103 | print(output.shape) -------------------------------------------------------------------------------- /ops/joint_ops.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import numpy as np 3 | import time 4 | import os 5 | import sys 6 | dir = os.path.dirname(os.path.abspath(__file__)) 7 | root = os.path.dirname(dir) 8 | sys.path.append(root) 9 | from ops.loss_ops import gen_2D_gaussion_map 10 | 11 | 12 | def solve(coor, b): 13 | """Ax=b, solve x, where 14 | A = [[coor[?, 0]**2, coor[?, 0], 1], 15 | [coor[?, 1]**2, coor[?, 1], 1], 16 | [coor[?, 2]**2, coor[?, 2], 1]] 17 | x is (a, b, c) which is coefficient of quadratic function, 18 | 19 | :param coor: Tensor(B, 3) 20 | :param b: Tensor(B, 3) 21 | :return: Tensor(B) -b/(a*2), the symmetry axis of quadratic function 22 | 23 | """ 24 | B = coor.shape[0] 25 | A = torch.ones((B, 3, 3), dtype=torch.float32, device=b.device) 26 | A[:, :, 1] = coor 27 | A[:, :, 0] = A[:, :, 1]*A[:, :, 1] 28 | U, D, V = torch.svd(A) # (B, 3, 3), (B, 3), (B, 3, 3) 29 | b_ = U.transpose(-2, -1) @ b[:, :, None] # (B, 3, 1) 30 | b_ = b_.squeeze() # (B, 3) 31 | y = b_/D # (B, 3) 32 | result = V @ y[:, :, None] # (B, 3, 1) 33 | result = result.squeeze(dim=-1) # (B, 3) 34 | not_zero = (result[:, 0]!=0) 35 | x = coor[:, 1].clone() 36 | x[not_zero] = -result[not_zero, 1] / (result[not_zero, 0] * 2) 37 | return x 38 | 39 | 40 | def heatmap_to_loc(heatmap, adjust=True): 41 | """ 42 | 43 | :param heatmap: Tensor(B, num_joints, H, W) 44 | :return: Tensor(B, num_joints, 2) 45 | """ 46 | device = heatmap.device 47 | B, J, H, W = heatmap.shape 48 | heatmap = heatmap.reshape((B*J, H, W)) 49 | dense_flat = heatmap.reshape((B*J, -1)) 50 | loc = torch.argmax(dense_flat, dim=-1) 51 | y = loc//W # (B*num_joints) 52 | x = loc%W # (B*num_joints) 53 | xx = x.float()+0.5 54 | yy = y.float()+0.5 55 | 56 | # adjust location. It is extremely slow on GPU, so we use CPU 57 | if adjust: 58 | x, y, xx, yy, dense_flat, loc = x.cpu(), y.cpu(), xx.cpu(), yy.cpu(), dense_flat.cpu(), loc.cpu() 59 | x_adjust_index = (0b_x[:, 2]] = -0.25 72 | # adjust_coor_x = coor_x[:, 1] + adjust 73 | # xx[x_adjust_index] = adjust_coor_x 74 | 75 | y_adjust_index = (0 < y) & (y < H - 1) 76 | if torch.any(y_adjust_index): 77 | coor_y = yy[y_adjust_index, None].repeat([1, 3]).float() 78 | coor_y[:, 0] -= 1. 79 | coor_y[:, 2] += 1. 
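            # Sub-pixel refinement along y: sample the heatmap at rows (y-1, y, y+1),
            # fit a parabola through the three values with solve(), and move the peak
            # estimate to the parabola's vertex. The commented lines further down are
            # the cheaper fixed +/-0.25 shift kept as an alternative to this fit.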
80 | b_y = torch.zeros_like(coor_y) 81 | b_y[:, 0] = dense_flat[y_adjust_index, loc[y_adjust_index] - W] 82 | b_y[:, 1] = dense_flat[y_adjust_index, loc[y_adjust_index]] 83 | b_y[:, 2] = dense_flat[y_adjust_index, loc[y_adjust_index] + W] 84 | yy[y_adjust_index] = solve(coor_y, b_y) 85 | # adjust = torch.zeros_like(coor_y[:, 1]) 86 | # adjust[b_y[:, 0] < b_y[:, 2]] = 0.25 87 | # adjust[b_y[:, 0] > b_y[:, 2]] = -0.25 88 | # adjust_coor_y = coor_y[:, 1] + adjust 89 | # yy[y_adjust_index] = adjust_coor_y 90 | 91 | xx, yy = xx.to(device), yy.to(device) 92 | 93 | # for b in range(B): 94 | # for j in range(J): 95 | # ax, ay = x[b, j], y[b, j] 96 | # tmp = heatmap[b, j] 97 | # # if (ax, ay) is not on bound 98 | # if 0tmp[ay, ax+1]: 102 | # # xx[b, j] -= 0.25 103 | # xx[b, j] = solve(torch.stack([ax-0.5, ax+0.5, ax+1.5]), tmp[ay, ax-1:ax+2]) 104 | # 105 | # if 0tmp[ay+1, ax]: 109 | # # yy[b, j] -= 0.25 110 | # yy[b, j] = solve(torch.stack([ay-0.5, ay+0.5, ay+1.5]), tmp[ay-1:ay+2, ax]) 111 | xx = xx.reshape([B, J]) 112 | yy = yy.reshape([B, J]) 113 | return torch.stack([xx, yy], dim=-1) 114 | 115 | 116 | def get_projection_matrices(inter_matrix, view_trans): 117 | """ 118 | 119 | :param inter_matrix: Tensor(B, 3, 3) 120 | :param view_trans: Tensor(B, num_views, 4, 4) 121 | :return: Tensor(B, num_views, 3, 4) 122 | """ 123 | B, N = view_trans.size(0), view_trans.size(1) 124 | eye = torch.eye(3, 4, dtype=torch.float32, device=inter_matrix.device) 125 | eye = eye[None, ...].repeat([B, 1, 1]) # (B, 3, 4) 126 | proj_mat = inter_matrix@eye # (B, 3, 4) 127 | proj_mat = proj_mat[:, None, :, :].repeat(1, N, 1, 1) # (B, num_views, 3, 4) 128 | proj_mat = proj_mat @ view_trans # (B, num_views, 3, 4) 129 | return proj_mat 130 | 131 | 132 | def triangulate(joint_2d, cam_mat, weight=None): 133 | """ 134 | 135 | :param joint_2d: Tensor(B, num_joints, num_views, 2) 136 | :param cam_mat: Tensor(B, num_joints, num_views, 3, 4) 137 | :param weight: Tensor(B, num_joints, num_views) 138 | :return: 139 | """ 140 | B, J, N, _ = joint_2d.shape 141 | joint_2d = joint_2d[..., None] # (B, num_joints, num_views, 2, 1) 142 | c2 = cam_mat[..., 2:, :] # (B, num_joints, num_views, 1, 4) 143 | c12 = cam_mat[..., :2, :] # (B, num_joints, num_views, 2, 4) 144 | A = joint_2d @ c2 - c12 # (B, num_joints, num_views, 2, 4) 145 | if weight is not None: 146 | weight = weight[:, :, :, None, None].repeat([1, 1, 1, 2, 4]) 147 | A = A * weight 148 | A = A.reshape([B, J, N*2, 4]) # (B, num_joints, num_views*2, 4) 149 | device = A.device 150 | A = A.cpu() 151 | _, _, V = torch.svd(A) # (B, num_joints, 4, 4) 152 | V = V.to(device) 153 | X = V[..., -1] # (B, num_joints, 4) 154 | X = X / X[..., -1, None] # (B, num_joints, 4) 155 | joint_3d = X[..., :-1] # (B, num_joints, 3) 156 | return joint_3d 157 | 158 | 159 | def compute_joint_3d(joint_2d, inter_matrix, view_trans, weight=None): 160 | """Calculate 3D joint according to 2D joint. 
161 | 162 | :param joint_2d: Tensor(B, num_views, num_joints, 2) 163 | :param inter_matrix: Tensor(B, 3, 3) 164 | :param view_trans: Tensor(B, num_views, 4, 4) 165 | :param weight: Tensor(B, num_joints, num_views) 166 | :return: Tensor(B, J, 3) 167 | """ 168 | J = joint_2d.size(2) 169 | proj_mat = get_projection_matrices(inter_matrix, view_trans) # (B, num_views, 3, 4) 170 | joint_2d = joint_2d.permute([0, 2, 1, 3]) # (B, num_joints, num_views, 2) 171 | proj_mat = proj_mat[:, None, ...].repeat([1,J, 1, 1, 1]) # (B, num_joints, num_views, 3, 4) 172 | joint_3d = triangulate(joint_2d, proj_mat, weight) 173 | return joint_3d 174 | 175 | 176 | def compute_joint_3d_view_select(confidence, joint_2d_pred, inter_matrix, view_trans): 177 | """ 178 | 179 | :param confidence: Tensor(B, N) 180 | :param joint_2d_pred: Tensor(B, N, J, 2) 181 | :param inter_matrix: Tensor(B, 3, 3) 182 | :param view_trans: Tensor(B, N, 4, 4) 183 | :return: 184 | joint_3d_select: Tensor(B, J, 3) 185 | """ 186 | B, N, J, _ = joint_2d_pred.shape 187 | indices = torch.multinomial(confidence, 10, replacement=False) 188 | joint_2d_indices = indices[:, :, None, None].repeat([1, 1, J, 2]) 189 | joint_2d_select = torch.gather(joint_2d_pred, 1, joint_2d_indices) 190 | view_trans_indices = indices[:, :, None, None].repeat([1, 1, 4, 4]) 191 | view_trans_select = torch.gather(view_trans, 1, view_trans_indices) 192 | 193 | # _, indices = torch.sort(confidence, dim=-1, descending=True) 194 | # joint_2d_indices = indices[:, :, None, None].repeat([1, 1, J, 2]) 195 | # joint_2d_select = joint_2d_pred.reshape([-1])[joint_2d_indices.reshape(-1)<10].reshape([B, 10, J, 2]) 196 | # view_trans_indices = indices[:, :, None, None].repeat([1, 1, 4, 4]) 197 | # view_trans_select = view_trans.reshape([-1])[view_trans_indices.reshape(-1)<10].reshape([B, 10, 4, 4]) 198 | 199 | joint_3d_select = compute_joint_3d(joint_2d_select, inter_matrix, view_trans_select) 200 | return joint_3d_select 201 | 202 | 203 | if __name__ == '__main__': 204 | # x = torch.from_numpy(np.array([1., 1., 1.], dtype=np.float32)) 205 | # b = torch.from_numpy(np.array([3., 3., 3.], dtype=np.float32)) 206 | # print(solve(x, b)) 207 | H = W = 32 208 | B = 480 209 | joint_2d = torch.rand((B, 14, 3), dtype=torch.float32)*32 210 | joint_2d[:, :, -1] = 1. 
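    # Quick sanity check of solve() (an illustrative sketch added here, not part of
    # the original harness): a parabola through (0, 1), (1, 0), (2, 1) has its vertex
    # at x = 1, so the refined coordinate should come back as ~1.
    coor_demo = torch.tensor([[0., 1., 2.]])
    b_demo = torch.tensor([[1., 0., 1.]])
    print(solve(coor_demo, b_demo))  # expected ~ tensor([1.])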
211 | # joint_2d = torch.ones((B, 14, 2), dtype=torch.float32) 212 | # joint_2d = torch.ones((B, 14, 2), dtype=torch.float32) 213 | # joint_2d[:, :, 1] = torch.rand((B, 14), dtype=torch.float32)*32 214 | # joint_2d[:, :, 0] = torch.rand((B, 14), dtype=torch.float32) * 32 215 | print(joint_2d[0]) 216 | joint_2d = joint_2d.cuda() 217 | 218 | heatmap = gen_2D_gaussion_map(joint_2d, H, W, 1, 1, 1) 219 | # print(heatmap.shape) 220 | # heatmap = torch.zeros((1, 2, 32, 32), dtype=torch.float32) 221 | # heatmap[:, :, 5:8, 5:8] = 1 222 | split = time.time() 223 | loc = heatmap_to_loc(heatmap) 224 | print(loc[0]) 225 | print(time.time() - split) 226 | 227 | split = time.time() 228 | loc = heatmap_to_loc(heatmap, False) 229 | print(loc[0]) 230 | print(time.time() - split) 231 | -------------------------------------------------------------------------------- /ops/loss_ops.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn.functional as F 3 | from torch.nn import Module 4 | import math 5 | import numpy as np 6 | import os 7 | import sys 8 | dir = os.path.dirname(os.path.abspath(__file__)) 9 | root = os.path.dirname(dir) 10 | sys.path.append(root) 11 | from ops.point_transform import transform_3D, transform_3D_to_2D, transform_2D 12 | 13 | 14 | def gen_2D_gaussion_map(joint_2d, H, W, fx, fy, sigma): 15 | """ 16 | 17 | :param joint_2d: Tensor(B, J, 3) 18 | :param H: 19 | :param W: 20 | :param sigma: 21 | :return: Tensor(B, J, H, W) 22 | """ 23 | B, J, _ = joint_2d.shape 24 | u = torch.arange(W, device=joint_2d.device) 25 | v = torch.arange(H, device=joint_2d.device) 26 | v_t, u_t = torch.meshgrid([v, u]) # (H, W) 27 | grid = torch.stack([u_t, v_t], dim=-1)[None, ...].repeat([B, 1, 1, 1]) # (B, H, W, 2) 28 | grid = grid.reshape([B, H*W, 2]) 29 | grid = grid[:, None, :, :].repeat([1, J, 1, 1]) # (B, J, H*W, 2) 30 | grid = grid.float() + 0.5 # coordinate of pixel is on center of pixel 31 | joint_2d = joint_2d[:, :, None, :].repeat([1, 1, H*W, 1]) # (B, J, W*H, 2) 32 | scale = joint_2d[:, :, :, 2] 33 | diff_x = ((grid[..., 0] - joint_2d[..., 0]) * scale / fx) ** 2 34 | diff_y = ((grid[..., 1] - joint_2d[..., 1]) * scale / fy) ** 2 35 | # diff_x = (grid[..., 0] - joint_2d[..., 0]) ** 2 36 | # diff_y = (grid[..., 1] - joint_2d[..., 1]) ** 2 37 | diff = diff_x + diff_y 38 | gaussian_map = 1 / (math.sqrt(2 * math.pi) * sigma) * torch.exp(-diff/(2*(sigma ** 2))) 39 | gaussian_map = gaussian_map.reshape([B, J, H, W]) 40 | return gaussian_map 41 | 42 | 43 | class LossCalculator(Module): 44 | def forward(self, heatmaps, joint_2d_pred, joint_3d_pred, view_trans, crop_trans, fx, fy, u0, v0, joint_3d_gt): 45 | """ 46 | 47 | :param heatmap: Tensor(B, N, nstack, J, H, W) 48 | :param joint_2d_pred: Tensor(B, N, J, 2) 49 | :param joint_3d_pred: Tensor(B, J, 3) 50 | :param view_trans: Tensor(B, N, 4, 4) 51 | :param crop_trans: Tensor(B, 3, 3) 52 | :param fx: float 53 | :param fy: float 54 | :param u0: float 55 | :param v0: float 56 | :param joint_3d_gt: Tensor(B, J, 3) 57 | :return: 58 | """ 59 | B, N, nstack, J, H, W = heatmaps.shape 60 | joint_3d_gt_expand = joint_3d_gt[:, None, :, :].repeat([1, N, 1, 1]) # (B, N, J, 3) 61 | crop_trans = crop_trans[:, None, :, :].repeat([1, N, 1, 1]) # (B, N, 3, 3) 62 | joint_3d_gt_expand = joint_3d_gt_expand.reshape([B * N, J, 3]) # (B*N, J, 3) 63 | view_trans = view_trans.reshape([B * N, 4, 4]) 64 | crop_trans = crop_trans.reshape([B * N, 3, 3]) 65 | joint_3d_gt_expand = transform_3D(joint_3d_gt_expand, view_trans) # 
(B*N, J, 3) 66 | joint_2d_gt = transform_3D_to_2D(joint_3d_gt_expand, fx, fy, u0, v0) # (B*N, J, 2) 67 | joint_2d_gt_crop = transform_2D(joint_2d_gt, crop_trans) / 4. 68 | heatmaps = heatmaps.reshape([B * N * nstack, J, H, W]) 69 | gaussian_maps = gen_2D_gaussion_map(joint_2d_gt_crop, H, W, fx, fy, sigma=0.4) # (B*N, J, H, W) 70 | gaussian_maps = gaussian_maps[:, None, :, :, :].repeat([1, nstack, 1, 1, 1]).reshape([B*N*nstack, J, H, W]) 71 | hm_loss = F.mse_loss(heatmaps, gaussian_maps, reduction='none') 72 | hm_loss = hm_loss.reshape([B, -1]).mean(-1) 73 | 74 | joint_2d_pred = joint_2d_pred.reshape([B*N, J, 2]) 75 | error_2d = torch.norm(joint_2d_pred-joint_2d_gt[..., :2], dim=-1).mean(-1).reshape([B, N]).mean(-1) 76 | error_3d = torch.norm(joint_3d_pred - joint_3d_gt, dim=-1).mean(-1) 77 | return hm_loss, error_2d, error_3d, gaussian_maps.reshape([B, N, nstack, J, H, W]) 78 | 79 | 80 | class MultiA2JCalculator(Module): 81 | def __init__(self, reg_factor, conf_factor): 82 | super().__init__() 83 | self.reg_factor = reg_factor 84 | self.conf_factor = conf_factor 85 | self.smooth_l1_loss = torch.nn.SmoothL1Loss(reduction='none') 86 | 87 | def forward(self, anchor_joints_2d_crop, regression_joints_2d_crop, depth_value_norm, joints_3d_pred, 88 | joints_3d_fused, joint_3d_conf, view_trans, crop_trans, com_2d, cube, fx, fy, u0, v0, joints_3d_gt): 89 | """ 90 | :param anchor_joints_2d_crop: Tensor(B, N, J, 2) 91 | :param regression_joints_2d_crop: Tensor(B, N, J, 2) 92 | :param depth_value_norm: Tensor(B, N, J) 93 | :param joint_3d_pred: Tensor(B, N, J, 3) 94 | :param joints_3d_fused: Tensor(B, J, 3) 95 | :param joint_3d_conf: Tensor(B, J, 3) 96 | :param view_trans: Tensor(B, N, 4, 4) 97 | :param crop_trans: Tensor(B, 3, 3) 98 | :param com_2d: Tensor(B, 3) 99 | :param cube: Tensor(B, 3) 100 | :param fx: float 101 | :param fy: float 102 | :param u0: float 103 | :param v0: float 104 | :param joints_3d_gt: Tensor(B, J, 3) 105 | :return: 106 | """ 107 | B, N, J, _ = anchor_joints_2d_crop.shape 108 | joints_3d_gt_expand = joints_3d_gt[:, None, :, :].repeat([1, N, 1, 1]) # (B, N, J, 3) 109 | joints_3d_gt_expand = transform_3D(joints_3d_gt_expand, view_trans) # (B, N, J, 3) 110 | joints_2d_gt_expand = transform_3D_to_2D(joints_3d_gt_expand, fx, fy, u0, v0)[..., :2] # (B, N, J, 2) 111 | crop_trans_expand = crop_trans[:, None, :, :].repeat([1, N, 1, 1]) 112 | joints_2d_gt_expand_crop = transform_2D(joints_2d_gt_expand, crop_trans_expand) 113 | com_z_expand = com_2d[:, None, :].repeat([1, N, 1])[:, :, 2:] 114 | cube_z_expand = cube[:, None, :].repeat([1, N, 1])[:, :, 2:] 115 | depth_gt_norm_expand = (joints_3d_gt_expand[..., 2]-com_z_expand)/(cube_z_expand/2) 116 | 117 | anchor_loss = self.smooth_l1_loss(anchor_joints_2d_crop, joints_2d_gt_expand_crop) 118 | regression_loss = self.smooth_l1_loss(regression_joints_2d_crop, joints_2d_gt_expand_crop) 119 | depth_loss = self.smooth_l1_loss(depth_value_norm, depth_gt_norm_expand) 120 | conf_loss = self.smooth_l1_loss(joint_3d_conf, joints_3d_gt) 121 | 122 | anchor_loss = anchor_loss.reshape([B, -1]).mean(-1) 123 | regression_loss = regression_loss.reshape([B, -1]).mean(-1) 124 | depth_loss = depth_loss.reshape([B, -1]).mean(-1) 125 | conf_loss = conf_loss.reshape([B, -1]).mean(-1) 126 | 127 | reg_loss = regression_loss*0.5 + depth_loss 128 | 129 | loss = anchor_loss + reg_loss * self.reg_factor + conf_loss*self.conf_factor 130 | 131 | error_3d = torch.norm(joints_3d_pred-joints_3d_gt[:, None, :, :], dim=-1).mean(-1) 132 | error_3d_fused = 
torch.norm(joints_3d_fused-joints_3d_gt, dim=-1).mean(-1) 133 | error_3d_conf = torch.norm(joint_3d_conf-joints_3d_gt, dim=-1).mean(-1) 134 | center_error_3d = error_3d[:, N//2] 135 | min_error_3d, _ = torch.min(error_3d, dim=-1) 136 | mean_error_3d = torch.mean(error_3d, dim=-1) 137 | return anchor_loss, reg_loss, conf_loss, loss, center_error_3d, min_error_3d, mean_error_3d, error_3d, \ 138 | error_3d_fused, error_3d_conf 139 | 140 | 141 | class ConfidenceLossCalculator(Module): 142 | def forward(self, confidence, joint_3d_pred, joint_3d_gt, view_trans, fx, fy, u0, v0, joint_2d_expand): 143 | ''' 144 | 145 | :param confidence: Tensor(B, N) 146 | :param joint_3d_pred: Tensor(B, J, 3) 147 | :param joint_3d_gt: Tensor(B, J, 3) 148 | :param view_trans: Tensor(B, N, 4, 4) 149 | :param fx: float 150 | :param fy: flaat 151 | :param u0: float 152 | :param v0: float 153 | :param joint_2d_expand: Tensor(B, N, J, 2) 154 | :return: 155 | ''' 156 | B, N = confidence.shape 157 | J = joint_3d_pred.shape[1] 158 | if N==3: 159 | map_shape = [1, 3] 160 | elif N==9: 161 | map_shape = [3, 3] 162 | elif N==15: 163 | map_shape = [3, 5] 164 | elif N==25: 165 | map_shape = [5, 5] 166 | elif N==81: 167 | map_shape = [9, 9] 168 | with torch.no_grad(): 169 | error_3d = torch.norm(joint_3d_pred - joint_3d_gt, dim=-1).mean(-1) 170 | loss = F.smooth_l1_loss(joint_3d_pred, joint_3d_gt, reduction='none').mean(-1).mean(-1) 171 | confidence = confidence.reshape([B]+map_shape) 172 | return loss, error_3d, confidence 173 | 174 | def get_confidence(self, error_2d, map_shape): 175 | """ 176 | 177 | :param error_2d: Tensor(B, J, N) 178 | :param map_shape: list 179 | :return: 180 | confidence: Tensor(B, N) 181 | """ 182 | B, J, N = error_2d.shape 183 | error_std, error_mean = torch.std_mean(error_2d, dim=-1) 184 | error_std = error_std[:, :, None].repeat([1, 1, N]) 185 | error_mean = error_mean[:, :, None].repeat([1, 1, N]) 186 | confidence = -(error_2d - error_mean) / error_std 187 | soft_confidence = torch.softmax(confidence, dim=-1) 188 | # gauss_confidence = confidence 189 | # confidence = confidence.reshape([B*J, 1]+map_shape) 190 | # gauss_confidence = gaussian_blur2d(confidence, (5, 5), (1, 1)) 191 | # gauss_confidence = gauss_confidence.reshape([B, J, N]) 192 | # soft_gauss_confidence = torch.softmax(gauss_confidence, dim=-1) 193 | return soft_confidence 194 | 195 | 196 | class ViewSelectLossCalculator(Module): 197 | def forward(self, light_heatmaps, heatmap, joint_3d_pred, view_trans, crop_trans, fx, fy, u0, v0, joint_3d_gt, 198 | alpha): 199 | """ 200 | 201 | :param light_heatmaps: Tensor(B, N, nstack, J, H, W) 202 | :param heatmap: Tensor(B, N, J, H, W) 203 | :param joint_3d_pred: Tensor(B, J, 3) 204 | :param view_trans: Tensor(B, N, 4, 4) 205 | :param crop_trans: Tensor(B, 3, 3) 206 | :param fx: float 207 | :param fy: float 208 | :param u0: float 209 | :param v0: float 210 | :param joint_3d_gt: Tensor(B, J, 3) 211 | :return: 212 | """ 213 | B, N, nstack, J, H, W = light_heatmaps.shape 214 | joint_3d_gt_expand = joint_3d_gt[:, None, :, :].repeat([1, N, 1, 1]) # (B, N, J, 3) 215 | crop_trans = crop_trans[:, None, :, :].repeat([1, N, 1, 1]) # (B, N, 3, 3) 216 | joint_3d_gt_expand = joint_3d_gt_expand.reshape([B * N, J, 3]) # (B*N, J, 3) 217 | view_trans = view_trans.reshape([B * N, 4, 4]) 218 | crop_trans = crop_trans.reshape([B * N, 3, 3]) 219 | joint_3d_gt_expand = transform_3D(joint_3d_gt_expand, view_trans) # (B*N, J, 3) 220 | joint_2d_gt = transform_3D_to_2D(joint_3d_gt_expand, fx, fy, u0, v0) # (B*N, J, 2) 221 | 
joint_2d_gt_crop = transform_2D(joint_2d_gt, crop_trans) / 4. 222 | if heatmap is not None: 223 | light_heatmaps = light_heatmaps.reshape([B * N * nstack, J, H, W]) 224 | gaussian_maps = gen_2D_gaussion_map(joint_2d_gt_crop, H, W, fx, fy, sigma=0.4) # (B*N, J, H, W) 225 | gaussian_maps = gaussian_maps[:, None, :, :, :].repeat([1, nstack, 1, 1, 1]).reshape( 226 | [B * N * nstack, J, H, W]) 227 | heatmap = heatmap[:, :, None, :, :, :].repeat(1, 1, nstack, 1, 1, 1) 228 | heatmap = heatmap.reshape([B * N * nstack, J, H, W]) 229 | hm_loss = alpha * F.mse_loss(light_heatmaps, gaussian_maps, reduction='none') + \ 230 | (1-alpha) * F.mse_loss(light_heatmaps, heatmap, reduction='none') 231 | hm_loss = hm_loss.reshape([B, -1]).mean(-1) 232 | gaussian_maps = gaussian_maps.reshape([B, N, nstack, J, H, W]) 233 | gaussian_maps = gaussian_maps[:, :, 0, :, :, :] # (B, N, J, H, W) 234 | else: 235 | hm_loss = torch.zeros([B], dtype=torch.float32, device=light_heatmaps.device) 236 | gaussian_maps = None 237 | error_3d = torch.norm(joint_3d_pred - joint_3d_gt, dim=-1).mean(-1) 238 | return hm_loss, error_3d, gaussian_maps 239 | 240 | 241 | class ViewSelectA2JLossCalculator(Module): 242 | def __init__(self, alpha, conf_factor): 243 | super().__init__() 244 | self.alpha = alpha 245 | self.conf_factor = conf_factor 246 | self.smooth_l1_loss = torch.nn.SmoothL1Loss(reduction='none') 247 | 248 | def forward(self, joints_3d_pred, joint_3d_fused, conf, joint_3d_conf_select, 249 | joints_3d_pred_select_light, joint_3d_fused_select_light, joint_3d_conf_select_light, conf_light, 250 | view_trans, crop_trans, com_2d, cube, fx, fy, u0, v0, joints_3d_gt): 251 | """ 252 | :param joints_3d_pred: Tensor(B, N, J, 3) 253 | :param joint_3d_fused: Tensor(B, J, 3) 254 | :param conf: Tensor(B, N) 255 | :param joint_3d_conf_select: Tensor(B, J, 3) 256 | :param joints_3d_pred_select_light: Tensor(B, k, J, 3) 257 | :param joint_3d_fused_select_light: Tensor(B, J, 3) 258 | :param joint_3d_conf_select_light: Tensor(B, J, 3) 259 | :param conf_light: Tensor(B, N) 260 | :param crop_trans: Tensor(B, 3, 3) 261 | :param com_2d: Tensor(B, 3) 262 | :param cube: Tensor(B, 3) 263 | :param fx: float 264 | :param fy: float 265 | :param u0: float 266 | :param v0: float 267 | :param joints_3d_gt: Tensor(B, J, 3) 268 | :return: 269 | """ 270 | B, N, J, _ = joints_3d_pred.shape 271 | joints_3d_gt_expand = joints_3d_gt[:, None, :, :].repeat([1, N, 1, 1]) # (B, N, J, 3) 272 | joints_3d_gt_expand = transform_3D(joints_3d_gt_expand, view_trans) # (B, N, J, 3) 273 | joints_2d_gt_expand = transform_3D_to_2D(joints_3d_gt_expand, fx, fy, u0, v0)[..., :2] # (B, N, J, 2) 274 | crop_trans_expand = crop_trans[:, None, :, :].repeat([1, N, 1, 1]) 275 | joints_2d_gt_expand_crop = transform_2D(joints_2d_gt_expand, crop_trans_expand) 276 | com_z_expand = com_2d[:, None, :].repeat([1, N, 1])[:, :, 2:] 277 | cube_z_expand = cube[:, None, :].repeat([1, N, 1])[:, :, 2:] 278 | depth_gt_norm_expand = (joints_3d_gt_expand[..., 2] - com_z_expand) / (cube_z_expand / 2) 279 | 280 | # sub_conf = conf[:, :, None]-conf[:, None, :] 281 | # sub_conf_light = conf_light[:, :, None]-conf_light[:, None, :] 282 | # conf_loss = self.smooth_l1_loss(sub_conf*100, sub_conf_light*100) 283 | conf_loss = self.smooth_l1_loss(conf_light * self.conf_factor, conf * self.conf_factor) 284 | 285 | conf_loss = conf_loss.reshape([B, -1]).mean(-1) 286 | 287 | loss = conf_loss 288 | 289 | error_3d_fused_select_light = torch.norm(joint_3d_fused_select_light - joints_3d_gt, dim=-1).mean(-1) 290 | 
error_3d_conf_select_light = torch.norm(joint_3d_conf_select_light - joints_3d_gt, dim=-1).mean(-1) 291 | 292 | error_3d_fused = torch.norm(joint_3d_fused - joints_3d_gt, dim=-1).mean(-1) 293 | error_3d_conf_select = torch.norm(joint_3d_conf_select - joints_3d_gt, dim=-1).mean(-1) 294 | return conf_loss, loss, error_3d_fused_select_light, error_3d_conf_select_light,\ 295 | error_3d_fused, error_3d_conf_select 296 | 297 | 298 | class ViewSelectLossCalculator2(Module): 299 | def forward(self, joint_3d, joint_3d_uniform, confidence_select, joint_3d_gt): 300 | """ 301 | 302 | :param joint_3d: Tensor(B, J, 3) 303 | :param joint_3d_uniform: Tensor(B, J, 3) 304 | :param confidence: Tensor(B, num_views) 305 | :param joint_3d_gt: Tensor(B, J, 3) 306 | :return: 307 | """ 308 | error_3d = torch.norm(joint_3d - joint_3d_gt, dim=-1).mean(-1) 309 | error_3d_uniform = torch.norm(joint_3d_uniform - joint_3d_gt, dim=-1).mean(-1) 310 | # reward = torch.ones_like(error_3d) 311 | # reward[error_3d_uniform 0] = reward[reward > 0] * 10 316 | # loss = torch.mean(-torch.log(confidence_select.mean(dim=-1))*reward, dim=-1) + 1e-3*error_3d/error_3d_uniform 317 | loss = -torch.log(confidence_select.sum(dim=-1)) * weight 318 | return loss, error_3d, error_3d_uniform, reward 319 | 320 | 321 | class FocalLoss_Ori(Module): 322 | """ 323 | This is a implementation of Focal Loss with smooth label cross entropy supported which is proposed in 324 | 'Focal Loss for Dense Object Detection. (https://arxiv.org/abs/1708.02002)' 325 | Focal_Loss= -1*alpha*(1-pt)*log(pt) 326 | :param num_class: 327 | :param alpha: (tensor) 3D or 4D the scalar factor for this criterion 328 | :param gamma: (float,double) gamma > 0 reduces the relative loss for well-classified examples (p>0.5) putting more 329 | focus on hard misclassified example 330 | :param smooth: (float,double) smooth value when cross entropy 331 | :param size_average: (bool, optional) By default, the losses are averaged over each loss element in the batch. 332 | """ 333 | 334 | def __init__(self, num_class, alpha=[0.25,0.75], gamma=2, balance_index=-1): 335 | super(FocalLoss_Ori, self).__init__() 336 | self.num_class = num_class 337 | self.alpha = alpha 338 | self.gamma = gamma 339 | self.eps = 1e-6 340 | 341 | if isinstance(self.alpha, (list, tuple)): 342 | assert len(self.alpha) == self.num_class 343 | self.alpha = torch.Tensor(list(self.alpha)) 344 | elif isinstance(self.alpha, (float,int)): 345 | assert 0-1 347 | alpha = torch.ones((self.num_class)) 348 | alpha *= 1-self.alpha 349 | alpha[balance_index] = self.alpha 350 | self.alpha = alpha 351 | elif isinstance(self.alpha,torch.Tensor): 352 | self.alpha = self.alpha 353 | else: 354 | raise TypeError('Not support alpha type, expect `int|float|list|tuple|torch.Tensor`') 355 | 356 | def forward(self, logit, pred, target): 357 | B = logit.size(0) 358 | if logit.dim() > 2: 359 | # N,C,d1,d2 -> N,C,m (m=d1*d2*...) 360 | logit = logit.view(logit.size(0), logit.size(1), -1) 361 | logit = logit.transpose(1, 2).contiguous() # [N,C,d1*d2..] 
-> [N,d1*d2..,C] 362 | logit = logit.view(-1, logit.size(-1)) # [N,d1*d2..,C]-> [N*d1*d2..,C] 363 | p = torch.softmax(logit, -1) 364 | target = target.view(-1, 1) # [N,d1,d2,...]->[N*d1*d2*...,1] 365 | 366 | # -----------legacy way------------ 367 | # idx = target.cpu().long() 368 | # one_hot_key = torch.FloatTensor(target.size(0), self.num_class).zero_() 369 | # one_hot_key = one_hot_key.scatter_(1, idx, 1) 370 | # if one_hot_key.device != logit.device: 371 | # one_hot_key = one_hot_key.to(logit.device) 372 | # pt = (one_hot_key * logit).sum(1) + epsilon 373 | 374 | # ----------memory saving way-------- 375 | pt = p.gather(1, target).view(-1) + self.eps # avoid apply 376 | logpt = pt.log() 377 | 378 | if self.alpha.device != logpt.device: 379 | alpha = self.alpha.to(logpt.device) 380 | alpha_class = alpha.gather(0,target.view(-1)) 381 | logpt = alpha_class*logpt 382 | loss = -1 * torch.pow(torch.sub(1.0, pt), self.gamma) * logpt 383 | loss = torch.reshape(loss, [B, -1]).mean(dim=-1) 384 | target = target.reshape([B, -1]) 385 | pred = pred.reshape([B, -1]) 386 | acc = torch.sum(target==pred, dim=-1, dtype=torch.float32)/target.shape[-1] 387 | return loss, acc 388 | 389 | if __name__ == '__main__': 390 | fx, fy, u0, v0 = 588.03, 587.07, 320., 320. 391 | # joint_2d = np.array([[0.5, 0.5, 500], 392 | # [0.5, 1.5, 500], 393 | # [0.5, 2.5, 500]], dtype=np.float32) 394 | # joint_2d = torch.from_numpy(joint_2d[None, :, :]) 395 | # gaussian = gen_2D_gaussion_map(joint_2d, 3, 5, 1) 396 | # print(gaussian) 397 | 398 | # B, N, nstack, J, H, W = 8, 40, 4, 14, 64, 64 399 | # heatmaps = torch.rand((B, N, nstack, J, H, W), dtype=torch.float32).cuda() 400 | # joint_2d_pred = torch.rand((B, N, J, 2), dtype=torch.float32).cuda() 401 | # joint_3d_pred = torch.rand((B, J, 3), dtype=torch.float32).cuda() 402 | # view_trans = torch.rand((B, N, 4, 4), dtype=torch.float32).cuda() 403 | # crop_trans = torch.rand((B, 3, 3), dtype=torch.float32).cuda() 404 | # joint_3d_gt = torch.rand((B, J, 3), dtype=torch.float32).cuda() 405 | # loss_calc = LossCalculator() 406 | # loss_calc = torch.nn.DataParallel(loss_calc) 407 | # loss_calc = loss_calc.cuda() 408 | # hm_loss, error_2d, error_3d, gaussian_maps = loss_calc(heatmaps, joint_2d_pred, joint_3d_pred, view_trans, crop_trans, fx, fy, u0, v0, joint_3d_gt) 409 | # print(hm_loss.shape) 410 | # print(error_2d.shape) 411 | # print(error_3d.shape) 412 | # print(gaussian_maps.shape) 413 | 414 | # B = 8 415 | # N = 25 416 | # J = 14 417 | # error_2d_pred = torch.rand([B, N, J], dtype=torch.float32) 418 | # joint_2d_pred = torch.rand([B, N, J, 2], dtype=torch.float32) 419 | # joint_3d_gt = torch.rand([B, J, 3], dtype=torch.float32) 420 | # view_trans = torch.rand([B, N, 4, 4], dtype=torch.float32) 421 | # 422 | # error2d_loss_calc = ConfidenceLossCalculator() 423 | # error, loss, error_map_gt, error_map_pred = error2d_loss_calc(error_2d_pred, joint_2d_pred, joint_3d_gt, view_trans, fx, fy, u0, v0) 424 | # print(error.shape) 425 | # print(loss.shape) 426 | # print(error_map_gt.shape) 427 | # print(error_map_pred.shape) 428 | 429 | logit = torch.rand([4, 2, 480, 640], dtype=torch.float32) 430 | pred = torch.zeros([4, 480, 640], dtype=torch.int64) 431 | label = torch.zeros([4, 480, 640], dtype=torch.int64) 432 | loss_calc = FocalLoss_Ori(2) 433 | loss, acc = loss_calc(logit, pred, label) 434 | print(loss.shape) 435 | print(acc) 436 | print(acc.dtype) 437 | -------------------------------------------------------------------------------- /ops/point_transform.py: 
-------------------------------------------------------------------------------- 1 | import torch 2 | 3 | 4 | def transform_2D_to_3D(points, fx, fy, u0, v0): 5 | """ 6 | 7 | :param points: Tensor(..., 3) 8 | :param fx: 9 | :param fy: 10 | :param u0: 11 | :param v0: 12 | :return: Tensor(..., 3) 13 | """ 14 | x = (points[..., 0] - u0) * points[..., 2] / fx 15 | y = (points[..., 1] - v0) * points[..., 2] / fy 16 | z = points[..., 2] 17 | return torch.stack([x, y, z], dim=-1) 18 | 19 | 20 | def transform_3D_to_2D(points, fx, fy, u0, v0): 21 | u = points[..., 0] / points[..., 2] * fx + u0 22 | v = points[..., 1] / points[..., 2] * fy + v0 23 | d = points[..., 2] 24 | return torch.stack([u, v, d], dim=-1) 25 | 26 | 27 | def transform_3D(points, trans_matrix): 28 | """3D affine transformation 29 | 30 | :param points: Tensor(..., N, 3) 31 | :param trans_matrix: Tensor(..., 4, 4) 32 | :return: Tensor(B, N, 3) 33 | """ 34 | x = points[..., 0] 35 | y = points[..., 1] 36 | z = points[..., 2] 37 | ones = torch.ones_like(x) 38 | points_h = torch.stack([x, y, z, ones], -2) # (..., 4, N) 39 | points_h = trans_matrix @ points_h 40 | points = torch.transpose(points_h, -2, -1)[..., :3] # (..., N, 3) 41 | return points 42 | 43 | 44 | def transform_2D(points, trans_matirx): 45 | """2D affine transformation 46 | 47 | :param points: Tensor(..., N, 2|3) 48 | :param trans_matirx: Tensor(..., 3, 3) 49 | :return: Tensor(..., N, 2|3) 50 | """ 51 | d = points.size(-1) 52 | x = points[..., 0] 53 | y = points[..., 1] 54 | if d > 2: 55 | z = points[..., 2] 56 | ones = torch.ones_like(x) 57 | points_h = torch.stack([x, y, ones], axis=-2) # (B, 3, N) 58 | points_h = trans_matirx @ points_h 59 | points = torch.transpose(points_h, -2, -1) # (B, N, 3) 60 | if d > 2: 61 | points[..., 2] = z 62 | else: 63 | points = points[..., :2] 64 | return points 65 | 66 | 67 | def transform(points, trans_matrix): 68 | """2D or 3D affine transformation. 69 | This function is the same as the function of the above two functions. 70 | But it can backward. 
71 | 72 | :param points: Tensor(B, N, 3) 73 | :param trans_matrix: Tensor(B, 3/4, 3/4) 74 | :return: Tensor(B, N, 3) 75 | """ 76 | # B, N, _ = points.shape 77 | x = points[..., 0] 78 | y = points[..., 1] 79 | z = points[..., 2] 80 | ones = torch.ones_like(x, requires_grad=False) 81 | if trans_matrix.size(1)==4: 82 | # points = points.transpose(1, 2) 83 | # points[:, :3, :] = trans_matrix[:, :3, :3] @ points[:, :3, :] 84 | # points[:, :3, :] += trans_matrix[:, :3, 3, None].repeat([1, 1, N]) 85 | # points = points.transpose(1, 2) 86 | points_h = torch.stack([x, y, z, ones], -2) 87 | points_h = trans_matrix @ points_h 88 | points = torch.transpose(points_h, 1, 2)[..., :3] 89 | elif trans_matrix.size(1)==3: 90 | # points = points.transpose(1, 2) 91 | # points[:, :2, :] = trans_matrix[:, :2, :2] @ points[:, :2, :] 92 | # points[:, :2, :] += trans_matrix[:, :2, 2, None].repeat([1, 1, N]) 93 | # points = points.transpose(1, 2) 94 | points_h = torch.stack([x, y, ones], -2) 95 | points_h = trans_matrix @ points_h 96 | points = torch.transpose(points_h, 1, 2) 97 | points[..., 2] = z 98 | return points 99 | -------------------------------------------------------------------------------- /ops/render.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import numpy as np 3 | import depth_to_point_cloud_mask_cuda 4 | import point_cloud_mask_to_depth_cuda 5 | import sys 6 | import os 7 | dir = os.path.dirname(os.path.abspath(__file__)) 8 | root = os.path.dirname(dir) 9 | sys.path.append(root) 10 | from torch.utils.data import DataLoader 11 | from ops.point_transform import transform_3D, \ 12 | transform_2D_to_3D, transform_3D_to_2D, transform_2D 13 | from feeders.nyu_feeder import NyuFeeder 14 | import logging 15 | 16 | logging.basicConfig(level=logging.INFO, format="%(asctime)s: %(levelname)s %(name)s:%(lineno)d] %(message)s") 17 | logger = logging.getLogger(__file__) 18 | 19 | 20 | # def crop_trans_inv(crop_trans): 21 | # ''' 22 | # 23 | # :param crop_trans: Tensor(..., 3, 3) 24 | # :return: 25 | # ''' 26 | # inv = torch.zeros_like(crop_trans) 27 | # inv[..., 0, 0] = 1 / crop_trans[..., 0, 0] 28 | # inv[..., 0, 2] = -crop_trans[..., 0, 2] / crop_trans[..., 0, 0] 29 | # inv[..., 1, 1] = 1 / crop_trans[..., 1, 1] 30 | # inv[..., 1, 2] = -crop_trans[..., 1, 2] / crop_trans[..., 1, 1] 31 | # inv[..., 2, 2] = 1. 
32 | # 33 | # return inv 34 | 35 | 36 | def depth_to_point_cloud_mask(depth): 37 | """ 38 | 39 | :param depth: Tensor(B, 1, H, W) 40 | :return: point_cloud: Tensor(B, N, 3), mask: Tensor(B, N) 41 | """ 42 | depth = depth.permute((0, 2, 3, 1)) # (B, H, W, 1) 43 | return depth_to_point_cloud_mask_cuda.forward(depth.contiguous()) 44 | 45 | 46 | def point_cloud_mask_to_depth(point_cloud, mask, h, w): 47 | depth = point_cloud_mask_to_depth_cuda.forward(point_cloud.contiguous(), mask, h, w) # (B, H, W, 1) 48 | depth = depth.permute((0, 3, 1, 2)) # (B, 1, H, W) 49 | return depth 50 | 51 | 52 | def uniform_view_matrix(center, level, random_sample, random_rotate): 53 | """Uniform generation of view transformation matrix 54 | 55 | :param center: Tensor(B, 3), 3D coordinate 56 | :param level: int, 1, 2, 3, 4 or 5 57 | :return: Tensor(B, num_views, 4, 4) 58 | """ 59 | B = center.size(0) 60 | if random_sample: 61 | if level == 0: 62 | num_view = 1 63 | elif level == 1: 64 | num_view = 3 65 | elif level == 2: 66 | num_view = 9 67 | elif level == 3: 68 | num_view = 15 69 | elif level == 4: 70 | num_view = 25 71 | elif level == 5: 72 | num_view = 81 73 | else: 74 | logger.critical('level must be 1, 2, 3 or 4.') 75 | raise ValueError('level must be 1, 2, 3 or 4.') 76 | rotation = torch.from_numpy(np.random.uniform(-np.pi/3, np.pi/3, size=[num_view, 2])).to(center.device) 77 | rotation = rotation.float() 78 | else: 79 | if level == 1: 80 | # azimuth = torch.arange(-np.pi / 3, np.pi / 3 + 0.01, np.pi / 3, device=center.device) # 3 81 | azimuth = torch.linspace(-np.pi / 3, np.pi / 3, 3, device=center.device) 82 | elevation = torch.zeros([1], device=center.device) 83 | elif level == 2: 84 | # azimuth = torch.arange(-np.pi / 3, np.pi / 3 + 0.01, np.pi / 3, device=center.device) # 3 85 | # elevation = torch.arange(-np.pi / 3, np.pi / 3 + 0.01, np.pi / 3, device=center.device) # 3 86 | azimuth = torch.linspace(-np.pi / 3, np.pi / 3, 3, device=center.device) # 3 87 | elevation = torch.linspace(-np.pi / 3, np.pi / 3, 3, device=center.device) # 3 88 | elif level == 3: 89 | # azimuth = torch.arange(-np.pi / 3, np.pi / 3 + 0.01, np.pi / 6, device=center.device) # 5 90 | # elevation = torch.arange(-np.pi / 3, np.pi / 3 + 0.01, np.pi / 3, device=center.device) # 3 91 | azimuth = torch.linspace(-np.pi / 3, np.pi / 3, 5, device=center.device) # 5 92 | elevation = torch.linspace(-np.pi / 3, np.pi / 3, 3, device=center.device) # 3 93 | elif level == 4: 94 | # azimuth = torch.arange(-np.pi / 3, np.pi / 3 + 0.01, np.pi / 6, device=center.device) # 5 95 | # elevation = torch.arange(-np.pi / 3, np.pi / 3 + 0.01, np.pi / 6, device=center.device) # 5 96 | azimuth = torch.linspace(-np.pi / 3, np.pi / 3, 5, device=center.device) # 5 97 | elevation = torch.linspace(-np.pi / 3, np.pi / 3, 5, device=center.device) # 5 98 | elif level == 5: 99 | # azimuth = torch.arange(-np.pi / 3, np.pi / 3 + 0.01, np.pi / 12, device=center.device) # 9 100 | # elevation = torch.arange(-np.pi / 3, np.pi / 3 + 0.01, np.pi / 12, device=center.device) # 9 101 | azimuth = torch.linspace(-np.pi / 3, np.pi / 3, 9, device=center.device) # 9 102 | elevation = torch.linspace(-np.pi / 3, np.pi / 3, 9, device=center.device) # 9 103 | else: 104 | logger.critical('level must be 1, 2, 3 or 4.') 105 | raise ValueError('level must be 1, 2, 3 or 4.') 106 | 107 | elevation = elevation.float() 108 | azimuth = azimuth.float() 109 | 110 | rotation = torch.meshgrid(elevation, azimuth) 111 | rotation = torch.reshape(torch.stack(rotation, axis=-1), [-1, 2]) 112 | 113 | 
rotation = rotation[None, :, :].repeat(B, 1, 1) 114 | # print(rotation) 115 | 116 | N = rotation.size(1) 117 | r_theta_x = rotation[..., 0] 118 | r_theta_y = rotation[..., 1] 119 | if random_rotate: 120 | # r_theta_z = torch.rand([B, rotation.shape[1]], dtype=torch.float32, device=center.device) * np.pi * 2 121 | r_theta_z = torch.ones([B, rotation.shape[1]], dtype=torch.float32, device=center.device) * np.pi * 2 * \ 122 | np.random.rand() 123 | else: 124 | r_theta_z = torch.zeros([B, rotation.shape[1]], dtype=torch.float32, device=center.device) 125 | center = center.float() 126 | transform_center = center[:, None, :].repeat(1, N, 1) 127 | zeros = torch.zeros([B, N], dtype=torch.float32, device=center.device) 128 | ones = torch.ones([B, N], dtype=torch.float32, device=center.device) 129 | 130 | c, s = torch.cos(r_theta_x), torch.sin(r_theta_x) 131 | Rx = torch.stack([ones, zeros, zeros, zeros, 132 | zeros, c, -s, zeros, 133 | zeros, s, c, zeros, 134 | zeros, zeros, zeros, ones], axis=-1) 135 | Rx = torch.reshape(Rx, [B, N, 4, 4]) 136 | 137 | c, s = torch.cos(r_theta_y), torch.sin(r_theta_y) 138 | Ry = torch.stack([c, zeros, s, zeros, 139 | zeros, ones, zeros, zeros, 140 | -s, zeros, c, zeros, 141 | zeros, zeros, zeros, ones], axis=-1) 142 | Ry = torch.reshape(Ry, [B, N, 4, 4]) 143 | 144 | c, s = torch.cos(r_theta_z), torch.sin(r_theta_z) 145 | Rz = torch.stack([c, -s, zeros, zeros, 146 | s, c, zeros, zeros, 147 | zeros, zeros, ones, zeros, 148 | zeros, zeros, zeros, ones], axis=-1) 149 | Rz = torch.reshape(Rz, [B, N, 4, 4]) 150 | 151 | to_center = torch.stack([ones, zeros, zeros, -transform_center[..., 0], 152 | zeros, ones, zeros, -transform_center[..., 1], 153 | zeros, zeros, ones, -transform_center[..., 2], 154 | zeros, zeros, zeros, ones], axis=-1) 155 | to_center = torch.reshape(to_center, [B, N, 4, 4]) 156 | 157 | # to_center_inv = torch.stack([ones, zeros, zeros, transform_center[..., 0], 158 | # zeros, ones, zeros, transform_center[..., 1], 159 | # zeros, zeros, ones, transform_center[..., 2], 160 | # zeros, zeros, zeros, ones], axis=-1) 161 | # to_center_inv = torch.reshape(to_center_inv, [B, N, 4, 4]) 162 | 163 | transform_mat = torch.inverse(to_center) @ Ry @ Rx @ Rz @ to_center 164 | return transform_mat 165 | 166 | 167 | def depth_crop_expand(depth_crop, fx, fy, u0, v0, crop_trans, level, com_2d, random_sample, random_ratote=False, 168 | indices=None): 169 | """When 170 | level=1, num_views=3 171 | level=2, num_views=9 172 | level=3, num_views=15 173 | level=4, num_views=25 174 | level=5, num_views=81 175 | 176 | :param depth_crop: Tensor(B, 1, H, W) 177 | :param fx: float 178 | :param fy: float 179 | :param u0: float 180 | :param v0: float 181 | :param crop_trans: Tensor(B, 3, 3) 182 | :param level: int, 1, 2, 3, 4, 5 183 | :param com_2d: Tensor(B, 3) 184 | :param random_sample: bool 185 | :param random_ratote: bool 186 | :param indices: Tensor(B, num_select) 187 | :return: 188 | if indices is None: 189 | depth_crop_expand: Tensor(B, num_views, 1, H, W) 190 | view_mat: Tensor(B, num_views, 4, 4) 191 | else: 192 | depth_crop_expand: Tensor(B, num_select, 1, H, W) 193 | view_mat: Tensor(B, num_select, 4, 4) 194 | """ 195 | B, _, H, W = depth_crop.size() 196 | center = com_2d 197 | center = transform_2D_to_3D(center, fx, fy, u0, v0) 198 | view_mat = uniform_view_matrix(center, level, random_sample, random_ratote) # Tensor(B, num_views, 4, 4) 199 | if indices is None: 200 | num_views = view_mat.size(1) 201 | else: 202 | indices = indices[:, :, None, None].repeat([1, 1, 4, 4]) 
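        # indices holds the ids of the selected views, one row per batch element; it is
        # broadcast to (B, num_select, 4, 4) above so that torch.gather can pull out the
        # matching 4x4 view matrices along the view dimension (dim=1) below.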
203 | view_mat = torch.gather(view_mat, 1, indices) 204 | num_views = indices.size(1) 205 | depth_crop = depth_crop[:, None, :, :, :].repeat([1, num_views, 1, 1, 1]) 206 | depth_crop = depth_crop.reshape([B*num_views, 1, H, W]) 207 | crop_trans = crop_trans[:, None, :, :].repeat([1, num_views, 1, 1]) 208 | crop_trans = crop_trans.reshape([B*num_views, 3, 3]) 209 | view_mat = view_mat.reshape([B*num_views, 4, 4]) 210 | depth_expand = render_view(depth_crop, fx, fy, u0, v0, crop_trans, view_mat) 211 | 212 | depth_expand = depth_expand.reshape([B, num_views, 1, H, W]) 213 | view_mat = view_mat.reshape([B, num_views, 4, 4]) 214 | 215 | return depth_expand, view_mat 216 | 217 | def render_view(depth_crop, fx, fy, u0, v0, crop_trans, view_mat): 218 | ''' 219 | 220 | :param depth_crop: Tensor(B, 1, H, W) 221 | :param fx: float 222 | :param fy: float 223 | :param u0: float 224 | :param v0: float 225 | :param crop_trans: Tensor(B, 3, 3) 226 | :param view_mat: Tensor(B, 4, 4) 227 | :return: Tensor(B, 1, H, W) 228 | ''' 229 | B, _, H, W = depth_crop.size() 230 | pc_crop, mask = depth_to_point_cloud_mask(torch.round(depth_crop).int()) 231 | pc_crop, mask = pc_crop.float(), mask.float() 232 | pc = transform_2D(pc_crop, torch.inverse(crop_trans)) 233 | pc_3d = transform_2D_to_3D(pc, fx, fy, u0, v0) 234 | pc_3d_trans = transform_3D(pc_3d, view_mat) 235 | pc_trans = transform_3D_to_2D(pc_3d_trans, fx, fy, u0, v0) 236 | pc_crop_trans = transform_2D(pc_trans, crop_trans) 237 | depth_expand = point_cloud_mask_to_depth(torch.round(pc_crop_trans).int(), mask.int(), H, W) 238 | return depth_expand.float() 239 | 240 | if __name__ == '__main__': 241 | uniform_view_matrix(torch.zeros([1, 3]), level=4, random_sample=False, random_rotate=False) 242 | # from tqdm import tqdm 243 | # os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID" 244 | # os.environ["CUDA_VISIBLE_DEVICES"] = '0' 245 | # u0 = 320.0 246 | # v0 = 240.0 247 | # fx = 588.03 248 | # fy = 587.07 249 | # random_sample = True 250 | # import matplotlib.pyplot as plt 251 | # import utils.point_transform as np_pt 252 | # B = 4 253 | # train_dataset = NyuFeeder('train', max_jitter=10., depth_sigma=0., offset=30, random_flip=False) 254 | # dataloader = DataLoader(train_dataset, shuffle=False, batch_size=B, num_workers=8) 255 | # for batch_idx, batch_data in enumerate(tqdm(dataloader)): 256 | # item, depth, cropped, joint_3d, crop_trans, com_2d, inter_matrix, cube = batch_data 257 | # 258 | # cropped, crop_trans, com_2d = cropped.cuda(), crop_trans.cuda(), com_2d.cuda() 259 | # confidence = torch.ones([B, 25]) 260 | # indices = torch.multinomial(confidence, 3).cuda() 261 | # crop_expand, view_mat = depth_crop_expand(cropped, fx, fy, u0, v0, crop_trans, 4, com_2d, random_sample=False, 262 | # random_ratote=False, indices=indices) 263 | # cropped = cropped.cpu().numpy() 264 | # crop_trans = crop_trans.cpu().numpy() 265 | # crop_expand = crop_expand.cpu().numpy() 266 | # view_mat = view_mat.cpu().numpy() 267 | # com_2d = com_2d.cpu().numpy() 268 | # cube = cube.numpy() 269 | # com_2d = com_2d[0] 270 | # cube = cube[0] 271 | # plt.imshow(cropped[0, 0, ...]) 272 | # plt.show() 273 | # print(crop_expand.shape) 274 | # for i in range(0, crop_expand.shape[1], 2): 275 | # img = crop_expand[0, i, 0, ...] 276 | # img[img>1e-3] = img[img>1e-3] - com_2d[2] + cube[2]/2. 
277 | # img[img<1e-3] = cube[2] 278 | # img = img / cube[2] 279 | # _joint_3d = joint_3d[0] 280 | # _joint_3d = np_pt.transform_3D(_joint_3d, view_mat[0, i]) 281 | # _joint_2d = np_pt.transform_3D_to_2D(_joint_3d, fx, fy, u0, v0) 282 | # _crop_joint_2d = np_pt.transform_2D(_joint_2d, crop_trans[0]) 283 | # fig, ax = plt.subplots(figsize=plt.figaspect(img)) 284 | # fig.subplots_adjust(0, 0, 1, 1) 285 | # ax.imshow(img, cmap='gray') 286 | # # ax.scatter(_crop_joint_2d[:, 0], _crop_joint_2d[:, 1], c='red', s=100) 287 | # ax.axis('off') 288 | # plt.savefig('{}.jpg'.format(i)) 289 | # plt.show() 290 | # break 291 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | imageio==2.13.5 2 | matplotlib==3.5.1 3 | numpy==1.22.1 4 | opencv-python==4.5.5.62 5 | Pillow==9.0.0 6 | PyYAML==5.4.1 7 | scikit-image==0.19.1 8 | scipy==1.7.3 9 | tensorboard==2.8.0 10 | tensorboardX==2.4.1 11 | tqdm 12 | -f https://download.pytorch.org/whl/torch_stable.html 13 | torch==1.8.1+cu111 14 | torchaudio==0.8.1 15 | torchvision==0.9.1+cu111 16 | -------------------------------------------------------------------------------- /utils/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/iscas3dv/handpose-virtualview/d220efa69ff031077381bc0d4cd58fae7049c329/utils/__init__.py -------------------------------------------------------------------------------- /utils/camera_utils.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import math 3 | import os 4 | import imageio 5 | import cv2 6 | 7 | 8 | def get_camera_external_paramter(target, origin, up): 9 | ''' 10 | Transform target, origin, up to R,T 11 | :param target : lookat/center/target 12 | :param origin : eye/origin 13 | :param up : up 14 | :return : R(3*3 matrix) T(3*1 vector) 15 | ''' 16 | z = np.array(origin-target, dtype=np.float32) 17 | z = z / np.linalg.norm(z) 18 | x = np.cross(z,up) 19 | x = x / np.linalg.norm(x) 20 | y = np.cross(z,x) 21 | y = y / np.linalg.norm(y) 22 | R = np.eye(3, dtype=np.float32) 23 | R[0:3,0:3] = [x,y,z] 24 | T = -target 25 | return R, T 26 | 27 | def RT2affine(R, T): 28 | ''' 29 | Transform R,T to affine matrix(4*4) 30 | :param R : camera rotation 31 | :param T : camera translation 32 | :return : affine matrix(4*4) 33 | ''' 34 | matrix_r = np.eye(4, dtype=np.float32) 35 | matrix_r[0:3,0:3] = R 36 | matrix_t = np.eye(4, dtype=np.float32) 37 | matrix_t[0:3,3] = T 38 | matrix = np.dot(matrix_r,matrix_t) 39 | return matrix 40 | 41 | 42 | def get_camera_external_paramter_matrix(target, origin, up): 43 | """ 44 | Args: 45 | target (np.array): lookat/center/target 46 | origin (np.array): eye/origin 47 | up (np.array): up 48 | 49 | Returns: 50 | np.array: camera external paramter matrix 51 | """ 52 | R, T = get_camera_external_paramter(target, origin, up) 53 | matirx = RT2affine(R, T) 54 | return matirx 55 | 56 | 57 | def get_camera_internal_parameter(fov, width, height): 58 | """ 59 | Args: 60 | fov (float): denotes the camera’s field of view in degrees, fov maps to the x-axis in screen space 61 | width (int/float): 62 | height (int/float): 63 | 64 | Returns: 65 | fx, fy, ux, uy 66 | """ 67 | fov_ = fov * math.pi / 180 68 | focal = (width / 2) / math.tan(fov_ / 2) 69 | 70 | fx = focal 71 | fy = focal 72 | ux = width / 2 73 | uy = height / 2 74 | 75 | return fx, fy, ux, uy 76 | 77 | 
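# Illustrative sketch (not part of the original utilities; the helper name and the
# numbers are assumptions for the example only): with fov=60 degrees mapped to the
# x-axis of a 640x480 image, get_camera_internal_parameter() gives
# focal = 320 / tan(30 deg) ~= 554.26 and principal point (320, 240).
def _example_internal_parameter():
    fx, fy, ux, uy = get_camera_internal_parameter(60., 640, 480)
    print(fx, fy, ux, uy)  # ~554.256, ~554.256, 320.0, 240.0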
78 | def get_camera_internal_parameter_matrix(fx, fy, ux, uy): 79 | """ 80 | Args: 81 | fx: 82 | fy: 83 | ux: 84 | uy: 85 | 86 | Returns: 87 | np.array: camera internal parameter matrix 88 | """ 89 | matrix = np.array([ 90 | [fx, 0., ux], 91 | [ 0., fy, uy], 92 | [ 0., 0., 1] 93 | ], dtype=np.float32) 94 | 95 | return matrix 96 | 97 | 98 | 99 | 100 | if __name__ == '__main__': 101 | r_theta_x = 0 102 | r_theta_y = np.pi/4 103 | r_theta_z = 0 104 | center = np.array([0., 0., 500.]) 105 | 106 | c, s = np.cos(r_theta_x), np.sin(r_theta_x) 107 | Rx = np.array([[1, 0, 0, 0], 108 | [0, c, s, 0], 109 | [0, -s, c, 0], 110 | [0, 0, 0, 1]]) 111 | 112 | c, s = np.cos(r_theta_y), np.sin(r_theta_y) 113 | Ry = np.array([[c, 0, -s, 0], 114 | [0, 1, 0, 0], 115 | [s, 0, c, 0], 116 | [0, 0, 0, 1]]) 117 | 118 | c, s = np.cos(r_theta_z), np.sin(r_theta_z) 119 | Rz = np.array([[c, s, 0, 0], 120 | [-s, c, 0, 0], 121 | [0, 0, 1, 0], 122 | [0, 0, 0,1]]) 123 | 124 | to_center = np.array([[1, 0, 0, -center[0]], 125 | [0, 1, 0, -center[1]], 126 | [0, 0, 1, -center[2]], 127 | [0, 0, 0, 1]]) 128 | 129 | transform_mat = np.linalg.inv(to_center)@Rz@Ry@Rx@to_center 130 | # transform_mat = np.array(transform_mat, dtype=np.int) 131 | print(transform_mat) 132 | 133 | target1 = np.array([-500, 0, 0]) 134 | origin = np.array([0, 0, 0]) 135 | up = np.array([0, 1, 0]) 136 | matrix1 = get_camera_external_paramter_matrix(target1, origin, up) 137 | target2 = np.array([-353.55339, 0, -353.55339]) 138 | matrix2 = get_camera_external_paramter_matrix(target2, origin, up) 139 | matrix = np.matmul(matrix2, np.linalg.inv(matrix1)) 140 | print(matrix) 141 | 142 | matrix = np.matmul(matrix2, np.linalg.inv(matrix1)) 143 | 144 | 145 | target1 = np.array([-501, 0, 0]) 146 | origin = np.array([-1, 0, 0]) 147 | up = np.array([0, 1, 0]) 148 | matrix1 = get_camera_external_paramter_matrix(target1, origin, up) 149 | # print(matrix1) 150 | 151 | target2 = np.array([-1, 0, 500]) 152 | matrix2 = get_camera_external_paramter_matrix(target2, origin, up) 153 | # print(matrix2) 154 | matrix = np.matmul(matrix2, np.linalg.inv(matrix1)) 155 | # print(matrix) 156 | 157 | target1 = np.array([-500, 0, 0]) 158 | origin = np.array([0, 0, 0]) 159 | up = np.array([0, 1, 0]) 160 | matrix1 = get_camera_external_paramter_matrix(target1, origin, up) 161 | # print(matrix1) 162 | coor1 = np.matmul(matrix1, np.array([1, 1, 1, 1])) 163 | ans1 = np.array([1., -1., 501., 1.]) 164 | assert (np.abs(coor1-ans1)<1e-8).all() 165 | 166 | 167 | target2 = np.array([0, 0, 500]) 168 | matrix2 = get_camera_external_paramter_matrix(target2, origin, up) 169 | coor2 = np.matmul(matrix2, np.array([1, 1, 1, 1])) 170 | ans2 = np.array([1., -1., 499., 1.]) 171 | assert (np.abs(coor2-ans2)<1e-8).all() 172 | matrix = np.matmul(matrix2, np.linalg.inv(matrix1)) 173 | # print(matrix) 174 | 175 | matrix = np.matmul(matrix2, np.linalg.inv(matrix1)) 176 | # print(matrix) 177 | assert (np.abs(coor2-np.matmul(matrix, coor1))<1e-8).all() 178 | 179 | target1 = np.array([-500, 0, 0]) 180 | origin = np.array([0, 0, 0]) 181 | up = np.array([0, -1, 0]) 182 | matrix1 = get_camera_external_paramter_matrix(target1, origin, up) 183 | coor1 = np.matmul(matrix1, np.array([1, 1, 1, 1])) 184 | ans1 = np.array([-1., 1., 501., 1.]) 185 | assert (np.abs(coor1-ans1)<1e-8).all() 186 | 187 | target2 = np.array([0, 0, 500]) 188 | matrix2 = get_camera_external_paramter_matrix(target2, origin, up) 189 | coor2 = np.matmul(matrix2, np.array([1, 1, 1, 1])) 190 | ans2 = np.array([-1., 1., 499., 1.]) 191 | assert 
(np.abs(coor2-ans2)<1e-8).all() 192 | 193 | target1 = np.array([-501, 0, 0]) 194 | origin = np.array([-1, 0, 0]) 195 | up = np.array([0, 1, 1]) 196 | matrix1 = get_camera_external_paramter_matrix(target1, origin, up) 197 | coor1 = np.matmul(matrix1, np.array([1, 1, 1, 1])) 198 | ans1 = np.array([0., -1.41421356, 502., 1.]) 199 | assert (np.abs(coor1-ans1)<1e-8).all() 200 | 201 | target1 = np.array([-501, 1, 1]) 202 | origin = np.array([-1, 1, 1]) 203 | up = np.array([0, 1, 0]) 204 | matrix1 = get_camera_external_paramter_matrix(target1, origin, up) 205 | coor = np.array([1, 1, 1, 1]) 206 | coor1 = np.matmul(matrix1, coor) 207 | ans1 = np.array([0., 0., 502., 1.]) 208 | assert (np.abs(coor1-ans1)<1e-8).all() 209 | 210 | 211 | target2 = np.array([0, 0, 499]) 212 | matrix2 = get_camera_external_paramter_matrix(target2, origin, up) 213 | 214 | 215 | fov = 30 216 | width = 128 217 | height = 128 218 | 219 | internal_matrix = np.eye(4) 220 | fx, fy, ux, uy = get_camera_internal_parameter(fov, width, height) 221 | print(fx, fy, ux, uy) 222 | tmp = get_camera_internal_parameter_matrix(fx, fy, ux, uy) 223 | internal_matrix[:3,:3] = tmp 224 | 225 | # print(internal_matrix) 226 | 227 | dataset_dir = '/home/acc/cj/MultiviewRender/render_result' 228 | views = np.load(os.path.join(dataset_dir, '../views.npy'), allow_pickle=True, encoding='latin1').item() 229 | for (key, value) in views.items(): 230 | if(key!='reg_deng/RGB/hand_1/sample5'): 231 | continue 232 | print(key) 233 | path = os.path.join(dataset_dir, key) 234 | print(path) 235 | img_view0 = cv2.imread(path+'_view0.exr', cv2.IMREAD_ANYDEPTH) 236 | img_view1 = cv2.imread(path+'_view1.exr', cv2.IMREAD_ANYDEPTH) 237 | print(value['origin']) 238 | print(value['view0']) 239 | print(value['view1']) 240 | print(value['up']) 241 | external_matrix0 = get_camera_external_paramter_matrix(value['view0'], value['origin'], value['up']) 242 | external_matrix1 = get_camera_external_paramter_matrix(value['view1'], value['origin'], value['up']) 243 | internal_matrix = np.eye(4) 244 | tmp = get_camera_internal_parameter_matrix(fx, fy, ux, uy) 245 | internal_matrix[:3,:3] = tmp 246 | 247 | matrix0 = np.matmul(internal_matrix, external_matrix0) 248 | matrix1 = np.matmul(internal_matrix, external_matrix1) 249 | matrix = np.matmul(matrix1, np.linalg.inv(matrix0)) 250 | 251 | print(img_view0[64,64]) 252 | u, v = 57, 87 253 | print(img_view0[v, u]) 254 | zc = img_view0[v, u] 255 | point0 = np.array([u*zc, v*zc, zc, 1]) 256 | pointc0 = np.matmul(np.linalg.inv(internal_matrix), point0) 257 | print(pointc0) 258 | point = np.matmul(np.linalg.inv(external_matrix0), pointc0) 259 | print(point) 260 | pointc1 = np.matmul(external_matrix1, point) 261 | print(pointc1) 262 | point1 = np.matmul(internal_matrix, pointc1) 263 | point1[:2] = point1[:2]/point1[2] 264 | print(point1) 265 | u, v = int(point1[0]), int(point1[1]) 266 | # print(img_view1[v-3:v+3, u-3:u+3]) 267 | print(img_view1[v, u]) 268 | 269 | break 270 | -------------------------------------------------------------------------------- /utils/hand_detector.py: -------------------------------------------------------------------------------- 1 | """ 2 | Copyright 2015, 2018 ICG, Graz University of Technology 3 | This file is part of PreView. 4 | PreView is free software: you can redistribute it and/or modify 5 | it under the terms of the GNU General Public License as published by 6 | the Free Software Foundation, either version 3 of the License, or 7 | (at your option) any later version.
8 | PreView is distributed in the hope that it will be useful, 9 | but WITHOUT ANY WARRANTY; without even the implied warranty of 10 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 11 | GNU General Public License for more details. 12 | You should have received a copy of the GNU General Public License 13 | along with PreView. If not, see <http://www.gnu.org/licenses/>. 14 | """ 15 | 16 | import numpy as np 17 | import cv2 18 | import math 19 | from scipy import ndimage 20 | import matplotlib.pyplot as plt 21 | import logging 22 | logger = logging.getLogger(__file__) 23 | 24 | 25 | def normlize_depth(depth, com_2d, cube_z): 26 | norm_depth = depth.copy() 27 | norm_depth[norm_depth==0] = com_2d[2]+(cube_z/2.) 28 | norm_depth -= com_2d[2] 29 | norm_depth /= (cube_z/2.) 30 | return norm_depth 31 | 32 | 33 | def calculate_com_2d(dpt): 34 | """Calculate the center of mass 35 | 36 | :param dpt: depth image; invalid pixels which should not be considered must be set to zero 37 | :return: (x,y,z) center of mass 38 | """ 39 | dc = dpt.copy() 40 | cc = ndimage.measurements.center_of_mass(dc > 0) 41 | num = np.count_nonzero(dc) 42 | com_2d = np.array((cc[1] * num, cc[0] * num, dc.sum()), np.float32) 43 | 44 | if num == 0: 45 | return np.array((0, 0, 0), np.float32) 46 | else: 47 | return com_2d / num 48 | 49 | 50 | def calc_mask(depth, com_2d, fx, fy, bbx, offset, minRatioInside=0.75, size=(250, 250, 250)): 51 | if len(size) != 3: 52 | raise ValueError("Size must be 3D and dsize 2D bounding box") 53 | 54 | if bbx is not None: 55 | if len(bbx)==6: 56 | left, right, up, down, front, back = bbx 57 | else: 58 | left, right, up, down = bbx 59 | left = int(math.floor(left * com_2d[2] / fx - offset) / com_2d[2] * fx) 60 | right = int(math.floor(right * com_2d[2] / fx + offset) / com_2d[2] * fx) 61 | up = int(math.floor(up * com_2d[2] / fx - offset) / com_2d[2] * fx) 62 | down = int(math.floor(down * com_2d[2] / fx + offset) / com_2d[2] * fx) 63 | left = max(left, 0) 64 | right = min(right, depth.shape[1]) 65 | up = max(up, 0) 66 | down = min(down, depth.shape[0]) 67 | imgDepth = np.zeros_like(depth) 68 | imgDepth[up:down, left:right] = depth[up:down, left:right] 69 | if len(bbx)==6: 70 | imgDepth[imgDepth < front-offset] = 0. 71 | imgDepth[imgDepth > back+offset] = 0. 72 | else: 73 | imgDepth = depth 74 | 75 | # calculate boundaries 76 | zstart = com_2d[2] - size[2] / 2. 77 | zend = com_2d[2] + size[2] / 2. 78 | xstart = int(math.floor((com_2d[0] * com_2d[2] / fx - size[0] / 2.) / com_2d[2] * fx)) 79 | xend = int(math.floor((com_2d[0] * com_2d[2] / fx + size[0] / 2.) / com_2d[2] * fx)) 80 | ystart = int(math.floor((com_2d[1] * com_2d[2] / fy - size[1] / 2.) / com_2d[2] * fy)) 81 | yend = int(math.floor((com_2d[1] * com_2d[2] / fy + size[1] / 2.) / com_2d[2] * fy)) 82 | 83 | # Check if part within image is large enough; otherwise stop 84 | xstartin = max(xstart, 0) 85 | xendin = min(xend, imgDepth.shape[1]) 86 | ystartin = max(ystart, 0) 87 | yendin = min(yend, imgDepth.shape[0]) 88 | ratioInside = float((xendin - xstartin) * (yendin - ystartin)) / float((xend - xstart) * (yend - ystart)) 89 | if (ratioInside < minRatioInside) and ( 90 | (com_2d[0] < 0) or (com_2d[0] >= imgDepth.shape[1]) or (com_2d[1] < 0) or ( 91 | com_2d[1] >= imgDepth.shape[0])): 92 | # print("Hand largely outside image (ratio (inside) = {})".format(ratioInside)) 93 | raise UserWarning('Hand not inside image') 94 | 95 | if (ystartin back+offset] = 0. 148 | else: 149 | imgDepth = depth 150 | 151 | # calculate boundaries 152 | zstart = com_2d[2] - size[2] / 2.
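# (added note, not in the original file) This boundary block converts the metric crop
# cube `size` (mm), centred on the hand centre of mass com_2d = (u, v, depth), into
# image-space limits: zstart/zend clamp depth directly, while the xstart/xend and
# ystart/yend computed next back-project the half-extent size/2 at depth com_2d[2]
# into pixels via the pinhole model (an offset of d mm at depth z spans d*fx/z pixels).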
153 | zend = com_2d[2] + size[2] / 2. 154 | xstart = int(math.floor((com_2d[0] * com_2d[2] / fx - size[0] / 2.) / com_2d[2] * fx)) 155 | xend = int(math.floor((com_2d[0] * com_2d[2] / fx + size[0] / 2.) / com_2d[2] * fx)) 156 | ystart = int(math.floor((com_2d[1] * com_2d[2] / fy - size[1] / 2.) / com_2d[2] * fy)) 157 | yend = int(math.floor((com_2d[1] * com_2d[2] / fy + size[1] / 2.) / com_2d[2] * fy)) 158 | 159 | # Check if part within image is large enough; otherwise stop 160 | xstartin = max(xstart, 0) 161 | xendin = min(xend, imgDepth.shape[1]) 162 | ystartin = max(ystart, 0) 163 | yendin = min(yend, imgDepth.shape[0]) 164 | ratioInside = float((xendin - xstartin) * (yendin - ystartin)) / float((xend - xstart) * (yend - ystart)) 165 | if (ratioInside < minRatioInside) and ( 166 | (com_2d[0] < 0) or (com_2d[0] >= imgDepth.shape[1]) or (com_2d[1] < 0) or (com_2d[1] >= imgDepth.shape[0])): 167 | # print("Hand largely outside image (ratio (inside) = {})".format(ratioInside)) 168 | raise UserWarning('Hand not inside image') 169 | 170 | # crop patch from source 171 | cropped = imgDepth[max(ystart, 0):min(yend, imgDepth.shape[0]), max(xstart, 0):min(xend, imgDepth.shape[1])].copy() 172 | # add pixels that are out of the image in order to keep aspect ratio 173 | cropped = np.pad(cropped, ((abs(ystart) - max(ystart, 0), abs(yend) - min(yend, imgDepth.shape[0])), 174 | (abs(xstart) - max(xstart, 0), abs(xend) - min(xend, imgDepth.shape[1]))), 175 | mode='constant', constant_values=int(CROP_BG_VALUE)) 176 | msk1 = np.bitwise_and(cropped < zstart, cropped != 0) 177 | msk2 = np.bitwise_and(cropped > zend, cropped != 0) 178 | cropped[msk1] = CROP_BG_VALUE # backface is at 0, it is set later; setting anything outside cube to same value now (was set to zstart earlier) 179 | cropped[msk2] = CROP_BG_VALUE # backface is at 0, it is set later 180 | 181 | # for simulating COM within cube 182 | if docom is True: 183 | com_2d = calculate_com_2d(cropped) 184 | if np.allclose(com_2d, 0.): 185 | com_2d[2] = cropped[cropped.shape[0] // 2, cropped.shape[1] // 2] 186 | com_2d[0] += xstart 187 | com_2d[1] += ystart 188 | 189 | # calculate boundaries 190 | zstart = com_2d[2] - size[2] / 2. 191 | zend = com_2d[2] + size[2] / 2. 192 | xstart = int(math.floor((com_2d[0] * com_2d[2] / fx - size[0] / 2.) / com_2d[2] * fx)) 193 | xend = int(math.floor((com_2d[0] * com_2d[2] / fx + size[0] / 2.) / com_2d[2] * fx)) 194 | ystart = int(math.floor((com_2d[1] * com_2d[2] / fy - size[1] / 2.) / com_2d[2] * fy)) 195 | yend = int(math.floor((com_2d[1] * com_2d[2] / fy + size[1] / 2.) 
/ com_2d[2] * fy)) 196 | 197 | # crop patch from source 198 | cropped = imgDepth[max(ystart, 0):min(yend, imgDepth.shape[0]), 199 | max(xstart, 0):min(xend, imgDepth.shape[1])].copy() 200 | # add pixels that are out of the image in order to keep aspect ratio 201 | cropped = np.pad(cropped, ((abs(ystart) - max(ystart, 0), abs(yend) - min(yend, imgDepth.shape[0])), 202 | (abs(xstart) - max(xstart, 0), abs(xend) - min(xend, imgDepth.shape[1]))), 203 | mode='constant', constant_values=0) 204 | msk1 = np.bitwise_and(cropped < zstart, cropped != 0) 205 | msk2 = np.bitwise_and(cropped > zend, cropped != 0) 206 | cropped[msk1] = zstart 207 | cropped[msk2] = CROP_BG_VALUE # backface is at 0, it is set later 208 | 209 | wb = (xend - xstart) 210 | hb = (yend - ystart) 211 | trans = np.asmatrix(np.eye(3, dtype=np.float32)) 212 | trans[0, 2] = -xstart 213 | trans[1, 2] = -ystart 214 | # compute size of image patch for isotropic scaling where the larger side is the side length of the fixed size image patch (preserving aspect ratio) 215 | if wb > hb: 216 | sz = (dsize[0], int(round(hb * dsize[0] / float(wb)))) 217 | else: 218 | sz = (int(round(wb * dsize[1] / float(hb))), dsize[1]) 219 | 220 | # compute scale factor from cropped ROI in image to fixed size image patch; set up matrix with same scale in x and y (preserving aspect ratio) 221 | roi = cropped 222 | if roi.shape[0] > roi.shape[1]: # Note, roi.shape is (y,x) and sz is (x,y) 223 | scale = np.asmatrix(np.eye(3, dtype=np.float32) * sz[1] / float(roi.shape[0])) 224 | else: 225 | scale = np.asmatrix(np.eye(3, dtype=np.float32) * sz[0] / float(roi.shape[1])) 226 | scale[2, 2] = 1 227 | 228 | # depth resize 229 | rz = cv2.resize(cropped, sz, interpolation=cv2.INTER_NEAREST) 230 | 231 | # Sanity check 232 | numValidPixels = np.sum(rz != CROP_BG_VALUE) 233 | if (numValidPixels < 40) or (numValidPixels < (np.prod(dsize) * 0.01)): 234 | # plt.imshow(rz) 235 | # plt.show() 236 | # print("Too small number of foreground/hand pixels (={})".format(numValidPixels)) 237 | raise UserWarning("No valid hand. Foreground region too small.") 238 | 239 | # Place the resized patch (with preserved aspect ratio) in the center of a fixed size patch (padded with default background values) 240 | ret = np.ones(dsize, np.float32) * CROP_BG_VALUE # use background as filler 241 | xstart = int(math.floor(dsize[0] / 2 - rz.shape[1] / 2)) 242 | xend = int(xstart + rz.shape[1]) 243 | ystart = int(math.floor(dsize[1] / 2 - rz.shape[0] / 2)) 244 | yend = int(ystart + rz.shape[0]) 245 | ret[ystart:yend, xstart:xend] = rz 246 | # print rz.shape 247 | off = np.asmatrix(np.eye(3, dtype=np.float32)) 248 | off[0, 2] = xstart 249 | off[1, 2] = ystart 250 | 251 | # Transformation from original image to fixed size crop includes 252 | # the translation of the "anchor" point of the crop to origin (=trans), 253 | # the (isotropic) scale factor (=scale), and 254 | # the offset of the patch (with preserved aspect ratio) within the fixed size patch (=off) 255 | return ret, off * scale * trans, com_2d 256 | -------------------------------------------------------------------------------- /utils/image_utils.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | 4 | def normlize_depth(depth, com_2d, cube_z): 5 | norm_depth = depth.copy() 6 | norm_depth[norm_depth==0] = com_2d[2]+(cube_z/2.) 7 | norm_depth -= com_2d[2] 8 | norm_depth /= (cube_z/2.)
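# (added note, not in the original file) After the shift and scale above, depth is
# expressed relative to the centre of mass in units of half the cube depth:
# invalid (zero) pixels were moved to the cube's back face and map to +1,
# the centre-of-mass depth maps to 0, and the cube's front face maps to -1.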
9 | return norm_depth 10 | 11 | 12 | def normlize_image(img): 13 | """Normalize an image to the range [0, 1]. 14 | 15 | :param img: np.array(H, W) 16 | :return: np.array(H, W) 17 | """ 18 | t_min = np.min(img) 19 | t_max = np.max(img) 20 | img = (img - t_min) / (t_max - t_min) 21 | return img 22 | -------------------------------------------------------------------------------- /utils/point_transform.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | 4 | def transform_3D(points, trans_matrix): 5 | """3D affine transformation 6 | 7 | :param points: Tensor(..., N, 3) 8 | :param trans_matrix: Tensor(..., 4, 4) 9 | :return: Tensor(..., N, 3) 10 | """ 11 | x = points[..., 0] 12 | y = points[..., 1] 13 | z = points[..., 2] 14 | ones = np.ones_like(x) 15 | points_h = np.stack([x, y, z, ones], -2) # (..., 4, N) 16 | points_h = trans_matrix @ points_h 17 | dim = len(points.shape) 18 | points = np.transpose(points_h, [i for i in range(dim-2)]+[dim-1,dim-2])[..., :3] # (..., N, 3) 19 | return points 20 | 21 | 22 | def transform_3D_to_2D(points, fx, fy, u0, v0): 23 | u = points[..., 0] / points[..., 2] * fx + u0 24 | v = points[..., 1] / points[..., 2] * fy + v0 25 | d = points[..., 2] 26 | return np.stack([u, v, d], axis=-1) 27 | 28 | 29 | def transform_2D_to_3D(points, fx, fy, u0, v0): 30 | x = (points[..., 0] - u0) * points[..., 2] / fx 31 | y = (points[..., 1] - v0) * points[..., 2] / fy 32 | z = points[..., 2] 33 | return np.stack([x, y, z], axis=-1) 34 | 35 | 36 | def transform_2D(points, trans_matirx): 37 | """2D affine transformation 38 | 39 | :param points: Tensor(..., N, 3) 40 | :param trans_matirx: Tensor(..., 3, 3) 41 | :return: Tensor(..., N, 3) 42 | """ 43 | x = points[..., 0] 44 | y = points[..., 1] 45 | z = points[..., 2] 46 | ones = np.ones_like(x) 47 | points_h = np.stack([x, y, ones], axis=-2) # (..., 3, N) 48 | points_h = trans_matirx @ points_h 49 | dim = len(points.shape) 50 | points = np.transpose(points_h, [i for i in range(dim-2)]+[dim-1,dim-2]) # (..., N, 3) 51 | points[..., 2] = z 52 | return points -------------------------------------------------------------------------------- /utils/voxel_utils.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | 4 | def gen_voxel(cropped, com_2d, cube, voxel_len): 5 | """ 6 | 7 | :param cropped: numpy([H, W], float) 8 | :param com_2d: numpy([3], float) 9 | :param cube: numpy([3], float) 10 | :param voxel_len: int 11 | :return: numpy([voxel_len, voxel_len, voxel_len], int) 12 | """ 13 | H, W = cropped.shape 14 | 15 | # x indexes rows, y indexes columns 16 | x = np.arange(H) 17 | y = np.arange(W) 18 | x, y = np.meshgrid(x, y, indexing='ij') 19 | z = cropped.copy() 20 | mask = np.bitwise_and(cropped>=com_2d[2]-cube[2]/2., cropped