├── .gitignore ├── LICENSE ├── README.md ├── config ├── dataset │ ├── icvl.json │ └── nyu.json ├── icvl │ ├── eval_25select1.yaml │ ├── eval_25select15.yaml │ ├── eval_25select15_light.yaml │ ├── eval_25select1_light.yaml │ ├── eval_25select3.yaml │ ├── eval_25select3_light.yaml │ ├── eval_25select9.yaml │ ├── eval_25select9_light.yaml │ ├── eval_uniform1.yaml │ ├── eval_uniform15.yaml │ ├── eval_uniform25.yaml │ ├── eval_uniform3.yaml │ ├── eval_uniform9.yaml │ ├── train_25select15.yaml │ ├── train_25select15_light.yaml │ ├── train_25select3.yaml │ ├── train_25select3_light.yaml │ ├── train_25select9.yaml │ ├── train_25select9_light.yaml │ └── train_uniform.yaml └── nyu │ ├── eval_25select1.yaml │ ├── eval_25select15.yaml │ ├── eval_25select15_light.yaml │ ├── eval_25select1_light.yaml │ ├── eval_25select3.yaml │ ├── eval_25select3_light.yaml │ ├── eval_25select9.yaml │ ├── eval_25select9_light.yaml │ ├── eval_uniform1.yaml │ ├── eval_uniform15.yaml │ ├── eval_uniform25.yaml │ ├── eval_uniform3.yaml │ ├── eval_uniform9.yaml │ ├── train_25select15.yaml │ ├── train_25select15_light.yaml │ ├── train_25select3.yaml │ ├── train_25select3_light.yaml │ ├── train_25select9.yaml │ ├── train_25select9_light.yaml │ └── train_uniform.yaml ├── feeders ├── __init__.py ├── hands2019_feeder.py ├── icvl_feeder.py └── nyu_feeder.py ├── fig └── pipeline.png ├── models ├── __init__.py ├── a2j.py ├── a2j_conf_net.py ├── attention.py ├── conf_net.py ├── layers.py ├── multiview_a2j.py ├── resnet.py └── view_selector_a2j.py ├── ops ├── __init__.py ├── cuda │ ├── __init__.py │ ├── depth_to_point_cloud_mask_cuda.cpp │ ├── depth_to_point_cloud_mask_cuda_kernel.cu │ ├── helper_cuda.h │ ├── helper_string.h │ ├── point_cloud_mask_to_depth_cuda.cpp │ ├── point_cloud_mask_to_depth_cuda_kernel.cu │ └── setup.py ├── image_ops.py ├── joint_ops.py ├── loss_ops.py ├── point_transform.py └── render.py ├── requirements.txt ├── result_nyu_icvl ├── icvl_select_15_views_light.txt ├── icvl_select_1_views_light.txt ├── icvl_select_3_views_light.txt ├── icvl_select_9_views_light.txt ├── icvl_uniform_25_views.txt ├── nyu_select_15_views_light.txt ├── nyu_select_1_view_light.txt ├── nyu_select_3_views_light.txt ├── nyu_select_9_views_light.txt └── nyu_uniform_25_views.txt ├── train_a2j.py ├── utils ├── __init__.py ├── camera_utils.py ├── hand_detector.py ├── image_utils.py ├── parser_utils.py ├── point_transform.py └── voxel_utils.py └── view_select_a2j.py /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | wheels/ 23 | share/python-wheels/ 24 | *.egg-info/ 25 | .installed.cfg 26 | *.egg 27 | MANIFEST 28 | 29 | # PyInstaller 30 | # Usually these files are written by a python script from a template 31 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 
32 | *.manifest 33 | *.spec 34 | 35 | # Installer logs 36 | pip-log.txt 37 | pip-delete-this-directory.txt 38 | 39 | # Unit test / coverage reports 40 | htmlcov/ 41 | .tox/ 42 | .nox/ 43 | .coverage 44 | .coverage.* 45 | .cache 46 | nosetests.xml 47 | coverage.xml 48 | *.cover 49 | *.py,cover 50 | .hypothesis/ 51 | .pytest_cache/ 52 | cover/ 53 | 54 | # Translations 55 | *.mo 56 | *.pot 57 | 58 | # Django stuff: 59 | *.log 60 | local_settings.py 61 | db.sqlite3 62 | db.sqlite3-journal 63 | 64 | # Flask stuff: 65 | instance/ 66 | .webassets-cache 67 | 68 | # Scrapy stuff: 69 | .scrapy 70 | 71 | # Sphinx documentation 72 | docs/_build/ 73 | 74 | # PyBuilder 75 | .pybuilder/ 76 | target/ 77 | 78 | # Jupyter Notebook 79 | .ipynb_checkpoints 80 | 81 | # IPython 82 | profile_default/ 83 | ipython_config.py 84 | 85 | # pyenv 86 | # For a library or package, you might want to ignore these files since the code is 87 | # intended to run in multiple environments; otherwise, check them in: 88 | # .python-version 89 | 90 | # pipenv 91 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 92 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 93 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 94 | # install all needed dependencies. 95 | #Pipfile.lock 96 | 97 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow 98 | __pypackages__/ 99 | 100 | # Celery stuff 101 | celerybeat-schedule 102 | celerybeat.pid 103 | 104 | # SageMath parsed files 105 | *.sage.py 106 | 107 | # Environments 108 | .env 109 | .venv 110 | env/ 111 | venv/ 112 | ENV/ 113 | env.bak/ 114 | venv.bak/ 115 | 116 | # Spyder project settings 117 | .spyderproject 118 | .spyproject 119 | 120 | # Rope project settings 121 | .ropeproject 122 | 123 | # mkdocs documentation 124 | /site 125 | 126 | # mypy 127 | .mypy_cache/ 128 | .dmypy.json 129 | dmypy.json 130 | 131 | # Pyre type checker 132 | .pyre/ 133 | 134 | # pytype static type analyzer 135 | .pytype/ 136 | 137 | # Cython debug symbols 138 | cython_debug/ 139 | 140 | .vscode/ 141 | .idea/ 142 | checkpoint/ 143 | logs/ -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Permission is hereby granted, free of charge, to any person or company obtaining a copy of this software and associated documentation files (the "Software") from the copyright holders to use the Software for any non-commercial purpose. Publication, redistribution and (re)selling of the software, of modifications, extensions, and derivates of it, and of other software containing portions of the licensed Software, are not permitted. The Copyright holder is permitted to publically disclose and advertise the use of the software by any licensee. 2 | 3 | Packaging or distributing parts or whole of the provided software (including code, models and data) as is or as part of other software is prohibited. Commercial use of parts or whole of the provided software (including code, models and data) is strictly prohibited. Using the provided software for promotion of a commercial entity or product, or in any other manner which directly or indirectly results in commercial gains is strictly prohibited. 
4 | 5 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 6 | 7 | The license is modified from this [template](https://github.com/r00tman/EventHands). 8 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Efficient Virtual View Selection for 3D Hand Pose Estimation 2 | 3 | This is the official implementation of the paper 4 | "Efficient Virtual View Selection for 3D Hand Pose Estimation", 5 | AAAI 2022. 6 | 7 | ![pipeline](fig/pipeline.png) 8 | 9 | [Project Webpage](https://me495.github.io/handpose-virtualview/)       [Paper](https://arxiv.org/pdf/2203.15458) 10 | 11 | ## Update (2022-3-30) 12 | We have uploaded prediction results in pixel coordinates (i.e., UVD format) for the NYU and ICVL datasets: https://github.com/iscas3dv/handpose-virtualview/tree/main/result_nyu_icvl. The evaluation code (https://github.com/xinghaochen/awesome-hand-pose-estimation/tree/master/evaluation) can be applied for performance comparison among SoTA methods; a minimal loading sketch is given at the end of this README. 13 | 14 | ## Update (2022-6-7) 15 | The models were damaged during upload to Google Drive. We have uploaded new models. 16 | 17 | ## Update (2022-06-27) 18 | Modified the training method for view selection with the "student" confidence network. 19 | 20 | ## Dependencies 21 | * `CUDA 11.1` 22 | 23 | Other versions of `CUDA` should also work, 24 | but please make sure that the version of `CUDA` used by `PyTorch` matches the version installed on the system, 25 | because our code needs to be compiled with `nvcc`. 26 | 27 | ## Installation 28 | * Clone this repository. 29 | * Install the required packages: 30 | ```angular2html 31 | pip install -r requirements.txt 32 | ``` 33 | * Compile and install the multi-view rendering code: 34 | ```angular2html 35 | cd ops/cuda/ 36 | python setup.py install 37 | ``` 38 | 39 | ## Data preparation 40 | We publish training and evaluation code for the NYU hand pose dataset and the ICVL hand posture dataset. 41 | The data preparation process for these two datasets is as follows. 42 | 43 | ### NYU Hand Pose Dataset 44 | * Download the [NYU Hand Pose Dataset](https://jonathantompson.github.io/NYU_Hand_Pose_Dataset.htm#download). Then arrange the files under the data directory: 45 | ```angular2html 46 | -directory/ 47 | -test/ 48 | -joint_data.mat 49 | ... 50 | -train/ 51 | -joint_data.mat 52 | ... 53 | ``` 54 | * Modify the `path` field of `config/dataset/nyu.json` to point to the data directory. 55 | 56 | ### ICVL Hand Posture Dataset 57 | * Download the [ICVL Hand Posture Dataset](https://labicvl.github.io/hand.html). Then arrange the files under the data directory: 58 | ```angular2html 59 | -directory/ 60 | -Testing/ 61 | -Depth/ 62 | ... 63 | -test_seq_1.txt 64 | -test_seq_2.txt 65 | -Training/ 66 | -Depth/ 67 | ... 68 | -labels.txt 69 | ``` 70 | * Modify the `path` field of `config/dataset/icvl.json` to point to the data directory. 71 | 72 | 73 | ## Evaluation 74 | We have already trained some models that you can [download](https://drive.google.com/file/d/1kfrfLUKynVNH5W8iD2UHLllGUG-aAJtv/view?usp=sharing) and evaluate.
75 | After downloading the models, extract the archive to the `checkpoint` folder in the project directory. 76 | 77 | ### NYU Hand Pose Dataset 78 | #### Uniform sampling 79 | In the output results, `error_3d_conf` shows the average joint error for fusion with confidence, 80 | and `error_3d_fused` shows the average joint error for fusion without confidence. 81 | * Uniformly sampling 25 views: 82 | ```angular2html 83 | python train_a2j.py --config config/nyu/eval_uniform25.yaml 84 | ``` 85 | * Uniformly sampling 15 views: 86 | ```angular2html 87 | python train_a2j.py --config config/nyu/eval_uniform15.yaml 88 | ``` 89 | * Uniformly sampling 9 views: 90 | ```angular2html 91 | python train_a2j.py --config config/nyu/eval_uniform9.yaml 92 | ``` 93 | * Uniformly sampling 3 views: 94 | ```angular2html 95 | python train_a2j.py --config config/nyu/eval_uniform3.yaml 96 | ``` 97 | * Uniformly sampling 1 view: 98 | ```angular2html 99 | python train_a2j.py --config config/nyu/eval_uniform1.yaml 100 | ``` 101 | 102 | #### View selection with the “teacher” confidence network 103 | In the output results, `error_3d_fused` shows the average joint error. 104 | * Select 15 views from 25 views: 105 | ```angular2html 106 | python train_a2j.py --config config/nyu/eval_25select15.yaml 107 | ``` 108 | * Select 9 views from 25 views: 109 | ```angular2html 110 | python train_a2j.py --config config/nyu/eval_25select9.yaml 111 | ``` 112 | * Select 3 views from 25 views: 113 | ```angular2html 114 | python train_a2j.py --config config/nyu/eval_25select3.yaml 115 | ``` 116 | * Select 1 view from 25 views: 117 | ```angular2html 118 | python train_a2j.py --config config/nyu/eval_25select1.yaml 119 | ``` 120 | 121 | #### View selection with the “student” confidence network 122 | In the output results, `epoch_error_3d_conf_select` shows the average joint error. 123 | * Select 15 views from 25 views: 124 | ```angular2html 125 | python view_select_a2j.py --config config/nyu/eval_25select15_light.yaml 126 | ``` 127 | * Select 9 views from 25 views: 128 | ```angular2html 129 | python view_select_a2j.py --config config/nyu/eval_25select9_light.yaml 130 | ``` 131 | * Select 3 views from 25 views: 132 | ```angular2html 133 | python view_select_a2j.py --config config/nyu/eval_25select3_light.yaml 134 | ``` 135 | * Select 1 view from 25 views: 136 | ```angular2html 137 | python view_select_a2j.py --config config/nyu/eval_25select1_light.yaml 138 | ``` 139 | 140 | ### ICVL Hand Posture Dataset 141 | We provide a trained model and configuration files for the ICVL hand posture dataset; 142 | you can follow the commands for the NYU hand pose dataset and use the corresponding configuration files to evaluate. 143 | 144 | 145 | ## Training 146 | You can also train models using the following commands. 147 | 148 | ### NYU Hand Pose Dataset 149 | #### Uniform sampling 150 | We train only a single model that uniformly samples 25 views; 151 | it is also suitable for uniformly sampling 15, 9, 3, and 1 views. 152 | ```angular2html 153 | python train_a2j.py --config config/nyu/train_uniform.yaml 154 | ``` 155 | 156 | #### View selection with the “teacher” confidence network 157 | The following commands train models using the "teacher" network to select 15, 9, and 3 views from 25 views, respectively. 158 | The model that selects 1 view from 25 views is the same as the model that selects 3 views from 25 views.
159 | ```angular2html 160 | python train_a2j.py --config config/nyu/train_25select15.yaml 161 | ``` 162 | ```angular2html 163 | python train_a2j.py --config config/nyu/train_25select9.yaml 164 | ``` 165 | ```angular2html 166 | python train_a2j.py --config config/nyu/train_25select3.yaml 167 | ``` 168 | 169 | #### View selection with the “student” confidence network 170 | The following commands train models using the "student" network to select 15, 9, and 3 views from 25 views, respectively. 171 | The model that selects 1 view from 25 views is the same as the model that selects 3 views from 25 views. 172 | This step requires the trained "teacher" confidence network; 173 | please set the `pre_a2j` field of the configuration file to the path of the previously trained model. 174 | ```angular2html 175 | python view_select_a2j.py --config config/nyu/train_25select15_light.yaml 176 | ``` 177 | ```angular2html 178 | python view_select_a2j.py --config config/nyu/train_25select9_light.yaml 179 | ``` 180 | ```angular2html 181 | python view_select_a2j.py --config config/nyu/train_25select3_light.yaml 182 | ``` 183 | 184 | ### ICVL Hand Posture Dataset 185 | We provide configuration files for the ICVL hand posture dataset; 186 | you can follow the commands for the NYU hand pose dataset and use the corresponding configuration files to train. 187 | 188 | ## Citation 189 | Please cite this paper if you use this work: 190 | ```angular2html 191 | @inproceedings{Cheng2022virtualview, 192 | title={Efficient Virtual View Selection for 3D Hand Pose Estimation}, 193 | author={Jian Cheng and Yanguang Wan and Dexin Zuo and Cuixia Ma and Jian Gu and Ping Tan and Hongan Wang and Xiaoming Deng and Yinda Zhang}, 194 | booktitle={AAAI Conference on Artificial Intelligence (AAAI)}, 195 | year={2022} 196 | } 197 | ``` 198 | 199 | ## Acknowledgements 200 | We use parts of the great code from [A2J](https://github.com/zhangboshen/A2J), 201 | [HandAugment](https://github.com/wozhangzhaohui/HandAugment) 202 | and [attention-is-all-you-need-pytorch](https://github.com/jadore801120/attention-is-all-you-need-pytorch).
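## Loading the released predictions
The files under `result_nyu_icvl/` (see the Update of 2022-3-30) store per-frame joint predictions in UVD pixel coordinates. The sketch below assumes the layout expected by the awesome-hand-pose-estimation evaluation scripts — one frame per line, whitespace-separated `u v d` values for every joint — so please check the files if your copy differs; the helper name `load_uvd_predictions` is illustrative and not part of this repository.
```
import numpy as np

def load_uvd_predictions(path, num_joints):
    """Read one result file into an array of shape (num_frames, num_joints, 3)."""
    with open(path) as f:
        rows = [list(map(float, line.split())) for line in f if line.strip()]
    return np.asarray(rows, dtype=np.float32).reshape(-1, num_joints, 3)

# Per the "selected" lists in config/dataset/*.json, NYU evaluates 14 joints and ICVL 16.
nyu_pred = load_uvd_predictions("result_nyu_icvl/nyu_uniform_25_views.txt", num_joints=14)
icvl_pred = load_uvd_predictions("result_nyu_icvl/icvl_uniform_25_views.txt", num_joints=16)
```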
203 | -------------------------------------------------------------------------------- /config/dataset/icvl.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "icvl", 3 | "path": "/home/dataset/ICVL", 4 | "camera": {"fx": 241.42, "fy": 241.42, "u0": 160.0, "v0": 120.0}, 5 | "height": 240, 6 | "width": 320, 7 | "crop_size": 176, 8 | "cube": [220, 220, 220], 9 | "selected": [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15] 10 | } -------------------------------------------------------------------------------- /config/dataset/nyu.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "nyu", 3 | "path": "/home/dataset/nyu/dataset", 4 | "camera": {"u0": 320.0, "v0": 240.0, "fx": 588.03, "fy": 587.07}, 5 | "height": 480, 6 | "width": 640, 7 | "crop_size": 176, 8 | "cube": [280, 280, 280], 9 | "selected": [0, 3, 6, 9, 12, 15, 18, 21, 24, 25, 27, 30, 31, 32], 10 | "fingers_indices": [[0,13,1], [2,13,3], [4,13,5], [6,13,7], [9,8,10]], 11 | "connections": [ 12 | [13, 1], 13 | [1, 0], 14 | [13, 3], 15 | [3, 2], 16 | [13, 5], 17 | [5, 4], 18 | [13, 7], 19 | [7, 6], 20 | [13, 10], 21 | [10, 9], 22 | [9, 8], 23 | [13, 11], 24 | [13, 12] 25 | ], 26 | "connection_colors": [ 27 | [0.83, 1, 0.7], 28 | [0.83, 1, 1], 29 | [0.66, 1, 0.7], 30 | [0.66, 1, 1], 31 | [0.50, 1, 0.7], 32 | [0.50, 1, 1], 33 | [0.33, 1, 0.7], 34 | [0.33, 1, 1], 35 | [0.00, 1, 0.6], 36 | [0.00, 1, 0.8], 37 | [0.00, 1, 1], 38 | [0.16, 1, 0.7], 39 | [0.16, 1, 1] 40 | ], 41 | "joint_colors": [ 42 | [0.83, 1, 0.7], 43 | [0.83, 1, 1], 44 | [0.66, 1, 0.7], 45 | [0.66, 1, 1], 46 | [0.50, 1, 0.7], 47 | [0.50, 1, 1], 48 | [0.33, 1, 0.7], 49 | [0.33, 1, 1], 50 | [0.00, 1, 0.6], 51 | [0.00, 1, 0.8], 52 | [0.00, 1, 1], 53 | [0.16, 1, 1], 54 | [0.16, 1, 1], 55 | [0.16, 1, 1] 56 | ] 57 | } -------------------------------------------------------------------------------- /config/icvl/eval_25select1.yaml: -------------------------------------------------------------------------------- 1 | phase: eval 2 | dataset: icvl 3 | batch_size: 1 4 | gpus: [0] 5 | log_dir: ./logs/icvl/eval_25select1 6 | pre_model_name: ./checkpoint/icvl/25select3.pth 7 | level: 4 8 | n_head: 1 9 | d_attn: 256 10 | d_k: 64 11 | d_v: 64 12 | d_inner: 256 13 | num_select: 1 14 | num_worker: 5 15 | save_result: True -------------------------------------------------------------------------------- /config/icvl/eval_25select15.yaml: -------------------------------------------------------------------------------- 1 | phase: eval 2 | dataset: icvl 3 | batch_size: 1 4 | gpus: [0] 5 | log_dir: ./logs/icvl/eval_25select15 6 | pre_model_name: ./checkpoint/icvl/25select15.pth 7 | level: 4 8 | n_head: 1 9 | d_attn: 256 10 | d_k: 64 11 | d_v: 64 12 | d_inner: 256 13 | num_select: 15 14 | num_worker: 5 15 | save_result: True -------------------------------------------------------------------------------- /config/icvl/eval_25select15_light.yaml: -------------------------------------------------------------------------------- 1 | dataset: icvl 2 | phase: eval 3 | batch_size: 1 4 | gpus: [0] 5 | log_dir: ./logs/icvl/eval_25select15_light 6 | pre_a2j: ./checkpoint/icvl/25select15.pth 7 | pre_model_path: ./checkpoint/icvl/25select15_light.pth 8 | level: 4 9 | n_head: 1 10 | d_attn: 256 11 | d_k: 64 12 | d_v: 64 13 | d_inner: 256 14 | num_select: 15 15 | num_worker: 8 16 | save_result: True -------------------------------------------------------------------------------- 
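The `camera` block in `config/dataset/icvl.json` and `config/dataset/nyu.json` above is what the feeders turn into a 3x3 pinhole intrinsic matrix (the same `[[fx, 0, u0], [0, fy, v0], [0, 0, 1]]` layout appears as `self.inter_matrix` in `feeders/icvl_feeder.py` and `feeders/nyu_feeder.py` further below). A minimal sketch of that mapping — the helper name `load_intrinsics` is illustrative, not part of the repo:
```
import json
import numpy as np

def load_intrinsics(config_path):
    """Build the pinhole intrinsic matrix from a dataset JSON such as config/dataset/icvl.json."""
    with open(config_path) as f:
        cam = json.load(f)["camera"]
    # Same layout as the feeders' self.inter_matrix.
    return np.array([[cam["fx"], 0., cam["u0"]],
                     [0., cam["fy"], cam["v0"]],
                     [0., 0., 1.]], dtype=np.float32)
```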
/config/icvl/eval_25select1_light.yaml: -------------------------------------------------------------------------------- 1 | dataset: icvl 2 | phase: eval 3 | batch_size: 1 4 | gpus: [0] 5 | log_dir: ./logs/icvl/eval_25select1_light 6 | pre_a2j: ./checkpoint/icvl/25select3.pth 7 | pre_model_path: ./checkpoint/icvl/25select3_light.pth 8 | level: 4 9 | n_head: 1 10 | d_attn: 256 11 | d_k: 64 12 | d_v: 64 13 | d_inner: 256 14 | num_select: 1 15 | num_worker: 8 16 | save_result: True -------------------------------------------------------------------------------- /config/icvl/eval_25select3.yaml: -------------------------------------------------------------------------------- 1 | phase: eval 2 | dataset: icvl 3 | batch_size: 1 4 | gpus: [0] 5 | log_dir: ./logs/icvl/eval_25select3 6 | pre_model_name: ./checkpoint/icvl/25select3.pth 7 | level: 4 8 | n_head: 1 9 | d_attn: 256 10 | d_k: 64 11 | d_v: 64 12 | d_inner: 256 13 | num_select: 3 14 | num_worker: 5 15 | save_result: True -------------------------------------------------------------------------------- /config/icvl/eval_25select3_light.yaml: -------------------------------------------------------------------------------- 1 | dataset: icvl 2 | phase: eval 3 | batch_size: 1 4 | gpus: [0] 5 | log_dir: ./logs/icvl/eval_25select3_light 6 | pre_a2j: ./checkpoint/icvl/25select3.pth 7 | pre_model_path: ./checkpoint/icvl/25select3_light.pth 8 | level: 4 9 | n_head: 1 10 | d_attn: 256 11 | d_k: 64 12 | d_v: 64 13 | d_inner: 256 14 | num_select: 3 15 | num_worker: 8 16 | save_result: True -------------------------------------------------------------------------------- /config/icvl/eval_25select9.yaml: -------------------------------------------------------------------------------- 1 | phase: eval 2 | dataset: icvl 3 | batch_size: 1 4 | gpus: [0] 5 | log_dir: ./logs/icvl/eval_25select9 6 | pre_model_name: ./checkpoint/icvl/25select9.pth 7 | level: 4 8 | n_head: 1 9 | d_attn: 256 10 | d_k: 64 11 | d_v: 64 12 | d_inner: 256 13 | num_select: 9 14 | num_worker: 5 15 | save_result: True -------------------------------------------------------------------------------- /config/icvl/eval_25select9_light.yaml: -------------------------------------------------------------------------------- 1 | dataset: icvl 2 | phase: eval 3 | batch_size: 1 4 | gpus: [0] 5 | log_dir: ./logs/icvl/eval_25select9_light 6 | pre_a2j: ./checkpoint/icvl/25select9.pth 7 | pre_model_path: ./checkpoint/icvl/25select9_light.pth 8 | level: 4 9 | n_head: 1 10 | d_attn: 256 11 | d_k: 64 12 | d_v: 64 13 | d_inner: 256 14 | num_select: 9 15 | num_worker: 8 16 | save_result: True -------------------------------------------------------------------------------- /config/icvl/eval_uniform1.yaml: -------------------------------------------------------------------------------- 1 | phase: eval 2 | dataset: icvl 3 | batch_size: 1 4 | gpus: [0] 5 | log_dir: ./logs/icvl/eval_uniform1 6 | pre_model_name: ./checkpoint/icvl/uniform.pth 7 | level: 0 8 | n_head: 1 9 | d_attn: 256 10 | d_k: 64 11 | d_v: 64 12 | d_inner: 256 13 | num_select: 1 14 | num_worker: 5 15 | save_result: True -------------------------------------------------------------------------------- /config/icvl/eval_uniform15.yaml: -------------------------------------------------------------------------------- 1 | phase: eval 2 | dataset: icvl 3 | batch_size: 1 4 | gpus: [0] 5 | log_dir: ./logs/icvl/eval_uniform15 6 | pre_model_name: ./checkpoint/icvl/uniform.pth 7 | level: 3 8 | n_head: 1 9 | d_attn: 256 10 | d_k: 64 11 | d_v: 64 12 | d_inner: 
256 13 | num_select: 15 14 | num_worker: 5 15 | save_result: True -------------------------------------------------------------------------------- /config/icvl/eval_uniform25.yaml: -------------------------------------------------------------------------------- 1 | phase: eval 2 | dataset: icvl 3 | batch_size: 1 4 | gpus: [0] 5 | log_dir: ./logs/icvl/eval_uniform25 6 | pre_model_name: ./checkpoint/icvl/uniform.pth 7 | level: 4 8 | n_head: 1 9 | d_attn: 256 10 | d_k: 64 11 | d_v: 64 12 | d_inner: 256 13 | num_select: 25 14 | num_worker: 5 15 | save_result: True -------------------------------------------------------------------------------- /config/icvl/eval_uniform3.yaml: -------------------------------------------------------------------------------- 1 | phase: eval 2 | dataset: icvl 3 | batch_size: 1 4 | gpus: [0] 5 | log_dir: ./logs/icvl/eval_uniform3 6 | pre_model_name: ./checkpoint/icvl/uniform.pth 7 | level: 1 8 | n_head: 1 9 | d_attn: 256 10 | d_k: 64 11 | d_v: 64 12 | d_inner: 256 13 | num_select: 3 14 | num_worker: 5 15 | save_result: True -------------------------------------------------------------------------------- /config/icvl/eval_uniform9.yaml: -------------------------------------------------------------------------------- 1 | phase: eval 2 | dataset: icvl 3 | batch_size: 1 4 | gpus: [0] 5 | log_dir: ./logs/icvl/eval_uniform9 6 | pre_model_name: ./checkpoint/icvl/uniform.pth 7 | level: 2 8 | n_head: 1 9 | d_attn: 256 10 | d_k: 64 11 | d_v: 64 12 | d_inner: 256 13 | num_select: 9 14 | num_worker: 5 15 | save_result: True -------------------------------------------------------------------------------- /config/icvl/train_25select15.yaml: -------------------------------------------------------------------------------- 1 | phase: train 2 | dataset: icvl 3 | split: 20 4 | batch_size: 10 5 | num_epoch: 60 6 | gpus: [0] 7 | learning_rate: 0.0005 8 | model_saved_path: ./checkpoint/icvl/train_25select15 9 | log_dir: ./logs/icvl/train_25select15 10 | learning_decay_rate: 0.95 11 | reg_weight: 1e-6 12 | level: 4 13 | n_head: 1 14 | d_attn: 256 15 | d_k: 64 16 | d_v: 64 17 | d_inner: 256 18 | dropout_rate: 0.5 19 | num_select: 15 20 | num_worker: 5 21 | max_jitter: 0. 22 | depth_sigma: 0. 23 | random_flip: False 24 | adjust_cube: False -------------------------------------------------------------------------------- /config/icvl/train_25select15_light.yaml: -------------------------------------------------------------------------------- 1 | dataset: icvl 2 | phase: train 3 | split: 20 4 | batch_size: 32 5 | num_epoch: 10 6 | gpus: [0] 7 | learning_rate: 0.0005 8 | model_saved_path: ./checkpoint/icvl/train_25select15_light 9 | log_dir: ./logs/icvl/train_25select15_light 10 | pre_a2j: ./checkpoint/icvl/train_25select15/model.pth 11 | learning_decay_rate: 0.8 12 | reg_weight: 1e-6 13 | level: 4 14 | n_head: 1 15 | d_attn: 256 16 | d_k: 64 17 | d_v: 64 18 | d_inner: 256 19 | dropout_rate: 0.5 20 | num_select: 15 21 | num_worker: 8 22 | max_jitter: 0. 23 | depth_sigma: 0. 
24 | random_flip: False 25 | adjust_cube: False -------------------------------------------------------------------------------- /config/icvl/train_25select3.yaml: -------------------------------------------------------------------------------- 1 | phase: train 2 | dataset: icvl 3 | split: 20 4 | batch_size: 10 5 | num_epoch: 60 6 | gpus: [0] 7 | learning_rate: 0.0005 8 | model_saved_path: ./checkpoint/icvl/train_25select3 9 | log_dir: ./logs/icvl/train_25select3 10 | learning_decay_rate: 0.95 11 | reg_weight: 1e-6 12 | level: 4 13 | n_head: 1 14 | d_attn: 256 15 | d_k: 64 16 | d_v: 64 17 | d_inner: 256 18 | dropout_rate: 0.5 19 | num_select: 3 20 | num_worker: 5 21 | max_jitter: 0. 22 | depth_sigma: 0. 23 | random_flip: False 24 | adjust_cube: False -------------------------------------------------------------------------------- /config/icvl/train_25select3_light.yaml: -------------------------------------------------------------------------------- 1 | dataset: icvl 2 | phase: train 3 | split: 20 4 | batch_size: 32 5 | num_epoch: 10 6 | gpus: [0] 7 | learning_rate: 0.0005 8 | model_saved_path: ./checkpoint/icvl/train_25select3_light 9 | log_dir: ./logs/icvl/train_25select3_light 10 | pre_a2j: ./checkpoint/icvl/train_25select3/model.pth 11 | learning_decay_rate: 0.8 12 | reg_weight: 1e-6 13 | level: 4 14 | n_head: 1 15 | d_attn: 256 16 | d_k: 64 17 | d_v: 64 18 | d_inner: 256 19 | dropout_rate: 0.5 20 | num_select: 3 21 | num_worker: 8 22 | max_jitter: 0. 23 | depth_sigma: 0. 24 | random_flip: False 25 | adjust_cube: False -------------------------------------------------------------------------------- /config/icvl/train_25select9.yaml: -------------------------------------------------------------------------------- 1 | phase: train 2 | dataset: icvl 3 | split: 20 4 | batch_size: 10 5 | num_epoch: 60 6 | gpus: [0] 7 | learning_rate: 0.0005 8 | model_saved_path: ./checkpoint/icvl/train_25select9 9 | log_dir: ./logs/icvl/train_25select9 10 | learning_decay_rate: 0.95 11 | reg_weight: 1e-6 12 | level: 4 13 | n_head: 1 14 | d_attn: 256 15 | d_k: 64 16 | d_v: 64 17 | d_inner: 256 18 | dropout_rate: 0.5 19 | num_select: 9 20 | num_worker: 5 21 | max_jitter: 0. 22 | depth_sigma: 0. 23 | random_flip: False 24 | adjust_cube: False -------------------------------------------------------------------------------- /config/icvl/train_25select9_light.yaml: -------------------------------------------------------------------------------- 1 | dataset: icvl 2 | phase: train 3 | split: 20 4 | batch_size: 32 5 | num_epoch: 10 6 | gpus: [0] 7 | learning_rate: 0.0005 8 | model_saved_path: ./checkpoint/icvl/train_25select9_light 9 | log_dir: ./logs/icvl/train_25select9_light 10 | pre_a2j: ./checkpoint/icvl/train_25select9/model.pth 11 | learning_decay_rate: 0.8 12 | reg_weight: 1e-6 13 | level: 4 14 | n_head: 1 15 | d_attn: 256 16 | d_k: 64 17 | d_v: 64 18 | d_inner: 256 19 | dropout_rate: 0.5 20 | num_select: 9 21 | num_worker: 8 22 | max_jitter: 0. 23 | depth_sigma: 0. 
24 | random_flip: False 25 | adjust_cube: False -------------------------------------------------------------------------------- /config/icvl/train_uniform.yaml: -------------------------------------------------------------------------------- 1 | phase: train 2 | dataset: icvl 3 | split: 20 4 | batch_size: 10 5 | num_epoch: 60 6 | gpus: [0] 7 | learning_rate: 0.0005 8 | model_saved_path: ./checkpoint/icvl/train_uniform 9 | log_dir: ./logs/icvl/train_uniform 10 | learning_decay_rate: 0.95 11 | reg_weight: 1e-6 12 | level: 4 13 | n_head: 1 14 | d_attn: 256 15 | d_k: 64 16 | d_v: 64 17 | d_inner: 256 18 | dropout_rate: 0.5 19 | num_select: 25 20 | num_worker: 5 21 | max_jitter: 0. 22 | depth_sigma: 0. 23 | random_flip: False 24 | adjust_cube: False -------------------------------------------------------------------------------- /config/nyu/eval_25select1.yaml: -------------------------------------------------------------------------------- 1 | dataset: nyu 2 | phase: eval 3 | split: 5 4 | batch_size: 1 5 | gpus: [0] 6 | log_dir: ./logs/nyu/eval_25select3 7 | pre_model_name: ./checkpoint/nyu/25select3.pth 8 | level: 4 9 | n_head: 1 10 | d_attn: 256 11 | d_k: 64 12 | d_v: 64 13 | d_inner: 256 14 | num_select: 1 15 | num_worker: 5 16 | save_result: True -------------------------------------------------------------------------------- /config/nyu/eval_25select15.yaml: -------------------------------------------------------------------------------- 1 | dataset: nyu 2 | phase: eval 3 | batch_size: 1 4 | gpus: [0] 5 | log_dir: ./logs/nyu/eval_25select15 6 | pre_model_name: ./checkpoint/nyu/25select15.pth 7 | level: 4 8 | n_head: 1 9 | d_attn: 256 10 | d_k: 64 11 | d_v: 64 12 | d_inner: 256 13 | num_select: 15 14 | num_worker: 5 15 | save_result: True -------------------------------------------------------------------------------- /config/nyu/eval_25select15_light.yaml: -------------------------------------------------------------------------------- 1 | dataset: nyu 2 | phase: eval 3 | batch_size: 1 4 | gpus: [0] 5 | log_dir: ./logs/nyu/eval_25select15_light 6 | pre_a2j: ./checkpoint/nyu/25select15.pth 7 | pre_model_path: ./checkpoint/nyu/25select15_light.pth 8 | level: 4 9 | n_head: 1 10 | d_attn: 256 11 | d_k: 64 12 | d_v: 64 13 | d_inner: 256 14 | num_select: 15 15 | num_worker: 4 16 | save_result: True -------------------------------------------------------------------------------- /config/nyu/eval_25select1_light.yaml: -------------------------------------------------------------------------------- 1 | dataset: nyu 2 | phase: eval 3 | batch_size: 1 4 | gpus: [0] 5 | log_dir: ./logs/nyu/eval_25select1_light 6 | pre_a2j: ./checkpoint/nyu/25select3.pth 7 | pre_model_path: ./checkpoint/nyu/25select3_light.pth 8 | level: 4 9 | n_head: 1 10 | d_attn: 256 11 | d_k: 64 12 | d_v: 64 13 | d_inner: 256 14 | num_select: 1 15 | num_worker: 4 16 | save_result: True -------------------------------------------------------------------------------- /config/nyu/eval_25select3.yaml: -------------------------------------------------------------------------------- 1 | dataset: nyu 2 | phase: eval 3 | batch_size: 1 4 | gpus: [0] 5 | log_dir: ./logs/nyu/eval_25select3 6 | pre_model_name: ./checkpoint/nyu/25select3.pth 7 | level: 4 8 | n_head: 1 9 | d_attn: 256 10 | d_k: 64 11 | d_v: 64 12 | d_inner: 256 13 | num_select: 3 14 | num_worker: 5 15 | save_result: True -------------------------------------------------------------------------------- /config/nyu/eval_25select3_light.yaml: 
-------------------------------------------------------------------------------- 1 | dataset: nyu 2 | phase: eval 3 | batch_size: 1 4 | gpus: [0] 5 | log_dir: ./logs/nyu/eval_25select3_light 6 | pre_a2j: ./checkpoint/nyu/25select3.pth 7 | pre_model_path: ./checkpoint/nyu/25select3_light.pth 8 | level: 4 9 | n_head: 1 10 | d_attn: 256 11 | d_k: 64 12 | d_v: 64 13 | d_inner: 256 14 | num_select: 3 15 | num_worker: 4 16 | save_result: True -------------------------------------------------------------------------------- /config/nyu/eval_25select9.yaml: -------------------------------------------------------------------------------- 1 | dataset: nyu 2 | phase: eval 3 | batch_size: 1 4 | gpus: [0] 5 | log_dir: ./logs/nyu/eval_25select9 6 | pre_model_name: ./checkpoint/nyu/25select9.pth 7 | level: 4 8 | n_head: 1 9 | d_attn: 256 10 | d_k: 64 11 | d_v: 64 12 | d_inner: 256 13 | num_select: 9 14 | num_worker: 5 15 | save_result: True -------------------------------------------------------------------------------- /config/nyu/eval_25select9_light.yaml: -------------------------------------------------------------------------------- 1 | dataset: nyu 2 | phase: eval 3 | batch_size: 1 4 | gpus: [0] 5 | log_dir: ./logs/nyu/eval_25select9_light 6 | pre_a2j: ./checkpoint/nyu/25select9.pth 7 | pre_model_path: ./checkpoint/nyu/25select9_light.pth 8 | level: 4 9 | n_head: 1 10 | d_attn: 256 11 | d_k: 64 12 | d_v: 64 13 | d_inner: 256 14 | num_select: 9 15 | num_worker: 4 16 | save_result: True -------------------------------------------------------------------------------- /config/nyu/eval_uniform1.yaml: -------------------------------------------------------------------------------- 1 | dataset: nyu 2 | phase: eval 3 | batch_size: 1 4 | gpus: [0] 5 | log_dir: ./logs/nyu/eval_uniform1 6 | pre_model_name: ./checkpoint/nyu/uniform.pth 7 | level: 0 8 | n_head: 1 9 | d_attn: 256 10 | d_k: 64 11 | d_v: 64 12 | d_inner: 256 13 | num_select: 1 14 | num_worker: 5 15 | save_result: True -------------------------------------------------------------------------------- /config/nyu/eval_uniform15.yaml: -------------------------------------------------------------------------------- 1 | dataset: nyu 2 | phase: eval 3 | batch_size: 1 4 | gpus: [0] 5 | log_dir: ./logs/nyu/eval_uniform15 6 | pre_model_name: ./checkpoint/nyu/uniform.pth 7 | level: 3 8 | n_head: 1 9 | d_attn: 256 10 | d_k: 64 11 | d_v: 64 12 | d_inner: 256 13 | num_select: 15 14 | num_worker: 5 15 | save_result: True -------------------------------------------------------------------------------- /config/nyu/eval_uniform25.yaml: -------------------------------------------------------------------------------- 1 | dataset: nyu 2 | phase: eval 3 | batch_size: 1 4 | gpus: [0] 5 | log_dir: ./logs/nyu/eval_uniform25 6 | pre_model_name: ./checkpoint/nyu/uniform.pth 7 | level: 4 8 | n_head: 1 9 | d_attn: 256 10 | d_k: 64 11 | d_v: 64 12 | d_inner: 256 13 | num_select: 25 14 | num_worker: 5 15 | save_result: True -------------------------------------------------------------------------------- /config/nyu/eval_uniform3.yaml: -------------------------------------------------------------------------------- 1 | dataset: nyu 2 | phase: eval 3 | batch_size: 1 4 | gpus: [0] 5 | log_dir: ./logs/nyu/eval_uniform3 6 | pre_model_name: ./checkpoint/nyu/uniform.pth 7 | level: 1 8 | n_head: 1 9 | d_attn: 256 10 | d_k: 64 11 | d_v: 64 12 | d_inner: 256 13 | num_select: 3 14 | num_worker: 5 15 | save_result: True 
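Across the `eval_uniform*.yaml` files, the `level` field and the number of uniformly sampled views move together (level 0 → 1 view, 1 → 3, 2 → 9, 3 → 15, 4 → 25). A small sketch of that apparent correspondence, read off the configs rather than taken from the repo's code — the names below are illustrative:
```
# Apparent mapping between the `level` field and the number of uniformly
# sampled virtual views, inferred from the eval_uniform*.yaml configs.
LEVEL_TO_NUM_VIEWS = {0: 1, 1: 3, 2: 9, 3: 15, 4: 25}

def num_views_for_level(level: int) -> int:
    return LEVEL_TO_NUM_VIEWS[level]
```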
-------------------------------------------------------------------------------- /config/nyu/eval_uniform9.yaml: -------------------------------------------------------------------------------- 1 | dataset: nyu 2 | phase: eval 3 | batch_size: 1 4 | gpus: [0] 5 | log_dir: ./logs/nyu/eval_uniform9 6 | pre_model_name: ./checkpoint/nyu/uniform.pth 7 | level: 2 8 | n_head: 1 9 | d_attn: 256 10 | d_k: 64 11 | d_v: 64 12 | d_inner: 256 13 | num_select: 9 14 | num_worker: 5 15 | save_result: True -------------------------------------------------------------------------------- /config/nyu/train_25select15.yaml: -------------------------------------------------------------------------------- 1 | dataset: nyu 2 | phase: train 3 | split: 5 4 | batch_size: 10 5 | num_epoch: 50 6 | gpus: [0] 7 | learning_rate: 0.0005 8 | model_saved_path: ./checkpoint/nyu/train_25select15 9 | log_dir: ./logs/nyu/train_25select15 10 | learning_decay_rate: 0.95 11 | reg_weight: 1e-6 12 | level: 4 13 | n_head: 1 14 | d_attn: 256 15 | d_k: 64 16 | d_v: 64 17 | d_inner: 256 18 | dropout_rate: 0.5 19 | num_select: 15 20 | num_worker: 5 21 | max_jitter: 0. 22 | offset: 20. 23 | depth_sigma: 0. 24 | random_flip: False 25 | adjust_cube: False -------------------------------------------------------------------------------- /config/nyu/train_25select15_light.yaml: -------------------------------------------------------------------------------- 1 | dataset: nyu 2 | phase: train 3 | split: 5 4 | batch_size: 32 5 | num_epoch: 30 6 | gpus: [0] 7 | learning_rate: 0.0005 8 | model_saved_path: ./checkpoint/nyu/train_25select15_light 9 | log_dir: ./logs/nyu/train_25select15_light 10 | pre_a2j: ./checkpoint/nyu/train_25select15/model.pth 11 | learning_decay_rate: 0.95 12 | reg_weight: 1e-6 13 | level: 4 14 | n_head: 1 15 | d_attn: 256 16 | d_k: 64 17 | d_v: 64 18 | d_inner: 256 19 | dropout_rate: 0.5 20 | num_select: 15 21 | num_worker: 4 22 | max_jitter: 0. 23 | offset: 20. 24 | depth_sigma: 0. 25 | random_flip: False 26 | adjust_cube: False -------------------------------------------------------------------------------- /config/nyu/train_25select3.yaml: -------------------------------------------------------------------------------- 1 | dataset: nyu 2 | phase: train 3 | split: 5 4 | batch_size: 10 5 | num_epoch: 50 6 | gpus: [0] 7 | learning_rate: 0.0005 8 | model_saved_path: ./checkpoint/nyu/train_25select3 9 | log_dir: ./logs/nyu/train_25select3 10 | learning_decay_rate: 0.95 11 | reg_weight: 1e-6 12 | level: 4 13 | n_head: 1 14 | d_attn: 256 15 | d_k: 64 16 | d_v: 64 17 | d_inner: 256 18 | dropout_rate: 0.5 19 | num_select: 3 20 | num_worker: 5 21 | max_jitter: 0. 22 | offset: 20. 23 | depth_sigma: 0. 24 | random_flip: False 25 | adjust_cube: False -------------------------------------------------------------------------------- /config/nyu/train_25select3_light.yaml: -------------------------------------------------------------------------------- 1 | dataset: nyu 2 | phase: train 3 | split: 5 4 | batch_size: 32 5 | num_epoch: 30 6 | gpus: [0] 7 | learning_rate: 0.0005 8 | model_saved_path: ./checkpoint/nyu/train_25select3_light 9 | log_dir: ./logs/nyu/train_25select3_light 10 | pre_a2j: ./checkpoint/nyu/train_25select3/model.pth 11 | learning_decay_rate: 0.95 12 | reg_weight: 1e-6 13 | level: 4 14 | n_head: 1 15 | d_attn: 256 16 | d_k: 64 17 | d_v: 64 18 | d_inner: 256 19 | dropout_rate: 0.5 20 | num_select: 3 21 | num_worker: 4 22 | max_jitter: 0. 23 | offset: 20. 24 | depth_sigma: 0. 
25 | random_flip: False 26 | adjust_cube: False -------------------------------------------------------------------------------- /config/nyu/train_25select9.yaml: -------------------------------------------------------------------------------- 1 | dataset: nyu 2 | phase: train 3 | split: 5 4 | batch_size: 10 5 | num_epoch: 50 6 | gpus: [0] 7 | learning_rate: 0.0005 8 | model_saved_path: ./checkpoint/nyu/train_25select9 9 | log_dir: ./logs/nyu/train_25select9 10 | learning_decay_rate: 0.95 11 | reg_weight: 1e-6 12 | level: 4 13 | n_head: 1 14 | d_attn: 256 15 | d_k: 64 16 | d_v: 64 17 | d_inner: 256 18 | dropout_rate: 0.5 19 | num_select: 9 20 | num_worker: 5 21 | max_jitter: 0. 22 | offset: 20. 23 | depth_sigma: 0. 24 | random_flip: False 25 | adjust_cube: False -------------------------------------------------------------------------------- /config/nyu/train_25select9_light.yaml: -------------------------------------------------------------------------------- 1 | dataset: nyu 2 | phase: train 3 | split: 5 4 | batch_size: 32 5 | num_epoch: 30 6 | gpus: [0] 7 | learning_rate: 0.0005 8 | model_saved_path: ./checkpoint/nyu/train_25select9_light 9 | log_dir: ./logs/nyu/train_25select9_light 10 | pre_a2j: ./checkpoint/nyu/train_25select9/model.pth 11 | learning_decay_rate: 0.95 12 | reg_weight: 1e-6 13 | level: 4 14 | n_head: 1 15 | d_attn: 256 16 | d_k: 64 17 | d_v: 64 18 | d_inner: 256 19 | dropout_rate: 0.5 20 | num_select: 9 21 | num_worker: 4 22 | max_jitter: 0. 23 | offset: 20. 24 | depth_sigma: 0. 25 | random_flip: False 26 | adjust_cube: False -------------------------------------------------------------------------------- /config/nyu/train_uniform.yaml: -------------------------------------------------------------------------------- 1 | dataset: nyu 2 | phase: train 3 | split: 5 4 | batch_size: 10 5 | num_epoch: 50 6 | gpus: [0] 7 | learning_rate: 0.0005 8 | model_saved_path: ./checkpoint/nyu/train_uniform 9 | log_dir: ./logs/nyu/train_uniform 10 | learning_decay_rate: 0.95 11 | reg_weight: 1e-6 12 | level: 4 13 | n_head: 1 14 | d_attn: 256 15 | d_k: 64 16 | d_v: 64 17 | d_inner: 256 18 | dropout_rate: 0.5 19 | num_worker: 5 20 | max_jitter: 0. 21 | offset: 20. 22 | depth_sigma: 0. 
23 | random_flip: False 24 | adjust_cube: False -------------------------------------------------------------------------------- /feeders/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/iscas3dv/handpose-virtualview/d220efa69ff031077381bc0d4cd58fae7049c329/feeders/__init__.py -------------------------------------------------------------------------------- /feeders/hands2019_feeder.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch.utils.data import Dataset, DataLoader 3 | import matplotlib.pyplot as plt 4 | import numpy as np 5 | import random 6 | import cv2 7 | import traceback 8 | from PIL import Image 9 | import scipy.io as sio 10 | import scipy.ndimage 11 | from glob import glob 12 | import json 13 | import logging 14 | import sys 15 | import os 16 | root = os.path.dirname(os.path.dirname(os.path.abspath(__file__))) 17 | sys.path.append(root) 18 | from utils.hand_detector import crop_area_3d 19 | from utils.image_utils import normlize_depth 20 | import matplotlib.patches as mpathes 21 | from utils.point_transform import transform_3D_to_2D, transform_2D_to_3D 22 | 23 | logging.basicConfig(level=logging.INFO, format="%(asctime)s: %(levelname)s %(name)s:%(lineno)d] %(message)s") 24 | logger = logging.getLogger(__file__) 25 | 26 | 27 | def get_center_from_bbx(path, img_w, img_h, fx, fy, bbx_rectify=True): 28 | # Reference: https://github.com/wozhangzhaohui/HandAugment 29 | cube_len = 150. 30 | lines = [line.split() for line in open(path).readlines()] 31 | bb_list = [[int(x) for x in line[1:]] for line in lines] 32 | center_uvd_list = [] 33 | for bb in bb_list: 34 | if bb[0]>bb[2] or bb[1]>bb[3]: 35 | center_uvd_list.append(None) 36 | continue 37 | w = bb[2] - bb[0] 38 | h = bb[3] - bb[1] 39 | ww = max(w, h) 40 | if bbx_rectify: 41 | if w < ww: 42 | if bb[0] == 0: 43 | bb[0] = bb[2] - ww 44 | elif bb[2] == img_w: 45 | bb[2] = bb[0] + ww 46 | if h < ww: 47 | if bb[1] == 0: 48 | bb[1] = bb[3] - ww 49 | elif bb[3] == img_h: 50 | bb[3] = bb[1] + ww 51 | 52 | center_uvd = np.array([(bb[0] + bb[2]) / 2, 53 | (bb[1] + bb[3]) / 2, 54 | cube_len*2 / ww * fx], dtype=np.float32) 55 | center_uvd_list.append(center_uvd) 56 | return center_uvd_list 57 | 58 | 59 | def load_joint_pred(path, fx, fy, u0, v0): 60 | # Reference: https://github.com/wozhangzhaohui/HandAugment 61 | joint_3d_list = [] 62 | joint_2d_list = [] 63 | with open(path, 'r') as f: 64 | for anno in f.readlines(): 65 | anno = anno.split('\t') 66 | if (anno[-1] == '\n'): 67 | anno = anno[:-1] 68 | if len(anno) == 2: 69 | joint_3d_list.append(None) 70 | joint_2d_list.append(None) 71 | else: 72 | joint_3d = np.array(anno[1:]).astype(np.float32) 73 | joint_3d = joint_3d.reshape(21, 3) 74 | joint_2d = transform_3D_to_2D(joint_3d, fx, fy, u0, v0) 75 | joint_3d_list.append(joint_3d) 76 | joint_2d_list.append(joint_2d) 77 | return joint_3d_list, joint_2d_list 78 | 79 | 80 | class Hands2019Feeder(Dataset): 81 | def __init__(self, phase='train', max_jitter=10., depth_sigma=0., cube_len=None, min_scale=1., max_scale=1., 82 | offset=30., hand_thickness=20., random_flip=False, use_joint=False): 83 | self.phase = phase 84 | self.max_jitter = max_jitter 85 | self.depth_sigma = depth_sigma 86 | self.cube_len = cube_len 87 | self.min_scale = min_scale 88 | self.max_scale = max_scale 89 | self.offset = offset 90 | self.hand_thickness = hand_thickness 91 | self.random_flip = random_flip 92 | self.use_joint = 
use_joint 93 | config_file = os.path.join(root, "config", "dataset", "hands2019.json") 94 | self.config = json.load(open(config_file, 'r')) 95 | self.fx = np.float32(self.config['camera']['fx']) 96 | self.fy = np.float32(self.config['camera']['fy']) 97 | self.u0 = np.float32(self.config['camera']['u0']) 98 | self.v0 = np.float32(self.config['camera']['v0']) 99 | if cube_len is None: 100 | self.cube = np.array(self.config['cube'], dtype=np.float32) 101 | else: 102 | self.cube = np.array([cube_len, cube_len, cube_len], dtype=np.float32) 103 | self.crop_size = self.config['crop_size'] 104 | self.inter_matrix = np.array([[self.fx, 0, self.u0], 105 | [0, self.fy, self.v0], 106 | [0, 0, 1]], dtype=np.float32) 107 | self.depth_name_list, self.joint_3d_list, self.joint_2d_list = self.load_annotation() 108 | 109 | self.com_2d_list = get_center_from_bbx( 110 | os.path.join(self.config['path'], 'training_bbs.txt'), self.config['width'],self.config['height'], 111 | self.fx, self.fy) 112 | if use_joint: 113 | self.joint_3d_pred_list, self.joint_2d_pred_list = load_joint_pred( 114 | os.path.join(self.config["path"], 'training_joint.txt'), self.fx, self.fy, self.u0, self.v0) 115 | num = len(self.depth_name_list) 116 | test_num = num // 10 117 | train_num = num - test_num 118 | if phase == 'train': 119 | self.depth_name_list = self.depth_name_list[:train_num] 120 | self.joint_3d_list = self.joint_3d_list[:train_num] 121 | self.joint_2d_list = self.joint_2d_list[:train_num] 122 | self.com_2d_list = self.com_2d_list[:train_num] 123 | if use_joint: 124 | self.joint_3d_pred_list = self.joint_3d_pred_list[:train_num] 125 | self.joint_2d_pred_list = self.joint_2d_pred_list[:train_num] 126 | else: 127 | self.depth_name_list = self.depth_name_list[train_num:] 128 | self.joint_3d_list = self.joint_3d_list[train_num:] 129 | self.joint_2d_list = self.joint_2d_list[train_num:] 130 | self.com_2d_list = self.com_2d_list[train_num:] 131 | if use_joint: 132 | self.joint_3d_pred_list = self.joint_3d_pred_list[train_num:] 133 | self.joint_2d_pred_list = self.joint_2d_pred_list[train_num:] 134 | self.index = np.arange(len(self.depth_name_list)) 135 | 136 | def load_annotation(self): 137 | joint_anno_path = os.path.join(self.config["path"], 'training_joint_annotation.txt') 138 | bbs_path = os.path.join(self.config["path"], 'training_bbs.txt') 139 | joint_3d_list = [] 140 | joint_2d_list = [] 141 | img_name_list = [] 142 | bbx_list = [] 143 | with open(joint_anno_path, 'r') as f: 144 | for anno in f.readlines(): 145 | anno = anno.split('\t') 146 | if (anno[-1] == '\n'): 147 | anno = anno[:-1] 148 | img_name = anno[0] 149 | joint_3d = np.array(anno[1:]).astype(np.float32) 150 | joint_3d = joint_3d.reshape(21, 3) 151 | joint_2d = transform_3D_to_2D(joint_3d, self.fx, self.fy, self.u0, self.v0) 152 | joint_3d_list.append(joint_3d) 153 | joint_2d_list.append(joint_2d) 154 | img_name_list.append(img_name) 155 | 156 | return img_name_list, joint_3d_list, joint_2d_list 157 | 158 | def show(self, cropped, joint_3d, crop_trans): 159 | joint_2d = self.inter_matrix @ np.transpose(joint_3d, (1, 0)) 160 | joint_2d = joint_2d / joint_2d[2, :] 161 | joint_2d = np.transpose(joint_2d, (1, 0)) 162 | crop_joint_2d = np.ones_like(joint_2d) 163 | crop_joint_2d[:, :2] = joint_2d[:, :2] 164 | crop_joint_2d = np.transpose(crop_joint_2d, (1, 0)) 165 | crop_joint_2d = np.array(crop_trans @ crop_joint_2d) 166 | crop_joint_2d = np.transpose(crop_joint_2d, (1, 0)) 167 | plt.clf() 168 | plt.imshow(cropped) 169 | plt.scatter(crop_joint_2d[:, 0], 
crop_joint_2d[:, 1], c='red') 170 | plt.show() 171 | 172 | def __getitem__(self, item): 173 | item = self.index[item] 174 | depth_path = os.path.join(self.config["path"], 'training_images', self.depth_name_list[item]) 175 | depth = cv2.imread(depth_path, 2).astype(np.float32) 176 | joint_3d, com_2d = self.joint_3d_list[item], self.com_2d_list[item] 177 | try: 178 | if com_2d is None: 179 | raise ValueError 180 | if self.max_jitter>0.: 181 | com_3d = transform_2D_to_3D(com_2d, self.fx, self.fy, self.u0, self.v0) 182 | com_offset = np.random.uniform(low=-1., high=1., size=(3,))*self.max_jitter 183 | com_offset = com_offset.astype(np.float32) 184 | com_3d = com_3d + com_offset 185 | com_2d = transform_3D_to_2D(com_3d, self.fx, self.fy, self.u0, self.v0) 186 | 187 | scale = np.random.uniform(low=self.min_scale, high=self.max_scale) 188 | cube = self.cube * scale 189 | if self.use_joint: 190 | joint_2d_pred, joint_3d_pred = self.joint_2d_pred_list[item], self.joint_3d_pred_list[item] 191 | left = np.min(joint_2d_pred[:, 0]) 192 | right = np.max(joint_2d_pred[:, 0]) 193 | up = np.min(joint_2d_pred[:, 1]) 194 | down = np.max(joint_2d_pred[:, 1]) 195 | front = np.min(joint_3d_pred[:, 2])-self.hand_thickness 196 | back = np.max(joint_3d_pred[:, 2]) 197 | bbx = [left, right, up, down, front, back] 198 | cropped, crop_trans, com_2d = crop_area_3d(depth, com_2d, self.fx, self.fy, bbx=bbx, offset=self.offset, 199 | size=cube, dsize=[self.crop_size, self.crop_size], docom=False) 200 | else: 201 | cropped, crop_trans, com_2d = crop_area_3d(depth, com_2d, self.fx, self.fy, size=cube, 202 | dsize=[self.crop_size, self.crop_size], docom=False) 203 | except Exception as e: 204 | # exc_type, exc_value, exc_obj = sys.exc_info() 205 | # traceback.print_tb(exc_obj) 206 | # print(com_2d) 207 | # print(self.depth_name_list[item]) 208 | # plt.imshow(depth) 209 | # plt.show() 210 | # height = down - up 211 | # width = right - left 212 | # rect = mpathes.Rectangle([left, up], width, height, color='r', fill=False, linewidth=2) 213 | # fig, ax = plt.subplots() 214 | # ax.imshow(depth) 215 | # ax.add_patch(rect) 216 | # plt.show() 217 | # plt.imshow(mask_) 218 | # plt.show() 219 | return item, None, None, None, None, None, None, None 220 | 221 | if self.random_flip: 222 | to_center = np.array([[1., 0., self.crop_size/2.], 223 | [0., 1., self.crop_size/2.], 224 | [0., 0., 1]], np.float32) 225 | to_origin = np.array([[1., 0., -self.crop_size/2.], 226 | [0., 1., -self.crop_size/2.], 227 | [0., 0., 1]], np.float32) 228 | if random.random()>0.5: 229 | # Horizontal flip 230 | cropped = cropped[:, ::-1] 231 | matrix = np.eye(3, dtype=np.float32) 232 | matrix[0, 0] = -1 233 | flip_matrix = to_center @ matrix @ to_origin 234 | crop_trans = flip_matrix @ crop_trans 235 | 236 | if random.random()>0.5: 237 | # Vertical flip 238 | cropped = cropped[::-1, :] 239 | matrix = np.eye(3, dtype=np.float32) 240 | matrix[1, 1] = -1 241 | flip_matrix = to_center @ matrix @ to_origin 242 | crop_trans = flip_matrix @ crop_trans 243 | 244 | cropped = np.array(cropped) 245 | 246 | if self.depth_sigma>0.: 247 | # noise = np.random.randn(self.crop_size, self.crop_size)*self.noise_sigma 248 | noise = np.random.normal(0, self.depth_sigma, size=(self.crop_size, self.crop_size)).astype(np.float32) 249 | cropped[cropped>1e-3] += noise[cropped>1e-3] 250 | 251 | # self.show(cropped, joint_3d, crop_trans) 252 | # plt.imshow(depth) 253 | # plt.show() 254 | # print(com_2d) 255 | return item, depth[None, ...], cropped[None, ...], joint_3d, 
np.array(crop_trans), com_2d, self.inter_matrix, \ 256 | cube 257 | 258 | def __len__(self): 259 | return len(self.index) 260 | 261 | 262 | def collate_fn(batch): 263 | batch_item = [] 264 | batch_depth = [] 265 | batch_cropped = [] 266 | batch_joint_3d = [] 267 | batch_crop_trans = [] 268 | batch_com_2d = [] 269 | batch_inter_matrix = [] 270 | batch_cube = [] 271 | for item, depth, cropped, joint_3d, crop_trans, com_2d, inter_matrix, cube in batch: 272 | if depth is not None: 273 | batch_item.append(item) 274 | batch_depth.append(depth) 275 | batch_cropped.append(cropped) 276 | batch_joint_3d.append(joint_3d) 277 | batch_crop_trans.append(crop_trans) 278 | batch_com_2d.append(com_2d) 279 | batch_inter_matrix.append(inter_matrix) 280 | batch_cube.append(cube) 281 | output = [torch.from_numpy(np.array(batch_item))] 282 | for arrays in [batch_depth, batch_cropped, batch_joint_3d, batch_crop_trans, batch_com_2d, batch_inter_matrix, 283 | batch_cube]: 284 | output.append(torch.from_numpy(np.stack(arrays, axis=0))) 285 | return output 286 | 287 | 288 | if __name__ == '__main__': 289 | from tqdm import tqdm 290 | train_dataset = Hands2019Feeder('train', max_jitter=0., depth_sigma=0., cube_len=270., min_scale=1., max_scale=1., 291 | offset=30., use_joint=True) 292 | item, depth, cropped, joint_3d, crop_trans, com_2d, inter_matrix, cube = train_dataset[0] 293 | plt.imshow(depth[0]) 294 | plt.show() 295 | print(depth[depth!=0].min()) 296 | print(depth[0, 300, 500:600]) 297 | # dataloader = DataLoader(train_dataset, shuffle=False, batch_size=4, num_workers=1, collate_fn=collate_fn) 298 | # for batch_idx, batch_data in enumerate(tqdm(dataloader)): 299 | # item, depth, cropped, joint_3d, crop_trans, com_2d, inter_matrix, cube = batch_data 300 | # break 301 | 302 | # test_dataset = Hands2019Feeder('test', max_jitter=0.) 
303 | # # item, depth, cropped, joint_3d, crop_trans, com_2d, inter_matrix, cube = train_dataset[4979] 304 | # dataloader = DataLoader(test_dataset, shuffle=False, batch_size=4, num_workers=4, collate_fn=collate_fn) 305 | # for batch_idx, batch_data in enumerate(tqdm(dataloader)): 306 | # item, depth, cropped, joint_3d, crop_trans, com_2d, inter_matrix, cube = batch_data 307 | -------------------------------------------------------------------------------- /feeders/icvl_feeder.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch.utils.data import Dataset, DataLoader 3 | import matplotlib.pyplot as plt 4 | import numpy as np 5 | import random 6 | import cv2 7 | import traceback 8 | from PIL import Image 9 | import scipy.io as sio 10 | import scipy.ndimage 11 | from glob import glob 12 | import json 13 | import logging 14 | import sys 15 | import os 16 | root = os.path.dirname(os.path.dirname(os.path.abspath(__file__))) 17 | sys.path.append(root) 18 | from utils.hand_detector import calculate_com_2d, crop_area_3d 19 | from utils.point_transform import transform_2D_to_3D 20 | 21 | logging.basicConfig(level=logging.INFO, format="%(asctime)s: %(levelname)s %(name)s:%(lineno)d] %(message)s") 22 | logger = logging.getLogger(__file__) 23 | 24 | 25 | class ICVLFeeder(Dataset): 26 | def __init__(self, phase='train', max_jitter=10., depth_sigma=1.): 27 | """ 28 | 29 | :param phase: train or test 30 | :param max_jitter: 31 | :param depth_sigma: 32 | """ 33 | self.phase = phase 34 | self.max_jitter = max_jitter 35 | self.depth_sigma = depth_sigma 36 | config_file = os.path.join(root, "config", "dataset", "icvl.json") 37 | self.config = json.load(open(config_file, 'r')) 38 | self.fx = self.config['camera']['fx'] 39 | self.fy = self.config['camera']['fy'] 40 | self.u0 = self.config['camera']['u0'] 41 | self.v0 = self.config['camera']['v0'] 42 | self.crop_size = self.config['crop_size'] 43 | self.inter_matrix = np.array([[self.fx, 0, self.u0], 44 | [0, self.fy, self.v0], 45 | [0, 0, 1]], dtype=np.float32) 46 | self.cube = np.array(self.config["cube"], dtype=np.float32) 47 | self.joint_2d, self.joint_3d, self.depth_path = self.load_annotation() 48 | self.index = np.arange(len(self.depth_path)) 49 | logger.info("{} num: {}".format(phase, len(self.index))) 50 | 51 | def load_annotation(self): 52 | if self.phase == 'train': 53 | label_path = [os.path.join(self.config['path'], 'Training', 'labels.txt')] 54 | depth_dir = os.path.join(self.config['path'], 'Training', 'Depth') 55 | else: 56 | label_path = [os.path.join(self.config['path'], 'Testing', 'test_seq_1.txt'), 57 | os.path.join(self.config['path'], 'Testing', 'test_seq_2.txt')] 58 | depth_dir = os.path.join(self.config['path'], 'Testing', 'Depth') 59 | 60 | joint_2d_list = [] 61 | depth_path_list = [] 62 | for path in label_path: 63 | with open(path, 'r') as f: 64 | for line in f.readlines(): 65 | line = line.strip() 66 | if len(line) == 0: 67 | continue 68 | sp = line.split() 69 | depth_path = os.path.join(depth_dir, sp[0]) 70 | joint_2d = np.array(list(map(float, sp[1:])), np.float32) 71 | joint_2d = joint_2d.reshape((-1, 3)) 72 | depth_path_list.append(depth_path) 73 | joint_2d_list.append(joint_2d) 74 | joint_2d = np.stack(joint_2d_list, axis=0) 75 | joint_3d = transform_2D_to_3D(joint_2d, self.fx, self.fy, self.u0, self.v0) 76 | return joint_2d, joint_3d, depth_path_list 77 | 78 | def __getitem__(self, item): 79 | item = self.index[item] 80 | joint_2d, joint_3d, depth_path = 
self.joint_2d[item], self.joint_3d[item], self.depth_path[item] 81 | 82 | try: 83 | depth = np.asarray(Image.open(depth_path), np.float32) 84 | except FileNotFoundError: 85 | return item, None, None, joint_3d, None, None, self.inter_matrix 86 | 87 | com_3d = np.mean(joint_3d[[0, 1, 3, 4, 6, 7, 9, 10, 12, 13, 15]], axis=0) 88 | 89 | if self.max_jitter>0.: 90 | com_offset = np.random.uniform(low=-1., high=1., size=(3,))*self.max_jitter 91 | com_3d = com_3d + com_offset 92 | com_2d = self.inter_matrix @ com_3d[:, None] 93 | com_2d = np.squeeze(com_2d) 94 | com_2d[:2] /= com_2d[2] 95 | com_2d = com_2d.astype(np.float32) 96 | 97 | cube = self.cube 98 | try: 99 | cropped, crop_trans, com_2d = crop_area_3d(depth, com_2d, self.fx, self.fy, size=cube, 100 | dsize=[self.crop_size, self.crop_size], docom=False) 101 | except UserWarning: 102 | return item, None, None, joint_3d, None, None, self.inter_matrix 103 | # plt.imshow(depth) 104 | # plt.scatter(com_2d[0], com_2d[1]) 105 | # plt.show() 106 | # plt.imshow(cropped) 107 | # plt.show() 108 | 109 | if self.depth_sigma>0.: 110 | # noise = np.random.randn(self.crop_size, self.crop_size)*self.noise_sigma 111 | noise = np.random.normal(0, self.depth_sigma, size=(self.crop_size, self.crop_size)).astype(np.float32) 112 | cropped[cropped>1e-3] += noise[cropped>1e-3] 113 | 114 | return item, depth[None, ...], cropped[None, ...], joint_3d, np.array(crop_trans), com_2d, self.inter_matrix, \ 115 | cube 116 | 117 | def __len__(self): 118 | return len(self.index) 119 | 120 | 121 | if __name__ == '__main__': 122 | from tqdm import tqdm 123 | from feeders.nyu_feeder import collate_fn 124 | train_dataset = ICVLFeeder('train', max_jitter=0., depth_sigma=0.) 125 | dataloader = DataLoader(train_dataset, shuffle=False, batch_size=4, collate_fn=collate_fn, num_workers=4) 126 | for batch_idx, batch_data in enumerate(tqdm(dataloader)): 127 | item, depth, cropped, joint_3d, crop_trans, com_2d, inter_matrix, cube = batch_data 128 | 129 | # print(item) 130 | # print(depth.shape) 131 | # print(cropped.shape) 132 | # print(joint_3d.shape) 133 | # print(crop_trans.shape) 134 | # print(com_2d.shape) 135 | # print(inter_matrix.shape) 136 | # print(cube.shape) 137 | -------------------------------------------------------------------------------- /feeders/nyu_feeder.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch.utils.data import Dataset, DataLoader 3 | import matplotlib.pyplot as plt 4 | import numpy as np 5 | import random 6 | import cv2 7 | import traceback 8 | from PIL import Image 9 | import scipy.io as sio 10 | import scipy.ndimage 11 | from glob import glob 12 | import json 13 | import logging 14 | import sys 15 | import os 16 | root = os.path.dirname(os.path.dirname(os.path.abspath(__file__))) 17 | sys.path.append(root) 18 | from utils.hand_detector import calculate_com_2d, crop_area_3d 19 | from utils.image_utils import normlize_depth 20 | 21 | logging.basicConfig(level=logging.INFO, format="%(asctime)s: %(levelname)s %(name)s:%(lineno)d] %(message)s") 22 | logger = logging.getLogger(__file__) 23 | 24 | 25 | class NyuFeeder(Dataset): 26 | def __init__(self, phase='train', max_jitter=10., depth_sigma=1., offset=20., random_flip=False, adjust_cube=False): 27 | """ 28 | 29 | :param phase: train or test 30 | :param max_jitter: 31 | :param depth_sigma: 32 | :param min_scale: 33 | :param max_scale: 34 | :param random_flip: 35 | :param random_rotate: 36 | """ 37 | self.phase = phase 38 | self.max_jitter = 
max_jitter 39 | self.depth_sigma = depth_sigma 40 | self.offset = offset 41 | self.random_flip = random_flip 42 | self.adjust_cube = adjust_cube 43 | config_file = os.path.join(root, "config", "dataset", "nyu.json") 44 | self.config = json.load(open(config_file, 'r')) 45 | self.joint_2d, self.joint_3d, self.depth_path = self.load_annotation() 46 | self.fx = self.config['camera']['fx'] 47 | self.fy = self.config['camera']['fy'] 48 | self.u0 = self.config['camera']['u0'] 49 | self.v0 = self.config['camera']['v0'] 50 | self.crop_size = self.config['crop_size'] 51 | self.inter_matrix = np.array([[self.fx, 0, self.u0], 52 | [0, self.fy, self.v0], 53 | [0, 0, 1]], dtype=np.float32) 54 | self.cube = np.array(self.config["cube"], dtype=np.float32) 55 | self.com_2d = [None] * len(self.depth_path) 56 | # self.index = [] 57 | # if self.phase == 'train': 58 | # self.index = [i for i in range(len(self.depth_path)) if i%6!=0] 59 | # if self.phase == 'test': 60 | # self.index = [i for i in range(len(self.depth_path)) if i%6==0] 61 | self.index = np.arange(len(self.depth_path)) 62 | logger.info("{} num: {}".format(phase, len(self.index))) 63 | 64 | def load_annotation(self): 65 | data_dir = os.path.join(self.config["path"], self.phase) 66 | joint_data = sio.loadmat(os.path.join(data_dir, 'joint_data.mat')) 67 | # if self.phase == 'test': 68 | # joint_2d = joint_data['joint_uvd'][0][:, self.config['selected']].astype(np.float32) 69 | # joint_3d = joint_data['joint_xyz'][0][:, self.config['selected']].astype(np.float32) 70 | # joint_3d[:, :, 1] = -joint_3d[:, :, 1] 71 | # depth_path = glob(os.path.join(data_dir, "depth_1_*.png")) 72 | # else: 73 | # joint_2d = joint_data['joint_uvd'][:, :, self.config['selected']].astype(np.float32) 74 | # joint_2d = np.reshape(joint_2d, [-1, len(self.config['selected']), 3]) 75 | # joint_3d = joint_data['joint_xyz'][:, :, self.config['selected']].astype(np.float32) 76 | # joint_3d = np.reshape(joint_3d, [-1, len(self.config['selected']), 3]) 77 | # joint_3d[:, :, 1] = -joint_3d[:, :, 1] 78 | # depth_path = glob(os.path.join(data_dir, "depth_*.png")) 79 | joint_2d = joint_data['joint_uvd'][0][:, self.config['selected']].astype(np.float32) 80 | joint_3d = joint_data['joint_xyz'][0][:, self.config['selected']].astype(np.float32) 81 | joint_3d[:, :, 1] = -joint_3d[:, :, 1] 82 | depth_path = glob(os.path.join(data_dir, "depth_1_*.png")) 83 | depth_path.sort() 84 | return joint_2d, joint_3d, depth_path 85 | 86 | def show(self, cropped, joint_3d, crop_trans): 87 | joint_2d = self.inter_matrix @ np.transpose(joint_3d, (1, 0)) 88 | joint_2d = joint_2d / joint_2d[2, :] 89 | joint_2d = np.transpose(joint_2d, (1, 0)) 90 | crop_joint_2d = np.ones_like(joint_2d) 91 | crop_joint_2d[:, :2] = joint_2d[:, :2] 92 | crop_joint_2d = np.transpose(crop_joint_2d, (1, 0)) 93 | crop_joint_2d = np.array(crop_trans @ crop_joint_2d) 94 | crop_joint_2d = np.transpose(crop_joint_2d, (1, 0)) 95 | plt.clf() 96 | plt.imshow(cropped) 97 | plt.scatter(crop_joint_2d[:, 0], crop_joint_2d[:, 1], c='red') 98 | plt.show() 99 | 100 | def __getitem__(self, item): 101 | item = self.index[item] 102 | joint_2d, joint_3d, depth_path = self.joint_2d[item], self.joint_3d[item], self.depth_path[item] 103 | depth = load_depth_map(depth_path) 104 | if depth is None: 105 | return item, None, None, joint_3d, None, None, self.inter_matrix 106 | # com_2d = joint_2d[13] 107 | # com_2d = np.mean(joint_2d, axis=0) 108 | com_3d = np.mean(joint_3d, axis=0) 109 | 110 | # scale = np.random.uniform(low=self.min_scale, 
high=self.max_scale) 111 | # cube = self.cube * scale 112 | if self.max_jitter>0.: 113 | com_offset = np.random.uniform(low=-1., high=1., size=(3,))*self.max_jitter 114 | com_3d = com_3d + com_offset 115 | com_2d = self.inter_matrix @ com_3d[:, None] 116 | com_2d = np.squeeze(com_2d) 117 | com_2d[:2] /= com_2d[2] 118 | com_2d = com_2d.astype(np.float32) 119 | if self.adjust_cube: 120 | distance = np.linalg.norm(joint_3d - com_3d, axis=-1) 121 | cube_size = (np.max(distance) + self.offset) * 2. 122 | cube = np.array([cube_size, cube_size, cube_size], dtype=np.float32) 123 | left = np.min(joint_2d[:, 0]) 124 | right = np.max(joint_2d[:, 0]) 125 | up = np.min(joint_2d[:, 1]) 126 | down = np.max(joint_2d[:, 1]) 127 | front = np.min(joint_3d[:, 2]) 128 | back = np.max(joint_3d[:, 2]) 129 | bbx = [left, right, up, down, front, back] 130 | cropped, crop_trans, com_2d = crop_area_3d(depth, com_2d, self.fx, self.fy, bbx, self.offset, size=cube, 131 | dsize=(self.crop_size, self.crop_size), docom=False) 132 | else: 133 | if self.phase != 'train' and item >= 2440: 134 | cube = self.cube * 5.0 / 6.0 135 | else: 136 | cube = self.cube 137 | cropped, crop_trans, com_2d = crop_area_3d(depth, com_2d, self.fx, self.fy, size=cube, 138 | dsize=[self.crop_size, self.crop_size], docom=False) 139 | # if self.random_rotate: 140 | # # plt.imshow(cropped) 141 | # # plt.show() 142 | # angle = np.random.rand()*360. 143 | # M = cv2.getRotationMatrix2D((self.crop_size/2., self.crop_size/2.), angle, 1.) 144 | # cropped = cv2.warpAffine(cropped, M, (self.crop_size, self.crop_size), flags=cv2.INTER_NEAREST) 145 | # rotate_trans = np.eye(3, dtype=np.float32) 146 | # rotate_trans[:2, :] = M 147 | # crop_trans = rotate_trans @ crop_trans 148 | # # plt.imshow(cropped) 149 | # # plt.show() 150 | 151 | if self.random_flip: 152 | to_center = np.array([[1., 0., self.crop_size/2.], 153 | [0., 1., self.crop_size/2.], 154 | [0., 0., 1]], np.float32) 155 | to_origin = np.array([[1., 0., -self.crop_size/2.], 156 | [0., 1., -self.crop_size/2.], 157 | [0., 0., 1]], np.float32) 158 | if random.random()>0.5: 159 | # Horizontal flip 160 | cropped = cropped[:, ::-1] 161 | matrix = np.eye(3, dtype=np.float32) 162 | matrix[0, 0] = -1 163 | flip_matrix = to_center @ matrix @ to_origin 164 | crop_trans = flip_matrix @ crop_trans 165 | 166 | if random.random()>0.5: 167 | # Vertical flip 168 | cropped = cropped[::-1, :] 169 | matrix = np.eye(3, dtype=np.float32) 170 | matrix[1, 1] = -1 171 | flip_matrix = to_center @ matrix @ to_origin 172 | crop_trans = flip_matrix @ crop_trans 173 | 174 | cropped = np.array(cropped) 175 | 176 | if self.depth_sigma>0.: 177 | # noise = np.random.randn(self.crop_size, self.crop_size)*self.noise_sigma 178 | noise = np.random.normal(0, self.depth_sigma, size=(self.crop_size, self.crop_size)).astype(np.float32) 179 | cropped[cropped>1e-3] += noise[cropped>1e-3] 180 | 181 | # self.show(cropped, joint_3d, crop_trans) 182 | # plt.imshow(depth) 183 | # plt.show() 184 | # print(com_2d) 185 | return item, depth[None, ...], cropped[None, ...], joint_3d, np.array(crop_trans), com_2d, self.inter_matrix, \ 186 | cube 187 | 188 | def __len__(self): 189 | return len(self.index) 190 | 191 | 192 | def load_depth_map(filename): 193 | """ 194 | Read a depth-map 195 | :param filename: file name to load 196 | :return: image data of depth image 197 | """ 198 | try: 199 | img = Image.open(filename) 200 | # top 8 bits of depth are packed into green channel and lower 8 bits into blue 201 | assert len(img.getbands()) == 3 202 | r, g, b = 
img.split() 203 | r = np.asarray(r, np.int32) 204 | g = np.asarray(g, np.int32) 205 | b = np.asarray(b, np.int32) 206 | dpt = np.bitwise_or(np.left_shift(g, 8), b) 207 | imgdata = np.asarray(dpt, np.float32) 208 | except IOError as e: 209 | imgdata = None 210 | # imgdata = np.zeros((480, 640), np.float32) 211 | logger.exception(filename+' file broken.') 212 | return imgdata 213 | 214 | 215 | def collate_fn(batch): 216 | # batch_item = [] 217 | # batch_depth = [] 218 | # batch_cropped = [] 219 | # batch_joint_3d = [] 220 | # batch_crop_trans = [] 221 | # batch_com_2d = [] 222 | # batch_inter_matrix = [] 223 | # batch_cube = [] 224 | batch_data = [] 225 | for i in range(len(batch)): 226 | if batch[i][1] is not None: 227 | batch_data.append(batch[i]) 228 | # for item, depth, cropped, joint_3d, crop_trans, com_2d, inter_matrix, cube in batch: 229 | # if depth is not None: 230 | # batch_item.append(item) 231 | # batch_depth.append(depth) 232 | # batch_cropped.append(cropped) 233 | # batch_joint_3d.append(joint_3d) 234 | # batch_crop_trans.append(crop_trans) 235 | # batch_com_2d.append(com_2d) 236 | # batch_inter_matrix.append(inter_matrix) 237 | # batch_cube.append(cube) 238 | batch_data = list(zip(*batch_data)) 239 | output = [torch.from_numpy(np.array(batch_data[0]))] 240 | for arrays in batch_data[1:]: 241 | output.append(torch.from_numpy(np.stack(arrays, axis=0))) 242 | return output 243 | 244 | 245 | if __name__ == '__main__': 246 | train_dataset = NyuFeeder('train', max_jitter=10., depth_sigma=0., offset=30, random_flip=False) 247 | item, depth, cropped, joint_3d, crop_trans, com_2d, inter_matrix, cube = train_dataset[0] 248 | dataloader = DataLoader(train_dataset, shuffle=False, batch_size=1, collate_fn=collate_fn) 249 | for batch_idx, batch_data in enumerate(dataloader): 250 | item, depth, cropped, joint_3d, crop_trans, com_2d, inter_matrix, cube = batch_data 251 | print(item) 252 | print(cube) 253 | break 254 | 255 | # test_dataset = NyuFeeder('test', max_jitter=0., depth_sigma=0., offset=30, random_flip=False) 256 | # dataloader = DataLoader(test_dataset, shuffle=True, batch_size=4) 257 | # for batch_idx, batch_data in enumerate(dataloader): 258 | # item, depth, cropped, joint_3d, crop_trans, com_2d, inter_matrix, cube = batch_data 259 | # print(item) 260 | # print(cube) 261 | # break 262 | 263 | # random.seed(0) 264 | # train_dataset = NyuFeeder('test', jitter_sigma=0., noise_sigma=0., scale_sigma=0., random_flip=True) 265 | # dataloader = DataLoader(train_dataset, shuffle=False, batch_size=4, collate_fn=collate_fn) 266 | # for batch_idx, batch_data in enumerate(dataloader): 267 | # item, depth, cropped, joint_3d, crop_trans, com_2d, inter_matrix, cube = batch_data 268 | # print(depth[2, 0, 300, 200]) 269 | # print(item) 270 | # print(cube) 271 | # break 272 | -------------------------------------------------------------------------------- /fig/pipeline.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/iscas3dv/handpose-virtualview/d220efa69ff031077381bc0d4cd58fae7049c329/fig/pipeline.png -------------------------------------------------------------------------------- /models/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/iscas3dv/handpose-virtualview/d220efa69ff031077381bc0d4cd58fae7049c329/models/__init__.py -------------------------------------------------------------------------------- /models/a2j.py: 
-------------------------------------------------------------------------------- 1 | """ 2 | MIT License 3 | 4 | Copyright (c) 2019 Boshen Zhang 5 | 6 | Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: 7 | 8 | The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. 9 | 10 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 11 | 12 | """ 13 | import torch.nn as nn 14 | from torch.nn import init 15 | import torch 16 | import torch.nn.functional as F 17 | import numpy as np 18 | 19 | import os 20 | import sys 21 | dir = os.path.dirname(os.path.abspath(__file__)) 22 | root = os.path.dirname(dir) 23 | from models import resnet 24 | 25 | 26 | class DepthRegressionModel(nn.Module): 27 | def __init__(self, num_features_in, num_anchors=16, num_classes=15, feature_size=256): 28 | super(DepthRegressionModel, self).__init__() 29 | self.num_classes = num_classes 30 | self.num_anchors = num_anchors 31 | 32 | self.conv1 = nn.Conv2d(num_features_in, feature_size, kernel_size=3, padding=1) 33 | self.bn1 = nn.BatchNorm2d(feature_size) 34 | self.act1 = nn.ReLU() 35 | self.conv2 = nn.Conv2d(feature_size, feature_size, kernel_size=3, padding=1) 36 | self.bn2 = nn.BatchNorm2d(feature_size) 37 | self.act2 = nn.ReLU() 38 | self.conv3 = nn.Conv2d(feature_size, feature_size, kernel_size=3, padding=1) 39 | self.bn3 = nn.BatchNorm2d(feature_size) 40 | self.act3 = nn.ReLU() 41 | self.conv4 = nn.Conv2d(feature_size, feature_size, kernel_size=3, padding=1) 42 | self.bn4 = nn.BatchNorm2d(feature_size) 43 | self.act4 = nn.ReLU() 44 | self.output = nn.Conv2d(feature_size, num_anchors * num_classes, kernel_size=3, padding=1) 45 | for m in self.modules(): 46 | if isinstance(m, nn.Conv2d): 47 | nn.init.xavier_normal_(m.weight.data) 48 | elif isinstance(m, nn.BatchNorm2d): 49 | m.weight.data.fill_(1) 50 | m.bias.data.zero_() 51 | 52 | def forward(self, x): 53 | out = self.conv1(x) 54 | out = self.bn1(out) 55 | out = self.act1(out) 56 | out = self.conv2(out) 57 | out = self.bn2(out) 58 | out = self.act2(out) 59 | out = self.conv3(out) 60 | out = self.bn3(out) 61 | out = self.act3(out) 62 | out = self.conv4(out) 63 | out = self.bn4(out) 64 | out = self.act4(out) 65 | out = self.output(out) 66 | 67 | # out is B x C x W x H, with C = 3*num_anchors 68 | out1 = out.permute(0, 3, 2, 1) 69 | batch_size, width, height, channels = out1.shape 70 | out2 = out1.view(batch_size, width, height, self.num_anchors, self.num_classes) 71 | return out2.contiguous().view(out2.shape[0], -1, self.num_classes) 72 | 73 | 74 | class RegressionModel(nn.Module): 75 | def __init__(self, num_features_in, num_anchors=16, num_classes=15, feature_size=256): 76 | super(RegressionModel, self).__init__() 77 | 
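# In-plane offset branch of A2J: four 3x3 conv+BN+ReLU blocks keep the spatial
# resolution, and the output conv emits num_anchors * num_classes * 2 channels,
# i.e. one (dx, dy) offset per anchor and per joint. forward() reshapes this to
# (B, w*h*A, num_joints, 2), where w and h are the feature-map dimensions.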
self.num_anchors = num_anchors 78 | self.num_classes = num_classes 79 | self.conv1 = nn.Conv2d(num_features_in, feature_size, kernel_size=3, padding=1) 80 | self.bn1 = nn.BatchNorm2d(feature_size) 81 | self.act1 = nn.ReLU() 82 | self.conv2 = nn.Conv2d(feature_size, feature_size, kernel_size=3, padding=1) 83 | self.bn2 = nn.BatchNorm2d(feature_size) 84 | self.act2 = nn.ReLU() 85 | self.conv3 = nn.Conv2d(feature_size, feature_size, kernel_size=3, padding=1) 86 | self.bn3 = nn.BatchNorm2d(feature_size) 87 | self.act3 = nn.ReLU() 88 | self.conv4 = nn.Conv2d(feature_size, feature_size, kernel_size=3, padding=1) 89 | self.bn4 = nn.BatchNorm2d(feature_size) 90 | self.act4 = nn.ReLU() 91 | self.output = nn.Conv2d(feature_size, num_anchors * num_classes * 2, kernel_size=3, padding=1) 92 | for m in self.modules(): 93 | if isinstance(m, nn.Conv2d): 94 | nn.init.xavier_normal_(m.weight.data) 95 | elif isinstance(m, nn.BatchNorm2d): 96 | m.weight.data.fill_(1) 97 | m.bias.data.zero_() 98 | 99 | def forward(self, x): 100 | out = self.conv1(x) 101 | out = self.bn1(out) 102 | out = self.act1(out) 103 | out = self.conv2(out) 104 | out = self.bn2(out) 105 | out = self.act2(out) 106 | out = self.conv3(out) 107 | out = self.bn3(out) 108 | out = self.act3(out) 109 | out = self.conv4(out) 110 | out = self.bn4(out) 111 | out = self.act4(out) 112 | out = self.output(out) 113 | 114 | # out is B x C x W x H, with C = 3*num_anchors 115 | out1 = out.permute(0, 3, 2, 1) 116 | batch_size, width, height, channels = out1.shape 117 | out2 = out1.view(batch_size, width, height, self.num_anchors, self.num_classes, 2) 118 | return out2.contiguous().view(out2.shape[0], -1, self.num_classes, 2) 119 | 120 | 121 | class ClassificationModel(nn.Module): 122 | def __init__(self, num_features_in, num_anchors=16, num_classes=15, prior=0.01, feature_size=256): 123 | super(ClassificationModel, self).__init__() 124 | self.num_classes = num_classes 125 | self.num_anchors = num_anchors 126 | self.conv1 = nn.Conv2d(num_features_in, feature_size, kernel_size=3, padding=1) 127 | self.bn1 = nn.BatchNorm2d(feature_size) 128 | self.act1 = nn.ReLU() 129 | self.conv2 = nn.Conv2d(feature_size, feature_size, kernel_size=3, padding=1) 130 | self.bn2 = nn.BatchNorm2d(feature_size) 131 | self.act2 = nn.ReLU() 132 | self.conv3 = nn.Conv2d(feature_size, feature_size, kernel_size=3, padding=1) 133 | self.bn3 = nn.BatchNorm2d(feature_size) 134 | self.act3 = nn.ReLU() 135 | self.conv4 = nn.Conv2d(feature_size, feature_size, kernel_size=3, padding=1) 136 | self.bn4 = nn.BatchNorm2d(feature_size) 137 | self.act4 = nn.ReLU() 138 | self.output = nn.Conv2d(feature_size, num_anchors * num_classes, kernel_size=3, padding=1) 139 | for m in self.modules(): 140 | if isinstance(m, nn.Conv2d): 141 | nn.init.xavier_normal_(m.weight.data) 142 | elif isinstance(m, nn.BatchNorm2d): 143 | m.weight.data.fill_(1) 144 | m.bias.data.zero_() 145 | 146 | def forward(self, x): 147 | out = self.conv1(x) 148 | out = self.bn1(out) 149 | out = self.act1(out) 150 | out = self.conv2(out) 151 | out = self.bn2(out) 152 | out = self.act2(out) 153 | out = self.conv3(out) 154 | out = self.bn3(out) 155 | out = self.act3(out) 156 | out = self.conv4(out) 157 | out = self.bn4(out) 158 | out = self.act4(out) 159 | out = self.output(out) 160 | 161 | # out is B x C x W x H, with C = n_classes + n_anchors 162 | out1 = out.permute(0, 3, 2, 1) 163 | batch_size, width, height, channels = out1.shape 164 | out2 = out1.view(batch_size, width, height, self.num_anchors, self.num_classes) 165 | return 
out2.contiguous().view(x.shape[0], -1, self.num_classes) 166 | 167 | 168 | class ResNetBackBone(nn.Module): 169 | def __init__(self, light): 170 | super(ResNetBackBone, self).__init__() 171 | if light: 172 | self.model = resnet.resnet18(pretrained=True) 173 | else: 174 | self.model = resnet.resnet50(pretrained=True) 175 | 176 | def forward(self, x): 177 | n, c, h, w = x.size() # x: [B, 1, H ,W] 178 | 179 | x = x[:, 0:1, :, :] # depth 180 | x = x.expand(n, 3, h, w) 181 | 182 | x = self.model.conv1(x) 183 | x = self.model.bn1(x) 184 | x = self.model.relu(x) 185 | x = self.model.maxpool(x) 186 | x1 = self.model.layer1(x) 187 | x2 = self.model.layer2(x1) 188 | x3 = self.model.layer3(x2) 189 | x4 = self.model.layer4(x3) 190 | 191 | return x3, x4 192 | 193 | 194 | def generate_anchors(P_h=None, P_w=None): 195 | if P_h is None: 196 | P_h = np.array([2,6,10,14]) 197 | 198 | if P_w is None: 199 | P_w = np.array([2,6,10,14]) 200 | 201 | num_anchors = len(P_h) * len(P_h) 202 | 203 | # initialize output anchors 204 | anchors = np.zeros((num_anchors, 2)) 205 | k = 0 206 | for i in range(len(P_w)): 207 | for j in range(len(P_h)): 208 | anchors[k,1] = P_w[j] 209 | anchors[k,0] = P_h[i] 210 | k += 1 211 | return anchors 212 | 213 | 214 | def shift(shape, stride, anchors): 215 | shift_h = np.arange(0, shape[0]) * stride 216 | shift_w = np.arange(0, shape[1]) * stride 217 | 218 | shift_h, shift_w = np.meshgrid(shift_h, shift_w) 219 | shifts = np.vstack((shift_h.ravel(), shift_w.ravel())).transpose() 220 | 221 | # add A anchors (1, A, 2) to 222 | # cell K shifts (K, 1, 2) to get 223 | # shift anchors (K, A, 2) 224 | # reshape to (K*A, 2) shifted anchors 225 | A = anchors.shape[0] 226 | K = shifts.shape[0] 227 | all_anchors = (anchors.reshape((1, A, 2)) + shifts.reshape((1, K, 2)).transpose((1, 0, 2))) 228 | all_anchors = all_anchors.reshape((K * A, 2)) 229 | # print(all_anchors.shape) 230 | # print(all_anchors[:32]) 231 | 232 | return all_anchors 233 | 234 | 235 | class A2J_model(nn.Module): 236 | def __init__(self, num_classes, P_h=None, P_w=None, shape=[11, 11], stride=16, dropout_rate=0., is_3D=True, 237 | light=False): 238 | super(A2J_model, self).__init__() 239 | self.dropout_rate = dropout_rate 240 | self.is_3D = is_3D 241 | self.light = light 242 | anchors = generate_anchors(P_h=P_h, P_w=P_w) 243 | self.all_anchors = torch.from_numpy(shift(shape, stride, anchors)).float() #(w*h*A)*2 244 | self.Backbone = ResNetBackBone(light) # 1 channel depth only 245 | if light: 246 | self.regressionModel = RegressionModel(512, num_classes=num_classes) 247 | self.classificationModel = ClassificationModel(256, num_classes=num_classes) 248 | self.dropout = nn.Dropout(dropout_rate) 249 | if is_3D: 250 | self.DepthRegressionModel = DepthRegressionModel(512, num_classes=num_classes) 251 | else: 252 | self.regressionModel = RegressionModel(2048, num_classes=num_classes) 253 | self.classificationModel = ClassificationModel(1024, num_classes=num_classes) 254 | self.dropout = nn.Dropout(dropout_rate) 255 | if is_3D: 256 | self.DepthRegressionModel = DepthRegressionModel(2048, num_classes=num_classes) 257 | 258 | def forward(self, x): 259 | anchor = self.all_anchors.to(x.device) 260 | x3, x4 = self.Backbone(x) 261 | x3 = self.dropout(x3) 262 | x4 = self.dropout(x4) 263 | classification = self.classificationModel(x3) # N*(w/16*h/16*A)*P 264 | regression = self.regressionModel(x4) # N*(w/16*h/16*A)*P*2 265 | reg_weight = F.softmax(classification, dim=1) # N*(w/16*h/16*A)*P 266 | reg_weight_xy = torch.unsqueeze(reg_weight, 
3).expand( 267 | reg_weight.shape[0], reg_weight.shape[1], reg_weight.shape[2], 2) # N*(w/16*h/16*A)*P*2 268 | anchor_joints_2d = (reg_weight_xy * torch.unsqueeze(anchor, 1)).sum(1) # N*P*2 269 | # anchor_joints_2d[..., 0], anchor_joints_2d[..., 1] = anchor_joints_2d[..., 1], anchor_joints_2d[..., 0] 270 | 271 | reg = torch.unsqueeze(anchor, 1) + regression # N*(w/16*h/16*A)*P*2 272 | regression_joints_2d = (reg_weight_xy*reg).sum(1) # N*P*2 273 | # regression_joints_2d[..., 0], regression_joints_2d[..., 1] = \ 274 | # regression_joints_2d[..., 1], regression_joints_2d[..., 0] 275 | 276 | if self.is_3D: 277 | depthregression = self.DepthRegressionModel(x4) # N*(w/16*h/16*A)*P 278 | depth_value = (reg_weight * depthregression).sum(1) 279 | return classification, regression, depthregression, anchor_joints_2d, regression_joints_2d, depth_value 280 | return classification, regression, anchor_joints_2d, regression_joints_2d 281 | 282 | 283 | if __name__ == "__main__": 284 | num_classes = 14 285 | w, h = 176, 176 286 | B = 10 287 | depth = torch.rand([B, 1, h, w], dtype=torch.float32).cuda() 288 | model = A2J_model(num_classes).cuda() 289 | anchor_joints_2d, regression_joints_2d, depth_value = model(depth) 290 | print(anchor_joints_2d.shape) 291 | print(regression_joints_2d.shape) 292 | print(depth_value.shape) 293 | -------------------------------------------------------------------------------- /models/a2j_conf_net.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.nn.functional as F 4 | import os 5 | import sys 6 | dir = os.path.dirname(os.path.abspath(__file__)) 7 | root = os.path.dirname(dir) 8 | sys.path.append(root) 9 | from models.attention import MultiHeadAttention, PositionwiseFeedForward 10 | 11 | class A2JConfNet(nn.Module): 12 | def __init__(self, n_head, d_attn, d_k, d_v, d_inner, dropout_rate, num_select, random=False): 13 | super(A2JConfNet, self).__init__() 14 | self.n_head = n_head 15 | self.d_attn = d_attn 16 | self.d_k = d_k 17 | self.d_v = d_v 18 | self.d_inner = d_inner 19 | self.dropout_rate = dropout_rate 20 | self.num_select = num_select 21 | self.random = random 22 | self.num_anchors = 11*11*16 23 | self.encode = nn.Sequential( 24 | # (B*N*J, 64, 11, 11) 25 | nn.Conv2d(64, d_attn//4, kernel_size=3, padding=1), 26 | nn.BatchNorm2d(d_attn//4), 27 | nn.ReLU(), 28 | nn.MaxPool2d(3, 2), # (B*N*J, d_attn//4, 5, 5) 29 | 30 | nn.Conv2d(d_attn//4, d_attn//2, kernel_size=3, padding=1), 31 | nn.BatchNorm2d(d_attn//2), 32 | nn.ReLU(), 33 | nn.MaxPool2d(3, 2), # (B*N*J, d_attn//2, 2, 2) 34 | 35 | nn.Conv2d(d_attn//2, d_attn, kernel_size=2) # (B*N*J, d_attn, 1, 1) 36 | ) 37 | self.attention = MultiHeadAttention(n_head, d_attn, d_k, d_v, dropout_rate) 38 | self.pos_ffn = PositionwiseFeedForward(d_attn, d_inner, dropout_rate) 39 | self.confidence_net = nn.Linear(d_attn, 1) 40 | 41 | def select(self, joint_3d, conf, k, random): 42 | """ 43 | 44 | :param joint_3d: Tensor(B, N, J, 3) 45 | :param conf: Tensor(B, N) 46 | :param k: int 47 | :return: 48 | conf_select: Tensor(B, k) 49 | id_select: Tensor(B, k) 50 | """ 51 | B, N, J, _ = joint_3d.shape 52 | if random: 53 | conf_select_list = [] 54 | id_select_list = [] 55 | for i in range(B): 56 | id = torch.arange(0, N, device=conf.device) 57 | id = id[torch.randperm(N)] 58 | id_select = id[:k] 59 | 60 | conf_select = conf[i, id_select] 61 | conf_select_list.append(conf_select) 62 | id_select_list.append(id_select) 63 | conf_select = 
torch.stack(conf_select_list, dim=0) 64 | id_select = torch.stack(id_select_list, dim=0) 65 | conf_select, id_select = torch.topk(conf, k, dim=-1) # (B, k) 66 | 67 | id_select_expand = id_select[:, :, None, None].repeat((1, 1, J, 3)) 68 | joint_3d_select = torch.gather(joint_3d, 1, id_select_expand) # (B, k, J, 3) 69 | 70 | return joint_3d_select, conf_select, id_select 71 | 72 | def forward(self, classification, regression, depthregression, joint_3d): 73 | """ 74 | 75 | :param classification: Tensor(B, num_views, num_anchors, num_joints) 76 | :param regression: Tensor(B, num_views, num_anchors, num_joints, 2) 77 | :param depthregression: Tensor(B, num_views, num_anchors, num_joints) 78 | :param joint_3d: Tensor(B, num_views, num_joints, 3) 79 | :return: 80 | """ 81 | B, N, J, _ = joint_3d.shape 82 | # (B, N, num_anchors, num_joints, 4) 83 | input = torch.cat([classification[..., None], regression, depthregression[..., None]], dim=-1) 84 | input = torch.transpose(input, 2, 3) # # (B, N, J, num_anchors, 4) 85 | input = torch.reshape(input, (B*N*J, 11, 11, 16*4)) 86 | input = input.transpose(1, 3).transpose(2, 3) # (B*N*J, 64, 11, 11) 87 | feature = self.encode(input).reshape([B, N, J, -1]) # (B, N, J, d_attn) 88 | v = feature.mean(dim=-2) # (B, N,d_attn) 89 | 90 | v = self.attention(v, v, v) 91 | v = self.pos_ffn(v) 92 | 93 | conf = self.confidence_net(v).reshape([B, N]) 94 | joint_3d_select, conf_select, id_select = self.select(joint_3d, conf, self.num_select, self.random) 95 | 96 | conf_select = torch.softmax(conf_select, dim=-1) # (B, k) 97 | 98 | joint_3d_conf = joint_3d_select * conf_select[:, :, None, None] 99 | joint_3d_conf = torch.sum(joint_3d_conf, 1) 100 | 101 | return conf, joint_3d_conf 102 | -------------------------------------------------------------------------------- /models/attention.py: -------------------------------------------------------------------------------- 1 | """ 2 | MIT License 3 | 4 | Copyright (c) 2017 Victor Huang 5 | 6 | Permission is hereby granted, free of charge, to any person obtaining a copy 7 | of this software and associated documentation files (the "Software"), to deal 8 | in the Software without restriction, including without limitation the rights 9 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 10 | copies of the Software, and to permit persons to whom the Software is 11 | furnished to do so, subject to the following conditions: 12 | 13 | The above copyright notice and this permission notice shall be included in all 14 | copies or substantial portions of the Software. 15 | 16 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 19 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 20 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 21 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 22 | SOFTWARE. 
23 | """ 24 | import torch 25 | from torch import nn as nn 26 | from torch.nn import functional as F 27 | 28 | 29 | class ScaledDotProductAttention(nn.Module): 30 | ''' Scaled Dot-Product Attention ''' 31 | 32 | def __init__(self, temperature, attn_dropout=0.1): 33 | super().__init__() 34 | self.temperature = temperature 35 | self.dropout = nn.Dropout(attn_dropout) 36 | 37 | def forward(self, q, k, v): 38 | 39 | attn = torch.matmul(q / self.temperature, k.transpose(2, 3)) 40 | 41 | attn = self.dropout(F.softmax(attn, dim=-1)) 42 | output = torch.matmul(attn, v) 43 | 44 | return output, attn 45 | 46 | 47 | class MultiHeadAttention(nn.Module): 48 | ''' Multi-Head Attention module ''' 49 | 50 | def __init__(self, n_head, d_attn, d_k, d_v, dropout=0.1): 51 | super().__init__() 52 | 53 | self.n_head = n_head 54 | self.d_k = d_k 55 | self.d_v = d_v 56 | 57 | self.w_qs = nn.Linear(d_attn, n_head * d_k, bias=False) 58 | self.w_ks = nn.Linear(d_attn, n_head * d_k, bias=False) 59 | self.w_vs = nn.Linear(d_attn, n_head * d_v, bias=False) 60 | self.fc = nn.Linear(n_head * d_v, d_attn, bias=False) 61 | 62 | self.attention = ScaledDotProductAttention(temperature=d_k ** 0.5, attn_dropout=dropout) 63 | 64 | self.dropout = nn.Dropout(dropout) 65 | self.layer_norm = nn.LayerNorm(d_attn, eps=1e-6) 66 | 67 | def forward(self, q, k, v): 68 | 69 | d_k, d_v, n_head = self.d_k, self.d_v, self.n_head 70 | B, num_views = q.size(0), q.size(1) 71 | 72 | residual = v 73 | 74 | # Pass through the pre-attention projection: b x num_views x (n*dv) 75 | # Separate different heads: B x num_views x n x dv 76 | q = self.w_qs(q).view(B, num_views, n_head, d_k) 77 | k = self.w_ks(k).view(B, num_views, n_head, d_k) 78 | v = self.w_vs(v).view(B, num_views, n_head, d_v) 79 | 80 | # Transpose for attention dot product: b x n x num_views x dv 81 | q, k, v = q.transpose(1, 2), k.transpose(1, 2), v.transpose(1, 2) 82 | 83 | v, attn = self.attention(q, k, v) 84 | 85 | # Transpose to move the head dimension back: b x num_views x n x dv 86 | # Combine the last two dimensions to concatenate all the heads together: b x num_views x (n*dv) 87 | v = v.transpose(1, 2).contiguous().view(B, num_views, -1) 88 | v = self.dropout(self.fc(v)) 89 | v += residual 90 | 91 | v = self.layer_norm(v) 92 | 93 | return v 94 | 95 | 96 | class PositionwiseFeedForward(nn.Module): 97 | ''' A two-feed-forward-layer module ''' 98 | 99 | def __init__(self, d_in, d_hid, dropout=0.1): 100 | super().__init__() 101 | self.w_1 = nn.Linear(d_in, d_hid) # position-wise 102 | self.w_2 = nn.Linear(d_hid, d_in) # position-wise 103 | self.layer_norm = nn.LayerNorm(d_in, eps=1e-6) 104 | self.dropout = nn.Dropout(dropout) 105 | 106 | def forward(self, x): 107 | 108 | residual = x 109 | 110 | x = self.w_2(F.relu(self.w_1(x))) 111 | x = self.dropout(x) 112 | x += residual 113 | 114 | x = self.layer_norm(x) 115 | 116 | return x -------------------------------------------------------------------------------- /models/conf_net.py: -------------------------------------------------------------------------------- 1 | import torch.nn as nn 2 | from torch.nn import init 3 | import torch 4 | import torch.nn.functional as F 5 | import numpy as np 6 | 7 | import os 8 | import sys 9 | dir = os.path.dirname(os.path.abspath(__file__)) 10 | root = os.path.dirname(dir) 11 | from models import resnet 12 | 13 | 14 | class ConfNet(nn.Module): 15 | def __init__(self, num_views, dropout_rate): 16 | super(ConfNet, self).__init__() 17 | self.resnet = resnet.resnet18(pretrained=False) 18 | self.dropout = 
nn.Dropout(dropout_rate) 19 | self.fc = nn.Linear(1000, num_views) 20 | 21 | def forward(self, x): 22 | """ 23 | 24 | :param x: Tensor(B, 1, 176, 176) 25 | :return: 26 | """ 27 | n, c, h, w = x.size() # x: [B, 1, H ,W] 28 | 29 | x = x[:, 0:1, :, :] # depth 30 | x = x.expand(n, 3, h, w) 31 | x = self.resnet(x) 32 | x = self.dropout(x) 33 | x = self.fc(x) 34 | return x 35 | 36 | 37 | if __name__ == '__main__': 38 | confnet = ConfNet(25, 0.5) 39 | input = torch.randn((4, 1, 176, 176), dtype=torch.float32) 40 | output = confnet(input) 41 | print(output.shape) -------------------------------------------------------------------------------- /models/layers.py: -------------------------------------------------------------------------------- 1 | from torch import nn 2 | 3 | Pool = nn.MaxPool2d 4 | 5 | 6 | def batchnorm(x): 7 | return nn.BatchNorm2d(x.size()[1])(x) 8 | 9 | 10 | class Conv(nn.Module): 11 | def __init__(self, inp_dim, out_dim, kernel_size=3, stride=1, bn=False, relu=True): 12 | super(Conv, self).__init__() 13 | self.inp_dim = inp_dim 14 | self.conv = nn.Conv2d(inp_dim, out_dim, kernel_size, stride, padding=(kernel_size - 1) // 2, bias=True) 15 | self.relu = None 16 | self.bn = None 17 | if relu: 18 | self.relu = nn.ReLU() 19 | if bn: 20 | self.bn = nn.BatchNorm2d(out_dim) 21 | 22 | def forward(self, x): 23 | assert x.size()[1] == self.inp_dim, "{} {}".format(x.size()[1], self.inp_dim) 24 | x = self.conv(x) 25 | if self.bn is not None: 26 | x = self.bn(x) 27 | if self.relu is not None: 28 | x = self.relu(x) 29 | return x 30 | 31 | 32 | class Residual(nn.Module): 33 | def __init__(self, inp_dim, out_dim): 34 | super(Residual, self).__init__() 35 | self.relu = nn.ReLU() 36 | self.bn1 = nn.BatchNorm2d(inp_dim) 37 | self.conv1 = Conv(inp_dim, int(out_dim / 2), 1, relu=False) 38 | self.bn2 = nn.BatchNorm2d(int(out_dim / 2)) 39 | self.conv2 = Conv(int(out_dim / 2), int(out_dim / 2), 3, relu=False) 40 | self.bn3 = nn.BatchNorm2d(int(out_dim / 2)) 41 | self.conv3 = Conv(int(out_dim / 2), out_dim, 1, relu=False) 42 | self.skip_layer = Conv(inp_dim, out_dim, 1, relu=False) 43 | if inp_dim == out_dim: 44 | self.need_skip = False 45 | else: 46 | self.need_skip = True 47 | 48 | def forward(self, x): 49 | if self.need_skip: 50 | residual = self.skip_layer(x) 51 | else: 52 | residual = x 53 | out = self.bn1(x) 54 | out = self.relu(out) 55 | out = self.conv1(out) 56 | out = self.bn2(out) 57 | out = self.relu(out) 58 | out = self.conv2(out) 59 | out = self.bn3(out) 60 | out = self.relu(out) 61 | out = self.conv3(out) 62 | out += residual 63 | return out 64 | 65 | 66 | class Hourglass(nn.Module): 67 | def __init__(self, n, f, bn=None, increase=0): 68 | super(Hourglass, self).__init__() 69 | nf = f + increase 70 | self.up1 = Residual(f, f) 71 | # Lower branch 72 | self.pool1 = Pool(2, 2) 73 | self.low1 = Residual(f, nf) 74 | self.n = n 75 | # Recursive hourglass 76 | if self.n > 1: 77 | self.low2 = Hourglass(n - 1, nf, bn=bn) 78 | else: 79 | self.low2 = Residual(nf, nf) 80 | self.low3 = Residual(nf, f) 81 | self.up2 = nn.Upsample(scale_factor=2, mode='nearest') 82 | 83 | def forward(self, x): 84 | up1 = self.up1(x) 85 | pool1 = self.pool1(x) 86 | low1 = self.low1(pool1) 87 | low2 = self.low2(low1) 88 | low3 = self.low3(low2) 89 | up2 = self.up2(low3) 90 | return up1 + up2 -------------------------------------------------------------------------------- /models/multiview_a2j.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | 
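# Pipeline of this module: depth_crop_expand() renders virtual views of the input
# crop, the shared A2J_model predicts per-view 2D joints and depths, the predictions
# are lifted to 3D and transformed back to the input camera, then fused either by a
# plain mean (joint_3d_fused) or by A2JConfNet's confidence-weighted sum (joint_3d_conf).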
import torch.nn.functional as F 4 | import numpy as np 5 | import os 6 | import sys 7 | dir = os.path.dirname(os.path.abspath(__file__)) 8 | root = os.path.dirname(dir) 9 | sys.path.append(root) 10 | from ops.render import depth_crop_expand 11 | from ops.image_ops import normalize_depth_expand, normalize_depth 12 | from ops.point_transform import transform_2D, transform_2D_to_3D, transform_3D 13 | from models.a2j import A2J_model 14 | from models.a2j_conf_net import A2JConfNet 15 | import logging 16 | logger = logging.getLogger(__file__) 17 | 18 | 19 | class MultiviewA2J(nn.Module): 20 | def __init__(self, camera, num_joints, n_head, d_attn, d_k, d_v, d_inner, dropout_rate, num_select, 21 | light=False, use_conf=True, random_select=False, random_sample=False): 22 | super(MultiviewA2J, self).__init__() 23 | self.camera = camera 24 | self.num_joints = num_joints 25 | self.n_head = n_head 26 | self.d_attn = d_attn 27 | self.d_k = d_k 28 | self.d_v = d_v 29 | self.d_inner = d_inner 30 | self.dropout_rate = dropout_rate 31 | self.num_select = num_select 32 | self.light = light 33 | self.use_conf = use_conf 34 | self.random_select = random_select 35 | self.random_sample = random_sample 36 | self.fx = camera["fx"] 37 | self.fy = camera["fy"] 38 | self.u0 = camera["u0"] 39 | self.v0 = camera["v0"] 40 | self.a2j = A2J_model(num_joints, dropout_rate=dropout_rate, light=light) 41 | self.conf_fuse_net = A2JConfNet(n_head, d_attn, d_k, d_v, d_inner, dropout_rate, num_select, random_select) 42 | 43 | def forward(self, cropped, crop_trans, com_2d, inter_matrix, cube, level, view_trans=None): 44 | """ 45 | :param cropped: Tensor(B, 1, 176, 176) or Tensor(B, N, 1, 176, 176) 46 | :param crop_trans: Tensor(B, 3, 3) 47 | :param com_2d: Tensor(B, 3) 48 | :param inter_matrix: Tensor(B, 3, 3) 49 | :param cube: Tensor(B, 3) 50 | :param level: int 51 | :return: 52 | crop_expand: Tensor(B, num_views, 1, H, W) 53 | anchor_joints_2d_crop: Tensor(B, num_views, num_joints, 2) 54 | regression_joints_2d_crop: Tensor(B, num_views, num_joints, 2) 55 | depth_value_norm: Tensor(B, num_views, num_joints) 56 | joints_3d: Tensor(B, num_views, num_joints, 3) 57 | view_trans: Tensor(B, num_views, 4, 4) 58 | joint_3d_fused: Tensor(B, num_joints, 3) 59 | classification: Tensor(B*num_views, w/16*h/16*A, num_joints) 60 | regression: Tensor(B*num_views, w/16*h/16*A, num_joints, 2) 61 | depthregression: Tensor(B*num_views, w/16*h/16*A, num_joints) 62 | """ 63 | if level==-1: 64 | assert view_trans is not None 65 | B, num_views, _, H, W = cropped.shape 66 | crop_expand = cropped 67 | else: 68 | B, _, H, W = cropped.shape 69 | if level>0: 70 | with torch.no_grad(): 71 | # crop_expand: Tensor(B, num_views, 1, H, W) 72 | # view_trans: Tensor(B, num_views, 4, 4) 73 | crop_expand, view_trans = depth_crop_expand(cropped, self.fx, self.fy, self.u0, self.v0, crop_trans, 74 | level, com_2d, self.random_sample, False) 75 | elif level==0: 76 | if self.random_sample: 77 | crop_expand, view_trans = depth_crop_expand(cropped, self.fx, self.fy, self.u0, self.v0, crop_trans, 78 | level, com_2d, self.random_sample, False) 79 | else: 80 | crop_expand = cropped[:, None, :, :, :] 81 | view_trans = torch.eye(4, dtype=torch.float32)[None, None, :, :] 82 | view_trans = view_trans.repeat((B, 1, 1, 1)).to(cropped.device) 83 | 84 | B, num_views, _, H, W = crop_expand.shape 85 | crop_expand = normalize_depth_expand(crop_expand, com_2d, cube) 86 | crop_expand = crop_expand.reshape((B * num_views, 1, H, W)) 87 | 88 | 89 | # classification: (B*num_views, 
w/16*h/16*A, num_joints) 90 | # regression: (B*num_views, w/16*h/16*A, num_joints, 2) 91 | # depthregression: (B*num_views, w/16*h/16*A, num_joints) 92 | # anchor_joints_2d: (B*num_views, num_joints, 2) 93 | # regression_joints_2d: (B*num_views, num_joints, 2) 94 | # depth_value: (B*num_views, num_joints) 95 | classification, regression, depthregression, anchor_joints_2d_crop, regression_joints_2d_crop, \ 96 | depth_value_norm = self.a2j(crop_expand) 97 | 98 | inv_corp_trans = torch.inverse(crop_trans) 99 | inv_corp_trans_expand = inv_corp_trans[:, None, :, :].repeat([1, num_views, 1, 1]) 100 | inv_corp_trans_expand = inv_corp_trans_expand.reshape([-1, 3, 3]) 101 | regression_joints_2d = transform_2D(regression_joints_2d_crop, inv_corp_trans_expand) 102 | com_z_expand = com_2d[:, 2][:, None].repeat([1, num_views]).reshape([B*num_views, 1]) 103 | cube_z_expand = cube[:, 2][:, None].repeat([1, num_views]).reshape([B*num_views, 1]) 104 | depth_value = depth_value_norm * cube_z_expand/2. + com_z_expand 105 | regression_joints_2d = regression_joints_2d.reshape([B, num_views, self.num_joints, 2]) 106 | depth_value = depth_value.reshape([B, num_views, self.num_joints]) 107 | # joints_3d_trans: (B, num_views, num_joints, 3) 108 | joints_3d_trans = torch.cat([regression_joints_2d, depth_value[..., None]], dim=-1) 109 | joints_3d_trans = transform_2D_to_3D(joints_3d_trans, self.fx, self.fy, self.u0, self.v0) 110 | joints_3d = transform_3D(joints_3d_trans, torch.inverse(view_trans)) 111 | joint_3d_fused = torch.mean(joints_3d, dim=1) 112 | 113 | crop_expand = crop_expand.reshape((B, num_views, 1, H, W)) 114 | anchor_joints_2d_crop = anchor_joints_2d_crop.reshape((B, num_views, self.num_joints, 2)) 115 | regression_joints_2d_crop = regression_joints_2d_crop.reshape((B, num_views, self.num_joints, 2)) 116 | depth_value_norm = depth_value_norm.reshape([B, num_views, self.num_joints]) 117 | 118 | num_anchors = classification.shape[1] 119 | classification = torch.reshape(classification, (B, num_views, num_anchors, self.num_joints)) 120 | regression = torch.reshape(regression, (B, num_views, num_anchors, self.num_joints, 2)) 121 | depthregression = torch.reshape(depthregression, (B, num_views, num_anchors, self.num_joints)) 122 | 123 | if self.use_conf: 124 | if level!=0: 125 | conf, joint_3d_conf = self.conf_fuse_net(classification, regression, depthregression, joints_3d) 126 | else: 127 | conf = torch.ones((B, 1), dtype=torch.float32) 128 | joint_3d_conf = joint_3d_fused 129 | else: 130 | joint_3d_conf = joint_3d_fused 131 | conf = None 132 | 133 | return crop_expand, anchor_joints_2d_crop, regression_joints_2d_crop, depth_value_norm, joints_3d, view_trans,\ 134 | joint_3d_fused, classification, regression, depthregression, conf, joint_3d_conf 135 | 136 | 137 | if __name__ == '__main__': 138 | from feeders.nyu_feeder import NyuFeeder, collate_fn 139 | from torch.utils.data.dataloader import DataLoader 140 | import json 141 | 142 | dataset_config = json.load(open("../config/dataset/nyu.json", 'r')) 143 | train_dataset = NyuFeeder('train') 144 | dataloader = DataLoader(train_dataset, batch_size=6) 145 | predictor = MultiviewA2J(dataset_config["camera"], 14).cuda() 146 | for batch_idx, batch_data in enumerate(dataloader): 147 | item, depth, cropped, joint_3d, crop_trans, com_2d, inter_matrix, cube = batch_data 148 | cropped = cropped.cuda() 149 | crop_trans = crop_trans.cuda() 150 | com_2d = com_2d.cuda() 151 | inter_matrix = inter_matrix.cuda() 152 | cube = cube.cuda() 153 | crop_expand, anchor_joints_2d, 
regression_joints_2d, depth_value, joints_3d, view_trans = \ 154 | predictor(cropped, crop_trans, com_2d, inter_matrix, cube, level=4) 155 | print(crop_expand.shape) 156 | print(anchor_joints_2d.shape) 157 | print(regression_joints_2d.shape) 158 | print(depth_value.shape) 159 | print(joints_3d.shape) 160 | print(view_trans.shape) 161 | break 162 | -------------------------------------------------------------------------------- /models/resnet.py: -------------------------------------------------------------------------------- 1 | import torch.nn as nn 2 | import torch.utils.model_zoo as model_zoo 3 | 4 | 5 | __all__ = ['ResNet', 'resnet18', 'resnet34', 'resnet50', 'resnet101', 6 | 'resnet152'] 7 | 8 | 9 | model_urls = { 10 | 'resnet18': 'https://download.pytorch.org/models/resnet18-5c106cde.pth', 11 | 'resnet34': 'https://download.pytorch.org/models/resnet34-333f7ec4.pth', 12 | 'resnet50': 'https://download.pytorch.org/models/resnet50-19c8e357.pth', 13 | 'resnet101': 'https://download.pytorch.org/models/resnet101-5d3b4d8f.pth', 14 | 'resnet152': 'https://download.pytorch.org/models/resnet152-b121ed2d.pth', 15 | } 16 | 17 | 18 | def conv3x3(in_planes, out_planes, stride=1, dilation=1): 19 | """3x3 convolution with padding""" 20 | return nn.Conv2d(in_planes, out_planes, kernel_size=3, stride=stride, dilation=dilation, 21 | padding=dilation, bias=False) 22 | 23 | 24 | def conv1x1(in_planes, out_planes, stride=1): 25 | """1x1 convolution""" 26 | return nn.Conv2d(in_planes, out_planes, kernel_size=1, stride=stride, bias=False) 27 | 28 | 29 | class BasicBlock(nn.Module): 30 | expansion = 1 31 | 32 | def __init__(self, inplanes, planes, stride=1, downsample=None, dilation=1): 33 | super(BasicBlock, self).__init__() 34 | self.conv1 = conv3x3(inplanes, planes, stride) 35 | self.bn1 = nn.BatchNorm2d(planes) 36 | self.relu = nn.ReLU(inplace=True) 37 | self.conv2 = conv3x3(planes, planes, dilation=dilation) 38 | self.bn2 = nn.BatchNorm2d(planes) 39 | self.downsample = downsample 40 | self.stride = stride 41 | 42 | def forward(self, x): 43 | identity = x 44 | 45 | out = self.conv1(x) 46 | out = self.bn1(out) 47 | out = self.relu(out) 48 | 49 | out = self.conv2(out) 50 | out = self.bn2(out) 51 | 52 | if self.downsample is not None: 53 | identity = self.downsample(x) 54 | 55 | out += identity 56 | out = self.relu(out) 57 | 58 | return out 59 | 60 | 61 | class Bottleneck(nn.Module): 62 | expansion = 4 63 | 64 | def __init__(self, inplanes, planes, stride=1, downsample=None, dilation=1): 65 | super(Bottleneck, self).__init__() 66 | self.conv1 = conv1x1(inplanes, planes) 67 | self.bn1 = nn.BatchNorm2d(planes) 68 | self.conv2 = conv3x3(planes, planes, stride, dilation=dilation) 69 | self.bn2 = nn.BatchNorm2d(planes) 70 | self.conv3 = conv1x1(planes, planes * self.expansion) 71 | self.bn3 = nn.BatchNorm2d(planes * self.expansion) 72 | self.relu = nn.ReLU(inplace=True) 73 | self.downsample = downsample 74 | self.stride = stride 75 | 76 | def forward(self, x): 77 | identity = x 78 | 79 | out = self.conv1(x) 80 | out = self.bn1(out) 81 | out = self.relu(out) 82 | 83 | out = self.conv2(out) 84 | out = self.bn2(out) 85 | out = self.relu(out) 86 | 87 | out = self.conv3(out) 88 | out = self.bn3(out) 89 | 90 | if self.downsample is not None: 91 | identity = self.downsample(x) 92 | 93 | out += identity 94 | out = self.relu(out) 95 | 96 | return out 97 | 98 | 99 | class ResNet(nn.Module): 100 | 101 | def __init__(self, block, layers, num_classes=1000, zero_init_residual=False): 102 | super(ResNet, self).__init__() 103 | 
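# Torchvision-style ResNet with one change: layer4 is built with stride=1 and
# dilation=2 (see the _make_layer call below), so the deepest feature map stays
# at 1/16 of the input resolution (11x11 for the 176x176 crops), which is what
# A2J_model's 11x11 anchor grid with stride 16 expects.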
self.inplanes = 64 104 | self.conv1 = nn.Conv2d(3, 64, kernel_size=7, stride=2, padding=3, 105 | bias=False) 106 | self.bn1 = nn.BatchNorm2d(64) 107 | self.relu = nn.ReLU(inplace=True) 108 | self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1) 109 | self.layer1 = self._make_layer(block, 64, layers[0]) 110 | self.layer2 = self._make_layer(block, 128, layers[1], stride=2) 111 | self.layer3 = self._make_layer(block, 256, layers[2], stride=2) 112 | self.layer4 = self._make_layer(block, 512, layers[3], stride=1,dilation=2) 113 | self.avgpool = nn.AdaptiveAvgPool2d((1, 1)) 114 | self.fc = nn.Linear(512 * block.expansion, num_classes) 115 | 116 | for m in self.modules(): 117 | if isinstance(m, nn.Conv2d): 118 | nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu') 119 | elif isinstance(m, nn.BatchNorm2d): 120 | nn.init.constant_(m.weight, 1) 121 | nn.init.constant_(m.bias, 0) 122 | 123 | # Zero-initialize the last BN in each residual branch, 124 | # so that the residual branch starts with zeros, and each residual block behaves like an identity. 125 | # This improves the model by 0.2~0.3% according to https://arxiv.org/abs/1706.02677 126 | if zero_init_residual: 127 | for m in self.modules(): 128 | if isinstance(m, Bottleneck): 129 | nn.init.constant_(m.bn3.weight, 0) 130 | elif isinstance(m, BasicBlock): 131 | nn.init.constant_(m.bn2.weight, 0) 132 | 133 | def _make_layer(self, block, planes, blocks, stride=1, dilation=1): 134 | downsample = None 135 | if stride != 1 or self.inplanes != planes * block.expansion: 136 | downsample = nn.Sequential( 137 | conv1x1(self.inplanes, planes * block.expansion, stride), 138 | nn.BatchNorm2d(planes * block.expansion), 139 | ) 140 | 141 | layers = [] 142 | layers.append(block(self.inplanes, planes, stride, downsample)) 143 | self.inplanes = planes * block.expansion 144 | for _ in range(1, blocks): 145 | layers.append(block(self.inplanes, planes, dilation=dilation)) 146 | 147 | return nn.Sequential(*layers) 148 | 149 | def forward(self, x): 150 | x = self.conv1(x) 151 | x = self.bn1(x) 152 | x = self.relu(x) 153 | x = self.maxpool(x) 154 | 155 | x = self.layer1(x) 156 | x = self.layer2(x) 157 | x = self.layer3(x) 158 | x = self.layer4(x) 159 | 160 | x = self.avgpool(x) 161 | x = x.view(x.size(0), -1) 162 | x = self.fc(x) 163 | 164 | return x 165 | 166 | 167 | def resnet18(pretrained=False, **kwargs): 168 | """Constructs a ResNet-18 model. 169 | Args: 170 | pretrained (bool): If True, returns a model pre-trained on ImageNet 171 | """ 172 | model = ResNet(BasicBlock, [2, 2, 2, 2], **kwargs) 173 | if pretrained: 174 | model.load_state_dict(model_zoo.load_url(model_urls['resnet18'])) 175 | return model 176 | 177 | 178 | def resnet34(pretrained=False, **kwargs): 179 | """Constructs a ResNet-34 model. 180 | Args: 181 | pretrained (bool): If True, returns a model pre-trained on ImageNet 182 | """ 183 | model = ResNet(BasicBlock, [3, 4, 6, 3], **kwargs) 184 | if pretrained: 185 | model.load_state_dict(model_zoo.load_url(model_urls['resnet34'])) 186 | return model 187 | 188 | 189 | def resnet50(pretrained=False, **kwargs): 190 | """Constructs a ResNet-50 model. 191 | Args: 192 | pretrained (bool): If True, returns a model pre-trained on ImageNet 193 | """ 194 | model = ResNet(Bottleneck, [3, 4, 6, 3], **kwargs) 195 | if pretrained: 196 | model.load_state_dict(model_zoo.load_url(model_urls['resnet50'])) 197 | return model 198 | 199 | 200 | def resnet101(pretrained=False, **kwargs): 201 | """Constructs a ResNet-101 model. 
202 | Args: 203 | pretrained (bool): If True, returns a model pre-trained on ImageNet 204 | """ 205 | model = ResNet(Bottleneck, [3, 4, 23, 3], **kwargs) 206 | if pretrained: 207 | model.load_state_dict(model_zoo.load_url(model_urls['resnet101'])) 208 | return model 209 | 210 | 211 | def resnet152(pretrained=False, **kwargs): 212 | """Constructs a ResNet-152 model. 213 | Args: 214 | pretrained (bool): If True, returns a model pre-trained on ImageNet 215 | """ 216 | model = ResNet(Bottleneck, [3, 8, 36, 3], **kwargs) 217 | if pretrained: 218 | model.load_state_dict(model_zoo.load_url(model_urls['resnet152'])) 219 | return model -------------------------------------------------------------------------------- /models/view_selector_a2j.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import os 4 | import sys 5 | dir = os.path.dirname(os.path.abspath(__file__)) 6 | root = os.path.dirname(dir) 7 | sys.path.append(root) 8 | from models.multiview_a2j import MultiviewA2J 9 | from ops.point_transform import transform_2D_to_3D 10 | from ops.render import uniform_view_matrix, render_view, depth_crop_expand 11 | from ops.image_ops import normalize_depth_expand 12 | 13 | 14 | class ViewSelector(nn.Module): 15 | def __init__(self, multiview_a2j, conf_net, random): 16 | super().__init__() 17 | self.multiview_a2j = multiview_a2j 18 | self.conf_net = conf_net 19 | self.random = random 20 | 21 | self.multiview_a2j.eval() 22 | self.num_joints = self.multiview_a2j.num_joints 23 | self.camera = self.multiview_a2j.camera 24 | self.fx = self.camera["fx"] 25 | self.fy = self.camera["fy"] 26 | self.u0 = self.camera["u0"] 27 | self.v0 = self.camera["v0"] 28 | 29 | def train(self, mode=True): 30 | self.training = mode 31 | for module in self.children(): 32 | if module==self.conf_net: 33 | module.train(mode) 34 | return self 35 | 36 | def select(self, conf, k, random=False): 37 | """ 38 | 39 | :param conf: Tensor(B, N) 40 | :param k: int 41 | :return: 42 | conf_select: Tensor(B, k) 43 | id_select: Tensor(B, k) 44 | """ 45 | B, N = conf.shape 46 | if random: 47 | conf_select_list = [] 48 | id_select_list = [] 49 | for i in range(B): 50 | id = torch.arange(0, N, device=conf.device) 51 | id = id[torch.randperm(N)] 52 | id_select = id[:k] 53 | 54 | conf_select = conf[i, id_select] 55 | conf_select_list.append(conf_select) 56 | id_select_list.append(id_select) 57 | conf_select = torch.stack(conf_select_list, dim=0) 58 | id_select = torch.stack(id_select_list, dim=0) 59 | else: 60 | conf_select, id_select = torch.topk(conf, k, dim=-1) # (B, k) 61 | 62 | return conf_select, id_select 63 | 64 | def select_crop(self, crop_expand, view_trans, conf, k): 65 | """ 66 | :param crop_expand: Tensor(B, N, 1, 176, 176) 67 | :param view_trans: Tensor(B, N, 4, 4) 68 | :param conf: Tensor(B, N) 69 | :param k: int 70 | :return: 71 | crop_select: Tensor(B, k, 1, 176, 176) 72 | joint_3d_select: Tensor(B, k, J, 3) 73 | conf_select: Tensor(B, k) 74 | id_select: Tensor(B, k) 75 | """ 76 | B, N, _, W, H = crop_expand.shape 77 | conf_select, id_select = torch.topk(conf, k, dim=-1) # (B, k) 78 | 79 | id_select_expand = id_select[:, :, None, None, None].repeat((1, 1, 1, W, H)) 80 | crop_select = torch.gather(crop_expand, 1, id_select_expand) # (B, k, 1, 176, 176) 81 | 82 | id_select_expand = id_select[:, :, None, None].repeat((1, 1, 4, 4)) 83 | view_trans_select = torch.gather(view_trans, 1, id_select_expand) # (B, k, 4, 4) 84 | 85 | return crop_select, 
view_trans_select, conf_select, id_select 86 | 87 | def forward(self, cropped, crop_trans, com_2d, inter_matrix, cube, level, k, inference): 88 | """ 89 | :param cropped: Tensor(B, 1, 176, 176) 90 | :param crop_trans: Tensor(B, 3, 3) 91 | :param com_2d: Tensor(B, 3) 92 | :param inter_matrix: Tensor(B, 3, 3) 93 | :param cube: Tensor(B, 3) 94 | :param level: int 95 | :param k: int 96 | :inference: bool 97 | :return: 98 | """ 99 | if level==1: 100 | self.shape = [1, 3] 101 | elif level==2: 102 | self.shape = [3, 3] 103 | elif level==3: 104 | self.shape = [3, 5] 105 | elif level==4: 106 | self.shape = [5, 5] 107 | elif level==5: 108 | self.shape = [9, 9] 109 | else: 110 | raise NotImplemented 111 | 112 | conf_light = self.conf_net(cropped) 113 | 114 | with torch.no_grad(): 115 | conf_select_light, id_select_light = self.select(conf_light, k, self.random) 116 | crop_select_light, view_trans_select_light = depth_crop_expand(cropped, self.fx, self.fy, self.u0, self.v0, 117 | crop_trans, level, com_2d, False, random_ratote=False, indices=id_select_light) 118 | crop_select_light = normalize_depth_expand(crop_select_light, com_2d, cube) 119 | _, _, _, _, joints_3d_pred_select_light, _, joint_3d_fused_select_light, _, _, _, _, _ = \ 120 | self.multiview_a2j(crop_select_light, crop_trans, com_2d, inter_matrix, cube, level=-1, 121 | view_trans=view_trans_select_light) 122 | conf_select_light = torch.softmax(conf_select_light, dim=-1) # (B, k) 123 | 124 | joint_3d_conf_select_light = joints_3d_pred_select_light * conf_select_light[:, :, None, None] 125 | joint_3d_conf_select_light = torch.sum(joint_3d_conf_select_light, 1) 126 | if inference: 127 | return joints_3d_pred_select_light, joint_3d_fused_select_light, joint_3d_conf_select_light 128 | else: 129 | crop_expand, anchor_joints_2d_crop, regression_joints_2d_crop, depth_value_norm, joints_3d_pred, \ 130 | view_trans, joint_3d_fused, classification, regression, depthregression, conf, joint_3d_conf_select = \ 131 | self.multiview_a2j(cropped, crop_trans, com_2d, inter_matrix, cube, level=level) 132 | 133 | return crop_expand, view_trans, anchor_joints_2d_crop, regression_joints_2d_crop, depth_value_norm, \ 134 | joints_3d_pred, joint_3d_fused, conf, joint_3d_conf_select, joints_3d_pred_select_light, \ 135 | joint_3d_fused_select_light, joint_3d_conf_select_light, conf_light 136 | -------------------------------------------------------------------------------- /ops/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/iscas3dv/handpose-virtualview/d220efa69ff031077381bc0d4cd58fae7049c329/ops/__init__.py -------------------------------------------------------------------------------- /ops/cuda/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/iscas3dv/handpose-virtualview/d220efa69ff031077381bc0d4cd58fae7049c329/ops/cuda/__init__.py -------------------------------------------------------------------------------- /ops/cuda/depth_to_point_cloud_mask_cuda.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | #include 4 | 5 | // CUDA forward declarations 6 | 7 | std::vector depth_to_point_cloud_mask_cuda_forward(torch::Tensor depthmap); 8 | 9 | // C++ interface 10 | 11 | #define CHECK_CUDA(x) AT_ASSERTM(x.type().is_cuda(), #x " must be a CUDA tensor") 12 | #define CHECK_CONTIGUOUS(x) AT_ASSERTM(x.is_contiguous(), #x " must be contiguous") 13 | #define 
CHECK_INPUT(x) CHECK_CUDA(x); CHECK_CONTIGUOUS(x) 14 | 15 | std::vector depth_to_point_cloud_mask_forward(torch::Tensor depthmap) { 16 | CHECK_INPUT(depthmap); 17 | 18 | return depth_to_point_cloud_mask_cuda_forward(depthmap); 19 | } 20 | 21 | PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) { 22 | m.def("forward", &depth_to_point_cloud_mask_forward, "depth to point cloud mask forward"); 23 | } -------------------------------------------------------------------------------- /ops/cuda/depth_to_point_cloud_mask_cuda_kernel.cu: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | #include 4 | #include 5 | #include "helper_cuda.h" 6 | 7 | #include 8 | 9 | namespace { 10 | // input: depthmap(b, h*w) 11 | // output: point_cloud (b, h*w, 3), mask (b, h*w) 12 | __global__ void depth_to_point_cloud_mask_forward_kernel( 13 | const torch::PackedTensorAccessor32 depthmap, 14 | torch::PackedTensorAccessor32 point_cloud, 15 | torch::PackedTensorAccessor32 mask, 16 | const int h, const int w){ 17 | //batch index 18 | const int n = blockIdx.y; 19 | // column index 20 | const int c = blockIdx.x * blockDim.x + threadIdx.x; 21 | if(c < depthmap.size(1)) { 22 | int d = depthmap[n][c]; 23 | point_cloud[n][c][0] = c%w; 24 | point_cloud[n][c][1] = c/w; 25 | point_cloud[n][c][2] = d==0?1:d; // avoid dividing 0 in 3D to 2D transform 26 | mask[n][c] = (d!=0); 27 | } 28 | } 29 | } // namespace 30 | 31 | // input: depthmap: (b, h, w, 1) 32 | // output: point_cloud: (b, h*w, 3), mask: (b, h*w) 33 | std::vector depth_to_point_cloud_mask_cuda_forward(torch::Tensor depthmap) { 34 | const int b = depthmap.size(0); 35 | const int h = depthmap.size(1); 36 | const int w = depthmap.size(2); 37 | depthmap = depthmap.reshape({b, h*w}); 38 | auto point_cloud = torch::zeros({b, h*w, 3}, 39 | torch::TensorOptions().dtype(depthmap.scalar_type()).device(depthmap.device())); 40 | auto mask = torch::zeros({b, h*w}, torch::TensorOptions().dtype(torch::kInt32).device(depthmap.device())); 41 | 42 | const int threads = 1024; 43 | const dim3 blocks((h*w + threads - 1) / threads, b); 44 | 45 | AT_DISPATCH_INTEGRAL_TYPES(depthmap.scalar_type(), "depth_to_point_cloud_mask_forward_cuda", ([&]() { 46 | depth_to_point_cloud_mask_forward_kernel<<>>( 47 | depthmap.packed_accessor32(), 48 | point_cloud.packed_accessor32(), 49 | mask.packed_accessor32(), 50 | h, w); 51 | })); 52 | getLastCudaError("depth_to_point_cloud_mask_forward_kernel() execution failed."); 53 | checkCudaErrors(cudaDeviceSynchronize()); 54 | return {point_cloud, mask}; 55 | } 56 | 57 | -------------------------------------------------------------------------------- /ops/cuda/point_cloud_mask_to_depth_cuda.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | #include 4 | 5 | // CUDA forward declarations 6 | 7 | torch::Tensor point_cloud_mask_to_depth_cuda_forward(torch::Tensor point_cloud, torch::Tensor mask, 8 | const int h, const int w); 9 | 10 | // C++ interface 11 | 12 | #define CHECK_CUDA(x) AT_ASSERTM(x.type().is_cuda(), #x " must be a CUDA tensor") 13 | #define CHECK_CONTIGUOUS(x) AT_ASSERTM(x.is_contiguous(), #x " must be contiguous") 14 | #define CHECK_INPUT(x) CHECK_CUDA(x); CHECK_CONTIGUOUS(x) 15 | 16 | torch::Tensor point_cloud_mask_to_depth_forward(torch::Tensor point_cloud, torch::Tensor mask, 17 | const int h, const int w) { 18 | CHECK_INPUT(point_cloud); 19 | 20 | return point_cloud_mask_to_depth_cuda_forward(point_cloud, mask, h, w); 21 | } 22 | 23 | 
PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) { 24 | m.def("forward", &point_cloud_mask_to_depth_forward, "point cloud mask to depth forward"); 25 | } -------------------------------------------------------------------------------- /ops/cuda/point_cloud_mask_to_depth_cuda_kernel.cu: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | #include 4 | #include 5 | #include "helper_cuda.h" 6 | 7 | #include 8 | 9 | namespace { 10 | // input: point_cloud (b,h*w,3) 11 | // output: depthmap(b,h*w), mask (b, h*w) 12 | __global__ void point_cloud_mask_to_depth_forward_kernel( 13 | torch::PackedTensorAccessor32 depthmap, 14 | const torch::PackedTensorAccessor32 point_cloud, 15 | const torch::PackedTensorAccessor32 mask, 16 | const int h, const int w) { 17 | //batch index 18 | const int n = blockIdx.y; 19 | // column index 20 | const int c = blockIdx.x * blockDim.x + threadIdx.x; 21 | if(c < depthmap.size(1) && mask[n][c]) { 22 | int u = point_cloud[n][c][0], v = point_cloud[n][c][1], d = point_cloud[n][c][2]; 23 | if(0<=u && u depthmap, 33 | const int h, const int w, const int bg_val) { 34 | //batch index 35 | const int n = blockIdx.y; 36 | // column index 37 | const int c = blockIdx.x * blockDim.x + threadIdx.x; 38 | if(c < depthmap.size(1) && depthmap[n][c] == INT_MAX) { 39 | depthmap[n][c] = bg_val; 40 | } 41 | } 42 | } // namespace 43 | 44 | // input: point_cloud: (b, h*w, 3), mask: (b, h*w) 45 | // output: depthmap: (b, h, w, 1) 46 | torch::Tensor point_cloud_mask_to_depth_cuda_forward(torch::Tensor point_cloud, torch::Tensor mask, 47 | const int h, const int w) { 48 | const int b = point_cloud.size(0); 49 | auto depthmap = torch::full({b, h*w}, INT_MAX, 50 | torch::TensorOptions().dtype(point_cloud.scalar_type()).device(point_cloud.device())); 51 | 52 | const int threads = 1024; 53 | const dim3 blocks((h*w + threads - 1) / threads, b); 54 | 55 | AT_DISPATCH_INTEGRAL_TYPES(depthmap.scalar_type(), "point_cloud_mask_to_depth_forward_cuda", ([&]() { 56 | point_cloud_mask_to_depth_forward_kernel<<>>( 57 | depthmap.packed_accessor32(), 58 | point_cloud.packed_accessor32(), 59 | mask.packed_accessor32(), 60 | h, w); 61 | })); 62 | getLastCudaError("point_cloud_mask_to_depth_forward_kernel() execution failed."); 63 | checkCudaErrors(cudaDeviceSynchronize()); 64 | 65 | int bg_val = 0; 66 | AT_DISPATCH_INTEGRAL_TYPES(depthmap.scalar_type(), "set_background_forward_kernel", ([&]() { 67 | set_background_forward_kernel<<>>( 68 | depthmap.packed_accessor32(), 69 | h, w, bg_val); 70 | })); 71 | getLastCudaError("set_background_forward_kernel() execution failed."); 72 | checkCudaErrors(cudaDeviceSynchronize()); 73 | depthmap = depthmap.reshape({b, h, w, 1}); 74 | return depthmap; 75 | } -------------------------------------------------------------------------------- /ops/cuda/setup.py: -------------------------------------------------------------------------------- 1 | from setuptools import setup 2 | from torch.utils.cpp_extension import BuildExtension, CUDAExtension 3 | import torch 4 | import os 5 | 6 | arch_list = [] 7 | for i in range(torch.cuda.device_count()): 8 | arch = '{}.{}'.format(*torch.cuda.get_device_capability(i)) 9 | if arch not in arch_list: 10 | arch_list.append(arch) 11 | arch_list = ';'.join(arch_list) 12 | os.environ["TORCH_CUDA_ARCH_LIST"] = arch_list 13 | 14 | setup( 15 | name='render_cuda', 16 | ext_modules=[ 17 | CUDAExtension('depth_to_point_cloud_mask_cuda', [ 18 | 'depth_to_point_cloud_mask_cuda.cpp', 19 | 
'depth_to_point_cloud_mask_cuda_kernel.cu', 20 | ]), 21 | CUDAExtension('point_cloud_mask_to_depth_cuda', [ 22 | 'point_cloud_mask_to_depth_cuda.cpp', 23 | 'point_cloud_mask_to_depth_cuda_kernel.cu', 24 | ]) 25 | ], 26 | cmdclass={ 27 | 'build_ext': BuildExtension 28 | }) -------------------------------------------------------------------------------- /ops/image_ops.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn.functional as F 3 | import numpy as np 4 | 5 | 6 | def normalize_depth(depth, com_2d, cube): 7 | """Normalize depth to [-1, 1] 8 | 9 | :param depth: (B, 1, H, W) 10 | :param com_2d: (B, 3) 11 | :param cube_z: float 12 | :return: 13 | """ 14 | B, _, H, W = depth.shape 15 | background = (depth<1e-3).float() 16 | com_z = com_2d[:, 2] 17 | com_z = com_z[:, None, None, None].repeat((1, 1, H, W)) 18 | cube_z = cube[:, 2] 19 | cube_z = cube_z[:, None, None, None].repeat((1, 1, H, W)) 20 | norm_depth = depth + background * (com_z + (cube_z / 2.)) 21 | norm_depth = (norm_depth-com_z) / (cube_z/2.) 22 | return norm_depth 23 | 24 | 25 | def normalize_depth_expand(depth_expand, com_2d, cube): 26 | """Normalize depth expand to [-1, 1] 27 | 28 | :param depth: (B, num_views, 1, H, W) 29 | :param com_2d: (B, 3) 30 | :param cube_z: (B, 3) 31 | :return: 32 | """ 33 | B, N, _, H, W = depth_expand.shape 34 | background = (depth_expand<1e-3).float() 35 | com_z = com_2d[:, 2] 36 | com_z = com_z[:, None, None, None, None].repeat((1, N, 1, H, W)) 37 | cube_z = cube[:, 2] 38 | cube_z = cube_z[:, None, None, None, None].repeat((1, N, 1, H, W)) 39 | norm_depth_expand = depth_expand + background * (com_z + (cube_z / 2.)) 40 | norm_depth_expand = (norm_depth_expand - com_z) / (cube_z / 2.) 41 | return norm_depth_expand 42 | 43 | 44 | def normalize_image(img): 45 | """ 46 | 47 | :param img: Tensor(B, 1, H, W) 48 | :return: Tensor(B, 1, H, W) 49 | """ 50 | B, _, H, W = img.shape 51 | t_min, _ = torch.min(img.reshape([B, -1]), dim=-1) 52 | t_max, _ = torch.max(img.reshape([B, -1]), dim=-1) 53 | t_min = t_min[:, None].repeat(1, H*W).reshape([B, 1, H, W]) 54 | t_max = t_max[:, None].repeat(1, H*W).reshape([B, 1, H, W]) 55 | img = (img-t_min)/(t_max-t_min) 56 | return img 57 | 58 | 59 | sobel_x = np.array([[-1, 0, 1], [-2, 0, 2], [-1, 0, 1]], dtype='float32') 60 | sobel_x = sobel_x.reshape((1, 1, 3, 3)) 61 | sobel_y = np.array([[-1, -2, -1], [0, 0, 0], [1, 2, 1]], dtype='float32') 62 | sobel_y = sobel_y.reshape((1, 1, 3, 3)) 63 | pad = torch.nn.ReplicationPad2d(1) 64 | 65 | def sobel_edge(img): 66 | """ 67 | 68 | :param img: Tensor(B, 1, H, W) 69 | :return: 70 | """ 71 | weight_x = torch.tensor(sobel_x, device=img.device, requires_grad=False) 72 | weight_y = torch.tensor(sobel_y, device=img.device, requires_grad=False) 73 | img = pad(img) 74 | edge_x = F.conv2d(img, weight_x) 75 | edge_y = F.conv2d(img, weight_y) 76 | edge = torch.abs(edge_x) + torch.abs(edge_y) 77 | # edge = torch.sqrt(edge_x*edge_x+edge_y*edge_y) 78 | return edge 79 | 80 | 81 | def normalize_edge(edge: torch.Tensor): 82 | """ 83 | 84 | :param edge: Tensor(B, 1, H, W) 85 | :return: 86 | """ 87 | B, _, H, W = edge.size() 88 | edge = edge.reshape([B, -1]) 89 | torch.min(edge, 1, keepdim=True) 90 | t_min = torch.min(edge, 1, keepdim=True)[0].repeat([1, H*W]) 91 | t_max = torch.max(edge, 1, keepdim=True)[0].repeat([1, H*W]) 92 | edge = (edge-t_min) / (t_max-t_min) 93 | edge = edge.reshape([B, 1, H, W]) 94 | return edge 95 | 96 | 97 | if __name__ == '__main__': 98 | B, N, H, W = 
4, 12, 128, 128 99 | depth_expand = torch.randn((B, N, 1, H, W)).cuda() 100 | com_2d = torch.randn((B, 3)).cuda() 101 | cube_z = 125. 102 | output = normalize_depth_expand(depth_expand, com_2d, cube_z) 103 | print(output.shape) -------------------------------------------------------------------------------- /ops/joint_ops.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import numpy as np 3 | import time 4 | import os 5 | import sys 6 | dir = os.path.dirname(os.path.abspath(__file__)) 7 | root = os.path.dirname(dir) 8 | sys.path.append(root) 9 | from ops.loss_ops import gen_2D_gaussion_map 10 | 11 | 12 | def solve(coor, b): 13 | """Ax=b, solve x, where 14 | A = [[coor[?, 0]**2, coor[?, 0], 1], 15 | [coor[?, 1]**2, coor[?, 1], 1], 16 | [coor[?, 2]**2, coor[?, 2], 1]] 17 | x is (a, b, c) which is coefficient of quadratic function, 18 | 19 | :param coor: Tensor(B, 3) 20 | :param b: Tensor(B, 3) 21 | :return: Tensor(B) -b/(a*2), the symmetry axis of quadratic function 22 | 23 | """ 24 | B = coor.shape[0] 25 | A = torch.ones((B, 3, 3), dtype=torch.float32, device=b.device) 26 | A[:, :, 1] = coor 27 | A[:, :, 0] = A[:, :, 1]*A[:, :, 1] 28 | U, D, V = torch.svd(A) # (B, 3, 3), (B, 3), (B, 3, 3) 29 | b_ = U.transpose(-2, -1) @ b[:, :, None] # (B, 3, 1) 30 | b_ = b_.squeeze() # (B, 3) 31 | y = b_/D # (B, 3) 32 | result = V @ y[:, :, None] # (B, 3, 1) 33 | result = result.squeeze(dim=-1) # (B, 3) 34 | not_zero = (result[:, 0]!=0) 35 | x = coor[:, 1].clone() 36 | x[not_zero] = -result[not_zero, 1] / (result[not_zero, 0] * 2) 37 | return x 38 | 39 | 40 | def heatmap_to_loc(heatmap, adjust=True): 41 | """ 42 | 43 | :param heatmap: Tensor(B, num_joints, H, W) 44 | :return: Tensor(B, num_joints, 2) 45 | """ 46 | device = heatmap.device 47 | B, J, H, W = heatmap.shape 48 | heatmap = heatmap.reshape((B*J, H, W)) 49 | dense_flat = heatmap.reshape((B*J, -1)) 50 | loc = torch.argmax(dense_flat, dim=-1) 51 | y = loc//W # (B*num_joints) 52 | x = loc%W # (B*num_joints) 53 | xx = x.float()+0.5 54 | yy = y.float()+0.5 55 | 56 | # adjust location. It is extremely slow on GPU, so we use CPU 57 | if adjust: 58 | x, y, xx, yy, dense_flat, loc = x.cpu(), y.cpu(), xx.cpu(), yy.cpu(), dense_flat.cpu(), loc.cpu() 59 | x_adjust_index = (0b_x[:, 2]] = -0.25 72 | # adjust_coor_x = coor_x[:, 1] + adjust 73 | # xx[x_adjust_index] = adjust_coor_x 74 | 75 | y_adjust_index = (0 < y) & (y < H - 1) 76 | if torch.any(y_adjust_index): 77 | coor_y = yy[y_adjust_index, None].repeat([1, 3]).float() 78 | coor_y[:, 0] -= 1. 79 | coor_y[:, 2] += 1. 
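            # Sub-pixel refinement along y: sample the heatmap at rows (y-1, y, y+1),
            # fit a parabola through the three values with solve(), and move the peak
            # estimate to the parabola's vertex. The commented lines further down are
            # the cheaper fixed +/-0.25 shift kept as an alternative to this fit.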
80 | b_y = torch.zeros_like(coor_y) 81 | b_y[:, 0] = dense_flat[y_adjust_index, loc[y_adjust_index] - W] 82 | b_y[:, 1] = dense_flat[y_adjust_index, loc[y_adjust_index]] 83 | b_y[:, 2] = dense_flat[y_adjust_index, loc[y_adjust_index] + W] 84 | yy[y_adjust_index] = solve(coor_y, b_y) 85 | # adjust = torch.zeros_like(coor_y[:, 1]) 86 | # adjust[b_y[:, 0] < b_y[:, 2]] = 0.25 87 | # adjust[b_y[:, 0] > b_y[:, 2]] = -0.25 88 | # adjust_coor_y = coor_y[:, 1] + adjust 89 | # yy[y_adjust_index] = adjust_coor_y 90 | 91 | xx, yy = xx.to(device), yy.to(device) 92 | 93 | # for b in range(B): 94 | # for j in range(J): 95 | # ax, ay = x[b, j], y[b, j] 96 | # tmp = heatmap[b, j] 97 | # # if (ax, ay) is not on bound 98 | # if 0tmp[ay, ax+1]: 102 | # # xx[b, j] -= 0.25 103 | # xx[b, j] = solve(torch.stack([ax-0.5, ax+0.5, ax+1.5]), tmp[ay, ax-1:ax+2]) 104 | # 105 | # if 0tmp[ay+1, ax]: 109 | # # yy[b, j] -= 0.25 110 | # yy[b, j] = solve(torch.stack([ay-0.5, ay+0.5, ay+1.5]), tmp[ay-1:ay+2, ax]) 111 | xx = xx.reshape([B, J]) 112 | yy = yy.reshape([B, J]) 113 | return torch.stack([xx, yy], dim=-1) 114 | 115 | 116 | def get_projection_matrices(inter_matrix, view_trans): 117 | """ 118 | 119 | :param inter_matrix: Tensor(B, 3, 3) 120 | :param view_trans: Tensor(B, num_views, 4, 4) 121 | :return: Tensor(B, num_views, 3, 4) 122 | """ 123 | B, N = view_trans.size(0), view_trans.size(1) 124 | eye = torch.eye(3, 4, dtype=torch.float32, device=inter_matrix.device) 125 | eye = eye[None, ...].repeat([B, 1, 1]) # (B, 3, 4) 126 | proj_mat = inter_matrix@eye # (B, 3, 4) 127 | proj_mat = proj_mat[:, None, :, :].repeat(1, N, 1, 1) # (B, num_views, 3, 4) 128 | proj_mat = proj_mat @ view_trans # (B, num_views, 3, 4) 129 | return proj_mat 130 | 131 | 132 | def triangulate(joint_2d, cam_mat, weight=None): 133 | """ 134 | 135 | :param joint_2d: Tensor(B, num_joints, num_views, 2) 136 | :param cam_mat: Tensor(B, num_joints, num_views, 3, 4) 137 | :param weight: Tensor(B, num_joints, num_views) 138 | :return: 139 | """ 140 | B, J, N, _ = joint_2d.shape 141 | joint_2d = joint_2d[..., None] # (B, num_joints, num_views, 2, 1) 142 | c2 = cam_mat[..., 2:, :] # (B, num_joints, num_views, 1, 4) 143 | c12 = cam_mat[..., :2, :] # (B, num_joints, num_views, 2, 4) 144 | A = joint_2d @ c2 - c12 # (B, num_joints, num_views, 2, 4) 145 | if weight is not None: 146 | weight = weight[:, :, :, None, None].repeat([1, 1, 1, 2, 4]) 147 | A = A * weight 148 | A = A.reshape([B, J, N*2, 4]) # (B, num_joints, num_views*2, 4) 149 | device = A.device 150 | A = A.cpu() 151 | _, _, V = torch.svd(A) # (B, num_joints, 4, 4) 152 | V = V.to(device) 153 | X = V[..., -1] # (B, num_joints, 4) 154 | X = X / X[..., -1, None] # (B, num_joints, 4) 155 | joint_3d = X[..., :-1] # (B, num_joints, 3) 156 | return joint_3d 157 | 158 | 159 | def compute_joint_3d(joint_2d, inter_matrix, view_trans, weight=None): 160 | """Calculate 3D joint according to 2D joint. 
161 | 162 | :param joint_2d: Tensor(B, num_views, num_joints, 2) 163 | :param inter_matrix: Tensor(B, 3, 3) 164 | :param view_trans: Tensor(B, num_views, 4, 4) 165 | :param weight: Tensor(B, num_joints, num_views) 166 | :return: Tensor(B, J, 3) 167 | """ 168 | J = joint_2d.size(2) 169 | proj_mat = get_projection_matrices(inter_matrix, view_trans) # (B, num_views, 3, 4) 170 | joint_2d = joint_2d.permute([0, 2, 1, 3]) # (B, num_joints, num_views, 2) 171 | proj_mat = proj_mat[:, None, ...].repeat([1,J, 1, 1, 1]) # (B, num_joints, num_views, 3, 4) 172 | joint_3d = triangulate(joint_2d, proj_mat, weight) 173 | return joint_3d 174 | 175 | 176 | def compute_joint_3d_view_select(confidence, joint_2d_pred, inter_matrix, view_trans): 177 | """ 178 | 179 | :param confidence: Tensor(B, N) 180 | :param joint_2d_pred: Tensor(B, N, J, 2) 181 | :param inter_matrix: Tensor(B, 3, 3) 182 | :param view_trans: Tensor(B, N, 4, 4) 183 | :return: 184 | joint_3d_select: Tensor(B, J, 3) 185 | """ 186 | B, N, J, _ = joint_2d_pred.shape 187 | indices = torch.multinomial(confidence, 10, replacement=False) 188 | joint_2d_indices = indices[:, :, None, None].repeat([1, 1, J, 2]) 189 | joint_2d_select = torch.gather(joint_2d_pred, 1, joint_2d_indices) 190 | view_trans_indices = indices[:, :, None, None].repeat([1, 1, 4, 4]) 191 | view_trans_select = torch.gather(view_trans, 1, view_trans_indices) 192 | 193 | # _, indices = torch.sort(confidence, dim=-1, descending=True) 194 | # joint_2d_indices = indices[:, :, None, None].repeat([1, 1, J, 2]) 195 | # joint_2d_select = joint_2d_pred.reshape([-1])[joint_2d_indices.reshape(-1)<10].reshape([B, 10, J, 2]) 196 | # view_trans_indices = indices[:, :, None, None].repeat([1, 1, 4, 4]) 197 | # view_trans_select = view_trans.reshape([-1])[view_trans_indices.reshape(-1)<10].reshape([B, 10, 4, 4]) 198 | 199 | joint_3d_select = compute_joint_3d(joint_2d_select, inter_matrix, view_trans_select) 200 | return joint_3d_select 201 | 202 | 203 | if __name__ == '__main__': 204 | # x = torch.from_numpy(np.array([1., 1., 1.], dtype=np.float32)) 205 | # b = torch.from_numpy(np.array([3., 3., 3.], dtype=np.float32)) 206 | # print(solve(x, b)) 207 | H = W = 32 208 | B = 480 209 | joint_2d = torch.rand((B, 14, 3), dtype=torch.float32)*32 210 | joint_2d[:, :, -1] = 1. 
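    # Quick sanity check of solve() (an illustrative sketch added here, not part of
    # the original harness): a parabola through (0, 1), (1, 0), (2, 1) has its vertex
    # at x = 1, so the refined coordinate should come back as ~1.
    coor_demo = torch.tensor([[0., 1., 2.]])
    b_demo = torch.tensor([[1., 0., 1.]])
    print(solve(coor_demo, b_demo))  # expected ~ tensor([1.])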
211 | # joint_2d = torch.ones((B, 14, 2), dtype=torch.float32) 212 | # joint_2d = torch.ones((B, 14, 2), dtype=torch.float32) 213 | # joint_2d[:, :, 1] = torch.rand((B, 14), dtype=torch.float32)*32 214 | # joint_2d[:, :, 0] = torch.rand((B, 14), dtype=torch.float32) * 32 215 | print(joint_2d[0]) 216 | joint_2d = joint_2d.cuda() 217 | 218 | heatmap = gen_2D_gaussion_map(joint_2d, H, W, 1, 1, 1) 219 | # print(heatmap.shape) 220 | # heatmap = torch.zeros((1, 2, 32, 32), dtype=torch.float32) 221 | # heatmap[:, :, 5:8, 5:8] = 1 222 | split = time.time() 223 | loc = heatmap_to_loc(heatmap) 224 | print(loc[0]) 225 | print(time.time() - split) 226 | 227 | split = time.time() 228 | loc = heatmap_to_loc(heatmap, False) 229 | print(loc[0]) 230 | print(time.time() - split) 231 | -------------------------------------------------------------------------------- /ops/loss_ops.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn.functional as F 3 | from torch.nn import Module 4 | import math 5 | import numpy as np 6 | import os 7 | import sys 8 | dir = os.path.dirname(os.path.abspath(__file__)) 9 | root = os.path.dirname(dir) 10 | sys.path.append(root) 11 | from ops.point_transform import transform_3D, transform_3D_to_2D, transform_2D 12 | 13 | 14 | def gen_2D_gaussion_map(joint_2d, H, W, fx, fy, sigma): 15 | """ 16 | 17 | :param joint_2d: Tensor(B, J, 3) 18 | :param H: 19 | :param W: 20 | :param sigma: 21 | :return: Tensor(B, J, H, W) 22 | """ 23 | B, J, _ = joint_2d.shape 24 | u = torch.arange(W, device=joint_2d.device) 25 | v = torch.arange(H, device=joint_2d.device) 26 | v_t, u_t = torch.meshgrid([v, u]) # (H, W) 27 | grid = torch.stack([u_t, v_t], dim=-1)[None, ...].repeat([B, 1, 1, 1]) # (B, H, W, 2) 28 | grid = grid.reshape([B, H*W, 2]) 29 | grid = grid[:, None, :, :].repeat([1, J, 1, 1]) # (B, J, H*W, 2) 30 | grid = grid.float() + 0.5 # coordinate of pixel is on center of pixel 31 | joint_2d = joint_2d[:, :, None, :].repeat([1, 1, H*W, 1]) # (B, J, W*H, 2) 32 | scale = joint_2d[:, :, :, 2] 33 | diff_x = ((grid[..., 0] - joint_2d[..., 0]) * scale / fx) ** 2 34 | diff_y = ((grid[..., 1] - joint_2d[..., 1]) * scale / fy) ** 2 35 | # diff_x = (grid[..., 0] - joint_2d[..., 0]) ** 2 36 | # diff_y = (grid[..., 1] - joint_2d[..., 1]) ** 2 37 | diff = diff_x + diff_y 38 | gaussian_map = 1 / (math.sqrt(2 * math.pi) * sigma) * torch.exp(-diff/(2*(sigma ** 2))) 39 | gaussian_map = gaussian_map.reshape([B, J, H, W]) 40 | return gaussian_map 41 | 42 | 43 | class LossCalculator(Module): 44 | def forward(self, heatmaps, joint_2d_pred, joint_3d_pred, view_trans, crop_trans, fx, fy, u0, v0, joint_3d_gt): 45 | """ 46 | 47 | :param heatmap: Tensor(B, N, nstack, J, H, W) 48 | :param joint_2d_pred: Tensor(B, N, J, 2) 49 | :param joint_3d_pred: Tensor(B, J, 3) 50 | :param view_trans: Tensor(B, N, 4, 4) 51 | :param crop_trans: Tensor(B, 3, 3) 52 | :param fx: float 53 | :param fy: float 54 | :param u0: float 55 | :param v0: float 56 | :param joint_3d_gt: Tensor(B, J, 3) 57 | :return: 58 | """ 59 | B, N, nstack, J, H, W = heatmaps.shape 60 | joint_3d_gt_expand = joint_3d_gt[:, None, :, :].repeat([1, N, 1, 1]) # (B, N, J, 3) 61 | crop_trans = crop_trans[:, None, :, :].repeat([1, N, 1, 1]) # (B, N, 3, 3) 62 | joint_3d_gt_expand = joint_3d_gt_expand.reshape([B * N, J, 3]) # (B*N, J, 3) 63 | view_trans = view_trans.reshape([B * N, 4, 4]) 64 | crop_trans = crop_trans.reshape([B * N, 3, 3]) 65 | joint_3d_gt_expand = transform_3D(joint_3d_gt_expand, view_trans) # 
(B*N, J, 3) 66 | joint_2d_gt = transform_3D_to_2D(joint_3d_gt_expand, fx, fy, u0, v0) # (B*N, J, 2) 67 | joint_2d_gt_crop = transform_2D(joint_2d_gt, crop_trans) / 4. 68 | heatmaps = heatmaps.reshape([B * N * nstack, J, H, W]) 69 | gaussian_maps = gen_2D_gaussion_map(joint_2d_gt_crop, H, W, fx, fy, sigma=0.4) # (B*N, J, H, W) 70 | gaussian_maps = gaussian_maps[:, None, :, :, :].repeat([1, nstack, 1, 1, 1]).reshape([B*N*nstack, J, H, W]) 71 | hm_loss = F.mse_loss(heatmaps, gaussian_maps, reduction='none') 72 | hm_loss = hm_loss.reshape([B, -1]).mean(-1) 73 | 74 | joint_2d_pred = joint_2d_pred.reshape([B*N, J, 2]) 75 | error_2d = torch.norm(joint_2d_pred-joint_2d_gt[..., :2], dim=-1).mean(-1).reshape([B, N]).mean(-1) 76 | error_3d = torch.norm(joint_3d_pred - joint_3d_gt, dim=-1).mean(-1) 77 | return hm_loss, error_2d, error_3d, gaussian_maps.reshape([B, N, nstack, J, H, W]) 78 | 79 | 80 | class MultiA2JCalculator(Module): 81 | def __init__(self, reg_factor, conf_factor): 82 | super().__init__() 83 | self.reg_factor = reg_factor 84 | self.conf_factor = conf_factor 85 | self.smooth_l1_loss = torch.nn.SmoothL1Loss(reduction='none') 86 | 87 | def forward(self, anchor_joints_2d_crop, regression_joints_2d_crop, depth_value_norm, joints_3d_pred, 88 | joints_3d_fused, joint_3d_conf, view_trans, crop_trans, com_2d, cube, fx, fy, u0, v0, joints_3d_gt): 89 | """ 90 | :param anchor_joints_2d_crop: Tensor(B, N, J, 2) 91 | :param regression_joints_2d_crop: Tensor(B, N, J, 2) 92 | :param depth_value_norm: Tensor(B, N, J) 93 | :param joint_3d_pred: Tensor(B, N, J, 3) 94 | :param joints_3d_fused: Tensor(B, J, 3) 95 | :param joint_3d_conf: Tensor(B, J, 3) 96 | :param view_trans: Tensor(B, N, 4, 4) 97 | :param crop_trans: Tensor(B, 3, 3) 98 | :param com_2d: Tensor(B, 3) 99 | :param cube: Tensor(B, 3) 100 | :param fx: float 101 | :param fy: float 102 | :param u0: float 103 | :param v0: float 104 | :param joints_3d_gt: Tensor(B, J, 3) 105 | :return: 106 | """ 107 | B, N, J, _ = anchor_joints_2d_crop.shape 108 | joints_3d_gt_expand = joints_3d_gt[:, None, :, :].repeat([1, N, 1, 1]) # (B, N, J, 3) 109 | joints_3d_gt_expand = transform_3D(joints_3d_gt_expand, view_trans) # (B, N, J, 3) 110 | joints_2d_gt_expand = transform_3D_to_2D(joints_3d_gt_expand, fx, fy, u0, v0)[..., :2] # (B, N, J, 2) 111 | crop_trans_expand = crop_trans[:, None, :, :].repeat([1, N, 1, 1]) 112 | joints_2d_gt_expand_crop = transform_2D(joints_2d_gt_expand, crop_trans_expand) 113 | com_z_expand = com_2d[:, None, :].repeat([1, N, 1])[:, :, 2:] 114 | cube_z_expand = cube[:, None, :].repeat([1, N, 1])[:, :, 2:] 115 | depth_gt_norm_expand = (joints_3d_gt_expand[..., 2]-com_z_expand)/(cube_z_expand/2) 116 | 117 | anchor_loss = self.smooth_l1_loss(anchor_joints_2d_crop, joints_2d_gt_expand_crop) 118 | regression_loss = self.smooth_l1_loss(regression_joints_2d_crop, joints_2d_gt_expand_crop) 119 | depth_loss = self.smooth_l1_loss(depth_value_norm, depth_gt_norm_expand) 120 | conf_loss = self.smooth_l1_loss(joint_3d_conf, joints_3d_gt) 121 | 122 | anchor_loss = anchor_loss.reshape([B, -1]).mean(-1) 123 | regression_loss = regression_loss.reshape([B, -1]).mean(-1) 124 | depth_loss = depth_loss.reshape([B, -1]).mean(-1) 125 | conf_loss = conf_loss.reshape([B, -1]).mean(-1) 126 | 127 | reg_loss = regression_loss*0.5 + depth_loss 128 | 129 | loss = anchor_loss + reg_loss * self.reg_factor + conf_loss*self.conf_factor 130 | 131 | error_3d = torch.norm(joints_3d_pred-joints_3d_gt[:, None, :, :], dim=-1).mean(-1) 132 | error_3d_fused = 
torch.norm(joints_3d_fused-joints_3d_gt, dim=-1).mean(-1) 133 | error_3d_conf = torch.norm(joint_3d_conf-joints_3d_gt, dim=-1).mean(-1) 134 | center_error_3d = error_3d[:, N//2] 135 | min_error_3d, _ = torch.min(error_3d, dim=-1) 136 | mean_error_3d = torch.mean(error_3d, dim=-1) 137 | return anchor_loss, reg_loss, conf_loss, loss, center_error_3d, min_error_3d, mean_error_3d, error_3d, \ 138 | error_3d_fused, error_3d_conf 139 | 140 | 141 | class ConfidenceLossCalculator(Module): 142 | def forward(self, confidence, joint_3d_pred, joint_3d_gt, view_trans, fx, fy, u0, v0, joint_2d_expand): 143 | ''' 144 | 145 | :param confidence: Tensor(B, N) 146 | :param joint_3d_pred: Tensor(B, J, 3) 147 | :param joint_3d_gt: Tensor(B, J, 3) 148 | :param view_trans: Tensor(B, N, 4, 4) 149 | :param fx: float 150 | :param fy: flaat 151 | :param u0: float 152 | :param v0: float 153 | :param joint_2d_expand: Tensor(B, N, J, 2) 154 | :return: 155 | ''' 156 | B, N = confidence.shape 157 | J = joint_3d_pred.shape[1] 158 | if N==3: 159 | map_shape = [1, 3] 160 | elif N==9: 161 | map_shape = [3, 3] 162 | elif N==15: 163 | map_shape = [3, 5] 164 | elif N==25: 165 | map_shape = [5, 5] 166 | elif N==81: 167 | map_shape = [9, 9] 168 | with torch.no_grad(): 169 | error_3d = torch.norm(joint_3d_pred - joint_3d_gt, dim=-1).mean(-1) 170 | loss = F.smooth_l1_loss(joint_3d_pred, joint_3d_gt, reduction='none').mean(-1).mean(-1) 171 | confidence = confidence.reshape([B]+map_shape) 172 | return loss, error_3d, confidence 173 | 174 | def get_confidence(self, error_2d, map_shape): 175 | """ 176 | 177 | :param error_2d: Tensor(B, J, N) 178 | :param map_shape: list 179 | :return: 180 | confidence: Tensor(B, N) 181 | """ 182 | B, J, N = error_2d.shape 183 | error_std, error_mean = torch.std_mean(error_2d, dim=-1) 184 | error_std = error_std[:, :, None].repeat([1, 1, N]) 185 | error_mean = error_mean[:, :, None].repeat([1, 1, N]) 186 | confidence = -(error_2d - error_mean) / error_std 187 | soft_confidence = torch.softmax(confidence, dim=-1) 188 | # gauss_confidence = confidence 189 | # confidence = confidence.reshape([B*J, 1]+map_shape) 190 | # gauss_confidence = gaussian_blur2d(confidence, (5, 5), (1, 1)) 191 | # gauss_confidence = gauss_confidence.reshape([B, J, N]) 192 | # soft_gauss_confidence = torch.softmax(gauss_confidence, dim=-1) 193 | return soft_confidence 194 | 195 | 196 | class ViewSelectLossCalculator(Module): 197 | def forward(self, light_heatmaps, heatmap, joint_3d_pred, view_trans, crop_trans, fx, fy, u0, v0, joint_3d_gt, 198 | alpha): 199 | """ 200 | 201 | :param light_heatmaps: Tensor(B, N, nstack, J, H, W) 202 | :param heatmap: Tensor(B, N, J, H, W) 203 | :param joint_3d_pred: Tensor(B, J, 3) 204 | :param view_trans: Tensor(B, N, 4, 4) 205 | :param crop_trans: Tensor(B, 3, 3) 206 | :param fx: float 207 | :param fy: float 208 | :param u0: float 209 | :param v0: float 210 | :param joint_3d_gt: Tensor(B, J, 3) 211 | :return: 212 | """ 213 | B, N, nstack, J, H, W = light_heatmaps.shape 214 | joint_3d_gt_expand = joint_3d_gt[:, None, :, :].repeat([1, N, 1, 1]) # (B, N, J, 3) 215 | crop_trans = crop_trans[:, None, :, :].repeat([1, N, 1, 1]) # (B, N, 3, 3) 216 | joint_3d_gt_expand = joint_3d_gt_expand.reshape([B * N, J, 3]) # (B*N, J, 3) 217 | view_trans = view_trans.reshape([B * N, 4, 4]) 218 | crop_trans = crop_trans.reshape([B * N, 3, 3]) 219 | joint_3d_gt_expand = transform_3D(joint_3d_gt_expand, view_trans) # (B*N, J, 3) 220 | joint_2d_gt = transform_3D_to_2D(joint_3d_gt_expand, fx, fy, u0, v0) # (B*N, J, 2) 221 | 
joint_2d_gt_crop = transform_2D(joint_2d_gt, crop_trans) / 4. 222 | if heatmap is not None: 223 | light_heatmaps = light_heatmaps.reshape([B * N * nstack, J, H, W]) 224 | gaussian_maps = gen_2D_gaussion_map(joint_2d_gt_crop, H, W, fx, fy, sigma=0.4) # (B*N, J, H, W) 225 | gaussian_maps = gaussian_maps[:, None, :, :, :].repeat([1, nstack, 1, 1, 1]).reshape( 226 | [B * N * nstack, J, H, W]) 227 | heatmap = heatmap[:, :, None, :, :, :].repeat(1, 1, nstack, 1, 1, 1) 228 | heatmap = heatmap.reshape([B * N * nstack, J, H, W]) 229 | hm_loss = alpha * F.mse_loss(light_heatmaps, gaussian_maps, reduction='none') + \ 230 | (1-alpha) * F.mse_loss(light_heatmaps, heatmap, reduction='none') 231 | hm_loss = hm_loss.reshape([B, -1]).mean(-1) 232 | gaussian_maps = gaussian_maps.reshape([B, N, nstack, J, H, W]) 233 | gaussian_maps = gaussian_maps[:, :, 0, :, :, :] # (B, N, J, H, W) 234 | else: 235 | hm_loss = torch.zeros([B], dtype=torch.float32, device=light_heatmaps.device) 236 | gaussian_maps = None 237 | error_3d = torch.norm(joint_3d_pred - joint_3d_gt, dim=-1).mean(-1) 238 | return hm_loss, error_3d, gaussian_maps 239 | 240 | 241 | class ViewSelectA2JLossCalculator(Module): 242 | def __init__(self, alpha, conf_factor): 243 | super().__init__() 244 | self.alpha = alpha 245 | self.conf_factor = conf_factor 246 | self.smooth_l1_loss = torch.nn.SmoothL1Loss(reduction='none') 247 | 248 | def forward(self, joints_3d_pred, joint_3d_fused, conf, joint_3d_conf_select, 249 | joints_3d_pred_select_light, joint_3d_fused_select_light, joint_3d_conf_select_light, conf_light, 250 | view_trans, crop_trans, com_2d, cube, fx, fy, u0, v0, joints_3d_gt): 251 | """ 252 | :param joints_3d_pred: Tensor(B, N, J, 3) 253 | :param joint_3d_fused: Tensor(B, J, 3) 254 | :param conf: Tensor(B, N) 255 | :param joint_3d_conf_select: Tensor(B, J, 3) 256 | :param joints_3d_pred_select_light: Tensor(B, k, J, 3) 257 | :param joint_3d_fused_select_light: Tensor(B, J, 3) 258 | :param joint_3d_conf_select_light: Tensor(B, J, 3) 259 | :param conf_light: Tensor(B, N) 260 | :param crop_trans: Tensor(B, 3, 3) 261 | :param com_2d: Tensor(B, 3) 262 | :param cube: Tensor(B, 3) 263 | :param fx: float 264 | :param fy: float 265 | :param u0: float 266 | :param v0: float 267 | :param joints_3d_gt: Tensor(B, J, 3) 268 | :return: 269 | """ 270 | B, N, J, _ = joints_3d_pred.shape 271 | joints_3d_gt_expand = joints_3d_gt[:, None, :, :].repeat([1, N, 1, 1]) # (B, N, J, 3) 272 | joints_3d_gt_expand = transform_3D(joints_3d_gt_expand, view_trans) # (B, N, J, 3) 273 | joints_2d_gt_expand = transform_3D_to_2D(joints_3d_gt_expand, fx, fy, u0, v0)[..., :2] # (B, N, J, 2) 274 | crop_trans_expand = crop_trans[:, None, :, :].repeat([1, N, 1, 1]) 275 | joints_2d_gt_expand_crop = transform_2D(joints_2d_gt_expand, crop_trans_expand) 276 | com_z_expand = com_2d[:, None, :].repeat([1, N, 1])[:, :, 2:] 277 | cube_z_expand = cube[:, None, :].repeat([1, N, 1])[:, :, 2:] 278 | depth_gt_norm_expand = (joints_3d_gt_expand[..., 2] - com_z_expand) / (cube_z_expand / 2) 279 | 280 | # sub_conf = conf[:, :, None]-conf[:, None, :] 281 | # sub_conf_light = conf_light[:, :, None]-conf_light[:, None, :] 282 | # conf_loss = self.smooth_l1_loss(sub_conf*100, sub_conf_light*100) 283 | conf_loss = self.smooth_l1_loss(conf_light * self.conf_factor, conf * self.conf_factor) 284 | 285 | conf_loss = conf_loss.reshape([B, -1]).mean(-1) 286 | 287 | loss = conf_loss 288 | 289 | error_3d_fused_select_light = torch.norm(joint_3d_fused_select_light - joints_3d_gt, dim=-1).mean(-1) 290 | 
error_3d_conf_select_light = torch.norm(joint_3d_conf_select_light - joints_3d_gt, dim=-1).mean(-1) 291 | 292 | error_3d_fused = torch.norm(joint_3d_fused - joints_3d_gt, dim=-1).mean(-1) 293 | error_3d_conf_select = torch.norm(joint_3d_conf_select - joints_3d_gt, dim=-1).mean(-1) 294 | return conf_loss, loss, error_3d_fused_select_light, error_3d_conf_select_light,\ 295 | error_3d_fused, error_3d_conf_select 296 | 297 | 298 | class ViewSelectLossCalculator2(Module): 299 | def forward(self, joint_3d, joint_3d_uniform, confidence_select, joint_3d_gt): 300 | """ 301 | 302 | :param joint_3d: Tensor(B, J, 3) 303 | :param joint_3d_uniform: Tensor(B, J, 3) 304 | :param confidence: Tensor(B, num_views) 305 | :param joint_3d_gt: Tensor(B, J, 3) 306 | :return: 307 | """ 308 | error_3d = torch.norm(joint_3d - joint_3d_gt, dim=-1).mean(-1) 309 | error_3d_uniform = torch.norm(joint_3d_uniform - joint_3d_gt, dim=-1).mean(-1) 310 | # reward = torch.ones_like(error_3d) 311 | # reward[error_3d_uniform 0] = reward[reward > 0] * 10 316 | # loss = torch.mean(-torch.log(confidence_select.mean(dim=-1))*reward, dim=-1) + 1e-3*error_3d/error_3d_uniform 317 | loss = -torch.log(confidence_select.sum(dim=-1)) * weight 318 | return loss, error_3d, error_3d_uniform, reward 319 | 320 | 321 | class FocalLoss_Ori(Module): 322 | """ 323 | This is a implementation of Focal Loss with smooth label cross entropy supported which is proposed in 324 | 'Focal Loss for Dense Object Detection. (https://arxiv.org/abs/1708.02002)' 325 | Focal_Loss= -1*alpha*(1-pt)*log(pt) 326 | :param num_class: 327 | :param alpha: (tensor) 3D or 4D the scalar factor for this criterion 328 | :param gamma: (float,double) gamma > 0 reduces the relative loss for well-classified examples (p>0.5) putting more 329 | focus on hard misclassified example 330 | :param smooth: (float,double) smooth value when cross entropy 331 | :param size_average: (bool, optional) By default, the losses are averaged over each loss element in the batch. 332 | """ 333 | 334 | def __init__(self, num_class, alpha=[0.25,0.75], gamma=2, balance_index=-1): 335 | super(FocalLoss_Ori, self).__init__() 336 | self.num_class = num_class 337 | self.alpha = alpha 338 | self.gamma = gamma 339 | self.eps = 1e-6 340 | 341 | if isinstance(self.alpha, (list, tuple)): 342 | assert len(self.alpha) == self.num_class 343 | self.alpha = torch.Tensor(list(self.alpha)) 344 | elif isinstance(self.alpha, (float,int)): 345 | assert 0-1 347 | alpha = torch.ones((self.num_class)) 348 | alpha *= 1-self.alpha 349 | alpha[balance_index] = self.alpha 350 | self.alpha = alpha 351 | elif isinstance(self.alpha,torch.Tensor): 352 | self.alpha = self.alpha 353 | else: 354 | raise TypeError('Not support alpha type, expect `int|float|list|tuple|torch.Tensor`') 355 | 356 | def forward(self, logit, pred, target): 357 | B = logit.size(0) 358 | if logit.dim() > 2: 359 | # N,C,d1,d2 -> N,C,m (m=d1*d2*...) 360 | logit = logit.view(logit.size(0), logit.size(1), -1) 361 | logit = logit.transpose(1, 2).contiguous() # [N,C,d1*d2..] 
-> [N,d1*d2..,C] 362 | logit = logit.view(-1, logit.size(-1)) # [N,d1*d2..,C]-> [N*d1*d2..,C] 363 | p = torch.softmax(logit, -1) 364 | target = target.view(-1, 1) # [N,d1,d2,...]->[N*d1*d2*...,1] 365 | 366 | # -----------legacy way------------ 367 | # idx = target.cpu().long() 368 | # one_hot_key = torch.FloatTensor(target.size(0), self.num_class).zero_() 369 | # one_hot_key = one_hot_key.scatter_(1, idx, 1) 370 | # if one_hot_key.device != logit.device: 371 | # one_hot_key = one_hot_key.to(logit.device) 372 | # pt = (one_hot_key * logit).sum(1) + epsilon 373 | 374 | # ----------memory saving way-------- 375 | pt = p.gather(1, target).view(-1) + self.eps # avoid apply 376 | logpt = pt.log() 377 | 378 | if self.alpha.device != logpt.device: 379 | alpha = self.alpha.to(logpt.device) 380 | alpha_class = alpha.gather(0,target.view(-1)) 381 | logpt = alpha_class*logpt 382 | loss = -1 * torch.pow(torch.sub(1.0, pt), self.gamma) * logpt 383 | loss = torch.reshape(loss, [B, -1]).mean(dim=-1) 384 | target = target.reshape([B, -1]) 385 | pred = pred.reshape([B, -1]) 386 | acc = torch.sum(target==pred, dim=-1, dtype=torch.float32)/target.shape[-1] 387 | return loss, acc 388 | 389 | if __name__ == '__main__': 390 | fx, fy, u0, v0 = 588.03, 587.07, 320., 320. 391 | # joint_2d = np.array([[0.5, 0.5, 500], 392 | # [0.5, 1.5, 500], 393 | # [0.5, 2.5, 500]], dtype=np.float32) 394 | # joint_2d = torch.from_numpy(joint_2d[None, :, :]) 395 | # gaussian = gen_2D_gaussion_map(joint_2d, 3, 5, 1) 396 | # print(gaussian) 397 | 398 | # B, N, nstack, J, H, W = 8, 40, 4, 14, 64, 64 399 | # heatmaps = torch.rand((B, N, nstack, J, H, W), dtype=torch.float32).cuda() 400 | # joint_2d_pred = torch.rand((B, N, J, 2), dtype=torch.float32).cuda() 401 | # joint_3d_pred = torch.rand((B, J, 3), dtype=torch.float32).cuda() 402 | # view_trans = torch.rand((B, N, 4, 4), dtype=torch.float32).cuda() 403 | # crop_trans = torch.rand((B, 3, 3), dtype=torch.float32).cuda() 404 | # joint_3d_gt = torch.rand((B, J, 3), dtype=torch.float32).cuda() 405 | # loss_calc = LossCalculator() 406 | # loss_calc = torch.nn.DataParallel(loss_calc) 407 | # loss_calc = loss_calc.cuda() 408 | # hm_loss, error_2d, error_3d, gaussian_maps = loss_calc(heatmaps, joint_2d_pred, joint_3d_pred, view_trans, crop_trans, fx, fy, u0, v0, joint_3d_gt) 409 | # print(hm_loss.shape) 410 | # print(error_2d.shape) 411 | # print(error_3d.shape) 412 | # print(gaussian_maps.shape) 413 | 414 | # B = 8 415 | # N = 25 416 | # J = 14 417 | # error_2d_pred = torch.rand([B, N, J], dtype=torch.float32) 418 | # joint_2d_pred = torch.rand([B, N, J, 2], dtype=torch.float32) 419 | # joint_3d_gt = torch.rand([B, J, 3], dtype=torch.float32) 420 | # view_trans = torch.rand([B, N, 4, 4], dtype=torch.float32) 421 | # 422 | # error2d_loss_calc = ConfidenceLossCalculator() 423 | # error, loss, error_map_gt, error_map_pred = error2d_loss_calc(error_2d_pred, joint_2d_pred, joint_3d_gt, view_trans, fx, fy, u0, v0) 424 | # print(error.shape) 425 | # print(loss.shape) 426 | # print(error_map_gt.shape) 427 | # print(error_map_pred.shape) 428 | 429 | logit = torch.rand([4, 2, 480, 640], dtype=torch.float32) 430 | pred = torch.zeros([4, 480, 640], dtype=torch.int64) 431 | label = torch.zeros([4, 480, 640], dtype=torch.int64) 432 | loss_calc = FocalLoss_Ori(2) 433 | loss, acc = loss_calc(logit, pred, label) 434 | print(loss.shape) 435 | print(acc) 436 | print(acc.dtype) 437 | -------------------------------------------------------------------------------- /ops/point_transform.py: 
-------------------------------------------------------------------------------- 1 | import torch 2 | 3 | 4 | def transform_2D_to_3D(points, fx, fy, u0, v0): 5 | """ 6 | 7 | :param points: Tensor(..., 3) 8 | :param fx: 9 | :param fy: 10 | :param u0: 11 | :param v0: 12 | :return: Tensor(..., 3) 13 | """ 14 | x = (points[..., 0] - u0) * points[..., 2] / fx 15 | y = (points[..., 1] - v0) * points[..., 2] / fy 16 | z = points[..., 2] 17 | return torch.stack([x, y, z], dim=-1) 18 | 19 | 20 | def transform_3D_to_2D(points, fx, fy, u0, v0): 21 | u = points[..., 0] / points[..., 2] * fx + u0 22 | v = points[..., 1] / points[..., 2] * fy + v0 23 | d = points[..., 2] 24 | return torch.stack([u, v, d], dim=-1) 25 | 26 | 27 | def transform_3D(points, trans_matrix): 28 | """3D affine transformation 29 | 30 | :param points: Tensor(..., N, 3) 31 | :param trans_matrix: Tensor(..., 4, 4) 32 | :return: Tensor(B, N, 3) 33 | """ 34 | x = points[..., 0] 35 | y = points[..., 1] 36 | z = points[..., 2] 37 | ones = torch.ones_like(x) 38 | points_h = torch.stack([x, y, z, ones], -2) # (..., 4, N) 39 | points_h = trans_matrix @ points_h 40 | points = torch.transpose(points_h, -2, -1)[..., :3] # (..., N, 3) 41 | return points 42 | 43 | 44 | def transform_2D(points, trans_matirx): 45 | """2D affine transformation 46 | 47 | :param points: Tensor(..., N, 2|3) 48 | :param trans_matirx: Tensor(..., 3, 3) 49 | :return: Tensor(..., N, 2|3) 50 | """ 51 | d = points.size(-1) 52 | x = points[..., 0] 53 | y = points[..., 1] 54 | if d > 2: 55 | z = points[..., 2] 56 | ones = torch.ones_like(x) 57 | points_h = torch.stack([x, y, ones], axis=-2) # (B, 3, N) 58 | points_h = trans_matirx @ points_h 59 | points = torch.transpose(points_h, -2, -1) # (B, N, 3) 60 | if d > 2: 61 | points[..., 2] = z 62 | else: 63 | points = points[..., :2] 64 | return points 65 | 66 | 67 | def transform(points, trans_matrix): 68 | """2D or 3D affine transformation. 69 | This function is the same as the function of the above two functions. 70 | But it can backward. 
71 | 72 | :param points: Tensor(B, N, 3) 73 | :param trans_matrix: Tensor(B, 3/4, 3/4) 74 | :return: Tensor(B, N, 3) 75 | """ 76 | # B, N, _ = points.shape 77 | x = points[..., 0] 78 | y = points[..., 1] 79 | z = points[..., 2] 80 | ones = torch.ones_like(x, requires_grad=False) 81 | if trans_matrix.size(1)==4: 82 | # points = points.transpose(1, 2) 83 | # points[:, :3, :] = trans_matrix[:, :3, :3] @ points[:, :3, :] 84 | # points[:, :3, :] += trans_matrix[:, :3, 3, None].repeat([1, 1, N]) 85 | # points = points.transpose(1, 2) 86 | points_h = torch.stack([x, y, z, ones], -2) 87 | points_h = trans_matrix @ points_h 88 | points = torch.transpose(points_h, 1, 2)[..., :3] 89 | elif trans_matrix.size(1)==3: 90 | # points = points.transpose(1, 2) 91 | # points[:, :2, :] = trans_matrix[:, :2, :2] @ points[:, :2, :] 92 | # points[:, :2, :] += trans_matrix[:, :2, 2, None].repeat([1, 1, N]) 93 | # points = points.transpose(1, 2) 94 | points_h = torch.stack([x, y, ones], -2) 95 | points_h = trans_matrix @ points_h 96 | points = torch.transpose(points_h, 1, 2) 97 | points[..., 2] = z 98 | return points 99 | -------------------------------------------------------------------------------- /ops/render.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import numpy as np 3 | import depth_to_point_cloud_mask_cuda 4 | import point_cloud_mask_to_depth_cuda 5 | import sys 6 | import os 7 | dir = os.path.dirname(os.path.abspath(__file__)) 8 | root = os.path.dirname(dir) 9 | sys.path.append(root) 10 | from torch.utils.data import DataLoader 11 | from ops.point_transform import transform_3D, \ 12 | transform_2D_to_3D, transform_3D_to_2D, transform_2D 13 | from feeders.nyu_feeder import NyuFeeder 14 | import logging 15 | 16 | logging.basicConfig(level=logging.INFO, format="%(asctime)s: %(levelname)s %(name)s:%(lineno)d] %(message)s") 17 | logger = logging.getLogger(__file__) 18 | 19 | 20 | # def crop_trans_inv(crop_trans): 21 | # ''' 22 | # 23 | # :param crop_trans: Tensor(..., 3, 3) 24 | # :return: 25 | # ''' 26 | # inv = torch.zeros_like(crop_trans) 27 | # inv[..., 0, 0] = 1 / crop_trans[..., 0, 0] 28 | # inv[..., 0, 2] = -crop_trans[..., 0, 2] / crop_trans[..., 0, 0] 29 | # inv[..., 1, 1] = 1 / crop_trans[..., 1, 1] 30 | # inv[..., 1, 2] = -crop_trans[..., 1, 2] / crop_trans[..., 1, 1] 31 | # inv[..., 2, 2] = 1. 
32 | # 33 | # return inv 34 | 35 | 36 | def depth_to_point_cloud_mask(depth): 37 | """ 38 | 39 | :param depth: Tensor(B, 1, H, W) 40 | :return: point_cloud: Tensor(B, N, 3), mask: Tensor(B, N) 41 | """ 42 | depth = depth.permute((0, 2, 3, 1)) # (B, H, W, 1) 43 | return depth_to_point_cloud_mask_cuda.forward(depth.contiguous()) 44 | 45 | 46 | def point_cloud_mask_to_depth(point_cloud, mask, h, w): 47 | depth = point_cloud_mask_to_depth_cuda.forward(point_cloud.contiguous(), mask, h, w) # (B, H, W, 1) 48 | depth = depth.permute((0, 3, 1, 2)) # (B, 1, H, W) 49 | return depth 50 | 51 | 52 | def uniform_view_matrix(center, level, random_sample, random_rotate): 53 | """Uniform generation of view transformation matrix 54 | 55 | :param center: Tensor(B, 3), 3D coordinate 56 | :param level: int, 1, 2, 3, 4 or 5 57 | :return: Tensor(B, num_views, 4, 4) 58 | """ 59 | B = center.size(0) 60 | if random_sample: 61 | if level == 0: 62 | num_view = 1 63 | elif level == 1: 64 | num_view = 3 65 | elif level == 2: 66 | num_view = 9 67 | elif level == 3: 68 | num_view = 15 69 | elif level == 4: 70 | num_view = 25 71 | elif level == 5: 72 | num_view = 81 73 | else: 74 | logger.critical('level must be 1, 2, 3 or 4.') 75 | raise ValueError('level must be 1, 2, 3 or 4.') 76 | rotation = torch.from_numpy(np.random.uniform(-np.pi/3, np.pi/3, size=[num_view, 2])).to(center.device) 77 | rotation = rotation.float() 78 | else: 79 | if level == 1: 80 | # azimuth = torch.arange(-np.pi / 3, np.pi / 3 + 0.01, np.pi / 3, device=center.device) # 3 81 | azimuth = torch.linspace(-np.pi / 3, np.pi / 3, 3, device=center.device) 82 | elevation = torch.zeros([1], device=center.device) 83 | elif level == 2: 84 | # azimuth = torch.arange(-np.pi / 3, np.pi / 3 + 0.01, np.pi / 3, device=center.device) # 3 85 | # elevation = torch.arange(-np.pi / 3, np.pi / 3 + 0.01, np.pi / 3, device=center.device) # 3 86 | azimuth = torch.linspace(-np.pi / 3, np.pi / 3, 3, device=center.device) # 3 87 | elevation = torch.linspace(-np.pi / 3, np.pi / 3, 3, device=center.device) # 3 88 | elif level == 3: 89 | # azimuth = torch.arange(-np.pi / 3, np.pi / 3 + 0.01, np.pi / 6, device=center.device) # 5 90 | # elevation = torch.arange(-np.pi / 3, np.pi / 3 + 0.01, np.pi / 3, device=center.device) # 3 91 | azimuth = torch.linspace(-np.pi / 3, np.pi / 3, 5, device=center.device) # 5 92 | elevation = torch.linspace(-np.pi / 3, np.pi / 3, 3, device=center.device) # 3 93 | elif level == 4: 94 | # azimuth = torch.arange(-np.pi / 3, np.pi / 3 + 0.01, np.pi / 6, device=center.device) # 5 95 | # elevation = torch.arange(-np.pi / 3, np.pi / 3 + 0.01, np.pi / 6, device=center.device) # 5 96 | azimuth = torch.linspace(-np.pi / 3, np.pi / 3, 5, device=center.device) # 5 97 | elevation = torch.linspace(-np.pi / 3, np.pi / 3, 5, device=center.device) # 5 98 | elif level == 5: 99 | # azimuth = torch.arange(-np.pi / 3, np.pi / 3 + 0.01, np.pi / 12, device=center.device) # 9 100 | # elevation = torch.arange(-np.pi / 3, np.pi / 3 + 0.01, np.pi / 12, device=center.device) # 9 101 | azimuth = torch.linspace(-np.pi / 3, np.pi / 3, 9, device=center.device) # 9 102 | elevation = torch.linspace(-np.pi / 3, np.pi / 3, 9, device=center.device) # 9 103 | else: 104 | logger.critical('level must be 1, 2, 3 or 4.') 105 | raise ValueError('level must be 1, 2, 3 or 4.') 106 | 107 | elevation = elevation.float() 108 | azimuth = azimuth.float() 109 | 110 | rotation = torch.meshgrid(elevation, azimuth) 111 | rotation = torch.reshape(torch.stack(rotation, axis=-1), [-1, 2]) 112 | 113 | 
rotation = rotation[None, :, :].repeat(B, 1, 1) 114 | # print(rotation) 115 | 116 | N = rotation.size(1) 117 | r_theta_x = rotation[..., 0] 118 | r_theta_y = rotation[..., 1] 119 | if random_rotate: 120 | # r_theta_z = torch.rand([B, rotation.shape[1]], dtype=torch.float32, device=center.device) * np.pi * 2 121 | r_theta_z = torch.ones([B, rotation.shape[1]], dtype=torch.float32, device=center.device) * np.pi * 2 * \ 122 | np.random.rand() 123 | else: 124 | r_theta_z = torch.zeros([B, rotation.shape[1]], dtype=torch.float32, device=center.device) 125 | center = center.float() 126 | transform_center = center[:, None, :].repeat(1, N, 1) 127 | zeros = torch.zeros([B, N], dtype=torch.float32, device=center.device) 128 | ones = torch.ones([B, N], dtype=torch.float32, device=center.device) 129 | 130 | c, s = torch.cos(r_theta_x), torch.sin(r_theta_x) 131 | Rx = torch.stack([ones, zeros, zeros, zeros, 132 | zeros, c, -s, zeros, 133 | zeros, s, c, zeros, 134 | zeros, zeros, zeros, ones], axis=-1) 135 | Rx = torch.reshape(Rx, [B, N, 4, 4]) 136 | 137 | c, s = torch.cos(r_theta_y), torch.sin(r_theta_y) 138 | Ry = torch.stack([c, zeros, s, zeros, 139 | zeros, ones, zeros, zeros, 140 | -s, zeros, c, zeros, 141 | zeros, zeros, zeros, ones], axis=-1) 142 | Ry = torch.reshape(Ry, [B, N, 4, 4]) 143 | 144 | c, s = torch.cos(r_theta_z), torch.sin(r_theta_z) 145 | Rz = torch.stack([c, -s, zeros, zeros, 146 | s, c, zeros, zeros, 147 | zeros, zeros, ones, zeros, 148 | zeros, zeros, zeros, ones], axis=-1) 149 | Rz = torch.reshape(Rz, [B, N, 4, 4]) 150 | 151 | to_center = torch.stack([ones, zeros, zeros, -transform_center[..., 0], 152 | zeros, ones, zeros, -transform_center[..., 1], 153 | zeros, zeros, ones, -transform_center[..., 2], 154 | zeros, zeros, zeros, ones], axis=-1) 155 | to_center = torch.reshape(to_center, [B, N, 4, 4]) 156 | 157 | # to_center_inv = torch.stack([ones, zeros, zeros, transform_center[..., 0], 158 | # zeros, ones, zeros, transform_center[..., 1], 159 | # zeros, zeros, ones, transform_center[..., 2], 160 | # zeros, zeros, zeros, ones], axis=-1) 161 | # to_center_inv = torch.reshape(to_center_inv, [B, N, 4, 4]) 162 | 163 | transform_mat = torch.inverse(to_center) @ Ry @ Rx @ Rz @ to_center 164 | return transform_mat 165 | 166 | 167 | def depth_crop_expand(depth_crop, fx, fy, u0, v0, crop_trans, level, com_2d, random_sample, random_ratote=False, 168 | indices=None): 169 | """When 170 | level=1, num_views=3 171 | level=2, num_views=9 172 | level=3, num_views=15 173 | level=4, num_views=25 174 | level=5, num_views=81 175 | 176 | :param depth_crop: Tensor(B, 1, H, W) 177 | :param fx: float 178 | :param fy: float 179 | :param u0: float 180 | :param v0: float 181 | :param crop_trans: Tensor(B, 3, 3) 182 | :param level: int, 1, 2, 3, 4, 5 183 | :param com_2d: Tensor(B, 3) 184 | :param random_sample: bool 185 | :param random_ratote: bool 186 | :param indices: Tensor(B, num_select) 187 | :return: 188 | if indices is None: 189 | depth_crop_expand: Tensor(B, num_views, 1, H, W) 190 | view_mat: Tensor(B, num_views, 4, 4) 191 | else: 192 | depth_crop_expand: Tensor(B, num_select, 1, H, W) 193 | view_mat: Tensor(B, num_select, 4, 4) 194 | """ 195 | B, _, H, W = depth_crop.size() 196 | center = com_2d 197 | center = transform_2D_to_3D(center, fx, fy, u0, v0) 198 | view_mat = uniform_view_matrix(center, level, random_sample, random_ratote) # Tensor(B, num_views, 4, 4) 199 | if indices is None: 200 | num_views = view_mat.size(1) 201 | else: 202 | indices = indices[:, :, None, None].repeat([1, 1, 4, 4]) 
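        # indices holds the ids of the selected views, one row per batch element; it is
        # broadcast to (B, num_select, 4, 4) above so that torch.gather can pull out the
        # matching 4x4 view matrices along the view dimension (dim=1) below.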
203 | view_mat = torch.gather(view_mat, 1, indices) 204 | num_views = indices.size(1) 205 | depth_crop = depth_crop[:, None, :, :, :].repeat([1, num_views, 1, 1, 1]) 206 | depth_crop = depth_crop.reshape([B*num_views, 1, H, W]) 207 | crop_trans = crop_trans[:, None, :, :].repeat([1, num_views, 1, 1]) 208 | crop_trans = crop_trans.reshape([B*num_views, 3, 3]) 209 | view_mat = view_mat.reshape([B*num_views, 4, 4]) 210 | depth_expand = render_view(depth_crop, fx, fy, u0, v0, crop_trans, view_mat) 211 | 212 | depth_expand = depth_expand.reshape([B, num_views, 1, H, W]) 213 | view_mat = view_mat.reshape([B, num_views, 4, 4]) 214 | 215 | return depth_expand, view_mat 216 | 217 | def render_view(depth_crop, fx, fy, u0, v0, crop_trans, view_mat): 218 | ''' 219 | 220 | :param depth_crop: Tensor(B, 1, H, W) 221 | :param fx: float 222 | :param fy: float 223 | :param u0: float 224 | :param v0: float 225 | :param crop_trans: Tensor(B, 3, 3) 226 | :param view_mat: Tensor(B, 4, 4) 227 | :return: Tensor(B, 1, H, W) 228 | ''' 229 | B, _, H, W = depth_crop.size() 230 | pc_crop, mask = depth_to_point_cloud_mask(torch.round(depth_crop).int()) 231 | pc_crop, mask = pc_crop.float(), mask.float() 232 | pc = transform_2D(pc_crop, torch.inverse(crop_trans)) 233 | pc_3d = transform_2D_to_3D(pc, fx, fy, u0, v0) 234 | pc_3d_trans = transform_3D(pc_3d, view_mat) 235 | pc_trans = transform_3D_to_2D(pc_3d_trans, fx, fy, u0, v0) 236 | pc_crop_trans = transform_2D(pc_trans, crop_trans) 237 | depth_expand = point_cloud_mask_to_depth(torch.round(pc_crop_trans).int(), mask.int(), H, W) 238 | return depth_expand.float() 239 | 240 | if __name__ == '__main__': 241 | uniform_view_matrix(torch.zeros([1, 3]), level=4, random_sample=False, random_rotate=False) 242 | # from tqdm import tqdm 243 | # os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID" 244 | # os.environ["CUDA_VISIBLE_DEVICES"] = '0' 245 | # u0 = 320.0 246 | # v0 = 240.0 247 | # fx = 588.03 248 | # fy = 587.07 249 | # random_sample = True 250 | # import matplotlib.pyplot as plt 251 | # import utils.point_transform as np_pt 252 | # B = 4 253 | # train_dataset = NyuFeeder('train', max_jitter=10., depth_sigma=0., offset=30, random_flip=False) 254 | # dataloader = DataLoader(train_dataset, shuffle=False, batch_size=B, num_workers=8) 255 | # for batch_idx, batch_data in enumerate(tqdm(dataloader)): 256 | # item, depth, cropped, joint_3d, crop_trans, com_2d, inter_matrix, cube = batch_data 257 | # 258 | # cropped, crop_trans, com_2d = cropped.cuda(), crop_trans.cuda(), com_2d.cuda() 259 | # confidence = torch.ones([B, 25]) 260 | # indices = torch.multinomial(confidence, 3).cuda() 261 | # crop_expand, view_mat = depth_crop_expand(cropped, fx, fy, u0, v0, crop_trans, 4, com_2d, random_sample=False, 262 | # random_ratote=False, indices=indices) 263 | # cropped = cropped.cpu().numpy() 264 | # crop_trans = crop_trans.cpu().numpy() 265 | # crop_expand = crop_expand.cpu().numpy() 266 | # view_mat = view_mat.cpu().numpy() 267 | # com_2d = com_2d.cpu().numpy() 268 | # cube = cube.numpy() 269 | # com_2d = com_2d[0] 270 | # cube = cube[0] 271 | # plt.imshow(cropped[0, 0, ...]) 272 | # plt.show() 273 | # print(crop_expand.shape) 274 | # for i in range(0, crop_expand.shape[1], 2): 275 | # img = crop_expand[0, i, 0, ...] 276 | # img[img>1e-3] = img[img>1e-3] - com_2d[2] + cube[2]/2. 
277 | # img[img<1e-3] = cube[2] 278 | # img = img / cube[2] 279 | # _joint_3d = joint_3d[0] 280 | # _joint_3d = np_pt.transform_3D(_joint_3d, view_mat[0, i]) 281 | # _joint_2d = np_pt.transform_3D_to_2D(_joint_3d, fx, fy, u0, v0) 282 | # _crop_joint_2d = np_pt.transform_2D(_joint_2d, crop_trans[0]) 283 | # fig, ax = plt.subplots(figsize=plt.figaspect(img)) 284 | # fig.subplots_adjust(0, 0, 1, 1) 285 | # ax.imshow(img, cmap='gray') 286 | # # ax.scatter(_crop_joint_2d[:, 0], _crop_joint_2d[:, 1], c='red', s=100) 287 | # ax.axis('off') 288 | # plt.savefig('{}.jpg'.format(i)) 289 | # plt.show() 290 | # break 291 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | imageio==2.13.5 2 | matplotlib==3.5.1 3 | numpy==1.22.1 4 | opencv-python==4.5.5.62 5 | Pillow==9.0.0 6 | PyYAML==5.4.1 7 | scikit-image==0.19.1 8 | scipy==1.7.3 9 | tensorboard==2.8.0 10 | tensorboardX==2.4.1 11 | tqdm 12 | -f https://download.pytorch.org/whl/torch_stable.html 13 | torch==1.8.1+cu111 14 | torchaudio==0.8.1 15 | torchvision==0.9.1+cu111 16 | -------------------------------------------------------------------------------- /utils/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/iscas3dv/handpose-virtualview/d220efa69ff031077381bc0d4cd58fae7049c329/utils/__init__.py -------------------------------------------------------------------------------- /utils/camera_utils.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import math 3 | import os 4 | import imageio 5 | import cv2 6 | 7 | 8 | def get_camera_external_paramter(target, origin, up): 9 | ''' 10 | Transform target, origin, up to R,T 11 | :param target : lookat/center/target 12 | :param origin : eye/origin 13 | :param up : up 14 | :return : R(3*3 matrix) T(3*1 vector) 15 | ''' 16 | z = np.array(origin-target, dtype=np.float32) 17 | z = z / np.linalg.norm(z) 18 | x = np.cross(z,up) 19 | x = x / np.linalg.norm(x) 20 | y = np.cross(z,x) 21 | y = y / np.linalg.norm(y) 22 | R = np.eye(3, dtype=np.float32) 23 | R[0:3,0:3] = [x,y,z] 24 | T = -target 25 | return R, T 26 | 27 | def RT2affine(R, T): 28 | ''' 29 | Transform R,T to affine matrix(4*4) 30 | :param R : camera rotation 31 | :param T : camera translation 32 | :return : affine matrix(4*4) 33 | ''' 34 | matrix_r = np.eye(4, dtype=np.float32) 35 | matrix_r[0:3,0:3] = R 36 | matrix_t = np.eye(4, dtype=np.float32) 37 | matrix_t[0:3,3] = T 38 | matrix = np.dot(matrix_r,matrix_t) 39 | return matrix 40 | 41 | 42 | def get_camera_external_paramter_matrix(target, origin, up): 43 | """ 44 | Args: 45 | target (np.array): lookat/center/target 46 | origin (np.array): eye/origin 47 | up (np.array): up 48 | 49 | Returns: 50 | np.array: camera external paramter matrix 51 | """ 52 | R, T = get_camera_external_paramter(target, origin, up) 53 | matirx = RT2affine(R, T) 54 | return matirx 55 | 56 | 57 | def get_camera_internal_parameter(fov, width, height): 58 | """ 59 | Args: 60 | fov (float): denotes the camera’s field of view in degrees, fov maps to the x-axis in screen space 61 | width (int/float): 62 | height (int/float): 63 | 64 | Returns: 65 | fx, fy, ux, uy 66 | """ 67 | fov_ = fov * math.pi / 180 68 | focal = (width / 2) / math.tan(fov_ / 2) 69 | 70 | fx = focal 71 | fy = focal 72 | ux = width / 2 73 | uy = height / 2 74 | 75 | return fx, fy, ux, uy 76 | 77 | 
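# Illustrative sketch (not part of the original utilities; the helper name and the
# numbers are assumptions for the example only): with fov=60 degrees mapped to the
# x-axis of a 640x480 image, get_camera_internal_parameter() gives
# focal = 320 / tan(30 deg) ~= 554.26 and principal point (320, 240).
def _example_internal_parameter():
    fx, fy, ux, uy = get_camera_internal_parameter(60., 640, 480)
    print(fx, fy, ux, uy)  # ~554.256, ~554.256, 320.0, 240.0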
78 | def get_camera_internal_parameter_matrix(fx, fy, ux, uy): 79 | """ 80 | Args: 81 | fx: 82 | fy: 83 | ux: 84 | uy: 85 | 86 | Returns: 87 | np.array: camera internal parameter matrix 88 | """ 89 | matrix = np.array([ 90 | [fx, 0., ux], 91 | [ 0., fy, uy], 92 | [ 0., 0., 1] 93 | ], dtype=np.float32) 94 | 95 | return matrix 96 | 97 | 98 | 99 | 100 | if __name__ == '__main__': 101 | r_theta_x = 0 102 | r_theta_y = np.pi/4 103 | r_theta_z = 0 104 | center = np.array([0., 0., 500.]) 105 | 106 | c, s = np.cos(r_theta_x), np.sin(r_theta_x) 107 | Rx = np.array([[1, 0, 0, 0], 108 | [0, c, s, 0], 109 | [0, -s, c, 0], 110 | [0, 0, 0, 1]]) 111 | 112 | c, s = np.cos(r_theta_y), np.sin(r_theta_y) 113 | Ry = np.array([[c, 0, -s, 0], 114 | [0, 1, 0, 0], 115 | [s, 0, c, 0], 116 | [0, 0, 0, 1]]) 117 | 118 | c, s = np.cos(r_theta_z), np.sin(r_theta_z) 119 | Rz = np.array([[c, s, 0, 0], 120 | [-s, c, 0, 0], 121 | [0, 0, 1, 0], 122 | [0, 0, 0,1]]) 123 | 124 | to_center = np.array([[1, 0, 0, -center[0]], 125 | [0, 1, 0, -center[1]], 126 | [0, 0, 1, -center[2]], 127 | [0, 0, 0, 1]]) 128 | 129 | transform_mat = np.linalg.inv(to_center)@Rz@Ry@Rx@to_center 130 | # transform_mat = np.array(transform_mat, dtype=np.int) 131 | print(transform_mat) 132 | 133 | target1 = np.array([-500, 0, 0]) 134 | origin = np.array([0, 0, 0]) 135 | up = np.array([0, 1, 0]) 136 | matrix1 = get_camera_external_paramter_matrix(target1, origin, up) 137 | target2 = np.array([-353.55339, 0, -353.55339]) 138 | matrix2 = get_camera_external_paramter_matrix(target2, origin, up) 139 | matrix = np.matmul(matrix2, np.linalg.inv(matrix1)) 140 | print(matrix) 141 | 142 | matrix = np.matmul(matrix2, np.linalg.inv(matrix1)) 143 | 144 | 145 | target1 = np.array([-501, 0, 0]) 146 | origin = np.array([-1, 0, 0]) 147 | up = np.array([0, 1, 0]) 148 | matrix1 = get_camera_external_paramter_matrix(target1, origin, up) 149 | # print(matrix1) 150 | 151 | target2 = np.array([-1, 0, 500]) 152 | matrix2 = get_camera_external_paramter_matrix(target2, origin, up) 153 | # print(matrix2) 154 | matrix = np.matmul(matrix2, np.linalg.inv(matrix1)) 155 | # print(matrix) 156 | 157 | target1 = np.array([-500, 0, 0]) 158 | origin = np.array([0, 0, 0]) 159 | up = np.array([0, 1, 0]) 160 | matrix1 = get_camera_external_paramter_matrix(target1, origin, up) 161 | # print(matrix1) 162 | coor1 = np.matmul(matrix1, np.array([1, 1, 1, 1])) 163 | ans1 = np.array([1., -1., 501., 1.]) 164 | assert (np.abs(coor1-ans1)<1e-8).all() 165 | 166 | 167 | target2 = np.array([0, 0, 500]) 168 | matrix2 = get_camera_external_paramter_matrix(target2, origin, up) 169 | coor2 = np.matmul(matrix2, np.array([1, 1, 1, 1])) 170 | ans2 = np.array([1., -1., 499., 1.]) 171 | assert (np.abs(coor2-ans2)<1e-8).all() 172 | matrix = np.matmul(matrix2, np.linalg.inv(matrix1)) 173 | # print(matrix) 174 | 175 | matrix = np.matmul(matrix2, np.linalg.inv(matrix1)) 176 | # print(matrix) 177 | assert (np.abs(coor2-np.matmul(matrix, coor1))<1e-8).all() 178 | 179 | target1 = np.array([-500, 0, 0]) 180 | origin = np.array([0, 0, 0]) 181 | up = np.array([0, -1, 0]) 182 | matrix1 = get_camera_external_paramter_matrix(target1, origin, up) 183 | coor1 = np.matmul(matrix1, np.array([1, 1, 1, 1])) 184 | ans1 = np.array([-1., 1., 501., 1.]) 185 | assert (np.abs(coor1-ans1)<1e-8).all() 186 | 187 | target2 = np.array([0, 0, 500]) 188 | matrix2 = get_camera_external_paramter_matrix(target2, origin, up) 189 | coor2 = np.matmul(matrix2, np.array([1, 1, 1, 1])) 190 | ans2 = np.array([-1., 1., 499., 1.]) 191 | assert 
(np.abs(coor2-ans2)<1e-8).all() 192 | 193 | target1 = np.array([-501, 0, 0]) 194 | origin = np.array([-1, 0, 0]) 195 | up = np.array([0, 1, 1]) 196 | matrix1 = get_camera_external_paramter_matrix(target1, origin, up) 197 | coor1 = np.matmul(matrix1, np.array([1, 1, 1, 1])) 198 | ans1 = np.array([0., -1.41421356, 502., 1.]) 199 | assert (np.abs(coor1-ans1)<1e-8).all() 200 | 201 | target1 = np.array([-501, 1, 1]) 202 | origin = np.array([-1, 1, 1]) 203 | up = np.array([0, 1, 0]) 204 | matrix1 = get_camera_external_paramter_matrix(target1, origin, up) 205 | coor = np.array([1, 1, 1, 1]) 206 | coor1 = np.matmul(matrix1, coor) 207 | ans1 = np.array([0., 0., 502., 1.]) 208 | assert (np.abs(coor1-ans1)<1e-8).all() 209 | 210 | 211 | target2 = np.array([0, 0, 499]) 212 | matrix2 = get_camera_external_paramter_matrix(target2, origin, up) 213 | 214 | 215 | fov = 30 216 | width = 128 217 | height = 128 218 | 219 | internal_matrix = np.eye(4) 220 | fx, fy, ux, uy = get_camera_internal_parameter(fov, width, height) 221 | print(fx, fy, ux, uy) 222 | tmp = get_camera_internal_parameter_matrix(fx, fy, ux, uy) 223 | internal_matrix[:3,:3] = tmp 224 | 225 | # print(internal_matrix) 226 | 227 | dataset_dir = '/home/acc/cj/MultiviewRender/render_result' 228 | views = np.load(os.path.join(dataset_dir, '../views.npy'), allow_pickle=True, encoding='latin1').item() 229 | for (key, value) in views.items(): 230 | if(key!='reg_deng/RGB/hand_1/sample5'): 231 | continue 232 | print(key) 233 | path = os.path.join(dataset_dir, key) 234 | print(path) 235 | img_view0 = cv2.imread(path+'_view0.exr', cv2.IMREAD_ANYDEPTH) 236 | img_view1 = cv2.imread(path+'_view1.exr', cv2.IMREAD_ANYDEPTH) 237 | print(value['origin']) 238 | print(value['view0']) 239 | print(value['view1']) 240 | print(value['up']) 241 | external_matrix0 = get_camera_external_paramter_matrix(value['view0'], value['origin'], value['up']) 242 | external_matrix1 = get_camera_external_paramter_matrix(value['view1'], value['origin'], value['up']) 243 | internal_matrix = np.eye(4) 244 | tmp = get_camera_internal_parameter_matrix(fx, fy, ux, uy) 245 | internal_matrix[:3,:3] = tmp 246 | 247 | matrix0 = np.matmul(internal_matrix, external_matrix0) 248 | matrix1 = np.matmul(internal_matrix, external_matrix1) 249 | matrix = np.matmul(matrix1, np.linalg.inv(matrix0)) 250 | 251 | print(img_view0[64,64]) 252 | u, v = 57, 87 253 | print(img_view0[v, u]) 254 | zc = img_view0[v, u] 255 | point0 = np.array([u*zc, v*zc, zc, 1]) 256 | pointc0 = np.matmul(np.linalg.inv(internal_matrix), point0) 257 | print(pointc0) 258 | point = np.matmul(np.linalg.inv(external_matrix0), pointc0) 259 | print(point) 260 | pointc1 = np.matmul(external_matrix1, point) 261 | print(pointc1) 262 | point1 = np.matmul(internal_matrix, pointc1) 263 | point1[:2] = point1[:2]/point1[2] 264 | print(point1) 265 | u, v = int(point1[0]), int(point1[1]) 266 | # print(img_view1[v-3:v+3, u-3:u+3]) 267 | print(img_view1[v, u]) 268 | 269 | break 270 | -------------------------------------------------------------------------------- /utils/hand_detector.py: -------------------------------------------------------------------------------- 1 | """ 2 | Copyright 2015, 2018 ICG, Graz University of Technology 3 | This file is part of PreView. 4 | PreView is free software: you can redistribute it and/or modify 5 | it under the terms of the GNU General Public License as published by 6 | the Free Software Foundation, either version 3 of the License, or 7 | (at your option) any later version.
8 | PreView is distributed in the hope that it will be useful, 9 | but WITHOUT ANY WARRANTY; without even the implied warranty of 10 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 11 | GNU General Public License for more details. 12 | You should have received a copy of the GNU General Public License 13 | along with PreView. If not, see <http://www.gnu.org/licenses/>. 14 | """ 15 | 16 | import numpy as np 17 | import cv2 18 | import math 19 | from scipy import ndimage 20 | import matplotlib.pyplot as plt 21 | import logging 22 | logger = logging.getLogger(__file__) 23 | 24 | 25 | def normlize_depth(depth, com_2d, cube_z): 26 | norm_depth = depth.copy() 27 | norm_depth[norm_depth==0] = com_2d[2]+(cube_z/2.) 28 | norm_depth -= com_2d[2] 29 | norm_depth /= (cube_z/2.) 30 | return norm_depth 31 | 32 | 33 | def calculate_com_2d(dpt): 34 | """Calculate the center of mass 35 | 36 | :param dpt: depth image; invalid pixels which should not be considered must be set to zero 37 | :return: (x,y,z) center of mass 38 | """ 39 | dc = dpt.copy() 40 | cc = ndimage.measurements.center_of_mass(dc > 0) 41 | num = np.count_nonzero(dc) 42 | com_2d = np.array((cc[1] * num, cc[0] * num, dc.sum()), np.float32) 43 | 44 | if num == 0: 45 | return np.array((0, 0, 0), np.float32) 46 | else: 47 | return com_2d / num 48 | 49 | 50 | def calc_mask(depth, com_2d, fx, fy, bbx, offset, minRatioInside=0.75, size=(250, 250, 250)): 51 | if len(size) != 3: 52 | raise ValueError("Size must be 3D and dsize 2D bounding box") 53 | 54 | if bbx is not None: 55 | if len(bbx)==6: 56 | left, right, up, down, front, back = bbx 57 | else: 58 | left, right, up, down = bbx 59 | left = int(math.floor(left * com_2d[2] / fx - offset) / com_2d[2] * fx) 60 | right = int(math.floor(right * com_2d[2] / fx + offset) / com_2d[2] * fx) 61 | up = int(math.floor(up * com_2d[2] / fx - offset) / com_2d[2] * fx) 62 | down = int(math.floor(down * com_2d[2] / fx + offset) / com_2d[2] * fx) 63 | left = max(left, 0) 64 | right = min(right, depth.shape[1]) 65 | up = max(up, 0) 66 | down = min(down, depth.shape[0]) 67 | imgDepth = np.zeros_like(depth) 68 | imgDepth[up:down, left:right] = depth[up:down, left:right] 69 | if len(bbx)==6: 70 | imgDepth[imgDepth < front-offset] = 0. 71 | imgDepth[imgDepth > back+offset] = 0. 72 | else: 73 | imgDepth = depth 74 | 75 | # calculate boundaries 76 | zstart = com_2d[2] - size[2] / 2. 77 | zend = com_2d[2] + size[2] / 2. 78 | xstart = int(math.floor((com_2d[0] * com_2d[2] / fx - size[0] / 2.) / com_2d[2] * fx)) 79 | xend = int(math.floor((com_2d[0] * com_2d[2] / fx + size[0] / 2.) / com_2d[2] * fx)) 80 | ystart = int(math.floor((com_2d[1] * com_2d[2] / fy - size[1] / 2.) / com_2d[2] * fy)) 81 | yend = int(math.floor((com_2d[1] * com_2d[2] / fy + size[1] / 2.) / com_2d[2] * fy)) 82 | 83 | # Check if part within image is large enough; otherwise stop 84 | xstartin = max(xstart, 0) 85 | xendin = min(xend, imgDepth.shape[1]) 86 | ystartin = max(ystart, 0) 87 | yendin = min(yend, imgDepth.shape[0]) 88 | ratioInside = float((xendin - xstartin) * (yendin - ystartin)) / float((xend - xstart) * (yend - ystart)) 89 | if (ratioInside < minRatioInside) and ( 90 | (com_2d[0] < 0) or (com_2d[0] >= imgDepth.shape[1]) or (com_2d[1] < 0) or ( 91 | com_2d[1] >= imgDepth.shape[0])): 92 | # print("Hand largely outside image (ratio (inside) = {})".format(ratioInside)) 93 | raise UserWarning('Hand not inside image') 94 | 95 | if (ystartin back+offset] = 0. 148 | else: 149 | imgDepth = depth 150 | 151 | # calculate boundaries 152 | zstart = com_2d[2] - size[2] / 2.
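# (added note, not in the original file) This boundary block converts the metric crop
# cube `size` (mm), centred on the hand centre of mass com_2d = (u, v, depth), into
# image-space limits: zstart/zend clamp depth directly, while the xstart/xend and
# ystart/yend computed next back-project the half-extent size/2 at depth com_2d[2]
# into pixels via the pinhole model (an offset of d mm at depth z spans d*fx/z pixels).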
153 | zend = com_2d[2] + size[2] / 2. 154 | xstart = int(math.floor((com_2d[0] * com_2d[2] / fx - size[0] / 2.) / com_2d[2] * fx)) 155 | xend = int(math.floor((com_2d[0] * com_2d[2] / fx + size[0] / 2.) / com_2d[2] * fx)) 156 | ystart = int(math.floor((com_2d[1] * com_2d[2] / fy - size[1] / 2.) / com_2d[2] * fy)) 157 | yend = int(math.floor((com_2d[1] * com_2d[2] / fy + size[1] / 2.) / com_2d[2] * fy)) 158 | 159 | # Check if part within image is large enough; otherwise stop 160 | xstartin = max(xstart, 0) 161 | xendin = min(xend, imgDepth.shape[1]) 162 | ystartin = max(ystart, 0) 163 | yendin = min(yend, imgDepth.shape[0]) 164 | ratioInside = float((xendin - xstartin) * (yendin - ystartin)) / float((xend - xstart) * (yend - ystart)) 165 | if (ratioInside < minRatioInside) and ( 166 | (com_2d[0] < 0) or (com_2d[0] >= imgDepth.shape[1]) or (com_2d[1] < 0) or (com_2d[1] >= imgDepth.shape[0])): 167 | # print("Hand largely outside image (ratio (inside) = {})".format(ratioInside)) 168 | raise UserWarning('Hand not inside image') 169 | 170 | # crop patch from source 171 | cropped = imgDepth[max(ystart, 0):min(yend, imgDepth.shape[0]), max(xstart, 0):min(xend, imgDepth.shape[1])].copy() 172 | # add pixels that are out of the image in order to keep aspect ratio 173 | cropped = np.pad(cropped, ((abs(ystart) - max(ystart, 0), abs(yend) - min(yend, imgDepth.shape[0])), 174 | (abs(xstart) - max(xstart, 0), abs(xend) - min(xend, imgDepth.shape[1]))), 175 | mode='constant', constant_values=int(CROP_BG_VALUE)) 176 | msk1 = np.bitwise_and(cropped < zstart, cropped != 0) 177 | msk2 = np.bitwise_and(cropped > zend, cropped != 0) 178 | cropped[msk1] = CROP_BG_VALUE # backface is at 0, it is set later; setting anything outside cube to same value now (was set to zstart earlier) 179 | cropped[msk2] = CROP_BG_VALUE # backface is at 0, it is set later 180 | 181 | # for simulating COM within cube 182 | if docom is True: 183 | com_2d = calculate_com_2d(cropped) 184 | if np.allclose(com_2d, 0.): 185 | com_2d[2] = cropped[cropped.shape[0] // 2, cropped.shape[1] // 2] 186 | com_2d[0] += xstart 187 | com_2d[1] += ystart 188 | 189 | # calculate boundaries 190 | zstart = com_2d[2] - size[2] / 2. 191 | zend = com_2d[2] + size[2] / 2. 192 | xstart = int(math.floor((com_2d[0] * com_2d[2] / fx - size[0] / 2.) / com_2d[2] * fx)) 193 | xend = int(math.floor((com_2d[0] * com_2d[2] / fx + size[0] / 2.) / com_2d[2] * fx)) 194 | ystart = int(math.floor((com_2d[1] * com_2d[2] / fy - size[1] / 2.) / com_2d[2] * fy)) 195 | yend = int(math.floor((com_2d[1] * com_2d[2] / fy + size[1] / 2.) 
/ com_2d[2] * fy)) 196 | 197 | # crop patch from source 198 | cropped = imgDepth[max(ystart, 0):min(yend, imgDepth.shape[0]), 199 | max(xstart, 0):min(xend, imgDepth.shape[1])].copy() 200 | # add pixels that are out of the image in order to keep aspect ratio 201 | cropped = np.pad(cropped, ((abs(ystart) - max(ystart, 0), abs(yend) - min(yend, imgDepth.shape[0])), 202 | (abs(xstart) - max(xstart, 0), abs(xend) - min(xend, imgDepth.shape[1]))), 203 | mode='constant', constant_values=0) 204 | msk1 = np.bitwise_and(cropped < zstart, cropped != 0) 205 | msk2 = np.bitwise_and(cropped > zend, cropped != 0) 206 | cropped[msk1] = zstart 207 | cropped[msk2] = CROP_BG_VALUE # backface is at 0, it is set later 208 | 209 | wb = (xend - xstart) 210 | hb = (yend - ystart) 211 | trans = np.asmatrix(np.eye(3, dtype=np.float32)) 212 | trans[0, 2] = -xstart 213 | trans[1, 2] = -ystart 214 | # compute size of image patch for isotropic scaling where the larger side is the side length of the fixed size image patch (preserving aspect ratio) 215 | if wb > hb: 216 | sz = (dsize[0], int(round(hb * dsize[0] / float(wb)))) 217 | else: 218 | sz = (int(round(wb * dsize[1] / float(hb))), dsize[1]) 219 | 220 | # compute scale factor from cropped ROI in image to fixed size image patch; set up matrix with same scale in x and y (preserving aspect ratio) 221 | roi = cropped 222 | if roi.shape[0] > roi.shape[1]: # Note, roi.shape is (y,x) and sz is (x,y) 223 | scale = np.asmatrix(np.eye(3, dtype=np.float32) * sz[1] / float(roi.shape[0])) 224 | else: 225 | scale = np.asmatrix(np.eye(3, dtype=np.float32) * sz[0] / float(roi.shape[1])) 226 | scale[2, 2] = 1 227 | 228 | # depth resize 229 | rz = cv2.resize(cropped, sz, interpolation=cv2.INTER_NEAREST) 230 | 231 | # Sanity check 232 | numValidPixels = np.sum(rz != CROP_BG_VALUE) 233 | if (numValidPixels < 40) or (numValidPixels < (np.prod(dsize) * 0.01)): 234 | # plt.imshow(rz) 235 | # plt.show() 236 | # print("Too small number of foreground/hand pixels (={})".format(numValidPixels)) 237 | raise UserWarning("No valid hand. Foreground region too small.") 238 | 239 | # Place the resized patch (with preserved aspect ratio) in the center of a fixed size patch (padded with default background values) 240 | ret = np.ones(dsize, np.float32) * CROP_BG_VALUE # use background as filler 241 | xstart = int(math.floor(dsize[0] / 2 - rz.shape[1] / 2)) 242 | xend = int(xstart + rz.shape[1]) 243 | ystart = int(math.floor(dsize[1] / 2 - rz.shape[0] / 2)) 244 | yend = int(ystart + rz.shape[0]) 245 | ret[ystart:yend, xstart:xend] = rz 246 | # print rz.shape 247 | off = np.asmatrix(np.eye(3, dtype=np.float32)) 248 | off[0, 2] = xstart 249 | off[1, 2] = ystart 250 | 251 | # Transformation from original image to fixed size crop includes 252 | # the translation of the "anchor" point of the crop to origin (=trans), 253 | # the (isotropic) scale factor (=scale), and 254 | # the offset of the patch (with preserved aspect ratio) within the fixed size patch (=off) 255 | return ret, off * scale * trans, com_2d 256 | -------------------------------------------------------------------------------- /utils/image_utils.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | 4 | def normlize_depth(depth, com_2d, cube_z): 5 | norm_depth = depth.copy() 6 | norm_depth[norm_depth==0] = com_2d[2]+(cube_z/2.) 7 | norm_depth -= com_2d[2] 8 | norm_depth /= (cube_z/2.)
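# (added note, not in the original file) After the shift and scale above, depth is
# expressed relative to the centre of mass in units of half the cube depth:
# invalid (zero) pixels were moved to the cube's back face and map to +1,
# the centre-of-mass depth maps to 0, and the cube's front face maps to -1.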
9 | return norm_depth 10 | 11 | 12 | def normlize_image(img): 13 | """Normalize an image to the range [0, 1]. 14 | 15 | :param img: np.array(H, W) 16 | :return: np.array(H, W) 17 | """ 18 | t_min = np.min(img) 19 | t_max = np.max(img) 20 | img = (img - t_min) / (t_max - t_min) 21 | return img 22 | -------------------------------------------------------------------------------- /utils/point_transform.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | 4 | def transform_3D(points, trans_matrix): 5 | """3D affine transformation 6 | 7 | :param points: Tensor(..., N, 3) 8 | :param trans_matrix: Tensor(..., 4, 4) 9 | :return: Tensor(..., N, 3) 10 | """ 11 | x = points[..., 0] 12 | y = points[..., 1] 13 | z = points[..., 2] 14 | ones = np.ones_like(x) 15 | points_h = np.stack([x, y, z, ones], -2) # (..., 4, N) 16 | points_h = trans_matrix @ points_h 17 | dim = len(points.shape) 18 | points = np.transpose(points_h, [i for i in range(dim-2)]+[dim-1,dim-2])[..., :3] # (..., N, 3) 19 | return points 20 | 21 | 22 | def transform_3D_to_2D(points, fx, fy, u0, v0): 23 | u = points[..., 0] / points[..., 2] * fx + u0 24 | v = points[..., 1] / points[..., 2] * fy + v0 25 | d = points[..., 2] 26 | return np.stack([u, v, d], axis=-1) 27 | 28 | 29 | def transform_2D_to_3D(points, fx, fy, u0, v0): 30 | x = (points[..., 0] - u0) * points[..., 2] / fx 31 | y = (points[..., 1] - v0) * points[..., 2] / fy 32 | z = points[..., 2] 33 | return np.stack([x, y, z], axis=-1) 34 | 35 | 36 | def transform_2D(points, trans_matirx): 37 | """2D affine transformation 38 | 39 | :param points: Tensor(..., N, 3) 40 | :param trans_matirx: Tensor(..., 3, 3) 41 | :return: Tensor(..., N, 3) 42 | """ 43 | x = points[..., 0] 44 | y = points[..., 1] 45 | z = points[..., 2] 46 | ones = np.ones_like(x) 47 | points_h = np.stack([x, y, ones], axis=-2) # (..., 3, N) 48 | points_h = trans_matirx @ points_h 49 | dim = len(points.shape) 50 | points = np.transpose(points_h, [i for i in range(dim-2)]+[dim-1,dim-2]) # (..., N, 3) 51 | points[..., 2] = z 52 | return points -------------------------------------------------------------------------------- /utils/voxel_utils.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | 4 | def gen_voxel(cropped, com_2d, cube, voxel_len): 5 | """ 6 | 7 | :param cropped: numpy([H, W], float) 8 | :param com_2d: numpy([3], float) 9 | :param cube: numpy([3], float) 10 | :param voxel_len: int 11 | :return: numpy([voxel_len, voxel_len, voxel_len], int) 12 | """ 13 | H, W = cropped.shape 14 | 15 | # x indexes rows, y indexes columns 16 | x = np.arange(H) 17 | y = np.arange(W) 18 | x, y = np.meshgrid(x, y, indexing='ij') 19 | z = cropped.copy() 20 | mask = np.bitwise_and(cropped>=com_2d[2]-cube[2]/2., cropped