├── .gitignore ├── LICENSE ├── README.md ├── apps ├── __init__.py ├── eval.py └── train.py ├── environment.yaml └── lib ├── __init__.py ├── data ├── BOP_BP_YCBV.py └── __init__.py ├── data_utils ├── __init__.py ├── aug_util.py └── sample_frustum_util.py ├── debug_pyrender_util.py ├── eval_Rt_time_util.py ├── geometry.py ├── loss_util.py ├── mesh_util.py ├── model ├── BasePIFuNet.py ├── HGFilters.py ├── HGPIFuNet.py ├── RayDistanceNormalizer.py ├── SurfaceClassifier.py └── __init__.py ├── net_util.py ├── options.py ├── rigid_fit ├── calculate_rmsd.py ├── ransac.py └── ransac_kabsch.py ├── sdf.py └── sym_util.py /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib64/ 18 | parts/ 19 | sdist/ 20 | var/ 21 | wheels/ 22 | pip-wheel-metadata/ 23 | share/python-wheels/ 24 | *.egg-info/ 25 | .installed.cfg 26 | *.egg 27 | MANIFEST 28 | 29 | # PyInstaller 30 | # Usually these files are written by a python script from a template 31 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 32 | *.manifest 33 | *.spec 34 | 35 | # Installer logs 36 | pip-log.txt 37 | pip-delete-this-directory.txt 38 | 39 | # Unit test / coverage reports 40 | htmlcov/ 41 | .tox/ 42 | .nox/ 43 | .coverage 44 | .coverage.* 45 | .cache 46 | nosetests.xml 47 | coverage.xml 48 | *.cover 49 | *.py,cover 50 | .hypothesis/ 51 | .pytest_cache/ 52 | 53 | # Translations 54 | *.mo 55 | *.pot 56 | 57 | # Django stuff: 58 | *.log 59 | local_settings.py 60 | db.sqlite3 61 | db.sqlite3-journal 62 | 63 | # Flask stuff: 64 | instance/ 65 | .webassets-cache 66 | 67 | # Scrapy stuff: 68 | .scrapy 69 | 70 | # Sphinx documentation 71 | docs/_build/ 72 | 73 | # PyBuilder 74 | target/ 75 | 76 | # Jupyter Notebook 77 | .ipynb_checkpoints 78 | 79 | # IPython 80 | profile_default/ 81 | ipython_config.py 82 | 83 | # pyenv 84 | .python-version 85 | 86 | # pipenv 87 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 88 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 89 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 90 | # install all needed dependencies. 91 | #Pipfile.lock 92 | 93 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow 94 | __pypackages__/ 95 | 96 | # Celery stuff 97 | celerybeat-schedule 98 | celerybeat.pid 99 | 100 | # SageMath parsed files 101 | *.sage.py 102 | 103 | # Environments 104 | .env 105 | .venv 106 | env/ 107 | venv/ 108 | ENV/ 109 | env.bak/ 110 | venv.bak/ 111 | 112 | # Spyder project settings 113 | .spyderproject 114 | .spyproject 115 | 116 | # Rope project settings 117 | .ropeproject 118 | 119 | # mkdocs documentation 120 | /site 121 | 122 | # mypy 123 | .mypy_cache/ 124 | .dmypy.json 125 | dmypy.json 126 | 127 | # Pyre type checker 128 | .pyre/ 129 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Creative Commons Attribution-NonCommercial 4.0 International 2 | 3 | Creative Commons Corporation ("Creative Commons") is not a law firm and 4 | does not provide legal services or legal advice. 
Distribution of 5 | Creative Commons public licenses does not create a lawyer-client or 6 | other relationship. Creative Commons makes its licenses and related 7 | information available on an "as-is" basis. Creative Commons gives no 8 | warranties regarding its licenses, any material licensed under their 9 | terms and conditions, or any related information. Creative Commons 10 | disclaims all liability for damages resulting from their use to the 11 | fullest extent possible. 12 | 13 | Using Creative Commons Public Licenses 14 | 15 | Creative Commons public licenses provide a standard set of terms and 16 | conditions that creators and other rights holders may use to share 17 | original works of authorship and other material subject to copyright and 18 | certain other rights specified in the public license below. The 19 | following considerations are for informational purposes only, are not 20 | exhaustive, and do not form part of our licenses. 21 | 22 | - Considerations for licensors: Our public licenses are intended for 23 | use by those authorized to give the public permission to use 24 | material in ways otherwise restricted by copyright and certain other 25 | rights. Our licenses are irrevocable. Licensors should read and 26 | understand the terms and conditions of the license they choose 27 | before applying it. Licensors should also secure all rights 28 | necessary before applying our licenses so that the public can reuse 29 | the material as expected. Licensors should clearly mark any material 30 | not subject to the license. This includes other CC-licensed 31 | material, or material used under an exception or limitation to 32 | copyright. More considerations for licensors : 33 | wiki.creativecommons.org/Considerations_for_licensors 34 | 35 | - Considerations for the public: By using one of our public licenses, 36 | a licensor grants the public permission to use the licensed material 37 | under specified terms and conditions. If the licensor's permission 38 | is not necessary for any reason–for example, because of any 39 | applicable exception or limitation to copyright–then that use is not 40 | regulated by the license. Our licenses grant only permissions under 41 | copyright and certain other rights that a licensor has authority to 42 | grant. Use of the licensed material may still be restricted for 43 | other reasons, including because others have copyright or other 44 | rights in the material. A licensor may make special requests, such 45 | as asking that all changes be marked or described. Although not 46 | required by our licenses, you are encouraged to respect those 47 | requests where reasonable. More considerations for the public : 48 | wiki.creativecommons.org/Considerations_for_licensees 49 | 50 | Creative Commons Attribution-NonCommercial 4.0 International Public 51 | License 52 | 53 | By exercising the Licensed Rights (defined below), You accept and agree 54 | to be bound by the terms and conditions of this Creative Commons 55 | Attribution-NonCommercial 4.0 International Public License ("Public 56 | License"). To the extent this Public License may be interpreted as a 57 | contract, You are granted the Licensed Rights in consideration of Your 58 | acceptance of these terms and conditions, and the Licensor grants You 59 | such rights in consideration of benefits the Licensor receives from 60 | making the Licensed Material available under these terms and conditions. 61 | 62 | - Section 1 – Definitions. 63 | 64 | - a. 
Adapted Material means material subject to Copyright and 65 | Similar Rights that is derived from or based upon the Licensed 66 | Material and in which the Licensed Material is translated, 67 | altered, arranged, transformed, or otherwise modified in a 68 | manner requiring permission under the Copyright and Similar 69 | Rights held by the Licensor. For purposes of this Public 70 | License, where the Licensed Material is a musical work, 71 | performance, or sound recording, Adapted Material is always 72 | produced where the Licensed Material is synched in timed 73 | relation with a moving image. 74 | - b. Adapter's License means the license You apply to Your 75 | Copyright and Similar Rights in Your contributions to Adapted 76 | Material in accordance with the terms and conditions of this 77 | Public License. 78 | - c. Copyright and Similar Rights means copyright and/or similar 79 | rights closely related to copyright including, without 80 | limitation, performance, broadcast, sound recording, and Sui 81 | Generis Database Rights, without regard to how the rights are 82 | labeled or categorized. For purposes of this Public License, the 83 | rights specified in Section 2(b)(1)-(2) are not Copyright and 84 | Similar Rights. 85 | - d. Effective Technological Measures means those measures that, 86 | in the absence of proper authority, may not be circumvented 87 | under laws fulfilling obligations under Article 11 of the WIPO 88 | Copyright Treaty adopted on December 20, 1996, and/or similar 89 | international agreements. 90 | - e. Exceptions and Limitations means fair use, fair dealing, 91 | and/or any other exception or limitation to Copyright and 92 | Similar Rights that applies to Your use of the Licensed 93 | Material. 94 | - f. Licensed Material means the artistic or literary work, 95 | database, or other material to which the Licensor applied this 96 | Public License. 97 | - g. Licensed Rights means the rights granted to You subject to 98 | the terms and conditions of this Public License, which are 99 | limited to all Copyright and Similar Rights that apply to Your 100 | use of the Licensed Material and that the Licensor has authority 101 | to license. 102 | - h. Licensor means the individual(s) or entity(ies) granting 103 | rights under this Public License. 104 | - i. NonCommercial means not primarily intended for or directed 105 | towards commercial advantage or monetary compensation. For 106 | purposes of this Public License, the exchange of the Licensed 107 | Material for other material subject to Copyright and Similar 108 | Rights by digital file-sharing or similar means is NonCommercial 109 | provided there is no payment of monetary compensation in 110 | connection with the exchange. 111 | - j. Share means to provide material to the public by any means or 112 | process that requires permission under the Licensed Rights, such 113 | as reproduction, public display, public performance, 114 | distribution, dissemination, communication, or importation, and 115 | to make material available to the public including in ways that 116 | members of the public may access the material from a place and 117 | at a time individually chosen by them. 118 | - k. Sui Generis Database Rights means rights other than copyright 119 | resulting from Directive 96/9/EC of the European Parliament and 120 | of the Council of 11 March 1996 on the legal protection of 121 | databases, as amended and/or succeeded, as well as other 122 | essentially equivalent rights anywhere in the world. 123 | - l. 
You means the individual or entity exercising the Licensed 124 | Rights under this Public License. Your has a corresponding 125 | meaning. 126 | 127 | - Section 2 – Scope. 128 | 129 | - a. License grant. 130 | - 1. Subject to the terms and conditions of this Public 131 | License, the Licensor hereby grants You a worldwide, 132 | royalty-free, non-sublicensable, non-exclusive, irrevocable 133 | license to exercise the Licensed Rights in the Licensed 134 | Material to: 135 | - A. reproduce and Share the Licensed Material, in whole 136 | or in part, for NonCommercial purposes only; and 137 | - B. produce, reproduce, and Share Adapted Material for 138 | NonCommercial purposes only. 139 | - 2. Exceptions and Limitations. For the avoidance of doubt, 140 | where Exceptions and Limitations apply to Your use, this 141 | Public License does not apply, and You do not need to comply 142 | with its terms and conditions. 143 | - 3. Term. The term of this Public License is specified in 144 | Section 6(a). 145 | - 4. Media and formats; technical modifications allowed. The 146 | Licensor authorizes You to exercise the Licensed Rights in 147 | all media and formats whether now known or hereafter 148 | created, and to make technical modifications necessary to do 149 | so. The Licensor waives and/or agrees not to assert any 150 | right or authority to forbid You from making technical 151 | modifications necessary to exercise the Licensed Rights, 152 | including technical modifications necessary to circumvent 153 | Effective Technological Measures. For purposes of this 154 | Public License, simply making modifications authorized by 155 | this Section 2(a)(4) never produces Adapted Material. 156 | - 5. Downstream recipients. 157 | - A. Offer from the Licensor – Licensed Material. Every 158 | recipient of the Licensed Material automatically 159 | receives an offer from the Licensor to exercise the 160 | Licensed Rights under the terms and conditions of this 161 | Public License. 162 | - B. No downstream restrictions. You may not offer or 163 | impose any additional or different terms or conditions 164 | on, or apply any Effective Technological Measures to, 165 | the Licensed Material if doing so restricts exercise of 166 | the Licensed Rights by any recipient of the Licensed 167 | Material. 168 | - 6. No endorsement. Nothing in this Public License 169 | constitutes or may be construed as permission to assert or 170 | imply that You are, or that Your use of the Licensed 171 | Material is, connected with, or sponsored, endorsed, or 172 | granted official status by, the Licensor or others 173 | designated to receive attribution as provided in Section 174 | 3(a)(1)(A)(i). 175 | - b. Other rights. 176 | - 1. Moral rights, such as the right of integrity, are not 177 | licensed under this Public License, nor are publicity, 178 | privacy, and/or other similar personality rights; however, 179 | to the extent possible, the Licensor waives and/or agrees 180 | not to assert any such rights held by the Licensor to the 181 | limited extent necessary to allow You to exercise the 182 | Licensed Rights, but not otherwise. 183 | - 2. Patent and trademark rights are not licensed under this 184 | Public License. 185 | - 3. To the extent possible, the Licensor waives any right to 186 | collect royalties from You for the exercise of the Licensed 187 | Rights, whether directly or through a collecting society 188 | under any voluntary or waivable statutory or compulsory 189 | licensing scheme. 
In all other cases the Licensor expressly 190 | reserves any right to collect such royalties, including when 191 | the Licensed Material is used other than for NonCommercial 192 | purposes. 193 | 194 | - Section 3 – License Conditions. 195 | 196 | Your exercise of the Licensed Rights is expressly made subject to 197 | the following conditions. 198 | 199 | - a. Attribution. 200 | - 1. If You Share the Licensed Material (including in modified 201 | form), You must: 202 | - A. retain the following if it is supplied by the 203 | Licensor with the Licensed Material: 204 | - i. identification of the creator(s) of the Licensed 205 | Material and any others designated to receive 206 | attribution, in any reasonable manner requested by 207 | the Licensor (including by pseudonym if designated); 208 | - ii. a copyright notice; 209 | - iii. a notice that refers to this Public License; 210 | - iv. a notice that refers to the disclaimer of 211 | warranties; 212 | - v. a URI or hyperlink to the Licensed Material to 213 | the extent reasonably practicable; 214 | - B. indicate if You modified the Licensed Material and 215 | retain an indication of any previous modifications; and 216 | - C. indicate the Licensed Material is licensed under this 217 | Public License, and include the text of, or the URI or 218 | hyperlink to, this Public License. 219 | - 2. You may satisfy the conditions in Section 3(a)(1) in any 220 | reasonable manner based on the medium, means, and context in 221 | which You Share the Licensed Material. For example, it may 222 | be reasonable to satisfy the conditions by providing a URI 223 | or hyperlink to a resource that includes the required 224 | information. 225 | - 3. If requested by the Licensor, You must remove any of the 226 | information required by Section 3(a)(1)(A) to the extent 227 | reasonably practicable. 228 | - 4. If You Share Adapted Material You produce, the Adapter's 229 | License You apply must not prevent recipients of the Adapted 230 | Material from complying with this Public License. 231 | 232 | - Section 4 – Sui Generis Database Rights. 233 | 234 | Where the Licensed Rights include Sui Generis Database Rights that 235 | apply to Your use of the Licensed Material: 236 | 237 | - a. for the avoidance of doubt, Section 2(a)(1) grants You the 238 | right to extract, reuse, reproduce, and Share all or a 239 | substantial portion of the contents of the database for 240 | NonCommercial purposes only; 241 | - b. if You include all or a substantial portion of the database 242 | contents in a database in which You have Sui Generis Database 243 | Rights, then the database in which You have Sui Generis Database 244 | Rights (but not its individual contents) is Adapted Material; 245 | and 246 | - c. You must comply with the conditions in Section 3(a) if You 247 | Share all or a substantial portion of the contents of the 248 | database. 249 | 250 | For the avoidance of doubt, this Section 4 supplements and does not 251 | replace Your obligations under this Public License where the 252 | Licensed Rights include other Copyright and Similar Rights. 253 | 254 | - Section 5 – Disclaimer of Warranties and Limitation of Liability. 255 | 256 | - a. Unless otherwise separately undertaken by the Licensor, to 257 | the extent possible, the Licensor offers the Licensed Material 258 | as-is and as-available, and makes no representations or 259 | warranties of any kind concerning the Licensed Material, whether 260 | express, implied, statutory, or other. 
This includes, without 261 | limitation, warranties of title, merchantability, fitness for a 262 | particular purpose, non-infringement, absence of latent or other 263 | defects, accuracy, or the presence or absence of errors, whether 264 | or not known or discoverable. Where disclaimers of warranties 265 | are not allowed in full or in part, this disclaimer may not 266 | apply to You. 267 | - b. To the extent possible, in no event will the Licensor be 268 | liable to You on any legal theory (including, without 269 | limitation, negligence) or otherwise for any direct, special, 270 | indirect, incidental, consequential, punitive, exemplary, or 271 | other losses, costs, expenses, or damages arising out of this 272 | Public License or use of the Licensed Material, even if the 273 | Licensor has been advised of the possibility of such losses, 274 | costs, expenses, or damages. Where a limitation of liability is 275 | not allowed in full or in part, this limitation may not apply to 276 | You. 277 | - c. The disclaimer of warranties and limitation of liability 278 | provided above shall be interpreted in a manner that, to the 279 | extent possible, most closely approximates an absolute 280 | disclaimer and waiver of all liability. 281 | 282 | - Section 6 – Term and Termination. 283 | 284 | - a. This Public License applies for the term of the Copyright and 285 | Similar Rights licensed here. However, if You fail to comply 286 | with this Public License, then Your rights under this Public 287 | License terminate automatically. 288 | - b. Where Your right to use the Licensed Material has terminated 289 | under Section 6(a), it reinstates: 290 | 291 | - 1. automatically as of the date the violation is cured, 292 | provided it is cured within 30 days of Your discovery of the 293 | violation; or 294 | - 2. upon express reinstatement by the Licensor. 295 | 296 | For the avoidance of doubt, this Section 6(b) does not affect 297 | any right the Licensor may have to seek remedies for Your 298 | violations of this Public License. 299 | 300 | - c. For the avoidance of doubt, the Licensor may also offer the 301 | Licensed Material under separate terms or conditions or stop 302 | distributing the Licensed Material at any time; however, doing 303 | so will not terminate this Public License. 304 | - d. Sections 1, 5, 6, 7, and 8 survive termination of this Public 305 | License. 306 | 307 | - Section 7 – Other Terms and Conditions. 308 | 309 | - a. The Licensor shall not be bound by any additional or 310 | different terms or conditions communicated by You unless 311 | expressly agreed. 312 | - b. Any arrangements, understandings, or agreements regarding the 313 | Licensed Material not stated herein are separate from and 314 | independent of the terms and conditions of this Public License. 315 | 316 | - Section 8 – Interpretation. 317 | 318 | - a. For the avoidance of doubt, this Public License does not, and 319 | shall not be interpreted to, reduce, limit, restrict, or impose 320 | conditions on any use of the Licensed Material that could 321 | lawfully be made without permission under this Public License. 322 | - b. To the extent possible, if any provision of this Public 323 | License is deemed unenforceable, it shall be automatically 324 | reformed to the minimum extent necessary to make it enforceable. 325 | If the provision cannot be reformed, it shall be severed from 326 | this Public License without affecting the enforceability of the 327 | remaining terms and conditions. 328 | - c. 
No term or condition of this Public License will be waived 329 | and no failure to comply consented to unless expressly agreed to 330 | by the Licensor. 331 | - d. Nothing in this Public License constitutes or may be 332 | interpreted as a limitation upon, or waiver of, any privileges 333 | and immunities that apply to the Licensor or You, including from 334 | the legal processes of any jurisdiction or authority. 335 | 336 | Creative Commons is not a party to its public licenses. Notwithstanding, 337 | Creative Commons may elect to apply one of its public licenses to 338 | material it publishes and in those instances will be considered the 339 | "Licensor." The text of the Creative Commons public licenses is 340 | dedicated to the public domain under the CC0 Public Domain Dedication. 341 | Except for the limited purpose of indicating that material is shared 342 | under a Creative Commons public license or as otherwise permitted by the 343 | Creative Commons policies published at creativecommons.org/policies, 344 | Creative Commons does not authorize the use of the trademark "Creative 345 | Commons" or any other trademark or logo of Creative Commons without its 346 | prior written consent including, without limitation, in connection with 347 | any unauthorized modifications to any of its public licenses or any 348 | other arrangements, understandings, or agreements concerning use of 349 | licensed material. For the avoidance of doubt, this paragraph does not 350 | form part of the public licenses. 351 | 352 | Creative Commons may be contacted at creativecommons.org. -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Neural Correspondence Field for Object Pose Estimation 2 | 3 | This repository provides the source code and trained models of the 6D object pose estimation method presented in: 4 | 5 | [Lin Huang](https://linhuang17.github.io/), [Tomas Hodan](http://www.hodan.xyz), [Lingni Ma](https://www.linkedin.com/in/lingnima/), [Linguang Zhang](https://lg-zhang.github.io/), [Luan Tran](https://www.linkedin.com/in/luan-tran-3185009b/), [Christopher Twigg](https://chris.twi.gg/), [Po-Chen Wu](http://media.ee.ntu.edu.tw/personal/pcwu/), [Junsong Yuan](https://cse.buffalo.edu/~jsyuan/), [Cem Keskin](https://www.linkedin.com/in/cem-keskin-23692a15/), [Robert Wang](http://people.csail.mit.edu/rywang/)
6 | **Neural Correspondence Field for Object Pose Estimation**
7 | European Conference on Computer Vision (ECCV) 2022
8 | [Paper](https://arxiv.org/pdf/2208.00113.pdf) | [Webpage](https://linhuang17.github.io/NCF/) | [Bibtex](https://linhuang17.github.io/NCF/resources/huang2022ncf.txt) 9 | 10 | Contents: [Setup](#setup) | [Usage](#usage) | [Pre-trained models](#pre-trained-models) 11 | 12 | 13 | ## 1. Setup 14 | 15 | ### 1.1 Cloning the repository 16 | 17 | Download the code: 18 | ``` 19 | git clone https://github.com/LinHuang17/NCF-code.git 20 | cd NCF-code 21 | ``` 22 | 23 | ### 1.2 Python environment and dependencies 24 | 25 | Create and activate conda environment with dependencies: 26 | ``` 27 | conda env create -f environment.yaml 28 | conda activate ncf 29 | ``` 30 | 31 | ### 1.3 BOP datasets 32 | 33 | For experiments on existing [BOP datasets](https://bop.felk.cvut.cz/datasets/), please follow the instructions on the [website](https://bop.felk.cvut.cz/datasets/) to download the base archives, 3D object models, the training images, and the test images. 34 | 35 | For YCB-V, you are expected to have files: `ycbv_base.zip`, `ycbv_models.zip`, `ycbv_train_pbr.zip`, `ycbv_train_real.zip` (used for training models with real images), and `ycbv_test_bop19.zip`. Then, unpack them into folder ``. 36 | 37 | ## 2. Usage 38 | 39 | ### 2.1 Inference with a pre-trained model 40 | 41 | To evaluate on an object (e.g., cracker box) from YCB-V: 42 | 43 | First, download and unpack the [pre-trained models](#pre-trained-models) into folder ``. 44 | 45 | Then, run the following command with the cracker box's pre-trained model: 46 | ``` 47 | export CUDA_VISIBLE_DEVICES=0 48 | python -m apps.eval --exp_id ncf_ycbv_run2_eval --work_base_path --model_dir --ds_ycbv_dir --obj_id 2 --bbx_size 380 --eval_perf True --load_netG_checkpoint_path --num_in_batch 10000 49 | ``` 50 | 51 | where `work_base_path` is the path to the results (e.g., the estimated pose csv file as `ncf-obj2_ycbv-Rt-time.csv`), `model_dir` is the path to the YCB-V 3D object models, `ds_ycbv_dir` is the path to the YCB-V dataset, and `load_netG_checkpoint_path` is the path to the cracker box's pre-trained model. 52 | 53 | 54 | ### 2.2 Training your own model 55 | 56 | To train on an object (e.g., cracker box) from YCB-V: 57 | 58 | Run the following command: 59 | 60 | ``` 61 | export CUDA_VISIBLE_DEVICES=0 62 | python -m apps.train --exp_id ncf_ycbv_run2_train --work_base_path --model_dir --ds_ycbv_dir --obj_id 2 --bbx_size 380 --num_in_batch 10000 63 | ``` 64 | 65 | where `work_base_path` is the path to the results (e.g., the estimated pose csv file as `ncf-obj2_ycbv-Rt-time.csv`), `model_dir` is the path to the YCB-V 3D object models, and `ds_ycbv_dir` is the path to the YCB-V dataset. 66 | 67 | 68 | ## 3. Pre-trained models 69 | 70 | - [YCB-V](https://drive.google.com/file/d/19rcvuIC7Ilu0MHPgLxmbxeUkOgBHR2be/view?usp=sharing) -------------------------------------------------------------------------------- /apps/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LinHuang17/NCF-code/8e429efb320b136786dc5438ea7d78c231ffab16/apps/__init__.py -------------------------------------------------------------------------------- /apps/eval.py: -------------------------------------------------------------------------------- 1 | """ 2 | eval. for rigid obj. 
3 | """ 4 | 5 | import os 6 | import sys 7 | 8 | sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), '..'))) 9 | ROOT_PATH = os.path.dirname(os.path.dirname(os.path.abspath(__file__))) 10 | 11 | import cv2 12 | import json 13 | import time 14 | import random 15 | import torch 16 | import numpy as np 17 | from tqdm import tqdm 18 | import torch.nn as nn 19 | from torch.utils.data import DataLoader, ConcatDataset 20 | 21 | from lib.data import * 22 | from lib.model import * 23 | from lib.net_util import * 24 | from lib.eval_Rt_time_util import * 25 | from lib.options import BaseOptions 26 | 27 | 28 | # get options 29 | opt = BaseOptions().parse() 30 | 31 | 32 | def evaluate(opt): 33 | 34 | # seed 35 | if opt.deterministic: 36 | seed = opt.seed 37 | print("Set manual random Seed: ", seed) 38 | random.seed(seed) 39 | np.random.seed(seed) 40 | torch.manual_seed(seed) # cpu 41 | torch.cuda.manual_seed(seed) 42 | torch.cuda.manual_seed_all(seed) 43 | torch.backends.cudnn.deterministic = True 44 | torch.backends.cudnn.benchmark = False 45 | else: 46 | torch.backends.cudnn.benchmark = True 47 | print("cuDNN benchmarking enabled") 48 | 49 | # set path 50 | work_path = os.path.join(opt.work_base_path, f"{opt.exp_id}") 51 | os.makedirs(work_path, exist_ok=True) 52 | checkpoints_path = os.path.join(work_path, "checkpoints") 53 | os.makedirs(checkpoints_path, exist_ok=True) 54 | results_path = os.path.join(work_path, "results") 55 | os.makedirs(results_path, exist_ok=True) 56 | tb_dir = os.path.join(work_path, "tb") 57 | os.makedirs(tb_dir, exist_ok=True) 58 | tb_runs_dir = os.path.join(tb_dir, "runs") 59 | os.makedirs(tb_runs_dir, exist_ok=True) 60 | debug_dir = os.path.join(work_path, "debug") 61 | os.makedirs(debug_dir, exist_ok=True) 62 | 63 | # set gpu environment 64 | devices_ids = opt.GPU_ID 65 | num_GPU = len(devices_ids) 66 | torch.cuda.set_device(devices_ids[0]) 67 | 68 | # dataset 69 | test_dataset_list = [] 70 | test_data_ids = [opt.eval_data] 71 | for data_id in test_data_ids: 72 | if data_id == 'lm_bop_cha': 73 | test_dataset_list.append(BOP_BP_LM(opt, phase='test')) 74 | if data_id == 'lmo_bop_cha': 75 | test_dataset_list.append(BOP_BP_LMO(opt, phase='test')) 76 | if data_id == 'ycbv_bop_cha': 77 | test_dataset_list.append(BOP_BP_YCBV(opt, phase='test')) 78 | projection_mode = test_dataset_list[0].projection_mode 79 | test_dataset = ConcatDataset(test_dataset_list) 80 | # create test data loader 81 | # NOTE: batch size should be 1 and use all the points for evaluation 82 | test_data_loader = DataLoader(test_dataset, 83 | batch_size=1, shuffle=False, 84 | num_workers=opt.num_threads, pin_memory=(opt.num_threads == 0)) 85 | # persistent_workers=(opt.num_threads > 0)) 86 | # num_workers=opt.num_threads, pin_memory=opt.pin_memory) 87 | print('test data size: ', len(test_dataset)) 88 | 89 | # define model, multi-gpu, checkpoint 90 | sdf_criterion = None 91 | xyz_criterion = None 92 | netG = HGPIFuNet(opt, projection_mode, 93 | sdf_loss_term=sdf_criterion, 94 | xyz_loss_term=xyz_criterion) 95 | print('Using Network: ', netG.name) 96 | 97 | def set_eval(): 98 | netG.eval() 99 | 100 | # load checkpoints 101 | if opt.continue_train or opt.eval_perf: 102 | print('Loading for net G ...', opt.load_netG_checkpoint_path) 103 | netG.load_state_dict(torch.load(opt.load_netG_checkpoint_path, map_location=torch.device('cpu'))) 104 | 105 | # Data Parallel 106 | # if num_GPU > 1: 107 | netG = torch.nn.DataParallel(netG, device_ids=devices_ids, output_device=devices_ids[0]) 108 | # netG 
= torch.nn.parallel.DistributedDataParallel(netG, device_ids=devices_ids, output_device=devices_ids[0]) 109 | print(f'Data Paralleling on GPU: {devices_ids}') 110 | netG.cuda() 111 | 112 | os.makedirs(checkpoints_path, exist_ok=True) 113 | os.makedirs(results_path, exist_ok=True) 114 | os.makedirs('%s/%s' % (checkpoints_path, opt.name), exist_ok=True) 115 | os.makedirs('%s/%s' % (results_path, opt.name), exist_ok=True) 116 | opt_log = os.path.join(results_path, opt.name, 'opt.txt') 117 | with open(opt_log, 'w') as outfile: 118 | outfile.write(json.dumps(vars(opt), indent=2)) 119 | 120 | # evaluation 121 | with torch.no_grad(): 122 | set_eval() 123 | obj_id = [opt.obj_id][0] 124 | print('eval. for obj. pose and time (test) ...') 125 | save_csv_path = os.path.join(results_path, opt.name, f'ncf-obj{obj_id}_{opt.dataset}-Rt-time.csv') 126 | eval_Rt_time(opt, netG.module, test_data_loader, save_csv_path) 127 | 128 | if __name__ == '__main__': 129 | evaluate(opt) 130 | -------------------------------------------------------------------------------- /apps/train.py: -------------------------------------------------------------------------------- 1 | """ 2 | train & eval. for rigid obj. 3 | """ 4 | 5 | import os 6 | import sys 7 | 8 | sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), '..'))) 9 | ROOT_PATH = os.path.dirname(os.path.dirname(os.path.abspath(__file__))) 10 | 11 | import cv2 12 | import json 13 | import time 14 | import random 15 | import torch 16 | import numpy as np 17 | from tqdm import tqdm 18 | import torch.nn as nn 19 | from torch.utils.data import DataLoader, ConcatDataset 20 | 21 | from lib.data import * 22 | from lib.model import * 23 | from lib.net_util import * 24 | from lib.sym_util import * 25 | from lib.loss_util import * 26 | from lib.eval_Rt_time_util import * 27 | from lib.options import BaseOptions 28 | 29 | from lib.debug_pyrender_util import * 30 | from torch.utils.tensorboard import SummaryWriter 31 | 32 | # get options 33 | opt = BaseOptions().parse() 34 | 35 | class meter(): 36 | 37 | def __init__(self, opt): 38 | 39 | self.opt = opt 40 | 41 | self.load_time = AverageMeter() 42 | self.forward_time = AverageMeter() 43 | 44 | self.sdf_loss_meter = AverageMeter() 45 | if self.opt.use_xyz: 46 | self.xyz_loss_meter = AverageMeter() 47 | self.total_loss_meter = AverageMeter() 48 | 49 | def update_time(self, time, end, state): 50 | 51 | if state == 'forward': 52 | self.forward_time.update(time - end) 53 | elif state == 'load': 54 | self.load_time.update(time - end) 55 | 56 | def update_total_loss(self, total_loss, size): 57 | 58 | self.total_loss_meter.update(total_loss.item(), size) 59 | 60 | def update_loss(self, loss_dict, size): 61 | 62 | self.sdf_loss_meter.update(loss_dict['sdf_loss'].mean().item(), size) 63 | if self.opt.use_xyz: 64 | self.xyz_loss_meter.update(loss_dict['xyz_loss'].mean().item(), size) 65 | 66 | def set_dataset_train_mode(dataset, mode=True): 67 | for dataset_idx in range(len(dataset.datasets)): 68 | dataset.datasets[dataset_idx].is_train = mode 69 | 70 | def train(opt): 71 | 72 | # seed 73 | if opt.deterministic: 74 | seed = opt.seed 75 | print("Set manual random Seed: ", seed) 76 | random.seed(seed) 77 | np.random.seed(seed) 78 | torch.manual_seed(seed) # cpu 79 | torch.cuda.manual_seed(seed) 80 | torch.cuda.manual_seed_all(seed) 81 | torch.backends.cudnn.deterministic = True 82 | torch.backends.cudnn.benchmark = False 83 | else: 84 | torch.backends.cudnn.benchmark = True 85 | print("cuDNN benchmarking enabled") 86 | 87 
| # set path 88 | work_path = os.path.join(opt.work_base_path, f"{opt.exp_id}") 89 | os.makedirs(work_path, exist_ok=True) 90 | checkpoints_path = os.path.join(work_path, "checkpoints") 91 | os.makedirs(checkpoints_path, exist_ok=True) 92 | results_path = os.path.join(work_path, "results") 93 | os.makedirs(results_path, exist_ok=True) 94 | tb_dir = os.path.join(work_path, "tb") 95 | os.makedirs(tb_dir, exist_ok=True) 96 | tb_runs_dir = os.path.join(tb_dir, "runs") 97 | os.makedirs(tb_runs_dir, exist_ok=True) 98 | debug_dir = os.path.join(work_path, "debug") 99 | os.makedirs(debug_dir, exist_ok=True) 100 | 101 | writer = SummaryWriter(os.path.join(tb_runs_dir, f'{opt.exp_id}')) 102 | writer.add_text('Info', 'ncf for obj. Rt est. in frustum space using pred. sdf & xyz') 103 | 104 | # set gpu environment 105 | devices_ids = opt.GPU_ID 106 | num_GPU = len(devices_ids) 107 | torch.cuda.set_device(devices_ids[0]) 108 | 109 | # dataset 110 | train_dataset_list = [] 111 | train_data_ids = [opt.train_data] + [opt.more_train_data] 112 | for data_id in train_data_ids: 113 | if data_id == 'lm': 114 | train_dataset_list.append(BOP_BP_LM(opt, phase='train')) 115 | if data_id == 'ycbv': 116 | train_dataset_list.append(BOP_BP_YCBV(opt, phase='train')) 117 | if data_id == 'ycbv_real': 118 | train_dataset_list.append(BOP_BP_YCBV_real(opt, phase='train')) 119 | projection_mode = train_dataset_list[0].projection_mode 120 | train_dataset = ConcatDataset(train_dataset_list) 121 | # create train data loader 122 | train_data_loader = DataLoader(train_dataset, 123 | batch_size=opt.batch_size, shuffle=not opt.serial_batches, 124 | num_workers=opt.num_threads, pin_memory=(opt.num_threads == 0)) 125 | # persistent_workers=(opt.num_threads > 0)) 126 | # num_workers=opt.num_threads, pin_memory=opt.pin_memory) 127 | print('train data size: ', len(train_dataset)) 128 | 129 | test_dataset_list = [] 130 | test_data_ids = [opt.eval_data] 131 | for data_id in test_data_ids: 132 | if data_id == 'lm_bop_cha': 133 | test_dataset_list.append(BOP_BP_LM(opt, phase='test')) 134 | if data_id == 'lmo_bop_cha': 135 | test_dataset_list.append(BOP_BP_LMO(opt, phase='test')) 136 | if data_id == 'ycbv_bop_cha': 137 | test_dataset_list.append(BOP_BP_YCBV(opt, phase='test')) 138 | test_dataset = ConcatDataset(test_dataset_list) 139 | # create test data loader 140 | # NOTE: batch size should be 1 and use all the points for evaluation 141 | test_data_loader = DataLoader(test_dataset, 142 | batch_size=1, shuffle=False, 143 | num_workers=opt.num_threads, pin_memory=(opt.num_threads == 0)) 144 | # persistent_workers=(opt.num_threads > 0)) 145 | # num_workers=opt.num_threads, pin_memory=opt.pin_memory) 146 | print('test data size: ', len(test_dataset)) 147 | 148 | # pre-define pool of symmetric poses 149 | sym_pool=[] 150 | obj_id = [opt.obj_id][0] 151 | # load obj. param. 152 | obj_params = get_obj_params(opt.model_dir, [opt.train_data][0]) 153 | # Load meta info about the models (including symmetries). 
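    # Added note (editorial, unnumbered): models_info.json (BOP format) stores per-object metadata and,
    # for symmetric objects, the discrete/continuous symmetries. get_symmetry_transformations() expands
    # these into a finite set of {R, t} poses (continuous symmetries are discretized according to
    # opt.max_sym_disc_step); each pose is packed below into a 4x4 homogeneous matrix and collected in
    # sym_pool, which the symmetry-aware XYZ loss (XYZLoss_sym, selected further down when
    # len(sym_pool) > 1) uses during training.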
154 | models_info = load_json(obj_params['models_info_path'], keys_to_int=True) 155 | sym_poses = get_symmetry_transformations(models_info[obj_id], opt.max_sym_disc_step) 156 | for sym_pose in sym_poses: 157 | Rt = np.concatenate([sym_pose['R'], sym_pose['t'].reshape(3,1)], axis=1) 158 | Rt = np.concatenate([Rt, np.array([0, 0, 0, 1]).reshape(1, 4)], axis=0) 159 | sym_pool.append(torch.Tensor(Rt)) 160 | 161 | # define model, multi-gpu, checkpoint 162 | if opt.loss_type == 'mse': 163 | sdf_criterion = torch.nn.MSELoss() 164 | elif opt.loss_type == 'l1': 165 | sdf_criterion = torch.nn.L1Loss() 166 | elif opt.loss_type == 'huber': 167 | sdf_criterion = torch.nn.SmoothL1Loss() 168 | xyz_criterion = None 169 | if opt.use_xyz: 170 | if (len(sym_pool) > 1): 171 | xyz_criterion = XYZLoss_sym(use_xyz_mask=True, sym_pool=sym_pool) 172 | else: 173 | xyz_criterion = XYZLoss(use_xyz_mask=True) 174 | netG = HGPIFuNet(opt, projection_mode, 175 | sdf_loss_term=sdf_criterion, 176 | xyz_loss_term=xyz_criterion) 177 | print('Using Network: ', netG.name) 178 | 179 | def set_train(): 180 | netG.train() 181 | 182 | def set_eval(): 183 | netG.eval() 184 | 185 | # load checkpoints 186 | if opt.continue_train or opt.eval_perf: 187 | print('Loading for net G ...', opt.load_netG_checkpoint_path) 188 | netG.load_state_dict(torch.load(opt.load_netG_checkpoint_path, map_location=torch.device('cpu'))) 189 | 190 | # Data Parallel 191 | # if num_GPU > 1: 192 | netG = torch.nn.DataParallel(netG, device_ids=devices_ids, output_device=devices_ids[0]) 193 | # netG = torch.nn.parallel.DistributedDataParallel(netG, device_ids=devices_ids, output_device=devices_ids[0]) 194 | print(f'Data Paralleling on GPU: {devices_ids}') 195 | netG.cuda() 196 | 197 | os.makedirs(checkpoints_path, exist_ok=True) 198 | os.makedirs(results_path, exist_ok=True) 199 | os.makedirs('%s/%s' % (checkpoints_path, opt.name), exist_ok=True) 200 | os.makedirs('%s/%s' % (results_path, opt.name), exist_ok=True) 201 | opt_log = os.path.join(results_path, opt.name, 'opt.txt') 202 | with open(opt_log, 'w') as outfile: 203 | outfile.write(json.dumps(vars(opt), indent=2)) 204 | 205 | # optimizer 206 | lr = opt.learning_rate 207 | if opt.optimizer == 'rms': 208 | optimizerG = torch.optim.RMSprop(netG.module.parameters(), lr=lr, momentum=0, weight_decay=0) 209 | print(f'Using optimizer: rms') 210 | # optimizerG = torch.optim.RMSprop(netG.parameters(), lr=lr, momentum=0, weight_decay=0) 211 | # optimizerG = torch.optim.RMSprop(netG.parameters(), lr=opt.lr, momentum=opt.momentum, weight_decay=opt.weight_decay) 212 | elif opt.optimizer == 'adam': 213 | optimizerG = torch.optim.Adam(netG.module.parameters(), lr=lr, betas=(0.9, 0.999), eps=1e-08) 214 | print(f'Using optimizer: adam') 215 | # optimizerG = torch.optim.Adam(netG.parameters(), lr=lr, betas=(0.9, 0.999), eps=1e-08) 216 | # load optimizer 217 | if opt.continue_train and opt.load_optG_checkpoint_path is not None: 218 | print('Loading for opt G ...', opt.load_optG_checkpoint_path) 219 | optimizerG.load_state_dict(torch.load(opt.load_optG_checkpoint_path)) 220 | 221 | # training 222 | tb_train_idx = 0 223 | start_epoch = 0 if not opt.continue_train else max(opt.resume_epoch,0) 224 | for epoch in range(start_epoch, opt.num_epoch): 225 | # log lr 226 | writer.add_scalar('train/learning_rate', lr, epoch) 227 | 228 | # meter, time, train mode 229 | train_meter = meter(opt) 230 | epoch_start_time = time.time() 231 | set_train() 232 | # torch.cuda.synchronize() 233 | iter_data_time = time.time() 234 | for train_idx, 
train_data in enumerate(train_data_loader): 235 | tb_train_idx += 1 236 | # measure elapsed data loading time in batch 237 | iter_start_time = time.time() 238 | train_meter.update_time(iter_start_time, iter_data_time, 'load') 239 | 240 | # retrieve the data 241 | # shape (B, 3, 480, 640) 242 | image_tensor = train_data['img'].cuda() 243 | # shape (B, 4, 4) 244 | calib_tensor = train_data['calib'].cuda() 245 | # shape (B, 3, 5000) 246 | sample_tensor = train_data['samples'].cuda() 247 | batch = image_tensor.size(0) 248 | 249 | # shape (B, 1, 5000) 250 | label_tensor = train_data['labels'].cuda() 251 | if opt.use_xyz: 252 | # shape (B, 1, 5000) 253 | xyz_tensor = train_data['xyzs'].cuda() 254 | xyz_mask_tensor = train_data['xyz_mask'].cuda() 255 | transforms = torch.zeros([batch,2,3]).cuda() 256 | transforms[:, 0,0] = 1 / (opt.img_size[0] // 2) 257 | transforms[:, 1,1] = 1 / (opt.img_size[1] // 2) 258 | transforms[:, 0,2] = -1 259 | transforms[:, 1,2] = -1 260 | if opt.use_xyz: 261 | results, loss_dict, xyzs, uvz = netG(image_tensor, sample_tensor, calib_tensor, 262 | labels=label_tensor, transforms=transforms, 263 | gt_xyzs=xyz_tensor, gt_xyz_mask=xyz_mask_tensor) 264 | else: 265 | results, loss_dict, uvz = netG(image_tensor, sample_tensor, calib_tensor, 266 | labels=label_tensor, transforms=transforms) 267 | 268 | optimizerG.zero_grad() 269 | # for param in netG.module.parameters(): 270 | # for param in netG.parameters(): 271 | # param.grad = None 272 | loss_dict['total_loss'].mean().backward() 273 | # error.backward() 274 | optimizerG.step() 275 | 276 | # measure elapsed forward time in batch 277 | # torch.cuda.synchronize() 278 | iter_net_time = time.time() 279 | train_meter.update_time(iter_net_time, iter_start_time, 'forward') 280 | eta = ((iter_net_time - epoch_start_time) / (train_idx + 1)) * len(train_data_loader) - ( 281 | iter_net_time - epoch_start_time) 282 | 283 | # update loss 284 | train_meter.update_loss(loss_dict, batch) 285 | # update total loss 286 | train_meter.update_total_loss(loss_dict['total_loss'].mean(), batch) 287 | 288 | writer.add_scalar('train/total_loss_per_batch', train_meter.total_loss_meter.val, tb_train_idx) 289 | writer.add_scalar('train/sdf_loss_per_batch', train_meter.sdf_loss_meter.val, tb_train_idx) 290 | if opt.use_xyz: 291 | writer.add_scalar('train/xyz_loss_per_batch', train_meter.xyz_loss_meter.val, tb_train_idx) 292 | if train_idx % opt.freq_plot == 0: 293 | print('Name: {0} | Epoch: {1} | {2}/{3} | Loss: {4:.06f} | LR: {5:.06f} | dataT: {6:.05f} | netT: {7:.05f} | ETA: {8:02d}:{9:02d}'.format( 294 | opt.name, epoch, train_idx, len(train_data_loader), loss_dict['total_loss'].mean().item(), lr, 295 | iter_start_time - iter_data_time, iter_net_time - iter_start_time, int(eta // 60), int(eta - 60 * (eta // 60)))) 296 | 297 | if train_idx % opt.freq_debug == 0: 298 | with torch.no_grad(): 299 | # debug for rgb, mask, rendering of object model 300 | # shape (4, 3, 480, 640) 301 | name = train_data['name'][0] 302 | model_mesh = train_data_loader.dataset.datasets[0].model_mesh_dict[name].copy(include_cache=False) 303 | img = (np.transpose(image_tensor[0].detach().cpu().numpy(), (1, 2, 0)) * 0.5 + 0.5) 304 | save_debug_path = os.path.join(debug_dir, f'train_sample{train_idx}_epoch{epoch}_debug.jpeg') 305 | viz_debug_data(img, model_mesh, 306 | train_data['extrinsic'][0].detach().numpy(), train_data['aug_intrinsic'][0].detach().numpy(), 307 | save_debug_path) 308 | 309 | # debug for query projection during forward 310 | # shape (4, 3, 5000), (4, 1, 5000) 
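                # Added note (editorial, unnumbered): inv_trans is the inverse of the `transforms`
                # matrix built for the forward pass above. `transforms` maps pixel coordinates into
                # [-1, 1] (scale 1/(W/2), 1/(H/2), shift -1), so inv_trans scales by W/2, H/2 and
                # shifts by W/2, H/2 to bring the projected query points in uvz back to pixel
                # coordinates for the debug visualization saved below.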
311 | inv_trans = torch.zeros([1,2,3]) 312 | inv_trans[:, 0,0] = (opt.img_size[0] // 2) 313 | inv_trans[:, 1,1] = (opt.img_size[1] // 2) 314 | inv_trans[:, 0,2] = (opt.img_size[0] // 2) 315 | inv_trans[:, 1,2] = (opt.img_size[1] // 2) 316 | scale = inv_trans[:, :2, :2] 317 | shift = inv_trans[:, :2, 2:3] 318 | uv = torch.baddbmm(shift, scale, uvz[0].detach().cpu()[:2, :].unsqueeze(0)) 319 | query_res = {'img': image_tensor[0].detach().cpu(), 'samples': uv.squeeze(0), 'labels': label_tensor[0].detach().cpu()} 320 | save_in_query_path = os.path.join(debug_dir, f'train_sample{train_idx}_epoch{epoch}_in_query.jpeg') 321 | save_out_query_path = os.path.join(debug_dir, f'train_sample{train_idx}_epoch{epoch}_out_query.jpeg') 322 | viz_debug_query_forward(opt.out_type, query_res, save_in_query_path, save_out_query_path) 323 | 324 | # debug for prediction & gt ply for query & its label 325 | save_gt_path = os.path.join(debug_dir, f'train_sample{train_idx}_epoch{epoch}_sdf_gt.ply') 326 | save_sdf_path = os.path.join(debug_dir, f'train_sample{train_idx}_epoch{epoch}_sdf_est.ply') 327 | r = results[0].cpu() 328 | points = sample_tensor[0].transpose(0, 1).cpu() 329 | if opt.out_type[-3:] == 'sdf': 330 | save_samples_truncted_sdf(save_gt_path, points.detach().numpy(), label_tensor[0].transpose(0, 1).cpu().detach().numpy(), thres=opt.norm_clamp_dist) 331 | save_samples_truncted_sdf(save_sdf_path, points.detach().numpy(), r.detach().numpy(), thres=opt.norm_clamp_dist) 332 | if opt.use_xyz: 333 | norm_xyz_factor = train_data['norm_xyz_factor'][0].item() 334 | pred_xyzs = (xyzs[0].transpose(0, 1).cpu()) * norm_xyz_factor 335 | save_sdf_xyz_path = os.path.join(debug_dir, f'train_sample{train_idx}_epoch{epoch}_xyz_est.ply') 336 | save_samples_truncted_sdf(save_sdf_xyz_path, pred_xyzs.detach().numpy(), r.detach().numpy(), thres=opt.norm_clamp_dist) 337 | 338 | iter_data_time = time.time() 339 | 340 | writer.add_scalars('train/time_per_epoch', {'forward_per_batch': train_meter.forward_time.avg, 'dataload_per_batch': train_meter.load_time.avg}, epoch) 341 | writer.add_scalar('train/total_loss_per_epoch', train_meter.total_loss_meter.avg, epoch) 342 | writer.add_scalar('train/sdf_loss_per_epoch', train_meter.sdf_loss_meter.avg, epoch) 343 | if opt.use_xyz: 344 | writer.add_scalar('train/xyz_loss_per_epoch', train_meter.xyz_loss_meter.avg, epoch) 345 | # update learning rate 346 | lr = adjust_learning_rate(optimizerG, epoch, lr, opt.schedule, opt.gamma) 347 | # save checkpoints 348 | torch.save(netG.module.state_dict(), '%s/%s/netG_epoch_%d' % (checkpoints_path, opt.name, epoch)) 349 | torch.save(optimizerG.state_dict(), '%s/%s/optG_epoch_%d' % (checkpoints_path, opt.name, epoch)) 350 | 351 | #### test 352 | with torch.no_grad(): 353 | set_eval() 354 | obj_id = [opt.obj_id][0] 355 | if epoch > 0 and epoch % opt.freq_eval_all == 0 and opt.use_xyz and opt.gen_obj_pose: 356 | print('eval. for obj. 
pose and time (test) ...') 357 | save_csv_path = os.path.join(results_path, opt.name, f'ncf-obj{obj_id}_{opt.dataset}-Rt-time.csv') 358 | eval_Rt_time(opt, netG.module, test_data_loader, save_csv_path) 359 | 360 | writer.close() 361 | 362 | if __name__ == '__main__': 363 | train(opt) 364 | -------------------------------------------------------------------------------- /environment.yaml: -------------------------------------------------------------------------------- 1 | name: ncf 2 | channels: 3 | - nodefaults 4 | - conda-forge 5 | - pytorch 6 | dependencies: 7 | - python=3.7 8 | - cudatoolkit=10.1 9 | - pytorch=1.4.0=py3.7_cuda10.1.243_cudnn7.6.3_0 10 | - torchvision=0.5.0=py37_cu101 11 | - numpy 12 | - tqdm 13 | - pyembree 14 | - shapely 15 | - xxhash 16 | - trimesh 17 | - eigenpy 18 | - rtree 19 | - scikit-image==0.16.2 20 | - matplotlib 21 | - scipy 22 | - imageio 23 | - cython 24 | - pip 25 | - pip: 26 | - pypng 27 | - pysdf 28 | - pyrender 29 | - tensorboard 30 | - transforms3d 31 | - opencv-python 32 | - Pillow==8.2.0 33 | - git+https://github.com/hassony2/chumpy.git 34 | - git+https://github.com/mmatl/pyopengl.git 35 | -------------------------------------------------------------------------------- /lib/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LinHuang17/NCF-code/8e429efb320b136786dc5438ea7d78c231ffab16/lib/__init__.py -------------------------------------------------------------------------------- /lib/data/BOP_BP_YCBV.py: -------------------------------------------------------------------------------- 1 | """ 2 | dataset class of bop ycbv 3 | """ 4 | 5 | import os 6 | import sys 7 | import pdb 8 | import random 9 | import logging 10 | import inspect 11 | 12 | currentdir = os.path.dirname(os.path.abspath(inspect.getfile(inspect.currentframe()))) 13 | parentdir = os.path.dirname(currentdir) 14 | sys.path.insert(0, parentdir) 15 | 16 | import PIL 17 | import json 18 | import torch 19 | import pickle 20 | import numpy as np 21 | from PIL import Image, ImageOps 22 | from PIL.ImageFilter import GaussianBlur 23 | 24 | from torch.utils.data import Dataset 25 | from torch.utils.data import DataLoader 26 | import torchvision.transforms as transforms 27 | 28 | from data_utils.aug_util import AugmentOp, augment_image 29 | from data_utils.sample_frustum_util import load_trimesh, wks_sampling_sdf_xyz_calc, wks_sampling_eff_csdf_xyz_calc, xyz_mask_calc 30 | 31 | from options import BaseOptions 32 | from debug_pyrender_util import * 33 | 34 | # log = logging.getLogger('trimesh') 35 | # log.setLevel(40) 36 | 37 | class BOP_BP_YCBV(Dataset): 38 | @staticmethod 39 | def modify_commandline_options(parser, is_train): 40 | return parser 41 | 42 | def __init__(self, opt, phase='train'): 43 | self.opt = opt 44 | # path & state setup 45 | self.phase = phase 46 | self.is_train = (self.phase == 'train') 47 | 48 | # 3D->2D projection: 'orthogonal' or 'perspective' 49 | self.projection_mode = 'perspective' 50 | 51 | # ABBox or Sphere in cam. c.s. 52 | B_SHIFT = self.opt.bbx_shift 53 | Bx_SIZE = self.opt.bbx_size // 2 54 | By_SIZE = self.opt.bbx_size // 2 55 | Bz_SIZE = self.opt.bbx_size // 2 56 | self.B_MIN = np.array([-Bx_SIZE, -By_SIZE, -Bz_SIZE]) 57 | self.B_MAX = np.array([Bx_SIZE, By_SIZE, Bz_SIZE]) 58 | # wks box in cam. c.s. 
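        # Added note (editorial, unnumbered): the "wks" (workspace) box is an axis-aligned volume in
        # camera coordinates with size opt.wks_size, centered at depth opt.wks_z_shift along the
        # camera z-axis. It is handed to the wks_sampling_* helpers in get_item() to draw 3D query
        # points, while B_MIN/B_MAX above give the +/- bbx_size/2 box around the object (at test time
        # its half-size, bbx_size/2, is returned as norm_xyz_factor).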
59 | self.CAM_Bz_SHIFT = self.opt.wks_z_shift 60 | Cam_Bx_SIZE = self.opt.wks_size[0] // 2 61 | Cam_By_SIZE = self.opt.wks_size[1] // 2 62 | Cam_Bz_SIZE = self.opt.wks_size[2] // 2 63 | self.CAM_B_MIN = np.array([-Cam_Bx_SIZE, -Cam_By_SIZE, -Cam_Bz_SIZE+self.CAM_Bz_SHIFT]) 64 | self.CAM_B_MAX = np.array([Cam_Bx_SIZE, Cam_By_SIZE, Cam_Bz_SIZE+self.CAM_Bz_SHIFT]) 65 | # test wks box in cam. c.s. 66 | self.TEST_CAM_Bz_SHIFT = self.opt.test_wks_z_shift 67 | Test_Cam_Bx_SIZE = self.opt.test_wks_size[0] // 2 68 | Test_Cam_By_SIZE = self.opt.test_wks_size[1] // 2 69 | Test_Cam_Bz_SIZE = self.opt.test_wks_size[2] // 2 70 | self.TEST_CAM_B_MIN = np.array([-Test_Cam_Bx_SIZE, -Test_Cam_By_SIZE, -Test_Cam_Bz_SIZE+self.TEST_CAM_Bz_SHIFT]) 71 | self.TEST_CAM_B_MAX = np.array([Test_Cam_Bx_SIZE, Test_Cam_By_SIZE, Test_Cam_Bz_SIZE+self.TEST_CAM_Bz_SHIFT]) 72 | 73 | # PIL to tensor 74 | self.to_tensor = transforms.Compose([ 75 | transforms.ToTensor(), 76 | transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)) 77 | ]) 78 | 79 | self.aug_ops = [ 80 | AugmentOp('blur', 0.4, [1, self.opt.aug_blur]), 81 | AugmentOp('sharpness', 0.3, [0.0, self.opt.aug_sha]), 82 | AugmentOp('contrast', 0.3, [0.2, self.opt.aug_con]), 83 | AugmentOp('brightness', 0.5, [0.1, self.opt.aug_bri]), 84 | AugmentOp('color', 0.3, [0.0, self.opt.aug_col]), 85 | ] 86 | 87 | # ycbv train 88 | # self.obj_id = self.opt.obj_id 89 | self.obj_id_list = [self.opt.obj_id] 90 | self.model_dir = self.opt.model_dir 91 | self.ds_root_dir = self.opt.ds_ycbv_dir 92 | if self.phase == 'train': 93 | self.ds_dir = os.path.join(self.ds_root_dir, 'train_pbr') 94 | start = 0 95 | end = 50 96 | self.visib_fract_thresh = self.opt.visib_fract_thresh 97 | elif self.phase == 'test': 98 | self.ds_dir = os.path.join(self.ds_root_dir, 'test') 99 | start = 48 100 | end = 60 101 | self.visib_fract_thresh = 0.0 102 | self.all_gt_info = [] 103 | for folder_id in range(start, end): 104 | self.scene_gt_dict = {} 105 | self.scene_gt_info_dict = {} 106 | self.scene_camera_dict = {} 107 | with open(os.path.join(self.ds_dir, f'{int(folder_id):06d}/scene_gt_info.json')) as f: 108 | self.scene_gt_info_dict = json.load(f) 109 | 110 | with open(os.path.join(self.ds_dir, f'{int(folder_id):06d}/scene_camera.json')) as f: 111 | self.scene_camera_dict = json.load(f) 112 | 113 | with open(os.path.join(self.ds_dir, f'{int(folder_id):06d}/scene_gt.json')) as f: 114 | self.scene_gt_dict = json.load(f) 115 | 116 | # for data_idx in range(len(self.scene_gt_info_dict)): 117 | for data_id in self.scene_gt_info_dict.keys(): 118 | len_item = len(self.scene_gt_info_dict[str(data_id)]) 119 | for obj_idx in range(len_item): 120 | # self.all_gt_info[str(obj_id)] = [] 121 | if self.scene_gt_dict[str(data_id)][obj_idx]['obj_id'] in self.obj_id_list: 122 | if self.scene_gt_info_dict[str(data_id)][obj_idx]['visib_fract'] > self.visib_fract_thresh: 123 | single_annot = {} 124 | single_annot['folder_id'] = folder_id 125 | single_annot['frame_id'] = int(data_id) 126 | single_annot['cam_R_m2c'] = self.scene_gt_dict[str(data_id)][obj_idx]['cam_R_m2c'] 127 | single_annot['cam_t_m2c'] = self.scene_gt_dict[str(data_id)][obj_idx]['cam_t_m2c'] 128 | single_annot['obj_id'] = self.scene_gt_dict[str(data_id)][obj_idx]['obj_id'] 129 | single_annot['cam_K'] = self.scene_camera_dict[str(data_id)]['cam_K'] 130 | # self.all_gt_info[str(obj_id)].append(single_annot) 131 | self.all_gt_info.append(single_annot) 132 | self.model_mesh_dict = load_trimesh(self.model_dir, self.opt.model_unit) 133 | 134 | def __len__(self): 135 
| 136 | return len(self.all_gt_info) 137 | 138 | def get_img_cam(self, frame_id): 139 | 140 | data_gt_info = self.all_gt_info[frame_id] 141 | folder_id = data_gt_info['folder_id'] 142 | frame_id = data_gt_info['frame_id'] 143 | rgb_parent_path = os.path.join(self.ds_dir, f'{int(folder_id):06d}', 'rgb') 144 | if self.phase == 'train': 145 | rgb_path = os.path.join(rgb_parent_path, f'{int(frame_id):06d}.jpg') 146 | elif self.phase == 'test': 147 | rgb_path = os.path.join(rgb_parent_path, f'{int(frame_id):06d}.png') 148 | 149 | # shape (H, W, C)/(480, 640, 3) 150 | render = Image.open(rgb_path).convert('RGB') 151 | w, h = render.size 152 | 153 | # original camera intrinsic 154 | K = np.array(data_gt_info['cam_K']).reshape(3, 3) 155 | camera = dict(K=K.astype(np.float32), aug_K=np.copy(K.astype(np.float32)), resolution=(w, h)) 156 | 157 | objects = [] 158 | # annotation for every object in the scene 159 | # Rotation matrix from model to cam 160 | R_m2c = np.array(data_gt_info['cam_R_m2c']).reshape(3, 3) 161 | # translation vector from model to cam 162 | # unit: mm -> cm 163 | t_m2c = np.array(data_gt_info['cam_t_m2c']) 164 | # Rigid Transform class from model to cam/model c.s. 6D pose in cam c.s./extrinsic 165 | RT_m2c = np.concatenate([R_m2c, t_m2c.reshape(3,1)], axis=1) 166 | # model to cam: Rigid Transform homo. matrix 167 | RT_m2c = np.concatenate([RT_m2c, np.array([0, 0, 0, 1]).reshape(1, 4)], axis=0) 168 | obj_id = data_gt_info['obj_id'] 169 | name = f'obj_{int(obj_id):06d}' 170 | obj = dict(label=name, name=name, RT_m2c=RT_m2c.astype(np.float32)) 171 | objects.append(obj) 172 | 173 | # object name 174 | objname = objects[0]['name'] 175 | 176 | # color aug. 177 | if self.is_train and self.opt.use_aug: 178 | render = augment_image(render, self.aug_ops) 179 | 180 | aug_intrinsic = camera['aug_K'] 181 | aug_intrinsic = np.concatenate([aug_intrinsic, np.array([0, 0, 0]).reshape(3, 1)], 1) 182 | aug_intrinsic = np.concatenate([aug_intrinsic, np.array([0, 0, 0, 1]).reshape(1, 4)], 0) 183 | extrinsic = objects[0]['RT_m2c'] 184 | calib = torch.Tensor(np.matmul(aug_intrinsic, extrinsic)).float() 185 | extrinsic = torch.Tensor(extrinsic).float() 186 | aug_intrinsic = torch.Tensor(aug_intrinsic).float() 187 | 188 | render = self.to_tensor(render) 189 | 190 | # shape (C, H, W), ... 191 | return {'img': render, 'calib': aug_intrinsic, 'extrinsic': extrinsic, 'aug_intrinsic': aug_intrinsic, 'folder_id': folder_id, 'frame_id': frame_id, 'obj_id': obj_id, 'name': objname} 192 | 193 | def get_item(self, index): 194 | 195 | res = { 196 | 'b_min': self.CAM_B_MIN, 197 | 'b_max': self.CAM_B_MAX, 198 | 'test_b_min': self.TEST_CAM_B_MIN, 199 | 'test_b_max': self.TEST_CAM_B_MAX, 200 | } 201 | 202 | render_data = self.get_img_cam(index) 203 | res.update(render_data) 204 | if self.is_train: 205 | if self.opt.out_type[:3] == 'eff': 206 | # efficient conventional-SDF calculation 207 | sample_data = wks_sampling_eff_csdf_xyz_calc(self.opt, 208 | # bouding box 209 | self.B_MAX, self.B_MIN, 210 | # wks 211 | self.CAM_B_MAX, self.CAM_B_MIN, 212 | # model mesh 213 | self.model_mesh_dict[res['name']].copy(include_cache=False), 214 | # camera param. 
& bouding volume 215 | res['extrinsic'].clone(), res['calib'].clone(), bounding='sphere') 216 | else: 217 | # Ray-SDF or conventional-SDF calculation 218 | sample_data = wks_sampling_sdf_xyz_calc(self.opt, 219 | # bouding box 220 | self.B_MAX, self.B_MIN, 221 | # wks 222 | self.CAM_B_MAX, self.CAM_B_MIN, 223 | # model mesh 224 | self.model_mesh_dict[res['name']].copy(include_cache=False), 225 | # camera param. & bouding volume 226 | res['extrinsic'].clone(), res['calib'].clone(), bounding='sphere') 227 | if self.opt.use_xyz: 228 | xyz_mask = xyz_mask_calc(sdfs=sample_data['labels'].clone(), xyz_range=self.opt.norm_clamp_dist) 229 | res.update(xyz_mask) 230 | res.update(sample_data) 231 | else: 232 | norm_xyz_factor = self.opt.bbx_size / 2 233 | res['norm_xyz_factor'] = torch.tensor(norm_xyz_factor) 234 | 235 | return res 236 | 237 | def __getitem__(self, index): 238 | return self.get_item(index) 239 | 240 | 241 | if __name__ == '__main__': 242 | 243 | phase = 'train' 244 | opt = BaseOptions().parse() 245 | debug_path = f'/data1/lin/ncf_results/data/ycbv_{opt.out_type}_obj{opt.obj_id}_{phase}' 246 | os.makedirs(debug_path, exist_ok=True) 247 | dataset = BOP_BP_YCBV(opt, phase=phase) 248 | print(f'len. of dataset {len(dataset)}') 249 | 250 | num_debug = 10 251 | for idx in range(0, len(dataset), len(dataset) // num_debug): 252 | print(f'Debugging for sample: {idx}') 253 | res = dataset[idx] 254 | 255 | # debug for rgb, mask, rendering of object model 256 | # img = np.uint8((np.transpose(res['img'].numpy(), (1, 2, 0)) * 0.5 + 0.5)[:, :, ::-1] * 255.0) 257 | model_mesh = dataset.model_mesh_dict[res['name']].copy(include_cache=False) 258 | img = (np.transpose(res['img'].numpy(), (1, 2, 0)) * 0.5 + 0.5) 259 | save_debug_path = os.path.join(debug_path, f'data_sample{idx}_debug_bop_ycbv.jpeg') 260 | viz_debug_data(img, model_mesh, 261 | res['extrinsic'].numpy(), res['aug_intrinsic'].numpy(), 262 | save_debug_path) 263 | 264 | # debug for sampled points with labels: same for each sample 265 | save_sdf_path = os.path.join(debug_path, f'data_sample{idx}_clamp{opt.norm_clamp_dist}_sdf.ply') 266 | save_sdf_xyz_path = os.path.join(debug_path, f'data_sample{idx}_clamp{opt.norm_clamp_dist}_xyz.ply') 267 | save_samples_truncted_sdf(save_sdf_path, res['samples'].numpy().T, res['labels'].numpy().T, thres=opt.norm_clamp_dist) 268 | save_samples_truncted_sdf(save_sdf_xyz_path, res['xyzs'].numpy().T, res['labels'].numpy().T, thres=opt.norm_clamp_dist) 269 | 270 | # debug for query projection 271 | save_in_query_path = os.path.join(debug_path, f'data_sample{idx}_in_query.jpeg') 272 | save_out_query_path = os.path.join(debug_path, f'data_sample{idx}_out_query.jpeg') 273 | viz_debug_query(opt.out_type, res, save_in_query_path, save_out_query_path) 274 | -------------------------------------------------------------------------------- /lib/data/__init__.py: -------------------------------------------------------------------------------- 1 | from .BOP_BP_YCBV import BOP_BP_YCBV 2 | -------------------------------------------------------------------------------- /lib/data_utils/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LinHuang17/NCF-code/8e429efb320b136786dc5438ea7d78c231ffab16/lib/data_utils/__init__.py -------------------------------------------------------------------------------- /lib/data_utils/aug_util.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | 3 | 
"""Image augmentation operations.""" 4 | 5 | 6 | import random 7 | from typing import Iterable, Tuple, NamedTuple 8 | 9 | import numpy as np 10 | from PIL import Image, ImageEnhance, ImageFilter 11 | from PIL.Image import Image as ImageType 12 | 13 | 14 | class AugmentOp(NamedTuple): 15 | """Parameters of an augmentation operation. 16 | 17 | name: Name of the augmentation operation. 18 | prob: Probability with which the operation is applied. 19 | param_range: A range from which to sample the value of the key parameter 20 | (each augmentation operation is assumed to have one key parameter). 21 | """ 22 | 23 | name: str 24 | prob: float 25 | param_range: Tuple[float, float] 26 | 27 | 28 | def _augment_pil_filter( 29 | im: ImageType, 30 | fn: ImageFilter.MultibandFilter, 31 | prob: float, 32 | param_range: Tuple[float, float], 33 | ) -> ImageType: 34 | """Generic function for augmentations based on PIL's filter function. 35 | 36 | Args: 37 | im: An input image. 38 | fn: A filtering function to apply to the image. 39 | prob: Probability with which the function is applied. 40 | param_range: A range from which the value of the key parameter is sampled. 41 | Returns: 42 | A potentially augmented image. 43 | """ 44 | 45 | if random.random() <= prob: 46 | im = im.filter(fn(random.randint(*map(int, param_range)))) # pyre-ignore 47 | return im 48 | 49 | 50 | def _augment_pil_enhance( 51 | im: ImageType, 52 | fn: ImageEnhance._Enhance, 53 | prob: float, 54 | param_range: Tuple[float, float], 55 | ) -> ImageType: 56 | """Generic function for augmentations based on PIL's enhance function. 57 | 58 | Args: 59 | im: An input image. 60 | fn: A filtering function to apply to the image. 61 | prob: Probability with which the function is applied. 62 | param_range: A range from which the value of the key parameter is sampled. 63 | Returns: 64 | A potentially augmented image. 65 | """ 66 | 67 | if random.random() <= prob: 68 | im = fn(im).enhance(factor=random.uniform(*param_range)) # pyre-ignore 69 | return im 70 | 71 | 72 | def blur(im, prob=0.5, param_range=(1, 3)): 73 | return _augment_pil_filter(im, ImageFilter.GaussianBlur, prob, param_range) 74 | 75 | 76 | def sharpness(im, prob=0.5, param_range=(0.0, 50.0)): 77 | return _augment_pil_enhance(im, ImageEnhance.Sharpness, prob, param_range) 78 | 79 | 80 | def contrast(im, prob=0.5, param_range=(0.2, 50.0)): 81 | return _augment_pil_enhance(im, ImageEnhance.Contrast, prob, param_range) 82 | 83 | 84 | def brightness(im, prob=0.5, param_range=(0.1, 6.0)): 85 | return _augment_pil_enhance(im, ImageEnhance.Brightness, prob, param_range) 86 | 87 | 88 | def color(im, prob=0.5, param_range=(0.0, 20.0)): 89 | return _augment_pil_enhance(im, ImageEnhance.Color, prob, param_range) 90 | 91 | 92 | # def augment_image(im: np.ndarray, augment_ops: Iterable[AugmentOp]) -> np.ndarray: 93 | # """Applies a list of augmentations to an image. 94 | 95 | # Args: 96 | # im: An input image. 97 | # augment_ops: A list of augmentations to apply. 98 | # Returns: 99 | # A potentially augmented image. 100 | # """ 101 | 102 | # im_pil = Image.fromarray(im) 103 | # for op in augment_ops: 104 | # im_pil = globals()[op.name](im_pil, op.prob, op.param_range) 105 | # return np.array(im_pil) 106 | 107 | def augment_image(im: ImageType, augment_ops: Iterable[AugmentOp]) -> ImageType: 108 | """Applies a list of augmentations to an image. 109 | 110 | Args: 111 | im: An input image. 112 | augment_ops: A list of augmentations to apply. 113 | Returns: 114 | A potentially augmented image. 
115 | """ 116 | 117 | # im_pil = Image.fromarray(im) 118 | im_pil = im 119 | for op in augment_ops: 120 | im_pil = globals()[op.name](im_pil, op.prob, op.param_range) 121 | return im_pil 122 | -------------------------------------------------------------------------------- /lib/data_utils/sample_frustum_util.py: -------------------------------------------------------------------------------- 1 | """ 2 | utils for trimesh loading 3 | sampling surface, bouding volume, camera space 4 | SDF calc. via trimesh or pysdf 5 | xyz_correspondence & its mask gen. 6 | """ 7 | import os 8 | import pdb 9 | import random 10 | import logging 11 | 12 | import torch 13 | import numpy as np 14 | 15 | import trimesh 16 | from trimesh.ray import ray_pyembree 17 | 18 | from pysdf import SDF 19 | 20 | log = logging.getLogger('trimesh') 21 | log.setLevel(40) 22 | 23 | 24 | def load_trimesh(model_dir, model_unit): 25 | files = os.listdir(model_dir) 26 | mesh_dict = {} 27 | for idx, filename in enumerate(files): 28 | if filename[-4:] == '.ply': 29 | # load mesh in model space 30 | model_mesh = trimesh.load(os.path.join(model_dir, filename), process=False) 31 | # m -> mm unit if orig. ycbv 32 | if model_unit == 'meter': 33 | # m -> mm unit 34 | model_mesh.vertices = model_mesh.vertices * 1000 35 | key = filename[:-4] 36 | mesh_dict[key] = model_mesh 37 | 38 | return mesh_dict 39 | 40 | 41 | def sampling_in_ball(num_points, dimension, radius=1): 42 | 43 | # First generate random directions by normalizing the length of a 44 | # vector of random-normal values (these distribute evenly on ball). 45 | random_directions = np.random.normal(size=(dimension,num_points)) 46 | random_directions /= np.linalg.norm(random_directions, axis=0) 47 | 48 | # Second generate a random radius with probability proportional to 49 | # the surface area of a ball with a given radius. 50 | random_radii = np.random.random(num_points) ** (1/dimension) 51 | 52 | # Return the list of random (direction & length) points. 53 | return radius * (random_directions * random_radii).T 54 | 55 | 56 | def out_of_plane_mask_calc(cam_pts, calib, img_size): 57 | # deal with out-of-plane cases 58 | c2i_rot = calib[:3, :3] 59 | c2i_trans = calib[:3, 3:4] 60 | img_sample_pts = torch.addmm(c2i_trans, c2i_rot, torch.Tensor(cam_pts.T).float()) 61 | img_sample_uvs = img_sample_pts[:2, :] / img_sample_pts[2:3, :] 62 | 63 | # normalize to [-1,1] 64 | transforms = torch.zeros([2,3]) 65 | transforms[0,0] = 1 / (img_size[0] // 2) 66 | transforms[1,1] = 1 / (img_size[1] // 2) 67 | transforms[0,2] = -1 68 | transforms[1,2] = -1 69 | scale = transforms[:2, :2] 70 | shift = transforms[:2, 2:3] 71 | img_sample_norm_uvs = torch.addmm(shift, scale, img_sample_uvs) 72 | in_img = (img_sample_norm_uvs[0,:] >= -1.0) & (img_sample_norm_uvs[0,:] <= 1.0) & (img_sample_norm_uvs[1,:] >= -1.0) & (img_sample_norm_uvs[1,:] <= 1.0) 73 | not_in_img = torch.logical_not(in_img).numpy() 74 | 75 | return not_in_img 76 | 77 | 78 | # ray-SDF or conventional-SDF 79 | def wks_sampling_sdf_xyz_calc(opt, bmax, bmin, cam_bmax, cam_bmin, model_mesh, extrinsic, calib, bounding): 80 | # if not self.is_train: 81 | # random.seed(1991) 82 | # np.random.seed(1991) 83 | # torch.manual_seed(1991) 84 | 85 | # extrinsic to transform from model to cam. space 86 | m2c_rot = extrinsic.numpy()[:3, :3] 87 | m2c_trans = extrinsic.numpy()[:3, 3:4] 88 | # (N, 3) 89 | cam_vert_pts = (m2c_rot.dot(model_mesh.vertices.T) + m2c_trans.reshape((3, 1))).T 90 | # load mesh in cam. 
space 91 | cam_mesh = trimesh.Trimesh(vertices=cam_vert_pts, faces=model_mesh.faces, process=False) 92 | # (1) sampling in surface with gaussian noise 93 | surf_ratio = float(opt.sample_ratio) / 8 94 | surface_points_cam, _ = trimesh.sample.sample_surface(cam_mesh, int(surf_ratio * opt.num_sample_inout)) 95 | # with gaussian noise 96 | sigma = opt.sigma_ratio * opt.clamp_dist 97 | noisy_surface_points_cam = surface_points_cam + np.random.normal(scale=sigma, size=surface_points_cam.shape) 98 | 99 | # (2) sampling in tight sphere: add random points within image space 100 | # 16:1=1250/16:0.5=625 in tight sphere 101 | bd_length = bmax - bmin 102 | zero_rot = np.identity(3) 103 | wks_ratio = opt.sample_ratio // 4 104 | if bounding == 'abb': 105 | bounding_points_model = np.random.rand(opt.num_sample_inout // wks_ratio, 3) * bd_length + bmin 106 | elif bounding == 'sphere': 107 | radius = bd_length.max() / 2 108 | bounding_points_model = sampling_in_ball(opt.num_sample_inout // wks_ratio, 3, radius=radius) 109 | # (N, 3) 110 | bounding_points_trans = (zero_rot.dot(bounding_points_model.T) + m2c_trans.reshape((3, 1))).T 111 | 112 | # (3) sampling in 3D frustum inside the 3D genearl workspace in front of the camera 113 | # 16:1=1250/16:0.5=625 in 3D workspace 114 | wks_sample_flag = True 115 | frustum_points_trans_list = [] 116 | wks_length = cam_bmax - cam_bmin 117 | while wks_sample_flag: 118 | # (N, 3) 119 | wks_points_trans = np.random.rand((opt.num_sample_inout // wks_ratio) * 10, 3) * wks_length + cam_bmin 120 | # filter out pts not in camera frustum 121 | # (N,) 122 | wks_not_in_img = out_of_plane_mask_calc(wks_points_trans, calib, opt.img_size) 123 | # (N,) 124 | wks_in_img = np.logical_not(wks_not_in_img) 125 | frustum_points_trans_list = frustum_points_trans_list + wks_points_trans[wks_in_img].tolist() 126 | if len(frustum_points_trans_list) >= (opt.num_sample_inout // wks_ratio): 127 | wks_sample_flag = False 128 | frustum_points_trans = np.array(frustum_points_trans_list[:(opt.num_sample_inout // wks_ratio)]) 129 | 130 | # (N, 3): combine all 21250 points 131 | sample_points_cam = np.concatenate([noisy_surface_points_cam, bounding_points_trans, frustum_points_trans], 0) 132 | np.random.shuffle(sample_points_cam) 133 | 134 | inside = cam_mesh.contains(sample_points_cam) 135 | inside_points = sample_points_cam[inside] 136 | outside_points = sample_points_cam[np.logical_not(inside)] 137 | 138 | nin = inside_points.shape[0] 139 | inside_points = inside_points[ 140 | :opt.num_sample_inout // 2] if nin > opt.num_sample_inout // 2 else inside_points 141 | outside_points = outside_points[ 142 | :opt.num_sample_inout // 2] if nin > opt.num_sample_inout // 2 else outside_points[ 143 | :(opt.num_sample_inout - nin)] 144 | # (N, 3) 145 | cam_sample_pts = np.concatenate([inside_points, outside_points], 0) 146 | 147 | # trimesh-based ray-SDF 148 | if opt.out_type == 'rsdf': 149 | # (N, 1) 150 | labels = np.concatenate([np.ones((1, inside_points.shape[0])), np.zeros((1, outside_points.shape[0]))], 1).T 151 | 152 | ray_mesh_emb = ray_pyembree.RayMeshIntersector(cam_mesh, scale_to_box=False) 153 | cam_sample_pt_sdf = np.zeros(cam_sample_pts.shape[0]) 154 | ray_origins = np.zeros_like(cam_sample_pts) 155 | delta_vect = (cam_sample_pts - ray_origins) 156 | norm_delta = np.expand_dims(np.linalg.norm(delta_vect, axis=1), axis=1) 157 | unit_ray_dir = delta_vect / norm_delta 158 | 159 | # intersect = ray_mesh_emb.intersects_any(ray_origins, unit_ray_dir) 160 | _, hit_index_ray, hit_locations = 
ray_mesh_emb.intersects_id(ray_origins, unit_ray_dir, multiple_hits=True, return_locations=True) 161 | # intersect mask 162 | hit_unique_idx_ray = np.unique(hit_index_ray) 163 | hit_ray_mask = np.zeros(cam_sample_pts.shape[0], dtype=bool) 164 | hit_ray_mask[hit_unique_idx_ray] = True 165 | for idx, pt in enumerate(cam_sample_pts): 166 | if hit_ray_mask[idx]: 167 | min_df = np.inf 168 | hit_idx_list = (np.where(np.array(hit_index_ray) == idx)[0]).tolist() 169 | for hit_idx in hit_idx_list: 170 | cur_df = np.linalg.norm((hit_locations[hit_idx] - pt)) 171 | if cur_df < min_df: 172 | min_df = cur_df 173 | if labels[idx]: 174 | cam_sample_pt_sdf[idx] = -min_df 175 | else: 176 | cam_sample_pt_sdf[idx] = min_df 177 | else: 178 | cam_sample_pt_sdf[idx] = 100 * opt.clamp_dist 179 | # pysdf-based conventional-SDF 180 | if opt.out_type == 'csdf': 181 | sdf_calc_func = SDF(cam_mesh.vertices, cam_mesh.faces) 182 | cam_sample_pt_sdf = (-1) * sdf_calc_func(cam_sample_pts) 183 | 184 | # shape (N, 1) 185 | sdfs = np.expand_dims(cam_sample_pt_sdf, axis=1) 186 | 187 | # deal with out-of-plane cases 188 | not_in_img = out_of_plane_mask_calc(cam_sample_pts, calib, opt.img_size) 189 | sdfs[not_in_img] = 100 * opt.clamp_dist 190 | 191 | norm_sdfs = sdfs / (opt.clamp_dist / opt.norm_clamp_dist) 192 | 193 | # obtain for xyz of correspondence in model space 194 | inverse_ext = torch.inverse(extrinsic) 195 | c2m_rot = inverse_ext[:3, :3] 196 | c2m_trans = inverse_ext[:3, 3:4] 197 | # (3, N) 198 | model_sample_pts = torch.addmm(c2m_trans, c2m_rot, torch.Tensor(cam_sample_pts.T).float()).float() 199 | norm_xyz_factor = opt.bbx_size / 2 200 | norm_model_sample_pts = model_sample_pts / norm_xyz_factor 201 | # (3, N) 202 | cam_sample_pts = torch.Tensor(cam_sample_pts.T).float() 203 | # (1, N) 204 | norm_sdfs = torch.Tensor(norm_sdfs.T).float() 205 | 206 | del model_mesh 207 | del cam_mesh 208 | 209 | return { 210 | 'samples': cam_sample_pts, 211 | 'labels': norm_sdfs, 212 | 'xyzs': norm_model_sample_pts, 213 | 'norm_xyz_factor': torch.tensor(norm_xyz_factor) 214 | } 215 | 216 | 217 | # efficient conventional-SDF 218 | def wks_sampling_eff_csdf_xyz_calc(opt, bmax, bmin, cam_bmax, cam_bmin, model_mesh, extrinsic, calib, bounding): 219 | # if not self.is_train: 220 | # random.seed(1991) 221 | # np.random.seed(1991) 222 | # torch.manual_seed(1991) 223 | 224 | # extrinsic to transform from model to cam. space 225 | m2c_rot = extrinsic.numpy()[:3, :3] 226 | m2c_trans = extrinsic.numpy()[:3, 3:4] 227 | # (N, 3) 228 | cam_vert_pts = (m2c_rot.dot(model_mesh.vertices.T) + m2c_trans.reshape((3, 1))).T 229 | # load mesh in cam. 
space 230 | cam_mesh = trimesh.Trimesh(vertices=cam_vert_pts, faces=model_mesh.faces, process=False) 231 | # (1) sampling in surface with gaussian noise 232 | surf_ratio = float(opt.sample_ratio) / 8 233 | surface_points_cam, _ = trimesh.sample.sample_surface(cam_mesh, int(surf_ratio * opt.num_sample_inout)) 234 | # with gaussian noise 235 | sigma = opt.sigma_ratio * opt.clamp_dist 236 | noisy_surface_points_cam = surface_points_cam + np.random.normal(scale=sigma, size=surface_points_cam.shape) 237 | 238 | # (2) sampling in tight sphere: add random points within image space 239 | # 16:1=1250/16:0.5=625 in tight sphere 240 | bd_length = bmax - bmin 241 | zero_rot = np.identity(3) 242 | wks_ratio = opt.sample_ratio // 4 243 | if bounding == 'abb': 244 | bounding_points_model = np.random.rand(opt.num_sample_inout // wks_ratio, 3) * bd_length + bmin 245 | elif bounding == 'sphere': 246 | radius = bd_length.max() / 2 247 | bounding_points_model = sampling_in_ball(opt.num_sample_inout // wks_ratio, 3, radius=radius) 248 | # (N, 3) 249 | bounding_points_trans = (zero_rot.dot(bounding_points_model.T) + m2c_trans.reshape((3, 1))).T 250 | 251 | # (3) sampling in 3D frustum inside the 3D genearl workspace in front of the camera 252 | # 16:1=1250/16:0.5=625 in 3D workspace 253 | wks_sample_flag = True 254 | frustum_points_trans_list = [] 255 | wks_length = cam_bmax - cam_bmin 256 | while wks_sample_flag: 257 | # (N, 3) 258 | wks_points_trans = np.random.rand((opt.num_sample_inout // wks_ratio) * 10, 3) * wks_length + cam_bmin 259 | # filter out pts not in camera frustum 260 | # (N,) 261 | wks_not_in_img = out_of_plane_mask_calc(wks_points_trans, calib, opt.img_size) 262 | # (N,) 263 | wks_in_img = np.logical_not(wks_not_in_img) 264 | frustum_points_trans_list = frustum_points_trans_list + wks_points_trans[wks_in_img].tolist() 265 | if len(frustum_points_trans_list) >= (opt.num_sample_inout // wks_ratio): 266 | wks_sample_flag = False 267 | frustum_points_trans = np.array(frustum_points_trans_list[:(opt.num_sample_inout // wks_ratio)]) 268 | 269 | # (N, 3): combine all 21250 points 270 | sample_points_cam = np.concatenate([noisy_surface_points_cam, bounding_points_trans, frustum_points_trans], 0) 271 | np.random.shuffle(sample_points_cam) 272 | 273 | # pysdf-based conventional-SDF 274 | sdf_calc_func = SDF(cam_mesh.vertices, cam_mesh.faces) 275 | sample_points_cam_sdf = (-1) * sdf_calc_func(sample_points_cam) 276 | 277 | inside = (sample_points_cam_sdf < 0) 278 | inside_points = sample_points_cam[inside] 279 | outside_points = sample_points_cam[np.logical_not(inside)] 280 | inside_points_sdf = sample_points_cam_sdf[inside] 281 | outside_points_sdf = sample_points_cam_sdf[np.logical_not(inside)] 282 | 283 | nin = inside_points.shape[0] 284 | inside_points = inside_points[ 285 | :opt.num_sample_inout // 2] if nin > opt.num_sample_inout // 2 else inside_points 286 | outside_points = outside_points[ 287 | :opt.num_sample_inout // 2] if nin > opt.num_sample_inout // 2 else outside_points[ 288 | :(opt.num_sample_inout - nin)] 289 | inside_points_sdf = inside_points_sdf[ 290 | :opt.num_sample_inout // 2] if nin > opt.num_sample_inout // 2 else inside_points_sdf 291 | outside_points_sdf = outside_points_sdf[ 292 | :opt.num_sample_inout // 2] if nin > opt.num_sample_inout // 2 else outside_points_sdf[ 293 | :(opt.num_sample_inout - nin)] 294 | # (N, 3) 295 | cam_sample_pts = np.concatenate([inside_points, outside_points], 0) 296 | cam_sample_pt_sdf = np.concatenate([inside_points_sdf, outside_points_sdf], 0) 
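    # Note: cam_sample_pts / cam_sample_pt_sdf now hold at most opt.num_sample_inout queries,
    # with inside points (SDF < 0 under the sign convention used here) capped at
    # num_sample_inout // 2, so a training batch stays roughly balanced between inside and
    # outside samples.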
297 | 298 | # shape (N, 1) 299 | sdfs = np.expand_dims(cam_sample_pt_sdf, axis=1) 300 | 301 | # deal with out-of-plane cases 302 | not_in_img = out_of_plane_mask_calc(cam_sample_pts, calib, opt.img_size) 303 | sdfs[not_in_img] = 100 * opt.clamp_dist 304 | 305 | norm_sdfs = sdfs / (opt.clamp_dist / opt.norm_clamp_dist) 306 | 307 | # obtain for xyz of correspondence in model space 308 | inverse_ext = torch.inverse(extrinsic) 309 | c2m_rot = inverse_ext[:3, :3] 310 | c2m_trans = inverse_ext[:3, 3:4] 311 | # (3, N) 312 | model_sample_pts = torch.addmm(c2m_trans, c2m_rot, torch.Tensor(cam_sample_pts.T).float()).float() 313 | norm_xyz_factor = opt.bbx_size / 2 314 | norm_model_sample_pts = model_sample_pts / norm_xyz_factor 315 | # (3, N) 316 | cam_sample_pts = torch.Tensor(cam_sample_pts.T).float() 317 | # (1, N) 318 | norm_sdfs = torch.Tensor(norm_sdfs.T).float() 319 | 320 | del model_mesh 321 | del cam_mesh 322 | 323 | return { 324 | 'samples': cam_sample_pts, 325 | 'labels': norm_sdfs, 326 | 'xyzs': norm_model_sample_pts, 327 | 'norm_xyz_factor': torch.tensor(norm_xyz_factor) 328 | } 329 | 330 | 331 | def xyz_mask_calc(sdfs, xyz_range): 332 | 333 | # shape (1, num_sample_inout) 334 | return {'xyz_mask': (abs(sdfs) < xyz_range).float()} -------------------------------------------------------------------------------- /lib/debug_pyrender_util.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | from __future__ import division 3 | from __future__ import print_function 4 | """ 5 | debugging utils 6 | """ 7 | 8 | import os 9 | import sys 10 | import pdb 11 | import code 12 | import json 13 | import random 14 | import pickle 15 | import warnings 16 | import datetime 17 | import subprocess 18 | 19 | import PIL 20 | import cv2 21 | import torch 22 | import numpy as np 23 | from matplotlib import pyplot as plt 24 | 25 | # os.environ['PYOPENGL_PLATFORM'] = 'osmesa' 26 | # import pyrender 27 | 28 | import trimesh 29 | # import transforms3d as t3d 30 | 31 | 32 | # """ 33 | # pyrender-based rendering 34 | # """ 35 | # class Renderer(object): 36 | # """ 37 | # Render mesh using PyRender for visualization. 
38 | # in m unit by default 39 | # """ 40 | # def __init__(self, alight_color, dlight_color, dlight_int=2.0, bg='black', im_width=640, im_height=480): 41 | 42 | # self.im_width = im_width 43 | # self.im_height = im_height 44 | # # light initialization 45 | # self.alight_color = alight_color 46 | # self.dlight_int = dlight_int 47 | # self.dlight_color = dlight_color 48 | # # blending coe for bg 49 | # if bg == 'white': 50 | # self.bg_color = [1.0, 1.0, 1.0] 51 | # elif bg == 'black': 52 | # self.bg_color = [0.0, 0.0, 0.0] 53 | 54 | # # render creation 55 | # self.renderer = pyrender.OffscreenRenderer(self.im_width, self.im_height) 56 | # # renderer_flags = pyrender.constants.RenderFlags.DEPTH_ONLY 57 | # # renderer_flags = pyrender.constants.RenderFlags.FLAT 58 | # # renderer_flags = pyrender.constants.RenderFlags.RGBA 59 | 60 | # # light creation 61 | # self.direc_light = pyrender.DirectionalLight(color=self.dlight_color, intensity=self.dlight_int) 62 | 63 | # def render(self, cam_intr, cam_pose, tri_mesh): 64 | 65 | # # scene creation 66 | # self.scene = pyrender.Scene(ambient_light=self.alight_color, bg_color=self.bg_color) 67 | 68 | # # camera 69 | # K = np.copy(cam_intr) 70 | # fx, fy, cx, cy = K[0, 0], K[1, 1], K[0, 2], K[1, 2] 71 | # # fx, fy, cx, cy = K[0][0], K[1][1], K[0][2], K[1][2] 72 | # camera = pyrender.IntrinsicsCamera(fx=fx, fy=fy, cx=cx, cy=cy) 73 | 74 | # # Object->Camera to Camera->Object. 75 | # # camera_pose = np.linalg.inv(camera_pose) 76 | 77 | # # OpenCV to OpenGL coordinate system. 78 | # camera_pose = self.opencv_to_opengl_transformation(cam_pose) 79 | 80 | # # create mesh node 81 | # tri_mesh.vertices *= 0.001 # To meters. 82 | # mesh = pyrender.Mesh.from_trimesh(tri_mesh) 83 | 84 | # # add mesh 85 | # self.scene.add(mesh) 86 | # # add direc_light 87 | # self.scene.add(self.direc_light, pose=camera_pose) 88 | # # Create a camera node and add 89 | # camera_node = pyrender.Node(camera=camera, matrix=camera_pose) 90 | # self.scene.add_node(camera_node) 91 | 92 | # # render 93 | # color, _ = self.renderer.render(self.scene) 94 | # # color, _ = renderer.render(scene, flags=renderer_flags) 95 | # self.scene.remove_node(camera_node) 96 | # color = np.uint8(color) 97 | 98 | # return color 99 | 100 | # def opencv_to_opengl_transformation(self, trans): 101 | # """Converts a transformation from OpenCV to OpenGL coordinate system. 102 | 103 | # :param trans: A 4x4 transformation matrix. 104 | # """ 105 | # yz_flip = np.eye(4, dtype=np.float64) 106 | # yz_flip[1, 1], yz_flip[2, 2] = -1, -1 107 | # trans = trans.dot(yz_flip) 108 | # return trans 109 | 110 | """ 111 | save point cloud 112 | """ 113 | def save_samples_rgb(fname, points, rgb): 114 | ''' 115 | Save the visualization of sampling to a ply file. 116 | Red points represent positive predictions. 117 | Green points represent negative predictions. 
118 | :param fname: File name to save 119 | :param points: [N, 3] array of points 120 | :param rgb: [N, 3] array of rgb values in the range [0~1] 121 | :return: 122 | ''' 123 | to_save = np.concatenate([points, rgb * 255], axis=-1) 124 | return np.savetxt(fname, 125 | to_save, 126 | fmt='%.6f %.6f %.6f %d %d %d', 127 | comments='', 128 | header=( 129 | 'ply\nformat ascii 1.0\nelement vertex {:d}\nproperty float x\nproperty float y\nproperty float z\nproperty uchar red\nproperty uchar green\nproperty uchar blue\nend_header').format( 130 | points.shape[0]) 131 | ) 132 | 133 | def save_samples_truncted_prob(fname, points, prob): 134 | ''' 135 | Save the visualization of sampling to a ply file. 136 | Red points represent positive predictions. 137 | Green points represent negative predictions. 138 | :param fname: File name to save 139 | :param points: [N, 3] array of points 140 | :param prob: [N, 1] array of predictions in the range [0~1] 141 | :return: 142 | ''' 143 | r = (prob > 0.5).reshape([-1, 1]) * 255 144 | g = (prob < 0.5).reshape([-1, 1]) * 255 145 | b = np.zeros(r.shape) 146 | 147 | to_save = np.concatenate([points, r, g, b], axis=-1) 148 | return np.savetxt(fname, 149 | to_save, 150 | fmt='%.6f %.6f %.6f %d %d %d', 151 | comments='', 152 | header=( 153 | 'ply\nformat ascii 1.0\nelement vertex {:d}\nproperty float x\nproperty float y\nproperty float z\nproperty uchar red\nproperty uchar green\nproperty uchar blue\nend_header').format( 154 | points.shape[0]) 155 | ) 156 | 157 | def save_samples_truncted_sdf(fname, points, sdf, thres): 158 | ''' 159 | Save the visualization of sampling to a ply file. 160 | Red points represent positive predictions. 161 | Green points represent negative predictions. 162 | :param fname: File name to save 163 | :param points: [N, 3] array of points 164 | :param sdf: [N, 1] array of predictions in the range [0~1] 165 | :return: 166 | ''' 167 | r = (sdf <= -thres).reshape([-1, 1]) * 255 168 | g = (sdf >= thres).reshape([-1, 1]) * 255 169 | b = (abs(sdf) < thres).reshape([-1, 1]) * 255 170 | # b = np.zeros(r.shape) 171 | # pdb.set_trace() 172 | to_save = np.concatenate([points, r, g, b], axis=-1) 173 | return np.savetxt(fname, 174 | to_save, 175 | fmt='%.6f %.6f %.6f %d %d %d', 176 | comments='', 177 | header=( 178 | 'ply\nformat ascii 1.0\nelement vertex {:d}\nproperty float x\nproperty float y\nproperty float z\nproperty uchar red\nproperty uchar green\nproperty uchar blue\nend_header').format( 179 | points.shape[0]) 180 | ) 181 | 182 | """ 183 | save mesh 184 | """ 185 | def save_obj_mesh(mesh_path, verts, faces): 186 | file = open(mesh_path, 'w') 187 | 188 | for v in verts: 189 | file.write('v %.4f %.4f %.4f\n' % (v[0], v[1], v[2])) 190 | for f in faces: 191 | f_plus = f + 1 192 | file.write('f %d %d %d\n' % (f_plus[0], f_plus[2], f_plus[1])) 193 | file.close() 194 | 195 | 196 | def save_obj_mesh_with_color(mesh_path, verts, faces, colors): 197 | file = open(mesh_path, 'w') 198 | 199 | for idx, v in enumerate(verts): 200 | c = colors[idx] 201 | file.write('v %.4f %.4f %.4f %.4f %.4f %.4f\n' % (v[0], v[1], v[2], c[0], c[1], c[2])) 202 | for f in faces: 203 | f_plus = f + 1 204 | file.write('f %d %d %d\n' % (f_plus[0], f_plus[2], f_plus[1])) 205 | file.close() 206 | 207 | 208 | def save_obj_mesh_with_uv(mesh_path, verts, faces, uvs): 209 | file = open(mesh_path, 'w') 210 | 211 | for idx, v in enumerate(verts): 212 | vt = uvs[idx] 213 | file.write('v %.4f %.4f %.4f\n' % (v[0], v[1], v[2])) 214 | file.write('vt %.4f %.4f\n' % (vt[0], vt[1])) 215 | 216 | for f 
in faces: 217 | f_plus = f + 1 218 | file.write('f %d/%d %d/%d %d/%d\n' % (f_plus[0], f_plus[0], 219 | f_plus[2], f_plus[2], 220 | f_plus[1], f_plus[1])) 221 | file.close() 222 | 223 | 224 | """ 225 | viz img, mask, rendering 226 | """ 227 | def viz_debug_data(img, model_mesh, extrinsic, aug_intrinsic, save_debug_path): 228 | 229 | fig = plt.figure(figsize=(3, 3)) 230 | ax = fig.add_subplot(1,1,1) 231 | ax.imshow(img) 232 | plt.axis('off') 233 | 234 | plt.tight_layout() 235 | plt.savefig(save_debug_path, dpi=100) 236 | 237 | 238 | """ 239 | viz query projection for debugging 240 | """ 241 | def viz_debug_query(out_type, res, save_in_query_path, save_out_query_path): 242 | 243 | # from RGB order to opencv BGR order 244 | img = np.uint8((np.transpose(res['img'].numpy(), (1, 2, 0)) * 0.5 + 0.5)[:, :, ::-1] * 255.0) 245 | img_cp = np.copy(img) 246 | rot = res['calib'][:3, :3] 247 | trans = res['calib'][:3, 3:4] 248 | 249 | # draw points inside 250 | # pts = torch.addmm(trans, rot, sample_data['samples']) # [3, N] 251 | if out_type[-3:] == 'sdf': 252 | pts = torch.addmm(trans, rot, res['samples'][:, res['labels'][0] < 0]) # [3, N] 253 | uv = pts[:2, :] / pts[2:3, :] 254 | uvz = torch.cat([uv, pts[2:3, :]], 0) 255 | # draw projected queries 256 | img = np.ascontiguousarray(img, dtype=np.uint8) 257 | for pt in torch.transpose(uvz, 0, 1): 258 | img = cv2.circle(img, (int(pt[0]), int(pt[1])), 2, (0,0,255), -1) 259 | cv2.imwrite(save_in_query_path, img) 260 | 261 | # draw points outside 262 | if out_type[-3:] == 'sdf': 263 | pts = torch.addmm(trans, rot, res['samples'][:, res['labels'][0] > 0]) # [3, N] 264 | uv = pts[:2, :] / pts[2:3, :] 265 | uvz = torch.cat([uv, pts[2:3, :]], 0) 266 | # draw projected queries 267 | img_cp = np.ascontiguousarray(img_cp, dtype=np.uint8) 268 | for pt in torch.transpose(uvz, 0, 1): 269 | img_cp = cv2.circle(img_cp, (int(pt[0]), int(pt[1])), 2, (0,255,0), -1) 270 | cv2.imwrite(save_out_query_path, img_cp) 271 | 272 | def viz_debug_query_forward(out_type, res, save_in_query_path, save_out_query_path): 273 | 274 | # from RGB order to opencv BGR order 275 | img = np.uint8((np.transpose(res['img'].numpy(), (1, 2, 0)) * 0.5 + 0.5)[:, :, ::-1] * 255.0) 276 | img_cp = np.copy(img) 277 | 278 | # draw points inside 279 | if out_type[-3:] == 'sdf': 280 | uv = (res['samples'][:, res['labels'][0] < 0]) # [2, N] 281 | # draw projected queries 282 | img = np.ascontiguousarray(img, dtype=np.uint8) 283 | for pt in torch.transpose(uv, 0, 1): 284 | img = cv2.circle(img, (int(pt[0]), int(pt[1])), 2, (0,0,255), -1) 285 | cv2.imwrite(save_in_query_path, img) 286 | 287 | # draw points outside 288 | if out_type[-3:] == 'sdf': 289 | uv = (res['samples'][:, res['labels'][0] > 0]) # [2, N] 290 | # draw projected queries 291 | img_cp = np.ascontiguousarray(img_cp, dtype=np.uint8) 292 | for pt in torch.transpose(uv, 0, 1): 293 | img_cp = cv2.circle(img_cp, (int(pt[0]), int(pt[1])), 2, (0,255,0), -1) 294 | cv2.imwrite(save_out_query_path, img_cp) 295 | 296 | """ 297 | Meter for recording 298 | """ 299 | class AverageMeter(object): 300 | """ 301 | refer to https://github.com/bearpaw/pytorch-pose 302 | Computes and stores the average and current value 303 | """ 304 | def __init__(self): 305 | self.reset() 306 | 307 | def reset(self): 308 | self.val = 0 309 | self.avg = 0 310 | self.sum = 0 311 | self.count = 0 312 | 313 | def update(self, val, n=1): 314 | self.val = val 315 | self.sum += val * n 316 | self.count += n 317 | self.avg = self.sum / self.count 318 | 
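
# Minimal usage sketch for the helpers above (illustrative only; the file path, point count and
# threshold below are assumptions, not values used by the original pipeline):
if __name__ == '__main__':
    # running average of a scalar metric
    meter = AverageMeter()
    for loss in [0.9, 0.7, 0.4]:
        meter.update(loss)
    print(f'last={meter.val:.2f} avg={meter.avg:.2f}')

    # save a random point cloud colored by a truncated SDF
    # (red: sdf <= -thres, green: sdf >= thres, blue: |sdf| < thres)
    pts = np.random.uniform(-1.0, 1.0, size=(1000, 3))
    sdf = np.random.uniform(-0.2, 0.2, size=(1000, 1))
    save_samples_truncted_sdf('/tmp/debug_sdf.ply', pts, sdf, thres=0.1)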
-------------------------------------------------------------------------------- /lib/eval_Rt_time_util.py: -------------------------------------------------------------------------------- 1 | """ 2 | comprehensive evaluation for: 3 | SDF, predicted corresopndence, 6D pose 4 | """ 5 | 6 | import os 7 | import json 8 | import time 9 | from tqdm import tqdm 10 | 11 | import torch 12 | import numpy as np 13 | from PIL import Image 14 | 15 | import trimesh 16 | 17 | from .geometry import * 18 | 19 | from lib.rigid_fit.ransac import RansacEstimator 20 | from lib.rigid_fit.ransac_kabsch import Procrustes 21 | 22 | from .sdf import create_grid, eval_sdf_xyz_grid_frustum 23 | 24 | 25 | def save_bop_results(path, results, version='bop19'): 26 | """Saves 6D object pose estimates to a file. 27 | :param path: Path to the output file. 28 | :param results: Dictionary with pose estimates. 29 | :param version: Version of the results. 30 | """ 31 | # See docs/bop_challenge_2019.md for details. 32 | if version == 'bop19': 33 | lines = ['scene_id,im_id,obj_id,score,R,t,time'] 34 | for res in results: 35 | if 'time' in res: 36 | run_time = res['time'] 37 | else: 38 | run_time = -1 39 | 40 | lines.append('{scene_id},{im_id},{obj_id},{score},{R},{t},{time}'.format( 41 | scene_id=res['scene_id'], 42 | im_id=res['im_id'], 43 | obj_id=res['obj_id'], 44 | score=res['score'], 45 | R=' '.join(map(str, res['R'].flatten().tolist())), 46 | t=' '.join(map(str, res['t'].flatten().tolist())), 47 | time=run_time)) 48 | 49 | with open(path, 'w') as f: 50 | f.write('\n'.join(lines)) 51 | 52 | else: 53 | raise ValueError('Unknown version of BOP results.') 54 | 55 | 56 | def out_of_plane_mask_calc(cam_pts, calib, img_size): 57 | # deal with out-of-plane cases 58 | c2i_rot = calib[:3, :3] 59 | c2i_trans = calib[:3, 3:4] 60 | img_sample_pts = torch.addmm(c2i_trans, c2i_rot, torch.Tensor(cam_pts.T).float()) 61 | img_sample_uvs = img_sample_pts[:2, :] / img_sample_pts[2:3, :] 62 | 63 | # normalize to [-1,1] 64 | transforms = torch.zeros([2,3]) 65 | transforms[0,0] = 1 / (img_size[0] // 2) 66 | transforms[1,1] = 1 / (img_size[1] // 2) 67 | transforms[0,2] = -1 68 | transforms[1,2] = -1 69 | scale = transforms[:2, :2] 70 | shift = transforms[:2, 2:3] 71 | img_sample_norm_uvs = torch.addmm(shift, scale, img_sample_uvs) 72 | in_img = (img_sample_norm_uvs[0,:] >= -1.0) & (img_sample_norm_uvs[0,:] <= 1.0) & (img_sample_norm_uvs[1,:] >= -1.0) & (img_sample_norm_uvs[1,:] <= 1.0) 73 | not_in_img = torch.logical_not(in_img).numpy() 74 | 75 | return not_in_img 76 | 77 | 78 | """ 79 | generate 6D rigid pose based on SDF & Corresopndence 80 | calculate eval. 
time 81 | """ 82 | def eval_Rt_time(opt, net, test_data_loader, save_csv_path): 83 | 84 | with torch.no_grad(): 85 | preds = [] 86 | # for test_idx, test_data in enumerate(test_data_loader): 87 | for test_idx, test_data in enumerate(tqdm(test_data_loader)): 88 | 89 | # retrieve the data 90 | # resolution = opt.resolution 91 | resolution_X = int(opt.test_wks_size[0] / opt.step_size) 92 | resolution_Y = int(opt.test_wks_size[1] / opt.step_size) 93 | resolution_Z = int(opt.test_wks_size[2] / opt.step_size) 94 | image_tensor = test_data['img'].cuda() 95 | calib_tensor = test_data['calib'].cuda() 96 | norm_xyz_factor = test_data['norm_xyz_factor'][0].item() 97 | 98 | # get all 3D queries 99 | # create a grid by resolution 100 | # and transforming matrix for grid coordinates to real world xyz 101 | b_min = np.array(test_data['test_b_min'][0]) 102 | b_max = np.array(test_data['test_b_max'][0]) 103 | coords, mat = create_grid(resolution_X, resolution_Y, resolution_Z, b_min, b_max, transform=None) 104 | # (M=KxKxK, 3) 105 | coords = coords.reshape([3, -1]).T 106 | # (M,) 107 | coords_not_in_img = out_of_plane_mask_calc(coords, test_data['calib'][0], opt.img_size) 108 | # (M,) 109 | coords_in_img = np.logical_not(coords_not_in_img) 110 | # (3, N) 111 | coords_in_frustum = coords[coords_in_img].T 112 | 113 | # transform for proj. 114 | transforms = torch.zeros([1,2,3]).cuda() 115 | transforms[:, 0,0] = 1 / (opt.img_size[0] // 2) 116 | transforms[:, 1,1] = 1 / (opt.img_size[1] // 2) 117 | transforms[:, 0,2] = -1 118 | transforms[:, 1,2] = -1 119 | 120 | # create ransac 121 | ransac = RansacEstimator( 122 | min_samples=opt.min_samples, 123 | residual_threshold=(opt.res_thresh)**2, 124 | max_trials=opt.max_trials, 125 | ) 126 | 127 | eval_start_time = time.time() 128 | # get 2D feat. maps 129 | net.filter(image_tensor) 130 | # Then we define the lambda function for cell evaluation 131 | def eval_func(points): 132 | points = np.expand_dims(points, axis=0) 133 | # points = np.repeat(points, net.num_views, axis=0) 134 | samples = torch.from_numpy(points).cuda().float() 135 | 136 | transforms = torch.zeros([1,2,3]).cuda() 137 | transforms[:, 0,0] = 1 / (opt.img_size[0] // 2) 138 | transforms[:, 1,1] = 1 / (opt.img_size[1] // 2) 139 | transforms[:, 0,2] = -1 140 | transforms[:, 1,2] = -1 141 | net.query(samples, calib_tensor, transforms=transforms) 142 | # shape (B, 1, N) -> (N) 143 | eval_sdfs = net.preds[0][0] 144 | # shape (B, 3, N) -> (3, N) 145 | eval_xyzs = net.xyzs[0] 146 | return eval_sdfs.detach().cpu().numpy(), eval_xyzs.detach().cpu().numpy() 147 | # (N), (3, N), all the predicted dfs and xyzs 148 | pred_sdfs, pred_xyzs = eval_sdf_xyz_grid_frustum(coords_in_frustum, eval_func, num_samples=opt.num_in_batch) 149 | # norm_xyz_factor = max(opt.bbx_size) / 2 150 | pred_xyzs = pred_xyzs * norm_xyz_factor 151 | # get sdf & xyz within clamping distance 152 | pos_anchor_mask = (abs(pred_sdfs) < opt.norm_clamp_dist) 153 | est_cam_pts = coords_in_frustum[:, pos_anchor_mask] 154 | est_model_pts = pred_xyzs[:, pos_anchor_mask] 155 | # mask_sdfs = pred_sdfs[pos_anchor_mask] 156 | 157 | # estimate 6D pose with RANSAC-based kabsch or procruste 158 | ret = ransac.fit(Procrustes(), [est_model_pts.T, est_cam_pts.T]) 159 | eval_end_time = time.time() 160 | eval_time = eval_end_time - eval_start_time 161 | 162 | # est. 
RT 163 | RT_m2c_est = ret["best_params"] 164 | R_m2c_est = RT_m2c_est[:3, :3] 165 | t_m2c_est = RT_m2c_est[:3, 3:4] 166 | 167 | scene_id = int(test_data['folder_id'][0]) 168 | im_id = int(test_data['frame_id'][0]) 169 | obj_id = int(test_data['obj_id'][0]) 170 | pred = dict(scene_id=scene_id, 171 | im_id=im_id, 172 | obj_id=obj_id, 173 | score=1, 174 | R=np.array(R_m2c_est).reshape(3, 3), 175 | t=np.array(t_m2c_est), 176 | time=eval_time) 177 | preds.append(pred) 178 | save_bop_results(save_csv_path, preds) 179 | -------------------------------------------------------------------------------- /lib/geometry.py: -------------------------------------------------------------------------------- 1 | import torch 2 | 3 | 4 | def index(feat, uv): 5 | ''' 6 | 7 | :param feat: [B, C, H, W] image features 8 | :param uv: [B, 2, N] uv coordinates in the image plane, range [-1, 1] 9 | :return: [B, C, N] image features at the uv coordinates 10 | ''' 11 | uv = uv.transpose(1, 2) # [B, N, 2] 12 | uv = uv.unsqueeze(2) # [B, N, 1, 2] 13 | # NOTE: for newer PyTorch, it seems that training results are degraded due to implementation diff in F.grid_sample 14 | # for old versions, simply remove the aligned_corners argument. 15 | samples = torch.nn.functional.grid_sample(feat, uv, align_corners=True) # [B, C, N, 1] 16 | return samples[:, :, :, 0] # [B, C, N] 17 | 18 | 19 | def orthogonal(points, calibrations, transforms=None): 20 | ''' 21 | Compute the orthogonal projections of 3D points into the image plane by given projection matrix 22 | :param points: [B, 3, N] Tensor of 3D points 23 | :param calibrations: [B, 4, 4] Tensor of projection matrix 24 | :param transforms: [B, 2, 3] Tensor of image transform matrix 25 | :return: xyz: [B, 3, N] Tensor of xyz coordinates in the image plane 26 | ''' 27 | rot = calibrations[:, :3, :3] 28 | trans = calibrations[:, :3, 3:4] 29 | pts = torch.baddbmm(trans, rot, points) # [B, 3, N] 30 | if transforms is not None: 31 | scale = transforms[:2, :2] 32 | shift = transforms[:2, 2:3] 33 | pts[:, :2, :] = torch.baddbmm(shift, scale, pts[:, :2, :]) 34 | return pts 35 | 36 | 37 | def perspective(points, calibrations, transforms=None): 38 | ''' 39 | Compute the perspective projections of 3D points into the image plane by given projection matrix 40 | :param points: [Bx3xN] Tensor of 3D points 41 | :param calibrations: [Bx4x4] Tensor of projection matrix 42 | :param transforms: [Bx2x3] Tensor of image transform matrix 43 | :return: uv: [Bx2xN] Tensor of uv coordinates in the image plane 44 | ''' 45 | rot = calibrations[:, :3, :3] 46 | trans = calibrations[:, :3, 3:4] 47 | homo = torch.baddbmm(trans, rot, points) # [B, 3, N] 48 | uv = homo[:, :2, :] / homo[:, 2:3, :] 49 | if transforms is not None: 50 | scale = transforms[:, :2, :2] 51 | shift = transforms[:, :2, 2:3] 52 | # scale = transforms[:2, :2] 53 | # shift = transforms[:2, 2:3] 54 | uv = torch.baddbmm(shift, scale, uv) 55 | 56 | uvz = torch.cat([uv, homo[:, 2:3, :]], 1) 57 | return uvz 58 | -------------------------------------------------------------------------------- /lib/loss_util.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # Copyright 2020-present Zerong Zheng. All Rights Reserved. 
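# Hypothetical call pattern for the XYZ losses defined below (shapes follow the docstrings;
# the variable names are illustrative, not taken from the training code):
#
#   criterion = XYZLoss(use_xyz_mask=True)
#   loss = criterion(pred_xyz, gt_xyz, xyz_mask)  # (B, 3, N), (B, 3, N), (B, 1, N)
#
# where xyz_mask keeps only queries whose |SDF| lies within the clamping distance
# (see xyz_mask_calc in lib/data_utils/sample_frustum_util.py).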
3 | 4 | import os 5 | import json 6 | import torch 7 | import torch.nn as nn 8 | import torch.nn.functional as F 9 | 10 | import pdb 11 | 12 | 13 | """ 14 | XYZ loss w/o symmetry 15 | """ 16 | class XYZLoss(nn.Module): 17 | def __init__(self, use_xyz_mask=True): 18 | super(XYZLoss, self).__init__() 19 | # self.criterion = nn.MSELoss(reduction='mean') 20 | self.criterion = nn.SmoothL1Loss() 21 | self.use_xyz_mask = use_xyz_mask 22 | 23 | def forward(self, output, target, xyz_mask): 24 | ''' 25 | should be consistent for tensor shape 26 | (B, 3, N)/(B, 3, N)/(B, 1, N) or 27 | (B, N, 3)/(B, N, 3)/(B, N, 1) 28 | ''' 29 | if self.use_xyz_mask: 30 | loss = self.criterion( 31 | output.mul(xyz_mask), 32 | target.mul(xyz_mask) 33 | ) 34 | else: 35 | # loss += 0.5 * self.criterion(xyz_pred, xyz_gt) 36 | loss = self.criterion(output, target) 37 | 38 | return loss 39 | 40 | """ 41 | XYZ loss with symmetry 42 | """ 43 | class XYZLoss_sym(nn.Module): 44 | def __init__(self, use_xyz_mask=True, sym_pool=None): 45 | super(XYZLoss_sym, self).__init__() 46 | # self.criterion = nn.MSELoss(reduction='mean') 47 | self.criterion = nn.SmoothL1Loss(reduction='none') 48 | self.use_xyz_mask = use_xyz_mask 49 | 50 | self.sym_pool = sym_pool 51 | 52 | def forward(self, output, target, xyz_mask): 53 | ''' 54 | should be consistent for tensor shape 55 | (B, 3, N)/(B, 3, N)/(B, 1, N) or 56 | (B, N, 3)/(B, N, 3)/(B, N, 1) 57 | ''' 58 | output = output.permute(0,2,1) 59 | target = target.permute(0,2,1) 60 | xyz_mask = xyz_mask.permute(0,2,1) 61 | if (len(self.sym_pool) > 1): 62 | for sym_id, transform in enumerate(self.sym_pool): 63 | # repeat: (3, 3) -> (B, 3, 3) 64 | rot = transform[:3, :3].cuda().repeat((target.size(0),1,1)) 65 | # repeat: (3, 1) -> (B, 3, 1) 66 | trans = transform[:3, 3:4].cuda().repeat((target.size(0),1,1)) 67 | # (B, 3, 3) * (B, 3, N) + (B, 3, 1) -> (B, 3, N) -> (B, N, 3) 68 | sym_target = torch.baddbmm(trans, rot, target.permute(0,2,1)).permute(0,2,1) 69 | if self.use_xyz_mask: 70 | # (B, N, 3) 71 | loss_xyz_temp = self.criterion(output.mul(xyz_mask), sym_target.mul(xyz_mask)) 72 | else: 73 | # loss += 0.5 * self.criterion(xyz_pred, xyz_gt) 74 | # (B, N, 3) 75 | loss_xyz_temp = self.criterion(output, sym_target) 76 | # (B, N) 77 | loss_xyz_temp = torch.sum(loss_xyz_temp, dim=2) / 3 78 | # (B) 79 | loss_sum = torch.sum(loss_xyz_temp, dim=1) 80 | if(sym_id > 0): 81 | # (M, B) 82 | loss_sums = torch.cat((loss_sums, loss_sum.unsqueeze(0)), dim=0) 83 | # (M, B, N) 84 | loss_xyzs = torch.cat((loss_xyzs, loss_xyz_temp.unsqueeze(0)), dim=0) 85 | else: 86 | loss_sums = loss_sum.unsqueeze(0) 87 | loss_xyzs = loss_xyz_temp.unsqueeze(0) 88 | # (1, B) 89 | min_values = torch.min(loss_sums, dim=0, keepdim=True)[0] 90 | # (M, B) 91 | loss_switch = torch.eq(loss_sums, min_values).type(output.dtype) 92 | # (M, B, 1) * (M, B, N) -> (M, B, N) 93 | loss_xyz = loss_switch.unsqueeze(2) * loss_xyzs 94 | # (B, N) 95 | loss_xyz = torch.sum(loss_xyz, dim=0) 96 | else: 97 | if self.use_xyz_mask: 98 | # (B, N, 3) 99 | loss_xyz = self.criterion(output.mul(xyz_mask), target.mul(xyz_mask)) 100 | else: 101 | # (B, N, 3) 102 | loss_xyz = self.criterion(output, target) 103 | # (B, N) 104 | loss_xyz = torch.sum(loss_xyz, dim=2) / 3 105 | loss = loss_xyz 106 | loss = torch.mean(loss) 107 | 108 | return loss 109 | 110 | class XYZLoss_old(nn.Module): 111 | def __init__(self, use_xyz_mask=True): 112 | super(XYZLoss_orig, self).__init__() 113 | # self.criterion = nn.MSELoss(reduction='mean') 114 | self.criterion = nn.SmoothL1Loss() 115 | 
self.use_xyz_mask = use_xyz_mask 116 | 117 | def forward(self, output, target, use_xyz_mask): 118 | batch_size = output.size(0) 119 | num_queries = output.size(1) 120 | xyzs_pred = output.reshape((batch_size, num_queries, -1)).split(1, 1) 121 | xyzs_gt = target.reshape((batch_size, num_queries, -1)).split(1, 1) 122 | loss = 0 123 | 124 | for idx in range(num_queries): 125 | xyz_pred = xyzs_pred[idx].squeeze() 126 | xyz_gt = xyzs_gt[idx].squeeze() 127 | if self.use_xyz_mask: 128 | # loss += 0.5 * self.criterion( 129 | loss += self.criterion( 130 | xyz_pred.mul(use_xyz_mask[:, idx]), 131 | xyz_gt.mul(use_xyz_mask[:, idx]) 132 | ) 133 | else: 134 | # loss += 0.5 * self.criterion(xyz_pred, xyz_gt) 135 | loss +=self.criterion(xyz_pred, xyz_gt) 136 | 137 | return loss / num_queries 138 | 139 | 140 | class LipschitzLoss(nn.Module): 141 | def __init__(self, k, reduction=None): 142 | super(LipschitzLoss, self).__init__() 143 | self.relu = nn.ReLU() 144 | self.k = k 145 | self.reduction = reduction 146 | 147 | def forward(self, x1, x2, y1, y2): 148 | l = self.relu(torch.norm(y1-y2, dim=-1) / (torch.norm(x1-x2, dim=-1)+1e-3) - self.k) 149 | # l = torch.clamp(l, 0.0, 5.0) # avoid 150 | if self.reduction is None or self.reduction == "mean": 151 | return torch.mean(l) 152 | else: 153 | return torch.sum(l) 154 | 155 | 156 | class HuberFunc(nn.Module): 157 | def __init__(self, reduction=None): 158 | super(HuberFunc, self).__init__() 159 | self.reduction = reduction 160 | 161 | def forward(self, x, delta): 162 | n = torch.abs(x) 163 | cond = n < delta 164 | l = torch.where(cond, 0.5 * n ** 2, n*delta - 0.5 * delta**2) 165 | if self.reduction is None or self.reduction == "mean": 166 | return torch.mean(l) 167 | else: 168 | return torch.sum(l) 169 | 170 | 171 | class SoftL1Loss(nn.Module): 172 | def __init__(self, reduction=None): 173 | super(SoftL1Loss, self).__init__() 174 | self.reduction = reduction 175 | 176 | def forward(self, input, target, eps=0.0, lamb=0.0): 177 | ret = torch.abs(input - target) - eps 178 | ret = torch.clamp(ret, min=0.0, max=100.0) 179 | ret = ret * (1 + lamb * torch.sign(target) * torch.sign(target-input)) 180 | if self.reduction is None or self.reduction == "mean": 181 | return torch.mean(ret) 182 | else: 183 | return torch.sum(ret) 184 | 185 | 186 | 187 | if __name__ == '__main__': 188 | 189 | criterion1 = XYZLoss() 190 | criterion2 = XYZLoss_orig() 191 | aa = torch.rand((2,5000,3)) 192 | bb = torch.rand((2,5000,3)) 193 | # bb = aa.clone() 194 | mask = torch.rand((2,5000,1)) > 0.3 195 | pdb.set_trace() 196 | 197 | loss1 = criterion1(aa,bb,mask) 198 | loss2 = criterion1(aa.permute(0,2,1),bb.permute(0,2,1),mask.permute(0,2,1)) 199 | loss3 = criterion2(aa,bb,mask) 200 | 201 | print('debug') 202 | -------------------------------------------------------------------------------- /lib/mesh_util.py: -------------------------------------------------------------------------------- 1 | from skimage import measure 2 | import numpy as np 3 | import torch 4 | from .sdf import create_grid, eval_grid_octree, eval_grid 5 | from skimage import measure 6 | 7 | 8 | def reconstruction(opt, net, calib_tensor, 9 | resolution, b_min, b_max, thresh=0.5, 10 | use_octree=False, num_samples=10000, transform=None): 11 | ''' 12 | Reconstruct meshes from sdf predicted by the network. 13 | :param net: a BasePixImpNet object. call image filter beforehead. 
14 | :param cuda: cuda device 15 | :param calib_tensor: calibration tensor 16 | :param resolution: resolution of the grid cell 17 | :param b_min: bounding box corner [x_min, y_min, z_min] 18 | :param b_max: bounding box corner [x_max, y_max, z_max] 19 | :param use_octree: whether to use octree acceleration 20 | :param num_samples: how many points to query each gpu iteration 21 | :return: marching cubes results. 22 | ''' 23 | # First we create a grid by resolution 24 | # and transforming matrix for grid coordinates to real world xyz 25 | coords, mat = create_grid(resolution, resolution, resolution, 26 | b_min, b_max, transform=transform) 27 | 28 | # Then we define the lambda function for cell evaluation 29 | def eval_func(points): 30 | points = np.expand_dims(points, axis=0) 31 | points = np.repeat(points, net.num_views, axis=0) 32 | samples = torch.from_numpy(points).cuda().float() 33 | 34 | transforms = torch.zeros([1,2,3]).cuda() 35 | transforms[:, 0,0] = 1 / (opt.img_size[0] // 2) 36 | transforms[:, 1,1] = 1 / (opt.img_size[1] // 2) 37 | transforms[:, 0,2] = -1 38 | transforms[:, 1,2] = -1 39 | net.query(samples, calib_tensor, transforms=transforms) 40 | pred = net.get_preds()[0][0] 41 | return pred.detach().cpu().numpy() 42 | 43 | # Then we evaluate the grid 44 | if use_octree: 45 | sdf = eval_grid_octree(coords, eval_func, num_samples=num_samples) 46 | else: 47 | sdf = eval_grid(coords, eval_func, num_samples=num_samples) 48 | 49 | # Finally we do marching cubes 50 | try: 51 | verts, faces, normals, values = measure.marching_cubes_lewiner(sdf, thresh) 52 | # transform verts into world coordinate system 53 | verts = np.matmul(mat[:3, :3], verts.T) + mat[:3, 3:4] 54 | verts = verts.T 55 | return verts, faces, normals, values 56 | except: 57 | print('error cannot marching cubes') 58 | return -1 59 | 60 | -------------------------------------------------------------------------------- /lib/model/BasePIFuNet.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.nn.functional as F 4 | 5 | from ..geometry import index, orthogonal, perspective 6 | 7 | class BasePIFuNet(nn.Module): 8 | def __init__(self, 9 | projection_mode='perspective', 10 | sdf_loss_term=nn.L1Loss(), 11 | xyz_loss_term=nn.SmoothL1Loss(), 12 | ): 13 | """ 14 | :param projection_mode: 15 | Either orthogonal or perspective. 16 | It will call the corresponding function for projection. 17 | :param error_term: 18 | nn Loss between the predicted [B, Res, N] and the label [B, Res, N] 19 | """ 20 | super(BasePIFuNet, self).__init__() 21 | self.name = 'base' 22 | 23 | self.sdf_loss_term = sdf_loss_term 24 | self.xyz_loss_term = xyz_loss_term 25 | 26 | self.index = index 27 | self.projection = orthogonal if projection_mode == 'orthogonal' else perspective 28 | 29 | self.preds = None 30 | self.labels = None 31 | 32 | def forward(self, points, images, calibs, transforms=None): 33 | ''' 34 | :param points: [B, 3, N] world space coordinates of points 35 | :param images: [B, C, H, W] input images 36 | :param calibs: [B, 3, 4] calibration matrices for each image 37 | :param transforms: Optional [B, 2, 3] image space coordinate transforms 38 | :return: [B, Res, N] predictions for each point 39 | ''' 40 | self.filter(images) 41 | self.query(points, calibs, transforms) 42 | return self.get_preds() 43 | 44 | def filter(self, images): 45 | ''' 46 | Filter the input images 47 | store all intermediate features. 
48 | :param images: [B, C, H, W] input images 49 | ''' 50 | None 51 | 52 | def query(self, points, calibs, transforms=None, labels=None): 53 | ''' 54 | Given 3D points, query the network predictions for each point. 55 | Image features should be pre-computed before this call. 56 | store all intermediate features. 57 | query() function may behave differently during training/testing. 58 | :param points: [B, 3, N] world space coordinates of points 59 | :param calibs: [B, 3, 4] calibration matrices for each image 60 | :param transforms: Optional [B, 2, 3] image space coordinate transforms 61 | :param labels: Optional [B, Res, N] gt labeling 62 | :return: [B, Res, N] predictions for each point 63 | ''' 64 | None 65 | 66 | def get_preds(self): 67 | ''' 68 | Get the predictions from the last query 69 | :return: [B, Res, N] network prediction for the last query 70 | ''' 71 | return self.preds 72 | 73 | def get_loss(self): 74 | ''' 75 | Get the network loss from the last query 76 | :return: loss term 77 | ''' 78 | return self.sdf_loss_term(self.preds, self.labels) 79 | -------------------------------------------------------------------------------- /lib/model/HGFilters.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.nn.functional as F 4 | from ..net_util import * 5 | 6 | 7 | class HourGlass(nn.Module): 8 | def __init__(self, num_modules, depth, num_features, norm='batch'): 9 | super(HourGlass, self).__init__() 10 | self.num_modules = num_modules 11 | self.depth = depth 12 | self.features = num_features 13 | self.norm = norm 14 | 15 | self._generate_network(self.depth) 16 | 17 | def _generate_network(self, level): 18 | self.add_module('b1_' + str(level), ConvBlock(self.features, self.features, norm=self.norm)) 19 | 20 | self.add_module('b2_' + str(level), ConvBlock(self.features, self.features, norm=self.norm)) 21 | 22 | if level > 1: 23 | self._generate_network(level - 1) 24 | else: 25 | self.add_module('b2_plus_' + str(level), ConvBlock(self.features, self.features, norm=self.norm)) 26 | 27 | self.add_module('b3_' + str(level), ConvBlock(self.features, self.features, norm=self.norm)) 28 | 29 | def _forward(self, level, inp): 30 | # Upper branch 31 | up1 = inp 32 | up1 = self._modules['b1_' + str(level)](up1) 33 | 34 | # Lower branch 35 | low1 = F.avg_pool2d(inp, 2, stride=2) 36 | low1 = self._modules['b2_' + str(level)](low1) 37 | 38 | if level > 1: 39 | low2 = self._forward(level - 1, low1) 40 | else: 41 | low2 = low1 42 | low2 = self._modules['b2_plus_' + str(level)](low2) 43 | 44 | low3 = low2 45 | low3 = self._modules['b3_' + str(level)](low3) 46 | 47 | # NOTE: for newer PyTorch (1.3~), it seems that training results are degraded due to implementation diff in F.grid_sample 48 | # if the pretrained model behaves weirdly, switch with the commented line. 49 | # NOTE: I also found that "bicubic" works better. 
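        # Upsample the lower-branch features back to the upper-branch resolution; the two
        # branches are then merged by element-wise addition (the hourglass skip connection).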
50 | up2 = F.interpolate(low3, scale_factor=2, mode='bicubic', align_corners=True) 51 | # up2 = F.interpolate(low3, scale_factor=2, mode='nearest) 52 | 53 | return up1 + up2 54 | 55 | def forward(self, x): 56 | return self._forward(self.depth, x) 57 | 58 | 59 | class HGFilter(nn.Module): 60 | def __init__(self, opt): 61 | super(HGFilter, self).__init__() 62 | self.num_modules = opt.num_stack 63 | 64 | self.opt = opt 65 | 66 | # Base part 67 | self.conv1 = nn.Conv2d(3, 64, kernel_size=7, stride=2, padding=3) 68 | 69 | if self.opt.norm == 'batch': 70 | self.bn1 = nn.BatchNorm2d(64) 71 | elif self.opt.norm == 'group': 72 | self.bn1 = nn.GroupNorm(32, 64) 73 | 74 | if self.opt.hg_down == 'conv64': 75 | self.conv2 = ConvBlock(64, 64, self.opt.norm) 76 | self.down_conv2 = nn.Conv2d(64, 128, kernel_size=3, stride=2, padding=1) 77 | elif self.opt.hg_down == 'conv128': 78 | self.conv2 = ConvBlock(64, 128, self.opt.norm) 79 | self.down_conv2 = nn.Conv2d(128, 128, kernel_size=3, stride=2, padding=1) 80 | elif self.opt.hg_down == 'ave_pool': 81 | self.conv2 = ConvBlock(64, 128, self.opt.norm) 82 | else: 83 | raise NameError('Unknown Fan Filter setting!') 84 | 85 | self.conv3 = ConvBlock(128, 128, self.opt.norm) 86 | self.conv4 = ConvBlock(128, 256, self.opt.norm) 87 | 88 | # Stacking part 89 | for hg_module in range(self.num_modules): 90 | self.add_module('m' + str(hg_module), HourGlass(1, opt.num_hourglass, 256, self.opt.norm)) 91 | 92 | self.add_module('top_m_' + str(hg_module), ConvBlock(256, 256, self.opt.norm)) 93 | self.add_module('conv_last' + str(hg_module), 94 | nn.Conv2d(256, 256, kernel_size=1, stride=1, padding=0)) 95 | if self.opt.norm == 'batch': 96 | self.add_module('bn_end' + str(hg_module), nn.BatchNorm2d(256)) 97 | elif self.opt.norm == 'group': 98 | self.add_module('bn_end' + str(hg_module), nn.GroupNorm(32, 256)) 99 | 100 | self.add_module('l' + str(hg_module), nn.Conv2d(256, 101 | opt.hourglass_dim, kernel_size=1, stride=1, padding=0)) 102 | 103 | if hg_module < self.num_modules - 1: 104 | self.add_module( 105 | 'bl' + str(hg_module), nn.Conv2d(256, 256, kernel_size=1, stride=1, padding=0)) 106 | self.add_module('al' + str(hg_module), nn.Conv2d(opt.hourglass_dim, 107 | 256, kernel_size=1, stride=1, padding=0)) 108 | 109 | def forward(self, x): 110 | x = F.relu(self.bn1(self.conv1(x)), True) 111 | tmpx = x 112 | if self.opt.hg_down == 'ave_pool': 113 | x = F.avg_pool2d(self.conv2(x), 2, stride=2) 114 | elif self.opt.hg_down in ['conv64', 'conv128']: 115 | x = self.conv2(x) 116 | x = self.down_conv2(x) 117 | else: 118 | raise NameError('Unknown Fan Filter setting!') 119 | 120 | normx = x 121 | 122 | x = self.conv3(x) 123 | x = self.conv4(x) 124 | 125 | previous = x 126 | 127 | outputs = [] 128 | for i in range(self.num_modules): 129 | hg = self._modules['m' + str(i)](previous) 130 | 131 | ll = hg 132 | ll = self._modules['top_m_' + str(i)](ll) 133 | 134 | ll = F.relu(self._modules['bn_end' + str(i)] 135 | (self._modules['conv_last' + str(i)](ll)), True) 136 | 137 | # Predict heatmaps 138 | tmp_out = self._modules['l' + str(i)](ll) 139 | outputs.append(tmp_out) 140 | 141 | if i < self.num_modules - 1: 142 | ll = self._modules['bl' + str(i)](ll) 143 | tmp_out_ = self._modules['al' + str(i)](tmp_out) 144 | previous = previous + ll + tmp_out_ 145 | 146 | return outputs, tmpx.detach(), normx 147 | -------------------------------------------------------------------------------- /lib/model/HGPIFuNet.py: -------------------------------------------------------------------------------- 1 | 
import torch 2 | import torch.nn as nn 3 | import torch.nn.functional as F 4 | from .BasePIFuNet import BasePIFuNet 5 | from .SurfaceClassifier import SurfaceClassifier 6 | from .RayDistanceNormalizer import RayDistanceNormalizer 7 | from .HGFilters import * 8 | from ..net_util import init_net 9 | 10 | 11 | class HGPIFuNet(BasePIFuNet): 12 | ''' 13 | HG PIFu network uses Hourglass stacks as the image filter. 14 | It does the following: 15 | 1. Compute image feature stacks and store it in self.im_feat_list 16 | self.im_feat_list[-1] is the last stack (output stack) 17 | 2. Calculate calibration 18 | 3. If training, it index on every intermediate stacks, 19 | If testing, it index on the last stack. 20 | 4. Classification. 21 | 5. During training, error is calculated on all stacks. 22 | ''' 23 | 24 | def __init__(self, 25 | opt, 26 | projection_mode='perspective', 27 | sdf_loss_term=nn.L1Loss(), 28 | xyz_loss_term=nn.SmoothL1Loss(), 29 | ): 30 | super(HGPIFuNet, self).__init__( 31 | projection_mode=projection_mode, 32 | sdf_loss_term=sdf_loss_term, 33 | xyz_loss_term=xyz_loss_term) 34 | 35 | self.name = 'hgpifu' 36 | 37 | self.opt = opt 38 | self.num_views = self.opt.num_views 39 | 40 | self.image_filter = HGFilter(opt) 41 | 42 | self.last_op = None 43 | if self.opt.out_type[-3:] == 'sdf': 44 | if self.opt.use_tanh: 45 | self.last_op = nn.Tanh() 46 | if self.opt.use_xyz: 47 | mlp_dim = self.opt.mlp_dim_xyz 48 | else: 49 | mlp_dim = self.opt.mlp_dim 50 | 51 | self.surface_classifier = SurfaceClassifier( 52 | filter_channels=mlp_dim, 53 | num_views=self.opt.num_views, 54 | no_residual=self.opt.no_residual, 55 | last_op=self.last_op) 56 | 57 | self.normalizer = RayDistanceNormalizer(opt) 58 | 59 | # This is a list of [B x Feat_i x H x W] features 60 | self.im_feat_list = [] 61 | self.tmpx = None 62 | self.normx = None 63 | 64 | self.intermediate_preds_list = [] 65 | 66 | # init_type (str) -- the name of an initialization method: normal | xavier | kaiming | orthogonal 67 | # gain (float) -- scaling factor for normal, xavier and orthogonal. 68 | # init_net(self) 69 | init_net(self, init_type=self.opt.init_type, init_gain=self.opt.init_gain) 70 | 71 | def filter(self, images): 72 | ''' 73 | Filter the input images 74 | store all intermediate features. 75 | :param images: [B, C, H, W] input images 76 | ''' 77 | self.im_feat_list, self.tmpx, self.normx = self.image_filter(images) 78 | # If it is not in training, only produce the last im_feat 79 | if not self.training: 80 | self.im_feat_list = [self.im_feat_list[-1]] 81 | 82 | def query(self, points, calibs, transforms=None, labels=None): 83 | ''' 84 | Given 3D points, query the network predictions for each point. 85 | Image features should be pre-computed before this call. 86 | store all intermediate features. 87 | query() function may behave differently during training/testing. 
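        (Editor's note, not in the original: the projected uv coordinates are expected in
        normalized [-1, 1] image space; points falling outside that range are masked via
        in_img and, for SDF outputs, assigned a large constant distance. The per-point
        feature passed to the MLP is the sampled image feature of opt.hourglass_dim
        channels plus the 1-channel ray-distance feature (and the early tmpx features when
        --skip_hourglass is set), which matches the 257-dim first entry of the default
        --mlp_dim / --mlp_dim_xyz options in lib/options.py.)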
88 | :param points: [B, 3, N] world space coordinates of points 89 | :param calibs: [B, 3, 4] calibration matrices for each image 90 | :param transforms: Optional [B, 2, 3] image space coordinate transforms 91 | :param labels: Optional [B, Res, N] gt labeling 92 | :return: [B, Res, N] predictions for each point 93 | ''' 94 | if labels is not None: 95 | self.labels = labels 96 | 97 | self.uvz = self.projection(points, calibs, transforms) 98 | uv = self.uvz[:, :2, :] 99 | z = self.uvz[:, 2:3, :] 100 | 101 | # debug for query during forward 102 | # pdb.set_trace() 103 | # debug_dir = '/mnt/data0/lin/results/hopifu/debug/forward_query/' 104 | # res = {'img': images.cpu()[0], 'samples': xyz.cpu()[0], 'labels': labels.cpu()[0]} 105 | # viz_debug_query_forward(res, 1, debug_dir) 106 | 107 | in_img = (uv[:, 0] >= -1.0) & (uv[:, 0] <= 1.0) & (uv[:, 1] >= -1.0) & (uv[:, 1] <= 1.0) 108 | 109 | # self.z_feat = self.normalizer(z, calibs=calibs) 110 | self.dist_ray_feat = self.normalizer(points, uv, transforms=transforms, calibs=calibs) 111 | 112 | if self.opt.skip_hourglass: 113 | tmpx_local_feature = self.index(self.tmpx, uv) 114 | 115 | self.intermediate_preds_list = [] 116 | 117 | for im_feat in self.im_feat_list: 118 | # [B, Feat_i + z, N] 119 | point_local_feat_list = [self.index(im_feat, uv), self.dist_ray_feat] 120 | 121 | if self.opt.skip_hourglass: 122 | point_local_feat_list.append(tmpx_local_feature) 123 | 124 | point_local_feat = torch.cat(point_local_feat_list, 1) 125 | 126 | # out of image plane is always set to 0 for occupancy or 1000 for sdf 127 | # pred (B, 1, 5000)/(B, 4, 5000) 128 | # in_img (B, N), not_in_img (B, 1, N) 129 | # ((in_img == False).nonzero(as_tuple=True)) 130 | pred = in_img[:,None].float() * self.surface_classifier(point_local_feat) 131 | if self.opt.out_type[-3:] == 'sdf': 132 | norm_factor = (self.opt.clamp_dist / self.opt.norm_clamp_dist) 133 | not_in_img = (torch.logical_not(in_img).float() * (100 * self.opt.clamp_dist / norm_factor)).unsqueeze(1) 134 | if self.opt.use_xyz: 135 | added_zeros = torch.zeros((pred.shape[0], 3, pred.shape[2])).cuda() 136 | pred = pred + torch.cat((not_in_img, added_zeros), dim=1) 137 | else: 138 | pred = pred + not_in_img 139 | self.intermediate_preds_list.append(pred) 140 | 141 | # shape (B, 1, 5000) 142 | if self.opt.use_xyz: 143 | self.preds = self.intermediate_preds_list[-1][:,0,:].unsqueeze(1) 144 | # shape (B, 3, 5000) 145 | self.xyzs = self.intermediate_preds_list[-1][:,1:,:] 146 | else: 147 | self.preds = self.intermediate_preds_list[-1] 148 | 149 | def get_im_feat(self): 150 | ''' 151 | Get the image filter 152 | :return: [B, C_feat, H, W] image feature after filtering 153 | ''' 154 | return self.im_feat_list[-1] 155 | 156 | # def get_error(self): 157 | # ''' 158 | # Hourglass has its own intermediate supervision scheme 159 | # ''' 160 | # error = 0 161 | # for preds in self.intermediate_preds_list: 162 | # error += self.error_term(preds, self.labels) 163 | # error /= len(self.intermediate_preds_list) 164 | 165 | # return error 166 | def get_loss(self): 167 | ''' 168 | Hourglass has its own intermediate supervision scheme 169 | ''' 170 | loss_dict = {} 171 | loss_dict['sdf_loss'] = 0. 172 | if self.opt.use_xyz: 173 | loss_dict['xyz_loss'] = 0. 174 | loss_dict['total_loss'] = 0. 
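# --- Editor's note (not part of the original source) ---------------------------------
# Intermediate supervision: every hourglass stack's prediction contributes to the loss.
# Predicted and ground-truth SDF values are clamped to +/- opt.norm_clamp_dist before
# the SDF loss term (L1 by default), the per-stack losses are averaged, and the optional
# xyz branch is weighted by opt.xyz_lambda when accumulated into 'total_loss' below.
# ---------------------------------------------------------------------------------------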
175 | for preds in self.intermediate_preds_list: 176 | 177 | if self.opt.out_type[-3:] == 'sdf': 178 | pred_sdf = torch.clamp(preds[:,0,:].unsqueeze(1), -self.opt.norm_clamp_dist, self.opt.norm_clamp_dist) 179 | gt_sdf = torch.clamp(self.labels, -self.opt.norm_clamp_dist, self.opt.norm_clamp_dist) 180 | loss_dict['sdf_loss'] += self.sdf_loss_term(pred_sdf, gt_sdf) 181 | 182 | if self.opt.use_xyz: 183 | loss_dict['xyz_loss'] += self.xyz_loss_term(preds[:,1:,:], self.norm_gt_xyzs, self.gt_xyz_mask) 184 | 185 | loss_dict['sdf_loss'] /= len(self.intermediate_preds_list) 186 | loss_dict['total_loss'] += loss_dict['sdf_loss'] 187 | if self.opt.use_xyz: 188 | loss_dict['xyz_loss'] /= len(self.intermediate_preds_list) 189 | loss_dict['total_loss'] += self.opt.xyz_lambda * loss_dict['xyz_loss'] 190 | 191 | return loss_dict 192 | 193 | def forward(self, images, points, calibs, labels=None, transforms=None, gt_xyzs=None, gt_xyz_mask=None, pairwise_dist=None, pairwise_pt_idxs=None): 194 | # pdb.set_trace() 195 | if self.opt.use_xyz: 196 | # norm_xyz_factor = max(self.opt.bbx_size) / 2 197 | # self.norm_points_model = xyzs / norm_xyz_factor 198 | self.norm_gt_xyzs = gt_xyzs 199 | self.gt_xyz_mask = gt_xyz_mask 200 | 201 | # Get image feature 202 | self.filter(images) 203 | 204 | # Phase 2: point query 205 | self.query(points=points, calibs=calibs, transforms=transforms, labels=labels) 206 | 207 | # get the prediction 208 | res = self.get_preds() 209 | 210 | # get the error 211 | loss_dict = self.get_loss() 212 | 213 | if self.opt.use_xyz: 214 | return res, loss_dict, self.xyzs, self.uvz 215 | else: 216 | return res, loss_dict, self.uvz 217 | -------------------------------------------------------------------------------- /lib/model/RayDistanceNormalizer.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.nn.functional as F 4 | 5 | class RayDistanceNormalizer(nn.Module): 6 | def __init__(self, opt): 7 | super(RayDistanceNormalizer, self).__init__() 8 | 9 | self.opt = opt 10 | self.norm_method = self.opt.rdist_norm 11 | 12 | if self.norm_method == 'uvf': 13 | self.half_w = (self.opt.img_size[0] // 2) 14 | self.half_h = (self.opt.img_size[1] // 2) 15 | if self.norm_method == 'minmax': 16 | CAM_Bz_SHIFT = self.opt.wks_z_shift 17 | Bx_SIZE = self.opt.wks_size[0] / 2 18 | By_SIZE = self.opt.wks_size[1] / 2 19 | Bz_SIZE = self.opt.wks_size[2] / 2 20 | self.rdist_min = -Bz_SIZE + CAM_Bz_SHIFT 21 | self.rdist_max = torch.norm(torch.tensor([Bx_SIZE, By_SIZE, Bz_SIZE + CAM_Bz_SHIFT], dtype=torch.float)).item() 22 | 23 | def forward(self, queries, norm_uv=None, transforms=None, calibs=None): 24 | ''' 25 | Normalize dist_ray_feature 26 | :param dist_ray_feature: [B, 1, N] query distance along the ray normalized by projected uv distance along the ray 27 | :return: 28 | ''' 29 | batch_size = queries.shape[0] 30 | pt_size = queries.shape[2] 31 | # (B, 1, N) = (B, 3, N) 32 | abs_dist_ray = torch.norm(queries, dim=1).unsqueeze(1) 33 | 34 | if self.norm_method == 'uvf': 35 | # (B, 2, 3) 36 | inv_trans = torch.zeros_like(transforms) 37 | inv_trans[:, 0,0] = self.half_w 38 | inv_trans[:, 1,1] = self.half_h 39 | # inv_trans[:, 0,2] = self.half_w 40 | # inv_trans[:, 1,2] = self.half_h 41 | inv_trans[:, 0,2] = 0 42 | inv_trans[:, 1,2] = 0 43 | scale = inv_trans[:, :2, :2] 44 | shift = inv_trans[:, :2, 2:3] 45 | # (B, 2, N) 46 | uv = torch.baddbmm(shift, scale, norm_uv) 47 | # (B) 48 | ave_focal = (calibs[:, 0,0] + calibs[:, 1,1]) / 2 
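# --- Editor's note (not part of the original source) ---------------------------------
# 'uvf' normalization: uv was rescaled back to pixel offsets from the image centre via
# half_w/half_h, and abs_dist_ray (the query's distance from the origin) is divided
# below by the length of the pixel ray (u, v, f_avg). Under a pinhole model with
# camera-frame query points this ratio is roughly the depth expressed in focal-length
# units, which keeps the feature independent of the image resolution.
# ---------------------------------------------------------------------------------------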
49 | # (B, 1, N) 50 | ave_focal = ave_focal.unsqueeze(1).expand(batch_size, pt_size).unsqueeze(1) 51 | # (B, 3, N) 52 | proj_uvf = torch.cat((uv, ave_focal), dim=1) 53 | # (B, 1, N) 54 | proj_dist_ray = torch.norm(proj_uvf, dim=1).unsqueeze(1) 55 | 56 | return abs_dist_ray / proj_dist_ray 57 | 58 | elif self.norm_method == 'minmax': 59 | 60 | return (abs_dist_ray - self.rdist_min) / (self.rdist_max - self.rdist_min) 61 | 62 | -------------------------------------------------------------------------------- /lib/model/SurfaceClassifier.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.nn.functional as F 4 | 5 | 6 | class SurfaceClassifier(nn.Module): 7 | def __init__(self, filter_channels, num_views=1, no_residual=True, last_op=None): 8 | super(SurfaceClassifier, self).__init__() 9 | 10 | self.filters = [] 11 | self.num_views = num_views 12 | self.no_residual = no_residual 13 | filter_channels = filter_channels 14 | self.last_op = last_op 15 | 16 | if self.no_residual: 17 | for l in range(0, len(filter_channels) - 1): 18 | self.filters.append(nn.Conv1d( 19 | filter_channels[l], 20 | filter_channels[l + 1], 21 | 1)) 22 | self.add_module("conv%d" % l, self.filters[l]) 23 | else: 24 | for l in range(0, len(filter_channels) - 1): 25 | if 0 != l: 26 | self.filters.append( 27 | nn.Conv1d( 28 | filter_channels[l] + filter_channels[0], 29 | filter_channels[l + 1], 30 | 1)) 31 | else: 32 | self.filters.append(nn.Conv1d( 33 | filter_channels[l], 34 | filter_channels[l + 1], 35 | 1)) 36 | 37 | self.add_module("conv%d" % l, self.filters[l]) 38 | 39 | def forward(self, feature): 40 | ''' 41 | 42 | :param feature: list of [BxC_inxHxW] tensors of image features 43 | :param xy: [Bx3xN] tensor of (x,y) coodinates in the image plane 44 | :return: [BxC_outxN] tensor of features extracted at the coordinates 45 | ''' 46 | 47 | y = feature 48 | tmpy = feature 49 | for i, f in enumerate(self.filters): 50 | if self.no_residual: 51 | y = self._modules['conv' + str(i)](y) 52 | else: 53 | y = self._modules['conv' + str(i)]( 54 | y if i == 0 55 | else torch.cat([y, tmpy], 1) 56 | ) 57 | if i != len(self.filters) - 1: 58 | y = F.leaky_relu(y) 59 | 60 | if self.num_views > 1 and i == len(self.filters) // 2: 61 | y = y.view( 62 | -1, self.num_views, y.shape[1], y.shape[2] 63 | ).mean(dim=1) 64 | tmpy = feature.view( 65 | -1, self.num_views, feature.shape[1], feature.shape[2] 66 | ).mean(dim=1) 67 | 68 | if self.last_op: 69 | # y = self.last_op(y) 70 | y[:,0,:] = self.last_op(y[:,0,:]) 71 | 72 | return y 73 | -------------------------------------------------------------------------------- /lib/model/__init__.py: -------------------------------------------------------------------------------- 1 | from .BasePIFuNet import BasePIFuNet 2 | from .HGPIFuNet import HGPIFuNet 3 | -------------------------------------------------------------------------------- /lib/net_util.py: -------------------------------------------------------------------------------- 1 | 2 | import cv2 3 | import torch 4 | import functools 5 | import numpy as np 6 | from tqdm import tqdm 7 | from PIL import Image 8 | import torch.nn as nn 9 | from torch.nn import init 10 | import torch.nn.functional as F 11 | 12 | 13 | def adjust_learning_rate(optimizer, epoch, lr, schedule, gamma): 14 | """Sets the learning rate to the initial LR decayed by schedule""" 15 | if epoch in schedule: 16 | lr *= gamma 17 | for param_group in optimizer.param_groups: 18 | 
param_group['lr'] = lr 19 | return lr 20 | 21 | def init_weights(net, init_type='normal', init_gain=0.02): 22 | """Initialize network weights. 23 | 24 | Parameters: 25 | net (network) -- network to be initialized 26 | init_type (str) -- the name of an initialization method: normal | xavier | kaiming | orthogonal 27 | init_gain (float) -- scaling factor for normal, xavier and orthogonal. 28 | 29 | We use 'normal' in the original pix2pix and CycleGAN paper. But xavier and kaiming might 30 | work better for some applications. Feel free to try yourself. 31 | """ 32 | 33 | def init_func(m): # define the initialization function 34 | classname = m.__class__.__name__ 35 | if hasattr(m, 'weight') and (classname.find('Conv') != -1 or classname.find('Linear') != -1): 36 | if init_type == 'normal': 37 | init.normal_(m.weight.data, 0.0, init_gain) 38 | elif init_type == 'xavier': 39 | init.xavier_normal_(m.weight.data, gain=init_gain) 40 | elif init_type == 'kaiming': 41 | init.kaiming_normal_(m.weight.data, a=0, mode='fan_in') 42 | elif init_type == 'orthogonal': 43 | init.orthogonal_(m.weight.data, gain=init_gain) 44 | else: 45 | raise NotImplementedError('initialization method [%s] is not implemented' % init_type) 46 | if hasattr(m, 'bias') and m.bias is not None: 47 | init.constant_(m.bias.data, 0.0) 48 | elif classname.find( 49 | 'BatchNorm2d') != -1: # BatchNorm Layer's weight is not a matrix; only normal distribution applies. 50 | init.normal_(m.weight.data, 1.0, init_gain) 51 | init.constant_(m.bias.data, 0.0) 52 | 53 | print('initialize network with %s' % init_type) 54 | net.apply(init_func) # apply the initialization function 55 | 56 | 57 | def init_net(net, init_type='normal', init_gain=0.02, gpu_ids=[]): 58 | """Initialize a network: 1. register CPU/GPU device (with multi-GPU support); 2. initialize the network weights 59 | Parameters: 60 | net (network) -- the network to be initialized 61 | init_type (str) -- the name of an initialization method: normal | xavier | kaiming | orthogonal 62 | gain (float) -- scaling factor for normal, xavier and orthogonal. 63 | gpu_ids (int list) -- which GPUs the network runs on: e.g., 0,1,2 64 | 65 | Return an initialized network. 66 | """ 67 | if len(gpu_ids) > 0: 68 | assert (torch.cuda.is_available()) 69 | net.to(gpu_ids[0]) 70 | net = torch.nn.DataParallel(net, gpu_ids) # multi-GPUs 71 | init_weights(net, init_type, init_gain=init_gain) 72 | return net 73 | 74 | 75 | def imageSpaceRotation(xy, rot): 76 | ''' 77 | args: 78 | xy: (B, 2, N) input 79 | rot: (B, 2) x,y axis rotation angles 80 | 81 | rotation center will be always image center (other rotation center can be represented by additional z translation) 82 | ''' 83 | disp = rot.unsqueeze(2).sin().expand_as(xy) 84 | return (disp * xy).sum(dim=1) 85 | 86 | 87 | def cal_gradient_penalty(netD, real_data, fake_data, device, type='mixed', constant=1.0, lambda_gp=10.0): 88 | """Calculate the gradient penalty loss, used in WGAN-GP paper https://arxiv.org/abs/1704.00028 89 | 90 | Arguments: 91 | netD (network) -- discriminator network 92 | real_data (tensor array) -- real images 93 | fake_data (tensor array) -- generated images from the generator 94 | device (str) -- GPU / CPU: from torch.device('cuda:{}'.format(self.gpu_ids[0])) if self.gpu_ids else torch.device('cpu') 95 | type (str) -- if we mix real and fake data or not [real | fake | mixed]. 
96 | constant (float) -- the constant used in formula ( | |gradient||_2 - constant)^2 97 | lambda_gp (float) -- weight for this loss 98 | 99 | Returns the gradient penalty loss 100 | """ 101 | if lambda_gp > 0.0: 102 | if type == 'real': # either use real images, fake images, or a linear interpolation of two. 103 | interpolatesv = real_data 104 | elif type == 'fake': 105 | interpolatesv = fake_data 106 | elif type == 'mixed': 107 | alpha = torch.rand(real_data.shape[0], 1) 108 | alpha = alpha.expand(real_data.shape[0], real_data.nelement() // real_data.shape[0]).contiguous().view( 109 | *real_data.shape) 110 | alpha = alpha.to(device) 111 | interpolatesv = alpha * real_data + ((1 - alpha) * fake_data) 112 | else: 113 | raise NotImplementedError('{} not implemented'.format(type)) 114 | interpolatesv.requires_grad_(True) 115 | disc_interpolates = netD(interpolatesv) 116 | gradients = torch.autograd.grad(outputs=disc_interpolates, inputs=interpolatesv, 117 | grad_outputs=torch.ones(disc_interpolates.size()).to(device), 118 | create_graph=True, retain_graph=True, only_inputs=True) 119 | gradients = gradients[0].view(real_data.size(0), -1) # flat the data 120 | gradient_penalty = (((gradients + 1e-16).norm(2, dim=1) - constant) ** 2).mean() * lambda_gp # added eps 121 | return gradient_penalty, gradients 122 | else: 123 | return 0.0, None 124 | 125 | def get_norm_layer(norm_type='instance'): 126 | """Return a normalization layer 127 | Parameters: 128 | norm_type (str) -- the name of the normalization layer: batch | instance | none 129 | For BatchNorm, we use learnable affine parameters and track running statistics (mean/stddev). 130 | For InstanceNorm, we do not use learnable affine parameters. We do not track running statistics. 131 | """ 132 | if norm_type == 'batch': 133 | norm_layer = functools.partial(nn.BatchNorm2d, affine=True, track_running_stats=True) 134 | elif norm_type == 'instance': 135 | norm_layer = functools.partial(nn.InstanceNorm2d, affine=False, track_running_stats=False) 136 | elif norm_type == 'group': 137 | norm_layer = functools.partial(nn.GroupNorm, 32) 138 | elif norm_type == 'none': 139 | norm_layer = None 140 | else: 141 | raise NotImplementedError('normalization layer [%s] is not found' % norm_type) 142 | return norm_layer 143 | 144 | class Flatten(nn.Module): 145 | def forward(self, input): 146 | return input.view(input.size(0), -1) 147 | 148 | def conv3x3(in_planes, out_planes, strd=1, padding=1, bias=False): 149 | "3x3 convolution with padding" 150 | return nn.Conv2d(in_planes, out_planes, kernel_size=3, 151 | stride=strd, padding=padding, bias=bias) 152 | 153 | class ConvBlock(nn.Module): 154 | def __init__(self, in_planes, out_planes, norm='batch'): 155 | super(ConvBlock, self).__init__() 156 | self.conv1 = conv3x3(in_planes, int(out_planes / 2)) 157 | self.conv2 = conv3x3(int(out_planes / 2), int(out_planes / 4)) 158 | self.conv3 = conv3x3(int(out_planes / 4), int(out_planes / 4)) 159 | 160 | if norm == 'batch': 161 | self.bn1 = nn.BatchNorm2d(in_planes) 162 | self.bn2 = nn.BatchNorm2d(int(out_planes / 2)) 163 | self.bn3 = nn.BatchNorm2d(int(out_planes / 4)) 164 | self.bn4 = nn.BatchNorm2d(in_planes) 165 | elif norm == 'group': 166 | self.bn1 = nn.GroupNorm(32, in_planes) 167 | self.bn2 = nn.GroupNorm(32, int(out_planes / 2)) 168 | self.bn3 = nn.GroupNorm(32, int(out_planes / 4)) 169 | self.bn4 = nn.GroupNorm(32, in_planes) 170 | 171 | if in_planes != out_planes: 172 | self.downsample = nn.Sequential( 173 | self.bn4, 174 | nn.ReLU(True), 175 | 
nn.Conv2d(in_planes, out_planes, 176 | kernel_size=1, stride=1, bias=False), 177 | ) 178 | else: 179 | self.downsample = None 180 | 181 | def forward(self, x): 182 | residual = x 183 | 184 | out1 = self.bn1(x) 185 | out1 = F.relu(out1, True) 186 | out1 = self.conv1(out1) 187 | 188 | out2 = self.bn2(out1) 189 | out2 = F.relu(out2, True) 190 | out2 = self.conv2(out2) 191 | 192 | out3 = self.bn3(out2) 193 | out3 = F.relu(out3, True) 194 | out3 = self.conv3(out3) 195 | 196 | out3 = torch.cat((out1, out2, out3), 1) 197 | 198 | if self.downsample is not None: 199 | residual = self.downsample(residual) 200 | 201 | out3 += residual 202 | 203 | return out3 204 | -------------------------------------------------------------------------------- /lib/options.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import os 3 | 4 | 5 | class BaseOptions(): 6 | def __init__(self): 7 | self.initialized = False 8 | 9 | def initialize(self, parser): 10 | # Experiment launch: Logistic/Datasets related 11 | g_logistic = parser.add_argument_group('Logistic') 12 | g_logistic.add_argument('--exp_id', type=str, default='ncf_ycbv_run2',help='') 13 | g_logistic.add_argument('--work_base_path', type=str, default='/data1/lin/ncf_results/runs',help='') 14 | 15 | g_logistic.add_argument('--dataset', type=str, default='ycbv',help='lm | lmo | ycbv') 16 | g_logistic.add_argument('--train_data', type=str, default='ycbv', help='lm | ycbv | ycbv_real') 17 | g_logistic.add_argument('--more_train_data', type=str, default='none', help='ycbv_real') 18 | g_logistic.add_argument('--eval_data', type=str, default='ycbv_bop_cha', help='lm_bop_cha | lmo_bop_cha | ycbv_bop_cha') 19 | g_logistic.add_argument('--model_dir', type=str, default='/data2/lin/bop_datasets/ycbv/models', help='') 20 | g_logistic.add_argument('--ds_lm_dir', type=str, default='/data2/lin/bop_datasets/lm', help='') 21 | g_logistic.add_argument('--ds_lmo_dir', type=str, default='/data2/lin/bop_datasets/lmo', help='') 22 | g_logistic.add_argument('--ds_ycbv_dir', type=str, default='/data2/lin/bop_datasets/ycbv', help='') 23 | 24 | g_logistic.add_argument('--visib_fract_thresh', type=float, default=0.3, help='0.05 | 0.1 | 0.15 | 0.3') 25 | g_logistic.add_argument('--model_unit', type=str, default='mm', help='meter | mm') 26 | 27 | g_logistic.add_argument('--obj_id', default=2, type=int, help='ids for object') 28 | g_logistic.add_argument('--wks_size', type=int, default=[1600, 1600, 2000], help='size of workspace/mm') 29 | g_logistic.add_argument('--wks_z_shift', type=int, default=1010, help='shift of workspace/mm') 30 | g_logistic.add_argument('--test_wks_size', type=int, default=[1200, 1200, 930], help='size of test workspace/mm') 31 | g_logistic.add_argument('--test_wks_z_shift', type=int, default=925, help='shift of test workspace/mm') 32 | g_logistic.add_argument('--max_sym_disc_step', type=float, default=0.01, help='') 33 | g_logistic.add_argument('--sample_ratio', type=int, default=20, help='20 | 24 | 16 | 32 for surf') 34 | g_logistic.add_argument('--bbx_size', type=int, default=380, help='size of object bounding box/mm') 35 | g_logistic.add_argument('--bbx_shift', type=int, default=0, help='shift of object bounding box/mm') 36 | g_logistic.add_argument('--use_remap', type=bool, default=True, help='') 37 | g_logistic.add_argument('--rdist_norm', type=str, default='uvf', help='normlization method for ray distance, uvf|minmax') 38 | 39 | g_logistic.add_argument('--img_size', type=int, default=[640,480], 
help='image shape') 40 | g_logistic.add_argument('--num_views', type=int, default=1, help='How many views to use for multiview network.') 41 | 42 | g_logistic.add_argument('--GPU_ID', default=[0], type=int, help='# of GPUs') 43 | g_logistic.add_argument('--deterministic', type=bool, default=False, help='') 44 | g_logistic.add_argument('--seed', type=int, default=0) 45 | 46 | g_logistic.add_argument('--continue_train', type=bool, default=False, help='continue training: load model') 47 | g_logistic.add_argument('--resume_epoch', type=int, default=0, help='epoch resuming the training') 48 | g_logistic.add_argument('--eval_perf', type=bool, default=False, help='evaluation: load model') 49 | g_logistic.add_argument('--eval_epoch', type=int, default=0, help='epoch for eval.') 50 | 51 | g_logistic.add_argument('--load_netG_checkpoint_path', type=str, default=None, help='path to save checkpoints') 52 | g_logistic.add_argument('--load_optG_checkpoint_path', type=str, default=None, help='path to save checkpoints') 53 | g_logistic.add_argument('--name', type=str, default='example', 54 | help='name of the experiment. It decides where to store/load samples and models') 55 | 56 | # Sampling related 57 | g_sample = parser.add_argument_group('Sampling') 58 | g_sample.add_argument('--sigma_ratio', type=float, default=0.5, help='perturbation ratio of standard deviation for positions: 0.5 | 0.75') 59 | 60 | g_sample.add_argument('--num_sample_inout', type=int, default=5000, help='# of sampling points: 5000') 61 | 62 | # Rigid pose related 63 | g_rigid = parser.add_argument_group('Rigid') 64 | g_rigid.add_argument('--min_samples', type=int, default=3, help='min. #samples for ransac') 65 | g_rigid.add_argument('--res_thresh', type=float, default=20, help='residual threshold for selecting inliers') 66 | g_rigid.add_argument('--max_trials', type=int, default=200, help='max. #iterations') 67 | 68 | # Pre. & Aug. 
related 69 | g_aug = parser.add_argument_group('aug') 70 | # appearance 71 | g_aug.add_argument('--use_aug', type=bool, default=True, help='') 72 | g_aug.add_argument('--aug_blur', type=int, default=3, help='augmentation blur') 73 | g_aug.add_argument('--aug_sha', type=float, default=50.0, help='augmentation sharpness') 74 | g_aug.add_argument('--aug_con', type=float, default=50.0, help='augmentation contrast') 75 | g_aug.add_argument('--aug_bri', type=float, default=6.0, help='augmentation brightness') 76 | g_aug.add_argument('--aug_col', type=float, default=20.0, help='augmentation color') 77 | 78 | # Training related 79 | g_train = parser.add_argument_group('Training') 80 | g_train.add_argument('--batch_size', type=int, default=4, help='input batch size') 81 | 82 | g_train.add_argument('--num_threads', default=1, type=int, help='# sthreads for loading data') 83 | g_train.add_argument('--serial_batches', action='store_true', 84 | help='if true, takes images in order to make batches, otherwise takes them randomly') 85 | # g_train.add_argument('--pin_memory', type=bool, default=True, help='pin_memory') 86 | 87 | g_train.add_argument('--out_type', type=str, default='rsdf', help='rsdf | csdf | eff_csdf') 88 | g_train.add_argument('--loss_type', type=str, default='l1', help='mse | l1 | huber') 89 | g_train.add_argument('--clamp_dist', type=float, default=5.0, help='') 90 | g_train.add_argument('--norm_clamp_dist', type=float, default=0.1, help='') 91 | g_train.add_argument('--use_xyz', type=bool, default=True, help='') 92 | g_train.add_argument('--xyz_lambda', type=float, default=1.0, help='') 93 | 94 | g_train.add_argument('--init_type', type=str, default='normal', help='normal | xavier | kaiming | orthogonal') 95 | g_train.add_argument('--init_gain', type=float, default=0.02, help='scaling factor for normal, xavier and orthogonal') 96 | g_train.add_argument('--optimizer', choices=["adam", "rms"], default="rms") 97 | g_train.add_argument('--learning_rate', type=float, default=1e-4, help='') # 1e-3 98 | g_train.add_argument('--gamma', type=float, default=0.1, help='LR is multiplied by gamma on schedule.') 99 | g_train.add_argument('--schedule', type=int, nargs='+', default=[500, 1000, 1500], 100 | help='Decrease learning rate at these epochs.') 101 | g_train.add_argument('--num_epoch', type=int, default=2000, help='num epoch to train') 102 | 103 | g_train.add_argument('--freq_plot', type=int, default=7000, help='freqency of the error plot') 104 | g_train.add_argument('--freq_debug', type=int, default=7000, help='frequence of the visualization') 105 | 106 | # Model related 107 | g_model = parser.add_argument_group('Model') 108 | # General 109 | g_model.add_argument('--norm', type=str, default='group', 110 | help='instance normalization or batch normalization or group normalization') 111 | # hg filter specify 112 | g_model.add_argument('--num_stack', type=int, default=4, help='# of stacked layer of hourglass') 113 | g_model.add_argument('--num_hourglass', type=int, default=2, help='# of hourglass') 114 | g_model.add_argument('--skip_hourglass', action='store_true', help='skip connection in hourglass') 115 | g_model.add_argument('--hg_down', type=str, default='ave_pool', help='ave pool || conv64 || conv128') 116 | g_model.add_argument('--hourglass_dim', type=int, default='256', help='256 | 512') 117 | 118 | # Classification General 119 | g_model.add_argument('--mlp_dim', nargs='+', default=[257, 1024, 512, 256, 128, 1], type=int, 120 | help='# of dimensions of mlp') 121 | 
g_model.add_argument('--mlp_dim_xyz', nargs='+', default=[257, 1024, 512, 256, 128, 4], 122 | type=int, help='# of dimensions of mlp') 123 | 124 | g_model.add_argument('--use_tanh', type=bool, default=True, 125 | help='using tanh after last conv of image_filter network') 126 | 127 | g_model.add_argument('--no_residual', action='store_true', help='no skip connection in mlp') 128 | 129 | # Eval. related 130 | g_eval = parser.add_argument_group('Evaluation') 131 | g_eval.add_argument('--step_size', type=int, default=10, help='step size (mm) of grid') 132 | g_eval.add_argument('--num_in_batch', type=int, default=1500000, help='number of each batch for eval.') 133 | g_eval.add_argument('--thresh', type=float, default=0.0, help='0.0999 | 0.0 | -0.0999') 134 | 135 | g_eval.add_argument('--freq_eval_all', type=int, default=20, help='freqency of the eval. for all') 136 | g_eval.add_argument('--gen_obj_pose', type=bool, default=True, help='') 137 | 138 | # special tasks 139 | self.initialized = True 140 | return parser 141 | 142 | def gather_options(self): 143 | # initialize parser with basic options 144 | if not self.initialized: 145 | parser = argparse.ArgumentParser( 146 | formatter_class=argparse.ArgumentDefaultsHelpFormatter) 147 | parser = self.initialize(parser) 148 | 149 | self.parser = parser 150 | 151 | return parser.parse_args() 152 | 153 | def print_options(self, opt): 154 | message = '' 155 | message += '----------------- Options ---------------\n' 156 | for k, v in sorted(vars(opt).items()): 157 | comment = '' 158 | default = self.parser.get_default(k) 159 | if v != default: 160 | comment = '\t[default: %s]' % str(default) 161 | message += '{:>25}: {:<30}{}\n'.format(str(k), str(v), comment) 162 | message += '----------------- End -------------------' 163 | print(message) 164 | 165 | def parse(self): 166 | opt = self.gather_options() 167 | return opt 168 | -------------------------------------------------------------------------------- /lib/rigid_fit/ransac.py: -------------------------------------------------------------------------------- 1 | """A simple RANSAC class implementation. 2 | References: 3 | [1] : https://github.com/scikit-image/scikit-image/blob/master/skimage/measure/fit.py 4 | [2] : https://github.com/scikit-learn/scikit-learn/blob/e5698bde9/sklearn/linear_model/_ransac.py 5 | """ 6 | 7 | import numpy as np 8 | 9 | 10 | class RansacEstimator: 11 | """Random Sample Consensus. 12 | """ 13 | def __init__(self, min_samples=None, residual_threshold=None, max_trials=100): 14 | """Constructor. 15 | 16 | Args: 17 | min_samples: The minimal number of samples needed to fit the model 18 | to the data. If `None`, we assume a linear model in which case 19 | the minimum number is one more than the feature dimension. 20 | residual_threshold: The maximum allowed residual for a sample to 21 | be classified as an inlier. If `None`, the threshold is chosen 22 | to be the median absolute deviation of the target variable. 23 | max_trials: The maximum number of trials to run RANSAC for. By 24 | default, this value is 100. 25 | """ 26 | self.min_samples = min_samples 27 | self.residual_threshold = residual_threshold 28 | self.max_trials = max_trials 29 | 30 | def fit(self, model, data): 31 | """Robustely fit a model to the data. 32 | 33 | Args: 34 | model: a class object that implements `estimate` and 35 | `residuals` methods. 36 | data: the data to fit the model to. Can be a list of 37 | data pairs, such as `X` and `y` in the case of 38 | regression. 
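          (Editor's note, not in the original: every array passed in `data` must share the
          same leading dimension; each trial draws `min_samples` row indices once and
          applies them to all arrays, so corresponding rows must describe the same
          observation.)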
39 | 40 | Returns: 41 | A dictionary containing: 42 | best_model: the model with the largest consensus set 43 | and lowest residual error. 44 | inliers: a boolean mask indicating the inlier subset 45 | of the data for the best model. 46 | """ 47 | best_model = None 48 | best_inliers = None 49 | best_num_inliers = 0 50 | best_residual_sum = np.inf 51 | 52 | if not isinstance(data, (tuple, list)): 53 | data = [data] 54 | num_data, num_feats = data[0].shape 55 | 56 | for trial in range(self.max_trials): 57 | # randomly select subset 58 | rand_subset_idxs = np.random.choice( 59 | np.arange(num_data), size=self.min_samples, replace=False) 60 | rand_subset = [d[rand_subset_idxs] for d in data] 61 | 62 | # estimate with model 63 | model.estimate(*rand_subset) 64 | 65 | # compute residuals 66 | residuals = model.residuals(*data) 67 | # residuals_sum = residuals.sum() 68 | inliers = residuals <= self.residual_threshold 69 | num_inliers = np.sum(inliers) 70 | 71 | # decide if better 72 | # if (best_num_inliers < num_inliers) or (best_residual_sum > residuals_sum): 73 | if (best_num_inliers < num_inliers): 74 | best_num_inliers = num_inliers 75 | # best_residual_sum = residuals_sum 76 | best_inliers = inliers 77 | 78 | # refit model using all inliers for this set 79 | if best_num_inliers == 0: 80 | data_inliers = data 81 | else: 82 | data_inliers = [d[best_inliers] for d in data] 83 | model.estimate(*data_inliers) 84 | 85 | ret = { 86 | "best_params": model.params, 87 | "best_inliers": best_inliers, 88 | } 89 | return ret -------------------------------------------------------------------------------- /lib/rigid_fit/ransac_kabsch.py: -------------------------------------------------------------------------------- 1 | """Estimate a rigid transform between 2 point clouds. 2 | """ 3 | 4 | import numpy as np 5 | from .ransac import RansacEstimator 6 | 7 | import pdb 8 | 9 | def gen_data(N=100, frac=0.1): 10 | # create a random rigid transform 11 | transform = np.eye(4) 12 | # transform[:3, :3] = RotationMatrix.random() 13 | transform[:3, :3] = np.array([-0.52573111, 0.85065081, 0.0, 0.84825128, 0.52424812, -0.07505775, -0.06384793, -0.03946019, -0.99717919]).reshape(3,3) 14 | transform[:3, 3] = 2 * np.random.randn(3) + 1 15 | 16 | # create a random source point cloud 17 | src_pc = 5 * np.random.randn(N, 3) + 2 18 | dst_pc = Procrustes.transform_xyz(src_pc, transform) 19 | 20 | # corrupt 21 | rand_corrupt = np.random.choice(np.arange(len(src_pc)), replace=False, size=int(frac*N)) 22 | dst_pc[rand_corrupt] += np.random.uniform(-10, 10, (int(frac*N), 3)) 23 | 24 | return src_pc, dst_pc, transform, rand_corrupt 25 | 26 | 27 | def transform_from_rotm_tr(rotm, tr): 28 | transform = np.eye(4) 29 | transform[:3, :3] = rotm 30 | transform[:3, 3] = tr 31 | return transform 32 | 33 | class Procrustes: 34 | """Determines the best rigid transform [1] between two point clouds. 35 | 36 | References: 37 | [1]: https://en.wikipedia.org/wiki/Orthogonal_Procrustes_problem 38 | """ 39 | def __init__(self, transform=None): 40 | self._transform = transform 41 | 42 | def __call__(self, xyz): 43 | return Procrustes.transform_xyz(xyz, self._transform) 44 | 45 | @staticmethod 46 | def transform_xyz(xyz, transform): 47 | """Applies a rigid transform to an (N, 3) point cloud. 
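    (Editor's note, not in the original: `transform` is a 4x4 homogeneous matrix; the
    points are padded with a column of ones, multiplied, and the first three coordinates
    of the result are returned.)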
48 | """ 49 | xyz_h = np.hstack([xyz, np.ones((len(xyz), 1))]) # homogenize 3D pointcloud 50 | xyz_t_h = (transform @ xyz_h.T).T # apply transform 51 | return xyz_t_h[:, :3] 52 | 53 | # def estimate(self, X, Y): 54 | # # find centroids 55 | # X_c = np.mean(X, axis=0) 56 | # Y_c = np.mean(Y, axis=0) 57 | 58 | # # shift 59 | # X_s = X - X_c 60 | # Y_s = Y - Y_c 61 | 62 | # # compute SVD of covariance matrix 63 | # cov = Y_s.T @ X_s 64 | # u, _, vt = np.linalg.svd(cov) 65 | 66 | # # determine rotation 67 | # rot = u @ vt 68 | # if np.linalg.det(rot) < 0.: 69 | # vt[2, :] *= -1 70 | # rot = u @ vt 71 | 72 | # # determine optimal translation 73 | # trans = Y_c - rot @ X_c 74 | 75 | # self._transform = transform_from_rotm_tr(rot, trans) 76 | 77 | def estimate(self, X, Y): 78 | # find centroids 79 | X_c = np.mean(X, axis=0) 80 | Y_c = np.mean(Y, axis=0) 81 | 82 | # shift 83 | X_s = X - X_c 84 | Y_s = Y - Y_c 85 | 86 | # Computation of the covariance matrix 87 | C = np.dot(np.transpose(Y_s), X_s) 88 | 89 | # Computation of the optimal rotation matrix 90 | # This can be done using singular value decomposition (SVD) 91 | # Getting the sign of the det(V)*(W) to decide 92 | # whether we need to correct our rotation matrix to ensure a 93 | # right-handed coordinate system. 94 | # And finally calculating the optimal rotation matrix U 95 | # see http://en.wikipedia.org/wiki/Kabsch_algorithm 96 | V, S, W = np.linalg.svd(C) 97 | d = (np.linalg.det(V) * np.linalg.det(W)) < 0.0 98 | 99 | if d: 100 | S[-1] = -S[-1] 101 | V[:, -1] = -V[:, -1] 102 | 103 | # Create Rotation matrix U 104 | rot = np.dot(V, W) 105 | 106 | # determine optimal translation 107 | trans = Y_c - rot @ X_c 108 | 109 | self._transform = transform_from_rotm_tr(rot, trans) 110 | 111 | def residuals(self, X, Y): 112 | """L2 distance between point correspondences. 113 | """ 114 | Y_est = self(X) 115 | sum_sq = np.sum((Y_est - Y)**2, axis=1) 116 | return sum_sq 117 | 118 | @property 119 | def params(self): 120 | return self._transform 121 | 122 | 123 | if __name__ == "__main__": 124 | src_pc, dst_pc, transform_true, rand_corrupt = gen_data(frac=0.2) 125 | 126 | # estimate without ransac, i.e. 
using all 127 | # point correspondences 128 | naive_model = Procrustes() 129 | naive_model.estimate(src_pc, dst_pc) 130 | transform_naive = naive_model.params 131 | mse_naive = np.sqrt(naive_model.residuals(src_pc, dst_pc).mean()) 132 | print("mse naive: {}".format(mse_naive)) 133 | 134 | 135 | # estimate with RANSAC 136 | ransac = RansacEstimator( 137 | min_samples=3, 138 | # 5, 10, 20 139 | residual_threshold=(10)**2, 140 | max_trials=100, 141 | ) 142 | ret = ransac.fit(Procrustes(), [src_pc, dst_pc]) 143 | transform_ransac = ret["best_params"] 144 | 145 | 146 | inliers_ransac = ret["best_inliers"] 147 | mse_ransac = np.sqrt(Procrustes(transform_ransac).residuals(src_pc, dst_pc).mean()) 148 | print("mse ransac all: {}".format(mse_ransac)) 149 | mse_ransac_inliers = np.sqrt( 150 | Procrustes(transform_ransac).residuals(src_pc[inliers_ransac], dst_pc[inliers_ransac]).mean()) 151 | print("mse ransac inliers: {}".format(mse_ransac_inliers)) 152 | pdb.set_trace() -------------------------------------------------------------------------------- /lib/sdf.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | 4 | def create_grid(resX, resY, resZ, b_min=np.array([0, 0, 0]), b_max=np.array([1, 1, 1]), transform=None): 5 | ''' 6 | Create a dense grid of given resolution and bounding box 7 | :param resX: resolution along X axis 8 | :param resY: resolution along Y axis 9 | :param resZ: resolution along Z axis 10 | :param b_min: vec3 (x_min, y_min, z_min) bounding box corner 11 | :param b_max: vec3 (x_max, y_max, z_max) bounding box corner 12 | :return: [3, resX, resY, resZ] coordinates of the grid, and transform matrix from mesh index 13 | ''' 14 | coords = np.mgrid[:resX, :resY, :resZ] 15 | coords = coords.reshape(3, -1) 16 | coords_matrix = np.eye(4) 17 | length = b_max - b_min 18 | coords_matrix[0, 0] = length[0] / resX 19 | coords_matrix[1, 1] = length[1] / resY 20 | coords_matrix[2, 2] = length[2] / resZ 21 | coords_matrix[0:3, 3] = b_min 22 | coords = np.matmul(coords_matrix[:3, :3], coords) + coords_matrix[:3, 3:4] 23 | if transform is not None: 24 | coords = np.matmul(transform[:3, :3], coords) + transform[:3, 3:4] 25 | coords_matrix = np.matmul(transform, coords_matrix) 26 | coords = coords.reshape(3, resX, resY, resZ) 27 | return coords, coords_matrix 28 | 29 | 30 | def batch_eval(points, eval_func, num_samples=512 * 512 * 512): 31 | num_pts = points.shape[1] 32 | sdf = np.zeros(num_pts) 33 | 34 | num_batches = num_pts // num_samples 35 | for i in range(num_batches): 36 | sdf[i * num_samples:i * num_samples + num_samples] = eval_func( 37 | points[:, i * num_samples:i * num_samples + num_samples]) 38 | if num_pts % num_samples: 39 | sdf[num_batches * num_samples:] = eval_func(points[:, num_batches * num_samples:]) 40 | 41 | return sdf 42 | 43 | 44 | def eval_grid(coords, eval_func, num_samples=512 * 512 * 512): 45 | resolution = coords.shape[1:4] 46 | coords = coords.reshape([3, -1]) 47 | sdf = batch_eval(coords, eval_func, num_samples=num_samples) 48 | return sdf.reshape(resolution) 49 | 50 | 51 | def eval_grid_octree(coords, eval_func, 52 | init_resolution=64, threshold=0.01, 53 | num_samples=512 * 512 * 512): 54 | resolution = coords.shape[1:4] 55 | 56 | sdf = np.zeros(resolution) 57 | 58 | dirty = np.ones(resolution, dtype=np.bool) 59 | grid_mask = np.zeros(resolution, dtype=np.bool) 60 | 61 | reso = resolution[0] // init_resolution 62 | 63 | while reso > 0: 64 | # subdivide the grid 65 | grid_mask[0:resolution[0]:reso, 
0:resolution[1]:reso, 0:resolution[2]:reso] = True 66 | # test samples in this iteration 67 | test_mask = np.logical_and(grid_mask, dirty) 68 | #print('step size:', reso, 'test sample size:', test_mask.sum()) 69 | points = coords[:, test_mask] 70 | 71 | sdf[test_mask] = batch_eval(points, eval_func, num_samples=num_samples) 72 | dirty[test_mask] = False 73 | 74 | # do interpolation 75 | if reso <= 1: 76 | break 77 | for x in range(0, resolution[0] - reso, reso): 78 | for y in range(0, resolution[1] - reso, reso): 79 | for z in range(0, resolution[2] - reso, reso): 80 | # if center marked, return 81 | if not dirty[x + reso // 2, y + reso // 2, z + reso // 2]: 82 | continue 83 | v0 = sdf[x, y, z] 84 | v1 = sdf[x, y, z + reso] 85 | v2 = sdf[x, y + reso, z] 86 | v3 = sdf[x, y + reso, z + reso] 87 | v4 = sdf[x + reso, y, z] 88 | v5 = sdf[x + reso, y, z + reso] 89 | v6 = sdf[x + reso, y + reso, z] 90 | v7 = sdf[x + reso, y + reso, z + reso] 91 | v = np.array([v0, v1, v2, v3, v4, v5, v6, v7]) 92 | v_min = v.min() 93 | v_max = v.max() 94 | # this cell is all the same 95 | if (v_max - v_min) < threshold: 96 | sdf[x:x + reso, y:y + reso, z:z + reso] = (v_max + v_min) / 2 97 | dirty[x:x + reso, y:y + reso, z:z + reso] = False 98 | reso //= 2 99 | 100 | return sdf.reshape(resolution) 101 | 102 | 103 | 104 | 105 | """ 106 | for hopifu-only 107 | """ 108 | import pdb 109 | def batch_eval_sdf_xyz(points, eval_func, num_samples=512 * 512 * 512): 110 | num_pts = points.shape[1] 111 | sdf = np.zeros(num_pts) 112 | xyz = np.zeros((3, num_pts)) 113 | 114 | # pdb.set_trace() 115 | num_batches = num_pts // num_samples 116 | for i in range(num_batches): 117 | sdf[i * num_samples:i * num_samples + num_samples], xyz[:, i * num_samples:i * num_samples + num_samples] = eval_func( 118 | points[:, i * num_samples:i * num_samples + num_samples]) 119 | if num_pts % num_samples: 120 | sdf[num_batches * num_samples:], xyz[:, num_batches * num_samples:] = eval_func(points[:, num_batches * num_samples:]) 121 | 122 | return sdf, xyz 123 | 124 | def eval_sdf_xyz_grid(coords, eval_func, num_samples=512 * 512 * 512): 125 | resolution = coords.shape[1:4] 126 | coords = coords.reshape([3, -1]) 127 | sdf, xyz = batch_eval_sdf_xyz(coords, eval_func, num_samples=num_samples) 128 | return sdf, xyz 129 | 130 | def eval_sdf_xyz_grid_frustum(coords, eval_func, num_samples=512 * 512 * 512): 131 | # coords = coords.reshape([3, -1]) 132 | sdf, xyz = batch_eval_sdf_xyz(coords, eval_func, num_samples=num_samples) 133 | return sdf, xyz 134 | -------------------------------------------------------------------------------- /lib/sym_util.py: -------------------------------------------------------------------------------- 1 | # Author: Tomas Hodan (hodantom@cmp.felk.cvut.cz) 2 | # Center for Machine Perception, Czech Technical University in Prague 3 | 4 | """Parameters of the BOP datasets.""" 5 | 6 | import os 7 | import pdb 8 | import json 9 | 10 | import math 11 | import numpy as np 12 | 13 | def load_json(path, keys_to_int=False): 14 | """Loads content of a JSON file. 15 | 16 | :param path: Path to the JSON file. 17 | :return: Content of the loaded JSON file. 18 | """ 19 | # Keys to integers. 
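# --- Editor's note (not part of the original source) ---------------------------------
# JSON object keys are always strings, but BOP's models_info.json is indexed by integer
# object ids elsewhere in this file (e.g. models_info[obj_id]); the helper below converts
# purely numeric keys back to int when keys_to_int=True is requested.
# ---------------------------------------------------------------------------------------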
20 | def convert_keys_to_int(x): 21 | return {int(k) if k.lstrip('-').isdigit() else k: v for k, v in x.items()} 22 | 23 | with open(path, 'r') as f: 24 | if keys_to_int: 25 | content = json.load(f, object_hook=lambda x: convert_keys_to_int(x)) 26 | else: 27 | content = json.load(f) 28 | 29 | return content 30 | 31 | def get_obj_params(models_path, dataset_name): 32 | """Returns parameters of object models for the specified dataset. 33 | 34 | :param models_path: Path to a folder with models. 35 | :param dataset_name: Name of the dataset for which to return the parameters. 36 | :return: Dictionary with object model parameters for the specified dataset. 37 | """ 38 | # Object ID's. 39 | obj_ids = { 40 | 'lm': list(range(1, 16)), 41 | 'lmo': [1, 5, 6, 8, 9, 10, 11, 12], 42 | 'tudl': list(range(1, 4)), 43 | 'tyol': list(range(1, 22)), 44 | 'ruapc': list(range(1, 15)), 45 | 'icmi': list(range(1, 7)), 46 | 'icbin': list(range(1, 3)), 47 | 'itodd': list(range(1, 29)), 48 | 'hbs': [1, 3, 4, 8, 9, 10, 12, 15, 17, 18, 19, 22, 23, 29, 32, 33], 49 | 'hb': list(range(1, 34)), # Full HB dataset. 50 | 'ycbv': list(range(1, 22)), 51 | 'hope': list(range(1, 29)), 52 | }[dataset_name] 53 | 54 | # ID's of objects with ambiguous views evaluated using the ADI pose error 55 | # function (the others are evaluated using ADD). See Hodan et al. (ECCVW'16). 56 | symmetric_obj_ids = { 57 | 'lm': [3, 7, 10, 11], 58 | 'lmo': [10, 11], 59 | 'tudl': [], 60 | 'tyol': [3, 4, 5, 6, 7, 8, 10, 11, 12, 13, 15, 16, 17, 18, 19, 21], 61 | 'ruapc': [8, 9, 12, 13], 62 | 'icmi': [1, 2, 6], 63 | 'icbin': [1], 64 | 'itodd': [2, 3, 4, 5, 7, 8, 9, 11, 12, 14, 17, 18, 19, 23, 24, 25, 27, 28], 65 | 'hbs': [10, 12, 18, 29], 66 | 'hb': [6, 10, 11, 12, 13, 14, 18, 24, 29], 67 | 'ycbv': [1, 13, 14, 16, 18, 19, 20, 21], 68 | 'hope': None, # Not defined yet. 69 | }[dataset_name] 70 | 71 | # Both versions of the HB dataset share the same directory. 72 | if dataset_name == 'hbs': 73 | dataset_name = 'hb' 74 | 75 | p = { 76 | # ID's of all objects included in the dataset. 77 | 'obj_ids': obj_ids, 78 | 79 | # ID's of objects with symmetries. 80 | 'symmetric_obj_ids': symmetric_obj_ids, 81 | 82 | # Path to a file with meta information about the object models. 83 | 'models_info_path': os.path.join(models_path, 'models_info.json') 84 | } 85 | 86 | return p 87 | 88 | def unit_vector(data, axis=None, out=None): 89 | """Return ndarray normalized by length, i.e. Euclidean norm, along axis. 
90 | 91 | >>> v0 = numpy.random.random(3) 92 | >>> v1 = unit_vector(v0) 93 | >>> numpy.allclose(v1, v0 / numpy.linalg.norm(v0)) 94 | True 95 | >>> v0 = numpy.random.rand(5, 4, 3) 96 | >>> v1 = unit_vector(v0, axis=-1) 97 | >>> v2 = v0 / numpy.expand_dims(numpy.sqrt(numpy.sum(v0*v0, axis=2)), 2) 98 | >>> numpy.allclose(v1, v2) 99 | True 100 | >>> v1 = unit_vector(v0, axis=1) 101 | >>> v2 = v0 / numpy.expand_dims(numpy.sqrt(numpy.sum(v0*v0, axis=1)), 1) 102 | >>> numpy.allclose(v1, v2) 103 | True 104 | >>> v1 = numpy.empty((5, 4, 3)) 105 | >>> unit_vector(v0, axis=1, out=v1) 106 | >>> numpy.allclose(v1, v2) 107 | True 108 | >>> list(unit_vector([])) 109 | [] 110 | >>> list(unit_vector([1])) 111 | [1.0] 112 | 113 | """ 114 | if out is None: 115 | data = np.array(data, dtype=np.float64, copy=True) 116 | if data.ndim == 1: 117 | data /= math.sqrt(np.dot(data, data)) 118 | return data 119 | else: 120 | if out is not data: 121 | out[:] = np.array(data, copy=False) 122 | data = out 123 | length = np.atleast_1d(np.sum(data * data, axis)) 124 | np.sqrt(length, length) 125 | if axis is not None: 126 | length = np.expand_dims(length, axis) 127 | data /= length 128 | if out is None: 129 | return data 130 | 131 | def rotation_matrix(angle, direction, point=None): 132 | """Return matrix to rotate about axis defined by point and direction. 133 | 134 | >>> R = rotation_matrix(math.pi/2, [0, 0, 1], [1, 0, 0]) 135 | >>> numpy.allclose(numpy.dot(R, [0, 0, 0, 1]), [1, -1, 0, 1]) 136 | True 137 | >>> angle = (random.random() - 0.5) * (2*math.pi) 138 | >>> direc = numpy.random.random(3) - 0.5 139 | >>> point = numpy.random.random(3) - 0.5 140 | >>> R0 = rotation_matrix(angle, direc, point) 141 | >>> R1 = rotation_matrix(angle-2*math.pi, direc, point) 142 | >>> is_same_transform(R0, R1) 143 | True 144 | >>> R0 = rotation_matrix(angle, direc, point) 145 | >>> R1 = rotation_matrix(-angle, -direc, point) 146 | >>> is_same_transform(R0, R1) 147 | True 148 | >>> I = numpy.identity(4, numpy.float64) 149 | >>> numpy.allclose(I, rotation_matrix(math.pi*2, direc)) 150 | True 151 | >>> numpy.allclose(2, numpy.trace(rotation_matrix(math.pi/2, 152 | ... direc, point))) 153 | True 154 | 155 | """ 156 | sina = math.sin(angle) 157 | cosa = math.cos(angle) 158 | direction = unit_vector(direction[:3]) 159 | # rotation matrix around unit vector 160 | R = np.diag([cosa, cosa, cosa]) 161 | R += np.outer(direction, direction) * (1.0 - cosa) 162 | direction *= sina 163 | R += np.array([[0.0, -direction[2], direction[1]], 164 | [direction[2], 0.0, -direction[0]], 165 | [-direction[1], direction[0], 0.0]]) 166 | M = np.identity(4) 167 | M[:3, :3] = R 168 | if point is not None: 169 | # rotation not around origin 170 | point = np.array(point[:3], dtype=np.float64, copy=False) 171 | M[:3, 3] = point - np.dot(R, point) 172 | return M 173 | 174 | def get_symmetry_transformations(model_info, max_sym_disc_step): 175 | """Returns a set of symmetry transformations for an object model. 176 | 177 | :param model_info: See files models_info.json provided with the datasets. 178 | :param max_sym_disc_step: The maximum fraction of the object diameter which 179 | the vertex that is the furthest from the axis of continuous rotational 180 | symmetry travels between consecutive discretized rotations. 181 | :return: The set of symmetry transformations. 182 | """ 183 | # Discrete symmetries. 184 | trans_disc = [{'R': np.eye(3), 't': np.array([[0, 0, 0]]).T}] # Identity. 
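# --- Editor's note (not part of the original source) ---------------------------------
# trans_disc starts from the identity and is extended below with any 4x4 matrices listed
# under 'symmetries_discrete'. Continuous axis symmetries are discretized by splitting a
# full turn into int(np.ceil(np.pi / max_sym_disc_step)) steps, generating one rotation
# per non-zero step; each of these is then composed with every entry of trans_disc
# (when no continuous symmetry exists, trans_disc is returned unchanged).
# ---------------------------------------------------------------------------------------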
185 | if 'symmetries_discrete' in model_info: 186 | for sym in model_info['symmetries_discrete']: 187 | sym_4x4 = np.reshape(sym, (4, 4)) 188 | R = sym_4x4[:3, :3] 189 | t = sym_4x4[:3, 3].reshape((3, 1)) 190 | trans_disc.append({'R': R, 't': t}) 191 | 192 | # Discretized continuous symmetries. 193 | trans_cont = [] 194 | if 'symmetries_continuous' in model_info: 195 | for sym in model_info['symmetries_continuous']: 196 | axis = np.array(sym['axis']) 197 | offset = np.array(sym['offset']).reshape((3, 1)) 198 | 199 | # (PI * diam.) / (max_sym_disc_step * diam.) = discrete_steps_count 200 | discrete_steps_count = int(np.ceil(np.pi / max_sym_disc_step)) 201 | 202 | # Discrete step in radians. 203 | discrete_step = 2.0 * np.pi / discrete_steps_count 204 | 205 | for i in range(1, discrete_steps_count): 206 | R = rotation_matrix(i * discrete_step, axis)[:3, :3] 207 | t = -R.dot(offset) + offset 208 | trans_cont.append({'R': R, 't': t}) 209 | 210 | # Combine the discrete and the discretized continuous symmetries. 211 | trans = [] 212 | for tran_disc in trans_disc: 213 | if len(trans_cont): 214 | for tran_cont in trans_cont: 215 | R = tran_cont['R'].dot(tran_disc['R']) 216 | t = tran_cont['R'].dot(tran_disc['t']) + tran_cont['t'] 217 | trans.append({'R': R, 't': t}) 218 | else: 219 | trans.append(tran_disc) 220 | 221 | return trans 222 | 223 | 224 | if __name__ == '__main__': 225 | # PARAMETERS. 226 | ################################################################################ 227 | p = { 228 | # See dataset_params.py for options. 229 | 'dataset': 'ycbv', 230 | 231 | # See misc.get_symmetry_transformations(). 232 | 'max_sym_disc_step': 0.01, 233 | 234 | # Folder containing the BOP datasets. 235 | 'models_path': '/mnt/data0/lin/bop_datasets/ycbv/models', 236 | 237 | } 238 | ################################################################################ 239 | 240 | # Load dataset parameters. 241 | obj_params = get_obj_params(p['models_path'], p['dataset']) 242 | 243 | # Load meta info about the models (including symmetries). 244 | models_info = load_json(obj_params['models_info_path'], keys_to_int=True) 245 | 246 | # for obj_id in obj_params['obj_ids']: 247 | import torch 248 | sym_pool=[] 249 | obj_id = 13 250 | sym_poses = get_symmetry_transformations(models_info[obj_id], p['max_sym_disc_step']) 251 | for sym_pose in sym_poses: 252 | Rt = np.concatenate([sym_pose['R'], sym_pose['t'].reshape(3,1)], axis=1) 253 | Rt = np.concatenate([Rt, np.array([0, 0, 0, 1]).reshape(1, 4)], axis=0) 254 | sym_pool.append(torch.Tensor(Rt)) 255 | pdb.set_trace() 256 | --------------------------------------------------------------------------------