├── .gitignore
├── LICENSE
├── README.md
├── docs
│   ├── code-of-conduct.md
│   └── contributing.md
└── dvs
    ├── checkpoint
    │   └── stabilzation
    │       └── stabilzation_last.checkpoint
    ├── conf
    │   ├── stabilzation.yaml
    │   └── stabilzation_train.yaml
    ├── data
    │   └── arial.ttf
    ├── dataset.py
    ├── flownet2
    │   ├── LICENSE
    │   ├── README.md
    │   ├── __init__.py
    │   ├── convert.py
    │   ├── datasets.py
    │   ├── install.sh
    │   ├── losses.py
    │   ├── main.py
    │   ├── models.py
    │   ├── networks
    │   │   ├── FlowNetC.py
    │   │   ├── FlowNetFusion.py
    │   │   ├── FlowNetS.py
    │   │   ├── FlowNetSD.py
    │   │   ├── __init__.py
    │   │   ├── channelnorm_package
    │   │   │   ├── __init__.py
    │   │   │   ├── channelnorm.py
    │   │   │   ├── channelnorm_cuda.cc
    │   │   │   ├── channelnorm_kernel.cu
    │   │   │   ├── channelnorm_kernel.cuh
    │   │   │   └── setup.py
    │   │   ├── correlation_package
    │   │   │   ├── __init__.py
    │   │   │   ├── correlation.py
    │   │   │   ├── correlation_cuda.cc
    │   │   │   ├── correlation_cuda_kernel.cu
    │   │   │   ├── correlation_cuda_kernel.cuh
    │   │   │   └── setup.py
    │   │   ├── resample2d_package
    │   │   │   ├── __init__.py
    │   │   │   ├── resample2d.py
    │   │   │   ├── resample2d_cuda.cc
    │   │   │   ├── resample2d_kernel.cu
    │   │   │   ├── resample2d_kernel.cuh
    │   │   │   └── setup.py
    │   │   └── submodules.py
    │   ├── run.sh
    │   ├── run_release.sh
    │   └── utils
    │       ├── __init__.py
    │       ├── flow_utils.py
    │       ├── frame_utils.py
    │       ├── param_utils.py
    │       └── tools.py
    ├── gyro
    │   ├── __init__.py
    │   ├── gyro_function.py
    │   └── gyro_io.py
    ├── inference.py
    ├── load_frame_sensor_data.py
    ├── loss.py
    ├── metrics.py
    ├── model.py
    ├── printer.py
    ├── requirements.txt
    ├── train.py
    ├── util.py
    └── warp
        ├── __init__.py
        ├── rasterizer.py
        ├── read_write.py
        └── warping.py
/.gitignore:
--------------------------------------------------------------------------------
1 | *.pyc
2 | .torch
3 | _ext
4 | *.o
5 | _ext/
6 | *.png
7 | *.jpg
8 | *.tar
9 | log/*
10 |
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 |
2 | Apache License
3 | Version 2.0, January 2004
4 | http://www.apache.org/licenses/
5 |
6 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
7 |
8 | 1. Definitions.
9 |
10 | "License" shall mean the terms and conditions for use, reproduction,
11 | and distribution as defined by Sections 1 through 9 of this document.
12 |
13 | "Licensor" shall mean the copyright owner or entity authorized by
14 | the copyright owner that is granting the License.
15 |
16 | "Legal Entity" shall mean the union of the acting entity and all
17 | other entities that control, are controlled by, or are under common
18 | control with that entity. For the purposes of this definition,
19 | "control" means (i) the power, direct or indirect, to cause the
20 | direction or management of such entity, whether by contract or
21 | otherwise, or (ii) ownership of fifty percent (50%) or more of the
22 | outstanding shares, or (iii) beneficial ownership of such entity.
23 |
24 | "You" (or "Your") shall mean an individual or Legal Entity
25 | exercising permissions granted by this License.
26 |
27 | "Source" form shall mean the preferred form for making modifications,
28 | including but not limited to software source code, documentation
29 | source, and configuration files.
30 |
31 | "Object" form shall mean any form resulting from mechanical
32 | transformation or translation of a Source form, including but
33 | not limited to compiled object code, generated documentation,
34 | and conversions to other media types.
35 |
36 | "Work" shall mean the work of authorship, whether in Source or
37 | Object form, made available under the License, as indicated by a
38 | copyright notice that is included in or attached to the work
39 | (an example is provided in the Appendix below).
40 |
41 | "Derivative Works" shall mean any work, whether in Source or Object
42 | form, that is based on (or derived from) the Work and for which the
43 | editorial revisions, annotations, elaborations, or other modifications
44 | represent, as a whole, an original work of authorship. For the purposes
45 | of this License, Derivative Works shall not include works that remain
46 | separable from, or merely link (or bind by name) to the interfaces of,
47 | the Work and Derivative Works thereof.
48 |
49 | "Contribution" shall mean any work of authorship, including
50 | the original version of the Work and any modifications or additions
51 | to that Work or Derivative Works thereof, that is intentionally
52 | submitted to Licensor for inclusion in the Work by the copyright owner
53 | or by an individual or Legal Entity authorized to submit on behalf of
54 | the copyright owner. For the purposes of this definition, "submitted"
55 | means any form of electronic, verbal, or written communication sent
56 | to the Licensor or its representatives, including but not limited to
57 | communication on electronic mailing lists, source code control systems,
58 | and issue tracking systems that are managed by, or on behalf of, the
59 | Licensor for the purpose of discussing and improving the Work, but
60 | excluding communication that is conspicuously marked or otherwise
61 | designated in writing by the copyright owner as "Not a Contribution."
62 |
63 | "Contributor" shall mean Licensor and any individual or Legal Entity
64 | on behalf of whom a Contribution has been received by Licensor and
65 | subsequently incorporated within the Work.
66 |
67 | 2. Grant of Copyright License. Subject to the terms and conditions of
68 | this License, each Contributor hereby grants to You a perpetual,
69 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable
70 | copyright license to reproduce, prepare Derivative Works of,
71 | publicly display, publicly perform, sublicense, and distribute the
72 | Work and such Derivative Works in Source or Object form.
73 |
74 | 3. Grant of Patent License. Subject to the terms and conditions of
75 | this License, each Contributor hereby grants to You a perpetual,
76 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable
77 | (except as stated in this section) patent license to make, have made,
78 | use, offer to sell, sell, import, and otherwise transfer the Work,
79 | where such license applies only to those patent claims licensable
80 | by such Contributor that are necessarily infringed by their
81 | Contribution(s) alone or by combination of their Contribution(s)
82 | with the Work to which such Contribution(s) was submitted. If You
83 | institute patent litigation against any entity (including a
84 | cross-claim or counterclaim in a lawsuit) alleging that the Work
85 | or a Contribution incorporated within the Work constitutes direct
86 | or contributory patent infringement, then any patent licenses
87 | granted to You under this License for that Work shall terminate
88 | as of the date such litigation is filed.
89 |
90 | 4. Redistribution. You may reproduce and distribute copies of the
91 | Work or Derivative Works thereof in any medium, with or without
92 | modifications, and in Source or Object form, provided that You
93 | meet the following conditions:
94 |
95 | (a) You must give any other recipients of the Work or
96 | Derivative Works a copy of this License; and
97 |
98 | (b) You must cause any modified files to carry prominent notices
99 | stating that You changed the files; and
100 |
101 | (c) You must retain, in the Source form of any Derivative Works
102 | that You distribute, all copyright, patent, trademark, and
103 | attribution notices from the Source form of the Work,
104 | excluding those notices that do not pertain to any part of
105 | the Derivative Works; and
106 |
107 | (d) If the Work includes a "NOTICE" text file as part of its
108 | distribution, then any Derivative Works that You distribute must
109 | include a readable copy of the attribution notices contained
110 | within such NOTICE file, excluding those notices that do not
111 | pertain to any part of the Derivative Works, in at least one
112 | of the following places: within a NOTICE text file distributed
113 | as part of the Derivative Works; within the Source form or
114 | documentation, if provided along with the Derivative Works; or,
115 | within a display generated by the Derivative Works, if and
116 | wherever such third-party notices normally appear. The contents
117 | of the NOTICE file are for informational purposes only and
118 | do not modify the License. You may add Your own attribution
119 | notices within Derivative Works that You distribute, alongside
120 | or as an addendum to the NOTICE text from the Work, provided
121 | that such additional attribution notices cannot be construed
122 | as modifying the License.
123 |
124 | You may add Your own copyright statement to Your modifications and
125 | may provide additional or different license terms and conditions
126 | for use, reproduction, or distribution of Your modifications, or
127 | for any such Derivative Works as a whole, provided Your use,
128 | reproduction, and distribution of the Work otherwise complies with
129 | the conditions stated in this License.
130 |
131 | 5. Submission of Contributions. Unless You explicitly state otherwise,
132 | any Contribution intentionally submitted for inclusion in the Work
133 | by You to the Licensor shall be under the terms and conditions of
134 | this License, without any additional terms or conditions.
135 | Notwithstanding the above, nothing herein shall supersede or modify
136 | the terms of any separate license agreement you may have executed
137 | with Licensor regarding such Contributions.
138 |
139 | 6. Trademarks. This License does not grant permission to use the trade
140 | names, trademarks, service marks, or product names of the Licensor,
141 | except as required for reasonable and customary use in describing the
142 | origin of the Work and reproducing the content of the NOTICE file.
143 |
144 | 7. Disclaimer of Warranty. Unless required by applicable law or
145 | agreed to in writing, Licensor provides the Work (and each
146 | Contributor provides its Contributions) on an "AS IS" BASIS,
147 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
148 | implied, including, without limitation, any warranties or conditions
149 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
150 | PARTICULAR PURPOSE. You are solely responsible for determining the
151 | appropriateness of using or redistributing the Work and assume any
152 | risks associated with Your exercise of permissions under this License.
153 |
154 | 8. Limitation of Liability. In no event and under no legal theory,
155 | whether in tort (including negligence), contract, or otherwise,
156 | unless required by applicable law (such as deliberate and grossly
157 | negligent acts) or agreed to in writing, shall any Contributor be
158 | liable to You for damages, including any direct, indirect, special,
159 | incidental, or consequential damages of any character arising as a
160 | result of this License or out of the use or inability to use the
161 | Work (including but not limited to damages for loss of goodwill,
162 | work stoppage, computer failure or malfunction, or any and all
163 | other commercial damages or losses), even if such Contributor
164 | has been advised of the possibility of such damages.
165 |
166 | 9. Accepting Warranty or Additional Liability. While redistributing
167 | the Work or Derivative Works thereof, You may choose to offer,
168 | and charge a fee for, acceptance of support, warranty, indemnity,
169 | or other liability obligations and/or rights consistent with this
170 | License. However, in accepting such obligations, You may act only
171 | on Your own behalf and on Your sole responsibility, not on behalf
172 | of any other Contributor, and only if You agree to indemnify,
173 | defend, and hold each Contributor harmless for any liability
174 | incurred by, or claims asserted against, such Contributor by reason
175 | of your accepting any such warranty or additional liability.
176 |
177 | END OF TERMS AND CONDITIONS
178 |
179 | APPENDIX: How to apply the Apache License to your work.
180 |
181 | To apply the Apache License to your work, attach the following
182 | boilerplate notice, with the fields enclosed by brackets "[]"
183 | replaced with your own identifying information. (Don't include
184 | the brackets!) The text should be enclosed in the appropriate
185 | comment syntax for the file format. We also recommend that a
186 | file or class name and description of purpose be included on the
187 | same "printed page" as the copyright notice for easier
188 | identification within third-party archives.
189 |
190 | Copyright [yyyy] [name of copyright owner]
191 |
192 | Licensed under the Apache License, Version 2.0 (the "License");
193 | you may not use this file except in compliance with the License.
194 | You may obtain a copy of the License at
195 |
196 | http://www.apache.org/licenses/LICENSE-2.0
197 |
198 | Unless required by applicable law or agreed to in writing, software
199 | distributed under the License is distributed on an "AS IS" BASIS,
200 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
201 | See the License for the specific language governing permissions and
202 | limitations under the License.
203 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # Deep Online Fused Video Stabilization
2 |
3 | [[Paper]](https://openaccess.thecvf.com/content/WACV2022/papers/Shi_Deep_Online_Fused_Video_Stabilization_WACV_2022_paper.pdf) [[Supplementary]](https://zhmeishi.github.io/dvs/paper/dvs_supp.pdf) [[Project Page]](https://zhmeishi.github.io/dvs/) [[Dataset]](https://storage.googleapis.com/dataset_release/all.zip) [[Our Result]](https://storage.googleapis.com/dataset_release/inference_result_release.zip) [[More Results]](https://zhmeishi.github.io/dvs/supp/results.html)
4 |
5 | This repository contains the Pytorch implementation of our method in the paper "Deep Online Fused Video Stabilization".
6 |
7 | ## Environment Setting
8 | Python version >= 3.6
9 | Pytorch >= 1.0.0 with CUDA support (installation guide is [here](https://pytorch.org/get-started/locally/))
10 | Install the other required packages:
11 | ```
12 | cd dvs
13 | pip install -r requirements.txt --ignore-installed
14 | ```
15 |
16 | ## Data Preparation
17 | Download the sample video [here](https://drive.google.com/file/d/1PpF3-6BbQKy9fldjIfwa5AlbtQflx3sG/view?usp=sharing).
18 | Uncompress it and place the *video* folder under the *dvs* folder.
19 | ```
20 | python load_frame_sensor_data.py
21 | ```
22 | Demo of the curve visualization:
23 | The **gyro/OIS curve visualization** can be found at *dvs/video/s_114_outdoor_running_trail_daytime/ControlCam_20200930_104820_real.jpg*.
24 |
25 |
26 | ## FlowNet2 Preparation
27 | Note: we already provide the optical flow results for one test video in the Data Preparation step above. If you would like to generate them for all test videos, please follow the [FlowNet2 official website](https://github.com/NVIDIA/flownet2-pytorch) and the guide below. Otherwise, you can skip this section.
28 |
29 | Note: the FlowNet2 installation is tricky. Please use Python 3.6 and Pytorch 1.0.0. More details are [here](https://github.com/NVIDIA/flownet2-pytorch/issues/156), or contact us if you have any questions.
30 |
31 | Download the FlowNet2 model *FlowNet2_checkpoint.pth.tar* [here](https://drive.google.com/file/d/1hF8vS6YeHkx3j2pfCeQqqZGwA_PJq_Da/view) and move it into the *dvs/flownet2* folder.
32 | ```
33 | python warp/read_write.py # video2frames
34 | cd flownet2
35 | bash install.sh # install package
36 | bash run.sh # generate optical flow file for dataset
37 | ```
38 |
39 | ## Running Inference
40 | ```
41 | python inference.py
42 | python metrics.py
43 | ```
44 | The loss and metric information will be printed in the terminal. The metric numbers can differ slightly depending on your OpenCV/Pytorch versions.
45 |
46 | The results are under *dvs/test/stabilzation*.
47 | In *s_114_outdoor_running_trail_daytime.jpg*, the blue curve is the output of our model, and the green curve is the input.
48 | *s_114_outdoor_running_trail_daytime_stab.mp4* is the uncropped stabilized video.
49 | *s_114_outdoor_running_trail_daytime_stab_crop.mp4* is the cropped stabilized video. Note that the cropped video is generated only after running the metrics code.
50 |
51 | ## Training
52 | Download the dataset for training and test [here](https://storage.googleapis.com/dataset_release/all.zip).
53 | Uncompress *all.zip* and move the *dataset_release* folder under the *dvs* folder.
54 |
55 | Follow the FlowNet2 Preparation section above, then run:
56 | ```
57 | python warp/read_write.py --dir_path ./dataset_release # video2frames
58 | cd flownet2
59 | bash run_release.sh # generate optical flow file for dataset
60 | ```
61 |
62 | Run the training code:
63 | ```
64 | python train.py
65 | ```
66 | The model is saved in *checkpoint/stabilzation_train*.
67 |
68 | ## Citation
69 | If you use this code or dataset for your research, please cite our paper.
70 | ```
71 | @inproceedings{shi2022deep,
72 | title={Deep Online Fused Video Stabilization},
73 | author={Shi, Zhenmei and Shi, Fuhao and Lai, Wei-Sheng and Liang, Chia-Kai and Liang, Yingyu},
74 | booktitle={Proceedings of the IEEE/CVF Winter Conference on Applications of Computer Vision},
75 | pages={1250--1258},
76 | year={2022}
77 | }
78 | ```
79 |
--------------------------------------------------------------------------------
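The FlowNet2 Preparation step above writes per-frame optical flow files under each video folder (`flo/` and `flo_back/`, named `%06d.flo` by `flownet2/main.py`). Below is a minimal sketch, not part of the repo, for sanity-checking one of those files; it assumes it is run from the *dvs* folder with the sample video from Data Preparation unpacked, and the exact file path is an illustrative assumption.

```
# Minimal sketch (assumptions: run from dvs/, sample video and its flo/ folder
# present). flow_utils.readFlow is the same helper dataset.py uses.
from flownet2 import flow_utils

flow = flow_utils.readFlow(
    "video/s_114_outdoor_running_trail_daytime/flo/000000.flo")
print(flow.shape)  # expected (H, W, 2): per-pixel forward optical flow
```
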
/docs/code-of-conduct.md:
--------------------------------------------------------------------------------
1 | # Google Open Source Community Guidelines
2 |
3 | At Google, we recognize and celebrate the creativity and collaboration of open
4 | source contributors and the diversity of skills, experiences, cultures, and
5 | opinions they bring to the projects and communities they participate in.
6 |
7 | Every one of Google's open source projects and communities are inclusive
8 | environments, based on treating all individuals respectfully, regardless of
9 | gender identity and expression, sexual orientation, disabilities,
10 | neurodiversity, physical appearance, body size, ethnicity, nationality, race,
11 | age, religion, or similar personal characteristic.
12 |
13 | We value diverse opinions, but we value respectful behavior more.
14 |
15 | Respectful behavior includes:
16 |
17 | * Being considerate, kind, constructive, and helpful.
18 | * Not engaging in demeaning, discriminatory, harassing, hateful, sexualized, or
19 | physically threatening behavior, speech, and imagery.
20 | * Not engaging in unwanted physical contact.
21 |
22 | Some Google open source projects [may adopt][] an explicit project code of
23 | conduct, which may have additional detailed expectations for participants. Most
24 | of those projects will use our [modified Contributor Covenant][].
25 |
26 | [may adopt]: https://opensource.google/docs/releasing/preparing/#conduct
27 | [modified Contributor Covenant]: https://opensource.google/docs/releasing/template/CODE_OF_CONDUCT/
28 |
29 | ## Resolve peacefully
30 |
31 | We do not believe that all conflict is necessarily bad; healthy debate and
32 | disagreement often yields positive results. However, it is never okay to be
33 | disrespectful.
34 |
35 | If you see someone behaving disrespectfully, you are encouraged to address the
36 | behavior directly with those involved. Many issues can be resolved quickly and
37 | easily, and this gives people more control over the outcome of their dispute.
38 | If you are unable to resolve the matter for any reason, or if the behavior is
39 | threatening or harassing, report it. We are dedicated to providing an
40 | environment where participants feel welcome and safe.
41 |
42 | ## Reporting problems
43 |
44 | Some Google open source projects may adopt a project-specific code of conduct.
45 | In those cases, a Google employee will be identified as the Project Steward,
46 | who will receive and handle reports of code of conduct violations. In the event
47 | that a project hasn’t identified a Project Steward, you can report problems by
48 | emailing opensource@google.com.
49 |
50 | We will investigate every complaint, but you may not receive a direct response.
51 | We will use our discretion in determining when and how to follow up on reported
52 | incidents, which may range from not taking action to permanent expulsion from
53 | the project and project-sponsored spaces. We will notify the accused of the
54 | report and provide them an opportunity to discuss it before any action is
55 | taken. The identity of the reporter will be omitted from the details of the
56 | report supplied to the accused. In potentially harmful situations, such as
57 | ongoing harassment or threats to anyone's safety, we may take action without
58 | notice.
59 |
60 | *This document was adapted from the [IndieWeb Code of Conduct][] and can also
61 | be found at <https://opensource.google/conduct/>.*
62 |
63 | [IndieWeb Code of Conduct]: https://indieweb.org/code-of-conduct
64 |
--------------------------------------------------------------------------------
/docs/contributing.md:
--------------------------------------------------------------------------------
1 | # How to Contribute
2 |
3 | We'd love to accept your patches and contributions to this project. There are
4 | just a few small guidelines you need to follow.
5 |
6 | ## Contributor License Agreement
7 |
8 | Contributions to this project must be accompanied by a Contributor License
9 | Agreement. You (or your employer) retain the copyright to your contribution;
10 | this simply gives us permission to use and redistribute your contributions as
11 | part of the project. Head over to <https://cla.developers.google.com/> to see
12 | your current agreements on file or to sign a new one.
13 |
14 | You generally only need to submit a CLA once, so if you've already submitted one
15 | (even if it was for a different project), you probably don't need to do it
16 | again.
17 |
18 | ## Code reviews
19 |
20 | All submissions, including submissions by project members, require review. We
21 | use GitHub pull requests for this purpose. Consult
22 | [GitHub Help](https://help.github.com/articles/about-pull-requests/) for more
23 | information on using pull requests.
24 |
25 | ## Community Guidelines
26 |
27 | This project follows [Google's Open Source Community
28 | Guidelines](https://opensource.google/conduct/).
29 |
--------------------------------------------------------------------------------
/dvs/checkpoint/stabilzation/stabilzation_last.checkpoint:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/googleinterns/deep-stabilization/7159c09d21aee3fc2098c64698c1300e40e3a8ea/dvs/checkpoint/stabilzation/stabilzation_last.checkpoint
--------------------------------------------------------------------------------
/dvs/conf/stabilzation.yaml:
--------------------------------------------------------------------------------
1 | data:
2 | exp: 'stabilzation'
3 | checkpoints_dir: './checkpoint'
4 | log: './log'
5 | data_dir: './video'
6 | use_cuda: true
7 | batch_size: 16
8 | resize_ratio: 0.25
9 | number_real: 10
10 | number_virtual: 2
11 | time_train: 2000 # ms
12 | sample_freq: 40 # ms
13 | channel_size: 1
14 | num_workers: 16 # num_workers for data_loader
15 | model:
16 | load_model: null
17 | cnn:
18 | activate_function: relu # sigmoid, relu, tanh, quadratic
19 | batch_norm: true
20 | gap: false
21 | layers:
22 | rnn:
23 | layers:
24 | - - 512
25 | - true
26 | - - 512
27 | - true
28 | fc:
29 | activate_function: relu
30 | batch_norm: false # (batch_norm and drop_out) is False
31 | layers:
32 | - - 256
33 | - true
34 | - - 4 # last layer should be equal to nr_class
35 | - true
36 | drop_out: 0
37 | train:
38 | optimizer: "adam" # adam or sgd
39 | momentum: 0.9 # for sgd
40 | decay_epoch: null
41 | epoch: 400
42 | snapshot: 2
43 | init_lr: 0.0001
44 | lr_decay: 0.5
45 | lr_step: 200 # if > 0 decay_epoch should be null
46 | seed: 1
47 | weight_decay: 0.0001
48 | clip_norm: False
49 | init: "xavier_uniform" # xavier_uniform or xavier_normal
50 | loss:
51 | follow: 10
52 | angle: 1
53 | smooth: 10 #10
54 | c2_smooth: 200 #20
55 | undefine: 2.0
56 | opt: 0.1
57 | stay: 0
--------------------------------------------------------------------------------
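This config is consumed as a nested dictionary (the `cf` object indexed throughout `dataset.py` and the training/inference scripts). A minimal sketch of loading it, assuming PyYAML is available and the working directory is *dvs*; the printed fields are just examples:

```
# Minimal sketch (assumptions: PyYAML installed, run from dvs/). The nested dict
# mirrors how dataset.py indexes the config, e.g. cf["data"]["batch_size"].
import yaml

with open("conf/stabilzation.yaml") as f:
    cf = yaml.safe_load(f)

print(cf["data"]["batch_size"])   # 16
print(cf["train"]["optimizer"])   # "adam"
print(cf["loss"]["c2_smooth"])    # 200
```
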
/dvs/conf/stabilzation_train.yaml:
--------------------------------------------------------------------------------
1 | data:
2 | exp: 'stabilzation_train'
3 | checkpoints_dir: './checkpoint'
4 | log: './log'
5 | data_dir: './dataset_release'
6 | use_cuda: true
7 | batch_size: 16
8 | resize_ratio: 0.25
9 | number_real: 10
10 | number_virtual: 2
11 | time_train: 2000 # ms
12 | sample_freq: 40 # ms
13 | channel_size: 1
14 | num_workers: 16 # num_workers for data_loader
15 | model:
16 | load_model: null
17 | cnn:
18 | activate_function: relu # sigmoid, relu, tanh, quadratic
19 | batch_norm: true
20 | gap: false
21 | layers:
22 | rnn:
23 | layers:
24 | - - 512
25 | - true
26 | - - 512
27 | - true
28 | fc:
29 | activate_function: relu
30 | batch_norm: false # (batch_norm and drop_out) is False
31 | layers:
32 | - - 256
33 | - true
34 | - - 4 # last layer should be equal to nr_class
35 | - true
36 | drop_out: 0
37 | train:
38 | optimizer: "adam" # adam or sgd
39 | momentum: 0.9 # for sgd
40 | decay_epoch: null
41 | epoch: 400
42 | snapshot: 2
43 | init_lr: 0.0001
44 | lr_decay: 0.5
45 | lr_step: 200 # if > 0 decay_epoch should be null
46 | seed: 1
47 | weight_decay: 0.0001
48 | clip_norm: False
49 | init: "xavier_uniform" # xavier_uniform or xavier_normal
50 | loss:
51 | follow: 10
52 | angle: 1
53 | smooth: 10 #10
54 | c2_smooth: 200 #20
55 | undefine: 2.0
56 | opt: 0.1
57 | stay: 0
--------------------------------------------------------------------------------
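The training config above is identical to *stabilzation.yaml* except for the experiment name and the data directory. A minimal sketch (run from the *dvs* folder, PyYAML assumed) that confirms this programmatically:

```
# Minimal sketch: diff the "data" sections of the two configs. Only "exp" and
# "data_dir" differ between the inference and training configurations.
import yaml

with open("conf/stabilzation.yaml") as f:
    base = yaml.safe_load(f)
with open("conf/stabilzation_train.yaml") as f:
    train = yaml.safe_load(f)

diff = {k: (base["data"][k], train["data"][k])
        for k in base["data"] if base["data"][k] != train["data"][k]}
print(diff)  # {'exp': ('stabilzation', 'stabilzation_train'),
             #  'data_dir': ('./video', './dataset_release')}
```
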
/dvs/data/arial.ttf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/googleinterns/deep-stabilization/7159c09d21aee3fc2098c64698c1300e40e3a8ea/dvs/data/arial.ttf
--------------------------------------------------------------------------------
/dvs/dataset.py:
--------------------------------------------------------------------------------
1 | from torch.utils.data import Dataset
2 | import os
3 | import collections
4 | from gyro import (
5 | LoadGyroData,
6 | LoadOISData,
7 | LoadFrameData,
8 | GetGyroAtTimeStamp,
9 | get_static,
10 | GetMetadata,
11 | GetProjections,
12 | train_GetGyroAtTimeStamp,
13 | QuaternionProduct,
14 | QuaternionReciprocal,
15 | FindOISAtTimeStamp,
16 | norm_quat
17 | )
18 | import random
19 | import numpy as np
20 | import torchvision.transforms as transforms
21 | import torch
22 | from flownet2 import flow_utils
23 | from scipy import ndimage, misc
24 | from numpy import linalg as LA
25 |
26 | def get_data_loader(cf, no_flo = False):
27 | size = cf["data"]["batch_size"]
28 | num_workers = cf["data"]["num_workers"]
29 | train_data, test_data = get_dataset(cf, no_flo)
30 | trainloader = torch.utils.data.DataLoader(train_data, batch_size=size,shuffle=True, pin_memory=True, num_workers=num_workers)
31 | testloader = torch.utils.data.DataLoader(test_data, batch_size=size,shuffle=False, pin_memory=True, num_workers=num_workers)
32 | return trainloader,testloader
33 |
34 | def get_dataset(cf, no_flo = False):
35 | resize_ratio = cf["data"]["resize_ratio"]
36 | train_transform, test_transform = _data_transforms()
37 | train_path = os.path.join(cf["data"]["data_dir"], "training")
38 | test_path = os.path.join(cf["data"]["data_dir"], "test")
39 | if not os.path.exists(train_path):
40 | train_path = cf["data"]["data_dir"]
41 | if not os.path.exists(test_path):
42 | test_path = cf["data"]["data_dir"]
43 |
44 | train_data = Dataset_Gyro(
45 | train_path, sample_freq = cf["data"]["sample_freq"]*1000000, number_real = cf["data"]["number_real"],
46 | time_train = cf["data"]["time_train"]*1000000, transform = train_transform, resize_ratio = resize_ratio, no_flo = no_flo)
47 | test_data = Dataset_Gyro(
48 | test_path, sample_freq = cf["data"]["sample_freq"]*1000000, number_real = cf["data"]["number_real"],
49 | time_train = cf["data"]["time_train"]*1000000, transform = test_transform, resize_ratio = resize_ratio, no_flo = no_flo)
50 | return train_data, test_data
51 |
52 | def get_inference_data_loader(cf, data_path, no_flo = False):
53 | test_data = get_inference_dataset(cf, data_path, no_flo)
54 | testloader = torch.utils.data.DataLoader(test_data, batch_size=1,shuffle=False, pin_memory=True, num_workers=1)
55 | return testloader
56 |
57 | def get_inference_dataset(cf, data_path, no_flo = False):
58 | resize_ratio = cf["data"]["resize_ratio"]
59 | _, test_transform = _data_transforms()
60 | test_data = Dataset_Gyro(
61 | data_path, sample_freq = cf["data"]["sample_freq"]*1000000, number_real = cf["data"]["number_real"],
62 | time_train = cf["data"]["time_train"]*1000000, transform = test_transform, resize_ratio = resize_ratio,
63 | inference_only = True, no_flo = no_flo)
64 | return test_data
65 |
66 | def _data_transforms():
67 |
68 | test_transform = transforms.Compose(
69 | [transforms.ToTensor(),
70 | ])
71 | train_transform = transforms.Compose(
72 | [transforms.ToTensor(),
73 | ])
74 |
75 | return train_transform, test_transform
76 |
77 | class DVS_data():
78 | def __init__(self):
79 | self.gyro = None
80 | self.ois = None
81 | self.frame = None
82 | self.length = 0
83 | self.flo_path = None
84 | self.flo_shape = None
85 | self.flo_back_path = None
86 |
87 | class Dataset_Gyro(Dataset):
88 | def __init__(self, path, sample_freq = 33*1000000, number_real = 10, time_train = 2000*1000000, \
89 | transform = None, inference_only = False, no_flo = False, resize_ratio = 1):
90 | r"""
91 | Arguments:
92 | sample_freq: real quaternions [t-sample_freq*number_real, t+sample_freq*number_real] ns
93 | number_real: real gyro num in half time_interval
94 | time_train: time for a batch ns
95 | """
96 | self.sample_freq = sample_freq
97 | self.number_real = number_real
98 | self.no_flo = no_flo
99 | self.resize_ratio = resize_ratio
100 | self.static_options = get_static()
101 | self.inference_only = inference_only
102 |
103 | self.ois_ratio = np.array([self.static_options["crop_window_width"] / self.static_options["width"], \
104 | self.static_options["crop_window_height"] / self.static_options["height"]]) * 0.01
105 | self.unit_size = 4
106 |
107 | if inference_only:
108 | self.length = 1
109 | self.data = [self.process_one_video(path)]
110 | self.number_train = self.data[0].length
111 | return
112 |
113 | self.time_train = time_train
114 | self.number_train = time_train//self.sample_freq
115 |
116 | self.data_name = sorted(os.listdir(path))
117 | self.length = len(self.data_name)
118 | self.data = []
119 | for i in range(self.length):
120 | self.data.append(self.process_one_video(os.path.join(path,self.data_name[i])))
121 |
122 | def process_one_video(self, path):
123 | dvs_data = DVS_data()
124 | files = sorted(os.listdir(path))
125 | print(path)
126 | for f in files:
127 | file_path = os.path.join(path,f)
128 | if "gimbal" in file_path.lower():
129 | continue
130 | if "frame" in f and "txt" in f:
131 | dvs_data.frame = LoadFrameData(file_path)
132 | print("frame:", dvs_data.frame.shape, end=" ")
133 | elif "gyro" in f:
134 | dvs_data.gyro = LoadGyroData(file_path)
135 | dvs_data.gyro = preprocess_gyro(dvs_data.gyro)
136 | print("gyro:", dvs_data.gyro.shape, end=" ")
137 | elif "ois" in f and "txt" in f:
138 | dvs_data.ois = LoadOISData(file_path)
139 | print("ois:", dvs_data.ois.shape, end=" ")
140 | elif f == "flo":
141 | dvs_data.flo_path, dvs_data.flo_shape = LoadFlow(file_path)
142 | print("flo_path:", len(dvs_data.flo_path), end=" ")
143 | print("flo_shape:", dvs_data.flo_shape, end=" ")
144 | elif f == "flo_back":
145 | dvs_data.flo_back_path, _ = LoadFlow(file_path)
146 |
147 | print()
148 | if dvs_data.flo_path is not None:
149 | dvs_data.length = min(dvs_data.frame.shape[0] - 1, len(dvs_data.flo_path))
150 | else:
151 | dvs_data.length = dvs_data.frame.shape[0] - 1
152 | return dvs_data
153 |
154 | def generate_quaternions(self, dvs_data):
155 | first_id = random.randint(0, dvs_data.length - self.number_train) + 1 # skip the first frame
156 |
157 | sample_data = np.zeros((self.number_train, 2 * self.number_real + 1, self.unit_size), dtype=np.float32)
158 | sample_ois = np.zeros((self.number_train, 2), dtype=np.float32)
159 |
160 | sample_time = np.zeros((self.number_train+1), dtype=np.float32)
161 | sample_time[0] = get_timestamp(dvs_data.frame, first_id - 1)
162 |
163 | real_postion = np.zeros((self.number_train, 4), dtype=np.float32)
164 |
165 | time_start = sample_time[0]
166 |
167 | for i in range(self.number_train):
168 | sample_time[i+1] = get_timestamp(dvs_data.frame, first_id + i)
169 | real_postion[i] = GetGyroAtTimeStamp(dvs_data.gyro, sample_time[i+1] - self.sample_freq)
170 | sample_ois[i] = self.get_ois_at_timestamp(dvs_data.ois, sample_time[i+1])
171 | for j in range(-self.number_real, self.number_real+1):
172 | index = j + self.number_real
173 | time_stamp = sample_time[i+1] + self.sample_freq * j
174 | sample_data[i, index] = self.get_data_at_timestamp(dvs_data.gyro, dvs_data.ois, time_stamp, real_postion[i])
175 |
176 | sample_data = np.reshape(sample_data, (self.number_train, (2*self.number_real+1) * self.unit_size))
177 | return sample_data, sample_time, first_id, real_postion, sample_ois
178 |
179 | def load_flo(self, idx, first_id):
180 | shape = self.data[idx].flo_shape
181 | h, w = shape[0], shape[1]
182 | flo = np.zeros((self.number_train, h, w, 2))
183 | flo_back = np.zeros((self.number_train, h, w, 2))
184 |
185 | for i in range(self.number_train):
186 | frame_id = i + first_id
187 | f = flow_utils.readFlow(self.data[idx].flo_path[frame_id-1]).astype(np.float32)
188 | flo[i] = f
189 |
190 | f_b = flow_utils.readFlow(self.data[idx].flo_back_path[frame_id-1]).astype(np.float32)
191 | flo_back[i] = f_b
192 |
193 | return flo, flo_back
194 |
195 | def load_real_projections(self, idx, first_id):
196 | real_projections = np.zeros((self.number_train + 1, self.static_options["num_grid_rows"], 3, 3))
197 | for i in range(self.number_train + 1):
198 | frame_id = i + first_id
199 | metadata = GetMetadata(self.data[idx].frame, frame_id - 1)
200 | real_projections[i] = np.array(GetProjections(self.static_options, metadata, self.data[idx].gyro, np.zeros(self.data[idx].ois.shape), no_shutter = True))
201 | return real_projections
202 |
203 | def __getitem__(self, idx):
204 | inputs, times, first_id, real_postion, ois = self.generate_quaternions(self.data[idx])
205 | real_projections = self.load_real_projections(idx, first_id)
206 | if self.no_flo:
207 | flo, flo_back = 0, 0
208 | else:
209 | flo, flo_back = self.load_flo(idx, first_id)
210 | return inputs, times, flo, flo_back, real_projections, real_postion, ois, idx
211 |
212 | def __len__(self):
213 | return self.length
214 |
215 | def get_virtual_data(self, virtual_queue, real_queue_idx, pre_times, cur_times, time_start, batch_size, number_virtual, quat_t_1):
216 | # virtual_queue: [batch_size, num, 5 (timestamp, quats)]
217 | # eular angle,
218 | # deta R angular velocity [Q't-1, Q't-2]
219 | # output virtual angular velocity, x, x*dtime => detaQt
220 | virtual_data = np.zeros((batch_size, number_virtual, 4), dtype=np.float32)
221 | vt_1 = np.zeros((batch_size, 4), dtype=np.float32)
222 | quat_t_1 = quat_t_1.numpy()
223 | for i in range(batch_size):
224 | sample_time = cur_times[i]
225 | for j in range(number_virtual):
226 | time_stamp = sample_time - self.sample_freq * (number_virtual - j)
227 | virtual_data[i, j] = get_virtual_at_timestamp(virtual_queue[i], self.data[real_queue_idx[i]].gyro, time_stamp, time_start[i], quat_t_1[i])
228 | vt_1[i] = get_virtual_at_timestamp(virtual_queue[i], self.data[real_queue_idx[i]].gyro, pre_times[i], time_start[i], None)
229 | virtual_data = np.reshape(virtual_data, (batch_size, number_virtual * 4))
230 | return torch.tensor(virtual_data, dtype=torch.float), torch.tensor(vt_1, dtype=torch.float)
231 |
232 | def update_virtual_queue(self, batch_size, virtual_queue, out, times):
233 | virtual_data = np.zeros((batch_size, 5))
234 | virtual_data[:,0] = times
235 | virtual_data[:, 1:] = out
236 | virtual_data = np.expand_dims(virtual_data, axis = 1)
237 |
238 | if None in virtual_queue:
239 | virtual_queue = virtual_data
240 | else:
241 | virtual_queue = np.concatenate((virtual_queue, virtual_data), axis = 1)
242 | return virtual_queue
243 |
244 | def random_init_virtual_queue(self, batch_size, real_postion, times):
245 | virtual_queue = np.zeros((batch_size, 3, 5))
246 | virtual_queue[:, 2, 0] = times - 0.1 * self.sample_freq
247 | virtual_queue[:, 1, 0] = times - 1.1 * self.sample_freq
248 | virtual_queue[:, 0, 0] = times - 2.1 * self.sample_freq
249 | for i in range(batch_size):
250 | quat = np.random.uniform(low=-0.06, high= 0.06, size=4) # transfer to angle # 0.05
251 | quat[3] = 1
252 | quat = quat / LA.norm(quat)
253 | quat = norm_quat(QuaternionProduct(real_postion[i], quat))
254 | virtual_queue[i, 2, 1:] = quat
255 | virtual_queue[i, 1, 1:] = quat
256 | virtual_queue[i, 0, 1:] = quat
257 | return virtual_queue
258 |
259 | def get_data_at_timestamp(self, gyro_data, ois_data, time_stamp, quat_t_1):
260 | quat_t = GetGyroAtTimeStamp(gyro_data, time_stamp)
261 | quat_dif = QuaternionProduct(quat_t, QuaternionReciprocal(quat_t_1))
262 | return quat_dif
263 |
264 | def get_ois_at_timestamp(self, ois_data, time_stamp):
265 | ois_t = FindOISAtTimeStamp(ois_data, time_stamp)
266 | ois_t = np.array(ois_t) / self.ois_ratio
267 | return ois_t
268 |
269 | def get_timestamp(frame_data, idx):
270 | sample_time = frame_data[idx, 0]
271 | metadata = GetMetadata(frame_data, idx)
272 | timestmap_ns = metadata["timestamp_ns"] + metadata["rs_time_ns"] * 0.5
273 | return timestmap_ns
274 |
275 | def preprocess_gyro(gyro, extend = 200):
276 | fake_gyro = np.zeros((extend, 5))
277 | time_start = gyro[0,0]
278 | for i in range(extend):
279 | fake_gyro[-i-1, 0] = time_start - (gyro[i+1, 0] - time_start)
280 | fake_gyro[-i-1, 4] = gyro[i+1, 4]
281 | fake_gyro[-i-1, 1:4] = -gyro[i+1, 1:4]
282 |
283 | new_gyro = np.concatenate((fake_gyro, gyro), axis = 0)
284 | return new_gyro
285 |
286 | def LoadFlow(path):
287 | file_names = sorted(os.listdir(path))
288 | file_path =[]
289 | for n in file_names:
290 | file_path.append(os.path.join(path, n))
291 | return file_path, flow_utils.readFlow(file_path[0]).shape
292 |
293 | def get_virtual_at_timestamp(virtual_queue, real_queue, time_stamp, time_start, quat_t_1 = None, sample_freq = None):
294 | if virtual_queue is None:
295 | quat_t = GetGyroAtTimeStamp(real_queue, time_stamp)
296 | else:
297 | quat_t = train_GetGyroAtTimeStamp(virtual_queue, time_stamp)
298 | if quat_t is None:
299 | quat_t = GetGyroAtTimeStamp(real_queue, time_stamp)
300 |
301 | if quat_t_1 is None:
302 | return quat_t
303 | else:
304 | quat_dif = QuaternionProduct(quat_t, QuaternionReciprocal(quat_t_1))
305 | return quat_dif
306 |
--------------------------------------------------------------------------------
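For reference, a minimal sketch of how `get_inference_data_loader` can be driven. The config path, the sample-video path, and the `no_flo=True` shortcut are assumptions based on the README, not code taken from `inference.py`:

```
# Minimal sketch (assumptions: run from dvs/, sample video unpacked under
# ./video). With no_flo=True the flow entries come back as 0 placeholders.
import yaml
from dataset import get_inference_data_loader

with open("conf/stabilzation.yaml") as f:
    cf = yaml.safe_load(f)

video_dir = "./video/s_114_outdoor_running_trail_daytime"  # illustrative path
loader = get_inference_data_loader(cf, video_dir, no_flo=True)

# __getitem__ returns: inputs, times, flo, flo_back, real_projections,
# real_postion, ois, idx.
inputs, times, flo, flo_back, real_proj, real_pos, ois, idx = next(iter(loader))
print(inputs.shape)  # [1, number_train, (2*number_real+1)*4] quaternion inputs
```
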
/dvs/flownet2/LICENSE:
--------------------------------------------------------------------------------
1 | Copyright 2017 NVIDIA CORPORATION
2 |
3 | Licensed under the Apache License, Version 2.0 (the "License");
4 | you may not use this file except in compliance with the License.
5 | You may obtain a copy of the License at
6 |
7 | http://www.apache.org/licenses/LICENSE-2.0
8 |
9 | Unless required by applicable law or agreed to in writing, software
10 | distributed under the License is distributed on an "AS IS" BASIS,
11 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | See the License for the specific language governing permissions and
13 | limitations under the License.
--------------------------------------------------------------------------------
/dvs/flownet2/README.md:
--------------------------------------------------------------------------------
1 | # flownet2-pytorch
2 |
3 | Pytorch implementation of [FlowNet 2.0: Evolution of Optical Flow Estimation with Deep Networks](https://arxiv.org/abs/1612.01925).
4 |
5 | Multiple GPU training is supported, and the code provides examples for training or inference on [MPI-Sintel](http://sintel.is.tue.mpg.de/) clean and final datasets. The same commands can be used for training or inference with other datasets. See below for more detail.
6 |
7 | Inference using fp16 (half-precision) is also supported.
8 |
9 | For more help, type
10 |
11 | python main.py --help
12 |
13 | ## Network architectures
14 | Below are the different flownet neural network architectures that are provided.
15 | A batchnorm version for each network is also available.
16 |
17 | - **FlowNet2S**
18 | - **FlowNet2C**
19 | - **FlowNet2CS**
20 | - **FlowNet2CSS**
21 | - **FlowNet2SD**
22 | - **FlowNet2**
23 |
24 | ## Custom layers
25 |
26 | `FlowNet2` or `FlowNet2C*` architectures rely on the custom layers `Resample2d` or `Correlation`.
27 | A pytorch implementation of these layers with cuda kernels is available at [./networks](./networks).
28 | Note : Currently, half precision kernels are not available for these layers.
29 |
30 | ## Data Loaders
31 |
32 | Dataloaders for FlyingChairs, FlyingThings, ChairsSDHom and ImagesFromFolder are available in [datasets.py](./datasets.py).
33 |
34 | ## Loss Functions
35 |
36 | L1 and L2 losses with multi-scale support are available in [losses.py](./losses.py).
37 |
38 | ## Installation
39 |
40 | # get flownet2-pytorch source
41 | git clone https://github.com/NVIDIA/flownet2-pytorch.git
42 | cd flownet2-pytorch
43 |
44 | # install custom layers
45 | bash install.sh
46 |
47 | ### Python requirements
48 | Currently, the code supports Python 3 with the following packages:
49 | * numpy
50 | * PyTorch ( == 0.4.1, for <= 0.4.0 see branch [python36-PyTorch0.4](https://github.com/NVIDIA/flownet2-pytorch/tree/python36-PyTorch0.4))
51 | * scipy
52 | * scikit-image
53 | * tensorboardX
54 | * colorama, tqdm, setproctitle
55 |
56 | ## Converted Caffe Pre-trained Models
57 | We've included caffe pre-trained models. Should you use these pre-trained weights, please adhere to the [license agreements](https://drive.google.com/file/d/1TVv0BnNFh3rpHZvD-easMb9jYrPE2Eqd/view?usp=sharing).
58 |
59 | * [FlowNet2](https://drive.google.com/file/d/1hF8vS6YeHkx3j2pfCeQqqZGwA_PJq_Da/view?usp=sharing)[620MB]
60 | * [FlowNet2-C](https://drive.google.com/file/d/1BFT6b7KgKJC8rA59RmOVAXRM_S7aSfKE/view?usp=sharing)[149MB]
61 | * [FlowNet2-CS](https://drive.google.com/file/d/1iBJ1_o7PloaINpa8m7u_7TsLCX0Dt_jS/view?usp=sharing)[297MB]
62 | * [FlowNet2-CSS](https://drive.google.com/file/d/157zuzVf4YMN6ABAQgZc8rRmR5cgWzSu8/view?usp=sharing)[445MB]
63 | * [FlowNet2-CSS-ft-sd](https://drive.google.com/file/d/1R5xafCIzJCXc8ia4TGfC65irmTNiMg6u/view?usp=sharing)[445MB]
64 | * [FlowNet2-S](https://drive.google.com/file/d/1V61dZjFomwlynwlYklJHC-TLfdFom3Lg/view?usp=sharing)[148MB]
65 | * [FlowNet2-SD](https://drive.google.com/file/d/1QW03eyYG_vD-dT-Mx4wopYvtPu_msTKn/view?usp=sharing)[173MB]
66 |
67 | ## Inference
68 | # Example on MPISintel Clean
69 | python main.py --inference --model FlowNet2 --save_flow --inference_dataset MpiSintelClean \
70 | --inference_dataset_root /path/to/mpi-sintel/clean/dataset \
71 | --resume /path/to/checkpoints
72 |
73 | ## Training and validation
74 |
75 | # Example on MPISintel Final and Clean, with L1Loss on FlowNet2 model
76 | python main.py --batch_size 8 --model FlowNet2 --loss=L1Loss --optimizer=Adam --optimizer_lr=1e-4 \
77 | --training_dataset MpiSintelFinal --training_dataset_root /path/to/mpi-sintel/final/dataset \
78 | --validation_dataset MpiSintelClean --validation_dataset_root /path/to/mpi-sintel/clean/dataset
79 |
80 | # Example on MPISintel Final and Clean, with MultiScale loss on FlowNet2C model
81 | python main.py --batch_size 8 --model FlowNet2C --optimizer=Adam --optimizer_lr=1e-4 --loss=MultiScale --loss_norm=L1 \
82 | --loss_numScales=5 --loss_startScale=4 --optimizer_lr=1e-4 --crop_size 384 512 \
83 | --training_dataset FlyingChairs --training_dataset_root /path/to/flying-chairs/dataset \
84 | --validation_dataset MpiSintelClean --validation_dataset_root /path/to/mpi-sintel/clean/dataset
85 |
86 | ## Results on MPI-Sintel
87 | [Predicted flows on MPI-Sintel](https://www.youtube.com/watch?v=HtBmabY8aeU "Predicted flows on MPI-Sintel")
88 |
89 | ## Reference
90 | If you find this implementation useful in your work, please acknowledge it appropriately and cite the paper:
91 | ````
92 | @InProceedings{IMKDB17,
93 | author = "E. Ilg and N. Mayer and T. Saikia and M. Keuper and A. Dosovitskiy and T. Brox",
94 | title = "FlowNet 2.0: Evolution of Optical Flow Estimation with Deep Networks",
95 | booktitle = "IEEE Conference on Computer Vision and Pattern Recognition (CVPR)",
96 | month = "Jul",
97 | year = "2017",
98 | url = "http://lmb.informatik.uni-freiburg.de//Publications/2017/IMKDB17"
99 | }
100 | ````
101 | ```
102 | @misc{flownet2-pytorch,
103 | author = {Fitsum Reda and Robert Pottorff and Jon Barker and Bryan Catanzaro},
104 | title = {flownet2-pytorch: Pytorch implementation of FlowNet 2.0: Evolution of Optical Flow Estimation with Deep Networks},
105 | year = {2017},
106 | publisher = {GitHub},
107 | journal = {GitHub repository},
108 | howpublished = {\url{https://github.com/NVIDIA/flownet2-pytorch}}
109 | }
110 | ```
111 | ## Related Optical Flow Work from Nvidia
112 | Code (in Caffe and Pytorch): [PWC-Net](https://github.com/NVlabs/PWC-Net)
113 | Paper : [PWC-Net: CNNs for Optical Flow Using Pyramid, Warping, and Cost Volume](https://arxiv.org/abs/1709.02371).
114 |
115 | ## Acknowledgments
116 | Parts of this code were derived, as noted in the code, from [ClementPinard/FlowNetPytorch](https://github.com/ClementPinard/FlowNetPytorch).
117 |
--------------------------------------------------------------------------------
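For completeness, a minimal sketch of constructing FlowNet2 the same way `convert.py` does and loading a converted checkpoint. It assumes the custom CUDA layers from `install.sh` are already built, that the script runs from the *dvs/flownet2* folder, and that the checkpoint file sits there; the `Namespace` stand-in mirrors the `rgb_max`/`fp16` fields `convert.py` sets on its args object.

```
# Minimal sketch, mirroring convert.py: build FlowNet2 with an args object
# carrying rgb_max and fp16, then load the weights stored under "state_dict".
# Paths and the Namespace stand-in are assumptions, not the repo's own script.
import torch
from argparse import Namespace
import models

args = Namespace(rgb_max=255.0, fp16=False)
model = models.FlowNet2(args)

ckpt = torch.load("FlowNet2_checkpoint.pth.tar", map_location="cpu")
model.load_state_dict(ckpt["state_dict"])
model.eval()
```
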
/dvs/flownet2/__init__.py:
--------------------------------------------------------------------------------
1 | from .utils import flow_utils, tools
--------------------------------------------------------------------------------
/dvs/flownet2/convert.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python2.7
2 |
3 | import caffe
4 | from caffe.proto import caffe_pb2
5 | import sys, os
6 |
7 | import torch
8 | import torch.nn as nn
9 |
10 | import argparse, tempfile
11 | import numpy as np
12 |
13 | parser = argparse.ArgumentParser()
14 | parser.add_argument('caffe_model', help='input model in hdf5 or caffemodel format')
15 | parser.add_argument('prototxt_template',help='prototxt template')
16 | parser.add_argument('flownet2_pytorch', help='path to flownet2-pytorch')
17 |
18 | args = parser.parse_args()
19 |
20 | args.rgb_max = 255
21 | args.fp16 = False
22 | args.grads = {}
23 |
24 | # load models
25 | sys.path.append(args.flownet2_pytorch)
26 |
27 | import models
28 | from utils.param_utils import *
29 |
30 | width = 256
31 | height = 256
32 | keys = {'TARGET_WIDTH': width,
33 | 'TARGET_HEIGHT': height,
34 | 'ADAPTED_WIDTH':width,
35 | 'ADAPTED_HEIGHT':height,
36 | 'SCALE_WIDTH':1.,
37 | 'SCALE_HEIGHT':1.,}
38 |
39 | template = '\n'.join(np.loadtxt(args.prototxt_template, dtype=str, delimiter='\n'))
40 | for k in keys:
41 | template = template.replace('$%s$'%(k),str(keys[k]))
42 |
43 | prototxt = tempfile.NamedTemporaryFile(mode='w', delete=True)
44 | prototxt.write(template)
45 | prototxt.flush()
46 |
47 | net = caffe.Net(prototxt.name, args.caffe_model, caffe.TEST)
48 |
49 | weights = {}
50 | biases = {}
51 |
52 | for k, v in list(net.params.items()):
53 | weights[k] = np.array(v[0].data).reshape(v[0].data.shape)
54 | biases[k] = np.array(v[1].data).reshape(v[1].data.shape)
55 | print((k, weights[k].shape, biases[k].shape))
56 |
57 | if 'FlowNet2/' in args.caffe_model:
58 | model = models.FlowNet2(args)
59 |
60 | parse_flownetc(model.flownetc.modules(), weights, biases)
61 | parse_flownets(model.flownets_1.modules(), weights, biases, param_prefix='net2_')
62 | parse_flownets(model.flownets_2.modules(), weights, biases, param_prefix='net3_')
63 | parse_flownetsd(model.flownets_d.modules(), weights, biases, param_prefix='netsd_')
64 | parse_flownetfusion(model.flownetfusion.modules(), weights, biases, param_prefix='fuse_')
65 |
66 | state = {'epoch': 0,
67 | 'state_dict': model.state_dict(),
68 | 'best_EPE': 1e10}
69 | torch.save(state, os.path.join(args.flownet2_pytorch, 'FlowNet2_checkpoint.pth.tar'))
70 |
71 | elif 'FlowNet2-C/' in args.caffe_model:
72 | model = models.FlowNet2C(args)
73 |
74 | parse_flownetc(model.modules(), weights, biases)
75 | state = {'epoch': 0,
76 | 'state_dict': model.state_dict(),
77 | 'best_EPE': 1e10}
78 | torch.save(state, os.path.join(args.flownet2_pytorch, 'FlowNet2-C_checkpoint.pth.tar'))
79 |
80 | elif 'FlowNet2-CS/' in args.caffe_model:
81 | model = models.FlowNet2CS(args)
82 |
83 | parse_flownetc(model.flownetc.modules(), weights, biases)
84 | parse_flownets(model.flownets_1.modules(), weights, biases, param_prefix='net2_')
85 |
86 | state = {'epoch': 0,
87 | 'state_dict': model.state_dict(),
88 | 'best_EPE': 1e10}
89 | torch.save(state, os.path.join(args.flownet2_pytorch, 'FlowNet2-CS_checkpoint.pth.tar'))
90 |
91 | elif 'FlowNet2-CSS/' in args.caffe_model:
92 | model = models.FlowNet2CSS(args)
93 |
94 | parse_flownetc(model.flownetc.modules(), weights, biases)
95 | parse_flownets(model.flownets_1.modules(), weights, biases, param_prefix='net2_')
96 | parse_flownets(model.flownets_2.modules(), weights, biases, param_prefix='net3_')
97 |
98 | state = {'epoch': 0,
99 | 'state_dict': model.state_dict(),
100 | 'best_EPE': 1e10}
101 | torch.save(state, os.path.join(args.flownet2_pytorch, 'FlowNet2-CSS_checkpoint.pth.tar'))
102 |
103 | elif 'FlowNet2-CSS-ft-sd/' in args.caffe_model:
104 | model = models.FlowNet2CSS(args)
105 |
106 | parse_flownetc(model.flownetc.modules(), weights, biases)
107 | parse_flownets(model.flownets_1.modules(), weights, biases, param_prefix='net2_')
108 | parse_flownets(model.flownets_2.modules(), weights, biases, param_prefix='net3_')
109 |
110 | state = {'epoch': 0,
111 | 'state_dict': model.state_dict(),
112 | 'best_EPE': 1e10}
113 | torch.save(state, os.path.join(args.flownet2_pytorch, 'FlowNet2-CSS-ft-sd_checkpoint.pth.tar'))
114 |
115 | elif 'FlowNet2-S/' in args.caffe_model:
116 | model = models.FlowNet2S(args)
117 |
118 | parse_flownetsonly(model.modules(), weights, biases, param_prefix='')
119 | state = {'epoch': 0,
120 | 'state_dict': model.state_dict(),
121 | 'best_EPE': 1e10}
122 | torch.save(state, os.path.join(args.flownet2_pytorch, 'FlowNet2-S_checkpoint.pth.tar'))
123 |
124 | elif 'FlowNet2-SD/' in args.caffe_model:
125 | model = models.FlowNet2SD(args)
126 |
127 | parse_flownetsd(model.modules(), weights, biases, param_prefix='')
128 |
129 | state = {'epoch': 0,
130 | 'state_dict': model.state_dict(),
131 | 'best_EPE': 1e10}
132 | torch.save(state, os.path.join(args.flownet2_pytorch, 'FlowNet2-SD_checkpoint.pth.tar'))
133 |
134 | else:
135 | print(('model type could not be determined from input caffe model %s'%(args.caffe_model)))
136 | quit()
137 | print(("done converting ", args.caffe_model))
--------------------------------------------------------------------------------
/dvs/flownet2/install.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | cd ./networks/correlation_package
3 | rm -rf *_cuda.egg-info build dist __pycache__
4 | python3 setup.py install --user
5 |
6 | cd ../resample2d_package
7 | rm -rf *_cuda.egg-info build dist __pycache__
8 | python3 setup.py install --user
9 |
10 | cd ../channelnorm_package
11 | rm -rf *_cuda.egg-info build dist __pycache__
12 | python3 setup.py install --user
13 |
14 | cd ..
15 |
--------------------------------------------------------------------------------
/dvs/flownet2/losses.py:
--------------------------------------------------------------------------------
1 | '''
2 | Portions of this code copyright 2017, Clement Pinard
3 | '''
4 |
5 | # freda (todo) : adversarial loss
6 |
7 | import torch
8 | import torch.nn as nn
9 | import math
10 |
11 | def EPE(input_flow, target_flow):
12 | return torch.norm(target_flow-input_flow,p=2,dim=1).mean()
13 |
14 | class L1(nn.Module):
15 | def __init__(self):
16 | super(L1, self).__init__()
17 | def forward(self, output, target):
18 | lossvalue = torch.abs(output - target).mean()
19 | return lossvalue
20 |
21 | class L2(nn.Module):
22 | def __init__(self):
23 | super(L2, self).__init__()
24 | def forward(self, output, target):
25 | lossvalue = torch.norm(output-target,p=2,dim=1).mean()
26 | return lossvalue
27 |
28 | class L1Loss(nn.Module):
29 | def __init__(self, args):
30 | super(L1Loss, self).__init__()
31 | self.args = args
32 | self.loss = L1()
33 | self.loss_labels = ['L1', 'EPE']
34 |
35 | def forward(self, output, target):
36 | lossvalue = self.loss(output, target)
37 | epevalue = EPE(output, target)
38 | return [lossvalue, epevalue]
39 |
40 | class L2Loss(nn.Module):
41 | def __init__(self, args):
42 | super(L2Loss, self).__init__()
43 | self.args = args
44 | self.loss = L2()
45 | self.loss_labels = ['L2', 'EPE']
46 |
47 | def forward(self, output, target):
48 | lossvalue = self.loss(output, target)
49 | epevalue = EPE(output, target)
50 | return [lossvalue, epevalue]
51 |
52 | class MultiScale(nn.Module):
53 | def __init__(self, args, startScale = 4, numScales = 5, l_weight= 0.32, norm= 'L1'):
54 | super(MultiScale,self).__init__()
55 |
56 | self.startScale = startScale
57 | self.numScales = numScales
58 | self.loss_weights = torch.FloatTensor([(l_weight / 2 ** scale) for scale in range(self.numScales)])
59 | self.args = args
60 | self.l_type = norm
61 | self.div_flow = 0.05
62 | assert(len(self.loss_weights) == self.numScales)
63 |
64 | if self.l_type == 'L1':
65 | self.loss = L1()
66 | else:
67 | self.loss = L2()
68 |
69 | self.multiScales = [nn.AvgPool2d(self.startScale * (2**scale), self.startScale * (2**scale)) for scale in range(self.numScales)]
70 | self.loss_labels = ['MultiScale-'+self.l_type, 'EPE'],
71 |
72 | def forward(self, output, target):
73 | lossvalue = 0
74 | epevalue = 0
75 |
76 | if type(output) is tuple:
77 | target = self.div_flow * target
78 | for i, output_ in enumerate(output):
79 | target_ = self.multiScales[i](target)
80 | epevalue += self.loss_weights[i]*EPE(output_, target_)
81 | lossvalue += self.loss_weights[i]*self.loss(output_, target_)
82 | return [lossvalue, epevalue]
83 | else:
84 | epevalue += EPE(output, target)
85 | lossvalue += self.loss(output, target)
86 | return [lossvalue, epevalue]
87 |
88 |
--------------------------------------------------------------------------------
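A minimal sketch of exercising these losses on random flow-shaped tensors (batch, 2, H, W). The loss classes above only store the `args` object they receive, so an empty `Namespace` stands in for the real argparse result here; the tensor shapes are illustrative.

```
# Minimal sketch: instantiate L1Loss and MultiScale with a dummy args object and
# run them on random (B, 2, H, W) flow tensors. Each returns [loss, EPE].
import torch
from argparse import Namespace
from losses import L1Loss, MultiScale

pred = torch.randn(4, 2, 64, 64)
target = torch.randn(4, 2, 64, 64)

l1_loss, l1_epe = L1Loss(Namespace())(pred, target)
ms_loss, ms_epe = MultiScale(Namespace(), norm='L1')(pred, target)
print(l1_loss.item(), l1_epe.item(), ms_loss.item(), ms_epe.item())
```
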
/dvs/flownet2/main.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | import os
3 | os.environ["CUDA_VISIBLE_DEVICES"] = "0"
4 | import torch
5 | import torch.nn as nn
6 | from torch.utils.data import DataLoader
7 | from torch.autograd import Variable
8 | from tensorboardX import SummaryWriter
9 |
10 | import argparse, os, sys, subprocess
11 | import colorama
12 | import numpy as np
13 | from tqdm import tqdm
14 | from glob import glob
15 | from os.path import *
16 |
17 | import models, datasets
18 | from utils import flow_utils, tools
19 | import time
20 |
21 | # Reusable function for inference
22 | def inference(args, epoch, data_path, data_loader, model, offset=0):
23 |
24 | model.eval()
25 |
26 | if args.save_flow or args.render_validation:
27 | flow_folder = "{}/flo".format(data_path)
28 | flow_back_folder = "{}/flo_back".format(data_path)
29 | if not os.path.exists(flow_folder):
30 | os.makedirs(flow_folder)
31 | if not os.path.exists(flow_back_folder):
32 | os.makedirs(flow_back_folder)
33 |
34 | # visualization folder
35 | if args.inference_visualize:
36 | flow_vis_folder = "{}/flo_vis".format(data_path)
37 | if not os.path.exists(flow_vis_folder):
38 | os.makedirs(flow_vis_folder)
39 | flow_back_vis_folder = "{}/flo_back_vis".format(data_path)
40 | if not os.path.exists(flow_back_vis_folder):
41 | os.makedirs(flow_back_vis_folder)
42 |
43 | args.inference_n_batches = np.inf if args.inference_n_batches < 0 else args.inference_n_batches
44 |
45 | progress = tqdm(data_loader, ncols=100, total=np.minimum(len(data_loader), args.inference_n_batches), desc='Inferencing ',
46 | leave=True, position=offset)
47 |
48 | for batch_idx, (data) in enumerate(progress):
49 | data = data[0]
50 | data_back = torch.cat((data[:,:,1:,:,:], data[:,:,:1,:,:]), dim = 2)
51 | if args.cuda:
52 | data_forward = data.cuda(non_blocking=True)
53 | data_back = data_back.cuda(non_blocking=True)
54 | data_forward = Variable(data_forward)
55 | data_back = Variable(data_back)
56 |
57 | flo_path = join(flow_folder, '%06d.flo'%(batch_idx))
58 | flo_back_path = join(flow_back_folder, '%06d.flo'%(batch_idx))
59 | frame_size = data_loader.dataset.frame_size
60 | if not os.path.exists(flo_path):
61 | with torch.no_grad():
62 | output = model(data_forward)[:,:,:frame_size[0], :frame_size[1]]
63 | if args.save_flow or args.render_validation:
64 | _pflow = output[0].data.cpu().numpy().transpose(1, 2, 0)
65 | flow_utils.writeFlow( flo_path, _pflow)
66 | if args.inference_visualize:
67 | flow_utils.visulize_flow_file(
68 | join(flow_folder, '%06d.flo' % (batch_idx)),flow_vis_folder)
69 |
70 | if not os.path.exists(flo_back_path):
71 | with torch.no_grad():
72 | output = model(data_back)[:,:,:frame_size[0], :frame_size[1]]
73 | if args.save_flow or args.render_validation:
74 | _pflow = output[0].data.cpu().numpy().transpose(1, 2, 0)
75 | flow_utils.writeFlow( flo_back_path, _pflow)
76 | if args.inference_visualize:
77 | flow_utils.visulize_flow_file(
78 | join(flow_back_folder, '%06d.flo' % (batch_idx)), flow_back_vis_folder)
79 |
80 | progress.update(1)
81 |
82 | if batch_idx == (args.inference_n_batches - 1):
83 | break
84 | progress.close()
85 | return
86 |
87 | if __name__ == '__main__':
88 | parser = argparse.ArgumentParser()
89 | parser.add_argument('--fp16', action='store_true', help='Run model in pseudo-fp16 mode (fp16 storage fp32 math).')
90 | parser.add_argument('--fp16_scale', type=float, default=1024., help='Loss scaling, positive power of 2 values can improve fp16 convergence.')
91 |
92 | parser.add_argument('--start_epoch', type=int, default=1)
93 | parser.add_argument('--batch_size', '-b', type=int, default=8, help="Batch size")
94 | parser.add_argument('--crop_size', type=int, nargs='+', default = [256, 256], help="Spatial dimension to crop training samples for training")
95 | parser.add_argument("--rgb_max", type=float, default = 255.)
96 |
97 | parser.add_argument('--number_workers', '-nw', '--num_workers', type=int, default=8)
98 | parser.add_argument('--number_gpus', '-ng', type=int, default=-1, help='number of GPUs to use')
99 | parser.add_argument('--no_cuda', action='store_true')
100 |
101 | parser.add_argument('--save', '-s', default='./Google', type=str, help='directory for saving')
102 |
103 | parser.add_argument('--inference', action='store_true')
104 | parser.add_argument('--inference_visualize', action='store_true',
105 | help="visualize the optical flow during inference")
106 | parser.add_argument('--inference_size', type=int, nargs='+', default = [-1,-1], help='spatial size divisible by 64. default (-1,-1) - largest possible valid size would be used')
107 | parser.add_argument('--inference_batch_size', type=int, default=1)
108 | parser.add_argument('--inference_n_batches', type=int, default=-1)
109 | parser.add_argument('--save_flow', action='store_true', help='save predicted flows to file')
110 |
111 | parser.add_argument('--resume', default='', type=str, metavar='PATH', help='path to latest checkpoint (default: none)')
112 | parser.add_argument('--log_frequency', '--summ_iter', type=int, default=1, help="Log every n batches")
113 |
114 | tools.add_arguments_for_module(parser, models, argument_for_class='model', default='FlowNet2')
115 |
116 | tools.add_arguments_for_module(parser, datasets, argument_for_class='inference_dataset', default='Google',
117 | skip_params=['is_cropped'],
118 | parameter_defaults={'root': './Google/train',
119 | 'replicates': 1})
120 |
121 | main_dir = os.path.dirname(os.path.realpath(__file__))
122 | os.chdir(main_dir)
123 |
124 | # Parse the official arguments
125 | with tools.TimerBlock("Parsing Arguments") as block:
126 | args = parser.parse_args()
127 | if args.number_gpus < 0 : args.number_gpus = torch.cuda.device_count()
128 |
129 | # Get argument defaults (hashtag #thisisahack)
130 | parser.add_argument('--IGNORE', action='store_true')
131 | defaults = vars(parser.parse_args(['--IGNORE']))
132 |
133 | # Print all arguments, color the non-defaults
134 | for argument, value in sorted(vars(args).items()):
135 | reset = colorama.Style.RESET_ALL
136 | color = reset if value == defaults[argument] else colorama.Fore.MAGENTA
137 | block.log('{}{}: {}{}'.format(color, argument, value, reset))
138 |
139 | args.model_class = tools.module_to_dict(models)[args.model]
140 |
141 | args.inference_dataset_class = tools.module_to_dict(datasets)[args.inference_dataset]
142 |
143 | args.cuda = not args.no_cuda and torch.cuda.is_available()
144 | # args.current_hash = subprocess.check_output(["git", "rev-parse", "HEAD"]).rstrip()
145 | args.log_file = join(args.save, 'args.txt')
146 |
147 | # dict to collect activation gradients (for training debug purpose)
148 | args.grads = {}
149 |
150 | args.total_epochs = 1
151 | args.inference_dir = "{}/inference".format(args.save)
152 |
153 | print('Source Code')
154 | # print((' Current Git Hash: {}\n'.format(args.current_hash)))
155 |
156 | # Dynamically load the dataset class with parameters passed in via "--argument_[param]=[value]" arguments
157 | with tools.TimerBlock("Initializing Datasets") as block:
158 | args.effective_batch_size = args.batch_size * args.number_gpus
159 | args.effective_inference_batch_size = args.inference_batch_size * args.number_gpus
160 | args.effective_number_workers = args.number_workers * args.number_gpus
161 | gpuargs = {'num_workers': args.effective_number_workers,
162 | 'pin_memory': True,
163 | 'drop_last' : True} if args.cuda else {}
164 | inf_gpuargs = gpuargs.copy()
165 | inf_gpuargs['num_workers'] = args.number_workers
166 |
167 | block.log('Inference Dataset: {}'.format(args.inference_dataset))
168 |
169 | dataset_root = args.inference_dataset_root
170 | data_name = sorted(os.listdir(dataset_root))
171 |
172 | block.log(data_name)
173 | inference_loaders = {}
174 | for i in range(len(data_name)):
175 | dataset_path = os.path.join(dataset_root, data_name[i])
176 | args.inference_dataset_root = dataset_path
177 | inference_dataset = args.inference_dataset_class(args, False, **tools.kwargs_from_args(args, 'inference_dataset'))
178 | inference_loaders[dataset_path] = DataLoader(inference_dataset, batch_size=args.effective_inference_batch_size, shuffle=False, **inf_gpuargs)
179 | block.log('Inference Input: {}'.format(' '.join([str([d for d in x.size()]) for x in inference_dataset[0][0]])))
180 |
181 | # Dynamically load model and loss class with parameters passed in via "--model_[param]=[value]" or "--loss_[param]=[value]" arguments
182 | with tools.TimerBlock("Building {} model".format(args.model)) as block:
183 | class Model(nn.Module):
184 | def __init__(self, args):
185 | super(Model, self).__init__()
186 | kwargs = tools.kwargs_from_args(args, 'model')
187 | self.model = args.model_class(args, **kwargs)
188 |
189 | def forward(self, data):
190 | output = self.model(data)
191 | return output
192 |
193 | model = Model(args)
194 |
195 | block.log('Effective Batch Size: {}'.format(args.effective_batch_size))
196 | block.log('Number of parameters: {}'.format(sum([p.data.nelement() if p.requires_grad else 0 for p in model.parameters()])))
197 |
198 | if args.cuda and args.number_gpus > 0:
199 | block.log('Initializing CUDA')
200 | model = model.cuda()
201 | block.log('Parallelizing')
202 | model = nn.parallel.DataParallel(model, device_ids=list(range(args.number_gpus)))
203 |
204 | # Load weights if needed, otherwise randomly initialize
205 | if args.resume and os.path.isfile(args.resume):
206 | block.log("Loading checkpoint '{}'".format(args.resume))
207 | checkpoint = torch.load(args.resume)
208 | model.module.model.load_state_dict(checkpoint['state_dict'])
209 | block.log("Loaded checkpoint '{}' (at epoch {})".format(args.resume, checkpoint['epoch']))
210 |
211 | elif args.resume and args.inference:
212 | block.log("No checkpoint found at '{}'".format(args.resume))
213 | quit()
214 |
215 | else:
216 | block.log("Random initialization")
217 |
218 | block.log("Initializing save directory: {}".format(args.save))
219 | if not os.path.exists(args.save):
220 | os.makedirs(args.save)
221 |
222 | # Log all arguments to file
223 | for argument, value in sorted(vars(args).items()):
224 | block.log2file(args.log_file, '{}: {}'.format(argument, value))
225 |
226 | for data_path in inference_loaders:
227 | # Primary epoch loop
228 | progress = tqdm(list(range(args.start_epoch, args.total_epochs + 1)), miniters=1, ncols=100, desc='Overall Progress', leave=True, position=0)
229 | offset = 1
230 |
231 | for epoch in progress:
232 | stats = inference(args=args, epoch=epoch - 1, data_path = data_path, data_loader=inference_loaders[data_path], model=model, offset=offset)
233 | offset += 1
234 | print("\n")
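The inference loop above obtains the backward flow by re-running the network on the same frame pair with the two frames swapped along the frame dimension. A small sketch of that swap, assuming the loader yields pairs stacked as (batch, channels, frame, H, W):

import torch

data = torch.randn(1, 3, 2, 64, 64)                                          # one RGB frame pair
data_back = torch.cat((data[:, :, 1:, :, :], data[:, :, :1, :, :]), dim=2)   # swap frame order (cf. inference())
assert torch.equal(data_back[:, :, 0], data[:, :, 1])
assert torch.equal(data_back[:, :, 1], data[:, :, 0])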
--------------------------------------------------------------------------------
/dvs/flownet2/networks/FlowNetC.py:
--------------------------------------------------------------------------------
1 | import torch
2 | import torch.nn as nn
3 | from torch.nn import init
4 |
5 | import math
6 | import numpy as np
7 |
8 | from .correlation_package.correlation import Correlation
9 |
10 | from .submodules import *
11 | 'Parameter count = 39,175,298'
12 |
13 | class FlowNetC(nn.Module):
14 | def __init__(self,args, batchNorm=True, div_flow = 20):
15 | super(FlowNetC,self).__init__()
16 |
17 | self.batchNorm = batchNorm
18 | self.div_flow = div_flow
19 |
20 | self.conv1 = conv(self.batchNorm, 3, 64, kernel_size=7, stride=2)
21 | self.conv2 = conv(self.batchNorm, 64, 128, kernel_size=5, stride=2)
22 | self.conv3 = conv(self.batchNorm, 128, 256, kernel_size=5, stride=2)
23 | self.conv_redir = conv(self.batchNorm, 256, 32, kernel_size=1, stride=1)
24 |
25 | if args.fp16:
26 | self.corr = nn.Sequential(
27 | tofp32(),
28 | Correlation(pad_size=20, kernel_size=1, max_displacement=20, stride1=1, stride2=2, corr_multiply=1),
29 | tofp16())
30 | else:
31 | self.corr = Correlation(pad_size=20, kernel_size=1, max_displacement=20, stride1=1, stride2=2, corr_multiply=1)
32 |
33 | self.corr_activation = nn.LeakyReLU(0.1,inplace=True)
34 | self.conv3_1 = conv(self.batchNorm, 473, 256)
35 | self.conv4 = conv(self.batchNorm, 256, 512, stride=2)
36 | self.conv4_1 = conv(self.batchNorm, 512, 512)
37 | self.conv5 = conv(self.batchNorm, 512, 512, stride=2)
38 | self.conv5_1 = conv(self.batchNorm, 512, 512)
39 | self.conv6 = conv(self.batchNorm, 512, 1024, stride=2)
40 | self.conv6_1 = conv(self.batchNorm,1024, 1024)
41 |
42 | self.deconv5 = deconv(1024,512)
43 | self.deconv4 = deconv(1026,256)
44 | self.deconv3 = deconv(770,128)
45 | self.deconv2 = deconv(386,64)
46 |
47 | self.predict_flow6 = predict_flow(1024)
48 | self.predict_flow5 = predict_flow(1026)
49 | self.predict_flow4 = predict_flow(770)
50 | self.predict_flow3 = predict_flow(386)
51 | self.predict_flow2 = predict_flow(194)
52 |
53 | self.upsampled_flow6_to_5 = nn.ConvTranspose2d(2, 2, 4, 2, 1, bias=True)
54 | self.upsampled_flow5_to_4 = nn.ConvTranspose2d(2, 2, 4, 2, 1, bias=True)
55 | self.upsampled_flow4_to_3 = nn.ConvTranspose2d(2, 2, 4, 2, 1, bias=True)
56 | self.upsampled_flow3_to_2 = nn.ConvTranspose2d(2, 2, 4, 2, 1, bias=True)
57 |
58 | for m in self.modules():
59 | if isinstance(m, nn.Conv2d):
60 | if m.bias is not None:
61 | init.uniform_(m.bias)
62 | init.xavier_uniform_(m.weight)
63 |
64 | if isinstance(m, nn.ConvTranspose2d):
65 | if m.bias is not None:
66 | init.uniform_(m.bias)
67 | init.xavier_uniform_(m.weight)
68 | # init_deconv_bilinear(m.weight)
69 | self.upsample1 = nn.Upsample(scale_factor=4, mode='bilinear')
70 |
71 | def forward(self, x):
72 | x1 = x[:,0:3,:,:]
73 | x2 = x[:,3::,:,:]
74 |
75 | out_conv1a = self.conv1(x1)
76 | out_conv2a = self.conv2(out_conv1a)
77 | out_conv3a = self.conv3(out_conv2a)
78 |
79 | # FlownetC bottom input stream
80 | out_conv1b = self.conv1(x2)
81 |
82 | out_conv2b = self.conv2(out_conv1b)
83 | out_conv3b = self.conv3(out_conv2b)
84 |
85 | # Merge streams
86 | out_corr = self.corr(out_conv3a, out_conv3b) # False
87 | out_corr = self.corr_activation(out_corr)
88 |
89 | # Redirect top input stream and concatenate
90 | out_conv_redir = self.conv_redir(out_conv3a)
91 |
92 | in_conv3_1 = torch.cat((out_conv_redir, out_corr), 1)
93 |
94 | # Merged conv layers
95 | out_conv3_1 = self.conv3_1(in_conv3_1)
96 |
97 | out_conv4 = self.conv4_1(self.conv4(out_conv3_1))
98 |
99 | out_conv5 = self.conv5_1(self.conv5(out_conv4))
100 | out_conv6 = self.conv6_1(self.conv6(out_conv5))
101 |
102 | flow6 = self.predict_flow6(out_conv6)
103 | flow6_up = self.upsampled_flow6_to_5(flow6)
104 | out_deconv5 = self.deconv5(out_conv6)
105 |
106 | concat5 = torch.cat((out_conv5,out_deconv5,flow6_up),1)
107 |
108 | flow5 = self.predict_flow5(concat5)
109 | flow5_up = self.upsampled_flow5_to_4(flow5)
110 | out_deconv4 = self.deconv4(concat5)
111 | concat4 = torch.cat((out_conv4,out_deconv4,flow5_up),1)
112 |
113 | flow4 = self.predict_flow4(concat4)
114 | flow4_up = self.upsampled_flow4_to_3(flow4)
115 | out_deconv3 = self.deconv3(concat4)
116 | concat3 = torch.cat((out_conv3_1,out_deconv3,flow4_up),1)
117 |
118 | flow3 = self.predict_flow3(concat3)
119 | flow3_up = self.upsampled_flow3_to_2(flow3)
120 | out_deconv2 = self.deconv2(concat3)
121 | concat2 = torch.cat((out_conv2a,out_deconv2,flow3_up),1)
122 |
123 | flow2 = self.predict_flow2(concat2)
124 |
125 | if self.training:
126 | return flow2,flow3,flow4,flow5,flow6
127 | else:
128 | return flow2,
129 |
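The hard-coded channel widths in FlowNetC follow from the correlation settings and the concatenations in forward(); a quick consistency check of that arithmetic (plain Python, illustrative only):

max_displacement, stride2 = 20, 2
corr_channels = (2 * (max_displacement // stride2) + 1) ** 2   # 21 x 21 = 441 correlation channels
assert corr_channels + 32 == 473                               # + conv_redir output -> conv3_1 input
assert 512 + 512 + 2 == 1026                                   # out_conv5 + out_deconv5 + flow6_up -> predict_flow5 / deconv4
assert 512 + 256 + 2 == 770                                    # out_conv4 + out_deconv4 + flow5_up
assert 256 + 128 + 2 == 386                                    # out_conv3_1 + out_deconv3 + flow4_up
assert 128 + 64 + 2 == 194                                     # out_conv2a + out_deconv2 + flow3_up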
--------------------------------------------------------------------------------
/dvs/flownet2/networks/FlowNetFusion.py:
--------------------------------------------------------------------------------
1 | import torch
2 | import torch.nn as nn
3 | from torch.nn import init
4 |
5 | import math
6 | import numpy as np
7 |
8 | from .submodules import *
9 | 'Parameter count = 581,226'
10 |
11 | class FlowNetFusion(nn.Module):
12 | def __init__(self,args, batchNorm=True):
13 | super(FlowNetFusion,self).__init__()
14 |
15 | self.batchNorm = batchNorm
16 | self.conv0 = conv(self.batchNorm, 11, 64)
17 | self.conv1 = conv(self.batchNorm, 64, 64, stride=2)
18 | self.conv1_1 = conv(self.batchNorm, 64, 128)
19 | self.conv2 = conv(self.batchNorm, 128, 128, stride=2)
20 | self.conv2_1 = conv(self.batchNorm, 128, 128)
21 |
22 | self.deconv1 = deconv(128,32)
23 | self.deconv0 = deconv(162,16)
24 |
25 | self.inter_conv1 = i_conv(self.batchNorm, 162, 32)
26 | self.inter_conv0 = i_conv(self.batchNorm, 82, 16)
27 |
28 | self.predict_flow2 = predict_flow(128)
29 | self.predict_flow1 = predict_flow(32)
30 | self.predict_flow0 = predict_flow(16)
31 |
32 | self.upsampled_flow2_to_1 = nn.ConvTranspose2d(2, 2, 4, 2, 1)
33 | self.upsampled_flow1_to_0 = nn.ConvTranspose2d(2, 2, 4, 2, 1)
34 |
35 | for m in self.modules():
36 | if isinstance(m, nn.Conv2d):
37 | if m.bias is not None:
38 | init.uniform_(m.bias)
39 | init.xavier_uniform_(m.weight)
40 |
41 | if isinstance(m, nn.ConvTranspose2d):
42 | if m.bias is not None:
43 | init.uniform_(m.bias)
44 | init.xavier_uniform_(m.weight)
45 | # init_deconv_bilinear(m.weight)
46 |
47 | def forward(self, x):
48 | out_conv0 = self.conv0(x)
49 | out_conv1 = self.conv1_1(self.conv1(out_conv0))
50 | out_conv2 = self.conv2_1(self.conv2(out_conv1))
51 |
52 | flow2 = self.predict_flow2(out_conv2)
53 | flow2_up = self.upsampled_flow2_to_1(flow2)
54 | out_deconv1 = self.deconv1(out_conv2)
55 |
56 | concat1 = torch.cat((out_conv1,out_deconv1,flow2_up),1)
57 | out_interconv1 = self.inter_conv1(concat1)
58 | flow1 = self.predict_flow1(out_interconv1)
59 | flow1_up = self.upsampled_flow1_to_0(flow1)
60 | out_deconv0 = self.deconv0(concat1)
61 |
62 | concat0 = torch.cat((out_conv0,out_deconv0,flow1_up),1)
63 | out_interconv0 = self.inter_conv0(concat0)
64 | flow0 = self.predict_flow0(out_interconv0)
65 |
66 | return flow0
67 |
68 |
--------------------------------------------------------------------------------
/dvs/flownet2/networks/FlowNetS.py:
--------------------------------------------------------------------------------
1 | '''
2 | Portions of this code copyright 2017, Clement Pinard
3 | '''
4 |
5 | import torch
6 | import torch.nn as nn
7 | from torch.nn import init
8 |
9 | import math
10 | import numpy as np
11 |
12 | from .submodules import *
13 | 'Parameter count : 38,676,504 '
14 |
15 | class FlowNetS(nn.Module):
16 | def __init__(self, args, input_channels = 12, batchNorm=True):
17 | super(FlowNetS,self).__init__()
18 |
19 | self.batchNorm = batchNorm
20 | self.conv1 = conv(self.batchNorm, input_channels, 64, kernel_size=7, stride=2)
21 | self.conv2 = conv(self.batchNorm, 64, 128, kernel_size=5, stride=2)
22 | self.conv3 = conv(self.batchNorm, 128, 256, kernel_size=5, stride=2)
23 | self.conv3_1 = conv(self.batchNorm, 256, 256)
24 | self.conv4 = conv(self.batchNorm, 256, 512, stride=2)
25 | self.conv4_1 = conv(self.batchNorm, 512, 512)
26 | self.conv5 = conv(self.batchNorm, 512, 512, stride=2)
27 | self.conv5_1 = conv(self.batchNorm, 512, 512)
28 | self.conv6 = conv(self.batchNorm, 512, 1024, stride=2)
29 | self.conv6_1 = conv(self.batchNorm,1024, 1024)
30 |
31 | self.deconv5 = deconv(1024,512)
32 | self.deconv4 = deconv(1026,256)
33 | self.deconv3 = deconv(770,128)
34 | self.deconv2 = deconv(386,64)
35 |
36 | self.predict_flow6 = predict_flow(1024)
37 | self.predict_flow5 = predict_flow(1026)
38 | self.predict_flow4 = predict_flow(770)
39 | self.predict_flow3 = predict_flow(386)
40 | self.predict_flow2 = predict_flow(194)
41 |
42 | self.upsampled_flow6_to_5 = nn.ConvTranspose2d(2, 2, 4, 2, 1, bias=False)
43 | self.upsampled_flow5_to_4 = nn.ConvTranspose2d(2, 2, 4, 2, 1, bias=False)
44 | self.upsampled_flow4_to_3 = nn.ConvTranspose2d(2, 2, 4, 2, 1, bias=False)
45 | self.upsampled_flow3_to_2 = nn.ConvTranspose2d(2, 2, 4, 2, 1, bias=False)
46 |
47 | for m in self.modules():
48 | if isinstance(m, nn.Conv2d):
49 | if m.bias is not None:
50 | init.uniform_(m.bias)
51 | init.xavier_uniform_(m.weight)
52 |
53 | if isinstance(m, nn.ConvTranspose2d):
54 | if m.bias is not None:
55 | init.uniform_(m.bias)
56 | init.xavier_uniform_(m.weight)
57 | # init_deconv_bilinear(m.weight)
58 | self.upsample1 = nn.Upsample(scale_factor=4, mode='bilinear')
59 |
60 | def forward(self, x):
61 | out_conv1 = self.conv1(x)
62 |
63 | out_conv2 = self.conv2(out_conv1)
64 | out_conv3 = self.conv3_1(self.conv3(out_conv2))
65 | out_conv4 = self.conv4_1(self.conv4(out_conv3))
66 | out_conv5 = self.conv5_1(self.conv5(out_conv4))
67 | out_conv6 = self.conv6_1(self.conv6(out_conv5))
68 |
69 | flow6 = self.predict_flow6(out_conv6)
70 | flow6_up = self.upsampled_flow6_to_5(flow6)
71 | out_deconv5 = self.deconv5(out_conv6)
72 |
73 | concat5 = torch.cat((out_conv5,out_deconv5,flow6_up),1)
74 | flow5 = self.predict_flow5(concat5)
75 | flow5_up = self.upsampled_flow5_to_4(flow5)
76 | out_deconv4 = self.deconv4(concat5)
77 |
78 | concat4 = torch.cat((out_conv4,out_deconv4,flow5_up),1)
79 | flow4 = self.predict_flow4(concat4)
80 | flow4_up = self.upsampled_flow4_to_3(flow4)
81 | out_deconv3 = self.deconv3(concat4)
82 |
83 | concat3 = torch.cat((out_conv3,out_deconv3,flow4_up),1)
84 | flow3 = self.predict_flow3(concat3)
85 | flow3_up = self.upsampled_flow3_to_2(flow3)
86 | out_deconv2 = self.deconv2(concat3)
87 |
88 | concat2 = torch.cat((out_conv2,out_deconv2,flow3_up),1)
89 | flow2 = self.predict_flow2(concat2)
90 |
91 | if self.training:
92 | return flow2,flow3,flow4,flow5,flow6
93 | else:
94 | return flow2,
95 |
96 |
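A standalone sketch of how FlowNetS behaves from the caller's side; args is never used by __init__, and the input height/width must be divisible by 64 (here 256), so the finest flow comes back at 1/4 resolution:

import torch

net = FlowNetS(args=None, input_channels=6, batchNorm=True)   # args is ignored by this class
x = torch.randn(1, 6, 256, 256)

net.train()
flow2, flow3, flow4, flow5, flow6 = net(x)                    # full pyramid, finest (1/4 resolution) first

net.eval()
with torch.no_grad():
    flow2, = net(x)                                           # eval mode returns a 1-tuple with the finest flow
assert flow2.shape == (1, 2, 64, 64)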
--------------------------------------------------------------------------------
/dvs/flownet2/networks/FlowNetSD.py:
--------------------------------------------------------------------------------
1 | import torch
2 | import torch.nn as nn
3 | from torch.nn import init
4 |
5 | import math
6 | import numpy as np
7 |
8 | from .submodules import *
9 | 'Parameter count = 45,371,666'
10 |
11 | class FlowNetSD(nn.Module):
12 | def __init__(self, args, batchNorm=True):
13 | super(FlowNetSD,self).__init__()
14 |
15 | self.batchNorm = batchNorm
16 | self.conv0 = conv(self.batchNorm, 6, 64)
17 | self.conv1 = conv(self.batchNorm, 64, 64, stride=2)
18 | self.conv1_1 = conv(self.batchNorm, 64, 128)
19 | self.conv2 = conv(self.batchNorm, 128, 128, stride=2)
20 | self.conv2_1 = conv(self.batchNorm, 128, 128)
21 | self.conv3 = conv(self.batchNorm, 128, 256, stride=2)
22 | self.conv3_1 = conv(self.batchNorm, 256, 256)
23 | self.conv4 = conv(self.batchNorm, 256, 512, stride=2)
24 | self.conv4_1 = conv(self.batchNorm, 512, 512)
25 | self.conv5 = conv(self.batchNorm, 512, 512, stride=2)
26 | self.conv5_1 = conv(self.batchNorm, 512, 512)
27 | self.conv6 = conv(self.batchNorm, 512, 1024, stride=2)
28 | self.conv6_1 = conv(self.batchNorm,1024, 1024)
29 |
30 | self.deconv5 = deconv(1024,512)
31 | self.deconv4 = deconv(1026,256)
32 | self.deconv3 = deconv(770,128)
33 | self.deconv2 = deconv(386,64)
34 |
35 | self.inter_conv5 = i_conv(self.batchNorm, 1026, 512)
36 | self.inter_conv4 = i_conv(self.batchNorm, 770, 256)
37 | self.inter_conv3 = i_conv(self.batchNorm, 386, 128)
38 | self.inter_conv2 = i_conv(self.batchNorm, 194, 64)
39 |
40 | self.predict_flow6 = predict_flow(1024)
41 | self.predict_flow5 = predict_flow(512)
42 | self.predict_flow4 = predict_flow(256)
43 | self.predict_flow3 = predict_flow(128)
44 | self.predict_flow2 = predict_flow(64)
45 |
46 | self.upsampled_flow6_to_5 = nn.ConvTranspose2d(2, 2, 4, 2, 1)
47 | self.upsampled_flow5_to_4 = nn.ConvTranspose2d(2, 2, 4, 2, 1)
48 | self.upsampled_flow4_to_3 = nn.ConvTranspose2d(2, 2, 4, 2, 1)
49 | self.upsampled_flow3_to_2 = nn.ConvTranspose2d(2, 2, 4, 2, 1)
50 |
51 | for m in self.modules():
52 | if isinstance(m, nn.Conv2d):
53 | if m.bias is not None:
54 | init.uniform_(m.bias)
55 | init.xavier_uniform_(m.weight)
56 |
57 | if isinstance(m, nn.ConvTranspose2d):
58 | if m.bias is not None:
59 | init.uniform_(m.bias)
60 | init.xavier_uniform_(m.weight)
61 | # init_deconv_bilinear(m.weight)
62 | self.upsample1 = nn.Upsample(scale_factor=4, mode='bilinear')
63 |
64 |
65 |
66 | def forward(self, x):
67 | out_conv0 = self.conv0(x)
68 | out_conv1 = self.conv1_1(self.conv1(out_conv0))
69 | out_conv2 = self.conv2_1(self.conv2(out_conv1))
70 |
71 | out_conv3 = self.conv3_1(self.conv3(out_conv2))
72 | out_conv4 = self.conv4_1(self.conv4(out_conv3))
73 | out_conv5 = self.conv5_1(self.conv5(out_conv4))
74 | out_conv6 = self.conv6_1(self.conv6(out_conv5))
75 |
76 | flow6 = self.predict_flow6(out_conv6)
77 | flow6_up = self.upsampled_flow6_to_5(flow6)
78 | out_deconv5 = self.deconv5(out_conv6)
79 |
80 | concat5 = torch.cat((out_conv5,out_deconv5,flow6_up),1)
81 | out_interconv5 = self.inter_conv5(concat5)
82 | flow5 = self.predict_flow5(out_interconv5)
83 |
84 | flow5_up = self.upsampled_flow5_to_4(flow5)
85 | out_deconv4 = self.deconv4(concat5)
86 |
87 | concat4 = torch.cat((out_conv4,out_deconv4,flow5_up),1)
88 | out_interconv4 = self.inter_conv4(concat4)
89 | flow4 = self.predict_flow4(out_interconv4)
90 | flow4_up = self.upsampled_flow4_to_3(flow4)
91 | out_deconv3 = self.deconv3(concat4)
92 |
93 | concat3 = torch.cat((out_conv3,out_deconv3,flow4_up),1)
94 | out_interconv3 = self.inter_conv3(concat3)
95 | flow3 = self.predict_flow3(out_interconv3)
96 | flow3_up = self.upsampled_flow3_to_2(flow3)
97 | out_deconv2 = self.deconv2(concat3)
98 |
99 | concat2 = torch.cat((out_conv2,out_deconv2,flow3_up),1)
100 | out_interconv2 = self.inter_conv2(concat2)
101 | flow2 = self.predict_flow2(out_interconv2)
102 |
103 | if self.training:
104 | return flow2,flow3,flow4,flow5,flow6
105 | else:
106 | return flow2,
107 |
--------------------------------------------------------------------------------
/dvs/flownet2/networks/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/googleinterns/deep-stabilization/7159c09d21aee3fc2098c64698c1300e40e3a8ea/dvs/flownet2/networks/__init__.py
--------------------------------------------------------------------------------
/dvs/flownet2/networks/channelnorm_package/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/googleinterns/deep-stabilization/7159c09d21aee3fc2098c64698c1300e40e3a8ea/dvs/flownet2/networks/channelnorm_package/__init__.py
--------------------------------------------------------------------------------
/dvs/flownet2/networks/channelnorm_package/channelnorm.py:
--------------------------------------------------------------------------------
1 | from torch.autograd import Function, Variable
2 | from torch.nn.modules.module import Module
3 | import channelnorm_cuda
4 |
5 | class ChannelNormFunction(Function):
6 |
7 | @staticmethod
8 | def forward(ctx, input1, norm_deg=2):
9 | assert input1.is_contiguous()
10 | b, _, h, w = input1.size()
11 | output = input1.new(b, 1, h, w).zero_()
12 |
13 | channelnorm_cuda.forward(input1, output, norm_deg)
14 | ctx.save_for_backward(input1, output)
15 | ctx.norm_deg = norm_deg
16 |
17 | return output
18 |
19 | @staticmethod
20 | def backward(ctx, grad_output):
21 | input1, output = ctx.saved_tensors
22 |
23 | grad_input1 = Variable(input1.new(input1.size()).zero_())
24 |
25 | channelnorm_cuda.backward(input1, output, grad_output.data,
26 | grad_input1.data, ctx.norm_deg)
27 |
28 | return grad_input1, None
29 |
30 |
31 | class ChannelNorm(Module):
32 |
33 | def __init__(self, norm_deg=2):
34 | super(ChannelNorm, self).__init__()
35 | self.norm_deg = norm_deg
36 |
37 | def forward(self, input1):
38 | return ChannelNormFunction.apply(input1, self.norm_deg)
39 |
40 |
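For reference, the CUDA kernel behind this module (channelnorm_kernel.cu, below) always computes the per-pixel L2 norm over channels, regardless of norm_deg. A pure-PyTorch equivalent, useful as a sanity check when the extension is not built (a sketch, not part of the package):

import torch

def channel_norm_reference(x: torch.Tensor) -> torch.Tensor:
    # (B, C, H, W) -> (B, 1, H, W): sqrt of the sum of squares over the channel dimension
    return x.pow(2).sum(dim=1, keepdim=True).sqrt()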
--------------------------------------------------------------------------------
/dvs/flownet2/networks/channelnorm_package/channelnorm_cuda.cc:
--------------------------------------------------------------------------------
1 | #include <torch/torch.h>
2 | #include <ATen/ATen.h>
3 |
4 | #include "channelnorm_kernel.cuh"
5 |
6 | int channelnorm_cuda_forward(
7 | at::Tensor& input1,
8 | at::Tensor& output,
9 | int norm_deg) {
10 |
11 | channelnorm_kernel_forward(input1, output, norm_deg);
12 | return 1;
13 | }
14 |
15 |
16 | int channelnorm_cuda_backward(
17 | at::Tensor& input1,
18 | at::Tensor& output,
19 | at::Tensor& gradOutput,
20 | at::Tensor& gradInput1,
21 | int norm_deg) {
22 |
23 | channelnorm_kernel_backward(input1, output, gradOutput, gradInput1, norm_deg);
24 | return 1;
25 | }
26 |
27 | PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) {
28 | m.def("forward", &channelnorm_cuda_forward, "Channel norm forward (CUDA)");
29 | m.def("backward", &channelnorm_cuda_backward, "Channel norm backward (CUDA)");
30 | }
31 |
32 |
--------------------------------------------------------------------------------
/dvs/flownet2/networks/channelnorm_package/channelnorm_kernel.cu:
--------------------------------------------------------------------------------
1 | #include <ATen/ATen.h>
2 | #include <ATen/Context.h>
3 | #include <ATen/cuda/CUDAContext.h>
4 |
5 | #include "channelnorm_kernel.cuh"
6 |
7 | #define CUDA_NUM_THREADS 512
8 |
9 | #define DIM0(TENSOR) ((TENSOR).x)
10 | #define DIM1(TENSOR) ((TENSOR).y)
11 | #define DIM2(TENSOR) ((TENSOR).z)
12 | #define DIM3(TENSOR) ((TENSOR).w)
13 |
14 | #define DIM3_INDEX(TENSOR, xx, yy, zz, ww) ((TENSOR)[((xx) * (TENSOR##_stride.x)) + ((yy) * (TENSOR##_stride.y)) + ((zz) * (TENSOR##_stride.z)) + ((ww) * (TENSOR##_stride.w))])
15 |
16 | using at::Half;
17 |
18 | template <typename scalar_t>
19 | __global__ void kernel_channelnorm_update_output(
20 | const int n,
21 | const scalar_t* __restrict__ input1,
22 | const long4 input1_size,
23 | const long4 input1_stride,
24 | scalar_t* __restrict__ output,
25 | const long4 output_size,
26 | const long4 output_stride,
27 | int norm_deg) {
28 |
29 | int index = blockIdx.x * blockDim.x + threadIdx.x;
30 |
31 | if (index >= n) {
32 | return;
33 | }
34 |
35 | int dim_b = DIM0(output_size);
36 | int dim_c = DIM1(output_size);
37 | int dim_h = DIM2(output_size);
38 | int dim_w = DIM3(output_size);
39 | int dim_chw = dim_c * dim_h * dim_w;
40 |
41 | int b = ( index / dim_chw ) % dim_b;
42 | int y = ( index / dim_w ) % dim_h;
43 | int x = ( index ) % dim_w;
44 |
45 | int i1dim_c = DIM1(input1_size);
46 | int i1dim_h = DIM2(input1_size);
47 | int i1dim_w = DIM3(input1_size);
48 | int i1dim_chw = i1dim_c * i1dim_h * i1dim_w;
49 | int i1dim_hw = i1dim_h * i1dim_w;
50 |
51 | float result = 0.0;
52 |
53 | for (int c = 0; c < i1dim_c; ++c) {
54 | int i1Index = b * i1dim_chw + c * i1dim_hw + y * i1dim_w + x;
55 | scalar_t val = input1[i1Index];
56 | result += static_cast<float>(val * val);
57 | }
58 | result = sqrt(result);
59 | output[index] = static_cast<scalar_t>(result);
60 | }
61 |
62 |
63 | template <typename scalar_t>
64 | __global__ void kernel_channelnorm_backward_input1(
65 | const int n,
66 | const scalar_t* __restrict__ input1, const long4 input1_size, const long4 input1_stride,
67 | const scalar_t* __restrict__ output, const long4 output_size, const long4 output_stride,
68 | const scalar_t* __restrict__ gradOutput, const long4 gradOutput_size, const long4 gradOutput_stride,
69 | scalar_t* __restrict__ gradInput, const long4 gradInput_size, const long4 gradInput_stride,
70 | int norm_deg) {
71 |
72 | int index = blockIdx.x * blockDim.x + threadIdx.x;
73 |
74 | if (index >= n) {
75 | return;
76 | }
77 |
78 | float val = 0.0;
79 |
80 | int dim_b = DIM0(gradInput_size);
81 | int dim_c = DIM1(gradInput_size);
82 | int dim_h = DIM2(gradInput_size);
83 | int dim_w = DIM3(gradInput_size);
84 | int dim_chw = dim_c * dim_h * dim_w;
85 | int dim_hw = dim_h * dim_w;
86 |
87 | int b = ( index / dim_chw ) % dim_b;
88 | int y = ( index / dim_w ) % dim_h;
89 | int x = ( index ) % dim_w;
90 |
91 |
92 | int outIndex = b * dim_hw + y * dim_w + x;
93 | val = static_cast<float>(gradOutput[outIndex]) * static_cast<float>(input1[index]) / (static_cast<float>(output[outIndex])+1e-9);
94 | gradInput[index] = static_cast<scalar_t>(val);
95 |
96 | }
97 |
98 | void channelnorm_kernel_forward(
99 | at::Tensor& input1,
100 | at::Tensor& output,
101 | int norm_deg) {
102 |
103 | const long4 input1_size = make_long4(input1.size(0), input1.size(1), input1.size(2), input1.size(3));
104 | const long4 input1_stride = make_long4(input1.stride(0), input1.stride(1), input1.stride(2), input1.stride(3));
105 |
106 | const long4 output_size = make_long4(output.size(0), output.size(1), output.size(2), output.size(3));
107 | const long4 output_stride = make_long4(output.stride(0), output.stride(1), output.stride(2), output.stride(3));
108 |
109 | int n = output.numel();
110 |
111 | AT_DISPATCH_FLOATING_TYPES_AND_HALF(input1.type(), "channelnorm_forward", ([&] {
112 |
113 | kernel_channelnorm_update_output<scalar_t><<< (n + CUDA_NUM_THREADS - 1)/CUDA_NUM_THREADS, CUDA_NUM_THREADS, 0, at::cuda::getCurrentCUDAStream() >>>(
114 | //at::globalContext().getCurrentCUDAStream() >>>(
115 | n,
116 | input1.data<scalar_t>(),
117 | input1_size,
118 | input1_stride,
119 | output.data<scalar_t>(),
120 | output_size,
121 | output_stride,
122 | norm_deg);
123 |
124 | }));
125 |
126 | // TODO: ATen-equivalent check
127 |
128 | // THCudaCheck(cudaGetLastError());
129 | }
130 |
131 | void channelnorm_kernel_backward(
132 | at::Tensor& input1,
133 | at::Tensor& output,
134 | at::Tensor& gradOutput,
135 | at::Tensor& gradInput1,
136 | int norm_deg) {
137 |
138 | const long4 input1_size = make_long4(input1.size(0), input1.size(1), input1.size(2), input1.size(3));
139 | const long4 input1_stride = make_long4(input1.stride(0), input1.stride(1), input1.stride(2), input1.stride(3));
140 |
141 | const long4 output_size = make_long4(output.size(0), output.size(1), output.size(2), output.size(3));
142 | const long4 output_stride = make_long4(output.stride(0), output.stride(1), output.stride(2), output.stride(3));
143 |
144 | const long4 gradOutput_size = make_long4(gradOutput.size(0), gradOutput.size(1), gradOutput.size(2), gradOutput.size(3));
145 | const long4 gradOutput_stride = make_long4(gradOutput.stride(0), gradOutput.stride(1), gradOutput.stride(2), gradOutput.stride(3));
146 |
147 | const long4 gradInput1_size = make_long4(gradInput1.size(0), gradInput1.size(1), gradInput1.size(2), gradInput1.size(3));
148 | const long4 gradInput1_stride = make_long4(gradInput1.stride(0), gradInput1.stride(1), gradInput1.stride(2), gradInput1.stride(3));
149 |
150 | int n = gradInput1.numel();
151 |
152 | AT_DISPATCH_FLOATING_TYPES_AND_HALF(input1.type(), "channelnorm_backward_input1", ([&] {
153 |
154 | kernel_channelnorm_backward_input1<scalar_t><<< (n + CUDA_NUM_THREADS - 1)/CUDA_NUM_THREADS, CUDA_NUM_THREADS, 0, at::cuda::getCurrentCUDAStream() >>>(
155 | //at::globalContext().getCurrentCUDAStream() >>>(
156 | n,
157 | input1.data<scalar_t>(),
158 | input1_size,
159 | input1_stride,
160 | output.data<scalar_t>(),
161 | output_size,
162 | output_stride,
163 | gradOutput.data<scalar_t>(),
164 | gradOutput_size,
165 | gradOutput_stride,
166 | gradInput1.data<scalar_t>(),
167 | gradInput1_size,
168 | gradInput1_stride,
169 | norm_deg
170 | );
171 |
172 | }));
173 |
174 | // TODO: Add ATen-equivalent check
175 |
176 | // THCudaCheck(cudaGetLastError());
177 | }
178 |
--------------------------------------------------------------------------------
/dvs/flownet2/networks/channelnorm_package/channelnorm_kernel.cuh:
--------------------------------------------------------------------------------
1 | #pragma once
2 |
3 | #include <ATen/ATen.h>
4 |
5 | void channelnorm_kernel_forward(
6 | at::Tensor& input1,
7 | at::Tensor& output,
8 | int norm_deg);
9 |
10 |
11 | void channelnorm_kernel_backward(
12 | at::Tensor& input1,
13 | at::Tensor& output,
14 | at::Tensor& gradOutput,
15 | at::Tensor& gradInput1,
16 | int norm_deg);
17 |
--------------------------------------------------------------------------------
/dvs/flownet2/networks/channelnorm_package/setup.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python3
2 | import os
3 | import torch
4 |
5 | from setuptools import setup
6 | from torch.utils.cpp_extension import BuildExtension, CUDAExtension
7 |
8 | cxx_args = ['-std=c++11']
9 |
10 | nvcc_args = [
11 | '-gencode', 'arch=compute_52,code=sm_52',
12 | '-gencode', 'arch=compute_60,code=sm_60',
13 | '-gencode', 'arch=compute_61,code=sm_61',
14 | '-gencode', 'arch=compute_70,code=sm_70',
15 | '-gencode', 'arch=compute_70,code=compute_70'
16 | ]
17 |
18 | setup(
19 | name='channelnorm_cuda',
20 | ext_modules=[
21 | CUDAExtension('channelnorm_cuda', [
22 | 'channelnorm_cuda.cc',
23 | 'channelnorm_kernel.cu'
24 | ], extra_compile_args={'cxx': cxx_args, 'nvcc': nvcc_args})
25 | ],
26 | cmdclass={
27 | 'build_ext': BuildExtension
28 | })
29 |
--------------------------------------------------------------------------------
/dvs/flownet2/networks/correlation_package/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/googleinterns/deep-stabilization/7159c09d21aee3fc2098c64698c1300e40e3a8ea/dvs/flownet2/networks/correlation_package/__init__.py
--------------------------------------------------------------------------------
/dvs/flownet2/networks/correlation_package/correlation.py:
--------------------------------------------------------------------------------
1 | import torch
2 | from torch.nn.modules.module import Module
3 | from torch.autograd import Function
4 | import correlation_cuda
5 |
6 | class CorrelationFunction(Function):
7 |
8 | @staticmethod
9 | def forward(ctx, input1, input2, pad_size=3, kernel_size=3, max_displacement=20, stride1=1, stride2=2, corr_multiply=1):
10 | ctx.save_for_backward(input1, input2)
11 |
12 | ctx.pad_size = pad_size
13 | ctx.kernel_size = kernel_size
14 | ctx.max_displacement = max_displacement
15 | ctx.stride1 = stride1
16 | ctx.stride2 = stride2
17 | ctx.corr_multiply = corr_multiply
18 |
19 | with torch.cuda.device_of(input1):
20 | rbot1 = input1.new()
21 | rbot2 = input2.new()
22 | output = input1.new()
23 |
24 | correlation_cuda.forward(input1, input2, rbot1, rbot2, output,
25 | ctx.pad_size, ctx.kernel_size, ctx.max_displacement, ctx.stride1, ctx.stride2, ctx.corr_multiply)
26 |
27 | return output
28 |
29 | @staticmethod
30 | def backward(ctx, grad_output):
31 | input1, input2 = ctx.saved_tensors
32 |
33 | with torch.cuda.device_of(input1):
34 | rbot1 = input1.new()
35 | rbot2 = input2.new()
36 |
37 | grad_input1 = input1.new()
38 | grad_input2 = input2.new()
39 |
40 | correlation_cuda.backward(input1, input2, rbot1, rbot2, grad_output, grad_input1, grad_input2,
41 | ctx.pad_size, ctx.kernel_size, ctx.max_displacement, ctx.stride1, ctx.stride2, ctx.corr_multiply)
42 |
43 | return grad_input1, grad_input2, None, None, None, None, None, None
44 |
45 |
46 | class Correlation(Module):
47 | def __init__(self, pad_size=0, kernel_size=0, max_displacement=0, stride1=1, stride2=2, corr_multiply=1):
48 | super(Correlation, self).__init__()
49 | self.pad_size = pad_size
50 | self.kernel_size = kernel_size
51 | self.max_displacement = max_displacement
52 | self.stride1 = stride1
53 | self.stride2 = stride2
54 | self.corr_multiply = corr_multiply
55 |
56 | def forward(self, input1, input2):
57 |
58 | result = CorrelationFunction.apply(input1, input2, self.pad_size, self.kernel_size, self.max_displacement, self.stride1, self.stride2, self.corr_multiply)
59 |
60 | return result
61 |
62 |
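FlowNetC instantiates this module with pad_size=20, kernel_size=1, max_displacement=20, stride1=1, stride2=2, which yields 441 output channels. A naive (and slow) PyTorch reference for that kernel_size=1 case, assuming the CUDA kernel averages the per-channel products of the two feature maps for every sampled displacement:

import torch
import torch.nn.functional as F

def correlation_reference(f1, f2, max_displacement=20, stride2=2):
    # f1, f2: (B, C, H, W); output: (B, (2*max_displacement//stride2 + 1)**2, H, W)
    B, C, H, W = f1.shape
    pad = max_displacement
    f2p = F.pad(f2, (pad, pad, pad, pad))
    offsets = range(-max_displacement, max_displacement + 1, stride2)
    out = []
    for dy in offsets:
        for dx in offsets:
            shifted = f2p[:, :, pad + dy:pad + dy + H, pad + dx:pad + dx + W]
            out.append((f1 * shifted).mean(dim=1, keepdim=True))
    return torch.cat(out, dim=1)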
--------------------------------------------------------------------------------
/dvs/flownet2/networks/correlation_package/correlation_cuda.cc:
--------------------------------------------------------------------------------
1 | #include <torch/torch.h>
2 | #include <ATen/ATen.h>
3 | #include <ATen/Context.h>
4 | #include <ATen/cuda/CUDAContext.h>
5 | #include <stdio.h>
6 | #include <iostream>
7 |
8 | #include "correlation_cuda_kernel.cuh"
9 |
10 | int correlation_forward_cuda(at::Tensor& input1, at::Tensor& input2, at::Tensor& rInput1, at::Tensor& rInput2, at::Tensor& output,
11 | int pad_size,
12 | int kernel_size,
13 | int max_displacement,
14 | int stride1,
15 | int stride2,
16 | int corr_type_multiply)
17 | {
18 |
19 | int batchSize = input1.size(0);
20 |
21 | int nInputChannels = input1.size(1);
22 | int inputHeight = input1.size(2);
23 | int inputWidth = input1.size(3);
24 |
25 | int kernel_radius = (kernel_size - 1) / 2;
26 | int border_radius = kernel_radius + max_displacement;
27 |
28 | int paddedInputHeight = inputHeight + 2 * pad_size;
29 | int paddedInputWidth = inputWidth + 2 * pad_size;
30 |
31 | int nOutputChannels = ((max_displacement/stride2)*2 + 1) * ((max_displacement/stride2)*2 + 1);
32 |
33 | int outputHeight = ceil(static_cast<float>(paddedInputHeight - 2 * border_radius) / static_cast<float>(stride1));
34 | int outputwidth = ceil(static_cast<float>(paddedInputWidth - 2 * border_radius) / static_cast<float>(stride1));
35 |
36 | rInput1.resize_({batchSize, paddedInputHeight, paddedInputWidth, nInputChannels});
37 | rInput2.resize_({batchSize, paddedInputHeight, paddedInputWidth, nInputChannels});
38 | output.resize_({batchSize, nOutputChannels, outputHeight, outputwidth});
39 |
40 | rInput1.fill_(0);
41 | rInput2.fill_(0);
42 | output.fill_(0);
43 |
44 | int success = correlation_forward_cuda_kernel(
45 | output,
46 | output.size(0),
47 | output.size(1),
48 | output.size(2),
49 | output.size(3),
50 | output.stride(0),
51 | output.stride(1),
52 | output.stride(2),
53 | output.stride(3),
54 | input1,
55 | input1.size(1),
56 | input1.size(2),
57 | input1.size(3),
58 | input1.stride(0),
59 | input1.stride(1),
60 | input1.stride(2),
61 | input1.stride(3),
62 | input2,
63 | input2.size(1),
64 | input2.stride(0),
65 | input2.stride(1),
66 | input2.stride(2),
67 | input2.stride(3),
68 | rInput1,
69 | rInput2,
70 | pad_size,
71 | kernel_size,
72 | max_displacement,
73 | stride1,
74 | stride2,
75 | corr_type_multiply,
76 | at::cuda::getCurrentCUDAStream()
77 | //at::globalContext().getCurrentCUDAStream()
78 | );
79 |
80 | //check for errors
81 | if (!success) {
82 | AT_ERROR("CUDA call failed");
83 | }
84 |
85 | return 1;
86 |
87 | }
88 |
89 | int correlation_backward_cuda(at::Tensor& input1, at::Tensor& input2, at::Tensor& rInput1, at::Tensor& rInput2, at::Tensor& gradOutput,
90 | at::Tensor& gradInput1, at::Tensor& gradInput2,
91 | int pad_size,
92 | int kernel_size,
93 | int max_displacement,
94 | int stride1,
95 | int stride2,
96 | int corr_type_multiply)
97 | {
98 |
99 | int batchSize = input1.size(0);
100 | int nInputChannels = input1.size(1);
101 | int paddedInputHeight = input1.size(2)+ 2 * pad_size;
102 | int paddedInputWidth = input1.size(3)+ 2 * pad_size;
103 |
104 | int height = input1.size(2);
105 | int width = input1.size(3);
106 |
107 | rInput1.resize_({batchSize, paddedInputHeight, paddedInputWidth, nInputChannels});
108 | rInput2.resize_({batchSize, paddedInputHeight, paddedInputWidth, nInputChannels});
109 | gradInput1.resize_({batchSize, nInputChannels, height, width});
110 | gradInput2.resize_({batchSize, nInputChannels, height, width});
111 |
112 | rInput1.fill_(0);
113 | rInput2.fill_(0);
114 | gradInput1.fill_(0);
115 | gradInput2.fill_(0);
116 |
117 | int success = correlation_backward_cuda_kernel(gradOutput,
118 | gradOutput.size(0),
119 | gradOutput.size(1),
120 | gradOutput.size(2),
121 | gradOutput.size(3),
122 | gradOutput.stride(0),
123 | gradOutput.stride(1),
124 | gradOutput.stride(2),
125 | gradOutput.stride(3),
126 | input1,
127 | input1.size(1),
128 | input1.size(2),
129 | input1.size(3),
130 | input1.stride(0),
131 | input1.stride(1),
132 | input1.stride(2),
133 | input1.stride(3),
134 | input2,
135 | input2.stride(0),
136 | input2.stride(1),
137 | input2.stride(2),
138 | input2.stride(3),
139 | gradInput1,
140 | gradInput1.stride(0),
141 | gradInput1.stride(1),
142 | gradInput1.stride(2),
143 | gradInput1.stride(3),
144 | gradInput2,
145 | gradInput2.size(1),
146 | gradInput2.stride(0),
147 | gradInput2.stride(1),
148 | gradInput2.stride(2),
149 | gradInput2.stride(3),
150 | rInput1,
151 | rInput2,
152 | pad_size,
153 | kernel_size,
154 | max_displacement,
155 | stride1,
156 | stride2,
157 | corr_type_multiply,
158 | at::cuda::getCurrentCUDAStream()
159 | //at::globalContext().getCurrentCUDAStream()
160 | );
161 |
162 | if (!success) {
163 | AT_ERROR("CUDA call failed");
164 | }
165 |
166 | return 1;
167 | }
168 |
169 | PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) {
170 | m.def("forward", &correlation_forward_cuda, "Correlation forward (CUDA)");
171 | m.def("backward", &correlation_backward_cuda, "Correlation backward (CUDA)");
172 | }
173 |
174 |
--------------------------------------------------------------------------------
/dvs/flownet2/networks/correlation_package/correlation_cuda_kernel.cuh:
--------------------------------------------------------------------------------
1 | #pragma once
2 |
3 | #include <ATen/ATen.h>
4 | #include <ATen/Context.h>
5 | #include <cuda_runtime.h>
6 |
7 | int correlation_forward_cuda_kernel(at::Tensor& output,
8 | int ob,
9 | int oc,
10 | int oh,
11 | int ow,
12 | int osb,
13 | int osc,
14 | int osh,
15 | int osw,
16 |
17 | at::Tensor& input1,
18 | int ic,
19 | int ih,
20 | int iw,
21 | int isb,
22 | int isc,
23 | int ish,
24 | int isw,
25 |
26 | at::Tensor& input2,
27 | int gc,
28 | int gsb,
29 | int gsc,
30 | int gsh,
31 | int gsw,
32 |
33 | at::Tensor& rInput1,
34 | at::Tensor& rInput2,
35 | int pad_size,
36 | int kernel_size,
37 | int max_displacement,
38 | int stride1,
39 | int stride2,
40 | int corr_type_multiply,
41 | cudaStream_t stream);
42 |
43 |
44 | int correlation_backward_cuda_kernel(
45 | at::Tensor& gradOutput,
46 | int gob,
47 | int goc,
48 | int goh,
49 | int gow,
50 | int gosb,
51 | int gosc,
52 | int gosh,
53 | int gosw,
54 |
55 | at::Tensor& input1,
56 | int ic,
57 | int ih,
58 | int iw,
59 | int isb,
60 | int isc,
61 | int ish,
62 | int isw,
63 |
64 | at::Tensor& input2,
65 | int gsb,
66 | int gsc,
67 | int gsh,
68 | int gsw,
69 |
70 | at::Tensor& gradInput1,
71 | int gisb,
72 | int gisc,
73 | int gish,
74 | int gisw,
75 |
76 | at::Tensor& gradInput2,
77 | int ggc,
78 | int ggsb,
79 | int ggsc,
80 | int ggsh,
81 | int ggsw,
82 |
83 | at::Tensor& rInput1,
84 | at::Tensor& rInput2,
85 | int pad_size,
86 | int kernel_size,
87 | int max_displacement,
88 | int stride1,
89 | int stride2,
90 | int corr_type_multiply,
91 | cudaStream_t stream);
92 |
--------------------------------------------------------------------------------
/dvs/flownet2/networks/correlation_package/setup.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python3
2 | import os
3 | import torch
4 |
5 | from setuptools import setup, find_packages
6 | from torch.utils.cpp_extension import BuildExtension, CUDAExtension
7 |
8 | cxx_args = ['-std=c++11']
9 |
10 | nvcc_args = [
11 | '-gencode', 'arch=compute_50,code=sm_50',
12 | '-gencode', 'arch=compute_52,code=sm_52',
13 | '-gencode', 'arch=compute_60,code=sm_60',
14 | '-gencode', 'arch=compute_61,code=sm_61',
15 | '-gencode', 'arch=compute_70,code=sm_70',
16 | '-gencode', 'arch=compute_70,code=compute_70'
17 | ]
18 |
19 | setup(
20 | name='correlation_cuda',
21 | ext_modules=[
22 | CUDAExtension('correlation_cuda', [
23 | 'correlation_cuda.cc',
24 | 'correlation_cuda_kernel.cu'
25 | ], extra_compile_args={'cxx': cxx_args, 'nvcc': nvcc_args})
26 | ],
27 | cmdclass={
28 | 'build_ext': BuildExtension
29 | })
30 |
--------------------------------------------------------------------------------
/dvs/flownet2/networks/resample2d_package/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/googleinterns/deep-stabilization/7159c09d21aee3fc2098c64698c1300e40e3a8ea/dvs/flownet2/networks/resample2d_package/__init__.py
--------------------------------------------------------------------------------
/dvs/flownet2/networks/resample2d_package/resample2d.py:
--------------------------------------------------------------------------------
1 | from torch.nn.modules.module import Module
2 | from torch.autograd import Function, Variable
3 | import resample2d_cuda
4 |
5 | class Resample2dFunction(Function):
6 |
7 | @staticmethod
8 | def forward(ctx, input1, input2, kernel_size=1, bilinear= True):
9 | assert input1.is_contiguous()
10 | assert input2.is_contiguous()
11 |
12 | ctx.save_for_backward(input1, input2)
13 | ctx.kernel_size = kernel_size
14 | ctx.bilinear = bilinear
15 |
16 | _, d, _, _ = input1.size()
17 | b, _, h, w = input2.size()
18 | output = input1.new(b, d, h, w).zero_()
19 |
20 | resample2d_cuda.forward(input1, input2, output, kernel_size, bilinear)
21 |
22 | return output
23 |
24 | @staticmethod
25 | def backward(ctx, grad_output):
26 | grad_output = grad_output.contiguous()
27 | assert grad_output.is_contiguous()
28 |
29 | input1, input2 = ctx.saved_tensors
30 |
31 | grad_input1 = Variable(input1.new(input1.size()).zero_())
32 | grad_input2 = Variable(input1.new(input2.size()).zero_())
33 |
34 | resample2d_cuda.backward(input1, input2, grad_output.data,
35 | grad_input1.data, grad_input2.data,
36 | ctx.kernel_size, ctx.bilinear)
37 |
38 | return grad_input1, grad_input2, None, None
39 |
40 | class Resample2d(Module):
41 |
42 | def __init__(self, kernel_size=1, bilinear = True):
43 | super(Resample2d, self).__init__()
44 | self.kernel_size = kernel_size
45 | self.bilinear = bilinear
46 |
47 | def forward(self, input1, input2):
48 | input1_c = input1.contiguous()
49 | return Resample2dFunction.apply(input1_c, input2, self.kernel_size, self.bilinear)
50 |
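Resample2d warps input1 by the flow field in input2 via bilinear sampling at (x + dx, y + dy), as the CUDA kernel further below makes explicit. A grid_sample-based sketch of the same warp (boundary handling approximates the kernel's clamping; assumes a PyTorch version that supports the indexing argument of meshgrid):

import torch
import torch.nn.functional as F

def warp_reference(image, flow):
    # image: (B, C, H, W); flow: (B, 2, H, W) with channel 0 = horizontal, channel 1 = vertical displacement
    B, _, H, W = flow.shape
    ys, xs = torch.meshgrid(torch.arange(H, dtype=flow.dtype, device=flow.device),
                            torch.arange(W, dtype=flow.dtype, device=flow.device), indexing='ij')
    x_new = xs + flow[:, 0]
    y_new = ys + flow[:, 1]
    # normalize sampling coordinates to [-1, 1] as expected by grid_sample
    grid = torch.stack((2.0 * x_new / (W - 1) - 1.0, 2.0 * y_new / (H - 1) - 1.0), dim=-1)
    return F.grid_sample(image, grid, mode='bilinear', padding_mode='border', align_corners=True)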
--------------------------------------------------------------------------------
/dvs/flownet2/networks/resample2d_package/resample2d_cuda.cc:
--------------------------------------------------------------------------------
1 | #include <ATen/ATen.h>
2 | #include <torch/torch.h>
3 |
4 | #include "resample2d_kernel.cuh"
5 |
6 | int resample2d_cuda_forward(
7 | at::Tensor& input1,
8 | at::Tensor& input2,
9 | at::Tensor& output,
10 | int kernel_size, bool bilinear) {
11 | resample2d_kernel_forward(input1, input2, output, kernel_size, bilinear);
12 | return 1;
13 | }
14 |
15 | int resample2d_cuda_backward(
16 | at::Tensor& input1,
17 | at::Tensor& input2,
18 | at::Tensor& gradOutput,
19 | at::Tensor& gradInput1,
20 | at::Tensor& gradInput2,
21 | int kernel_size, bool bilinear) {
22 | resample2d_kernel_backward(input1, input2, gradOutput, gradInput1, gradInput2, kernel_size, bilinear);
23 | return 1;
24 | }
25 |
26 |
27 |
28 | PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) {
29 | m.def("forward", &resample2d_cuda_forward, "Resample2D forward (CUDA)");
30 | m.def("backward", &resample2d_cuda_backward, "Resample2D backward (CUDA)");
31 | }
32 |
33 |
--------------------------------------------------------------------------------
/dvs/flownet2/networks/resample2d_package/resample2d_kernel.cu:
--------------------------------------------------------------------------------
1 | #include <ATen/ATen.h>
2 | #include <ATen/Context.h>
3 | #include <ATen/cuda/CUDAContext.h>
4 |
5 | #define CUDA_NUM_THREADS 512
6 | #define THREADS_PER_BLOCK 64
7 |
8 | #define DIM0(TENSOR) ((TENSOR).x)
9 | #define DIM1(TENSOR) ((TENSOR).y)
10 | #define DIM2(TENSOR) ((TENSOR).z)
11 | #define DIM3(TENSOR) ((TENSOR).w)
12 |
13 | #define DIM3_INDEX(TENSOR, xx, yy, zz, ww) ((TENSOR)[((xx) * (TENSOR##_stride.x)) + ((yy) * (TENSOR##_stride.y)) + ((zz) * (TENSOR##_stride.z)) + ((ww) * (TENSOR##_stride.w))])
14 |
15 | template <typename scalar_t>
16 | __global__ void kernel_resample2d_update_output(const int n,
17 | const scalar_t* __restrict__ input1, const long4 input1_size, const long4 input1_stride,
18 | const scalar_t* __restrict__ input2, const long4 input2_size, const long4 input2_stride,
19 | scalar_t* __restrict__ output, const long4 output_size, const long4 output_stride, int kernel_size, bool bilinear) {
20 | int index = blockIdx.x * blockDim.x + threadIdx.x;
21 |
22 | if (index >= n) {
23 | return;
24 | }
25 |
26 | scalar_t val = 0.0f;
27 |
28 | int dim_b = DIM0(output_size);
29 | int dim_c = DIM1(output_size);
30 | int dim_h = DIM2(output_size);
31 | int dim_w = DIM3(output_size);
32 | int dim_chw = dim_c * dim_h * dim_w;
33 | int dim_hw = dim_h * dim_w;
34 |
35 | int b = ( index / dim_chw ) % dim_b;
36 | int c = ( index / dim_hw ) % dim_c;
37 | int y = ( index / dim_w ) % dim_h;
38 | int x = ( index ) % dim_w;
39 |
40 | scalar_t dx = DIM3_INDEX(input2, b, 0, y, x);
41 | scalar_t dy = DIM3_INDEX(input2, b, 1, y, x);
42 |
43 | scalar_t xf = static_cast<scalar_t>(x) + dx;
44 | scalar_t yf = static_cast<scalar_t>(y) + dy;
45 | scalar_t alpha = xf - floor(xf); // alpha
46 | scalar_t beta = yf - floor(yf); // beta
47 |
48 | if (bilinear) {
49 | int xL = max(min( int (floor(xf)), dim_w-1), 0);
50 | int xR = max(min( int (floor(xf)+1), dim_w -1), 0);
51 | int yT = max(min( int (floor(yf)), dim_h-1), 0);
52 | int yB = max(min( int (floor(yf)+1), dim_h-1), 0);
53 |
54 | for (int fy = 0; fy < kernel_size; fy += 1) {
55 | for (int fx = 0; fx < kernel_size; fx += 1) {
56 | val += static_cast<scalar_t>((1. - alpha)*(1. - beta) * DIM3_INDEX(input1, b, c, yT + fy, xL + fx));
57 | val += static_cast<scalar_t>((alpha)*(1. - beta) * DIM3_INDEX(input1, b, c, yT + fy, xR + fx));
58 | val += static_cast<scalar_t>((1. - alpha)*(beta) * DIM3_INDEX(input1, b, c, yB + fy, xL + fx));
59 | val += static_cast<scalar_t>((alpha)*(beta) * DIM3_INDEX(input1, b, c, yB + fy, xR + fx));
60 | }
61 | }
62 |
63 | output[index] = val;
64 | }
65 | else {
66 | int xN = max(min( int (floor(xf + 0.5)), dim_w - 1), 0);
67 | int yN = max(min( int (floor(yf + 0.5)), dim_h - 1), 0);
68 |
69 | output[index] = static_cast<scalar_t> ( DIM3_INDEX(input1, b, c, yN, xN) );
70 | }
71 |
72 | }
73 |
74 |
75 | template <typename scalar_t>
76 | __global__ void kernel_resample2d_backward_input1(
77 | const int n, const scalar_t* __restrict__ input1, const long4 input1_size, const long4 input1_stride,
78 | const scalar_t* __restrict__ input2, const long4 input2_size, const long4 input2_stride,
79 | const scalar_t* __restrict__ gradOutput, const long4 gradOutput_size, const long4 gradOutput_stride,
80 | scalar_t* __restrict__ gradInput, const long4 gradInput_size, const long4 gradInput_stride, int kernel_size, bool bilinear) {
81 |
82 | int index = blockIdx.x * blockDim.x + threadIdx.x;
83 |
84 | if (index >= n) {
85 | return;
86 | }
87 |
88 | int dim_b = DIM0(gradOutput_size);
89 | int dim_c = DIM1(gradOutput_size);
90 | int dim_h = DIM2(gradOutput_size);
91 | int dim_w = DIM3(gradOutput_size);
92 | int dim_chw = dim_c * dim_h * dim_w;
93 | int dim_hw = dim_h * dim_w;
94 |
95 | int b = ( index / dim_chw ) % dim_b;
96 | int c = ( index / dim_hw ) % dim_c;
97 | int y = ( index / dim_w ) % dim_h;
98 | int x = ( index ) % dim_w;
99 |
100 | scalar_t dx = DIM3_INDEX(input2, b, 0, y, x);
101 | scalar_t dy = DIM3_INDEX(input2, b, 1, y, x);
102 |
103 | scalar_t xf = static_cast<scalar_t>(x) + dx;
104 | scalar_t yf = static_cast<scalar_t>(y) + dy;
105 | scalar_t alpha = xf - int(xf); // alpha
106 | scalar_t beta = yf - int(yf); // beta
107 |
108 | int idim_h = DIM2(input1_size);
109 | int idim_w = DIM3(input1_size);
110 |
111 | int xL = max(min( int (floor(xf)), idim_w-1), 0);
112 | int xR = max(min( int (floor(xf)+1), idim_w -1), 0);
113 | int yT = max(min( int (floor(yf)), idim_h-1), 0);
114 | int yB = max(min( int (floor(yf)+1), idim_h-1), 0);
115 |
116 | for (int fy = 0; fy < kernel_size; fy += 1) {
117 | for (int fx = 0; fx < kernel_size; fx += 1) {
118 | atomicAdd(&DIM3_INDEX(gradInput, b, c, (yT + fy), (xL + fx)), (1-alpha)*(1-beta) * DIM3_INDEX(gradOutput, b, c, y, x));
119 | atomicAdd(&DIM3_INDEX(gradInput, b, c, (yT + fy), (xR + fx)), (alpha)*(1-beta) * DIM3_INDEX(gradOutput, b, c, y, x));
120 | atomicAdd(&DIM3_INDEX(gradInput, b, c, (yB + fy), (xL + fx)), (1-alpha)*(beta) * DIM3_INDEX(gradOutput, b, c, y, x));
121 | atomicAdd(&DIM3_INDEX(gradInput, b, c, (yB + fy), (xR + fx)), (alpha)*(beta) * DIM3_INDEX(gradOutput, b, c, y, x));
122 | }
123 | }
124 |
125 | }
126 |
127 | template <typename scalar_t>
128 | __global__ void kernel_resample2d_backward_input2(
129 | const int n, const scalar_t* __restrict__ input1, const long4 input1_size, const long4 input1_stride,
130 | const scalar_t* __restrict__ input2, const long4 input2_size, const long4 input2_stride,
131 | const scalar_t* __restrict__ gradOutput, const long4 gradOutput_size, const long4 gradOutput_stride,
132 | scalar_t* __restrict__ gradInput, const long4 gradInput_size, const long4 gradInput_stride, int kernel_size, bool bilinear) {
133 |
134 | int index = blockIdx.x * blockDim.x + threadIdx.x;
135 |
136 | if (index >= n) {
137 | return;
138 | }
139 |
140 | scalar_t output = 0.0;
141 | int kernel_rad = (kernel_size - 1)/2;
142 |
143 | int dim_b = DIM0(gradInput_size);
144 | int dim_c = DIM1(gradInput_size);
145 | int dim_h = DIM2(gradInput_size);
146 | int dim_w = DIM3(gradInput_size);
147 | int dim_chw = dim_c * dim_h * dim_w;
148 | int dim_hw = dim_h * dim_w;
149 |
150 | int b = ( index / dim_chw ) % dim_b;
151 | int c = ( index / dim_hw ) % dim_c;
152 | int y = ( index / dim_w ) % dim_h;
153 | int x = ( index ) % dim_w;
154 |
155 | int odim_c = DIM1(gradOutput_size);
156 |
157 | scalar_t dx = DIM3_INDEX(input2, b, 0, y, x);
158 | scalar_t dy = DIM3_INDEX(input2, b, 1, y, x);
159 |
160 | scalar_t xf = static_cast<scalar_t>(x) + dx;
161 | scalar_t yf = static_cast<scalar_t>(y) + dy;
162 |
163 | int xL = max(min( int (floor(xf)), dim_w-1), 0);
164 | int xR = max(min( int (floor(xf)+1), dim_w -1), 0);
165 | int yT = max(min( int (floor(yf)), dim_h-1), 0);
166 | int yB = max(min( int (floor(yf)+1), dim_h-1), 0);
167 |
168 | if (c % 2) {
169 | float gamma = 1 - (xf - floor(xf)); // alpha
170 | for (int i = 0; i <= 2*kernel_rad; ++i) {
171 | for (int j = 0; j <= 2*kernel_rad; ++j) {
172 | for (int ch = 0; ch < odim_c; ++ch) {
173 | output += (gamma) * DIM3_INDEX(gradOutput, b, ch, y, x) * DIM3_INDEX(input1, b, ch, (yB + j), (xL + i));
174 | output -= (gamma) * DIM3_INDEX(gradOutput, b, ch, y, x) * DIM3_INDEX(input1, b, ch, (yT + j), (xL + i));
175 | output += (1-gamma) * DIM3_INDEX(gradOutput, b, ch, y, x) * DIM3_INDEX(input1, b, ch, (yB + j), (xR + i));
176 | output -= (1-gamma) * DIM3_INDEX(gradOutput, b, ch, y, x) * DIM3_INDEX(input1, b, ch, (yT + j), (xR + i));
177 | }
178 | }
179 | }
180 | }
181 | else {
182 | float gamma = 1 - (yf - floor(yf)); // alpha
183 | for (int i = 0; i <= 2*kernel_rad; ++i) {
184 | for (int j = 0; j <= 2*kernel_rad; ++j) {
185 | for (int ch = 0; ch < odim_c; ++ch) {
186 | output += (gamma) * DIM3_INDEX(gradOutput, b, ch, y, x) * DIM3_INDEX(input1, b, ch, (yT + j), (xR + i));
187 | output -= (gamma) * DIM3_INDEX(gradOutput, b, ch, y, x) * DIM3_INDEX(input1, b, ch, (yT + j), (xL + i));
188 | output += (1-gamma) * DIM3_INDEX(gradOutput, b, ch, y, x) * DIM3_INDEX(input1, b, ch, (yB + j), (xR + i));
189 | output -= (1-gamma) * DIM3_INDEX(gradOutput, b, ch, y, x) * DIM3_INDEX(input1, b, ch, (yB + j), (xL + i));
190 | }
191 | }
192 | }
193 |
194 | }
195 |
196 | gradInput[index] = output;
197 |
198 | }
199 |
200 | void resample2d_kernel_forward(
201 | at::Tensor& input1,
202 | at::Tensor& input2,
203 | at::Tensor& output,
204 | int kernel_size,
205 | bool bilinear) {
206 |
207 | int n = output.numel();
208 |
209 | const long4 input1_size = make_long4(input1.size(0), input1.size(1), input1.size(2), input1.size(3));
210 | const long4 input1_stride = make_long4(input1.stride(0), input1.stride(1), input1.stride(2), input1.stride(3));
211 |
212 | const long4 input2_size = make_long4(input2.size(0), input2.size(1), input2.size(2), input2.size(3));
213 | const long4 input2_stride = make_long4(input2.stride(0), input2.stride(1), input2.stride(2), input2.stride(3));
214 |
215 | const long4 output_size = make_long4(output.size(0), output.size(1), output.size(2), output.size(3));
216 | const long4 output_stride = make_long4(output.stride(0), output.stride(1), output.stride(2), output.stride(3));
217 |
218 | // TODO: when atomicAdd gets resolved, change to AT_DISPATCH_FLOATING_TYPES_AND_HALF
219 | // AT_DISPATCH_FLOATING_TYPES(input1.type(), "resample_forward_kernel", ([&] {
220 |
221 | kernel_resample2d_update_output<float><<< (n + CUDA_NUM_THREADS - 1)/CUDA_NUM_THREADS, CUDA_NUM_THREADS, 0, at::cuda::getCurrentCUDAStream() >>>(
222 | //at::globalContext().getCurrentCUDAStream() >>>(
223 | n,
224 |         input1.data<float>(),
225 | input1_size,
226 | input1_stride,
227 |         input2.data<float>(),
228 | input2_size,
229 | input2_stride,
230 |         output.data<float>(),
231 | output_size,
232 | output_stride,
233 | kernel_size,
234 | bilinear);
235 |
236 | // }));
237 |
238 | // TODO: ATen-equivalent check
239 |
240 | // THCudaCheck(cudaGetLastError());
241 |
242 | }
243 |
244 | void resample2d_kernel_backward(
245 | at::Tensor& input1,
246 | at::Tensor& input2,
247 | at::Tensor& gradOutput,
248 | at::Tensor& gradInput1,
249 | at::Tensor& gradInput2,
250 | int kernel_size,
251 | bool bilinear) {
252 |
253 | int n = gradOutput.numel();
254 |
255 | const long4 input1_size = make_long4(input1.size(0), input1.size(1), input1.size(2), input1.size(3));
256 | const long4 input1_stride = make_long4(input1.stride(0), input1.stride(1), input1.stride(2), input1.stride(3));
257 |
258 | const long4 input2_size = make_long4(input2.size(0), input2.size(1), input2.size(2), input2.size(3));
259 | const long4 input2_stride = make_long4(input2.stride(0), input2.stride(1), input2.stride(2), input2.stride(3));
260 |
261 | const long4 gradOutput_size = make_long4(gradOutput.size(0), gradOutput.size(1), gradOutput.size(2), gradOutput.size(3));
262 | const long4 gradOutput_stride = make_long4(gradOutput.stride(0), gradOutput.stride(1), gradOutput.stride(2), gradOutput.stride(3));
263 |
264 | const long4 gradInput1_size = make_long4(gradInput1.size(0), gradInput1.size(1), gradInput1.size(2), gradInput1.size(3));
265 | const long4 gradInput1_stride = make_long4(gradInput1.stride(0), gradInput1.stride(1), gradInput1.stride(2), gradInput1.stride(3));
266 |
267 | // AT_DISPATCH_FLOATING_TYPES(input1.type(), "resample_backward_input1", ([&] {
268 |
269 |     kernel_resample2d_backward_input1<float><<< (n + CUDA_NUM_THREADS - 1)/CUDA_NUM_THREADS, CUDA_NUM_THREADS, 0, at::cuda::getCurrentCUDAStream() >>>(
270 | //at::globalContext().getCurrentCUDAStream() >>>(
271 | n,
272 |         input1.data<float>(),
273 | input1_size,
274 | input1_stride,
275 |         input2.data<float>(),
276 | input2_size,
277 | input2_stride,
278 |         gradOutput.data<float>(),
279 | gradOutput_size,
280 | gradOutput_stride,
281 |         gradInput1.data<float>(),
282 | gradInput1_size,
283 | gradInput1_stride,
284 | kernel_size,
285 | bilinear
286 | );
287 |
288 | // }));
289 |
290 | const long4 gradInput2_size = make_long4(gradInput2.size(0), gradInput2.size(1), gradInput2.size(2), gradInput2.size(3));
291 | const long4 gradInput2_stride = make_long4(gradInput2.stride(0), gradInput2.stride(1), gradInput2.stride(2), gradInput2.stride(3));
292 |
293 | n = gradInput2.numel();
294 |
295 | // AT_DISPATCH_FLOATING_TYPES(gradInput2.type(), "resample_backward_input2", ([&] {
296 |
297 |
298 |     kernel_resample2d_backward_input2<float><<< (n + CUDA_NUM_THREADS - 1)/CUDA_NUM_THREADS, CUDA_NUM_THREADS, 0, at::cuda::getCurrentCUDAStream() >>>(
299 | //at::globalContext().getCurrentCUDAStream() >>>(
300 | n,
301 |         input1.data<float>(),
302 | input1_size,
303 | input1_stride,
304 |         input2.data<float>(),
305 | input2_size,
306 | input2_stride,
307 |         gradOutput.data<float>(),
308 | gradOutput_size,
309 | gradOutput_stride,
310 |         gradInput2.data<float>(),
311 | gradInput2_size,
312 | gradInput2_stride,
313 | kernel_size,
314 | bilinear
315 | );
316 |
317 | // }));
318 |
319 | // TODO: Use the ATen equivalent to get last error
320 |
321 | // THCudaCheck(cudaGetLastError());
322 |
323 | }
324 |
--------------------------------------------------------------------------------
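Every kernel in resample2d_kernel.cu maps one CUDA thread to one output element and unpacks the flat thread index into (batch, channel, row, column) coordinates, as in the index arithmetic near the top of the backward_input2 kernel above. A minimal Python sketch of that unpacking, for illustration only (not part of the repo):

def unpack_index(index, dim_c, dim_h, dim_w):
    # flat index -> (b, c, y, x) for a contiguous NCHW tensor
    x = index % dim_w
    y = (index // dim_w) % dim_h
    c = (index // (dim_h * dim_w)) % dim_c
    b = index // (dim_c * dim_h * dim_w)
    return b, c, y, x

# Example: element 67 of a 2x3x4x5 tensor lives at (b, c, y, x) = (1, 0, 1, 2).
print(unpack_index(67, dim_c=3, dim_h=4, dim_w=5))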
/dvs/flownet2/networks/resample2d_package/resample2d_kernel.cuh:
--------------------------------------------------------------------------------
1 | #pragma once
2 |
3 | #include <ATen/ATen.h>
4 |
5 | void resample2d_kernel_forward(
6 | at::Tensor& input1,
7 | at::Tensor& input2,
8 | at::Tensor& output,
9 | int kernel_size,
10 | bool bilinear);
11 |
12 | void resample2d_kernel_backward(
13 | at::Tensor& input1,
14 | at::Tensor& input2,
15 | at::Tensor& gradOutput,
16 | at::Tensor& gradInput1,
17 | at::Tensor& gradInput2,
18 | int kernel_size,
19 | bool bilinear);
--------------------------------------------------------------------------------
/dvs/flownet2/networks/resample2d_package/setup.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python3
2 | import os
3 | import torch
4 |
5 | from setuptools import setup
6 | from torch.utils.cpp_extension import BuildExtension, CUDAExtension
7 |
8 | cxx_args = ['-std=c++11']
9 |
10 | nvcc_args = [
11 | '-gencode', 'arch=compute_50,code=sm_50',
12 | '-gencode', 'arch=compute_52,code=sm_52',
13 | '-gencode', 'arch=compute_60,code=sm_60',
14 | '-gencode', 'arch=compute_61,code=sm_61',
15 | '-gencode', 'arch=compute_70,code=sm_70',
16 | '-gencode', 'arch=compute_70,code=compute_70'
17 | ]
18 |
19 | setup(
20 | name='resample2d_cuda',
21 | ext_modules=[
22 | CUDAExtension('resample2d_cuda', [
23 | 'resample2d_cuda.cc',
24 | 'resample2d_kernel.cu'
25 | ], extra_compile_args={'cxx': cxx_args, 'nvcc': nvcc_args})
26 | ],
27 | cmdclass={
28 | 'build_ext': BuildExtension
29 | })
30 |
--------------------------------------------------------------------------------
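The -gencode list in setup.py only targets sm_50 through sm_70, so on a newer GPU the extension may build yet fail when a kernel launches. A quick pre-build check of what your device actually needs (a minimal sketch, assuming a CUDA-enabled PyTorch install; not part of the repo):

import torch

if torch.cuda.is_available():
    major, minor = torch.cuda.get_device_capability(0)
    print(f"this GPU needs: -gencode arch=compute_{major}{minor},code=sm_{major}{minor}")
else:
    print("no CUDA device visible; the resample2d extension cannot run here")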
/dvs/flownet2/networks/submodules.py:
--------------------------------------------------------------------------------
1 | # freda (todo) :
2 |
3 | import torch.nn as nn
4 | import torch
5 | import numpy as np
6 |
7 | def conv(batchNorm, in_planes, out_planes, kernel_size=3, stride=1):
8 | if batchNorm:
9 | return nn.Sequential(
10 | nn.Conv2d(in_planes, out_planes, kernel_size=kernel_size, stride=stride, padding=(kernel_size-1)//2, bias=False),
11 | nn.BatchNorm2d(out_planes),
12 | nn.LeakyReLU(0.1,inplace=True)
13 | )
14 | else:
15 | return nn.Sequential(
16 | nn.Conv2d(in_planes, out_planes, kernel_size=kernel_size, stride=stride, padding=(kernel_size-1)//2, bias=True),
17 | nn.LeakyReLU(0.1,inplace=True)
18 | )
19 |
20 | def i_conv(batchNorm, in_planes, out_planes, kernel_size=3, stride=1, bias = True):
21 | if batchNorm:
22 | return nn.Sequential(
23 | nn.Conv2d(in_planes, out_planes, kernel_size=kernel_size, stride=stride, padding=(kernel_size-1)//2, bias=bias),
24 | nn.BatchNorm2d(out_planes),
25 | )
26 | else:
27 | return nn.Sequential(
28 | nn.Conv2d(in_planes, out_planes, kernel_size=kernel_size, stride=stride, padding=(kernel_size-1)//2, bias=bias),
29 | )
30 |
31 | def predict_flow(in_planes):
32 | return nn.Conv2d(in_planes,2,kernel_size=3,stride=1,padding=1,bias=True)
33 |
34 | def deconv(in_planes, out_planes):
35 | return nn.Sequential(
36 | nn.ConvTranspose2d(in_planes, out_planes, kernel_size=4, stride=2, padding=1, bias=True),
37 | nn.LeakyReLU(0.1,inplace=True)
38 | )
39 |
40 | class tofp16(nn.Module):
41 | def __init__(self):
42 | super(tofp16, self).__init__()
43 |
44 | def forward(self, input):
45 | return input.half()
46 |
47 |
48 | class tofp32(nn.Module):
49 | def __init__(self):
50 | super(tofp32, self).__init__()
51 |
52 | def forward(self, input):
53 | return input.float()
54 |
55 |
56 | def init_deconv_bilinear(weight):
57 | f_shape = weight.size()
58 | heigh, width = f_shape[-2], f_shape[-1]
59 | f = np.ceil(width/2.0)
60 | c = (2 * f - 1 - f % 2) / (2.0 * f)
61 | bilinear = np.zeros([heigh, width])
62 | for x in range(width):
63 | for y in range(heigh):
64 | value = (1 - abs(x / f - c)) * (1 - abs(y / f - c))
65 | bilinear[x, y] = value
66 | weight.data.fill_(0.)
67 | for i in range(f_shape[0]):
68 | for j in range(f_shape[1]):
69 | weight.data[i,j,:,:] = torch.from_numpy(bilinear)
70 |
71 |
72 | def save_grad(grads, name):
73 | def hook(grad):
74 | grads[name] = grad
75 | return hook
76 |
77 | '''
78 | def save_grad(grads, name):
79 | def hook(grad):
80 | grads[name] = grad
81 | return hook
82 | import torch
83 | from channelnorm_package.modules.channelnorm import ChannelNorm
84 | model = ChannelNorm().cuda()
85 | grads = {}
86 | a = 100*torch.autograd.Variable(torch.randn((1,3,5,5)).cuda(), requires_grad=True)
87 | a.register_hook(save_grad(grads, 'a'))
88 | b = model(a)
89 | y = torch.mean(b)
90 | y.backward()
91 |
92 | '''
93 |
--------------------------------------------------------------------------------
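The builders in submodules.py are what the FlowNet variants chain together: conv halves the resolution when stride=2, deconv doubles it, and predict_flow maps features to a 2-channel (u, v) field. A small shape-check sketch, for illustration only (it assumes dvs/flownet2/networks is on sys.path so the file imports as submodules):

import torch
from submodules import conv, deconv, predict_flow

x = torch.randn(1, 6, 64, 64)                                  # e.g. two stacked RGB frames
feat = conv(False, in_planes=6, out_planes=64, stride=2)(x)    # -> [1, 64, 32, 32]
up = deconv(64, 32)(feat)                                      # -> [1, 32, 64, 64]
flow = predict_flow(32)(up)                                    # -> [1, 2, 64, 64]
print(flow.shape)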
/dvs/flownet2/run.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | python main.py --inference --model FlowNet2 --save_flow --inference_dataset Google \
3 | --inference_dataset_root ./../video \
4 | --resume ./FlowNet2_checkpoint.pth.tar \
5 | --inference_visualize
6 |
--------------------------------------------------------------------------------
/dvs/flownet2/run_release.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | python main.py --inference --model FlowNet2 --save_flow --inference_dataset Google \
3 | --inference_dataset_root ./../dataset_release/test \
4 | --resume ./FlowNet2_checkpoint.pth.tar \
5 | --inference_visualize
6 |
7 | python main.py --inference --model FlowNet2 --save_flow --inference_dataset Google \
8 | --inference_dataset_root ./../dataset_release/training \
9 | --resume ./FlowNet2_checkpoint.pth.tar \
10 | --inference_visualize
--------------------------------------------------------------------------------
/dvs/flownet2/utils/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/googleinterns/deep-stabilization/7159c09d21aee3fc2098c64698c1300e40e3a8ea/dvs/flownet2/utils/__init__.py
--------------------------------------------------------------------------------
/dvs/flownet2/utils/flow_utils.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | import matplotlib.pyplot as plt
3 | import os.path
4 |
5 | TAG_CHAR = np.array([202021.25], np.float32)
6 |
7 | def readFlow(fn):
8 | """ Read .flo file in Middlebury format"""
9 | # Code adapted from:
10 | # http://stackoverflow.com/questions/28013200/reading-middlebury-flow-files-with-python-bytes-array-numpy
11 |
12 | # WARNING: this will work on little-endian architectures (eg Intel x86) only!
13 | # print 'fn = %s'%(fn)
14 | with open(fn, 'rb') as f:
15 | magic = np.fromfile(f, np.float32, count=1)
16 | if 202021.25 != magic:
17 | print('Magic number incorrect. Invalid .flo file')
18 | return None
19 | else:
20 | w = np.fromfile(f, np.int32, count=1)
21 | h = np.fromfile(f, np.int32, count=1)
22 | # print 'Reading %d x %d flo file\n' % (w, h)
23 | data = np.fromfile(f, np.float32, count=2*int(w)*int(h))
24 | # Reshape data into 3D array (columns, rows, bands)
25 | # The reshape here is for visualization, the original code is (w,h,2)
26 | return np.resize(data, (int(h), int(w), 2))
27 |
28 | def writeFlow(filename,uv,v=None):
29 | """ Write optical flow to file.
30 |
31 | If v is None, uv is assumed to contain both u and v channels,
32 | stacked in depth.
33 | Original code by Deqing Sun, adapted from Daniel Scharstein.
34 | """
35 | nBands = 2
36 |
37 | if v is None:
38 | assert(uv.ndim == 3)
39 | assert(uv.shape[2] == 2)
40 | u = uv[:,:,0]
41 | v = uv[:,:,1]
42 | else:
43 | u = uv
44 |
45 | assert(u.shape == v.shape)
46 | height,width = u.shape
47 | f = open(filename,'wb')
48 | # write the header
49 | f.write(TAG_CHAR)
50 | np.array(width).astype(np.int32).tofile(f)
51 | np.array(height).astype(np.int32).tofile(f)
52 | # arrange into matrix form
53 | tmp = np.zeros((height, width*nBands))
54 | tmp[:,np.arange(width)*2] = u
55 | tmp[:,np.arange(width)*2 + 1] = v
56 | tmp.astype(np.float32).tofile(f)
57 | f.close()
58 |
59 |
60 | # ref: https://github.com/sampepose/flownet2-tf/
61 | # blob/18f87081db44939414fc4a48834f9e0da3e69f4c/src/flowlib.py#L240
62 | def visulize_flow_file(flow_filename, save_dir=None):
63 | flow_data = readFlow(flow_filename)
64 | img = flow2img(flow_data)
65 | # plt.imshow(img)
66 | # plt.show()
67 | if save_dir:
68 | idx = flow_filename.rfind("/") + 1
69 | plt.imsave(os.path.join(save_dir, "%s-vis.png" % flow_filename[idx:-4]), img)
70 |
71 |
72 | def flow2img(flow_data):
73 | """
74 | convert optical flow into color image
75 | :param flow_data:
76 | :return: color image
77 | """
78 | # print(flow_data.shape)
79 | # print(type(flow_data))
80 | u = flow_data[:, :, 0]
81 | v = flow_data[:, :, 1]
82 |
83 | UNKNOW_FLOW_THRESHOLD = 1e7
84 | pr1 = abs(u) > UNKNOW_FLOW_THRESHOLD
85 | pr2 = abs(v) > UNKNOW_FLOW_THRESHOLD
86 | idx_unknown = (pr1 | pr2)
87 | u[idx_unknown] = v[idx_unknown] = 0
88 |
89 | # get max value in each direction
90 | maxu = -999.
91 | maxv = -999.
92 | minu = 999.
93 | minv = 999.
94 | maxu = max(maxu, np.max(u))
95 | maxv = max(maxv, np.max(v))
96 | minu = min(minu, np.min(u))
97 | minv = min(minv, np.min(v))
98 |
99 | rad = np.sqrt(u ** 2 + v ** 2)
100 | maxrad = max(-1, np.max(rad))
101 | u = u / maxrad + np.finfo(float).eps
102 | v = v / maxrad + np.finfo(float).eps
103 |
104 | img = compute_color(u, v)
105 |
106 | idx = np.repeat(idx_unknown[:, :, np.newaxis], 3, axis=2)
107 | img[idx] = 0
108 |
109 | return np.uint8(img)
110 |
111 |
112 | def compute_color(u, v):
113 | """
114 | compute optical flow color map
115 | :param u: horizontal optical flow
116 | :param v: vertical optical flow
117 | :return:
118 | """
119 |
120 | height, width = u.shape
121 | img = np.zeros((height, width, 3))
122 |
123 | NAN_idx = np.isnan(u) | np.isnan(v)
124 | u[NAN_idx] = v[NAN_idx] = 0
125 |
126 | colorwheel = make_color_wheel()
127 | ncols = np.size(colorwheel, 0)
128 |
129 | rad = np.sqrt(u ** 2 + v ** 2)
130 |
131 | a = np.arctan2(-v, -u) / np.pi
132 |
133 | fk = (a + 1) / 2 * (ncols - 1) + 1
134 |
135 | k0 = np.floor(fk).astype(int)
136 |
137 | k1 = k0 + 1
138 | k1[k1 == ncols + 1] = 1
139 | f = fk - k0
140 |
141 | for i in range(0, np.size(colorwheel, 1)):
142 | tmp = colorwheel[:, i]
143 | col0 = tmp[k0 - 1] / 255
144 | col1 = tmp[k1 - 1] / 255
145 | col = (1 - f) * col0 + f * col1
146 |
147 | idx = rad <= 1
148 | col[idx] = 1 - rad[idx] * (1 - col[idx])
149 | notidx = np.logical_not(idx)
150 |
151 | col[notidx] *= 0.75
152 | img[:, :, i] = np.uint8(np.floor(255 * col * (1 - NAN_idx)))
153 |
154 | return img
155 |
156 |
157 | def make_color_wheel():
158 | """
159 |     Generate the color wheel according to the Middlebury color code
160 | :return: Color wheel
161 | """
162 | RY = 15
163 | YG = 6
164 | GC = 4
165 | CB = 11
166 | BM = 13
167 | MR = 6
168 |
169 | ncols = RY + YG + GC + CB + BM + MR
170 |
171 | colorwheel = np.zeros([ncols, 3])
172 |
173 | col = 0
174 |
175 | # RY
176 | colorwheel[0:RY, 0] = 255
177 | colorwheel[0:RY, 1] = np.transpose(np.floor(255 * np.arange(0, RY) / RY))
178 | col += RY
179 |
180 | # YG
181 | colorwheel[col:col + YG, 0] = 255 - np.transpose(np.floor(255 * np.arange(0, YG) / YG))
182 | colorwheel[col:col + YG, 1] = 255
183 | col += YG
184 |
185 | # GC
186 | colorwheel[col:col + GC, 1] = 255
187 | colorwheel[col:col + GC, 2] = np.transpose(np.floor(255 * np.arange(0, GC) / GC))
188 | col += GC
189 |
190 | # CB
191 | colorwheel[col:col + CB, 1] = 255 - np.transpose(np.floor(255 * np.arange(0, CB) / CB))
192 | colorwheel[col:col + CB, 2] = 255
193 | col += CB
194 |
195 | # BM
196 | colorwheel[col:col + BM, 2] = 255
197 | colorwheel[col:col + BM, 0] = np.transpose(np.floor(255 * np.arange(0, BM) / BM))
198 |     col += BM
199 |
200 | # MR
201 | colorwheel[col:col + MR, 2] = 255 - np.transpose(np.floor(255 * np.arange(0, MR) / MR))
202 | colorwheel[col:col + MR, 0] = 255
203 |
204 | return colorwheel
205 |
--------------------------------------------------------------------------------
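readFlow and writeFlow above implement the Middlebury .flo layout: a float32 magic number, int32 width and height, then interleaved u/v values. A round-trip sanity check (a minimal sketch with a hypothetical file name; assumes dvs/flownet2/utils is on sys.path):

import numpy as np
from flow_utils import readFlow, writeFlow, flow2img

flow = np.random.randn(4, 6, 2).astype(np.float32)   # H x W x 2 (u, v)
writeFlow("example.flo", flow)
flow_back = readFlow("example.flo")                   # H x W x 2 again
assert flow_back.shape == (4, 6, 2)
rgb = flow2img(flow_back.copy())                      # uint8 H x W x 3 color coding
print(rgb.shape, rgb.dtype)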
/dvs/flownet2/utils/frame_utils.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | from os.path import *
3 | from imageio import imread
4 | from . import flow_utils
5 |
6 | def read_gen(file_name):
7 | ext = splitext(file_name)[-1]
8 | if ext == '.png' or ext == '.jpeg' or ext == '.ppm' or ext == '.jpg':
9 | im = imread(file_name)
10 | if im.shape[2] > 3:
11 | return im[:,:,:3]
12 | else:
13 | return im
14 | elif ext == '.bin' or ext == '.raw':
15 | return np.load(file_name)
16 | elif ext == '.flo':
17 | return flow_utils.readFlow(file_name).astype(np.float32)
18 | return []
19 |
--------------------------------------------------------------------------------
/dvs/flownet2/utils/param_utils.py:
--------------------------------------------------------------------------------
1 | import torch
2 | import torch.nn as nn
3 | import numpy as np
4 |
5 | def parse_flownetc(modules, weights, biases):
6 | keys = [
7 | 'conv1',
8 | 'conv2',
9 | 'conv3',
10 | 'conv_redir',
11 | 'conv3_1',
12 | 'conv4',
13 | 'conv4_1',
14 | 'conv5',
15 | 'conv5_1',
16 | 'conv6',
17 | 'conv6_1',
18 |
19 | 'deconv5',
20 | 'deconv4',
21 | 'deconv3',
22 | 'deconv2',
23 |
24 | 'Convolution1',
25 | 'Convolution2',
26 | 'Convolution3',
27 | 'Convolution4',
28 | 'Convolution5',
29 |
30 | 'upsample_flow6to5',
31 | 'upsample_flow5to4',
32 | 'upsample_flow4to3',
33 | 'upsample_flow3to2',
34 |
35 | ]
36 | i = 0
37 | for m in modules:
38 | if isinstance(m, nn.Conv2d) or isinstance(m, nn.ConvTranspose2d):
39 | weight = weights[keys[i]].copy()
40 | bias = biases[keys[i]].copy()
41 | if keys[i] == 'conv1':
42 | m.weight.data[:,:,:,:] = torch.from_numpy(np.flip(weight, axis=1).copy())
43 | m.bias.data[:] = torch.from_numpy(bias)
44 | else:
45 | m.weight.data[:,:,:,:] = torch.from_numpy(weight)
46 | m.bias.data[:] = torch.from_numpy(bias)
47 |
48 | i = i + 1
49 | return
50 |
51 | def parse_flownets(modules, weights, biases, param_prefix='net2_'):
52 | keys = [
53 | 'conv1',
54 | 'conv2',
55 | 'conv3',
56 | 'conv3_1',
57 | 'conv4',
58 | 'conv4_1',
59 | 'conv5',
60 | 'conv5_1',
61 | 'conv6',
62 | 'conv6_1',
63 |
64 | 'deconv5',
65 | 'deconv4',
66 | 'deconv3',
67 | 'deconv2',
68 |
69 | 'predict_conv6',
70 | 'predict_conv5',
71 | 'predict_conv4',
72 | 'predict_conv3',
73 | 'predict_conv2',
74 |
75 | 'upsample_flow6to5',
76 | 'upsample_flow5to4',
77 | 'upsample_flow4to3',
78 | 'upsample_flow3to2',
79 | ]
80 | for i, k in enumerate(keys):
81 | if 'upsample' in k:
82 | keys[i] = param_prefix + param_prefix + k
83 | else:
84 | keys[i] = param_prefix + k
85 | i = 0
86 | for m in modules:
87 | if isinstance(m, nn.Conv2d) or isinstance(m, nn.ConvTranspose2d):
88 | weight = weights[keys[i]].copy()
89 | bias = biases[keys[i]].copy()
90 | if keys[i] == param_prefix+'conv1':
91 | m.weight.data[:,0:3,:,:] = torch.from_numpy(np.flip(weight[:,0:3,:,:], axis=1).copy())
92 | m.weight.data[:,3:6,:,:] = torch.from_numpy(np.flip(weight[:,3:6,:,:], axis=1).copy())
93 | m.weight.data[:,6:9,:,:] = torch.from_numpy(np.flip(weight[:,6:9,:,:], axis=1).copy())
94 | m.weight.data[:,9::,:,:] = torch.from_numpy(weight[:,9:,:,:].copy())
95 | if m.bias is not None:
96 | m.bias.data[:] = torch.from_numpy(bias)
97 | else:
98 | m.weight.data[:,:,:,:] = torch.from_numpy(weight)
99 | if m.bias is not None:
100 | m.bias.data[:] = torch.from_numpy(bias)
101 | i = i + 1
102 | return
103 |
104 | def parse_flownetsonly(modules, weights, biases, param_prefix=''):
105 | keys = [
106 | 'conv1',
107 | 'conv2',
108 | 'conv3',
109 | 'conv3_1',
110 | 'conv4',
111 | 'conv4_1',
112 | 'conv5',
113 | 'conv5_1',
114 | 'conv6',
115 | 'conv6_1',
116 |
117 | 'deconv5',
118 | 'deconv4',
119 | 'deconv3',
120 | 'deconv2',
121 |
122 | 'Convolution1',
123 | 'Convolution2',
124 | 'Convolution3',
125 | 'Convolution4',
126 | 'Convolution5',
127 |
128 | 'upsample_flow6to5',
129 | 'upsample_flow5to4',
130 | 'upsample_flow4to3',
131 | 'upsample_flow3to2',
132 | ]
133 | for i, k in enumerate(keys):
134 | if 'upsample' in k:
135 | keys[i] = param_prefix + param_prefix + k
136 | else:
137 | keys[i] = param_prefix + k
138 | i = 0
139 | for m in modules:
140 | if isinstance(m, nn.Conv2d) or isinstance(m, nn.ConvTranspose2d):
141 | weight = weights[keys[i]].copy()
142 | bias = biases[keys[i]].copy()
143 | if keys[i] == param_prefix+'conv1':
144 | # print ("%s :"%(keys[i]), m.weight.size(), m.bias.size(), tf_w[keys[i]].shape[::-1])
145 | m.weight.data[:,0:3,:,:] = torch.from_numpy(np.flip(weight[:,0:3,:,:], axis=1).copy())
146 | m.weight.data[:,3:6,:,:] = torch.from_numpy(np.flip(weight[:,3:6,:,:], axis=1).copy())
147 | if m.bias is not None:
148 | m.bias.data[:] = torch.from_numpy(bias)
149 | else:
150 | m.weight.data[:,:,:,:] = torch.from_numpy(weight)
151 | if m.bias is not None:
152 | m.bias.data[:] = torch.from_numpy(bias)
153 | i = i + 1
154 | return
155 |
156 | def parse_flownetsd(modules, weights, biases, param_prefix='netsd_'):
157 | keys = [
158 | 'conv0',
159 | 'conv1',
160 | 'conv1_1',
161 | 'conv2',
162 | 'conv2_1',
163 | 'conv3',
164 | 'conv3_1',
165 | 'conv4',
166 | 'conv4_1',
167 | 'conv5',
168 | 'conv5_1',
169 | 'conv6',
170 | 'conv6_1',
171 |
172 | 'deconv5',
173 | 'deconv4',
174 | 'deconv3',
175 | 'deconv2',
176 |
177 | 'interconv5',
178 | 'interconv4',
179 | 'interconv3',
180 | 'interconv2',
181 |
182 | 'Convolution1',
183 | 'Convolution2',
184 | 'Convolution3',
185 | 'Convolution4',
186 | 'Convolution5',
187 |
188 | 'upsample_flow6to5',
189 | 'upsample_flow5to4',
190 | 'upsample_flow4to3',
191 | 'upsample_flow3to2',
192 | ]
193 | for i, k in enumerate(keys):
194 | keys[i] = param_prefix + k
195 |
196 | i = 0
197 | for m in modules:
198 | if isinstance(m, nn.Conv2d) or isinstance(m, nn.ConvTranspose2d):
199 | weight = weights[keys[i]].copy()
200 | bias = biases[keys[i]].copy()
201 | if keys[i] == param_prefix+'conv0':
202 | m.weight.data[:,0:3,:,:] = torch.from_numpy(np.flip(weight[:,0:3,:,:], axis=1).copy())
203 | m.weight.data[:,3:6,:,:] = torch.from_numpy(np.flip(weight[:,3:6,:,:], axis=1).copy())
204 | if m.bias is not None:
205 | m.bias.data[:] = torch.from_numpy(bias)
206 | else:
207 | m.weight.data[:,:,:,:] = torch.from_numpy(weight)
208 | if m.bias is not None:
209 | m.bias.data[:] = torch.from_numpy(bias)
210 | i = i + 1
211 |
212 | return
213 |
214 | def parse_flownetfusion(modules, weights, biases, param_prefix='fuse_'):
215 | keys = [
216 | 'conv0',
217 | 'conv1',
218 | 'conv1_1',
219 | 'conv2',
220 | 'conv2_1',
221 |
222 | 'deconv1',
223 | 'deconv0',
224 |
225 | 'interconv1',
226 | 'interconv0',
227 |
228 | '_Convolution5',
229 | '_Convolution6',
230 | '_Convolution7',
231 |
232 | 'upsample_flow2to1',
233 | 'upsample_flow1to0',
234 | ]
235 | for i, k in enumerate(keys):
236 | keys[i] = param_prefix + k
237 |
238 | i = 0
239 | for m in modules:
240 | if isinstance(m, nn.Conv2d) or isinstance(m, nn.ConvTranspose2d):
241 | weight = weights[keys[i]].copy()
242 | bias = biases[keys[i]].copy()
243 | if keys[i] == param_prefix+'conv0':
244 | m.weight.data[:,0:3,:,:] = torch.from_numpy(np.flip(weight[:,0:3,:,:], axis=1).copy())
245 | m.weight.data[:,3::,:,:] = torch.from_numpy(weight[:,3:,:,:].copy())
246 | if m.bias is not None:
247 | m.bias.data[:] = torch.from_numpy(bias)
248 | else:
249 | m.weight.data[:,:,:,:] = torch.from_numpy(weight)
250 | if m.bias is not None:
251 | m.bias.data[:] = torch.from_numpy(bias)
252 | i = i + 1
253 |
254 | return
255 |
--------------------------------------------------------------------------------
/dvs/flownet2/utils/tools.py:
--------------------------------------------------------------------------------
1 | # freda (todo) :
2 |
3 | import os, time, sys, math
4 | import subprocess, shutil
5 | from os.path import *
6 | import numpy as np
7 | from inspect import isclass
8 | from pytz import timezone
9 | from datetime import datetime
10 | import inspect
11 | import torch
12 |
13 | def datestr():
14 | pacific = timezone('US/Pacific')
15 | now = datetime.now(pacific)
16 | return '{}{:02}{:02}_{:02}{:02}'.format(now.year, now.month, now.day, now.hour, now.minute)
17 |
18 | def module_to_dict(module, exclude=[]):
19 | return dict([(x, getattr(module, x)) for x in dir(module)
20 | if isclass(getattr(module, x))
21 | and x not in exclude
22 | and getattr(module, x) not in exclude])
23 |
24 | class TimerBlock:
25 | def __init__(self, title):
26 | print(("{}".format(title)))
27 |
28 | def __enter__(self):
29 |         self.start = time.perf_counter()
30 | return self
31 |
32 | def __exit__(self, exc_type, exc_value, traceback):
33 |         self.end = time.perf_counter()
34 | self.interval = self.end - self.start
35 |
36 | if exc_type is not None:
37 | self.log("Operation failed\n")
38 | else:
39 | self.log("Operation finished\n")
40 |
41 |
42 | def log(self, string):
43 |         duration = time.perf_counter() - self.start
44 | units = 's'
45 | if duration > 60:
46 | duration = duration / 60.
47 | units = 'm'
48 | print((" [{:.3f}{}] {}".format(duration, units, string)))
49 |
50 | def log2file(self, fid, string):
51 | fid = open(fid, 'a')
52 | fid.write("%s\n"%(string))
53 | fid.close()
54 |
55 | def add_arguments_for_module(parser, module, argument_for_class, default, skip_params=[], parameter_defaults={}):
56 | argument_group = parser.add_argument_group(argument_for_class.capitalize())
57 |
58 | module_dict = module_to_dict(module)
59 | argument_group.add_argument('--' + argument_for_class, type=str, default=default, choices=list(module_dict.keys()))
60 |
61 | args, unknown_args = parser.parse_known_args()
62 | class_obj = module_dict[vars(args)[argument_for_class]]
63 |
64 | argspec = inspect.getargspec(class_obj.__init__)
65 |
66 | defaults = argspec.defaults[::-1] if argspec.defaults else None
67 |
68 | args = argspec.args[::-1]
69 | for i, arg in enumerate(args):
70 | cmd_arg = '{}_{}'.format(argument_for_class, arg)
71 | if arg not in skip_params + ['self', 'args']:
72 | if arg in list(parameter_defaults.keys()):
73 | argument_group.add_argument('--{}'.format(cmd_arg), type=type(parameter_defaults[arg]), default=parameter_defaults[arg])
74 | elif (defaults is not None and i < len(defaults)):
75 | argument_group.add_argument('--{}'.format(cmd_arg), type=type(defaults[i]), default=defaults[i])
76 | else:
77 | print(("[Warning]: non-default argument '{}' detected on class '{}'. This argument cannot be modified via the command line"
78 | .format(arg, module.__class__.__name__)))
79 | # We don't have a good way of dealing with inferring the type of the argument
80 | # TODO: try creating a custom action and using ast's infer type?
81 | # else:
82 | # argument_group.add_argument('--{}'.format(cmd_arg), required=True)
83 |
84 | def kwargs_from_args(args, argument_for_class):
85 | argument_for_class = argument_for_class + '_'
86 | return {key[len(argument_for_class):]: value for key, value in list(vars(args).items()) if argument_for_class in key and key != argument_for_class + 'class'}
87 |
88 | def format_dictionary_of_losses(labels, values):
89 | try:
90 | string = ', '.join([('{}: {:' + ('.3f' if value >= 0.001 else '.1e') +'}').format(name, value) for name, value in zip(labels, values)])
91 | except (TypeError, ValueError) as e:
92 | print((list(zip(labels, values))))
93 | string = '[Log Error] ' + str(e)
94 |
95 | return string
96 |
97 |
98 | class IteratorTimer():
99 | def __init__(self, iterable):
100 | self.iterable = iterable
101 | self.iterator = self.iterable.__iter__()
102 |
103 | def __iter__(self):
104 | return self
105 |
106 | def __len__(self):
107 | return len(self.iterable)
108 |
109 | def __next__(self):
110 | start = time.time()
111 | n = next(self.iterator)
112 | self.last_duration = (time.time() - start)
113 | return n
114 |
115 | next = __next__
116 |
117 | def gpumemusage():
118 |     gpu_mem = subprocess.check_output("nvidia-smi | grep MiB | cut -f 3 -d '|'", shell=True).decode().replace(' ', '').replace('\n', '').replace('i', '')
119 | all_stat = [float(a) for a in gpu_mem.replace('/','').split('MB')[:-1]]
120 |
121 | gpu_mem = ''
122 |     for i in range(len(all_stat)//2):
123 | curr, tot = all_stat[2*i], all_stat[2*i+1]
124 | util = "%1.2f"%(100*curr/tot)+'%'
125 | cmem = str(int(math.ceil(curr/1024.)))+'GB'
126 | gmem = str(int(math.ceil(tot/1024.)))+'GB'
127 | gpu_mem += util + '--' + join(cmem, gmem) + ' '
128 | return gpu_mem
129 |
130 |
131 | def update_hyperparameter_schedule(args, epoch, global_iteration, optimizer):
132 | if args.schedule_lr_frequency > 0:
133 | for param_group in optimizer.param_groups:
134 | if (global_iteration + 1) % args.schedule_lr_frequency == 0:
135 | param_group['lr'] /= float(args.schedule_lr_fraction)
136 | param_group['lr'] = float(np.maximum(param_group['lr'], 0.000001))
137 |
138 | def save_checkpoint(state, is_best, path, prefix, filename='checkpoint.pth.tar'):
139 | prefix_save = os.path.join(path, prefix)
140 | name = prefix_save + '_' + filename
141 | torch.save(state, name)
142 | if is_best:
143 | shutil.copyfile(name, prefix_save + '_model_best.pth.tar')
144 |
145 |
--------------------------------------------------------------------------------
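TimerBlock in tools.py is a context manager: entering it records a start time, log() prefixes each message with the elapsed time (switching from seconds to minutes past 60 s), and exiting prints whether the operation finished or failed. Typical usage looks like this (a minimal sketch, not taken from main.py; assumes tools.py and its pytz dependency are importable):

from tools import TimerBlock

with TimerBlock("Building the model") as block:
    block.log("parsing arguments")
    # ... expensive setup work ...
    block.log("initializing weights")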
/dvs/gyro/__init__.py:
--------------------------------------------------------------------------------
1 | from .gyro_function import (
2 | GetGyroAtTimeStamp,
3 | QuaternionProduct,
4 | QuaternionReciprocal,
5 | ConvertQuaternionToAxisAngle,
6 | FindOISAtTimeStamp,
7 | GetMetadata,
8 | GetProjections,
9 | GetVirtualProjection,
10 | GetForwardGrid,
11 | CenterZoom,
12 | GetWarpingFlow,
13 | torch_norm_quat,
14 | torch_QuaternionProduct,
15 | torch_QuaternionReciprocal,
16 | torch_GetVirtualProjection,
17 | get_static,
18 | torch_GetForwardGrid,
19 | torch_GetWarpingFlow,
20 | train_GetGyroAtTimeStamp,
21 | train_ConvertQuaternionToAxisAngle,
22 | ConvertAxisAngleToQuaternion,
23 | torch_ConvertAxisAngleToQuaternion,
24 | torch_ConvertQuaternionToAxisAngle,
25 | ConvertAxisAngleToQuaternion_no_angle,
26 | ConvertQuaternionToAxisAngle_no_angle,
27 | torch_GetHomographyTransformFromProjections,
28 | torch_ApplyTransform,
29 | norm_quat,
30 | SlerpWithDefault
31 | )
32 | from .gyro_io import (
33 | LoadGyroData,
34 | LoadOISData,
35 | LoadFrameData,
36 | LoadStabResult,
37 | get_grid,
38 | get_rotations,
39 | visual_rotation
40 | )
--------------------------------------------------------------------------------
/dvs/gyro/gyro_io.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | from numpy import linalg as LA
3 | import matplotlib.pyplot as plt
4 | import scipy.io as sio
5 | from .gyro_function import (
6 | ProcessGyroData, QuaternionProduct, QuaternionReciprocal,
7 | ConvertQuaternionToAxisAngle, FindOISAtTimeStamp, GetMetadata,
8 | GetProjections, GetVirtualProjection, GetForwardGrid,
9 | CenterZoom, GetGyroAtTimeStamp, get_static, ConvertAxisAngleToQuaternion,
10 | ConvertAxisAngleToQuaternion_no_angle, ConvertQuaternionToAxisAngle_no_angle
11 | )
12 |
13 | def load_gyro_mesh(input_name):
14 | data = LoadStabResult(input_name)
15 | w, h = data["vertex_grid_size"][0]
16 | data["warping grid"] = np.reshape(data["warping grid"],(-1,int(w),int(h),4))
17 | return data
18 |
19 | def get_grid(static_options, frame_data, quats_data, ois_data, virtual_data, no_shutter = False):
20 | grid = []
21 | result_poses = {}
22 | result_poses['virtual pose'] = virtual_data
23 | for i in range(len(virtual_data)):
24 | metadata = GetMetadata(frame_data, i)
25 | real_projections = GetProjections(static_options, metadata, quats_data, ois_data, no_shutter = no_shutter)
26 | virtual_projection = GetVirtualProjection(static_options, result_poses, metadata, i)
27 | grid.append(GetForwardGrid(static_options, real_projections, virtual_projection))
28 | grid = np.array(grid)
29 | zoom_ratio = 1 / (1 - 2 * static_options["cropping_ratio"])
30 | curr_grid = CenterZoom(grid, zoom_ratio)
31 | curr_grid = np.transpose(curr_grid,(0,3,2,1))
32 | return curr_grid
33 |
34 | def get_rotations(frame_data, quats_data, ois_data, num_frames):
35 | quats = np.zeros((num_frames, 4))
36 | for i in range(num_frames):
37 | quats[i,:] = GetGyroAtTimeStamp(quats_data, frame_data[i,0])
38 |
39 | rotations = np.zeros((num_frames,3))
40 | lens_offsets = np.zeros((num_frames, 2))
41 | for i in range(num_frames):
42 | if i != 0:
43 | quat_dif = QuaternionProduct(quats[i,:], QuaternionReciprocal(quats[i-1,:]))
44 | axis_dif_cur = ConvertQuaternionToAxisAngle_no_angle(quat_dif)
45 | rotations[i,:] = axis_dif_cur
46 | lens_offsets[i,:] = FindOISAtTimeStamp(ois_data, frame_data[i, 4])
47 |
48 | return rotations, lens_offsets
49 |
50 | def visual_rotation(rotations_real, lens_offsets_real, rotations_virtual, lens_offsets_virtual, rotations_virtual2, lens_offsets_virtual2, path):
51 | # figure('units','normalized','outerposition',[0 0 1 1])
52 | plt.clf()
53 | plt.figure(figsize=(8,16))
54 |
55 | plt.subplot(5,1,1)
56 | plt.plot(rotations_real[:,0], "g")
57 | if rotations_virtual is not None:
58 | plt.plot(rotations_virtual[:,0], "b")
59 | if rotations_virtual2 is not None:
60 | plt.plot(rotations_virtual2[:,0], "r")
61 | plt.ylim(-0.02, 0.02)
62 | plt.xlabel('frame id')
63 | plt.ylabel('gyro x')
64 |
65 | plt.subplot(5,1,2)
66 | plt.plot(rotations_real[:,1], "g")
67 | if rotations_virtual is not None:
68 | plt.plot(rotations_virtual[:,1], "b")
69 | if rotations_virtual2 is not None:
70 | plt.plot(rotations_virtual2[:,1], "r")
71 | plt.ylim(-0.02, 0.02)
72 | plt.xlabel('frame id')
73 | plt.ylabel('gyro y')
74 |
75 | plt.subplot(5,1,3)
76 | plt.plot(rotations_real[:,2], "g")
77 | if rotations_virtual is not None:
78 | plt.plot(rotations_virtual[:,2], "b")
79 | if rotations_virtual2 is not None:
80 | plt.plot(rotations_virtual2[:,2], "r")
81 | plt.ylim(-0.02, 0.02)
82 | plt.xlabel('frame id')
83 | plt.ylabel('gyro z')
84 |
85 | plt.subplot(5,1,4)
86 | plt.plot(lens_offsets_real[:,0], "g")
87 | if lens_offsets_virtual is not None:
88 | plt.plot(lens_offsets_virtual[:,0], "b")
89 | if rotations_virtual2 is not None:
90 | plt.plot(lens_offsets_virtual2[:,0], "r")
91 | plt.xlabel('frame id')
92 | plt.ylabel('ois x')
93 |
94 | plt.subplot(5,1,5)
95 | plt.plot(lens_offsets_real[:,1], "g")
96 | if lens_offsets_virtual is not None:
97 | plt.plot(lens_offsets_virtual[:,1], "b")
98 | if rotations_virtual2 is not None:
99 | plt.plot(lens_offsets_virtual2[:,1], "r")
100 | plt.xlabel('frame id')
101 | plt.ylabel('ois y')
102 |
103 | plt.savefig(path[:-4]+".jpg")
104 | return
105 |
106 | def LoadOISData(ois_name):
107 | ois_log = np.loadtxt(ois_name)
108 | ois_log = ois_log[:, -3:]
109 | return ois_log
110 |
111 | def LoadFrameData(frame_log_name):
112 | frame_data = np.loadtxt(frame_log_name)
113 | frame_data[:, [0,4]] = frame_data[:, [0,4]] - np.expand_dims(frame_data[:,1]/2, axis = 1)
114 | return frame_data
115 |
116 |
117 | def LoadGyroData(gyro_log_name):
118 | raw_gyro_data = np.loadtxt(gyro_log_name)
119 | raw_gyro_data[:,0] = raw_gyro_data[:,0] * 1000
120 | raw_gyro_data = raw_gyro_data[:,[0, 2, 1, 3]]
121 |
122 | [_, quats_data] = ProcessGyroData(raw_gyro_data)
123 | quats_data = np.concatenate((raw_gyro_data[:, 0, None], quats_data), axis = 1)
124 | return quats_data
125 |
126 | def LoadStabResult(input_name):
127 | fid = open(input_name)
128 | data = {}
129 | while True:
130 | name, val = ReadLine(fid)
131 | if name == None:
132 | break
133 | if name in data:
134 | data[name] = np.concatenate((data[name], val), axis=0)
135 | else:
136 | data[name] = val
137 | fid.close()
138 | print("Mesh length: ", len(list(data.values())[0]))
139 | return data
140 |
141 |
142 | def ReadLine(fid):
143 | name = ''
144 | val = 0
145 | tline = fid.readline()
146 | if len(tline) == 0:
147 | return None, None
148 | if tline[-1] == "\n":
149 | tline = tline[:-1]
150 | ind = tline.find(':')
151 | name = tline[:ind]
152 | tmp_val= str2num(tline[ind+1:])
153 | if len(tmp_val) > 0:
154 | val = tmp_val
155 | else:
156 | tline = fid.readline()
157 | if tline[-1] == "\n":
158 | tline = tline[:-1]
159 | val = str2num(tline)
160 | return name, np.expand_dims(np.array(val), axis=0)
161 |
162 | def str2num(string):
163 | nums = string.split(" ")
164 | nums = [float(_) for _ in nums if _ != ""]
165 | return nums
166 |
167 |
--------------------------------------------------------------------------------
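get_rotations above converts per-frame camera quaternions into frame-to-frame rotation vectors: the relative rotation is q_t composed with the reciprocal of q_{t-1}, and its axis-angle form is what visual_rotation plots as gyro x/y/z. A self-contained NumPy sketch of that step, for illustration only (it assumes unit quaternions in the repo's (x, y, z, w) ordering and does not call the gyro_function helpers):

import numpy as np

def quat_mul(a, b):
    # Hamilton product, (x, y, z, w) ordering
    x1, y1, z1, w1 = a
    x2, y2, z2, w2 = b
    return np.array([
        w1*x2 + x1*w2 + y1*z2 - z1*y2,
        w1*y2 - x1*z2 + y1*w2 + z1*x2,
        w1*z2 + x1*y2 - y1*x2 + z1*w2,
        w1*w2 - x1*x2 - y1*y2 - z1*z2,
    ])

def quat_conj(q):
    x, y, z, w = q
    return np.array([-x, -y, -z, w])

def relative_rotation_vector(q_t, q_t_1):
    q_diff = quat_mul(q_t, quat_conj(q_t_1))        # rotation from frame t-1 to frame t
    w = np.clip(q_diff[3], -1.0, 1.0)
    angle = 2.0 * np.arccos(w)
    axis = q_diff[:3] / max(np.linalg.norm(q_diff[:3]), 1e-12)
    return axis * angle                              # small per-frame rotation vector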
/dvs/inference.py:
--------------------------------------------------------------------------------
1 | import os
2 | import sys
3 | import torch
4 | import torchvision
5 | import torch.nn as nn
6 | from torch.autograd import Variable
7 |
8 | import time
9 | import yaml
10 | import argparse
11 | import numpy as np
12 | from printer import Printer
13 | from dataset import get_data_loader, get_inference_data_loader
14 | from model import Model
15 | import datetime
16 | import copy
17 | from util import make_dir, get_optimizer, norm_flow
18 | from gyro import (
19 | get_grid,
20 | get_rotations,
21 | visual_rotation,
22 | torch_QuaternionProduct,
23 | torch_norm_quat
24 | )
25 | from warp import warp_video
26 |
27 | os.environ["CUDA_VISIBLE_DEVICES"] = "0"
28 |
29 | def run(model, loader, cf, USE_CUDA=True):
30 | no_flo = False
31 | number_virtual, number_real = cf['data']["number_virtual"], cf['data']["number_real"]
32 | model.net.eval()
33 | model.unet.eval()
34 | activation = nn.Softshrink(0.0006) # 0.0036
35 | for i, data in enumerate(loader, 0):
36 | # get the inputs; data is a list of [inputs, labels]
37 | real_inputs, times, flo, flo_back, real_projections, real_postion, ois, real_queue_idx = data
38 |         print("Finish loading data")
39 |
40 | real_inputs = real_inputs.type(torch.float) #[b,60,84=21*4]
41 | real_projections = real_projections.type(torch.float)
42 | flo = flo.type(torch.float)
43 | flo_back = flo_back.type(torch.float)
44 | ois = ois.type(torch.float)
45 |
46 | batch_size, step, dim = real_inputs.size()
47 | times = times.numpy()
48 | real_queue_idx = real_queue_idx.numpy()
49 | virtual_queue = [None] * batch_size
50 |
51 | run_loss = 0
52 | model.net.init_hidden(batch_size)
53 | count = 0
54 | for j in range(step):
55 | if (j+1) % 100 == 0:
56 | print("Step: "+str(j+1)+"/"+str(step))
57 | virtual_inputs, vt_1 = loader.dataset.get_virtual_data(
58 | virtual_queue, real_queue_idx, times[:, j], times[:, j+1], times[:, 0], batch_size, number_virtual, real_postion[:,j])
59 | real_inputs_step = real_inputs[:,j,:]
60 | inputs = torch.cat((real_inputs_step,virtual_inputs), dim = 1)
61 |
62 | # inputs = Variable(real_inputs_step)
63 | if USE_CUDA:
64 | real_inputs_step = real_inputs_step.cuda()
65 | virtual_inputs = virtual_inputs.cuda()
66 | inputs = inputs.cuda()
67 | if no_flo is False:
68 | flo_step = flo[:,j].cuda()
69 | flo_back_step = flo_back[:,j].cuda()
70 | else:
71 | flo_step = None
72 | flo_back_step = None
73 | vt_1 = vt_1.cuda()
74 | real_projections_t = real_projections[:,j+1].cuda()
75 | real_projections_t_1 = real_projections[:,j].cuda()
76 | real_postion_anchor = real_postion[:,j].cuda()
77 | ois_step = ois[:,j].cuda()
78 |
79 | if no_flo is False:
80 | b, h, w, _ = flo_step.size()
81 | flo_step = norm_flow(flo_step, h, w)
82 | flo_back_step = norm_flow(flo_back_step, h, w)
83 |
84 | with torch.no_grad():
85 | if no_flo is False:
86 | flo_out = model.unet(flo_step, flo_back_step)
87 | else:
88 | flo_out = None
89 | if j < 1:
90 | for i in range(2):
91 | out = model.net(inputs, flo_out, ois_step)
92 | else:
93 | out = model.net(inputs, flo_out, ois_step)
94 |
95 | real_position = real_inputs_step[:,40:44]
96 | virtual_position = virtual_inputs[:, -4:]
97 |
98 | out[:, :3] = activation(out[:, :3])
99 | out = torch_norm_quat(out)
100 |
101 | pos = torch_QuaternionProduct(virtual_position, real_postion_anchor)
102 | loss_step = model.loss(out, vt_1, virtual_inputs, real_inputs_step, \
103 | flo_step, flo_back_step, real_projections_t, real_projections_t_1, real_postion_anchor, \
104 | follow = True, optical = True, undefine = True)
105 | run_loss += loss_step
106 |
107 | out = torch_QuaternionProduct(out, pos)
108 |
109 | if USE_CUDA:
110 | out = out.cpu().detach().numpy()
111 |
112 | virtual_queue = loader.dataset.update_virtual_queue(batch_size, virtual_queue, out, times[:,j+1])
113 |
114 | run_loss /= step
115 | print( "\nLoss: follow, angle, smooth, c2_smooth, undefine, optical")
116 | print(run_loss.cpu().numpy()[:-1], "\n")
117 | return np.squeeze(virtual_queue, axis=0)
118 |
119 |
120 | def inference(cf, data_path, USE_CUDA):
121 | checkpoints_dir = cf['data']['checkpoints_dir']
122 | checkpoints_dir = make_dir(checkpoints_dir, cf)
123 | files = os.listdir(data_path)
124 | for f in files:
125 | if f[-3:] == "mp4" and "no_ois" not in f and "no_shutter" not in f and "gimbal" not in f.lower() and "grid" not in f.lower() and "flo" not in f.lower():
126 | video_name = f[:-4]
127 |
128 | # Define the model
129 | model = Model(cf)
130 | load_model = cf["model"]["load_model"]
131 |
132 |         print("------Load Pretrained Model--------")
133 | if load_model is not None:
134 | checkpoint = torch.load(load_model)
135 | print(load_model)
136 | else:
137 | load_last = os.path.join(checkpoints_dir, cf['data']['exp']+'_last.checkpoint')
138 | checkpoint = torch.load(load_last)
139 | print(load_last)
140 | model.net.load_state_dict(checkpoint['state_dict'])
141 | model.unet.load_state_dict(checkpoint['unet'])
142 |
143 | if USE_CUDA:
144 | model.net.cuda()
145 | model.unet.cuda()
146 |
147 | print("-----------Load Dataset----------")
148 | test_loader = get_inference_data_loader(cf, data_path, no_flo = False)
149 | data = test_loader.dataset.data[0]
150 |
151 | start_time = time.time()
152 | virtual_queue= run(model, test_loader, cf, USE_CUDA=USE_CUDA)
153 |
154 | virtual_data = np.zeros((1,5))
155 | virtual_data[:,1:] = virtual_queue[0, 1:]
156 | virtual_data[:,0] = data.frame[0,0]
157 | virtual_queue = np.concatenate((virtual_data, virtual_queue), axis = 0)
158 |
159 | print(virtual_queue.shape)
160 | time_used = (time.time() - start_time) / 60
161 |
162 | print("Time_used: %.4f minutes" % (time_used))
163 |
164 |
165 | virtual_path = os.path.join("./test", cf['data']['exp'], data_path.split("/")[-1]+'.txt')
166 | np.savetxt(virtual_path, virtual_queue, delimiter=' ')
167 |
168 | print("------Start Warping Video--------")
169 | grid = get_grid(test_loader.dataset.static_options, \
170 | data.frame[:data.length], data.gyro, data.ois, virtual_queue[:data.length,1:], no_shutter = False)
171 | return data, virtual_queue, video_name, grid
172 |
173 | def visual_result(cf, data, video_name, virtual_queue, virtual_queue2 = None, compare_exp = None):
174 | print("------Start Visual Result--------")
175 | rotations_virtual, lens_offsets_virtual = get_rotations(data.frame[:data.length], virtual_queue, np.zeros(data.ois.shape), data.length)
176 | rotations_real, lens_offsets_real = get_rotations(data.frame[:data.length], data.gyro, data.ois, data.length)
177 | if virtual_queue2 is not None:
178 | rotations_virtual2, lens_offsets_virtual2 = get_rotations(data.frame[:data.length], virtual_queue2, np.zeros(data.ois.shape), data.length)
179 | path = os.path.join("./test", cf['data']['exp'], video_name+'_'+compare_exp+'.jpg')
180 | else:
181 | rotations_virtual2, lens_offsets_virtual2 = None, None
182 | path = os.path.join("./test", cf['data']['exp'], video_name+'.jpg')
183 |
184 | visual_rotation(rotations_real, lens_offsets_real, rotations_virtual, lens_offsets_virtual, rotations_virtual2, lens_offsets_virtual2, path)
185 |
186 |
187 | def main(args = None):
188 | config_file = args.config
189 | dir_path = args.dir_path
190 | cf = yaml.load(open(config_file, 'r'))
191 |
192 | USE_CUDA = cf['data']["use_cuda"]
193 |
194 | log_file = open(os.path.join(cf["data"]["log"], cf['data']['exp']+'_test.log'), 'w+')
195 | printer = Printer(sys.stdout, log_file).open()
196 |
197 | data_name = sorted(os.listdir(dir_path))
198 | for i in range(len(data_name)):
199 | print("Running Inference: " + str(i+1) + "/" + str(len(data_name)))
200 | save_path = os.path.join("./test", cf['data']['exp'], data_name[i]+'_stab.mp4')
201 |
202 | data_path = os.path.join(dir_path, data_name[i])
203 | data, virtual_queue, video_name, grid= inference(cf, data_path, USE_CUDA)
204 |
205 | virtual_queue2 = None
206 | visual_result(cf, data, data_name[i], virtual_queue, virtual_queue2 = virtual_queue2, compare_exp = None)
207 |
208 | video_path = os.path.join(data_path, video_name+".mp4")
209 | warp_video(grid, video_path, save_path, frame_number = False)
210 | return
211 |
212 | if __name__ == '__main__':
213 | parser = argparse.ArgumentParser("Training model")
214 | parser.add_argument("--config", default="./conf/stabilzation.yaml", help="Config file.")
215 | parser.add_argument("--dir_path", default="./video")
216 | args = parser.parse_args()
217 | main(args = args)
--------------------------------------------------------------------------------
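In run() above the raw network output is passed through Softshrink before renormalization, so tiny predicted corrections are snapped to zero and the virtual camera stays put unless the model is confident it should move. A small numeric illustration of that step (not part of the repo; plain normalization stands in for torch_norm_quat):

import torch

shrink = torch.nn.Softshrink(0.0006)
q = torch.tensor([[0.0003, -0.0004, 0.0020, 1.0]])   # raw (x, y, z, w) output
q[:, :3] = shrink(q[:, :3])                          # x and y fall below the threshold -> 0
q = q / q.norm(dim=1, keepdim=True)                  # renormalize to a unit quaternion
print(q)                                             # ~[[0.0000, 0.0000, 0.0014, 1.0000]]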
/dvs/load_frame_sensor_data.py:
--------------------------------------------------------------------------------
1 | import os
2 | os.environ["CUDA_VISIBLE_DEVICES"] = "0"
3 | import sys
4 | import torch
5 | import torchvision
6 | import torch.nn as nn
7 | from torch.autograd import Variable
8 |
9 | import time
10 | import yaml
11 | import argparse
12 | import numpy as np
13 | from printer import Printer
14 | from dataset import get_data_loader, get_inference_data_loader
15 | from model import Model
16 | import datetime
17 | import copy
18 | from util import make_dir, get_optimizer, norm_flow
19 | from gyro import (
20 | get_grid,
21 | get_rotations,
22 | visual_rotation,
23 | GetGyroAtTimeStamp,
24 | torch_ConvertQuaternionToAxisAngle,
25 | torch_ConvertAxisAngleToQuaternion,
26 | torch_QuaternionProduct,
27 | get_static
28 | )
29 | from warp import warp_video
30 |
31 | def run(loader, cf, USE_CUDA=True):
32 | number_virtual, number_real = cf['data']["number_virtual"], cf['data']["number_real"]
33 | for i, data in enumerate(loader, 0):
34 | # get the inputs; data is a list of [inputs, labels]
35 | real_inputs, times, flo, flo_back, real_projections, real_postion, ois, real_queue_idx = data
36 |         print("Finish loading data")
37 |
38 | real_inputs = real_inputs.type(torch.float) #[b,60,84=21*4]
39 | real_projections = real_projections.type(torch.float)
40 |
41 | batch_size, step, dim = real_inputs.size()
42 | times = times.numpy()
43 | real_queue_idx = real_queue_idx.numpy()
44 | virtual_queue = [None] * batch_size
45 |
46 | for j in range(step):
47 | virtual_inputs, vt_1 = loader.dataset.get_virtual_data(
48 | virtual_queue, real_queue_idx, times[:, j], times[:, j+1], times[:, 0], batch_size, number_virtual, real_postion[:,j])
49 | real_inputs_step = real_inputs[:,j,:]
50 | if USE_CUDA:
51 | real_inputs_step = real_inputs_step.cuda()
52 | virtual_inputs = virtual_inputs.cuda()
53 | real_postion_anchor = real_postion[:,j].cuda()
54 |
55 | out = real_inputs_step[:,40:44]
56 |
57 | virtual_position = virtual_inputs[:, -4:]
58 | pos = torch_QuaternionProduct(virtual_position, real_postion_anchor)
59 |
60 | out = torch_QuaternionProduct(out, pos)
61 |
62 | if USE_CUDA:
63 | out = out.cpu().detach().numpy()
64 |
65 | virtual_queue = loader.dataset.update_virtual_queue(batch_size, virtual_queue, out, times[:,j+1])
66 | return np.squeeze(virtual_queue, axis=0)
67 |
68 | def inference(cf, data_path, USE_CUDA):
69 | print("-----------Load Dataset----------")
70 | test_loader = get_inference_data_loader(cf, data_path)
71 | data = test_loader.dataset.data[0]
72 | test_loader.dataset.no_flo = True
73 | test_loader.dataset.static_options = get_static(ratio = 0)
74 |
75 | start_time = time.time()
76 | virtual_queue = run(test_loader, cf, USE_CUDA=USE_CUDA)
77 |
78 | virtual_data = np.zeros((1,5))
79 | virtual_data[:,1:] = virtual_queue[0, 1:]
80 | virtual_data[:,0] = data.frame[0,0]
81 | virtual_queue = np.concatenate((virtual_data, virtual_queue), axis = 0)
82 |
83 | files = os.listdir(data_path)
84 | for f in files:
85 | if f[-3:] == "mp4" and "no_ois" not in f and "gimbal" not in f.lower():
86 | video_name = f[:-4]
87 | print(video_name)
88 | virtual_path = os.path.join("./test", cf['data']['exp'], video_name+'.txt')
89 |
90 | print("------Start Visual Result--------")
91 | rotations_real, lens_offsets_real = get_rotations(data.frame[:data.length], data.gyro, data.ois, data.length)
92 | fig_path = os.path.join(data_path, video_name+"_real.jpg")
93 | visual_rotation(rotations_real, lens_offsets_real, None, None, None, None, fig_path)
94 |
95 | return
96 |
97 | def main(args = None):
98 | config_file = args.config
99 | dir_path = args.dir_path
100 | cf = yaml.load(open(config_file, 'r'))
101 |
102 | USE_CUDA = cf['data']["use_cuda"]
103 |
104 | checkpoints_dir = cf['data']['checkpoints_dir']
105 | checkpoints_dir = make_dir(checkpoints_dir, cf)
106 |
107 | data_name = sorted(os.listdir(dir_path))
108 | for i in range(len(data_name)):
109 | print("Running: " + str(i+1) + "/" + str(len(data_name)))
110 | inference(cf, os.path.join(dir_path, data_name[i]), USE_CUDA)
111 | return
112 |
113 | if __name__ == '__main__':
114 | parser = argparse.ArgumentParser("Training model")
115 | parser.add_argument("--config", default="./conf/stabilzation.yaml", help="Config file.")
116 | parser.add_argument("--dir_path", default="./video")
117 | args = parser.parse_args()
118 | main(args = args)
--------------------------------------------------------------------------------
/dvs/loss.py:
--------------------------------------------------------------------------------
1 | import torch
2 | import numpy as np
3 | from torch.autograd import Variable
4 | import operator
5 | import torch.nn.functional as F
6 | import matplotlib.pyplot as plt
7 | from gyro import (
8 | torch_QuaternionProduct,
9 | torch_QuaternionReciprocal,
10 | get_static,
11 | torch_GetVirtualProjection,
12 | torch_GetForwardGrid,
13 | torch_GetWarpingFlow,
14 | torch_ConvertAxisAngleToQuaternion,
15 | torch_ConvertQuaternionToAxisAngle,
16 | torch_norm_quat,
17 | torch_GetHomographyTransformFromProjections,
18 | torch_ApplyTransform
19 | )
20 |
21 | class C2_Smooth_loss(torch.nn.Module):
22 | def __init__(self):
23 | super(C2_Smooth_loss, self).__init__()
24 | self.MSE = torch.nn.MSELoss()
25 |
26 | def forward(self, Qt, Qt_1, Qt_2):
27 | detaQt_1 = torch_QuaternionProduct(Qt_1, torch_QuaternionReciprocal(Qt_2))
28 | return self.MSE(Qt, detaQt_1)
29 |
30 | class C1_Smooth_loss(torch.nn.Module):
31 | def __init__(self):
32 | super(C1_Smooth_loss, self).__init__()
33 | self.MSE = torch.nn.MSELoss()
34 |
35 | def forward(self, v_r_axis, v_axis_t_1 = None, real_postion = None):
36 | quat_zero = torch.zeros(v_r_axis.shape).cuda()
37 | quat_zero[:,3] = 1
38 | return self.MSE(v_r_axis, quat_zero)
39 |
40 | class Follow_loss(torch.nn.Module):
41 | def __init__(self):
42 | super(Follow_loss, self).__init__()
43 | self.MSE = torch.nn.MSELoss()
44 |
45 | def forward(self, virtual_quat, real_quat, real_postion = None):
46 | if real_postion is not None:
47 | real_quat = torch_QuaternionProduct(real_quat, real_postion)
48 | return self.MSE(virtual_quat, real_quat)
49 |
50 | class Stay_loss(torch.nn.Module):
51 | def __init__(self):
52 | super(Stay_loss, self).__init__()
53 | self.zero = torch.tensor([0.0,0.0,0.0,1.0]).cuda()
54 |
55 | def forward(self, virtual_quat):
56 | return torch.mean(torch.abs(virtual_quat - self.zero))
57 |
58 |
59 | class Angle_loss(torch.nn.Module):
60 | def __init__(self):
61 | super(Angle_loss, self).__init__()
62 | self.MSE = torch.nn.MSELoss()
63 |
64 | def forward(self, Q1, Q2, threshold = 0.5236, logistic_beta1 = 100):
65 | batch_size = Q1.shape[0]
66 | Q3 = torch_norm_quat(torch_QuaternionProduct(Q2, torch_QuaternionReciprocal(Q1)))
67 | theta = torch.zeros(batch_size).cuda()
68 | index = (Q3[:,3] < 1).nonzero()
69 | theta[index] = torch.acos(Q3[index,3]) * 2
70 | loss = torch.mean(theta * (1 / (1 + torch.exp(-logistic_beta1 * (theta - threshold)))))
71 | return loss, theta
72 |
73 | class Optical_loss(torch.nn.Module):
74 | def __init__(self):
75 | super(Optical_loss, self).__init__()
76 | self.static_options = get_static()
77 | self.mesh = get_mesh()
78 |
79 | def forward(self, Vt, Vt_1, flo, flo_back, real_projection_t, real_projection_t_1):
80 | virtual_projection_t = torch_GetVirtualProjection(self.static_options, Vt)
81 | virtual_projection_t_1 = torch_GetVirtualProjection(self.static_options, Vt_1)
82 |
83 | b, h, w = flo.size()[:3]
84 |
85 | grid_t = torch_GetForwardGrid(self.static_options, real_projection_t, virtual_projection_t)[:,:2,:,:].permute(0,1,3,2)
86 | grid_t = torch.nn.functional.upsample_bilinear(grid_t, size = (h, w)) # [B,C(xy),H,W]
87 |
88 | grid_t_1 = torch_GetForwardGrid(self.static_options, real_projection_t_1, virtual_projection_t_1)[:,:2,:,:].permute(0,1,3,2)
89 | grid_t_1 = torch.nn.functional.upsample_bilinear(grid_t_1, size = (h, w)) # [B,C(xy),H,W]
90 |
91 | mesh = self.mesh.repeat(b, 1, 1, 1)
92 | flo = flo + mesh
93 | flo_back = flo_back + mesh # [B,H,W,C]
94 |
95 | valid = (flo[:,:,:,0] > 0) * (flo[:,:,:,1] > 0) * (flo[:,:,:,0] < 1) * (flo[:,:,:,1] < 1)
96 | valid_f = torch.unsqueeze(valid, dim = 3).type(torch.cuda.FloatTensor)
97 | valid = torch.unsqueeze(valid, dim = 1).type(torch.cuda.FloatTensor)
98 |
99 | valid_back = (flo_back[:,:,:,0] > 0) * (flo_back[:,:,:,1] > 0) * (flo_back[:,:,:,0] < 1) * (flo_back[:,:,:,1] < 1)
100 | valid_back_f = torch.unsqueeze(valid_back, dim = 3).type(torch.cuda.FloatTensor)
101 | valid_back = torch.unsqueeze(valid_back, dim = 1).type(torch.cuda.FloatTensor) # [B,C,H,W]
102 |
103 | flo = (flo * 2 - 1) * valid_f
104 | flo_back = (flo_back * 2 - 1) * valid_back_f
105 |
106 | forward_t = torch.nn.functional.grid_sample(grid_t, flo, padding_mode="reflection") # default bilinear
107 | backward_t_1 = torch.nn.functional.grid_sample(grid_t_1, flo_back, padding_mode="reflection") # default bilinear
108 |
109 | forward_diff = ((forward_t - grid_t_1) * valid) ** 2
110 | backward_diff = ((backward_t_1 - grid_t) * valid_back) ** 2
111 |
112 | forward_loss = torch.sum(forward_diff, dim = (1,2,3)) / torch.sum(valid, dim = (1,2,3))
113 | backward_loss = torch.sum(backward_diff, dim = (1,2,3)) / torch.sum(valid_back, dim = (1,2,3))
114 |
115 | loss = forward_loss + backward_loss
116 | loss = torch.min(loss, loss - loss + 1) #[0]
117 | loss = torch.sum(loss) / b
118 |
119 | return loss
120 |
121 |
122 | def get_mesh(height = 270, width = 480, USE_CUDA = True):
123 | xs = np.linspace(0, 1, width, endpoint = False) + 0.5 / height
124 | ys = np.linspace(0, 1, height, endpoint = False) + 0.5 / width
125 | xmesh, ymesh = np.meshgrid(xs, ys)
126 | # Reshape the sampling positions to a H x W x 2 tensor
127 | mesh = torch.Tensor(np.expand_dims(np.moveaxis(np.array(list(zip(xmesh, ymesh))), 1, 2),axis=0))
128 | if USE_CUDA:
129 | mesh = mesh.cuda()
130 | return mesh
131 |
132 | class Undefine_loss(torch.nn.Module):
133 | def __init__(self, ratio = 0.08, inner_ratio = 0.04, USE_CUDA = True):
134 | super(Undefine_loss, self).__init__()
135 | self.static_options = get_static()
136 | self.inner_ratio = inner_ratio
137 | width = self.static_options["width"]
138 | height = self.static_options["height"]
139 | x0, x1, y0, y1 = \
140 | int(width*ratio), int(width*(1-ratio)), int(height*ratio), int(height*(1-ratio))
141 | self.norm = torch.Tensor([width, height, 1])
142 | self.p00 = torch.Tensor([x0, y0, 1])
143 | self.p01 = torch.Tensor([x0, y1, 1])
144 | self.p10 = torch.Tensor([x1, y0, 1])
145 | self.p11 = torch.Tensor([x1, y1, 1])
146 | if USE_CUDA == True:
147 | self.p00 = self.p00.cuda()
148 | self.p01 = self.p01.cuda()
149 | self.p10 = self.p10.cuda()
150 | self.p11 = self.p11.cuda()
151 | self.norm = self.norm.cuda()
152 |
153 | def forward(self, Vt, Rt, ratio = 0.04):
154 | batch_size = Vt.size()[0]
155 |
156 | row_mid = self.static_options["num_grid_rows"] // 2
157 | virtual_projection_t = torch_GetVirtualProjection(self.static_options, Vt)
158 |
159 | real_projection_t = torch_GetVirtualProjection(self.static_options, Rt)
160 |
161 | # virtual projection and real projection
162 | transform = torch_GetHomographyTransformFromProjections(real_projection_t, virtual_projection_t)
163 |
164 | p00 = (torch_ApplyTransform(transform, self.p00) / self.norm)[:,:2]
165 | p01 = (torch_ApplyTransform(transform, self.p01) / self.norm)[:,:2]
166 | p10 = (torch_ApplyTransform(transform, self.p10) / self.norm)[:,:2]
167 | p11 = (torch_ApplyTransform(transform, self.p11) / self.norm)[:,:2]
168 |
169 | loss = torch.stack((self.get_loss(p00), self.get_loss(p01), self.get_loss(p10), self.get_loss(p11)),dim = 1)
170 | loss,_ = torch.max(loss, dim = 1)
171 |
172 | loss = torch.min(loss, loss - loss + 1) #[0]
173 | loss = torch.sum(loss) / batch_size
174 |
175 | return loss
176 |
177 | def get_loss(self, p):
178 | d = (p - self.inner_ratio) * (p < self.inner_ratio).type(torch.cuda.FloatTensor) + \
179 | (1 - self.inner_ratio - p) * (p > (1 - self.inner_ratio)).type(torch.cuda.FloatTensor)
180 | return torch.sum(d**2, dim = 1)
181 |
--------------------------------------------------------------------------------
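For reference, the corner penalty in Undefine_loss.get_loss reduces to a squared hinge on how far each projected corner falls outside the inner margin. A minimal CPU-only sketch of that reduction; the helper name and example values are illustrative assumptions, not part of the repository:

import torch

def corner_margin_penalty(p, inner_ratio=0.04):
    # p: [B, 2] corner positions normalized to [0, 1]; penalize the squared
    # distance by which a corner leaves the [inner_ratio, 1 - inner_ratio] box.
    below = (p - inner_ratio) * (p < inner_ratio).float()
    above = (1 - inner_ratio - p) * (p > (1 - inner_ratio)).float()
    d = below + above
    return torch.sum(d ** 2, dim=1)

corners = torch.tensor([[0.02, 0.5], [0.5, 0.5]])
print(corner_margin_penalty(corners))  # tensor([4.0000e-04, 0.0000e+00])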
/dvs/metrics.py:
--------------------------------------------------------------------------------
1 | import os
2 | import sys
3 | import numpy as np
4 | import cv2
5 | import math
6 | import pdb
7 | import matplotlib.pyplot as plt
8 | from printer import Printer
9 | from warp import video2frame_one_seq
10 | import datetime
11 | import torch
12 | import copy
13 | import csv
14 | import copyreg
15 | import shutil
16 | import matplotlib.pyplot as plt
17 | from util import crop_video
18 |
19 | def _pickle_keypoints(point):
20 | return cv2.KeyPoint, (*point.pt, point.size, point.angle,
21 | point.response, point.octave, point.class_id)
22 |
23 | copyreg.pickle(cv2.KeyPoint().__class__, _pickle_keypoints)
24 |
25 | os.environ["CUDA_VISIBLE_DEVICES"] = "0"
26 |
27 | h_size = 480
28 | w_size = 640
29 |
30 | def crop_metric(M):
31 | points = np.array([[0,0,1],[0,h_size,1], [w_size,0,1], [w_size,h_size,1]]).T
32 | result = np.matmul(M,points).T
33 | result = result[:,:2]/result[:,2:]
34 | w_out = 1 - max(result[0,0], result[1,0], w_size - result[2,0], w_size - result[3,0], 0)/w_size
35 | h_out = 1 - max(result[0,1], result[2,1], h_size - result[1,1], h_size - result[3,1], 0)/h_size
36 | return w_out, h_out
37 |
38 | # https://stackoverflow.com/questions/34389125/how-to-get-the-scale-factor-of-getperspectivetransform-in-opencv
39 | def get_scale(M):
40 | h1 = M[0, 0]
41 | h2 = M[0, 1]
42 | h3 = M[0, 2]
43 | h4 = M[1, 0]
44 | h5 = M[1, 1]
45 | h6 = M[1, 2]
46 | h7 = M[2, 0]
47 | h8 = M[2, 1]
48 | QR = np.array([[h1-(h7*h3), h2-(h8*h3)], [h4-(h7*h6), h5-(h8*h6)]])
49 | Q, R = np.linalg.qr(QR)
50 | return abs(R[0,0]), abs(R[1,1])
51 |
52 | # https://stackoverflow.com/questions/21019338/how-to-change-the-homography-with-the-scale-of-the-image
53 | def get_rescale_matrix(M, sx, sy):
54 | S = np.eye(3, dtype = float)
55 | S[0,0] = sx
56 | S[1,1] = sy
57 |
58 | S1 = np.eye(3, dtype = float)
59 | S1[0,0] = 1/sx
60 | S1[1,1] = 1/sy
61 | return np.matmul(M, S1)
62 |
63 | # Parts of this code are adapted from https://github.com/jinsc37/DIFRINT/blob/master/metrics.py
64 | def metrics(in_src, out_src, package, crop_scale = False, re_compute = False):
65 | load_dic = None
66 | if re_compute and os.path.exists(package):
67 |         print("Start loading cached results")
68 |         load_dic = torch.load(package)
69 |         print("Finish loading cached results")
70 | dic = {
71 | 'M': None,
72 | 'CR_seq': [],
73 | 'DV_seq': [],
74 | 'SS_t': None,
75 | 'SS_r': None,
76 | 'w_crop':[],
77 | 'h_crop':[],
78 | 'distortion': [],
79 | 'count': 0,
80 | 'in_sift': {},
81 | 'out_sift': {},
82 | 'fft_t': {},
83 | 'fft_r': {}
84 | }
85 |
86 | if load_dic is not None:
87 | dic["in_sift"] = load_dic["in_sift"]
88 | dic["out_sift"] = load_dic["out_sift"]
89 |
90 | frameList_in = sorted(os.listdir(in_src))
91 | frameList = sorted(os.listdir(out_src))
92 | frameList = frameList[:min(len(frameList_in),len(frameList))]
93 |
94 | # Create brute-force matcher object
95 | bf = cv2.BFMatcher()
96 |
97 | # Apply the homography transformation if we have enough good matches
98 | MIN_MATCH_COUNT = 10 #10
99 |
100 | ratio = 0.7 #0.7
101 | thresh = 5.0 #5.0
102 |
103 | Pt = np.asarray([[1.0,0.0,0.0],[0.0,1.0,0.0],[0.0,0.0,1.0]])
104 | P_seq = []
105 | count = 1
106 | for index, f in enumerate(frameList, 0):
107 | if f.endswith('.png'):
108 | # Load the images in gray scale
109 | img1 = cv2.imread(os.path.join(in_src, f), 0)
110 | img1 = cv2.resize(img1, (w_size,h_size), interpolation = cv2.INTER_LINEAR)
111 |
112 | img1o = cv2.imread(os.path.join(out_src, f), 0)
113 | img1o = cv2.resize(img1o, (w_size,h_size), interpolation = cv2.INTER_LINEAR)
114 | sift = cv2.SIFT_create()
115 |
116 | if f in dic["in_sift"]:
117 | keyPoints1, descriptors1 = dic["in_sift"][f]
118 | else:
119 | # Detect the SIFT key points and compute the descriptors for the two images
120 | keyPoints1, descriptors1 = sift.detectAndCompute(img1, None)
121 | dic["in_sift"][f] = (keyPoints1, descriptors1)
122 |
123 | if f in dic["out_sift"]:
124 | keyPoints1o, descriptors1o = dic["out_sift"][f]
125 | else:
126 | keyPoints1o, descriptors1o = sift.detectAndCompute(img1o, None)
127 | dic["out_sift"][f] = (keyPoints1o, descriptors1o)
128 |
129 | # Match the descriptors
130 | matches = bf.knnMatch(descriptors1, descriptors1o, k=2)
131 |
132 | # Select the good matches using the ratio test
133 | goodMatches = []
134 |
135 | for m, n in matches:
136 | if m.distance < ratio * n.distance:
137 | goodMatches.append(m)
138 |
139 | if len(goodMatches) > MIN_MATCH_COUNT:
140 | # Get the good key points positions
141 | sourcePoints = np.float32([ keyPoints1[m.queryIdx].pt for m in goodMatches ]).reshape(-1, 1, 2)
142 | destinationPoints = np.float32([ keyPoints1o[m.trainIdx].pt for m in goodMatches ]).reshape(-1, 1, 2)
143 |
144 | M, mask = cv2.findHomography(sourcePoints, destinationPoints, method=cv2.RANSAC, ransacReprojThreshold=thresh)
145 | im_dst = cv2.warpPerspective(img1, M, (w_size,h_size))
146 |
147 | cm = []
148 | for i in range(6):
149 | for j in range(6):
150 | hs = int(h_size * (0.2 + 0.1 * i))
151 | he = int(h_size * (0.3 + 0.1 * i))
152 | ws = int(w_size * (0.2 + 0.1 * j))
153 | we = int(w_size * (0.3 + 0.1 * j))
154 | cm.append(np.corrcoef(img1o[hs:he, ws:we].flat, im_dst[hs:he, ws:we].flat))
155 | dic["distortion"].append(cm)
156 |
157 | if crop_scale:
158 | sx, sy = get_scale(M)
159 | M_scale = get_rescale_matrix(M, sx, sy)
160 | w_crop, h_crop = crop_metric(M_scale)
161 | else:
162 | w_crop, h_crop = crop_metric(M)
163 | dic["w_crop"].append(w_crop)
164 | dic["h_crop"].append(h_crop)
165 |
166 | # Obtain Scale, Translation, Rotation, Distortion value
167 | sx = M[0, 0]
168 | sy = M[1, 1]
169 | scaleRecovered = math.sqrt(np.abs(sx*sy))
170 |
171 | w, _ = np.linalg.eig(M[0:2,0:2])
172 | w = np.sort(w)[::-1]
173 | DV = w[1]/w[0]
174 | #pdb.set_trace()
175 |
176 | dic["CR_seq"].append(1.0/scaleRecovered)
177 | dic["DV_seq"].append(DV)
178 |
179 | # For Stability score calculation
180 | if count < len(frameList):
181 | f_path = f[:-9] + '%05d.png' % (int(f[-9:-4])+1)
182 | if f_path in dic["out_sift"]:
183 | keyPoints2o, descriptors2o = dic["out_sift"][f_path]
184 | else:
185 | img2o = cv2.imread(os.path.join(out_src, f_path), 0)
186 | img2o = cv2.resize(img2o, (w_size,h_size), interpolation = cv2.INTER_LINEAR)
187 | keyPoints2o, descriptors2o = sift.detectAndCompute(img2o, None)
188 | dic["out_sift"][f_path] = (keyPoints2o, descriptors2o)
189 |
190 | matches = bf.knnMatch(descriptors1o, descriptors2o, k=2)
191 | goodMatches = []
192 |
193 | for m, n in matches:
194 | if m.distance < ratio * n.distance:
195 | goodMatches.append(m)
196 |
197 | if len(goodMatches) > MIN_MATCH_COUNT:
198 | # Get the good key points positions
199 | sourcePoints = np.float32([ keyPoints1o[m.queryIdx].pt for m in goodMatches ]).reshape(-1, 1, 2)
200 | destinationPoints = np.float32([ keyPoints2o[m.trainIdx].pt for m in goodMatches ]).reshape(-1, 1, 2)
201 |
202 | # Obtain the homography matrix
203 | M, mask = cv2.findHomography(sourcePoints, destinationPoints, method=cv2.RANSAC, ransacReprojThreshold=thresh)
204 |
205 | P_seq.append(np.matmul(Pt, M))
206 | Pt = np.matmul(Pt, M)
207 | if count % 10 ==0:
208 | sys.stdout.write('\rFrame: ' + str(count) + '/' + str(len(frameList)))
209 | sys.stdout.flush()
210 | dic["count"] = count
211 | count += 1
212 |
213 | # Make 1D temporal signals
214 | P_seq_t = np.asarray([1])
215 | P_seq_r = np.asarray([1])
216 |
217 | #pdb.set_trace()
218 | for Mp in P_seq:
219 | sx = Mp[0, 0]
220 | sy = Mp[1, 1]
221 | c = Mp[0, 2]
222 | f = Mp[1, 2]
223 |
224 | transRecovered = math.sqrt(c*c + f*f)
225 | thetaRecovered = math.atan2(sx, sy) * 180 / math.pi
226 |
227 | P_seq_t = np.concatenate((P_seq_t, [transRecovered]), axis=0)
228 | P_seq_r = np.concatenate((P_seq_r, [thetaRecovered]), axis=0)
229 |
230 | P_seq_t = np.delete(P_seq_t, 0)
231 | P_seq_r = np.delete(P_seq_r, 0)
232 |
233 | # FFT
234 | fft_t = np.fft.fft(P_seq_t)
235 | fft_r = np.fft.fft(P_seq_r)
236 | fft_t = abs(fft_t)**2
237 | fft_r = abs(fft_r)**2
238 |
239 | fft_t = np.delete(fft_t, 0)
240 | fft_r = np.delete(fft_r, 0)
241 | fft_t = fft_t[:int(len(fft_t)/2)]
242 | fft_r = fft_r[:int(len(fft_r)/2)]
243 |
244 | dic["fft_t"] = fft_t
245 | dic["fft_r"] = fft_r
246 |
247 | SS_t = np.sum(fft_t[:5])/np.sum(fft_t)
248 | SS_r = np.sum(fft_r[:5])/np.sum(fft_r)
249 |
250 | dic["CR_seq"] = np.array(dic["CR_seq"])
251 | dic["DV_seq"] = np.array(dic["DV_seq"])
252 | dic["w_crop"] = np.array(dic["w_crop"])
253 | dic["h_crop"] = np.array(dic["h_crop"])
254 | dic["distortion"] = np.array(dic["distortion"])
255 | dic["SS_t"] = SS_t
256 | dic["SS_r"] = SS_r
257 |
258 | if not (re_compute and os.path.exists(package)):
259 | torch.save(dic, package)
260 |
261 | DV_seq = np.absolute(dic["DV_seq"])
262 | DV_seq = DV_seq[np.where((DV_seq >= 0.5) & (DV_seq <= 1))]
263 | Distortion = str.format('{0:.4f}', np.nanmin(DV_seq))
264 | Distortion_avg = str.format('{0:.4f}', np.nanmean(DV_seq))
265 |
266 | Trans = str.format('{0:.4f}', dic["SS_t"])
267 | Rot = str.format('{0:.4f}', dic["SS_r"])
268 |
269 | w_crop = crop_rm_outlier(dic["w_crop"])
270 | h_crop = crop_rm_outlier(dic["h_crop"])
271 |
272 | FOV = str.format( '{0:.4f}', min(np.nanmin(w_crop), np.nanmin(h_crop)) )
273 | FOV_avg = str.format( '{0:.4f}', (np.nanmean(w_crop)+np.nanmean(h_crop)) / 2 )
274 |
275 | Correlation_avg = str.format( '{0:.4f}', np.nanmean(dic["distortion"][10:]) )
276 | Correlation_min = str.format( '{0:.4f}', np.nanmin(dic["distortion"][10:]) )
277 |
278 | # Print results
279 | print('\n***Distortion value (Avg, Min):')
280 | print(Distortion_avg +' | '+ Distortion)
281 | print('***Stability Score (Avg, Trans, Rot):')
282 | print(str.format('{0:.4f}', (dic["SS_t"]+dic["SS_r"])/2) +' | '+ Trans +' | '+ Rot )
283 | print("=================")
284 | print('***FOV ratio (Avg, Min):')
285 | print( FOV_avg +' | '+ FOV )
286 | print('***Correlation value (Avg, Min):')
287 | print( Correlation_avg +' | '+ Correlation_min , "\n")
288 |
289 | dic['in_sift'] = 0
290 | dic['out_sift'] = 0
291 | torch.save(dic, package[:-3]+"_light.pt")
292 | return float(FOV)
293 |
294 | def crop_rm_outlier(crop):
295 | crop = np.array(crop)
296 | crop = crop[crop >= 0.5]
297 | return sorted(crop)[5:]
298 |
299 | if __name__ == '__main__':
300 | metric_path = os.path.join("./test/stabilzation/metric")
301 | if not os.path.exists(metric_path):
302 | os.makedirs(metric_path)
303 |
304 | in_video = "./video/s_114_outdoor_running_trail_daytime/ControlCam_20200930_104820.mp4"
305 | in_folder = os.path.join(metric_path, "in_frame")
306 | if not os.path.exists(in_folder):
307 | os.makedirs(in_folder)
308 | print("Convert video to frames")
309 | video2frame_one_seq(in_video, in_folder)
310 |
311 | out_video = "./test/stabilzation/s_114_outdoor_running_trail_daytime_stab.mp4"
312 | out_folder = os.path.join(metric_path, "out_frame")
313 | if not os.path.exists(out_folder):
314 | os.makedirs(out_folder)
315 | print("Convert video to frames")
316 | video2frame_one_seq(out_video, out_folder)
317 |
318 | package = os.path.join(metric_path, "stabilzation.pt")
319 | FOV = metrics(in_folder, out_folder, package)
320 |
321 | crop_path = out_video[:-4] + "_crop.mp4"
322 | crop_video(out_video, crop_path, FOV)
323 |
--------------------------------------------------------------------------------
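The stability scores SS_t and SS_r computed above are low-frequency energy ratios of the accumulated translation and rotation signals. A minimal sketch of that reduction under the same steps as metrics(); the helper name and the test signals are illustrative assumptions:

import numpy as np

def stability_score(signal):
    # Mirror the FFT step in metrics(): drop the DC term, keep the first half
    # of the power spectrum, and take the energy share of the lowest five bins.
    spectrum = np.abs(np.fft.fft(np.asarray(signal))) ** 2
    spectrum = np.delete(spectrum, 0)
    spectrum = spectrum[:len(spectrum) // 2]
    return np.sum(spectrum[:5]) / np.sum(spectrum)

t = np.linspace(0, 1, 200)
print(stability_score(np.sin(2 * np.pi * 2 * t)))   # smooth motion -> close to 1
print(stability_score(np.random.randn(200)))        # jittery motion -> much smaller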
/dvs/printer.py:
--------------------------------------------------------------------------------
1 | import sys
2 |
3 | class Printer(object):
4 | def __init__(self, *files):
5 | self.files = files
6 |
7 |     # Redirect stdout to this Printer
8 | def open(self):
9 | if not hasattr(sys, '_stdout'):
10 | sys._stdout = sys.stdout
11 | sys.stdout = self
12 | return self
13 |
14 |     # Restore the default stdout and close the log files
15 | def close(self):
16 | stdout = sys._stdout
17 | for f in self.files:
18 | if f != stdout:
19 | f.close()
20 | sys.stdout = stdout
21 |
22 |     # Write to every registered file
23 | def write(self, obj):
24 | for f in self.files:
25 | f.write(obj)
26 | f.flush()
27 |
28 | def flush(self):
29 | pass
30 |
31 | if __name__ == '__main__':
32 | print("Start testing")
33 | t = Printer(sys.stdout, open('./test.txt', 'w+')).open()
34 | print("In files")
35 | t.close()
36 | print("Not in files")
--------------------------------------------------------------------------------
/dvs/requirements.txt:
--------------------------------------------------------------------------------
1 | colorama==0.4.4
2 | ffmpeg==1.4
3 | imageio==2.9.0
4 | matplotlib==3.3.4
5 | opencv-contrib-python==4.5.1.48
6 | opencv-python==4.5.1.48
7 | pytz==2021.1
8 | PyYAML==5.4.1
9 | scipy==1.5.4
10 | tensorboardX==2.1
11 | tqdm==4.59.0
--------------------------------------------------------------------------------
/dvs/train.py:
--------------------------------------------------------------------------------
1 | import os
2 | import sys
3 | import torch
4 | import torchvision
5 | import torch.nn as nn
6 | from torch.autograd import Variable
7 |
8 | import time
9 | import yaml
10 | import argparse
11 | import numpy as np
12 | from printer import Printer
13 | from dataset import get_data_loader
14 | from model import Model
15 | import datetime
16 | import copy
17 | from util import make_dir, get_optimizer, AverageMeter, save_train_info, norm_flow
18 | from gyro import torch_QuaternionProduct, torch_QuaternionReciprocal, torch_norm_quat
19 |
20 | os.environ["CUDA_VISIBLE_DEVICES"] = "0"
21 |
22 | def run_epoch(model, loader, cf, epoch, lr, optimizer=None, is_training=True, USE_CUDA=True, clip_norm=0):
23 | no_flo = False
24 | number_virtual, number_real = cf['data']["number_virtual"], cf['data']["number_real"]
25 | avg_loss = AverageMeter()
26 | if is_training:
27 | model.net.train()
28 | model.unet.train()
29 | else:
30 | model.net.eval()
31 | model.unet.eval()
32 | for i, data in enumerate(loader, 0):
33 | # get the inputs; data is a list of [inputs, labels]
34 | real_inputs, times, flo, flo_back, real_projections, real_postion, ois, real_queue_idx = data
35 |         print("Finish loading data")
36 |
37 | real_inputs = real_inputs.type(torch.float) #[b,60,84=21*4]
38 | real_projections = real_projections.type(torch.float)
39 | flo = flo.type(torch.float)
40 | flo_back = flo_back.type(torch.float)
41 | ois = ois.type(torch.float)
42 |
43 | batch_size, step, dim = real_inputs.size()
44 | times = times.numpy()
45 | real_queue_idx = real_queue_idx.numpy()
46 | virtual_queue = loader.dataset.random_init_virtual_queue(batch_size, real_postion[:,0,:].numpy(), times[:,1]) # TODO
47 | # virtual_queue = [None] * batch_size
48 | loss = 0
49 | model.net.init_hidden(batch_size)
50 | for j in range(step):
51 | virtual_inputs, vt_1 = loader.dataset.get_virtual_data(
52 | virtual_queue, real_queue_idx, times[:, j], times[:, j+1], times[:, 0], batch_size, number_virtual, real_postion[:,j])
53 |
54 | real_inputs_step = real_inputs[:,j,:]
55 | inputs = torch.cat((real_inputs_step,virtual_inputs), dim = 1)
56 |
57 | # inputs = Variable(real_inputs_step)
58 | if USE_CUDA:
59 | real_inputs_step = real_inputs_step.cuda()
60 | virtual_inputs = virtual_inputs.cuda()
61 | inputs = inputs.cuda()
62 | if no_flo is False:
63 | flo_step = flo[:,j].cuda()
64 | flo_back_step = flo_back[:,j].cuda()
65 | else:
66 | flo_step = None
67 | flo_back_step = None
68 | vt_1 = vt_1.cuda()
69 | real_projections_t = real_projections[:,j+1].cuda()
70 | real_projections_t_1 = real_projections[:,j].cuda()
71 | real_postion_anchor = real_postion[:,j].cuda()
72 | ois_step = ois[:,j].cuda()
73 |
74 | if no_flo is False:
75 | b, h, w, _ = flo_step.size()
76 | flo_step = norm_flow(flo_step, h, w)
77 | flo_back_step = norm_flow(flo_back_step, h, w)
78 |
79 | if is_training:
80 | if no_flo is False:
81 | flo_out = model.unet(flo_step, flo_back_step)
82 | else:
83 | flo_out = None
84 |
85 | if j < 1:
86 |                     for _ in range(2): # run the first step twice
87 | out = model.net(inputs, flo_out, ois_step)
88 | else:
89 | out = model.net(inputs, flo_out, ois_step)
90 | else:
91 | with torch.no_grad():
92 | if no_flo is False:
93 | flo_out = model.unet(flo_step, flo_back_step)
94 | else:
95 | flo_out = None
96 |
97 | if j < 1:
98 |                         for _ in range(2): # run the first step twice
99 | out = model.net(inputs, flo_out, ois_step)
100 | else:
101 | out = model.net(inputs, flo_out, ois_step)
102 |
103 | if epoch <= 30:
104 | follow = True
105 | else:
106 | follow = False
107 |
108 | if epoch > 30:
109 | undefine = True
110 | else:
111 | undefine = False
112 |
113 | if epoch > 40:
114 | optical = True
115 | else:
116 | optical = False
117 |
118 | loss_step = model.loss(out, vt_1, virtual_inputs, real_inputs_step, \
119 | flo_step, flo_back_step, real_projections_t, real_projections_t_1, real_postion_anchor, \
120 | follow = follow, undefine = undefine, optical = optical, stay = optical)
121 |
122 | loss = loss_step
123 |
124 | virtual_position = virtual_inputs[:, -4:]
125 | pos = torch_QuaternionProduct(virtual_position, real_postion_anchor)
126 | out = torch_QuaternionProduct(out, pos)
127 |
128 | if USE_CUDA:
129 | out = out.cpu().detach().numpy()
130 |
131 | virtual_queue = loader.dataset.update_virtual_queue(batch_size, virtual_queue, out, times[:,j+1])
132 |
133 | if (j+1) % 10 == 0:
134 | print("Step: "+str(j+1)+"/"+str(step))
135 | print(loss)
136 | loss = torch.sum(loss)
137 | if is_training:
138 | optimizer.zero_grad()
139 | loss.backward(retain_graph=True)
140 | if clip_norm:
141 | nn.utils.clip_grad_norm_(model.net.parameters(), max_norm=clip_norm)
142 | nn.utils.clip_grad_norm_(model.unet.parameters(), max_norm=clip_norm)
143 | optimizer.step()
144 |
145 | avg_loss.update(loss.item(), batch_size)
146 |
147 | return avg_loss.avg
148 |
149 |
150 | def train(args = None):
151 | torch.autograd.set_detect_anomaly(True)
152 | config_file = args.config
153 |     cf = yaml.safe_load(open(config_file, 'r'))
154 |
155 | USE_CUDA = cf['data']["use_cuda"]
156 | seed = cf['train']["seed"]
157 |
158 | torch.manual_seed(seed)
159 | if USE_CUDA:
160 | torch.cuda.manual_seed(seed)
161 |
162 | checkpoints_dir = cf['data']['checkpoints_dir']
163 | epochs = cf["train"]["epoch"]
164 | snapshot = cf["train"]["snapshot"]
165 | decay_epoch = cf['train']['decay_epoch']
166 | init_lr = cf["train"]["init_lr"]
167 | lr_decay = cf["train"]["lr_decay"]
168 | lr_step = cf["train"]["lr_step"]
169 | clip_norm = cf["train"]["clip_norm"]
170 | load_model = cf["model"]["load_model"]
171 |
172 | checkpoints_dir = make_dir(checkpoints_dir, cf)
173 |
174 | if load_model is None:
175 | log_file = open(os.path.join(cf["data"]["log"], cf['data']['exp']+'.log'), 'w+')
176 | else:
177 | log_file = open(os.path.join(cf["data"]["log"], cf['data']['exp']+'.log'), 'a')
178 | printer = Printer(sys.stdout, log_file).open()
179 |
180 |     print('----Print Argument Settings------')
181 | for key in cf:
182 | print('{}:'.format(key))
183 | for para in cf[key]:
184 | print('{:50}:{}'.format(para,cf[key][para]))
185 | print('\n')
186 |
187 | # Define the model
188 | model = Model(cf)
189 | optimizer = get_optimizer(cf["train"]["optimizer"], model, init_lr, cf)
190 |
191 | for idx, m in enumerate(model.net.children()):
192 | print('{}:{}'.format(idx,m))
193 | for idx, m in enumerate(model.unet.children()):
194 | print('{}:{}'.format(idx,m))
195 |
196 | if load_model is not None:
197 |         print("------Load Pretrained Model--------")
198 | checkpoint = torch.load(load_model)
199 | model.net.load_state_dict(checkpoint['state_dict'])
200 | model.unet.load_state_dict(checkpoint['unet'])
201 | print("------Resume Training Process-----")
202 | optimizer.load_state_dict(checkpoint['optim_dict'])
203 | epoch_load = checkpoint['epoch']
204 | print("Epoch load: ", epoch_load)
205 | else:
206 | epoch_load = 0
207 |
208 | if USE_CUDA:
209 | model.net.cuda()
210 | model.unet.cuda()
211 | if load_model is not None:
212 | for state in optimizer.state.values():
213 | for k, v in state.items():
214 | if isinstance(v, torch.Tensor):
215 | state[k] = v.cuda()
216 | for param in optimizer.param_groups:
217 | init_lr = param['lr']
218 |
219 | print("-----------Load Dataset----------")
220 | train_loader, test_loader = get_data_loader(cf, no_flo = False)
221 |
222 | print("----------Start Training----------")
223 | currentDT = datetime.datetime.now()
224 | print(currentDT.strftime(" %Y-%m-%d %H:%M:%S"))
225 |
226 | start_time = time.time()
227 |
228 | if lr_step:
229 | decay_epoch = list(range(1+lr_step, epochs+1, lr_step))
230 |
231 | lr = init_lr
232 |
233 | for count in range(epoch_load+1, epochs+1):
234 |         if decay_epoch is not None and count in decay_epoch:
235 | lr *= lr_decay
236 | for param in optimizer.param_groups:
237 | param['lr'] *= lr_decay
238 |
239 | print("Epoch: %d, learning_rate: %.5f" % (count, lr))
240 |
241 | train_loss = run_epoch(model, train_loader, cf, count, lr, optimizer=optimizer, clip_norm=clip_norm, is_training=True, USE_CUDA=USE_CUDA)
242 |
243 | test_loss = run_epoch(model, test_loader, cf, count, lr, is_training=False, USE_CUDA=USE_CUDA)
244 |
245 | time_used = (time.time() - start_time) / 60
246 | print("Epoch %d done | TrLoss: %.4f | TestLoss: %.4f | Time_used: %.4f minutes" % (
247 | count, train_loss, test_loss, time_used))
248 |
249 | if count % snapshot == 0:
250 | save_train_info("epoch", checkpoints_dir, cf, model, count, optimizer)
251 | save_train_info("last", checkpoints_dir, cf, model, count, optimizer)
252 | print("Model stored at epoch %d"%count)
253 |
254 | currentDT = datetime.datetime.now()
255 | print(currentDT.strftime(" %Y-%m-%d %H:%M:%S"))
256 | print("------------End Training----------")
257 | return
258 |
259 | if __name__ == '__main__':
260 | parser = argparse.ArgumentParser("Training model")
261 | parser.add_argument("--config", default="./conf/stabilzation_train.yaml", help="Config file.")
262 | args = parser.parse_args()
263 | train(args = args)
--------------------------------------------------------------------------------
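When lr_step is non-zero, the decay schedule built in train() is purely multiplicative at fixed epoch intervals. A small standalone sketch of that schedule; the helper name and the example numbers are illustrative assumptions:

def lr_at_epoch(epoch, init_lr, lr_step, lr_decay, epochs):
    # Reproduce the schedule from train(): decay_epoch = [1 + lr_step, 1 + 2*lr_step, ...]
    # and the learning rate is multiplied by lr_decay at each of those epochs.
    decay_epoch = list(range(1 + lr_step, epochs + 1, lr_step))
    lr = init_lr
    for e in range(1, epoch + 1):
        if e in decay_epoch:
            lr *= lr_decay
    return lr

# e.g. init_lr=1e-4, lr_step=20, lr_decay=0.5, epochs=100: decays at epochs 21, 41, 61, 81
print(lr_at_epoch(45, 1e-4, 20, 0.5, 100))  # 2.5e-05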
/dvs/util.py:
--------------------------------------------------------------------------------
1 | import os
2 | import torch
3 | import cv2
4 | from itertools import chain
5 | from warp import load_video, save_video
6 | import numpy as np
7 | import matplotlib.pyplot as plt
8 | from gyro import get_rotations
9 | import shutil
10 |
11 | def save_train_info(name, checkpoints_dir, cf, model, count, optimizer = None):
12 | path = None
13 | if name == "last":
14 | path = os.path.join(checkpoints_dir, cf['data']['exp']+'_last.checkpoint')
15 | elif name == "best":
16 | path = os.path.join(checkpoints_dir, cf['data']['exp']+'_best.checkpoint')
17 | else:
18 | path = os.path.join(checkpoints_dir, cf['data']['exp']+'_epoch%d.checkpoint'%count)
19 | torch.save(model.save_checkpoint(epoch = count, optimizer=optimizer), path)
20 |
21 | def make_dir(checkpoints_dir ,cf):
22 | inference_path = "./test"
23 | if not os.path.exists(checkpoints_dir):
24 | os.makedirs(checkpoints_dir)
25 | if not os.path.exists(cf["data"]["log"]):
26 | os.makedirs(cf["data"]["log"])
27 | if not os.path.exists(inference_path):
28 | os.makedirs(inference_path)
29 |
30 | inference_path = os.path.join(inference_path, cf['data']['exp'])
31 | if not os.path.exists(inference_path):
32 | os.makedirs(inference_path)
33 | checkpoints_dir = os.path.join(checkpoints_dir, cf['data']['exp'])
34 | if not os.path.exists(checkpoints_dir):
35 | os.makedirs(checkpoints_dir)
36 | return checkpoints_dir
37 |
38 | def get_optimizer(optimizer, model, init_lr, cf):
39 | if optimizer == "adam":
40 | optimizer = torch.optim.Adam(chain(model.net.parameters(), model.unet.parameters()), lr=init_lr, weight_decay=cf["train"]["weight_decay"])
41 | elif optimizer == "sgd":
42 | optimizer = torch.optim.SGD(chain(model.net.parameters(), model.unet.parameters()), lr=init_lr, momentum=cf["train"]["momentum"])
43 | return optimizer
44 |
45 | def crop_video(in_path, out_path, crop_ratio): # assumes 1920x1080 input frames
46 | frame_array, fps, size = load_video(in_path)
47 | hs = int((1-crop_ratio)*1080) + 1
48 | he = int(crop_ratio*1080) - 1
49 | ws = int((1-crop_ratio)*1920) + 1
50 | we = int(crop_ratio*1920) - 1
51 | for i in range(len(frame_array)):
52 | frame_array[i] = cv2.resize(frame_array[i][hs:he,ws:we,:], size, interpolation = cv2.INTER_LINEAR)
53 | save_video(out_path, frame_array, fps, size= size)
54 |
55 | def norm_flow(flow, h, w):
56 | if flow.shape[2] == 2:
57 | flow[:,:,0] /= h
58 | flow[:,:,1] /= w
59 | else:
60 | flow[:,:,:,0] /= h
61 | flow[:,:,:,1] /= w
62 | return flow
63 |
64 | class AverageMeter(object):
65 | def __init__(self):
66 | self.reset()
67 |
68 | def reset(self):
69 | self.avg = 0
70 | self.sum = 0
71 | self.cnt = 0
72 |
73 | def update(self, val, n=1):
74 | self.sum += val * n
75 | self.cnt += n
76 | if self.cnt > 0:
77 | self.avg = self.sum / self.cnt
--------------------------------------------------------------------------------
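A quick usage sketch for AverageMeter as it is used to track the epoch loss in train.py; the values below are made up for illustration:

from util import AverageMeter

meter = AverageMeter()
meter.update(2.0, n=4)   # a batch of 4 samples with mean loss 2.0
meter.update(1.0, n=4)   # a batch of 4 samples with mean loss 1.0
print(meter.avg)         # 1.5, the sample-weighted running average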
/dvs/warp/__init__.py:
--------------------------------------------------------------------------------
1 | from .warping import (
2 | warp_video
3 | )
4 | from .read_write import (
5 | save_video,
6 | load_video,
7 | video2frame_one_seq
8 | )
--------------------------------------------------------------------------------
/dvs/warp/rasterizer.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | import matplotlib.pyplot as plt
3 | from numpy import array
4 | import torch
5 | import cv2
6 | import time
7 |
8 | device = torch.device("cuda")
9 |
10 | def Rasterization(image, grid, get_mesh_only = False):
11 |     # grid: [rows, cols, 4], normalized to [0, 1]; [:, :, :2] are the warped (x, y) mesh positions, [:, :, 2:] the source (x, y) positions
12 | shape = image.size()
13 | height = shape[1]
14 | width = shape[2]
15 | wapper_upper_triangle, wapper_lower_triangle = grid_to_triangle(grid[:,:,:2])
16 | origin_upper_triangle, origin_lower_triangle = grid_to_triangle(grid[:,:,2:])
17 |
18 |
19 | [xmax, xmin, ymax, ymin], xlength, ylength = grid_size(wapper_upper_triangle, wapper_lower_triangle, height, width)
20 |
21 | xratio = xlength / width
22 | yratio = ylength / height
23 |
24 | wapper_triangle = torch.stack((wapper_upper_triangle,wapper_lower_triangle),dim = 1).to(device) # grid * upper/lower * point * xy
25 | origin_triangle = torch.stack((origin_upper_triangle,origin_lower_triangle),dim = 1).to(device) # grid * upper/lower * point * xy
26 |
27 | tran_triangle = torch.zeros(wapper_triangle.size()).to(device)
28 |
29 | tran_triangle[:,:,:,0] = (wapper_triangle[:,:,:,0] - xmin.view(-1,1,1).to(device)/width) / xratio
30 | tran_triangle[:,:,:,1] = (wapper_triangle[:,:,:,1] - ymin.view(-1,1,1).to(device)/height) / yratio
31 |
32 | mask = triangle2mask(tran_triangle, ylength, xlength) # consuming
33 |
34 | mask = torch.unsqueeze(mask, 4)
35 | origin_triangle = torch.unsqueeze(origin_triangle, 1)
36 |
37 | grid_sample = origin_triangle * mask # consuming
38 | grid_sample = torch.sum(torch.sum(grid_sample, dim = 3), dim = 2).view(-1,ylength,xlength,2) # consuming
39 |
40 | gxmin = min(0, int(torch.min(xmin)))
41 | gxmax = int(torch.max(xmin) + xlength)
42 | gymin = min(0, int(torch.min(ymin)))
43 | gymax = int(torch.max(ymin) + ylength)
44 | grid_merge = torch.zeros((max(gymax-gymin, height, height - gymin),max(gxmax - gxmin, width, width - gxmin),2)).to(device)
45 | for i in range(grid_sample.size()[0]):
46 | x_s = int(xmin[i] - gxmin)
47 | x_e = int(xmin[i] + xlength - gxmin)
48 | y_s = int(ymin[i] - gymin)
49 | y_e = int(ymin[i] + ylength -gymin)
50 | grid_merge[ y_s:y_e, x_s:x_e, :] += grid_sample[i, :, :, :]
51 |
52 | # grid_merge = grid_merge[min(-gxmin,0):min(-gxmin,0)+height, min(-gymin,0):min(-gymin,0)+width, :]
53 | grid_merge = grid_merge[-gymin:-gymin+height, -gxmin:-gxmin+width, :]
54 | # if get_mesh_only:
55 | # grid_merge = grid_merge.cpu().numpy()
56 | # mesh_grid = generate_mesh_grid(height, width)
57 | # out = grid_merge - mesh_grid
58 | # return np.concatenate((out[:,:,1:],out[:,:,:1]),2)
59 |
60 | shift = torch.tensor([0.5/height,0.5/width])[None, None, :].to(device)
61 | grid_merge = (grid_merge + 1*shift) * 2 - 1
62 |
63 |     image[:3,:2,:2] = 0 # black out the top-left pixels so uncovered grid cells (which sample near (-1,-1)) render black
64 |
65 | image = torch.unsqueeze(image, 0).to(device)
66 | grid_merge = torch.unsqueeze(grid_merge, 0)
67 |
68 | image = torch.nn.functional.grid_sample(image, grid_merge) # default bilinear
69 |
70 | image = torch.squeeze(image, 0)
71 | return image.cpu()
72 |
73 | def grid_to_triangle(grid):
74 | grid_shape = grid.size()
75 | num = (grid_shape[0] - 1) * (grid_shape[1] - 1)
76 |
77 | upper_triangle = grid[:-1, :-1, :, None]
78 | upper_triangle = torch.cat(( upper_triangle, grid[1:, :-1, :, None]), dim = 3)
79 | upper_triangle = torch.cat(( upper_triangle, grid[:-1, 1:, :, None]), dim = 3)
80 | upper_triangle = upper_triangle.view(num, 2, 3)
81 | upper_triangle = torch.transpose(upper_triangle, 1, 2) # grid * point * xy
82 |
83 | lower_triangle = grid[:-1, 1:, :, None]
84 | lower_triangle = torch.cat(( lower_triangle, grid[1:, :-1, :, None]), dim = 3)
85 | lower_triangle = torch.cat(( lower_triangle, grid[1:, 1:, :, None]), dim = 3)
86 | lower_triangle = lower_triangle.view(num, 2, 3)
87 | lower_triangle = torch.transpose(lower_triangle, 1, 2)
88 |
89 | return upper_triangle, lower_triangle # grid * point * xy
90 |
91 | def grid_size(upper_triangle, lower_triangle, height, width):
92 | wapper_grid = torch.cat((upper_triangle, lower_triangle),dim =1)
93 | xmax = torch.floor(torch.max(wapper_grid[:,:,0]*width, 1)[0]) + 1
94 | ymax = torch.floor(torch.max(wapper_grid[:,:,1]*height, 1)[0]) + 1
95 | xmin = torch.floor(torch.min(wapper_grid[:,:,0]*width, 1)[0])
96 | ymin = torch.floor(torch.min(wapper_grid[:,:,1]*height, 1)[0])
97 |
98 | xlength = int(torch.max(xmax - xmin))
99 | ylength = int(torch.max(ymax - ymin))
100 |
101 | return [xmax, xmin, ymax, ymin], xlength, ylength
102 |
103 | def generate_mesh_grid(height, width):
104 | # Create a grid of sampling positions
105 | xs = np.linspace(0, 1, width, endpoint=False)
106 | ys = np.linspace(0, 1, height, endpoint=False)
107 | xmesh, ymesh = np.meshgrid(xs, ys)
108 | # Reshape the sampling positions to a H x W x 2 tensor
109 | return np.moveaxis(array(list(zip(xmesh, ymesh))), 1, 2)
110 |
111 | def triangle2mask(d, height, width): # d: [N x T x 3 x 2]
112 | N = d.size()[0] # batch size
113 | T = d.size()[1] # triangle number
114 | P = height * width # The number of pixels in the output image.
115 |
116 | area = edgefunc(d[:, :, 1, :], d[:, :, 2, :], d[:, :, None, 0, :])
117 |
118 | gridcpu = generate_mesh_grid(height, width)
119 |
120 | gridcpu = np.reshape(gridcpu, (height*width, 2))
121 |
122 | grid = torch.Tensor(gridcpu)
123 | grid = grid.unsqueeze(0).repeat((N, T, 1, 1)) # [N x T x P x 2]
124 |
125 | grid = grid.to(device)
126 |
127 | # Evaluate the edge functions at every position.
128 | # We should get a [N x P] vector out of each.
129 | w0 = edgefunc(d[:, :, 1, :], d[:, :, 2, :], grid) / area
130 | w1 = edgefunc(d[:, :, 2, :], d[:, :, 0, :], grid) / area
131 | w2 = edgefunc(d[:, :, 0, :], d[:, :, 1, :], grid) / area
132 |
133 | # Only pixels inside the triangles will have color
134 | # [N x P]
135 |
136 | mask = (w0 > 0) & (w1 > 0) & (w2 > 0)
137 | mask = torch.unsqueeze(mask, 3).type(torch.cuda.FloatTensor)
138 |
139 | w = torch.stack((w0,w1,w2),dim = 3) * mask
140 |
141 | return torch.transpose(w, 1, 2) # [N x P x T x 3]
142 |
143 |
144 | def edgefunc(v0, v1, p):
145 | """
146 | let P = H * W
147 | v0 and v1 have vertex positions for all T triangles.
148 | Their shapes are [N x T X 2]
149 | p is a list of sampling points as a [N x T X P x 2] tensor.
150 | Each of the T triangles has an [P x 2] matrix of sampling points.
151 | returns a [N x T x P] matrix
152 | """
153 | P = p.size()[2]
154 |
155 | # Take all the x and y coordinates of all the positions as a
156 | # [N x S] tensor
157 | py = p[:, :, :, 1]
158 | px = p[:, :, :, 0]
159 |
160 | # We need to manually broadcast the vector to cover all sample points
161 | x10 = v0[:, :, 0] - v1[:, :, 0] # [N x T]
162 | y01 = v1[:, :, 1] - v0[:, :, 1] # [N x T]
163 |
164 | x10 = x10.unsqueeze(2).repeat((1, 1, P)) # [N x T x P]
165 | y01 = y01.unsqueeze(2).repeat((1, 1, P)) # [N x T x P]
166 |
167 | cross = v0[:,:,1]*v1[:,:,0] - v0[:,:,0]*v1[:,:,1] # [N x T]
168 | cross = cross.unsqueeze(2).repeat((1, 1, P)) # [N x T x P]
169 |
170 | return y01*px + x10*py + cross
171 |
172 | if __name__ == '__main__':
173 | print(generate_mesh_grid(2,3))
--------------------------------------------------------------------------------
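triangle2mask decides pixel coverage with the classic edge-function test: a point lies inside a triangle when all three edge functions share the same sign. A scalar sketch of the same expression used in edgefunc; the triangle winding below is chosen so interior points come out positive, and the function names are illustrative:

import numpy as np

def edge(v0, v1, p):
    # Scalar form of edgefunc: (v1y - v0y)*px + (v0x - v1x)*py + (v0y*v1x - v0x*v1y)
    return (v1[1] - v0[1]) * p[0] + (v0[0] - v1[0]) * p[1] + (v0[1] * v1[0] - v0[0] * v1[1])

def inside(tri, p):
    w0 = edge(tri[1], tri[2], p)
    w1 = edge(tri[2], tri[0], p)
    w2 = edge(tri[0], tri[1], p)
    return w0 > 0 and w1 > 0 and w2 > 0

tri = np.array([[0.0, 0.0], [0.0, 1.0], [1.0, 0.0]])
print(inside(tri, np.array([0.2, 0.2])), inside(tri, np.array([0.9, 0.9])))  # True False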
/dvs/warp/read_write.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | import cv2
3 | import os
4 | from PIL import Image, ImageDraw, ImageFont
5 | import matplotlib.pyplot as plt
6 | import ffmpeg
7 | import json
8 | import torch
9 | import argparse
10 |
11 | def load_video(path, save_dir = None, resize = None, length = -1): # N x H x W x C
12 | vidcap = cv2.VideoCapture(path)
13 | fps = vidcap.get(cv2.CAP_PROP_FPS)
14 | success,image = vidcap.read()
15 | print(image.shape)
16 | height, width, layers = image.shape
17 | if resize is None:
18 | size = (width,height)
19 | elif type(resize) is int:
20 | size = (width//resize,height//resize)
21 | else:
22 | size = resize
23 | count = 0
24 | frames = []
25 | while success:
26 | if resize is not None:
27 | image = cv2.resize(image, size, interpolation = cv2.INTER_LINEAR)
28 | if save_dir != None:
29 | path = os.path.join(save_dir, "frame_" + str(count).zfill(4) + ".png")
30 | cv2.imwrite(path, image)
31 | frames.append(image)
32 | success,image = vidcap.read()
33 | count += 1
34 | if length > 0 and count >= length:
35 | break
36 | print("Video length: ", len(frames))
37 | return frames, fps, size
38 |
39 | def video2frame(path, resize = None):
40 | data_name = sorted(os.listdir(path))
41 | for i in range(len(data_name)):
42 | print(str(i+1)+" / " + str(len(data_name)))
43 | data_folder = os.path.join(path, data_name[i])
44 | print(data_folder)
45 | files = os.listdir(data_folder)
46 | for f in files:
47 | if f[-4:] == ".mp4":
48 | video_name = f
49 | video_path = os.path.join(data_folder, video_name)
50 | frame_folder = os.path.join(data_folder, "frames")
51 | if not os.path.exists(frame_folder):
52 | os.makedirs(frame_folder)
53 | load_video(video_path, save_dir = frame_folder, resize=resize)
54 |
55 | def video2frame_one_seq(path, save_dir = None, resize = None): # N x H x W x C
56 | vidcap = cv2.VideoCapture(path)
57 | fps = vidcap.get(cv2.CAP_PROP_FPS)
58 | success,image = vidcap.read()
59 | print(path)
60 | print(image.shape)
61 | height, width, layers = image.shape
62 | if resize is None:
63 | size = (width,height)
64 | elif type(resize) is int:
65 | size = (width//resize,height//resize)
66 | else:
67 | size = resize
68 | count = 0
69 | while success:
70 | if resize is not None:
71 | image = cv2.resize(image, size, interpolation = cv2.INTER_LINEAR)
72 | if save_dir != None:
73 | path = os.path.join(save_dir, "frame_" + str(count).zfill(5) + ".png")
74 | cv2.imwrite(path, image)
75 | success,image = vidcap.read()
76 | count += 1
77 | return fps, size
78 |
79 | def save_video(path,frame_array, fps, size, losses = None, frame_number = False, writer = None):
80 | if writer is None:
81 | if path[-3:] == "mp4":
82 | out = cv2.VideoWriter(path,cv2.VideoWriter_fourcc(*'mp4v'), fps, size)
83 | else:
84 | out = cv2.VideoWriter(path,cv2.VideoWriter_fourcc('M','J','P','G'), fps, size)
85 | else:
86 | out = writer
87 | for i in range(len(frame_array)):
88 | # writing to a image array
89 | if frame_number:
90 | frame_array[i] = draw_number(np.asarray(frame_array[i]), i)
91 | if losses is not None:
92 | frame_array[i] = draw_number(np.asarray(frame_array[i]), losses[i], x = 900, message = "Loss: ")
93 | out.write(frame_array[i])
94 | if writer is None:
95 | out.release()
96 |
97 | def draw_number(frame, num, x = 10, y = 10, message = "Frame: "):
98 | image=Image.fromarray(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB))
99 | draw = ImageDraw.Draw(image)
100 | font = ImageFont.truetype("./data/arial.ttf", 45)
101 |
102 | message = message + str(num)
103 | color = 'rgb(0, 0, 0)' # black color
104 |
105 | draw.text((x, y), message, fill=color, font=font)
106 | return cv2.cvtColor(np.array(image), cv2.COLOR_RGB2BGR)
107 |
108 | if __name__ == "__main__":
109 | parser = argparse.ArgumentParser("FlowNet2 Preparation")
110 | parser.add_argument("--dir_path", default="./video")
111 | args = parser.parse_args()
112 | dir_path = args.dir_path
113 | if dir_path == "./video":
114 | video2frame(dir_path, resize = 4)
115 | else:
116 | video2frame(os.path.join(dir_path, "test"), resize = 4)
117 | video2frame(os.path.join(dir_path, "training"), resize = 4)
--------------------------------------------------------------------------------
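A minimal round trip through load_video and save_video. The file names are placeholder assumptions; resize=2 halves both dimensions and frame_number=True stamps the frame index using ./data/arial.ttf:

from warp import load_video, save_video

# Hypothetical input clip; length=120 stops reading after 120 frames.
frames, fps, size = load_video("input.mp4", resize=2, length=120)
save_video("copy.mp4", frames, fps, size, frame_number=True)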
/dvs/warp/warping.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | from .read_write import load_video, save_video
3 | import torch
4 | import cv2
5 | from .rasterizer import Rasterization
6 | import time
7 | import os
8 |
9 | def warp_video(mesh_path, video_path, save_path, losses = None, frame_number = False, fps_fix = None):
10 |     if type(mesh_path) == str:
11 |         # loading a mesh from disk is not supported here; pass the mesh array directly
12 |         raise ValueError("warp_video expects a mesh array, got a path: " + mesh_path)
13 |     grid_data = mesh_path
14 |
15 | frame_array, fps, size = load_video(video_path, length = grid_data.shape[0])
16 | if fps_fix is not None:
17 | fps = fps_fix
18 | length = min(grid_data.shape[0], len(frame_array))
19 | seq_length = 100
20 | seq = length//seq_length
21 | writer = cv2.VideoWriter(save_path,cv2.VideoWriter_fourcc(*'mp4v'), fps, size)
22 | for i in range(seq+1):
23 | if seq_length*i==length:
24 | break
25 | print("Frame: "+str(i*seq_length)+"/"+str(length))
26 | frame_array_save = warpping_rast(grid_data[seq_length*i:min(seq_length*(i+1),length)], frame_array[seq_length*i:min(seq_length*(i+1),length)], losses = losses)
27 | save_video(save_path,frame_array_save, fps, size, losses = losses, frame_number = frame_number, writer = writer)
28 | writer.release()
29 |
30 | def warpping_rast(grid_data, frame_array, losses = None):
31 | output = []
32 | for i in range(0, min(len(frame_array), grid_data.shape[0])):
33 | frame = warpping_one_frame_rast(frame_array[i], grid_data[i])
34 | output.append(frame)
35 | return output
36 |
37 | def warpping_one_frame_rast(image, grid):
38 | img = torch.Tensor(image).permute(2,0,1)/255
39 | grid = torch.Tensor(grid)
40 | output_image = Rasterization(img, grid)
41 | return np.clip(output_image.permute(1,2,0).numpy() * 255, 0, 255).astype("uint8")
42 |
--------------------------------------------------------------------------------
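warp_video expects the mesh as an array rather than a path: one [rows, cols, 4] grid per frame, where the last axis stacks the warped (x, y) and source (x, y) positions in normalized coordinates. A sketch with an identity mesh, so the output should match the input; the grid size, frame count, and file names are illustrative assumptions, and Rasterization requires a CUDA device:

import numpy as np
from warp import warp_video

rows, cols, n_frames = 13, 17, 100
ys, xs = np.meshgrid(np.linspace(0, 1, rows), np.linspace(0, 1, cols), indexing="ij")
mesh = np.stack([xs, ys, xs, ys], axis=-1)        # [rows, cols, 4]: warped xy == source xy
grid = np.repeat(mesh[None], n_frames, axis=0)    # [frames, rows, cols, 4]
warp_video(grid, "input.mp4", "output_identity.mp4", frame_number=False)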