├── .gitignore
├── LICENSE
├── README.md
├── docs
│   ├── code-of-conduct.md
│   └── contributing.md
└── dvs
    ├── checkpoint
    │   └── stabilzation
    │       └── stabilzation_last.checkpoint
    ├── conf
    │   ├── stabilzation.yaml
    │   └── stabilzation_train.yaml
    ├── data
    │   └── arial.ttf
    ├── dataset.py
    ├── flownet2
    │   ├── LICENSE
    │   ├── README.md
    │   ├── __init__.py
    │   ├── convert.py
    │   ├── datasets.py
    │   ├── install.sh
    │   ├── losses.py
    │   ├── main.py
    │   ├── models.py
    │   ├── networks
    │   │   ├── FlowNetC.py
    │   │   ├── FlowNetFusion.py
    │   │   ├── FlowNetS.py
    │   │   ├── FlowNetSD.py
    │   │   ├── __init__.py
    │   │   ├── channelnorm_package
    │   │   │   ├── __init__.py
    │   │   │   ├── channelnorm.py
    │   │   │   ├── channelnorm_cuda.cc
    │   │   │   ├── channelnorm_kernel.cu
    │   │   │   ├── channelnorm_kernel.cuh
    │   │   │   └── setup.py
    │   │   ├── correlation_package
    │   │   │   ├── __init__.py
    │   │   │   ├── correlation.py
    │   │   │   ├── correlation_cuda.cc
    │   │   │   ├── correlation_cuda_kernel.cu
    │   │   │   ├── correlation_cuda_kernel.cuh
    │   │   │   └── setup.py
    │   │   ├── resample2d_package
    │   │   │   ├── __init__.py
    │   │   │   ├── resample2d.py
    │   │   │   ├── resample2d_cuda.cc
    │   │   │   ├── resample2d_kernel.cu
    │   │   │   ├── resample2d_kernel.cuh
    │   │   │   └── setup.py
    │   │   └── submodules.py
    │   ├── run.sh
    │   ├── run_release.sh
    │   └── utils
    │       ├── __init__.py
    │       ├── flow_utils.py
    │       ├── frame_utils.py
    │       ├── param_utils.py
    │       └── tools.py
    ├── gyro
    │   ├── __init__.py
    │   ├── gyro_function.py
    │   └── gyro_io.py
    ├── inference.py
    ├── load_frame_sensor_data.py
    ├── loss.py
    ├── metrics.py
    ├── model.py
    ├── printer.py
    ├── requirements.txt
    ├── train.py
    ├── util.py
    └── warp
        ├── __init__.py
        ├── rasterizer.py
        ├── read_write.py
        └── warping.py
/.gitignore:
--------------------------------------------------------------------------------
1 | *.pyc
2 | .torch
3 | _ext
4 | *.o
5 | _ext/
6 | *.png
7 | *.jpg
8 | *.tar
9 | log/*
10 |
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 |
2 | Apache License
3 | Version 2.0, January 2004
4 | http://www.apache.org/licenses/
5 |
6 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
7 |
8 | 1. Definitions.
9 |
10 | "License" shall mean the terms and conditions for use, reproduction,
11 | and distribution as defined by Sections 1 through 9 of this document.
12 |
13 | "Licensor" shall mean the copyright owner or entity authorized by
14 | the copyright owner that is granting the License.
15 |
16 | "Legal Entity" shall mean the union of the acting entity and all
17 | other entities that control, are controlled by, or are under common
18 | control with that entity. For the purposes of this definition,
19 | "control" means (i) the power, direct or indirect, to cause the
20 | direction or management of such entity, whether by contract or
21 | otherwise, or (ii) ownership of fifty percent (50%) or more of the
22 | outstanding shares, or (iii) beneficial ownership of such entity.
23 |
24 | "You" (or "Your") shall mean an individual or Legal Entity
25 | exercising permissions granted by this License.
26 |
27 | "Source" form shall mean the preferred form for making modifications,
28 | including but not limited to software source code, documentation
29 | source, and configuration files.
30 |
31 | "Object" form shall mean any form resulting from mechanical
32 | transformation or translation of a Source form, including but
33 | not limited to compiled object code, generated documentation,
34 | and conversions to other media types.
35 |
36 | "Work" shall mean the work of authorship, whether in Source or
37 | Object form, made available under the License, as indicated by a
38 | copyright notice that is included in or attached to the work
39 | (an example is provided in the Appendix below).
40 |
41 | "Derivative Works" shall mean any work, whether in Source or Object
42 | form, that is based on (or derived from) the Work and for which the
43 | editorial revisions, annotations, elaborations, or other modifications
44 | represent, as a whole, an original work of authorship. For the purposes
45 | of this License, Derivative Works shall not include works that remain
46 | separable from, or merely link (or bind by name) to the interfaces of,
47 | the Work and Derivative Works thereof.
48 |
49 | "Contribution" shall mean any work of authorship, including
50 | the original version of the Work and any modifications or additions
51 | to that Work or Derivative Works thereof, that is intentionally
52 | submitted to Licensor for inclusion in the Work by the copyright owner
53 | or by an individual or Legal Entity authorized to submit on behalf of
54 | the copyright owner. For the purposes of this definition, "submitted"
55 | means any form of electronic, verbal, or written communication sent
56 | to the Licensor or its representatives, including but not limited to
57 | communication on electronic mailing lists, source code control systems,
58 | and issue tracking systems that are managed by, or on behalf of, the
59 | Licensor for the purpose of discussing and improving the Work, but
60 | excluding communication that is conspicuously marked or otherwise
61 | designated in writing by the copyright owner as "Not a Contribution."
62 |
63 | "Contributor" shall mean Licensor and any individual or Legal Entity
64 | on behalf of whom a Contribution has been received by Licensor and
65 | subsequently incorporated within the Work.
66 |
67 | 2. Grant of Copyright License. Subject to the terms and conditions of
68 | this License, each Contributor hereby grants to You a perpetual,
69 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable
70 | copyright license to reproduce, prepare Derivative Works of,
71 | publicly display, publicly perform, sublicense, and distribute the
72 | Work and such Derivative Works in Source or Object form.
73 |
74 | 3. Grant of Patent License. Subject to the terms and conditions of
75 | this License, each Contributor hereby grants to You a perpetual,
76 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable
77 | (except as stated in this section) patent license to make, have made,
78 | use, offer to sell, sell, import, and otherwise transfer the Work,
79 | where such license applies only to those patent claims licensable
80 | by such Contributor that are necessarily infringed by their
81 | Contribution(s) alone or by combination of their Contribution(s)
82 | with the Work to which such Contribution(s) was submitted. If You
83 | institute patent litigation against any entity (including a
84 | cross-claim or counterclaim in a lawsuit) alleging that the Work
85 | or a Contribution incorporated within the Work constitutes direct
86 | or contributory patent infringement, then any patent licenses
87 | granted to You under this License for that Work shall terminate
88 | as of the date such litigation is filed.
89 |
90 | 4. Redistribution. You may reproduce and distribute copies of the
91 | Work or Derivative Works thereof in any medium, with or without
92 | modifications, and in Source or Object form, provided that You
93 | meet the following conditions:
94 |
95 | (a) You must give any other recipients of the Work or
96 | Derivative Works a copy of this License; and
97 |
98 | (b) You must cause any modified files to carry prominent notices
99 | stating that You changed the files; and
100 |
101 | (c) You must retain, in the Source form of any Derivative Works
102 | that You distribute, all copyright, patent, trademark, and
103 | attribution notices from the Source form of the Work,
104 | excluding those notices that do not pertain to any part of
105 | the Derivative Works; and
106 |
107 | (d) If the Work includes a "NOTICE" text file as part of its
108 | distribution, then any Derivative Works that You distribute must
109 | include a readable copy of the attribution notices contained
110 | within such NOTICE file, excluding those notices that do not
111 | pertain to any part of the Derivative Works, in at least one
112 | of the following places: within a NOTICE text file distributed
113 | as part of the Derivative Works; within the Source form or
114 | documentation, if provided along with the Derivative Works; or,
115 | within a display generated by the Derivative Works, if and
116 | wherever such third-party notices normally appear. The contents
117 | of the NOTICE file are for informational purposes only and
118 | do not modify the License. You may add Your own attribution
119 | notices within Derivative Works that You distribute, alongside
120 | or as an addendum to the NOTICE text from the Work, provided
121 | that such additional attribution notices cannot be construed
122 | as modifying the License.
123 |
124 | You may add Your own copyright statement to Your modifications and
125 | may provide additional or different license terms and conditions
126 | for use, reproduction, or distribution of Your modifications, or
127 | for any such Derivative Works as a whole, provided Your use,
128 | reproduction, and distribution of the Work otherwise complies with
129 | the conditions stated in this License.
130 |
131 | 5. Submission of Contributions. Unless You explicitly state otherwise,
132 | any Contribution intentionally submitted for inclusion in the Work
133 | by You to the Licensor shall be under the terms and conditions of
134 | this License, without any additional terms or conditions.
135 | Notwithstanding the above, nothing herein shall supersede or modify
136 | the terms of any separate license agreement you may have executed
137 | with Licensor regarding such Contributions.
138 |
139 | 6. Trademarks. This License does not grant permission to use the trade
140 | names, trademarks, service marks, or product names of the Licensor,
141 | except as required for reasonable and customary use in describing the
142 | origin of the Work and reproducing the content of the NOTICE file.
143 |
144 | 7. Disclaimer of Warranty. Unless required by applicable law or
145 | agreed to in writing, Licensor provides the Work (and each
146 | Contributor provides its Contributions) on an "AS IS" BASIS,
147 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
148 | implied, including, without limitation, any warranties or conditions
149 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
150 | PARTICULAR PURPOSE. You are solely responsible for determining the
151 | appropriateness of using or redistributing the Work and assume any
152 | risks associated with Your exercise of permissions under this License.
153 |
154 | 8. Limitation of Liability. In no event and under no legal theory,
155 | whether in tort (including negligence), contract, or otherwise,
156 | unless required by applicable law (such as deliberate and grossly
157 | negligent acts) or agreed to in writing, shall any Contributor be
158 | liable to You for damages, including any direct, indirect, special,
159 | incidental, or consequential damages of any character arising as a
160 | result of this License or out of the use or inability to use the
161 | Work (including but not limited to damages for loss of goodwill,
162 | work stoppage, computer failure or malfunction, or any and all
163 | other commercial damages or losses), even if such Contributor
164 | has been advised of the possibility of such damages.
165 |
166 | 9. Accepting Warranty or Additional Liability. While redistributing
167 | the Work or Derivative Works thereof, You may choose to offer,
168 | and charge a fee for, acceptance of support, warranty, indemnity,
169 | or other liability obligations and/or rights consistent with this
170 | License. However, in accepting such obligations, You may act only
171 | on Your own behalf and on Your sole responsibility, not on behalf
172 | of any other Contributor, and only if You agree to indemnify,
173 | defend, and hold each Contributor harmless for any liability
174 | incurred by, or claims asserted against, such Contributor by reason
175 | of your accepting any such warranty or additional liability.
176 |
177 | END OF TERMS AND CONDITIONS
178 |
179 | APPENDIX: How to apply the Apache License to your work.
180 |
181 | To apply the Apache License to your work, attach the following
182 | boilerplate notice, with the fields enclosed by brackets "[]"
183 | replaced with your own identifying information. (Don't include
184 | the brackets!) The text should be enclosed in the appropriate
185 | comment syntax for the file format. We also recommend that a
186 | file or class name and description of purpose be included on the
187 | same "printed page" as the copyright notice for easier
188 | identification within third-party archives.
189 |
190 | Copyright [yyyy] [name of copyright owner]
191 |
192 | Licensed under the Apache License, Version 2.0 (the "License");
193 | you may not use this file except in compliance with the License.
194 | You may obtain a copy of the License at
195 |
196 | http://www.apache.org/licenses/LICENSE-2.0
197 |
198 | Unless required by applicable law or agreed to in writing, software
199 | distributed under the License is distributed on an "AS IS" BASIS,
200 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
201 | See the License for the specific language governing permissions and
202 | limitations under the License.
203 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # Deep Online Fused Video Stabilization
2 |
3 | [[Paper]](https://openaccess.thecvf.com/content/WACV2022/papers/Shi_Deep_Online_Fused_Video_Stabilization_WACV_2022_paper.pdf) [[Supplementary]](https://zhmeishi.github.io/dvs/paper/dvs_supp.pdf) [[Project Page]](https://zhmeishi.github.io/dvs/) [[Dataset]](https://storage.googleapis.com/dataset_release/all.zip) [[Our Result]](https://storage.googleapis.com/dataset_release/inference_result_release.zip) [[More Results]](https://zhmeishi.github.io/dvs/supp/results.html)
4 |
5 | This repository contains the Pytorch implementation of our method in the paper "Deep Online Fused Video Stabilization".
6 |
7 | ## Environment Setting
8 | Python version >= 3.6
9 | Pytorch >= 1.0.0 with CUDA support (installation guide is [here](https://pytorch.org/get-started/locally/))
10 | Install the other required packages:
11 | ```
12 | cd dvs
13 | pip install -r requirements.txt --ignore-installed
14 | ```
15 |
16 | ## Data Preparation
17 | Download the sample video [here](https://drive.google.com/file/d/1PpF3-6BbQKy9fldjIfwa5AlbtQflx3sG/view?usp=sharing).
18 | Uncompress it and place the *video* folder under the *dvs* folder.
19 | ```
20 | python load_frame_sensor_data.py
21 | ```
22 | Demo of the curve visualization:
23 | The **gyro/OIS curve visualization** can be found at *dvs/video/s_114_outdoor_running_trail_daytime/ControlCam_20200930_104820_real.jpg*.
24 |
25 |
26 | ## FlowNet2 Preparation
27 | Note: we already provide the optical flow results for one test video in the Data Preparation step above. If you would like to generate them for all test videos, please follow the [FlowNet2 official website](https://github.com/NVIDIA/flownet2-pytorch) and the guide below. Otherwise, you can skip this section.
28 |
29 | Note: the FlowNet2 installation is tricky. Please use Python 3.6 and Pytorch 1.0.0. More details are [here](https://github.com/NVIDIA/flownet2-pytorch/issues/156), or contact us if you have any questions.
30 |
31 | Download the FlowNet2 model *FlowNet2_checkpoint.pth.tar* [here](https://drive.google.com/file/d/1hF8vS6YeHkx3j2pfCeQqqZGwA_PJq_Da/view) and move it into the *dvs/flownet2* folder.
32 | ```
33 | python warp/read_write.py # video2frames
34 | cd flownet2
35 | bash install.sh # install package
36 | bash run.sh # generate optical flow file for dataset
37 | ```
38 |
39 | ## Running Inference
40 | ```
41 | python inference.py
42 | python metrics.py
43 | ```
44 | The loss and metric information will be printed in the terminal. The metric numbers can differ slightly depending on your OpenCV/Pytorch versions.
45 |
46 | The results are under *dvs/test/stabilzation*.
47 | In *s_114_outdoor_running_trail_daytime.jpg*, the blue curve is the output of our model, and the green curve is the input.
48 | *s_114_outdoor_running_trail_daytime_stab.mp4* is the uncropped stabilized video.
49 | *s_114_outdoor_running_trail_daytime_stab_crop.mp4* is the cropped stabilized video. Note that the cropped video is generated only after running the metrics code.
50 |
51 | ## Training
52 | Download the dataset for training and test [here](https://storage.googleapis.com/dataset_release/all.zip).
53 | Uncompress *all.zip* and move the *dataset_release* folder under the *dvs* folder.
54 |
55 | Follow the FlowNet2 Preparation section above, then run:
56 | ```
57 | python warp/read_write.py --dir_path ./dataset_release # video2frames
58 | cd flownet2
59 | bash run_release.sh # generate optical flow file for dataset
60 | ```
61 |
62 | Run the training code:
63 | ```
64 | python train.py
65 | ```
66 | The model is saved in *checkpoint/stabilzation_train*.
67 |
68 | ## Citation
69 | If you use this code or dataset for your research, please cite our paper.
70 | ```
71 | @inproceedings{shi2022deep,
72 | title={Deep Online Fused Video Stabilization},
73 | author={Shi, Zhenmei and Shi, Fuhao and Lai, Wei-Sheng and Liang, Chia-Kai and Liang, Yingyu},
74 | booktitle={Proceedings of the IEEE/CVF Winter Conference on Applications of Computer Vision},
75 | pages={1250--1258},
76 | year={2022}
77 | }
78 | ```
79 |
--------------------------------------------------------------------------------
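The FlowNet2 Preparation step above writes per-frame optical flow files under each video folder (`flo/` and `flo_back/`, named `%06d.flo` by `flownet2/main.py`). Below is a minimal sketch, not part of the repo, for sanity-checking one of those files; it assumes it is run from the *dvs* folder with the sample video from Data Preparation unpacked, and the exact file path is an illustrative assumption.

```
# Minimal sketch (assumptions: run from dvs/, sample video and its flo/ folder
# present). flow_utils.readFlow is the same helper dataset.py uses.
from flownet2 import flow_utils

flow = flow_utils.readFlow(
    "video/s_114_outdoor_running_trail_daytime/flo/000000.flo")
print(flow.shape)  # expected (H, W, 2): per-pixel forward optical flow
```
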
/docs/code-of-conduct.md:
--------------------------------------------------------------------------------
1 | # Google Open Source Community Guidelines
2 |
3 | At Google, we recognize and celebrate the creativity and collaboration of open
4 | source contributors and the diversity of skills, experiences, cultures, and
5 | opinions they bring to the projects and communities they participate in.
6 |
7 | Every one of Google's open source projects and communities are inclusive
8 | environments, based on treating all individuals respectfully, regardless of
9 | gender identity and expression, sexual orientation, disabilities,
10 | neurodiversity, physical appearance, body size, ethnicity, nationality, race,
11 | age, religion, or similar personal characteristic.
12 |
13 | We value diverse opinions, but we value respectful behavior more.
14 |
15 | Respectful behavior includes:
16 |
17 | * Being considerate, kind, constructive, and helpful.
18 | * Not engaging in demeaning, discriminatory, harassing, hateful, sexualized, or
19 | physically threatening behavior, speech, and imagery.
20 | * Not engaging in unwanted physical contact.
21 |
22 | Some Google open source projects [may adopt][] an explicit project code of
23 | conduct, which may have additional detailed expectations for participants. Most
24 | of those projects will use our [modified Contributor Covenant][].
25 |
26 | [may adopt]: https://opensource.google/docs/releasing/preparing/#conduct
27 | [modified Contributor Covenant]: https://opensource.google/docs/releasing/template/CODE_OF_CONDUCT/
28 |
29 | ## Resolve peacefully
30 |
31 | We do not believe that all conflict is necessarily bad; healthy debate and
32 | disagreement often yields positive results. However, it is never okay to be
33 | disrespectful.
34 |
35 | If you see someone behaving disrespectfully, you are encouraged to address the
36 | behavior directly with those involved. Many issues can be resolved quickly and
37 | easily, and this gives people more control over the outcome of their dispute.
38 | If you are unable to resolve the matter for any reason, or if the behavior is
39 | threatening or harassing, report it. We are dedicated to providing an
40 | environment where participants feel welcome and safe.
41 |
42 | ## Reporting problems
43 |
44 | Some Google open source projects may adopt a project-specific code of conduct.
45 | In those cases, a Google employee will be identified as the Project Steward,
46 | who will receive and handle reports of code of conduct violations. In the event
47 | that a project hasn’t identified a Project Steward, you can report problems by
48 | emailing opensource@google.com.
49 |
50 | We will investigate every complaint, but you may not receive a direct response.
51 | We will use our discretion in determining when and how to follow up on reported
52 | incidents, which may range from not taking action to permanent expulsion from
53 | the project and project-sponsored spaces. We will notify the accused of the
54 | report and provide them an opportunity to discuss it before any action is
55 | taken. The identity of the reporter will be omitted from the details of the
56 | report supplied to the accused. In potentially harmful situations, such as
57 | ongoing harassment or threats to anyone's safety, we may take action without
58 | notice.
59 |
60 | *This document was adapted from the [IndieWeb Code of Conduct][] and can also
61 | be found at <https://opensource.google/conduct/>.*
62 |
63 | [IndieWeb Code of Conduct]: https://indieweb.org/code-of-conduct
64 |
--------------------------------------------------------------------------------
/docs/contributing.md:
--------------------------------------------------------------------------------
1 | # How to Contribute
2 |
3 | We'd love to accept your patches and contributions to this project. There are
4 | just a few small guidelines you need to follow.
5 |
6 | ## Contributor License Agreement
7 |
8 | Contributions to this project must be accompanied by a Contributor License
9 | Agreement. You (or your employer) retain the copyright to your contribution;
10 | this simply gives us permission to use and redistribute your contributions as
11 | part of the project. Head over to <https://cla.developers.google.com/> to see
12 | your current agreements on file or to sign a new one.
13 |
14 | You generally only need to submit a CLA once, so if you've already submitted one
15 | (even if it was for a different project), you probably don't need to do it
16 | again.
17 |
18 | ## Code reviews
19 |
20 | All submissions, including submissions by project members, require review. We
21 | use GitHub pull requests for this purpose. Consult
22 | [GitHub Help](https://help.github.com/articles/about-pull-requests/) for more
23 | information on using pull requests.
24 |
25 | ## Community Guidelines
26 |
27 | This project follows [Google's Open Source Community
28 | Guidelines](https://opensource.google/conduct/).
29 |
--------------------------------------------------------------------------------
/dvs/checkpoint/stabilzation/stabilzation_last.checkpoint:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/googleinterns/deep-stabilization/7159c09d21aee3fc2098c64698c1300e40e3a8ea/dvs/checkpoint/stabilzation/stabilzation_last.checkpoint
--------------------------------------------------------------------------------
/dvs/conf/stabilzation.yaml:
--------------------------------------------------------------------------------
1 | data:
2 | exp: 'stabilzation'
3 | checkpoints_dir: './checkpoint'
4 | log: './log'
5 | data_dir: './video'
6 | use_cuda: true
7 | batch_size: 16
8 | resize_ratio: 0.25
9 | number_real: 10
10 | number_virtual: 2
11 | time_train: 2000 # ms
12 | sample_freq: 40 # ms
13 | channel_size: 1
14 | num_workers: 16 # num_workers for data_loader
15 | model:
16 | load_model: null
17 | cnn:
18 | activate_function: relu # sigmoid, relu, tanh, quadratic
19 | batch_norm: true
20 | gap: false
21 | layers:
22 | rnn:
23 | layers:
24 | - - 512
25 | - true
26 | - - 512
27 | - true
28 | fc:
29 | activate_function: relu
30 | batch_norm: false # (batch_norm and drop_out) is False
31 | layers:
32 | - - 256
33 | - true
34 | - - 4 # last layer should be equal to nr_class
35 | - true
36 | drop_out: 0
37 | train:
38 | optimizer: "adam" # adam or sgd
39 | momentum: 0.9 # for sgd
40 | decay_epoch: null
41 | epoch: 400
42 | snapshot: 2
43 | init_lr: 0.0001
44 | lr_decay: 0.5
45 | lr_step: 200 # if > 0 decay_epoch should be null
46 | seed: 1
47 | weight_decay: 0.0001
48 | clip_norm: False
49 | init: "xavier_uniform" # xavier_uniform or xavier_normal
50 | loss:
51 | follow: 10
52 | angle: 1
53 | smooth: 10 #10
54 | c2_smooth: 200 #20
55 | undefine: 2.0
56 | opt: 0.1
57 | stay: 0
--------------------------------------------------------------------------------
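This config is consumed as a nested dictionary (the `cf` object indexed throughout `dataset.py` and the training/inference scripts). A minimal sketch of loading it, assuming PyYAML is available and the working directory is *dvs*; the printed fields are just examples:

```
# Minimal sketch (assumptions: PyYAML installed, run from dvs/). The nested dict
# mirrors how dataset.py indexes the config, e.g. cf["data"]["batch_size"].
import yaml

with open("conf/stabilzation.yaml") as f:
    cf = yaml.safe_load(f)

print(cf["data"]["batch_size"])   # 16
print(cf["train"]["optimizer"])   # "adam"
print(cf["loss"]["c2_smooth"])    # 200
```
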
/dvs/conf/stabilzation_train.yaml:
--------------------------------------------------------------------------------
1 | data:
2 | exp: 'stabilzation_train'
3 | checkpoints_dir: './checkpoint'
4 | log: './log'
5 | data_dir: './dataset_release'
6 | use_cuda: true
7 | batch_size: 16
8 | resize_ratio: 0.25
9 | number_real: 10
10 | number_virtual: 2
11 | time_train: 2000 # ms
12 | sample_freq: 40 # ms
13 | channel_size: 1
14 | num_workers: 16 # num_workers for data_loader
15 | model:
16 | load_model: null
17 | cnn:
18 | activate_function: relu # sigmoid, relu, tanh, quadratic
19 | batch_norm: true
20 | gap: false
21 | layers:
22 | rnn:
23 | layers:
24 | - - 512
25 | - true
26 | - - 512
27 | - true
28 | fc:
29 | activate_function: relu
30 | batch_norm: false # (batch_norm and drop_out) is False
31 | layers:
32 | - - 256
33 | - true
34 | - - 4 # last layer should be equal to nr_class
35 | - true
36 | drop_out: 0
37 | train:
38 | optimizer: "adam" # adam or sgd
39 | momentum: 0.9 # for sgd
40 | decay_epoch: null
41 | epoch: 400
42 | snapshot: 2
43 | init_lr: 0.0001
44 | lr_decay: 0.5
45 | lr_step: 200 # if > 0 decay_epoch should be null
46 | seed: 1
47 | weight_decay: 0.0001
48 | clip_norm: False
49 | init: "xavier_uniform" # xavier_uniform or xavier_normal
50 | loss:
51 | follow: 10
52 | angle: 1
53 | smooth: 10 #10
54 | c2_smooth: 200 #20
55 | undefine: 2.0
56 | opt: 0.1
57 | stay: 0
--------------------------------------------------------------------------------
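The training config above is identical to *stabilzation.yaml* except for the experiment name and the data directory. A minimal sketch (run from the *dvs* folder, PyYAML assumed) that confirms this programmatically:

```
# Minimal sketch: diff the "data" sections of the two configs. Only "exp" and
# "data_dir" differ between the inference and training configurations.
import yaml

with open("conf/stabilzation.yaml") as f:
    base = yaml.safe_load(f)
with open("conf/stabilzation_train.yaml") as f:
    train = yaml.safe_load(f)

diff = {k: (base["data"][k], train["data"][k])
        for k in base["data"] if base["data"][k] != train["data"][k]}
print(diff)  # {'exp': ('stabilzation', 'stabilzation_train'),
             #  'data_dir': ('./video', './dataset_release')}
```
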
/dvs/data/arial.ttf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/googleinterns/deep-stabilization/7159c09d21aee3fc2098c64698c1300e40e3a8ea/dvs/data/arial.ttf
--------------------------------------------------------------------------------
/dvs/dataset.py:
--------------------------------------------------------------------------------
1 | from torch.utils.data import Dataset
2 | import os
3 | import collections
4 | from gyro import (
5 | LoadGyroData,
6 | LoadOISData,
7 | LoadFrameData,
8 | GetGyroAtTimeStamp,
9 | get_static,
10 | GetMetadata,
11 | GetProjections,
12 | train_GetGyroAtTimeStamp,
13 | QuaternionProduct,
14 | QuaternionReciprocal,
15 | FindOISAtTimeStamp,
16 | norm_quat
17 | )
18 | import random
19 | import numpy as np
20 | import torchvision.transforms as transforms
21 | import torch
22 | from flownet2 import flow_utils
23 | from scipy import ndimage, misc
24 | from numpy import linalg as LA
25 |
26 | def get_data_loader(cf, no_flo = False):
27 | size = cf["data"]["batch_size"]
28 | num_workers = cf["data"]["num_workers"]
29 | train_data, test_data = get_dataset(cf, no_flo)
30 | trainloader = torch.utils.data.DataLoader(train_data, batch_size=size,shuffle=True, pin_memory=True, num_workers=num_workers)
31 | testloader = torch.utils.data.DataLoader(test_data, batch_size=size,shuffle=False, pin_memory=True, num_workers=num_workers)
32 | return trainloader,testloader
33 |
34 | def get_dataset(cf, no_flo = False):
35 | resize_ratio = cf["data"]["resize_ratio"]
36 | train_transform, test_transform = _data_transforms()
37 | train_path = os.path.join(cf["data"]["data_dir"], "training")
38 | test_path = os.path.join(cf["data"]["data_dir"], "test")
39 | if not os.path.exists(train_path):
40 | train_path = cf["data"]["data_dir"]
41 | if not os.path.exists(test_path):
42 | test_path = cf["data"]["data_dir"]
43 |
44 | train_data = Dataset_Gyro(
45 | train_path, sample_freq = cf["data"]["sample_freq"]*1000000, number_real = cf["data"]["number_real"],
46 | time_train = cf["data"]["time_train"]*1000000, transform = train_transform, resize_ratio = resize_ratio, no_flo = no_flo)
47 | test_data = Dataset_Gyro(
48 | test_path, sample_freq = cf["data"]["sample_freq"]*1000000, number_real = cf["data"]["number_real"],
49 | time_train = cf["data"]["time_train"]*1000000, transform = test_transform, resize_ratio = resize_ratio, no_flo = no_flo)
50 | return train_data, test_data
51 |
52 | def get_inference_data_loader(cf, data_path, no_flo = False):
53 | test_data = get_inference_dataset(cf, data_path, no_flo)
54 | testloader = torch.utils.data.DataLoader(test_data, batch_size=1,shuffle=False, pin_memory=True, num_workers=1)
55 | return testloader
56 |
57 | def get_inference_dataset(cf, data_path, no_flo = False):
58 | resize_ratio = cf["data"]["resize_ratio"]
59 | _, test_transform = _data_transforms()
60 | test_data = Dataset_Gyro(
61 | data_path, sample_freq = cf["data"]["sample_freq"]*1000000, number_real = cf["data"]["number_real"],
62 | time_train = cf["data"]["time_train"]*1000000, transform = test_transform, resize_ratio = resize_ratio,
63 | inference_only = True, no_flo = no_flo)
64 | return test_data
65 |
66 | def _data_transforms():
67 |
68 | test_transform = transforms.Compose(
69 | [transforms.ToTensor(),
70 | ])
71 | train_transform = transforms.Compose(
72 | [transforms.ToTensor(),
73 | ])
74 |
75 | return train_transform, test_transform
76 |
77 | class DVS_data():
78 | def __init__(self):
79 | self.gyro = None
80 | self.ois = None
81 | self.frame = None
82 | self.length = 0
83 | self.flo_path = None
84 | self.flo_shape = None
85 | self.flo_back_path = None
86 |
87 | class Dataset_Gyro(Dataset):
88 | def __init__(self, path, sample_freq = 33*1000000, number_real = 10, time_train = 2000*1000000, \
89 | transform = None, inference_only = False, no_flo = False, resize_ratio = 1):
90 | r"""
91 | Arguments:
92 | sample_freq: real quaternions [t-sample_freq*number_real, t+sample_freq*number_real] ns
93 | number_real: real gyro num in half time_interval
94 | time_train: time for a batch ns
95 | """
96 | self.sample_freq = sample_freq
97 | self.number_real = number_real
98 | self.no_flo = no_flo
99 | self.resize_ratio = resize_ratio
100 | self.static_options = get_static()
101 | self.inference_only = inference_only
102 |
103 | self.ois_ratio = np.array([self.static_options["crop_window_width"] / self.static_options["width"], \
104 | self.static_options["crop_window_height"] / self.static_options["height"]]) * 0.01
105 | self.unit_size = 4
106 |
107 | if inference_only:
108 | self.length = 1
109 | self.data = [self.process_one_video(path)]
110 | self.number_train = self.data[0].length
111 | return
112 |
113 | self.time_train = time_train
114 | self.number_train = time_train//self.sample_freq
115 |
116 | self.data_name = sorted(os.listdir(path))
117 | self.length = len(self.data_name)
118 | self.data = []
119 | for i in range(self.length):
120 | self.data.append(self.process_one_video(os.path.join(path,self.data_name[i])))
121 |
122 | def process_one_video(self, path):
123 | dvs_data = DVS_data()
124 | files = sorted(os.listdir(path))
125 | print(path)
126 | for f in files:
127 | file_path = os.path.join(path,f)
128 | if "gimbal" in file_path.lower():
129 | continue
130 | if "frame" in f and "txt" in f:
131 | dvs_data.frame = LoadFrameData(file_path)
132 | print("frame:", dvs_data.frame.shape, end=" ")
133 | elif "gyro" in f:
134 | dvs_data.gyro = LoadGyroData(file_path)
135 | dvs_data.gyro = preprocess_gyro(dvs_data.gyro)
136 | print("gyro:", dvs_data.gyro.shape, end=" ")
137 | elif "ois" in f and "txt" in f:
138 | dvs_data.ois = LoadOISData(file_path)
139 | print("ois:", dvs_data.ois.shape, end=" ")
140 | elif f == "flo":
141 | dvs_data.flo_path, dvs_data.flo_shape = LoadFlow(file_path)
142 | print("flo_path:", len(dvs_data.flo_path), end=" ")
143 | print("flo_shape:", dvs_data.flo_shape, end=" ")
144 | elif f == "flo_back":
145 | dvs_data.flo_back_path, _ = LoadFlow(file_path)
146 |
147 | print()
148 | if dvs_data.flo_path is not None:
149 | dvs_data.length = min(dvs_data.frame.shape[0] - 1, len(dvs_data.flo_path))
150 | else:
151 | dvs_data.length = dvs_data.frame.shape[0] - 1
152 | return dvs_data
153 |
154 | def generate_quaternions(self, dvs_data):
155 | first_id = random.randint(0, dvs_data.length - self.number_train) + 1 # skip the first frame
156 |
157 | sample_data = np.zeros((self.number_train, 2 * self.number_real + 1, self.unit_size), dtype=np.float32)
158 | sample_ois = np.zeros((self.number_train, 2), dtype=np.float32)
159 |
160 | sample_time = np.zeros((self.number_train+1), dtype=np.float32)
161 | sample_time[0] = get_timestamp(dvs_data.frame, first_id - 1)
162 |
163 | real_postion = np.zeros((self.number_train, 4), dtype=np.float32)
164 |
165 | time_start = sample_time[0]
166 |
167 | for i in range(self.number_train):
168 | sample_time[i+1] = get_timestamp(dvs_data.frame, first_id + i)
169 | real_postion[i] = GetGyroAtTimeStamp(dvs_data.gyro, sample_time[i+1] - self.sample_freq)
170 | sample_ois[i] = self.get_ois_at_timestamp(dvs_data.ois, sample_time[i+1])
171 | for j in range(-self.number_real, self.number_real+1):
172 | index = j + self.number_real
173 | time_stamp = sample_time[i+1] + self.sample_freq * j
174 | sample_data[i, index] = self.get_data_at_timestamp(dvs_data.gyro, dvs_data.ois, time_stamp, real_postion[i])
175 |
176 | sample_data = np.reshape(sample_data, (self.number_train, (2*self.number_real+1) * self.unit_size))
177 | return sample_data, sample_time, first_id, real_postion, sample_ois
178 |
179 | def load_flo(self, idx, first_id):
180 | shape = self.data[idx].flo_shape
181 | h, w = shape[0], shape[1]
182 | flo = np.zeros((self.number_train, h, w, 2))
183 | flo_back = np.zeros((self.number_train, h, w, 2))
184 |
185 | for i in range(self.number_train):
186 | frame_id = i + first_id
187 | f = flow_utils.readFlow(self.data[idx].flo_path[frame_id-1]).astype(np.float32)
188 | flo[i] = f
189 |
190 | f_b = flow_utils.readFlow(self.data[idx].flo_back_path[frame_id-1]).astype(np.float32)
191 | flo_back[i] = f_b
192 |
193 | return flo, flo_back
194 |
195 | def load_real_projections(self, idx, first_id):
196 | real_projections = np.zeros((self.number_train + 1, self.static_options["num_grid_rows"], 3, 3))
197 | for i in range(self.number_train + 1):
198 | frame_id = i + first_id
199 | metadata = GetMetadata(self.data[idx].frame, frame_id - 1)
200 | real_projections[i] = np.array(GetProjections(self.static_options, metadata, self.data[idx].gyro, np.zeros(self.data[idx].ois.shape), no_shutter = True))
201 | return real_projections
202 |
203 | def __getitem__(self, idx):
204 | inputs, times, first_id, real_postion, ois = self.generate_quaternions(self.data[idx])
205 | real_projections = self.load_real_projections(idx, first_id)
206 | if self.no_flo:
207 | flo, flo_back = 0, 0
208 | else:
209 | flo, flo_back = self.load_flo(idx, first_id)
210 | return inputs, times, flo, flo_back, real_projections, real_postion, ois, idx
211 |
212 | def __len__(self):
213 | return self.length
214 |
215 | def get_virtual_data(self, virtual_queue, real_queue_idx, pre_times, cur_times, time_start, batch_size, number_virtual, quat_t_1):
216 | # virtual_queue: [batch_size, num, 5 (timestamp, quats)]
217 | # eular angle,
218 | # deta R angular velocity [Q't-1, Q't-2]
219 | # output virtual angular velocity, x, x*dtime => detaQt
220 | virtual_data = np.zeros((batch_size, number_virtual, 4), dtype=np.float32)
221 | vt_1 = np.zeros((batch_size, 4), dtype=np.float32)
222 | quat_t_1 = quat_t_1.numpy()
223 | for i in range(batch_size):
224 | sample_time = cur_times[i]
225 | for j in range(number_virtual):
226 | time_stamp = sample_time - self.sample_freq * (number_virtual - j)
227 | virtual_data[i, j] = get_virtual_at_timestamp(virtual_queue[i], self.data[real_queue_idx[i]].gyro, time_stamp, time_start[i], quat_t_1[i])
228 | vt_1[i] = get_virtual_at_timestamp(virtual_queue[i], self.data[real_queue_idx[i]].gyro, pre_times[i], time_start[i], None)
229 | virtual_data = np.reshape(virtual_data, (batch_size, number_virtual * 4))
230 | return torch.tensor(virtual_data, dtype=torch.float), torch.tensor(vt_1, dtype=torch.float)
231 |
232 | def update_virtual_queue(self, batch_size, virtual_queue, out, times):
233 | virtual_data = np.zeros((batch_size, 5))
234 | virtual_data[:,0] = times
235 | virtual_data[:, 1:] = out
236 | virtual_data = np.expand_dims(virtual_data, axis = 1)
237 |
238 | if None in virtual_queue:
239 | virtual_queue = virtual_data
240 | else:
241 | virtual_queue = np.concatenate((virtual_queue, virtual_data), axis = 1)
242 | return virtual_queue
243 |
244 | def random_init_virtual_queue(self, batch_size, real_postion, times):
245 | virtual_queue = np.zeros((batch_size, 3, 5))
246 | virtual_queue[:, 2, 0] = times - 0.1 * self.sample_freq
247 | virtual_queue[:, 1, 0] = times - 1.1 * self.sample_freq
248 | virtual_queue[:, 0, 0] = times - 2.1 * self.sample_freq
249 | for i in range(batch_size):
250 | quat = np.random.uniform(low=-0.06, high= 0.06, size=4) # transfer to angle # 0.05
251 | quat[3] = 1
252 | quat = quat / LA.norm(quat)
253 | quat = norm_quat(QuaternionProduct(real_postion[i], quat))
254 | virtual_queue[i, 2, 1:] = quat
255 | virtual_queue[i, 1, 1:] = quat
256 | virtual_queue[i, 0, 1:] = quat
257 | return virtual_queue
258 |
259 | def get_data_at_timestamp(self, gyro_data, ois_data, time_stamp, quat_t_1):
260 | quat_t = GetGyroAtTimeStamp(gyro_data, time_stamp)
261 | quat_dif = QuaternionProduct(quat_t, QuaternionReciprocal(quat_t_1))
262 | return quat_dif
263 |
264 | def get_ois_at_timestamp(self, ois_data, time_stamp):
265 | ois_t = FindOISAtTimeStamp(ois_data, time_stamp)
266 | ois_t = np.array(ois_t) / self.ois_ratio
267 | return ois_t
268 |
269 | def get_timestamp(frame_data, idx):
270 | sample_time = frame_data[idx, 0]
271 | metadata = GetMetadata(frame_data, idx)
272 | timestmap_ns = metadata["timestamp_ns"] + metadata["rs_time_ns"] * 0.5
273 | return timestmap_ns
274 |
275 | def preprocess_gyro(gyro, extend = 200):
276 | fake_gyro = np.zeros((extend, 5))
277 | time_start = gyro[0,0]
278 | for i in range(extend):
279 | fake_gyro[-i-1, 0] = time_start - (gyro[i+1, 0] - time_start)
280 | fake_gyro[-i-1, 4] = gyro[i+1, 4]
281 | fake_gyro[-i-1, 1:4] = -gyro[i+1, 1:4]
282 |
283 | new_gyro = np.concatenate((fake_gyro, gyro), axis = 0)
284 | return new_gyro
285 |
286 | def LoadFlow(path):
287 | file_names = sorted(os.listdir(path))
288 | file_path =[]
289 | for n in file_names:
290 | file_path.append(os.path.join(path, n))
291 | return file_path, flow_utils.readFlow(file_path[0]).shape
292 |
293 | def get_virtual_at_timestamp(virtual_queue, real_queue, time_stamp, time_start, quat_t_1 = None, sample_freq = None):
294 | if virtual_queue is None:
295 | quat_t = GetGyroAtTimeStamp(real_queue, time_stamp)
296 | else:
297 | quat_t = train_GetGyroAtTimeStamp(virtual_queue, time_stamp)
298 | if quat_t is None:
299 | quat_t = GetGyroAtTimeStamp(real_queue, time_stamp)
300 |
301 | if quat_t_1 is None:
302 | return quat_t
303 | else:
304 | quat_dif = QuaternionProduct(quat_t, QuaternionReciprocal(quat_t_1))
305 | return quat_dif
306 |
--------------------------------------------------------------------------------
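For reference, a minimal sketch of how `get_inference_data_loader` can be driven. The config path, the sample-video path, and the `no_flo=True` shortcut are assumptions based on the README, not code taken from `inference.py`:

```
# Minimal sketch (assumptions: run from dvs/, sample video unpacked under
# ./video). With no_flo=True the flow entries come back as 0 placeholders.
import yaml
from dataset import get_inference_data_loader

with open("conf/stabilzation.yaml") as f:
    cf = yaml.safe_load(f)

video_dir = "./video/s_114_outdoor_running_trail_daytime"  # illustrative path
loader = get_inference_data_loader(cf, video_dir, no_flo=True)

# __getitem__ returns: inputs, times, flo, flo_back, real_projections,
# real_postion, ois, idx.
inputs, times, flo, flo_back, real_proj, real_pos, ois, idx = next(iter(loader))
print(inputs.shape)  # [1, number_train, (2*number_real+1)*4] quaternion inputs
```
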
/dvs/flownet2/LICENSE:
--------------------------------------------------------------------------------
1 | Copyright 2017 NVIDIA CORPORATION
2 |
3 | Licensed under the Apache License, Version 2.0 (the "License");
4 | you may not use this file except in compliance with the License.
5 | You may obtain a copy of the License at
6 |
7 | http://www.apache.org/licenses/LICENSE-2.0
8 |
9 | Unless required by applicable law or agreed to in writing, software
10 | distributed under the License is distributed on an "AS IS" BASIS,
11 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | See the License for the specific language governing permissions and
13 | limitations under the License.
--------------------------------------------------------------------------------
/dvs/flownet2/README.md:
--------------------------------------------------------------------------------
1 | # flownet2-pytorch
2 |
3 | Pytorch implementation of [FlowNet 2.0: Evolution of Optical Flow Estimation with Deep Networks](https://arxiv.org/abs/1612.01925).
4 |
5 | Multiple GPU training is supported, and the code provides examples for training or inference on [MPI-Sintel](http://sintel.is.tue.mpg.de/) clean and final datasets. The same commands can be used for training or inference with other datasets. See below for more detail.
6 |
7 | Inference using fp16 (half-precision) is also supported.
8 |
9 | For more help, type
10 |
11 | python main.py --help
12 |
13 | ## Network architectures
14 | Below are the different flownet neural network architectures that are provided.
15 | A batchnorm version for each network is also available.
16 |
17 | - **FlowNet2S**
18 | - **FlowNet2C**
19 | - **FlowNet2CS**
20 | - **FlowNet2CSS**
21 | - **FlowNet2SD**
22 | - **FlowNet2**
23 |
24 | ## Custom layers
25 |
26 | `FlowNet2` or `FlowNet2C*` architectures rely on the custom layers `Resample2d` or `Correlation`.
27 | A pytorch implementation of these layers with cuda kernels is available at [./networks](./networks).
28 | Note : Currently, half precision kernels are not available for these layers.
29 |
30 | ## Data Loaders
31 |
32 | Dataloaders for FlyingChairs, FlyingThings, ChairsSDHom and ImagesFromFolder are available in [datasets.py](./datasets.py).
33 |
34 | ## Loss Functions
35 |
36 | L1 and L2 losses with multi-scale support are available in [losses.py](./losses.py).
37 |
38 | ## Installation
39 |
40 | # get flownet2-pytorch source
41 | git clone https://github.com/NVIDIA/flownet2-pytorch.git
42 | cd flownet2-pytorch
43 |
44 | # install custom layers
45 | bash install.sh
46 |
47 | ### Python requirements
48 | Currently, the code supports Python 3 with the following packages:
49 | * numpy
50 | * PyTorch ( == 0.4.1, for <= 0.4.0 see branch [python36-PyTorch0.4](https://github.com/NVIDIA/flownet2-pytorch/tree/python36-PyTorch0.4))
51 | * scipy
52 | * scikit-image
53 | * tensorboardX
54 | * colorama, tqdm, setproctitle
55 |
56 | ## Converted Caffe Pre-trained Models
57 | We've included caffe pre-trained models. Should you use these pre-trained weights, please adhere to the [license agreements](https://drive.google.com/file/d/1TVv0BnNFh3rpHZvD-easMb9jYrPE2Eqd/view?usp=sharing).
58 |
59 | * [FlowNet2](https://drive.google.com/file/d/1hF8vS6YeHkx3j2pfCeQqqZGwA_PJq_Da/view?usp=sharing)[620MB]
60 | * [FlowNet2-C](https://drive.google.com/file/d/1BFT6b7KgKJC8rA59RmOVAXRM_S7aSfKE/view?usp=sharing)[149MB]
61 | * [FlowNet2-CS](https://drive.google.com/file/d/1iBJ1_o7PloaINpa8m7u_7TsLCX0Dt_jS/view?usp=sharing)[297MB]
62 | * [FlowNet2-CSS](https://drive.google.com/file/d/157zuzVf4YMN6ABAQgZc8rRmR5cgWzSu8/view?usp=sharing)[445MB]
63 | * [FlowNet2-CSS-ft-sd](https://drive.google.com/file/d/1R5xafCIzJCXc8ia4TGfC65irmTNiMg6u/view?usp=sharing)[445MB]
64 | * [FlowNet2-S](https://drive.google.com/file/d/1V61dZjFomwlynwlYklJHC-TLfdFom3Lg/view?usp=sharing)[148MB]
65 | * [FlowNet2-SD](https://drive.google.com/file/d/1QW03eyYG_vD-dT-Mx4wopYvtPu_msTKn/view?usp=sharing)[173MB]
66 |
67 | ## Inference
68 | # Example on MPISintel Clean
69 | python main.py --inference --model FlowNet2 --save_flow --inference_dataset MpiSintelClean \
70 | --inference_dataset_root /path/to/mpi-sintel/clean/dataset \
71 | --resume /path/to/checkpoints
72 |
73 | ## Training and validation
74 |
75 | # Example on MPISintel Final and Clean, with L1Loss on FlowNet2 model
76 | python main.py --batch_size 8 --model FlowNet2 --loss=L1Loss --optimizer=Adam --optimizer_lr=1e-4 \
77 | --training_dataset MpiSintelFinal --training_dataset_root /path/to/mpi-sintel/final/dataset \
78 | --validation_dataset MpiSintelClean --validation_dataset_root /path/to/mpi-sintel/clean/dataset
79 |
80 | # Example on MPISintel Final and Clean, with MultiScale loss on FlowNet2C model
81 | python main.py --batch_size 8 --model FlowNet2C --optimizer=Adam --optimizer_lr=1e-4 --loss=MultiScale --loss_norm=L1 \
82 | --loss_numScales=5 --loss_startScale=4 --optimizer_lr=1e-4 --crop_size 384 512 \
83 | --training_dataset FlyingChairs --training_dataset_root /path/to/flying-chairs/dataset \
84 | --validation_dataset MpiSintelClean --validation_dataset_root /path/to/mpi-sintel/clean/dataset
85 |
86 | ## Results on MPI-Sintel
87 | [Predicted flows on MPI-Sintel](https://www.youtube.com/watch?v=HtBmabY8aeU "Predicted flows on MPI-Sintel")
88 |
89 | ## Reference
90 | If you find this implementation useful in your work, please acknowledge it appropriately and cite the paper:
91 | ````
92 | @InProceedings{IMKDB17,
93 | author = "E. Ilg and N. Mayer and T. Saikia and M. Keuper and A. Dosovitskiy and T. Brox",
94 | title = "FlowNet 2.0: Evolution of Optical Flow Estimation with Deep Networks",
95 | booktitle = "IEEE Conference on Computer Vision and Pattern Recognition (CVPR)",
96 | month = "Jul",
97 | year = "2017",
98 | url = "http://lmb.informatik.uni-freiburg.de//Publications/2017/IMKDB17"
99 | }
100 | ````
101 | ```
102 | @misc{flownet2-pytorch,
103 | author = {Fitsum Reda and Robert Pottorff and Jon Barker and Bryan Catanzaro},
104 | title = {flownet2-pytorch: Pytorch implementation of FlowNet 2.0: Evolution of Optical Flow Estimation with Deep Networks},
105 | year = {2017},
106 | publisher = {GitHub},
107 | journal = {GitHub repository},
108 | howpublished = {\url{https://github.com/NVIDIA/flownet2-pytorch}}
109 | }
110 | ```
111 | ## Related Optical Flow Work from Nvidia
112 | Code (in Caffe and Pytorch): [PWC-Net](https://github.com/NVlabs/PWC-Net)
113 | Paper : [PWC-Net: CNNs for Optical Flow Using Pyramid, Warping, and Cost Volume](https://arxiv.org/abs/1709.02371).
114 |
115 | ## Acknowledgments
116 | Parts of this code were derived, as noted in the code, from [ClementPinard/FlowNetPytorch](https://github.com/ClementPinard/FlowNetPytorch).
117 |
--------------------------------------------------------------------------------
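For completeness, a minimal sketch of constructing FlowNet2 the same way `convert.py` does and loading a converted checkpoint. It assumes the custom CUDA layers from `install.sh` are already built, that the script runs from the *dvs/flownet2* folder, and that the checkpoint file sits there; the `Namespace` stand-in mirrors the `rgb_max`/`fp16` fields `convert.py` sets on its args object.

```
# Minimal sketch, mirroring convert.py: build FlowNet2 with an args object
# carrying rgb_max and fp16, then load the weights stored under "state_dict".
# Paths and the Namespace stand-in are assumptions, not the repo's own script.
import torch
from argparse import Namespace
import models

args = Namespace(rgb_max=255.0, fp16=False)
model = models.FlowNet2(args)

ckpt = torch.load("FlowNet2_checkpoint.pth.tar", map_location="cpu")
model.load_state_dict(ckpt["state_dict"])
model.eval()
```
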
/dvs/flownet2/__init__.py:
--------------------------------------------------------------------------------
1 | from .utils import flow_utils, tools
--------------------------------------------------------------------------------
/dvs/flownet2/convert.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python2.7
2 |
3 | import caffe
4 | from caffe.proto import caffe_pb2
5 | import sys, os
6 |
7 | import torch
8 | import torch.nn as nn
9 |
10 | import argparse, tempfile
11 | import numpy as np
12 |
13 | parser = argparse.ArgumentParser()
14 | parser.add_argument('caffe_model', help='input model in hdf5 or caffemodel format')
15 | parser.add_argument('prototxt_template',help='prototxt template')
16 | parser.add_argument('flownet2_pytorch', help='path to flownet2-pytorch')
17 |
18 | args = parser.parse_args()
19 |
20 | args.rgb_max = 255
21 | args.fp16 = False
22 | args.grads = {}
23 |
24 | # load models
25 | sys.path.append(args.flownet2_pytorch)
26 |
27 | import models
28 | from utils.param_utils import *
29 |
30 | width = 256
31 | height = 256
32 | keys = {'TARGET_WIDTH': width,
33 | 'TARGET_HEIGHT': height,
34 | 'ADAPTED_WIDTH':width,
35 | 'ADAPTED_HEIGHT':height,
36 | 'SCALE_WIDTH':1.,
37 | 'SCALE_HEIGHT':1.,}
38 |
39 | template = '\n'.join(np.loadtxt(args.prototxt_template, dtype=str, delimiter='\n'))
40 | for k in keys:
41 | template = template.replace('$%s$'%(k),str(keys[k]))
42 |
43 | prototxt = tempfile.NamedTemporaryFile(mode='w', delete=True)
44 | prototxt.write(template)
45 | prototxt.flush()
46 |
47 | net = caffe.Net(prototxt.name, args.caffe_model, caffe.TEST)
48 |
49 | weights = {}
50 | biases = {}
51 |
52 | for k, v in list(net.params.items()):
53 | weights[k] = np.array(v[0].data).reshape(v[0].data.shape)
54 | biases[k] = np.array(v[1].data).reshape(v[1].data.shape)
55 | print((k, weights[k].shape, biases[k].shape))
56 |
57 | if 'FlowNet2/' in args.caffe_model:
58 | model = models.FlowNet2(args)
59 |
60 | parse_flownetc(model.flownetc.modules(), weights, biases)
61 | parse_flownets(model.flownets_1.modules(), weights, biases, param_prefix='net2_')
62 | parse_flownets(model.flownets_2.modules(), weights, biases, param_prefix='net3_')
63 | parse_flownetsd(model.flownets_d.modules(), weights, biases, param_prefix='netsd_')
64 | parse_flownetfusion(model.flownetfusion.modules(), weights, biases, param_prefix='fuse_')
65 |
66 | state = {'epoch': 0,
67 | 'state_dict': model.state_dict(),
68 | 'best_EPE': 1e10}
69 | torch.save(state, os.path.join(args.flownet2_pytorch, 'FlowNet2_checkpoint.pth.tar'))
70 |
71 | elif 'FlowNet2-C/' in args.caffe_model:
72 | model = models.FlowNet2C(args)
73 |
74 | parse_flownetc(model.modules(), weights, biases)
75 | state = {'epoch': 0,
76 | 'state_dict': model.state_dict(),
77 | 'best_EPE': 1e10}
78 | torch.save(state, os.path.join(args.flownet2_pytorch, 'FlowNet2-C_checkpoint.pth.tar'))
79 |
80 | elif 'FlowNet2-CS/' in args.caffe_model:
81 | model = models.FlowNet2CS(args)
82 |
83 | parse_flownetc(model.flownetc.modules(), weights, biases)
84 | parse_flownets(model.flownets_1.modules(), weights, biases, param_prefix='net2_')
85 |
86 | state = {'epoch': 0,
87 | 'state_dict': model.state_dict(),
88 | 'best_EPE': 1e10}
89 | torch.save(state, os.path.join(args.flownet2_pytorch, 'FlowNet2-CS_checkpoint.pth.tar'))
90 |
91 | elif 'FlowNet2-CSS/' in args.caffe_model:
92 | model = models.FlowNet2CSS(args)
93 |
94 | parse_flownetc(model.flownetc.modules(), weights, biases)
95 | parse_flownets(model.flownets_1.modules(), weights, biases, param_prefix='net2_')
96 | parse_flownets(model.flownets_2.modules(), weights, biases, param_prefix='net3_')
97 |
98 | state = {'epoch': 0,
99 | 'state_dict': model.state_dict(),
100 | 'best_EPE': 1e10}
101 | torch.save(state, os.path.join(args.flownet2_pytorch, 'FlowNet2-CSS_checkpoint.pth.tar'))
102 |
103 | elif 'FlowNet2-CSS-ft-sd/' in args.caffe_model:
104 | model = models.FlowNet2CSS(args)
105 |
106 | parse_flownetc(model.flownetc.modules(), weights, biases)
107 | parse_flownets(model.flownets_1.modules(), weights, biases, param_prefix='net2_')
108 | parse_flownets(model.flownets_2.modules(), weights, biases, param_prefix='net3_')
109 |
110 | state = {'epoch': 0,
111 | 'state_dict': model.state_dict(),
112 | 'best_EPE': 1e10}
113 | torch.save(state, os.path.join(args.flownet2_pytorch, 'FlowNet2-CSS-ft-sd_checkpoint.pth.tar'))
114 |
115 | elif 'FlowNet2-S/' in args.caffe_model:
116 | model = models.FlowNet2S(args)
117 |
118 | parse_flownetsonly(model.modules(), weights, biases, param_prefix='')
119 | state = {'epoch': 0,
120 | 'state_dict': model.state_dict(),
121 | 'best_EPE': 1e10}
122 | torch.save(state, os.path.join(args.flownet2_pytorch, 'FlowNet2-S_checkpoint.pth.tar'))
123 |
124 | elif 'FlowNet2-SD/' in args.caffe_model:
125 | model = models.FlowNet2SD(args)
126 |
127 | parse_flownetsd(model.modules(), weights, biases, param_prefix='')
128 |
129 | state = {'epoch': 0,
130 | 'state_dict': model.state_dict(),
131 | 'best_EPE': 1e10}
132 | torch.save(state, os.path.join(args.flownet2_pytorch, 'FlowNet2-SD_checkpoint.pth.tar'))
133 |
134 | else:
135 | print(('model type could not be determined from input caffe model %s'%(args.caffe_model)))
136 | quit()
137 | print(("done converting ", args.caffe_model))
--------------------------------------------------------------------------------
/dvs/flownet2/install.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | cd ./networks/correlation_package
3 | rm -rf *_cuda.egg-info build dist __pycache__
4 | python3 setup.py install --user
5 |
6 | cd ../resample2d_package
7 | rm -rf *_cuda.egg-info build dist __pycache__
8 | python3 setup.py install --user
9 |
10 | cd ../channelnorm_package
11 | rm -rf *_cuda.egg-info build dist __pycache__
12 | python3 setup.py install --user
13 |
14 | cd ..
15 |
--------------------------------------------------------------------------------
/dvs/flownet2/losses.py:
--------------------------------------------------------------------------------
1 | '''
2 | Portions of this code copyright 2017, Clement Pinard
3 | '''
4 |
5 | # freda (todo) : adversarial loss
6 |
7 | import torch
8 | import torch.nn as nn
9 | import math
10 |
11 | def EPE(input_flow, target_flow):
12 | return torch.norm(target_flow-input_flow,p=2,dim=1).mean()
13 |
14 | class L1(nn.Module):
15 | def __init__(self):
16 | super(L1, self).__init__()
17 | def forward(self, output, target):
18 | lossvalue = torch.abs(output - target).mean()
19 | return lossvalue
20 |
21 | class L2(nn.Module):
22 | def __init__(self):
23 | super(L2, self).__init__()
24 | def forward(self, output, target):
25 | lossvalue = torch.norm(output-target,p=2,dim=1).mean()
26 | return lossvalue
27 |
28 | class L1Loss(nn.Module):
29 | def __init__(self, args):
30 | super(L1Loss, self).__init__()
31 | self.args = args
32 | self.loss = L1()
33 | self.loss_labels = ['L1', 'EPE']
34 |
35 | def forward(self, output, target):
36 | lossvalue = self.loss(output, target)
37 | epevalue = EPE(output, target)
38 | return [lossvalue, epevalue]
39 |
40 | class L2Loss(nn.Module):
41 | def __init__(self, args):
42 | super(L2Loss, self).__init__()
43 | self.args = args
44 | self.loss = L2()
45 | self.loss_labels = ['L2', 'EPE']
46 |
47 | def forward(self, output, target):
48 | lossvalue = self.loss(output, target)
49 | epevalue = EPE(output, target)
50 | return [lossvalue, epevalue]
51 |
52 | class MultiScale(nn.Module):
53 | def __init__(self, args, startScale = 4, numScales = 5, l_weight= 0.32, norm= 'L1'):
54 | super(MultiScale,self).__init__()
55 |
56 | self.startScale = startScale
57 | self.numScales = numScales
58 | self.loss_weights = torch.FloatTensor([(l_weight / 2 ** scale) for scale in range(self.numScales)])
59 | self.args = args
60 | self.l_type = norm
61 | self.div_flow = 0.05
62 | assert(len(self.loss_weights) == self.numScales)
63 |
64 | if self.l_type == 'L1':
65 | self.loss = L1()
66 | else:
67 | self.loss = L2()
68 |
69 | self.multiScales = [nn.AvgPool2d(self.startScale * (2**scale), self.startScale * (2**scale)) for scale in range(self.numScales)]
70 | self.loss_labels = ['MultiScale-'+self.l_type, 'EPE'],
71 |
72 | def forward(self, output, target):
73 | lossvalue = 0
74 | epevalue = 0
75 |
76 | if type(output) is tuple:
77 | target = self.div_flow * target
78 | for i, output_ in enumerate(output):
79 | target_ = self.multiScales[i](target)
80 | epevalue += self.loss_weights[i]*EPE(output_, target_)
81 | lossvalue += self.loss_weights[i]*self.loss(output_, target_)
82 | return [lossvalue, epevalue]
83 | else:
84 | epevalue += EPE(output, target)
85 | lossvalue += self.loss(output, target)
86 | return [lossvalue, epevalue]
87 |
88 |
--------------------------------------------------------------------------------
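A minimal sketch of exercising these losses on random flow-shaped tensors (batch, 2, H, W). The loss classes above only store the `args` object they receive, so an empty `Namespace` stands in for the real argparse result here; the tensor shapes are illustrative.

```
# Minimal sketch: instantiate L1Loss and MultiScale with a dummy args object and
# run them on random (B, 2, H, W) flow tensors. Each returns [loss, EPE].
import torch
from argparse import Namespace
from losses import L1Loss, MultiScale

pred = torch.randn(4, 2, 64, 64)
target = torch.randn(4, 2, 64, 64)

l1_loss, l1_epe = L1Loss(Namespace())(pred, target)
ms_loss, ms_epe = MultiScale(Namespace(), norm='L1')(pred, target)
print(l1_loss.item(), l1_epe.item(), ms_loss.item(), ms_epe.item())
```
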
/dvs/flownet2/main.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | import os
3 | os.environ["CUDA_VISIBLE_DEVICES"] = "0"
4 | import torch
5 | import torch.nn as nn
6 | from torch.utils.data import DataLoader
7 | from torch.autograd import Variable
8 | from tensorboardX import SummaryWriter
9 |
10 | import argparse, os, sys, subprocess
11 | import colorama
12 | import numpy as np
13 | from tqdm import tqdm
14 | from glob import glob
15 | from os.path import *
16 |
17 | import models, datasets
18 | from utils import flow_utils, tools
19 | import time
20 |
21 | # Reusable function for inference
22 | def inference(args, epoch, data_path, data_loader, model, offset=0):
23 |
24 | model.eval()
25 |
26 | if args.save_flow or args.render_validation:
27 | flow_folder = "{}/flo".format(data_path)
28 | flow_back_folder = "{}/flo_back".format(data_path)
29 | if not os.path.exists(flow_folder):
30 | os.makedirs(flow_folder)
31 | if not os.path.exists(flow_back_folder):
32 | os.makedirs(flow_back_folder)
33 |
34 | # visualization folder
35 | if args.inference_visualize:
36 | flow_vis_folder = "{}/flo_vis".format(data_path)
37 | if not os.path.exists(flow_vis_folder):
38 | os.makedirs(flow_vis_folder)
39 | flow_back_vis_folder = "{}/flo_back_vis".format(data_path)
40 | if not os.path.exists(flow_back_vis_folder):
41 | os.makedirs(flow_back_vis_folder)
42 |
43 | args.inference_n_batches = np.inf if args.inference_n_batches < 0 else args.inference_n_batches
44 |
45 | progress = tqdm(data_loader, ncols=100, total=np.minimum(len(data_loader), args.inference_n_batches), desc='Inferencing ',
46 | leave=True, position=offset)
47 |
48 | for batch_idx, (data) in enumerate(progress):
49 | data = data[0]
50 | data_back = torch.cat((data[:,:,1:,:,:], data[:,:,:1,:,:]), dim = 2)
51 | if args.cuda:
52 | data_forward = data.cuda(non_blocking=True)
53 | data_back = data_back.cuda(non_blocking=True)
54 | data_forward = Variable(data_forward)
55 | data_back = Variable(data_back)
56 |
57 | flo_path = join(flow_folder, '%06d.flo'%(batch_idx))
58 | flo_back_path = join(flow_back_folder, '%06d.flo'%(batch_idx))
59 | frame_size = data_loader.dataset.frame_size
60 | if not os.path.exists(flo_path):
61 | with torch.no_grad():
62 | output = model(data_forward)[:,:,:frame_size[0], :frame_size[1]]
63 | if args.save_flow or args.render_validation:
64 | _pflow = output[0].data.cpu().numpy().transpose(1, 2, 0)
65 | flow_utils.writeFlow( flo_path, _pflow)
66 | if args.inference_visualize:
67 | flow_utils.visulize_flow_file(
68 | join(flow_folder, '%06d.flo' % (batch_idx)),flow_vis_folder)
69 |
70 | if not os.path.exists(flo_back_path):
71 | with torch.no_grad():
72 | output = model(data_back)[:,:,:frame_size[0], :frame_size[1]]
73 | if args.save_flow or args.render_validation:
74 | _pflow = output[0].data.cpu().numpy().transpose(1, 2, 0)
75 | flow_utils.writeFlow( flo_back_path, _pflow)
76 | if args.inference_visualize:
77 | flow_utils.visulize_flow_file(
78 | join(flow_back_folder, '%06d.flo' % (batch_idx)), flow_back_vis_folder)
79 |
80 | progress.update(1)
81 |
82 | if batch_idx == (args.inference_n_batches - 1):
83 | break
84 | progress.close()
85 | return
86 |
87 | if __name__ == '__main__':
88 | parser = argparse.ArgumentParser()
89 | parser.add_argument('--fp16', action='store_true', help='Run model in pseudo-fp16 mode (fp16 storage fp32 math).')
90 | parser.add_argument('--fp16_scale', type=float, default=1024., help='Loss scaling, positive power of 2 values can improve fp16 convergence.')
91 |
92 | parser.add_argument('--start_epoch', type=int, default=1)
93 | parser.add_argument('--batch_size', '-b', type=int, default=8, help="Batch size")
94 | parser.add_argument('--crop_size', type=int, nargs='+', default = [256, 256], help="Spatial dimension to crop training samples for training")
95 | parser.add_argument("--rgb_max", type=float, default = 255.)
96 |
97 | parser.add_argument('--number_workers', '-nw', '--num_workers', type=int, default=8)
98 | parser.add_argument('--number_gpus', '-ng', type=int, default=-1, help='number of GPUs to use')
99 | parser.add_argument('--no_cuda', action='store_true')
100 |
101 | parser.add_argument('--save', '-s', default='./Google', type=str, help='directory for saving')
102 |
103 | parser.add_argument('--inference', action='store_true')
104 | parser.add_argument('--inference_visualize', action='store_true',
105 | help="visualize the optical flow during inference")
106 | parser.add_argument('--inference_size', type=int, nargs='+', default = [-1,-1], help='spatial size divisible by 64. default (-1,-1) - largest possible valid size would be used')
107 | parser.add_argument('--inference_batch_size', type=int, default=1)
108 | parser.add_argument('--inference_n_batches', type=int, default=-1)
109 | parser.add_argument('--save_flow', action='store_true', help='save predicted flows to file')
110 |
111 | parser.add_argument('--resume', default='', type=str, metavar='PATH', help='path to latest checkpoint (default: none)')
112 | parser.add_argument('--log_frequency', '--summ_iter', type=int, default=1, help="Log every n batches")
113 |
114 | tools.add_arguments_for_module(parser, models, argument_for_class='model', default='FlowNet2')
115 |
116 | tools.add_arguments_for_module(parser, datasets, argument_for_class='inference_dataset', default='Google',
117 | skip_params=['is_cropped'],
118 | parameter_defaults={'root': './Google/train',
119 | 'replicates': 1})
120 |
121 | main_dir = os.path.dirname(os.path.realpath(__file__))
122 | os.chdir(main_dir)
123 |
124 | # Parse the official arguments
125 | with tools.TimerBlock("Parsing Arguments") as block:
126 | args = parser.parse_args()
127 | if args.number_gpus < 0 : args.number_gpus = torch.cuda.device_count()
128 |
129 | # Get argument defaults (hashtag #thisisahack)
130 | parser.add_argument('--IGNORE', action='store_true')
131 | defaults = vars(parser.parse_args(['--IGNORE']))
132 |
133 | # Print all arguments, color the non-defaults
134 | for argument, value in sorted(vars(args).items()):
135 | reset = colorama.Style.RESET_ALL
136 | color = reset if value == defaults[argument] else colorama.Fore.MAGENTA
137 | block.log('{}{}: {}{}'.format(color, argument, value, reset))
138 |
139 | args.model_class = tools.module_to_dict(models)[args.model]
140 |
141 | args.inference_dataset_class = tools.module_to_dict(datasets)[args.inference_dataset]
142 |
143 | args.cuda = not args.no_cuda and torch.cuda.is_available()
144 | # args.current_hash = subprocess.check_output(["git", "rev-parse", "HEAD"]).rstrip()
145 | args.log_file = join(args.save, 'args.txt')
146 |
147 | # dict to collect activation gradients (for training debug purpose)
148 | args.grads = {}
149 |
150 | args.total_epochs = 1
151 | args.inference_dir = "{}/inference".format(args.save)
152 |
153 | print('Source Code')
154 | # print((' Current Git Hash: {}\n'.format(args.current_hash)))
155 |
156 | # Dynamically load the dataset class with parameters passed in via "--argument_[param]=[value]" arguments
157 | with tools.TimerBlock("Initializing Datasets") as block:
158 | args.effective_batch_size = args.batch_size * args.number_gpus
159 | args.effective_inference_batch_size = args.inference_batch_size * args.number_gpus
160 | args.effective_number_workers = args.number_workers * args.number_gpus
161 | gpuargs = {'num_workers': args.effective_number_workers,
162 | 'pin_memory': True,
163 | 'drop_last' : True} if args.cuda else {}
164 | inf_gpuargs = gpuargs.copy()
165 | inf_gpuargs['num_workers'] = args.number_workers
166 |
167 | block.log('Inference Dataset: {}'.format(args.inference_dataset))
168 |
169 | dataset_root = args.inference_dataset_root
170 | data_name = sorted(os.listdir(dataset_root))
171 |
172 | block.log(data_name)
173 | inference_loaders = {}
174 | for i in range(len(data_name)):
175 | dataset_path = os.path.join(dataset_root, data_name[i])
176 | args.inference_dataset_root = dataset_path
177 | inference_dataset = args.inference_dataset_class(args, False, **tools.kwargs_from_args(args, 'inference_dataset'))
178 | inference_loaders[dataset_path] = DataLoader(inference_dataset, batch_size=args.effective_inference_batch_size, shuffle=False, **inf_gpuargs)
179 | block.log('Inference Input: {}'.format(' '.join([str([d for d in x.size()]) for x in inference_dataset[0][0]])))
180 |
181 | # Dynamically load model and loss class with parameters passed in via "--model_[param]=[value]" or "--loss_[param]=[value]" arguments
182 | with tools.TimerBlock("Building {} model".format(args.model)) as block:
183 | class Model(nn.Module):
184 | def __init__(self, args):
185 | super(Model, self).__init__()
186 | kwargs = tools.kwargs_from_args(args, 'model')
187 | self.model = args.model_class(args, **kwargs)
188 |
189 | def forward(self, data):
190 | output = self.model(data)
191 | return output
192 |
193 | model = Model(args)
194 |
195 | block.log('Effective Batch Size: {}'.format(args.effective_batch_size))
196 | block.log('Number of parameters: {}'.format(sum([p.data.nelement() if p.requires_grad else 0 for p in model.parameters()])))
197 |
198 | if args.cuda and args.number_gpus > 0:
199 | block.log('Initializing CUDA')
200 | model = model.cuda()
201 | block.log('Parallelizing')
202 | model = nn.parallel.DataParallel(model, device_ids=list(range(args.number_gpus)))
203 |
204 | # Load weights if needed, otherwise randomly initialize
205 | if args.resume and os.path.isfile(args.resume):
206 | block.log("Loading checkpoint '{}'".format(args.resume))
207 | checkpoint = torch.load(args.resume)
208 | model.module.model.load_state_dict(checkpoint['state_dict'])
209 | block.log("Loaded checkpoint '{}' (at epoch {})".format(args.resume, checkpoint['epoch']))
210 |
211 | elif args.resume and args.inference:
212 | block.log("No checkpoint found at '{}'".format(args.resume))
213 | quit()
214 |
215 | else:
216 | block.log("Random initialization")
217 |
218 | block.log("Initializing save directory: {}".format(args.save))
219 | if not os.path.exists(args.save):
220 | os.makedirs(args.save)
221 |
222 | # Log all arguments to file
223 | for argument, value in sorted(vars(args).items()):
224 | block.log2file(args.log_file, '{}: {}'.format(argument, value))
225 |
226 | for data_path in inference_loaders:
227 | # Primary epoch loop
228 | progress = tqdm(list(range(args.start_epoch, args.total_epochs + 1)), miniters=1, ncols=100, desc='Overall Progress', leave=True, position=0)
229 | offset = 1
230 |
231 | for epoch in progress:
232 | stats = inference(args=args, epoch=epoch - 1, data_path = data_path, data_loader=inference_loaders[data_path], model=model, offset=offset)
233 | offset += 1
234 | print("\n")
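The inference loop above obtains the backward flow by re-running the network on the same frame pair with the two frames swapped along the frame dimension. A small sketch of that swap, assuming the loader yields pairs stacked as (batch, channels, frame, H, W):

import torch

data = torch.randn(1, 3, 2, 64, 64)                                          # one RGB frame pair
data_back = torch.cat((data[:, :, 1:, :, :], data[:, :, :1, :, :]), dim=2)   # swap frame order (cf. inference())
assert torch.equal(data_back[:, :, 0], data[:, :, 1])
assert torch.equal(data_back[:, :, 1], data[:, :, 0])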
--------------------------------------------------------------------------------
/dvs/flownet2/networks/FlowNetC.py:
--------------------------------------------------------------------------------
1 | import torch
2 | import torch.nn as nn
3 | from torch.nn import init
4 |
5 | import math
6 | import numpy as np
7 |
8 | from .correlation_package.correlation import Correlation
9 |
10 | from .submodules import *
11 | 'Parameter count = 39,175,298'
12 |
13 | class FlowNetC(nn.Module):
14 | def __init__(self,args, batchNorm=True, div_flow = 20):
15 | super(FlowNetC,self).__init__()
16 |
17 | self.batchNorm = batchNorm
18 | self.div_flow = div_flow
19 |
20 | self.conv1 = conv(self.batchNorm, 3, 64, kernel_size=7, stride=2)
21 | self.conv2 = conv(self.batchNorm, 64, 128, kernel_size=5, stride=2)
22 | self.conv3 = conv(self.batchNorm, 128, 256, kernel_size=5, stride=2)
23 | self.conv_redir = conv(self.batchNorm, 256, 32, kernel_size=1, stride=1)
24 |
25 | if args.fp16:
26 | self.corr = nn.Sequential(
27 | tofp32(),
28 | Correlation(pad_size=20, kernel_size=1, max_displacement=20, stride1=1, stride2=2, corr_multiply=1),
29 | tofp16())
30 | else:
31 | self.corr = Correlation(pad_size=20, kernel_size=1, max_displacement=20, stride1=1, stride2=2, corr_multiply=1)
32 |
33 | self.corr_activation = nn.LeakyReLU(0.1,inplace=True)
34 | self.conv3_1 = conv(self.batchNorm, 473, 256)
35 | self.conv4 = conv(self.batchNorm, 256, 512, stride=2)
36 | self.conv4_1 = conv(self.batchNorm, 512, 512)
37 | self.conv5 = conv(self.batchNorm, 512, 512, stride=2)
38 | self.conv5_1 = conv(self.batchNorm, 512, 512)
39 | self.conv6 = conv(self.batchNorm, 512, 1024, stride=2)
40 | self.conv6_1 = conv(self.batchNorm,1024, 1024)
41 |
42 | self.deconv5 = deconv(1024,512)
43 | self.deconv4 = deconv(1026,256)
44 | self.deconv3 = deconv(770,128)
45 | self.deconv2 = deconv(386,64)
46 |
47 | self.predict_flow6 = predict_flow(1024)
48 | self.predict_flow5 = predict_flow(1026)
49 | self.predict_flow4 = predict_flow(770)
50 | self.predict_flow3 = predict_flow(386)
51 | self.predict_flow2 = predict_flow(194)
52 |
53 | self.upsampled_flow6_to_5 = nn.ConvTranspose2d(2, 2, 4, 2, 1, bias=True)
54 | self.upsampled_flow5_to_4 = nn.ConvTranspose2d(2, 2, 4, 2, 1, bias=True)
55 | self.upsampled_flow4_to_3 = nn.ConvTranspose2d(2, 2, 4, 2, 1, bias=True)
56 | self.upsampled_flow3_to_2 = nn.ConvTranspose2d(2, 2, 4, 2, 1, bias=True)
57 |
58 | for m in self.modules():
59 | if isinstance(m, nn.Conv2d):
60 | if m.bias is not None:
61 | init.uniform_(m.bias)
62 | init.xavier_uniform_(m.weight)
63 |
64 | if isinstance(m, nn.ConvTranspose2d):
65 | if m.bias is not None:
66 | init.uniform_(m.bias)
67 | init.xavier_uniform_(m.weight)
68 | # init_deconv_bilinear(m.weight)
69 | self.upsample1 = nn.Upsample(scale_factor=4, mode='bilinear')
70 |
71 | def forward(self, x):
72 | x1 = x[:,0:3,:,:]
73 | x2 = x[:,3::,:,:]
74 |
75 | out_conv1a = self.conv1(x1)
76 | out_conv2a = self.conv2(out_conv1a)
77 | out_conv3a = self.conv3(out_conv2a)
78 |
79 | # FlownetC bottom input stream
80 | out_conv1b = self.conv1(x2)
81 |
82 | out_conv2b = self.conv2(out_conv1b)
83 | out_conv3b = self.conv3(out_conv2b)
84 |
85 | # Merge streams
86 | out_corr = self.corr(out_conv3a, out_conv3b) # False
87 | out_corr = self.corr_activation(out_corr)
88 |
89 | # Redirect top input stream and concatenate
90 | out_conv_redir = self.conv_redir(out_conv3a)
91 |
92 | in_conv3_1 = torch.cat((out_conv_redir, out_corr), 1)
93 |
94 | # Merged conv layers
95 | out_conv3_1 = self.conv3_1(in_conv3_1)
96 |
97 | out_conv4 = self.conv4_1(self.conv4(out_conv3_1))
98 |
99 | out_conv5 = self.conv5_1(self.conv5(out_conv4))
100 | out_conv6 = self.conv6_1(self.conv6(out_conv5))
101 |
102 | flow6 = self.predict_flow6(out_conv6)
103 | flow6_up = self.upsampled_flow6_to_5(flow6)
104 | out_deconv5 = self.deconv5(out_conv6)
105 |
106 | concat5 = torch.cat((out_conv5,out_deconv5,flow6_up),1)
107 |
108 | flow5 = self.predict_flow5(concat5)
109 | flow5_up = self.upsampled_flow5_to_4(flow5)
110 | out_deconv4 = self.deconv4(concat5)
111 | concat4 = torch.cat((out_conv4,out_deconv4,flow5_up),1)
112 |
113 | flow4 = self.predict_flow4(concat4)
114 | flow4_up = self.upsampled_flow4_to_3(flow4)
115 | out_deconv3 = self.deconv3(concat4)
116 | concat3 = torch.cat((out_conv3_1,out_deconv3,flow4_up),1)
117 |
118 | flow3 = self.predict_flow3(concat3)
119 | flow3_up = self.upsampled_flow3_to_2(flow3)
120 | out_deconv2 = self.deconv2(concat3)
121 | concat2 = torch.cat((out_conv2a,out_deconv2,flow3_up),1)
122 |
123 | flow2 = self.predict_flow2(concat2)
124 |
125 | if self.training:
126 | return flow2,flow3,flow4,flow5,flow6
127 | else:
128 | return flow2,
129 |
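The hard-coded channel widths in FlowNetC follow from the correlation settings and the concatenations in forward(); a quick consistency check of that arithmetic (plain Python, illustrative only):

max_displacement, stride2 = 20, 2
corr_channels = (2 * (max_displacement // stride2) + 1) ** 2   # 21 x 21 = 441 correlation channels
assert corr_channels + 32 == 473                               # + conv_redir output -> conv3_1 input
assert 512 + 512 + 2 == 1026                                   # out_conv5 + out_deconv5 + flow6_up -> predict_flow5 / deconv4
assert 512 + 256 + 2 == 770                                    # out_conv4 + out_deconv4 + flow5_up
assert 256 + 128 + 2 == 386                                    # out_conv3_1 + out_deconv3 + flow4_up
assert 128 + 64 + 2 == 194                                     # out_conv2a + out_deconv2 + flow3_up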
--------------------------------------------------------------------------------
/dvs/flownet2/networks/FlowNetFusion.py:
--------------------------------------------------------------------------------
1 | import torch
2 | import torch.nn as nn
3 | from torch.nn import init
4 |
5 | import math
6 | import numpy as np
7 |
8 | from .submodules import *
9 | 'Parameter count = 581,226'
10 |
11 | class FlowNetFusion(nn.Module):
12 | def __init__(self,args, batchNorm=True):
13 | super(FlowNetFusion,self).__init__()
14 |
15 | self.batchNorm = batchNorm
16 | self.conv0 = conv(self.batchNorm, 11, 64)
17 | self.conv1 = conv(self.batchNorm, 64, 64, stride=2)
18 | self.conv1_1 = conv(self.batchNorm, 64, 128)
19 | self.conv2 = conv(self.batchNorm, 128, 128, stride=2)
20 | self.conv2_1 = conv(self.batchNorm, 128, 128)
21 |
22 | self.deconv1 = deconv(128,32)
23 | self.deconv0 = deconv(162,16)
24 |
25 | self.inter_conv1 = i_conv(self.batchNorm, 162, 32)
26 | self.inter_conv0 = i_conv(self.batchNorm, 82, 16)
27 |
28 | self.predict_flow2 = predict_flow(128)
29 | self.predict_flow1 = predict_flow(32)
30 | self.predict_flow0 = predict_flow(16)
31 |
32 | self.upsampled_flow2_to_1 = nn.ConvTranspose2d(2, 2, 4, 2, 1)
33 | self.upsampled_flow1_to_0 = nn.ConvTranspose2d(2, 2, 4, 2, 1)
34 |
35 | for m in self.modules():
36 | if isinstance(m, nn.Conv2d):
37 | if m.bias is not None:
38 | init.uniform_(m.bias)
39 | init.xavier_uniform_(m.weight)
40 |
41 | if isinstance(m, nn.ConvTranspose2d):
42 | if m.bias is not None:
43 | init.uniform_(m.bias)
44 | init.xavier_uniform_(m.weight)
45 | # init_deconv_bilinear(m.weight)
46 |
47 | def forward(self, x):
48 | out_conv0 = self.conv0(x)
49 | out_conv1 = self.conv1_1(self.conv1(out_conv0))
50 | out_conv2 = self.conv2_1(self.conv2(out_conv1))
51 |
52 | flow2 = self.predict_flow2(out_conv2)
53 | flow2_up = self.upsampled_flow2_to_1(flow2)
54 | out_deconv1 = self.deconv1(out_conv2)
55 |
56 | concat1 = torch.cat((out_conv1,out_deconv1,flow2_up),1)
57 | out_interconv1 = self.inter_conv1(concat1)
58 | flow1 = self.predict_flow1(out_interconv1)
59 | flow1_up = self.upsampled_flow1_to_0(flow1)
60 | out_deconv0 = self.deconv0(concat1)
61 |
62 | concat0 = torch.cat((out_conv0,out_deconv0,flow1_up),1)
63 | out_interconv0 = self.inter_conv0(concat0)
64 | flow0 = self.predict_flow0(out_interconv0)
65 |
66 | return flow0
67 |
68 |
--------------------------------------------------------------------------------
/dvs/flownet2/networks/FlowNetS.py:
--------------------------------------------------------------------------------
1 | '''
2 | Portions of this code copyright 2017, Clement Pinard
3 | '''
4 |
5 | import torch
6 | import torch.nn as nn
7 | from torch.nn import init
8 |
9 | import math
10 | import numpy as np
11 |
12 | from .submodules import *
13 | 'Parameter count : 38,676,504 '
14 |
15 | class FlowNetS(nn.Module):
16 | def __init__(self, args, input_channels = 12, batchNorm=True):
17 | super(FlowNetS,self).__init__()
18 |
19 | self.batchNorm = batchNorm
20 | self.conv1 = conv(self.batchNorm, input_channels, 64, kernel_size=7, stride=2)
21 | self.conv2 = conv(self.batchNorm, 64, 128, kernel_size=5, stride=2)
22 | self.conv3 = conv(self.batchNorm, 128, 256, kernel_size=5, stride=2)
23 | self.conv3_1 = conv(self.batchNorm, 256, 256)
24 | self.conv4 = conv(self.batchNorm, 256, 512, stride=2)
25 | self.conv4_1 = conv(self.batchNorm, 512, 512)
26 | self.conv5 = conv(self.batchNorm, 512, 512, stride=2)
27 | self.conv5_1 = conv(self.batchNorm, 512, 512)
28 | self.conv6 = conv(self.batchNorm, 512, 1024, stride=2)
29 | self.conv6_1 = conv(self.batchNorm,1024, 1024)
30 |
31 | self.deconv5 = deconv(1024,512)
32 | self.deconv4 = deconv(1026,256)
33 | self.deconv3 = deconv(770,128)
34 | self.deconv2 = deconv(386,64)
35 |
36 | self.predict_flow6 = predict_flow(1024)
37 | self.predict_flow5 = predict_flow(1026)
38 | self.predict_flow4 = predict_flow(770)
39 | self.predict_flow3 = predict_flow(386)
40 | self.predict_flow2 = predict_flow(194)
41 |
42 | self.upsampled_flow6_to_5 = nn.ConvTranspose2d(2, 2, 4, 2, 1, bias=False)
43 | self.upsampled_flow5_to_4 = nn.ConvTranspose2d(2, 2, 4, 2, 1, bias=False)
44 | self.upsampled_flow4_to_3 = nn.ConvTranspose2d(2, 2, 4, 2, 1, bias=False)
45 | self.upsampled_flow3_to_2 = nn.ConvTranspose2d(2, 2, 4, 2, 1, bias=False)
46 |
47 | for m in self.modules():
48 | if isinstance(m, nn.Conv2d):
49 | if m.bias is not None:
50 | init.uniform_(m.bias)
51 | init.xavier_uniform_(m.weight)
52 |
53 | if isinstance(m, nn.ConvTranspose2d):
54 | if m.bias is not None:
55 | init.uniform_(m.bias)
56 | init.xavier_uniform_(m.weight)
57 | # init_deconv_bilinear(m.weight)
58 | self.upsample1 = nn.Upsample(scale_factor=4, mode='bilinear')
59 |
60 | def forward(self, x):
61 | out_conv1 = self.conv1(x)
62 |
63 | out_conv2 = self.conv2(out_conv1)
64 | out_conv3 = self.conv3_1(self.conv3(out_conv2))
65 | out_conv4 = self.conv4_1(self.conv4(out_conv3))
66 | out_conv5 = self.conv5_1(self.conv5(out_conv4))
67 | out_conv6 = self.conv6_1(self.conv6(out_conv5))
68 |
69 | flow6 = self.predict_flow6(out_conv6)
70 | flow6_up = self.upsampled_flow6_to_5(flow6)
71 | out_deconv5 = self.deconv5(out_conv6)
72 |
73 | concat5 = torch.cat((out_conv5,out_deconv5,flow6_up),1)
74 | flow5 = self.predict_flow5(concat5)
75 | flow5_up = self.upsampled_flow5_to_4(flow5)
76 | out_deconv4 = self.deconv4(concat5)
77 |
78 | concat4 = torch.cat((out_conv4,out_deconv4,flow5_up),1)
79 | flow4 = self.predict_flow4(concat4)
80 | flow4_up = self.upsampled_flow4_to_3(flow4)
81 | out_deconv3 = self.deconv3(concat4)
82 |
83 | concat3 = torch.cat((out_conv3,out_deconv3,flow4_up),1)
84 | flow3 = self.predict_flow3(concat3)
85 | flow3_up = self.upsampled_flow3_to_2(flow3)
86 | out_deconv2 = self.deconv2(concat3)
87 |
88 | concat2 = torch.cat((out_conv2,out_deconv2,flow3_up),1)
89 | flow2 = self.predict_flow2(concat2)
90 |
91 | if self.training:
92 | return flow2,flow3,flow4,flow5,flow6
93 | else:
94 | return flow2,
95 |
96 |
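A standalone sketch of how FlowNetS behaves from the caller's side; args is never used by __init__, and the input height/width must be divisible by 64 (here 256), so the finest flow comes back at 1/4 resolution:

import torch

net = FlowNetS(args=None, input_channels=6, batchNorm=True)   # args is ignored by this class
x = torch.randn(1, 6, 256, 256)

net.train()
flow2, flow3, flow4, flow5, flow6 = net(x)                    # full pyramid, finest (1/4 resolution) first

net.eval()
with torch.no_grad():
    flow2, = net(x)                                           # eval mode returns a 1-tuple with the finest flow
assert flow2.shape == (1, 2, 64, 64)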
--------------------------------------------------------------------------------
/dvs/flownet2/networks/FlowNetSD.py:
--------------------------------------------------------------------------------
1 | import torch
2 | import torch.nn as nn
3 | from torch.nn import init
4 |
5 | import math
6 | import numpy as np
7 |
8 | from .submodules import *
9 | 'Parameter count = 45,371,666'
10 |
11 | class FlowNetSD(nn.Module):
12 | def __init__(self, args, batchNorm=True):
13 | super(FlowNetSD,self).__init__()
14 |
15 | self.batchNorm = batchNorm
16 | self.conv0 = conv(self.batchNorm, 6, 64)
17 | self.conv1 = conv(self.batchNorm, 64, 64, stride=2)
18 | self.conv1_1 = conv(self.batchNorm, 64, 128)
19 | self.conv2 = conv(self.batchNorm, 128, 128, stride=2)
20 | self.conv2_1 = conv(self.batchNorm, 128, 128)
21 | self.conv3 = conv(self.batchNorm, 128, 256, stride=2)
22 | self.conv3_1 = conv(self.batchNorm, 256, 256)
23 | self.conv4 = conv(self.batchNorm, 256, 512, stride=2)
24 | self.conv4_1 = conv(self.batchNorm, 512, 512)
25 | self.conv5 = conv(self.batchNorm, 512, 512, stride=2)
26 | self.conv5_1 = conv(self.batchNorm, 512, 512)
27 | self.conv6 = conv(self.batchNorm, 512, 1024, stride=2)
28 | self.conv6_1 = conv(self.batchNorm,1024, 1024)
29 |
30 | self.deconv5 = deconv(1024,512)
31 | self.deconv4 = deconv(1026,256)
32 | self.deconv3 = deconv(770,128)
33 | self.deconv2 = deconv(386,64)
34 |
35 | self.inter_conv5 = i_conv(self.batchNorm, 1026, 512)
36 | self.inter_conv4 = i_conv(self.batchNorm, 770, 256)
37 | self.inter_conv3 = i_conv(self.batchNorm, 386, 128)
38 | self.inter_conv2 = i_conv(self.batchNorm, 194, 64)
39 |
40 | self.predict_flow6 = predict_flow(1024)
41 | self.predict_flow5 = predict_flow(512)
42 | self.predict_flow4 = predict_flow(256)
43 | self.predict_flow3 = predict_flow(128)
44 | self.predict_flow2 = predict_flow(64)
45 |
46 | self.upsampled_flow6_to_5 = nn.ConvTranspose2d(2, 2, 4, 2, 1)
47 | self.upsampled_flow5_to_4 = nn.ConvTranspose2d(2, 2, 4, 2, 1)
48 | self.upsampled_flow4_to_3 = nn.ConvTranspose2d(2, 2, 4, 2, 1)
49 | self.upsampled_flow3_to_2 = nn.ConvTranspose2d(2, 2, 4, 2, 1)
50 |
51 | for m in self.modules():
52 | if isinstance(m, nn.Conv2d):
53 | if m.bias is not None:
54 | init.uniform_(m.bias)
55 | init.xavier_uniform_(m.weight)
56 |
57 | if isinstance(m, nn.ConvTranspose2d):
58 | if m.bias is not None:
59 | init.uniform_(m.bias)
60 | init.xavier_uniform_(m.weight)
61 | # init_deconv_bilinear(m.weight)
62 | self.upsample1 = nn.Upsample(scale_factor=4, mode='bilinear')
63 |
64 |
65 |
66 | def forward(self, x):
67 | out_conv0 = self.conv0(x)
68 | out_conv1 = self.conv1_1(self.conv1(out_conv0))
69 | out_conv2 = self.conv2_1(self.conv2(out_conv1))
70 |
71 | out_conv3 = self.conv3_1(self.conv3(out_conv2))
72 | out_conv4 = self.conv4_1(self.conv4(out_conv3))
73 | out_conv5 = self.conv5_1(self.conv5(out_conv4))
74 | out_conv6 = self.conv6_1(self.conv6(out_conv5))
75 |
76 | flow6 = self.predict_flow6(out_conv6)
77 | flow6_up = self.upsampled_flow6_to_5(flow6)
78 | out_deconv5 = self.deconv5(out_conv6)
79 |
80 | concat5 = torch.cat((out_conv5,out_deconv5,flow6_up),1)
81 | out_interconv5 = self.inter_conv5(concat5)
82 | flow5 = self.predict_flow5(out_interconv5)
83 |
84 | flow5_up = self.upsampled_flow5_to_4(flow5)
85 | out_deconv4 = self.deconv4(concat5)
86 |
87 | concat4 = torch.cat((out_conv4,out_deconv4,flow5_up),1)
88 | out_interconv4 = self.inter_conv4(concat4)
89 | flow4 = self.predict_flow4(out_interconv4)
90 | flow4_up = self.upsampled_flow4_to_3(flow4)
91 | out_deconv3 = self.deconv3(concat4)
92 |
93 | concat3 = torch.cat((out_conv3,out_deconv3,flow4_up),1)
94 | out_interconv3 = self.inter_conv3(concat3)
95 | flow3 = self.predict_flow3(out_interconv3)
96 | flow3_up = self.upsampled_flow3_to_2(flow3)
97 | out_deconv2 = self.deconv2(concat3)
98 |
99 | concat2 = torch.cat((out_conv2,out_deconv2,flow3_up),1)
100 | out_interconv2 = self.inter_conv2(concat2)
101 | flow2 = self.predict_flow2(out_interconv2)
102 |
103 | if self.training:
104 | return flow2,flow3,flow4,flow5,flow6
105 | else:
106 | return flow2,
107 |
--------------------------------------------------------------------------------
/dvs/flownet2/networks/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/googleinterns/deep-stabilization/7159c09d21aee3fc2098c64698c1300e40e3a8ea/dvs/flownet2/networks/__init__.py
--------------------------------------------------------------------------------
/dvs/flownet2/networks/channelnorm_package/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/googleinterns/deep-stabilization/7159c09d21aee3fc2098c64698c1300e40e3a8ea/dvs/flownet2/networks/channelnorm_package/__init__.py
--------------------------------------------------------------------------------
/dvs/flownet2/networks/channelnorm_package/channelnorm.py:
--------------------------------------------------------------------------------
1 | from torch.autograd import Function, Variable
2 | from torch.nn.modules.module import Module
3 | import channelnorm_cuda
4 |
5 | class ChannelNormFunction(Function):
6 |
7 | @staticmethod
8 | def forward(ctx, input1, norm_deg=2):
9 | assert input1.is_contiguous()
10 | b, _, h, w = input1.size()
11 | output = input1.new(b, 1, h, w).zero_()
12 |
13 | channelnorm_cuda.forward(input1, output, norm_deg)
14 | ctx.save_for_backward(input1, output)
15 | ctx.norm_deg = norm_deg
16 |
17 | return output
18 |
19 | @staticmethod
20 | def backward(ctx, grad_output):
21 | input1, output = ctx.saved_tensors
22 |
23 | grad_input1 = Variable(input1.new(input1.size()).zero_())
24 |
25 | channelnorm_cuda.backward(input1, output, grad_output.data,
26 | grad_input1.data, ctx.norm_deg)
27 |
28 | return grad_input1, None
29 |
30 |
31 | class ChannelNorm(Module):
32 |
33 | def __init__(self, norm_deg=2):
34 | super(ChannelNorm, self).__init__()
35 | self.norm_deg = norm_deg
36 |
37 | def forward(self, input1):
38 | return ChannelNormFunction.apply(input1, self.norm_deg)
39 |
40 |
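For reference, the CUDA kernel behind this module (channelnorm_kernel.cu, below) always computes the per-pixel L2 norm over channels, regardless of norm_deg. A pure-PyTorch equivalent, useful as a sanity check when the extension is not built (a sketch, not part of the package):

import torch

def channel_norm_reference(x: torch.Tensor) -> torch.Tensor:
    # (B, C, H, W) -> (B, 1, H, W): sqrt of the sum of squares over the channel dimension
    return x.pow(2).sum(dim=1, keepdim=True).sqrt()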
--------------------------------------------------------------------------------
/dvs/flownet2/networks/channelnorm_package/channelnorm_cuda.cc:
--------------------------------------------------------------------------------
1 | #include <torch/torch.h>
2 | #include <ATen/ATen.h>
3 |
4 | #include "channelnorm_kernel.cuh"
5 |
6 | int channelnorm_cuda_forward(
7 | at::Tensor& input1,
8 | at::Tensor& output,
9 | int norm_deg) {
10 |
11 | channelnorm_kernel_forward(input1, output, norm_deg);
12 | return 1;
13 | }
14 |
15 |
16 | int channelnorm_cuda_backward(
17 | at::Tensor& input1,
18 | at::Tensor& output,
19 | at::Tensor& gradOutput,
20 | at::Tensor& gradInput1,
21 | int norm_deg) {
22 |
23 | channelnorm_kernel_backward(input1, output, gradOutput, gradInput1, norm_deg);
24 | return 1;
25 | }
26 |
27 | PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) {
28 | m.def("forward", &channelnorm_cuda_forward, "Channel norm forward (CUDA)");
29 | m.def("backward", &channelnorm_cuda_backward, "Channel norm backward (CUDA)");
30 | }
31 |
32 |
--------------------------------------------------------------------------------
/dvs/flownet2/networks/channelnorm_package/channelnorm_kernel.cu:
--------------------------------------------------------------------------------
1 | #include <ATen/ATen.h>
2 | #include <ATen/Context.h>
3 | #include <ATen/cuda/CUDAContext.h>
4 |
5 | #include "channelnorm_kernel.cuh"
6 |
7 | #define CUDA_NUM_THREADS 512
8 |
9 | #define DIM0(TENSOR) ((TENSOR).x)
10 | #define DIM1(TENSOR) ((TENSOR).y)
11 | #define DIM2(TENSOR) ((TENSOR).z)
12 | #define DIM3(TENSOR) ((TENSOR).w)
13 |
14 | #define DIM3_INDEX(TENSOR, xx, yy, zz, ww) ((TENSOR)[((xx) * (TENSOR##_stride.x)) + ((yy) * (TENSOR##_stride.y)) + ((zz) * (TENSOR##_stride.z)) + ((ww) * (TENSOR##_stride.w))])
15 |
16 | using at::Half;
17 |
18 | template <typename scalar_t>
19 | __global__ void kernel_channelnorm_update_output(
20 | const int n,
21 | const scalar_t* __restrict__ input1,
22 | const long4 input1_size,
23 | const long4 input1_stride,
24 | scalar_t* __restrict__ output,
25 | const long4 output_size,
26 | const long4 output_stride,
27 | int norm_deg) {
28 |
29 | int index = blockIdx.x * blockDim.x + threadIdx.x;
30 |
31 | if (index >= n) {
32 | return;
33 | }
34 |
35 | int dim_b = DIM0(output_size);
36 | int dim_c = DIM1(output_size);
37 | int dim_h = DIM2(output_size);
38 | int dim_w = DIM3(output_size);
39 | int dim_chw = dim_c * dim_h * dim_w;
40 |
41 | int b = ( index / dim_chw ) % dim_b;
42 | int y = ( index / dim_w ) % dim_h;
43 | int x = ( index ) % dim_w;
44 |
45 | int i1dim_c = DIM1(input1_size);
46 | int i1dim_h = DIM2(input1_size);
47 | int i1dim_w = DIM3(input1_size);
48 | int i1dim_chw = i1dim_c * i1dim_h * i1dim_w;
49 | int i1dim_hw = i1dim_h * i1dim_w;
50 |
51 | float result = 0.0;
52 |
53 | for (int c = 0; c < i1dim_c; ++c) {
54 | int i1Index = b * i1dim_chw + c * i1dim_hw + y * i1dim_w + x;
55 | scalar_t val = input1[i1Index];
56 | result += static_cast<float>(val * val);
57 | }
58 | result = sqrt(result);
59 | output[index] = static_cast<scalar_t>(result);
60 | }
61 |
62 |
63 | template <typename scalar_t>
64 | __global__ void kernel_channelnorm_backward_input1(
65 | const int n,
66 | const scalar_t* __restrict__ input1, const long4 input1_size, const long4 input1_stride,
67 | const scalar_t* __restrict__ output, const long4 output_size, const long4 output_stride,
68 | const scalar_t* __restrict__ gradOutput, const long4 gradOutput_size, const long4 gradOutput_stride,
69 | scalar_t* __restrict__ gradInput, const long4 gradInput_size, const long4 gradInput_stride,
70 | int norm_deg) {
71 |
72 | int index = blockIdx.x * blockDim.x + threadIdx.x;
73 |
74 | if (index >= n) {
75 | return;
76 | }
77 |
78 | float val = 0.0;
79 |
80 | int dim_b = DIM0(gradInput_size);
81 | int dim_c = DIM1(gradInput_size);
82 | int dim_h = DIM2(gradInput_size);
83 | int dim_w = DIM3(gradInput_size);
84 | int dim_chw = dim_c * dim_h * dim_w;
85 | int dim_hw = dim_h * dim_w;
86 |
87 | int b = ( index / dim_chw ) % dim_b;
88 | int y = ( index / dim_w ) % dim_h;
89 | int x = ( index ) % dim_w;
90 |
91 |
92 | int outIndex = b * dim_hw + y * dim_w + x;
93 | val = static_cast<float>(gradOutput[outIndex]) * static_cast<float>(input1[index]) / (static_cast<float>(output[outIndex])+1e-9);
94 | gradInput[index] = static_cast<scalar_t>(val);
95 |
96 | }
97 |
98 | void channelnorm_kernel_forward(
99 | at::Tensor& input1,
100 | at::Tensor& output,
101 | int norm_deg) {
102 |
103 | const long4 input1_size = make_long4(input1.size(0), input1.size(1), input1.size(2), input1.size(3));
104 | const long4 input1_stride = make_long4(input1.stride(0), input1.stride(1), input1.stride(2), input1.stride(3));
105 |
106 | const long4 output_size = make_long4(output.size(0), output.size(1), output.size(2), output.size(3));
107 | const long4 output_stride = make_long4(output.stride(0), output.stride(1), output.stride(2), output.stride(3));
108 |
109 | int n = output.numel();
110 |
111 | AT_DISPATCH_FLOATING_TYPES_AND_HALF(input1.type(), "channelnorm_forward", ([&] {
112 |
113 | kernel_channelnorm_update_output<scalar_t><<< (n + CUDA_NUM_THREADS - 1)/CUDA_NUM_THREADS, CUDA_NUM_THREADS, 0, at::cuda::getCurrentCUDAStream() >>>(
114 | //at::globalContext().getCurrentCUDAStream() >>>(
115 | n,
116 | input1.data<scalar_t>(),
117 | input1_size,
118 | input1_stride,
119 | output.data<scalar_t>(),
120 | output_size,
121 | output_stride,
122 | norm_deg);
123 |
124 | }));
125 |
126 | // TODO: ATen-equivalent check
127 |
128 | // THCudaCheck(cudaGetLastError());
129 | }
130 |
131 | void channelnorm_kernel_backward(
132 | at::Tensor& input1,
133 | at::Tensor& output,
134 | at::Tensor& gradOutput,
135 | at::Tensor& gradInput1,
136 | int norm_deg) {
137 |
138 | const long4 input1_size = make_long4(input1.size(0), input1.size(1), input1.size(2), input1.size(3));
139 | const long4 input1_stride = make_long4(input1.stride(0), input1.stride(1), input1.stride(2), input1.stride(3));
140 |
141 | const long4 output_size = make_long4(output.size(0), output.size(1), output.size(2), output.size(3));
142 | const long4 output_stride = make_long4(output.stride(0), output.stride(1), output.stride(2), output.stride(3));
143 |
144 | const long4 gradOutput_size = make_long4(gradOutput.size(0), gradOutput.size(1), gradOutput.size(2), gradOutput.size(3));
145 | const long4 gradOutput_stride = make_long4(gradOutput.stride(0), gradOutput.stride(1), gradOutput.stride(2), gradOutput.stride(3));
146 |
147 | const long4 gradInput1_size = make_long4(gradInput1.size(0), gradInput1.size(1), gradInput1.size(2), gradInput1.size(3));
148 | const long4 gradInput1_stride = make_long4(gradInput1.stride(0), gradInput1.stride(1), gradInput1.stride(2), gradInput1.stride(3));
149 |
150 | int n = gradInput1.numel();
151 |
152 | AT_DISPATCH_FLOATING_TYPES_AND_HALF(input1.type(), "channelnorm_backward_input1", ([&] {
153 |
154 | kernel_channelnorm_backward_input1<scalar_t><<< (n + CUDA_NUM_THREADS - 1)/CUDA_NUM_THREADS, CUDA_NUM_THREADS, 0, at::cuda::getCurrentCUDAStream() >>>(
155 | //at::globalContext().getCurrentCUDAStream() >>>(
156 | n,
157 | input1.data<scalar_t>(),
158 | input1_size,
159 | input1_stride,
160 | output.data<scalar_t>(),
161 | output_size,
162 | output_stride,
163 | gradOutput.data<scalar_t>(),
164 | gradOutput_size,
165 | gradOutput_stride,
166 | gradInput1.data<scalar_t>(),
167 | gradInput1_size,
168 | gradInput1_stride,
169 | norm_deg
170 | );
171 |
172 | }));
173 |
174 | // TODO: Add ATen-equivalent check
175 |
176 | // THCudaCheck(cudaGetLastError());
177 | }
178 |
--------------------------------------------------------------------------------
/dvs/flownet2/networks/channelnorm_package/channelnorm_kernel.cuh:
--------------------------------------------------------------------------------
1 | #pragma once
2 |
3 | #include <ATen/ATen.h>
4 |
5 | void channelnorm_kernel_forward(
6 | at::Tensor& input1,
7 | at::Tensor& output,
8 | int norm_deg);
9 |
10 |
11 | void channelnorm_kernel_backward(
12 | at::Tensor& input1,
13 | at::Tensor& output,
14 | at::Tensor& gradOutput,
15 | at::Tensor& gradInput1,
16 | int norm_deg);
17 |
--------------------------------------------------------------------------------
/dvs/flownet2/networks/channelnorm_package/setup.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python3
2 | import os
3 | import torch
4 |
5 | from setuptools import setup
6 | from torch.utils.cpp_extension import BuildExtension, CUDAExtension
7 |
8 | cxx_args = ['-std=c++11']
9 |
10 | nvcc_args = [
11 | '-gencode', 'arch=compute_52,code=sm_52',
12 | '-gencode', 'arch=compute_60,code=sm_60',
13 | '-gencode', 'arch=compute_61,code=sm_61',
14 | '-gencode', 'arch=compute_70,code=sm_70',
15 | '-gencode', 'arch=compute_70,code=compute_70'
16 | ]
17 |
18 | setup(
19 | name='channelnorm_cuda',
20 | ext_modules=[
21 | CUDAExtension('channelnorm_cuda', [
22 | 'channelnorm_cuda.cc',
23 | 'channelnorm_kernel.cu'
24 | ], extra_compile_args={'cxx': cxx_args, 'nvcc': nvcc_args})
25 | ],
26 | cmdclass={
27 | 'build_ext': BuildExtension
28 | })
29 |
--------------------------------------------------------------------------------
/dvs/flownet2/networks/correlation_package/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/googleinterns/deep-stabilization/7159c09d21aee3fc2098c64698c1300e40e3a8ea/dvs/flownet2/networks/correlation_package/__init__.py
--------------------------------------------------------------------------------
/dvs/flownet2/networks/correlation_package/correlation.py:
--------------------------------------------------------------------------------
1 | import torch
2 | from torch.nn.modules.module import Module
3 | from torch.autograd import Function
4 | import correlation_cuda
5 |
6 | class CorrelationFunction(Function):
7 |
8 | @staticmethod
9 | def forward(ctx, input1, input2, pad_size=3, kernel_size=3, max_displacement=20, stride1=1, stride2=2, corr_multiply=1):
10 | ctx.save_for_backward(input1, input2)
11 |
12 | ctx.pad_size = pad_size
13 | ctx.kernel_size = kernel_size
14 | ctx.max_displacement = max_displacement
15 | ctx.stride1 = stride1
16 | ctx.stride2 = stride2
17 | ctx.corr_multiply = corr_multiply
18 |
19 | with torch.cuda.device_of(input1):
20 | rbot1 = input1.new()
21 | rbot2 = input2.new()
22 | output = input1.new()
23 |
24 | correlation_cuda.forward(input1, input2, rbot1, rbot2, output,
25 | ctx.pad_size, ctx.kernel_size, ctx.max_displacement, ctx.stride1, ctx.stride2, ctx.corr_multiply)
26 |
27 | return output
28 |
29 | @staticmethod
30 | def backward(ctx, grad_output):
31 | input1, input2 = ctx.saved_tensors
32 |
33 | with torch.cuda.device_of(input1):
34 | rbot1 = input1.new()
35 | rbot2 = input2.new()
36 |
37 | grad_input1 = input1.new()
38 | grad_input2 = input2.new()
39 |
40 | correlation_cuda.backward(input1, input2, rbot1, rbot2, grad_output, grad_input1, grad_input2,
41 | ctx.pad_size, ctx.kernel_size, ctx.max_displacement, ctx.stride1, ctx.stride2, ctx.corr_multiply)
42 |
43 | return grad_input1, grad_input2, None, None, None, None, None, None
44 |
45 |
46 | class Correlation(Module):
47 | def __init__(self, pad_size=0, kernel_size=0, max_displacement=0, stride1=1, stride2=2, corr_multiply=1):
48 | super(Correlation, self).__init__()
49 | self.pad_size = pad_size
50 | self.kernel_size = kernel_size
51 | self.max_displacement = max_displacement
52 | self.stride1 = stride1
53 | self.stride2 = stride2
54 | self.corr_multiply = corr_multiply
55 |
56 | def forward(self, input1, input2):
57 |
58 | result = CorrelationFunction.apply(input1, input2, self.pad_size, self.kernel_size, self.max_displacement, self.stride1, self.stride2, self.corr_multiply)
59 |
60 | return result
61 |
62 |
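FlowNetC instantiates this module with pad_size=20, kernel_size=1, max_displacement=20, stride1=1, stride2=2, which yields 441 output channels. A naive (and slow) PyTorch reference for that kernel_size=1 case, assuming the CUDA kernel averages the per-channel products of the two feature maps for every sampled displacement:

import torch
import torch.nn.functional as F

def correlation_reference(f1, f2, max_displacement=20, stride2=2):
    # f1, f2: (B, C, H, W); output: (B, (2*max_displacement//stride2 + 1)**2, H, W)
    B, C, H, W = f1.shape
    pad = max_displacement
    f2p = F.pad(f2, (pad, pad, pad, pad))
    offsets = range(-max_displacement, max_displacement + 1, stride2)
    out = []
    for dy in offsets:
        for dx in offsets:
            shifted = f2p[:, :, pad + dy:pad + dy + H, pad + dx:pad + dx + W]
            out.append((f1 * shifted).mean(dim=1, keepdim=True))
    return torch.cat(out, dim=1)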
--------------------------------------------------------------------------------
/dvs/flownet2/networks/correlation_package/correlation_cuda.cc:
--------------------------------------------------------------------------------
1 | #include <torch/torch.h>
2 | #include <ATen/ATen.h>
3 | #include <ATen/Context.h>
4 | #include <ATen/cuda/CUDAContext.h>
5 | #include <stdio.h>
6 | #include <iostream>
7 |
8 | #include "correlation_cuda_kernel.cuh"
9 |
10 | int correlation_forward_cuda(at::Tensor& input1, at::Tensor& input2, at::Tensor& rInput1, at::Tensor& rInput2, at::Tensor& output,
11 | int pad_size,
12 | int kernel_size,
13 | int max_displacement,
14 | int stride1,
15 | int stride2,
16 | int corr_type_multiply)
17 | {
18 |
19 | int batchSize = input1.size(0);
20 |
21 | int nInputChannels = input1.size(1);
22 | int inputHeight = input1.size(2);
23 | int inputWidth = input1.size(3);
24 |
25 | int kernel_radius = (kernel_size - 1) / 2;
26 | int border_radius = kernel_radius + max_displacement;
27 |
28 | int paddedInputHeight = inputHeight + 2 * pad_size;
29 | int paddedInputWidth = inputWidth + 2 * pad_size;
30 |
31 | int nOutputChannels = ((max_displacement/stride2)*2 + 1) * ((max_displacement/stride2)*2 + 1);
32 |
33 | int outputHeight = ceil(static_cast<float>(paddedInputHeight - 2 * border_radius) / static_cast<float>(stride1));
34 | int outputwidth = ceil(static_cast<float>(paddedInputWidth - 2 * border_radius) / static_cast<float>(stride1));
35 |
36 | rInput1.resize_({batchSize, paddedInputHeight, paddedInputWidth, nInputChannels});
37 | rInput2.resize_({batchSize, paddedInputHeight, paddedInputWidth, nInputChannels});
38 | output.resize_({batchSize, nOutputChannels, outputHeight, outputwidth});
39 |
40 | rInput1.fill_(0);
41 | rInput2.fill_(0);
42 | output.fill_(0);
43 |
44 | int success = correlation_forward_cuda_kernel(
45 | output,
46 | output.size(0),
47 | output.size(1),
48 | output.size(2),
49 | output.size(3),
50 | output.stride(0),
51 | output.stride(1),
52 | output.stride(2),
53 | output.stride(3),
54 | input1,
55 | input1.size(1),
56 | input1.size(2),
57 | input1.size(3),
58 | input1.stride(0),
59 | input1.stride(1),
60 | input1.stride(2),
61 | input1.stride(3),
62 | input2,
63 | input2.size(1),
64 | input2.stride(0),
65 | input2.stride(1),
66 | input2.stride(2),
67 | input2.stride(3),
68 | rInput1,
69 | rInput2,
70 | pad_size,
71 | kernel_size,
72 | max_displacement,
73 | stride1,
74 | stride2,
75 | corr_type_multiply,
76 | at::cuda::getCurrentCUDAStream()
77 | //at::globalContext().getCurrentCUDAStream()
78 | );
79 |
80 | //check for errors
81 | if (!success) {
82 | AT_ERROR("CUDA call failed");
83 | }
84 |
85 | return 1;
86 |
87 | }
88 |
89 | int correlation_backward_cuda(at::Tensor& input1, at::Tensor& input2, at::Tensor& rInput1, at::Tensor& rInput2, at::Tensor& gradOutput,
90 | at::Tensor& gradInput1, at::Tensor& gradInput2,
91 | int pad_size,
92 | int kernel_size,
93 | int max_displacement,
94 | int stride1,
95 | int stride2,
96 | int corr_type_multiply)
97 | {
98 |
99 | int batchSize = input1.size(0);
100 | int nInputChannels = input1.size(1);
101 | int paddedInputHeight = input1.size(2)+ 2 * pad_size;
102 | int paddedInputWidth = input1.size(3)+ 2 * pad_size;
103 |
104 | int height = input1.size(2);
105 | int width = input1.size(3);
106 |
107 | rInput1.resize_({batchSize, paddedInputHeight, paddedInputWidth, nInputChannels});
108 | rInput2.resize_({batchSize, paddedInputHeight, paddedInputWidth, nInputChannels});
109 | gradInput1.resize_({batchSize, nInputChannels, height, width});
110 | gradInput2.resize_({batchSize, nInputChannels, height, width});
111 |
112 | rInput1.fill_(0);
113 | rInput2.fill_(0);
114 | gradInput1.fill_(0);
115 | gradInput2.fill_(0);
116 |
117 | int success = correlation_backward_cuda_kernel(gradOutput,
118 | gradOutput.size(0),
119 | gradOutput.size(1),
120 | gradOutput.size(2),
121 | gradOutput.size(3),
122 | gradOutput.stride(0),
123 | gradOutput.stride(1),
124 | gradOutput.stride(2),
125 | gradOutput.stride(3),
126 | input1,
127 | input1.size(1),
128 | input1.size(2),
129 | input1.size(3),
130 | input1.stride(0),
131 | input1.stride(1),
132 | input1.stride(2),
133 | input1.stride(3),
134 | input2,
135 | input2.stride(0),
136 | input2.stride(1),
137 | input2.stride(2),
138 | input2.stride(3),
139 | gradInput1,
140 | gradInput1.stride(0),
141 | gradInput1.stride(1),
142 | gradInput1.stride(2),
143 | gradInput1.stride(3),
144 | gradInput2,
145 | gradInput2.size(1),
146 | gradInput2.stride(0),
147 | gradInput2.stride(1),
148 | gradInput2.stride(2),
149 | gradInput2.stride(3),
150 | rInput1,
151 | rInput2,
152 | pad_size,
153 | kernel_size,
154 | max_displacement,
155 | stride1,
156 | stride2,
157 | corr_type_multiply,
158 | at::cuda::getCurrentCUDAStream()
159 | //at::globalContext().getCurrentCUDAStream()
160 | );
161 |
162 | if (!success) {
163 | AT_ERROR("CUDA call failed");
164 | }
165 |
166 | return 1;
167 | }
168 |
169 | PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) {
170 | m.def("forward", &correlation_forward_cuda, "Correlation forward (CUDA)");
171 | m.def("backward", &correlation_backward_cuda, "Correlation backward (CUDA)");
172 | }
173 |
174 |
--------------------------------------------------------------------------------
/dvs/flownet2/networks/correlation_package/correlation_cuda_kernel.cuh:
--------------------------------------------------------------------------------
1 | #pragma once
2 |
3 | #include <ATen/ATen.h>
4 | #include <ATen/Context.h>
5 | #include <cuda_runtime.h>
6 |
7 | int correlation_forward_cuda_kernel(at::Tensor& output,
8 | int ob,
9 | int oc,
10 | int oh,
11 | int ow,
12 | int osb,
13 | int osc,
14 | int osh,
15 | int osw,
16 |
17 | at::Tensor& input1,
18 | int ic,
19 | int ih,
20 | int iw,
21 | int isb,
22 | int isc,
23 | int ish,
24 | int isw,
25 |
26 | at::Tensor& input2,
27 | int gc,
28 | int gsb,
29 | int gsc,
30 | int gsh,
31 | int gsw,
32 |
33 | at::Tensor& rInput1,
34 | at::Tensor& rInput2,
35 | int pad_size,
36 | int kernel_size,
37 | int max_displacement,
38 | int stride1,
39 | int stride2,
40 | int corr_type_multiply,
41 | cudaStream_t stream);
42 |
43 |
44 | int correlation_backward_cuda_kernel(
45 | at::Tensor& gradOutput,
46 | int gob,
47 | int goc,
48 | int goh,
49 | int gow,
50 | int gosb,
51 | int gosc,
52 | int gosh,
53 | int gosw,
54 |
55 | at::Tensor& input1,
56 | int ic,
57 | int ih,
58 | int iw,
59 | int isb,
60 | int isc,
61 | int ish,
62 | int isw,
63 |
64 | at::Tensor& input2,
65 | int gsb,
66 | int gsc,
67 | int gsh,
68 | int gsw,
69 |
70 | at::Tensor& gradInput1,
71 | int gisb,
72 | int gisc,
73 | int gish,
74 | int gisw,
75 |
76 | at::Tensor& gradInput2,
77 | int ggc,
78 | int ggsb,
79 | int ggsc,
80 | int ggsh,
81 | int ggsw,
82 |
83 | at::Tensor& rInput1,
84 | at::Tensor& rInput2,
85 | int pad_size,
86 | int kernel_size,
87 | int max_displacement,
88 | int stride1,
89 | int stride2,
90 | int corr_type_multiply,
91 | cudaStream_t stream);
92 |
--------------------------------------------------------------------------------
/dvs/flownet2/networks/correlation_package/setup.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python3
2 | import os
3 | import torch
4 |
5 | from setuptools import setup, find_packages
6 | from torch.utils.cpp_extension import BuildExtension, CUDAExtension
7 |
8 | cxx_args = ['-std=c++11']
9 |
10 | nvcc_args = [
11 | '-gencode', 'arch=compute_50,code=sm_50',
12 | '-gencode', 'arch=compute_52,code=sm_52',
13 | '-gencode', 'arch=compute_60,code=sm_60',
14 | '-gencode', 'arch=compute_61,code=sm_61',
15 | '-gencode', 'arch=compute_70,code=sm_70',
16 | '-gencode', 'arch=compute_70,code=compute_70'
17 | ]
18 |
19 | setup(
20 | name='correlation_cuda',
21 | ext_modules=[
22 | CUDAExtension('correlation_cuda', [
23 | 'correlation_cuda.cc',
24 | 'correlation_cuda_kernel.cu'
25 | ], extra_compile_args={'cxx': cxx_args, 'nvcc': nvcc_args})
26 | ],
27 | cmdclass={
28 | 'build_ext': BuildExtension
29 | })
30 |
--------------------------------------------------------------------------------
/dvs/flownet2/networks/resample2d_package/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/googleinterns/deep-stabilization/7159c09d21aee3fc2098c64698c1300e40e3a8ea/dvs/flownet2/networks/resample2d_package/__init__.py
--------------------------------------------------------------------------------
/dvs/flownet2/networks/resample2d_package/resample2d.py:
--------------------------------------------------------------------------------
1 | from torch.nn.modules.module import Module
2 | from torch.autograd import Function, Variable
3 | import resample2d_cuda
4 |
5 | class Resample2dFunction(Function):
6 |
7 | @staticmethod
8 | def forward(ctx, input1, input2, kernel_size=1, bilinear= True):
9 | assert input1.is_contiguous()
10 | assert input2.is_contiguous()
11 |
12 | ctx.save_for_backward(input1, input2)
13 | ctx.kernel_size = kernel_size
14 | ctx.bilinear = bilinear
15 |
16 | _, d, _, _ = input1.size()
17 | b, _, h, w = input2.size()
18 | output = input1.new(b, d, h, w).zero_()
19 |
20 | resample2d_cuda.forward(input1, input2, output, kernel_size, bilinear)
21 |
22 | return output
23 |
24 | @staticmethod
25 | def backward(ctx, grad_output):
26 | grad_output = grad_output.contiguous()
27 | assert grad_output.is_contiguous()
28 |
29 | input1, input2 = ctx.saved_tensors
30 |
31 | grad_input1 = Variable(input1.new(input1.size()).zero_())
32 | grad_input2 = Variable(input1.new(input2.size()).zero_())
33 |
34 | resample2d_cuda.backward(input1, input2, grad_output.data,
35 | grad_input1.data, grad_input2.data,
36 | ctx.kernel_size, ctx.bilinear)
37 |
38 | return grad_input1, grad_input2, None, None
39 |
40 | class Resample2d(Module):
41 |
42 | def __init__(self, kernel_size=1, bilinear = True):
43 | super(Resample2d, self).__init__()
44 | self.kernel_size = kernel_size
45 | self.bilinear = bilinear
46 |
47 | def forward(self, input1, input2):
48 | input1_c = input1.contiguous()
49 | return Resample2dFunction.apply(input1_c, input2, self.kernel_size, self.bilinear)
50 |
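Resample2d warps input1 by the flow field in input2 via bilinear sampling at (x + dx, y + dy), as the CUDA kernel further below makes explicit. A grid_sample-based sketch of the same warp (boundary handling approximates the kernel's clamping; assumes a PyTorch version that supports the indexing argument of meshgrid):

import torch
import torch.nn.functional as F

def warp_reference(image, flow):
    # image: (B, C, H, W); flow: (B, 2, H, W) with channel 0 = horizontal, channel 1 = vertical displacement
    B, _, H, W = flow.shape
    ys, xs = torch.meshgrid(torch.arange(H, dtype=flow.dtype, device=flow.device),
                            torch.arange(W, dtype=flow.dtype, device=flow.device), indexing='ij')
    x_new = xs + flow[:, 0]
    y_new = ys + flow[:, 1]
    # normalize sampling coordinates to [-1, 1] as expected by grid_sample
    grid = torch.stack((2.0 * x_new / (W - 1) - 1.0, 2.0 * y_new / (H - 1) - 1.0), dim=-1)
    return F.grid_sample(image, grid, mode='bilinear', padding_mode='border', align_corners=True)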
--------------------------------------------------------------------------------
/dvs/flownet2/networks/resample2d_package/resample2d_cuda.cc:
--------------------------------------------------------------------------------
1 | #include <ATen/ATen.h>
2 | #include <torch/torch.h>
3 |
4 | #include "resample2d_kernel.cuh"
5 |
6 | int resample2d_cuda_forward(
7 | at::Tensor& input1,
8 | at::Tensor& input2,
9 | at::Tensor& output,
10 | int kernel_size, bool bilinear) {
11 | resample2d_kernel_forward(input1, input2, output, kernel_size, bilinear);
12 | return 1;
13 | }
14 |
15 | int resample2d_cuda_backward(
16 | at::Tensor& input1,
17 | at::Tensor& input2,
18 | at::Tensor& gradOutput,
19 | at::Tensor& gradInput1,
20 | at::Tensor& gradInput2,
21 | int kernel_size, bool bilinear) {
22 | resample2d_kernel_backward(input1, input2, gradOutput, gradInput1, gradInput2, kernel_size, bilinear);
23 | return 1;
24 | }
25 |
26 |
27 |
28 | PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) {
29 | m.def("forward", &resample2d_cuda_forward, "Resample2D forward (CUDA)");
30 | m.def("backward", &resample2d_cuda_backward, "Resample2D backward (CUDA)");
31 | }
32 |
33 |
--------------------------------------------------------------------------------
/dvs/flownet2/networks/resample2d_package/resample2d_kernel.cu:
--------------------------------------------------------------------------------
1 | #include <ATen/ATen.h>
2 | #include <ATen/Context.h>
3 | #include <ATen/cuda/CUDAContext.h>
4 |
5 | #define CUDA_NUM_THREADS 512
6 | #define THREADS_PER_BLOCK 64
7 |
8 | #define DIM0(TENSOR) ((TENSOR).x)
9 | #define DIM1(TENSOR) ((TENSOR).y)
10 | #define DIM2(TENSOR) ((TENSOR).z)
11 | #define DIM3(TENSOR) ((TENSOR).w)
12 |
13 | #define DIM3_INDEX(TENSOR, xx, yy, zz, ww) ((TENSOR)[((xx) * (TENSOR##_stride.x)) + ((yy) * (TENSOR##_stride.y)) + ((zz) * (TENSOR##_stride.z)) + ((ww) * (TENSOR##_stride.w))])
14 |
15 | template <typename scalar_t>
16 | __global__ void kernel_resample2d_update_output(const int n,
17 | const scalar_t* __restrict__ input1, const long4 input1_size, const long4 input1_stride,
18 | const scalar_t* __restrict__ input2, const long4 input2_size, const long4 input2_stride,
19 | scalar_t* __restrict__ output, const long4 output_size, const long4 output_stride, int kernel_size, bool bilinear) {
20 | int index = blockIdx.x * blockDim.x + threadIdx.x;
21 |
22 | if (index >= n) {
23 | return;
24 | }
25 |
26 | scalar_t val = 0.0f;
27 |
28 | int dim_b = DIM0(output_size);
29 | int dim_c = DIM1(output_size);
30 | int dim_h = DIM2(output_size);
31 | int dim_w = DIM3(output_size);
32 | int dim_chw = dim_c * dim_h * dim_w;
33 | int dim_hw = dim_h * dim_w;
34 |
35 | int b = ( index / dim_chw ) % dim_b;
36 | int c = ( index / dim_hw ) % dim_c;
37 | int y = ( index / dim_w ) % dim_h;
38 | int x = ( index ) % dim_w;
39 |
40 | scalar_t dx = DIM3_INDEX(input2, b, 0, y, x);
41 | scalar_t dy = DIM3_INDEX(input2, b, 1, y, x);
42 |
43 | scalar_t xf = static_cast<scalar_t>(x) + dx;
44 | scalar_t yf = static_cast<scalar_t>(y) + dy;
45 | scalar_t alpha = xf - floor(xf); // alpha
46 | scalar_t beta = yf - floor(yf); // beta
47 |
48 | if (bilinear) {
49 | int xL = max(min( int (floor(xf)), dim_w-1), 0);
50 | int xR = max(min( int (floor(xf)+1), dim_w -1), 0);
51 | int yT = max(min( int (floor(yf)), dim_h-1), 0);
52 | int yB = max(min( int (floor(yf)+1), dim_h-1), 0);
53 |
54 | for (int fy = 0; fy < kernel_size; fy += 1) {
55 | for (int fx = 0; fx < kernel_size; fx += 1) {
56 | val += static_cast<scalar_t>((1. - alpha)*(1. - beta) * DIM3_INDEX(input1, b, c, yT + fy, xL + fx));
57 | val += static_cast<scalar_t>((alpha)*(1. - beta) * DIM3_INDEX(input1, b, c, yT + fy, xR + fx));
58 | val += static_cast<scalar_t>((1. - alpha)*(beta) * DIM3_INDEX(input1, b, c, yB + fy, xL + fx));
59 | val += static_cast<scalar_t>((alpha)*(beta) * DIM3_INDEX(input1, b, c, yB + fy, xR + fx));
60 | }
61 | }
62 |
63 | output[index] = val;
64 | }
65 | else {
66 | int xN = max(min( int (floor(xf + 0.5)), dim_w - 1), 0);
67 | int yN = max(min( int (floor(yf + 0.5)), dim_h - 1), 0);
68 |
69 | output[index] = static_cast<scalar_t> ( DIM3_INDEX(input1, b, c, yN, xN) );
70 | }
71 |
72 | }
73 |
74 |
75 | template <typename scalar_t>
76 | __global__ void kernel_resample2d_backward_input1(
77 | const int n, const scalar_t* __restrict__ input1, const long4 input1_size, const long4 input1_stride,
78 | const scalar_t* __restrict__ input2, const long4 input2_size, const long4 input2_stride,
79 | const scalar_t* __restrict__ gradOutput, const long4 gradOutput_size, const long4 gradOutput_stride,
80 | scalar_t* __restrict__ gradInput, const long4 gradInput_size, const long4 gradInput_stride, int kernel_size, bool bilinear) {
81 |
82 | int index = blockIdx.x * blockDim.x + threadIdx.x;
83 |
84 | if (index >= n) {
85 | return;
86 | }
87 |
88 | int dim_b = DIM0(gradOutput_size);
89 | int dim_c = DIM1(gradOutput_size);
90 | int dim_h = DIM2(gradOutput_size);
91 | int dim_w = DIM3(gradOutput_size);
92 | int dim_chw = dim_c * dim_h * dim_w;
93 | int dim_hw = dim_h * dim_w;
94 |
95 | int b = ( index / dim_chw ) % dim_b;
96 | int c = ( index / dim_hw ) % dim_c;
97 | int y = ( index / dim_w ) % dim_h;
98 | int x = ( index ) % dim_w;
99 |
100 | scalar_t dx = DIM3_INDEX(input2, b, 0, y, x);
101 | scalar_t dy = DIM3_INDEX(input2, b, 1, y, x);
102 |
103 | scalar_t xf = static_cast<scalar_t>(x) + dx;
104 | scalar_t yf = static_cast<scalar_t>(y) + dy;
105 | scalar_t alpha = xf - int(xf); // alpha
106 | scalar_t beta = yf - int(yf); // beta
107 |
108 | int idim_h = DIM2(input1_size);
109 | int idim_w = DIM3(input1_size);
110 |
111 | int xL = max(min( int (floor(xf)), idim_w-1), 0);
112 | int xR = max(min( int (floor(xf)+1), idim_w -1), 0);
113 | int yT = max(min( int (floor(yf)), idim_h-1), 0);
114 | int yB = max(min( int (floor(yf)+1), idim_h-1), 0);
115 |
116 | for (int fy = 0; fy < kernel_size; fy += 1) {
117 | for (int fx = 0; fx < kernel_size; fx += 1) {
118 | atomicAdd(&DIM3_INDEX(gradInput, b, c, (yT + fy), (xL + fx)), (1-alpha)*(1-beta) * DIM3_INDEX(gradOutput, b, c, y, x));
119 | atomicAdd(&DIM3_INDEX(gradInput, b, c, (yT + fy), (xR + fx)), (alpha)*(1-beta) * DIM3_INDEX(gradOutput, b, c, y, x));
120 | atomicAdd(&DIM3_INDEX(gradInput, b, c, (yB + fy), (xL + fx)), (1-alpha)*(beta) * DIM3_INDEX(gradOutput, b, c, y, x));
121 | atomicAdd(&DIM3_INDEX(gradInput, b, c, (yB + fy), (xR + fx)), (alpha)*(beta) * DIM3_INDEX(gradOutput, b, c, y, x));
122 | }
123 | }
124 |
125 | }
126 |
127 | template <typename scalar_t>
128 | __global__ void kernel_resample2d_backward_input2(
129 | const int n, const scalar_t* __restrict__ input1, const long4 input1_size, const long4 input1_stride,
130 | const scalar_t* __restrict__ input2, const long4 input2_size, const long4 input2_stride,
131 | const scalar_t* __restrict__ gradOutput, const long4 gradOutput_size, const long4 gradOutput_stride,
132 | scalar_t* __restrict__ gradInput, const long4 gradInput_size, const long4 gradInput_stride, int kernel_size, bool bilinear) {
133 |
134 | int index = blockIdx.x * blockDim.x + threadIdx.x;
135 |
136 | if (index >= n) {
137 | return;
138 | }
139 |
140 | scalar_t output = 0.0;
141 | int kernel_rad = (kernel_size - 1)/2;
142 |
143 | int dim_b = DIM0(gradInput_size);
144 | int dim_c = DIM1(gradInput_size);
145 | int dim_h = DIM2(gradInput_size);
146 | int dim_w = DIM3(gradInput_size);
147 | int dim_chw = dim_c * dim_h * dim_w;
148 | int dim_hw = dim_h * dim_w;
149 |
150 | int b = ( index / dim_chw ) % dim_b;
151 | int c = ( index / dim_hw ) % dim_c;
152 | int y = ( index / dim_w ) % dim_h;
153 | int x = ( index ) % dim_w;
154 |
155 | int odim_c = DIM1(gradOutput_size);
156 |
157 | scalar_t dx = DIM3_INDEX(input2, b, 0, y, x);
158 | scalar_t dy = DIM3_INDEX(input2, b, 1, y, x);
159 |
160 | scalar_t xf = static_cast<scalar_t>(x) + dx;
161 | scalar_t yf = static_cast<scalar_t>(y) + dy;
162 |
163 | int xL = max(min( int (floor(xf)), dim_w-1), 0);
164 | int xR = max(min( int (floor(xf)+1), dim_w -1), 0);
165 | int yT = max(min( int (floor(yf)), dim_h-1), 0);
166 | int yB = max(min( int (floor(yf)+1), dim_h-1), 0);
167 |
168 | if (c % 2) {
169 | float gamma = 1 - (xf - floor(xf)); // alpha
170 | for (int i = 0; i <= 2*kernel_rad; ++i) {
171 | for (int j = 0; j <= 2*kernel_rad; ++j) {
172 | for (int ch = 0; ch < odim_c; ++ch) {
173 | output += (gamma) * DIM3_INDEX(gradOutput, b, ch, y, x) * DIM3_INDEX(input1, b, ch, (yB + j), (xL + i));
174 | output -= (gamma) * DIM3_INDEX(gradOutput, b, ch, y, x) * DIM3_INDEX(input1, b, ch, (yT + j), (xL + i));
175 | output += (1-gamma) * DIM3_INDEX(gradOutput, b, ch, y, x) * DIM3_INDEX(input1, b, ch, (yB + j), (xR + i));
176 | output -= (1-gamma) * DIM3_INDEX(gradOutput, b, ch, y, x) * DIM3_INDEX(input1, b, ch, (yT + j), (xR + i));
177 | }
178 | }
179 | }
180 | }
181 | else {
182 | float gamma = 1 - (yf - floor(yf)); // alpha
183 | for (int i = 0; i <= 2*kernel_rad; ++i) {
184 | for (int j = 0; j <= 2*kernel_rad; ++j) {
185 | for (int ch = 0; ch < odim_c; ++ch) {
186 | output += (gamma) * DIM3_INDEX(gradOutput, b, ch, y, x) * DIM3_INDEX(input1, b, ch, (yT + j), (xR + i));
187 | output -= (gamma) * DIM3_INDEX(gradOutput, b, ch, y, x) * DIM3_INDEX(input1, b, ch, (yT + j), (xL + i));
188 | output += (1-gamma) * DIM3_INDEX(gradOutput, b, ch, y, x) * DIM3_INDEX(input1, b, ch, (yB + j), (xR + i));
189 | output -= (1-gamma) * DIM3_INDEX(gradOutput, b, ch, y, x) * DIM3_INDEX(input1, b, ch, (yB + j), (xL + i));
190 | }
191 | }
192 | }
193 |
194 | }
195 |
196 | gradInput[index] = output;
197 |
198 | }
199 |
200 | void resample2d_kernel_forward(
201 | at::Tensor& input1,
202 | at::Tensor& input2,
203 | at::Tensor& output,
204 | int kernel_size,
205 | bool bilinear) {
206 |
207 | int n = output.numel();
208 |
209 | const long4 input1_size = make_long4(input1.size(0), input1.size(1), input1.size(2), input1.size(3));
210 | const long4 input1_stride = make_long4(input1.stride(0), input1.stride(1), input1.stride(2), input1.stride(3));
211 |
212 | const long4 input2_size = make_long4(input2.size(0), input2.size(1), input2.size(2), input2.size(3));
213 | const long4 input2_stride = make_long4(input2.stride(0), input2.stride(1), input2.stride(2), input2.stride(3));
214 |
215 | const long4 output_size = make_long4(output.size(0), output.size(1), output.size(2), output.size(3));
216 | const long4 output_stride = make_long4(output.stride(0), output.stride(1), output.stride(2), output.stride(3));
217 |
218 | // TODO: when atomicAdd gets resolved, change to AT_DISPATCH_FLOATING_TYPES_AND_HALF
219 | // AT_DISPATCH_FLOATING_TYPES(input1.type(), "resample_forward_kernel", ([&] {
220 |
221 | kernel_resample2d_update_output<float><<< (n + CUDA_NUM_THREADS - 1)/CUDA_NUM_THREADS, CUDA_NUM_THREADS, 0, at::cuda::getCurrentCUDAStream() >>>(
222 | //at::globalContext().getCurrentCUDAStream() >>>(
223 | n,
224 |         input1.data<float>(),
225 | input1_size,
226 | input1_stride,
227 |         input2.data<float>(),
228 | input2_size,
229 | input2_stride,
230 |         output.data<float>(),
231 | output_size,
232 | output_stride,
233 | kernel_size,
234 | bilinear);
235 |
236 | // }));
237 |
238 | // TODO: ATen-equivalent check
239 |
240 | // THCudaCheck(cudaGetLastError());
241 |
242 | }
243 |
244 | void resample2d_kernel_backward(
245 | at::Tensor& input1,
246 | at::Tensor& input2,
247 | at::Tensor& gradOutput,
248 | at::Tensor& gradInput1,
249 | at::Tensor& gradInput2,
250 | int kernel_size,
251 | bool bilinear) {
252 |
253 | int n = gradOutput.numel();
254 |
255 | const long4 input1_size = make_long4(input1.size(0), input1.size(1), input1.size(2), input1.size(3));
256 | const long4 input1_stride = make_long4(input1.stride(0), input1.stride(1), input1.stride(2), input1.stride(3));
257 |
258 | const long4 input2_size = make_long4(input2.size(0), input2.size(1), input2.size(2), input2.size(3));
259 | const long4 input2_stride = make_long4(input2.stride(0), input2.stride(1), input2.stride(2), input2.stride(3));
260 |
261 | const long4 gradOutput_size = make_long4(gradOutput.size(0), gradOutput.size(1), gradOutput.size(2), gradOutput.size(3));
262 | const long4 gradOutput_stride = make_long4(gradOutput.stride(0), gradOutput.stride(1), gradOutput.stride(2), gradOutput.stride(3));
263 |
264 | const long4 gradInput1_size = make_long4(gradInput1.size(0), gradInput1.size(1), gradInput1.size(2), gradInput1.size(3));
265 | const long4 gradInput1_stride = make_long4(gradInput1.stride(0), gradInput1.stride(1), gradInput1.stride(2), gradInput1.stride(3));
266 |
267 | // AT_DISPATCH_FLOATING_TYPES(input1.type(), "resample_backward_input1", ([&] {
268 |
269 |     kernel_resample2d_backward_input1<float><<< (n + CUDA_NUM_THREADS - 1)/CUDA_NUM_THREADS, CUDA_NUM_THREADS, 0, at::cuda::getCurrentCUDAStream() >>>(
270 | //at::globalContext().getCurrentCUDAStream() >>>(
271 | n,
272 |         input1.data<float>(),
273 | input1_size,
274 | input1_stride,
275 |         input2.data<float>(),
276 | input2_size,
277 | input2_stride,
278 |         gradOutput.data<float>(),
279 | gradOutput_size,
280 | gradOutput_stride,
281 |         gradInput1.data<float>(),
282 | gradInput1_size,
283 | gradInput1_stride,
284 | kernel_size,
285 | bilinear
286 | );
287 |
288 | // }));
289 |
290 | const long4 gradInput2_size = make_long4(gradInput2.size(0), gradInput2.size(1), gradInput2.size(2), gradInput2.size(3));
291 | const long4 gradInput2_stride = make_long4(gradInput2.stride(0), gradInput2.stride(1), gradInput2.stride(2), gradInput2.stride(3));
292 |
293 | n = gradInput2.numel();
294 |
295 | // AT_DISPATCH_FLOATING_TYPES(gradInput2.type(), "resample_backward_input2", ([&] {
296 |
297 |
298 |     kernel_resample2d_backward_input2<float><<< (n + CUDA_NUM_THREADS - 1)/CUDA_NUM_THREADS, CUDA_NUM_THREADS, 0, at::cuda::getCurrentCUDAStream() >>>(
299 | //at::globalContext().getCurrentCUDAStream() >>>(
300 | n,
301 |         input1.data<float>(),
302 | input1_size,
303 | input1_stride,
304 |         input2.data<float>(),
305 | input2_size,
306 | input2_stride,
307 |         gradOutput.data<float>(),
308 | gradOutput_size,
309 | gradOutput_stride,
310 |         gradInput2.data<float>(),
311 | gradInput2_size,
312 | gradInput2_stride,
313 | kernel_size,
314 | bilinear
315 | );
316 |
317 | // }));
318 |
319 | // TODO: Use the ATen equivalent to get last error
320 |
321 | // THCudaCheck(cudaGetLastError());
322 |
323 | }
324 |
--------------------------------------------------------------------------------
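Every kernel in resample2d_kernel.cu maps one CUDA thread to one output element and unpacks the flat thread index into (batch, channel, row, column) coordinates, as in the index arithmetic near the top of the backward_input2 kernel above. A minimal Python sketch of that unpacking, for illustration only (not part of the repo):

def unpack_index(index, dim_c, dim_h, dim_w):
    # flat index -> (b, c, y, x) for a contiguous NCHW tensor
    x = index % dim_w
    y = (index // dim_w) % dim_h
    c = (index // (dim_h * dim_w)) % dim_c
    b = index // (dim_c * dim_h * dim_w)
    return b, c, y, x

# Example: element 67 of a 2x3x4x5 tensor lives at (b, c, y, x) = (1, 0, 1, 2).
print(unpack_index(67, dim_c=3, dim_h=4, dim_w=5))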
/dvs/flownet2/networks/resample2d_package/resample2d_kernel.cuh:
--------------------------------------------------------------------------------
1 | #pragma once
2 |
3 | #include <ATen/ATen.h>
4 |
5 | void resample2d_kernel_forward(
6 | at::Tensor& input1,
7 | at::Tensor& input2,
8 | at::Tensor& output,
9 | int kernel_size,
10 | bool bilinear);
11 |
12 | void resample2d_kernel_backward(
13 | at::Tensor& input1,
14 | at::Tensor& input2,
15 | at::Tensor& gradOutput,
16 | at::Tensor& gradInput1,
17 | at::Tensor& gradInput2,
18 | int kernel_size,
19 | bool bilinear);
--------------------------------------------------------------------------------
/dvs/flownet2/networks/resample2d_package/setup.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python3
2 | import os
3 | import torch
4 |
5 | from setuptools import setup
6 | from torch.utils.cpp_extension import BuildExtension, CUDAExtension
7 |
8 | cxx_args = ['-std=c++11']
9 |
10 | nvcc_args = [
11 | '-gencode', 'arch=compute_50,code=sm_50',
12 | '-gencode', 'arch=compute_52,code=sm_52',
13 | '-gencode', 'arch=compute_60,code=sm_60',
14 | '-gencode', 'arch=compute_61,code=sm_61',
15 | '-gencode', 'arch=compute_70,code=sm_70',
16 | '-gencode', 'arch=compute_70,code=compute_70'
17 | ]
18 |
19 | setup(
20 | name='resample2d_cuda',
21 | ext_modules=[
22 | CUDAExtension('resample2d_cuda', [
23 | 'resample2d_cuda.cc',
24 | 'resample2d_kernel.cu'
25 | ], extra_compile_args={'cxx': cxx_args, 'nvcc': nvcc_args})
26 | ],
27 | cmdclass={
28 | 'build_ext': BuildExtension
29 | })
30 |
--------------------------------------------------------------------------------
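The -gencode list in setup.py only targets sm_50 through sm_70, so on a newer GPU the extension may build yet fail when a kernel launches. A quick pre-build check of what your device actually needs (a minimal sketch, assuming a CUDA-enabled PyTorch install; not part of the repo):

import torch

if torch.cuda.is_available():
    major, minor = torch.cuda.get_device_capability(0)
    print(f"this GPU needs: -gencode arch=compute_{major}{minor},code=sm_{major}{minor}")
else:
    print("no CUDA device visible; the resample2d extension cannot run here")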
/dvs/flownet2/networks/submodules.py:
--------------------------------------------------------------------------------
1 | # freda (todo) :
2 |
3 | import torch.nn as nn
4 | import torch
5 | import numpy as np
6 |
7 | def conv(batchNorm, in_planes, out_planes, kernel_size=3, stride=1):
8 | if batchNorm:
9 | return nn.Sequential(
10 | nn.Conv2d(in_planes, out_planes, kernel_size=kernel_size, stride=stride, padding=(kernel_size-1)//2, bias=False),
11 | nn.BatchNorm2d(out_planes),
12 | nn.LeakyReLU(0.1,inplace=True)
13 | )
14 | else:
15 | return nn.Sequential(
16 | nn.Conv2d(in_planes, out_planes, kernel_size=kernel_size, stride=stride, padding=(kernel_size-1)//2, bias=True),
17 | nn.LeakyReLU(0.1,inplace=True)
18 | )
19 |
20 | def i_conv(batchNorm, in_planes, out_planes, kernel_size=3, stride=1, bias = True):
21 | if batchNorm:
22 | return nn.Sequential(
23 | nn.Conv2d(in_planes, out_planes, kernel_size=kernel_size, stride=stride, padding=(kernel_size-1)//2, bias=bias),
24 | nn.BatchNorm2d(out_planes),
25 | )
26 | else:
27 | return nn.Sequential(
28 | nn.Conv2d(in_planes, out_planes, kernel_size=kernel_size, stride=stride, padding=(kernel_size-1)//2, bias=bias),
29 | )
30 |
31 | def predict_flow(in_planes):
32 | return nn.Conv2d(in_planes,2,kernel_size=3,stride=1,padding=1,bias=True)
33 |
34 | def deconv(in_planes, out_planes):
35 | return nn.Sequential(
36 | nn.ConvTranspose2d(in_planes, out_planes, kernel_size=4, stride=2, padding=1, bias=True),
37 | nn.LeakyReLU(0.1,inplace=True)
38 | )
39 |
40 | class tofp16(nn.Module):
41 | def __init__(self):
42 | super(tofp16, self).__init__()
43 |
44 | def forward(self, input):
45 | return input.half()
46 |
47 |
48 | class tofp32(nn.Module):
49 | def __init__(self):
50 | super(tofp32, self).__init__()
51 |
52 | def forward(self, input):
53 | return input.float()
54 |
55 |
56 | def init_deconv_bilinear(weight):
57 | f_shape = weight.size()
58 | heigh, width = f_shape[-2], f_shape[-1]
59 | f = np.ceil(width/2.0)
60 | c = (2 * f - 1 - f % 2) / (2.0 * f)
61 | bilinear = np.zeros([heigh, width])
62 | for x in range(width):
63 | for y in range(heigh):
64 | value = (1 - abs(x / f - c)) * (1 - abs(y / f - c))
65 | bilinear[x, y] = value
66 | weight.data.fill_(0.)
67 | for i in range(f_shape[0]):
68 | for j in range(f_shape[1]):
69 | weight.data[i,j,:,:] = torch.from_numpy(bilinear)
70 |
71 |
72 | def save_grad(grads, name):
73 | def hook(grad):
74 | grads[name] = grad
75 | return hook
76 |
77 | '''
78 | def save_grad(grads, name):
79 | def hook(grad):
80 | grads[name] = grad
81 | return hook
82 | import torch
83 | from channelnorm_package.modules.channelnorm import ChannelNorm
84 | model = ChannelNorm().cuda()
85 | grads = {}
86 | a = 100*torch.autograd.Variable(torch.randn((1,3,5,5)).cuda(), requires_grad=True)
87 | a.register_hook(save_grad(grads, 'a'))
88 | b = model(a)
89 | y = torch.mean(b)
90 | y.backward()
91 |
92 | '''
93 |
--------------------------------------------------------------------------------
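The builders in submodules.py are what the FlowNet variants chain together: conv halves the resolution when stride=2, deconv doubles it, and predict_flow maps features to a 2-channel (u, v) field. A small shape-check sketch, for illustration only (it assumes dvs/flownet2/networks is on sys.path so the file imports as submodules):

import torch
from submodules import conv, deconv, predict_flow

x = torch.randn(1, 6, 64, 64)                                  # e.g. two stacked RGB frames
feat = conv(False, in_planes=6, out_planes=64, stride=2)(x)    # -> [1, 64, 32, 32]
up = deconv(64, 32)(feat)                                      # -> [1, 32, 64, 64]
flow = predict_flow(32)(up)                                    # -> [1, 2, 64, 64]
print(flow.shape)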
/dvs/flownet2/run.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | python main.py --inference --model FlowNet2 --save_flow --inference_dataset Google \
3 | --inference_dataset_root ./../video \
4 | --resume ./FlowNet2_checkpoint.pth.tar \
5 | --inference_visualize
6 |
--------------------------------------------------------------------------------
/dvs/flownet2/run_release.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | python main.py --inference --model FlowNet2 --save_flow --inference_dataset Google \
3 | --inference_dataset_root ./../dataset_release/test \
4 | --resume ./FlowNet2_checkpoint.pth.tar \
5 | --inference_visualize
6 |
7 | python main.py --inference --model FlowNet2 --save_flow --inference_dataset Google \
8 | --inference_dataset_root ./../dataset_release/training \
9 | --resume ./FlowNet2_checkpoint.pth.tar \
10 | --inference_visualize
--------------------------------------------------------------------------------
/dvs/flownet2/utils/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/googleinterns/deep-stabilization/7159c09d21aee3fc2098c64698c1300e40e3a8ea/dvs/flownet2/utils/__init__.py
--------------------------------------------------------------------------------
/dvs/flownet2/utils/flow_utils.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | import matplotlib.pyplot as plt
3 | import os.path
4 |
5 | TAG_CHAR = np.array([202021.25], np.float32)
6 |
7 | def readFlow(fn):
8 | """ Read .flo file in Middlebury format"""
9 | # Code adapted from:
10 | # http://stackoverflow.com/questions/28013200/reading-middlebury-flow-files-with-python-bytes-array-numpy
11 |
12 | # WARNING: this will work on little-endian architectures (eg Intel x86) only!
13 | # print 'fn = %s'%(fn)
14 | with open(fn, 'rb') as f:
15 | magic = np.fromfile(f, np.float32, count=1)
16 | if 202021.25 != magic:
17 | print('Magic number incorrect. Invalid .flo file')
18 | return None
19 | else:
20 | w = np.fromfile(f, np.int32, count=1)
21 | h = np.fromfile(f, np.int32, count=1)
22 | # print 'Reading %d x %d flo file\n' % (w, h)
23 | data = np.fromfile(f, np.float32, count=2*int(w)*int(h))
24 | # Reshape data into 3D array (columns, rows, bands)
25 | # The reshape here is for visualization, the original code is (w,h,2)
26 | return np.resize(data, (int(h), int(w), 2))
27 |
28 | def writeFlow(filename,uv,v=None):
29 | """ Write optical flow to file.
30 |
31 | If v is None, uv is assumed to contain both u and v channels,
32 | stacked in depth.
33 | Original code by Deqing Sun, adapted from Daniel Scharstein.
34 | """
35 | nBands = 2
36 |
37 | if v is None:
38 | assert(uv.ndim == 3)
39 | assert(uv.shape[2] == 2)
40 | u = uv[:,:,0]
41 | v = uv[:,:,1]
42 | else:
43 | u = uv
44 |
45 | assert(u.shape == v.shape)
46 | height,width = u.shape
47 | f = open(filename,'wb')
48 | # write the header
49 | f.write(TAG_CHAR)
50 | np.array(width).astype(np.int32).tofile(f)
51 | np.array(height).astype(np.int32).tofile(f)
52 | # arrange into matrix form
53 | tmp = np.zeros((height, width*nBands))
54 | tmp[:,np.arange(width)*2] = u
55 | tmp[:,np.arange(width)*2 + 1] = v
56 | tmp.astype(np.float32).tofile(f)
57 | f.close()
58 |
59 |
60 | # ref: https://github.com/sampepose/flownet2-tf/
61 | # blob/18f87081db44939414fc4a48834f9e0da3e69f4c/src/flowlib.py#L240
62 | def visulize_flow_file(flow_filename, save_dir=None):
63 | flow_data = readFlow(flow_filename)
64 | img = flow2img(flow_data)
65 | # plt.imshow(img)
66 | # plt.show()
67 | if save_dir:
68 | idx = flow_filename.rfind("/") + 1
69 | plt.imsave(os.path.join(save_dir, "%s-vis.png" % flow_filename[idx:-4]), img)
70 |
71 |
72 | def flow2img(flow_data):
73 | """
74 | convert optical flow into color image
75 | :param flow_data:
76 | :return: color image
77 | """
78 | # print(flow_data.shape)
79 | # print(type(flow_data))
80 | u = flow_data[:, :, 0]
81 | v = flow_data[:, :, 1]
82 |
83 | UNKNOW_FLOW_THRESHOLD = 1e7
84 | pr1 = abs(u) > UNKNOW_FLOW_THRESHOLD
85 | pr2 = abs(v) > UNKNOW_FLOW_THRESHOLD
86 | idx_unknown = (pr1 | pr2)
87 | u[idx_unknown] = v[idx_unknown] = 0
88 |
89 | # get max value in each direction
90 | maxu = -999.
91 | maxv = -999.
92 | minu = 999.
93 | minv = 999.
94 | maxu = max(maxu, np.max(u))
95 | maxv = max(maxv, np.max(v))
96 | minu = min(minu, np.min(u))
97 | minv = min(minv, np.min(v))
98 |
99 | rad = np.sqrt(u ** 2 + v ** 2)
100 | maxrad = max(-1, np.max(rad))
101 | u = u / maxrad + np.finfo(float).eps
102 | v = v / maxrad + np.finfo(float).eps
103 |
104 | img = compute_color(u, v)
105 |
106 | idx = np.repeat(idx_unknown[:, :, np.newaxis], 3, axis=2)
107 | img[idx] = 0
108 |
109 | return np.uint8(img)
110 |
111 |
112 | def compute_color(u, v):
113 | """
114 | compute optical flow color map
115 | :param u: horizontal optical flow
116 | :param v: vertical optical flow
117 | :return:
118 | """
119 |
120 | height, width = u.shape
121 | img = np.zeros((height, width, 3))
122 |
123 | NAN_idx = np.isnan(u) | np.isnan(v)
124 | u[NAN_idx] = v[NAN_idx] = 0
125 |
126 | colorwheel = make_color_wheel()
127 | ncols = np.size(colorwheel, 0)
128 |
129 | rad = np.sqrt(u ** 2 + v ** 2)
130 |
131 | a = np.arctan2(-v, -u) / np.pi
132 |
133 | fk = (a + 1) / 2 * (ncols - 1) + 1
134 |
135 | k0 = np.floor(fk).astype(int)
136 |
137 | k1 = k0 + 1
138 | k1[k1 == ncols + 1] = 1
139 | f = fk - k0
140 |
141 | for i in range(0, np.size(colorwheel, 1)):
142 | tmp = colorwheel[:, i]
143 | col0 = tmp[k0 - 1] / 255
144 | col1 = tmp[k1 - 1] / 255
145 | col = (1 - f) * col0 + f * col1
146 |
147 | idx = rad <= 1
148 | col[idx] = 1 - rad[idx] * (1 - col[idx])
149 | notidx = np.logical_not(idx)
150 |
151 | col[notidx] *= 0.75
152 | img[:, :, i] = np.uint8(np.floor(255 * col * (1 - NAN_idx)))
153 |
154 | return img
155 |
156 |
157 | def make_color_wheel():
158 | """
159 |     Generate the color wheel according to the Middlebury color code
160 | :return: Color wheel
161 | """
162 | RY = 15
163 | YG = 6
164 | GC = 4
165 | CB = 11
166 | BM = 13
167 | MR = 6
168 |
169 | ncols = RY + YG + GC + CB + BM + MR
170 |
171 | colorwheel = np.zeros([ncols, 3])
172 |
173 | col = 0
174 |
175 | # RY
176 | colorwheel[0:RY, 0] = 255
177 | colorwheel[0:RY, 1] = np.transpose(np.floor(255 * np.arange(0, RY) / RY))
178 | col += RY
179 |
180 | # YG
181 | colorwheel[col:col + YG, 0] = 255 - np.transpose(np.floor(255 * np.arange(0, YG) / YG))
182 | colorwheel[col:col + YG, 1] = 255
183 | col += YG
184 |
185 | # GC
186 | colorwheel[col:col + GC, 1] = 255
187 | colorwheel[col:col + GC, 2] = np.transpose(np.floor(255 * np.arange(0, GC) / GC))
188 | col += GC
189 |
190 | # CB
191 | colorwheel[col:col + CB, 1] = 255 - np.transpose(np.floor(255 * np.arange(0, CB) / CB))
192 | colorwheel[col:col + CB, 2] = 255
193 | col += CB
194 |
195 | # BM
196 | colorwheel[col:col + BM, 2] = 255
197 | colorwheel[col:col + BM, 0] = np.transpose(np.floor(255 * np.arange(0, BM) / BM))
198 |     col += BM
199 |
200 | # MR
201 | colorwheel[col:col + MR, 2] = 255 - np.transpose(np.floor(255 * np.arange(0, MR) / MR))
202 | colorwheel[col:col + MR, 0] = 255
203 |
204 | return colorwheel
205 |
--------------------------------------------------------------------------------
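readFlow and writeFlow above implement the Middlebury .flo layout: a float32 magic number, int32 width and height, then interleaved u/v values. A round-trip sanity check (a minimal sketch with a hypothetical file name; assumes dvs/flownet2/utils is on sys.path):

import numpy as np
from flow_utils import readFlow, writeFlow, flow2img

flow = np.random.randn(4, 6, 2).astype(np.float32)   # H x W x 2 (u, v)
writeFlow("example.flo", flow)
flow_back = readFlow("example.flo")                   # H x W x 2 again
assert flow_back.shape == (4, 6, 2)
rgb = flow2img(flow_back.copy())                      # uint8 H x W x 3 color coding
print(rgb.shape, rgb.dtype)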
/dvs/flownet2/utils/frame_utils.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | from os.path import *
3 | from imageio import imread
4 | from . import flow_utils
5 |
6 | def read_gen(file_name):
7 | ext = splitext(file_name)[-1]
8 | if ext == '.png' or ext == '.jpeg' or ext == '.ppm' or ext == '.jpg':
9 | im = imread(file_name)
10 | if im.shape[2] > 3:
11 | return im[:,:,:3]
12 | else:
13 | return im
14 | elif ext == '.bin' or ext == '.raw':
15 | return np.load(file_name)
16 | elif ext == '.flo':
17 | return flow_utils.readFlow(file_name).astype(np.float32)
18 | return []
19 |
--------------------------------------------------------------------------------
/dvs/flownet2/utils/param_utils.py:
--------------------------------------------------------------------------------
1 | import torch
2 | import torch.nn as nn
3 | import numpy as np
4 |
5 | def parse_flownetc(modules, weights, biases):
6 | keys = [
7 | 'conv1',
8 | 'conv2',
9 | 'conv3',
10 | 'conv_redir',
11 | 'conv3_1',
12 | 'conv4',
13 | 'conv4_1',
14 | 'conv5',
15 | 'conv5_1',
16 | 'conv6',
17 | 'conv6_1',
18 |
19 | 'deconv5',
20 | 'deconv4',
21 | 'deconv3',
22 | 'deconv2',
23 |
24 | 'Convolution1',
25 | 'Convolution2',
26 | 'Convolution3',
27 | 'Convolution4',
28 | 'Convolution5',
29 |
30 | 'upsample_flow6to5',
31 | 'upsample_flow5to4',
32 | 'upsample_flow4to3',
33 | 'upsample_flow3to2',
34 |
35 | ]
36 | i = 0
37 | for m in modules:
38 | if isinstance(m, nn.Conv2d) or isinstance(m, nn.ConvTranspose2d):
39 | weight = weights[keys[i]].copy()
40 | bias = biases[keys[i]].copy()
41 | if keys[i] == 'conv1':
42 | m.weight.data[:,:,:,:] = torch.from_numpy(np.flip(weight, axis=1).copy())
43 | m.bias.data[:] = torch.from_numpy(bias)
44 | else:
45 | m.weight.data[:,:,:,:] = torch.from_numpy(weight)
46 | m.bias.data[:] = torch.from_numpy(bias)
47 |
48 | i = i + 1
49 | return
50 |
51 | def parse_flownets(modules, weights, biases, param_prefix='net2_'):
52 | keys = [
53 | 'conv1',
54 | 'conv2',
55 | 'conv3',
56 | 'conv3_1',
57 | 'conv4',
58 | 'conv4_1',
59 | 'conv5',
60 | 'conv5_1',
61 | 'conv6',
62 | 'conv6_1',
63 |
64 | 'deconv5',
65 | 'deconv4',
66 | 'deconv3',
67 | 'deconv2',
68 |
69 | 'predict_conv6',
70 | 'predict_conv5',
71 | 'predict_conv4',
72 | 'predict_conv3',
73 | 'predict_conv2',
74 |
75 | 'upsample_flow6to5',
76 | 'upsample_flow5to4',
77 | 'upsample_flow4to3',
78 | 'upsample_flow3to2',
79 | ]
80 | for i, k in enumerate(keys):
81 | if 'upsample' in k:
82 | keys[i] = param_prefix + param_prefix + k
83 | else:
84 | keys[i] = param_prefix + k
85 | i = 0
86 | for m in modules:
87 | if isinstance(m, nn.Conv2d) or isinstance(m, nn.ConvTranspose2d):
88 | weight = weights[keys[i]].copy()
89 | bias = biases[keys[i]].copy()
90 | if keys[i] == param_prefix+'conv1':
91 | m.weight.data[:,0:3,:,:] = torch.from_numpy(np.flip(weight[:,0:3,:,:], axis=1).copy())
92 | m.weight.data[:,3:6,:,:] = torch.from_numpy(np.flip(weight[:,3:6,:,:], axis=1).copy())
93 | m.weight.data[:,6:9,:,:] = torch.from_numpy(np.flip(weight[:,6:9,:,:], axis=1).copy())
94 | m.weight.data[:,9::,:,:] = torch.from_numpy(weight[:,9:,:,:].copy())
95 | if m.bias is not None:
96 | m.bias.data[:] = torch.from_numpy(bias)
97 | else:
98 | m.weight.data[:,:,:,:] = torch.from_numpy(weight)
99 | if m.bias is not None:
100 | m.bias.data[:] = torch.from_numpy(bias)
101 | i = i + 1
102 | return
103 |
104 | def parse_flownetsonly(modules, weights, biases, param_prefix=''):
105 | keys = [
106 | 'conv1',
107 | 'conv2',
108 | 'conv3',
109 | 'conv3_1',
110 | 'conv4',
111 | 'conv4_1',
112 | 'conv5',
113 | 'conv5_1',
114 | 'conv6',
115 | 'conv6_1',
116 |
117 | 'deconv5',
118 | 'deconv4',
119 | 'deconv3',
120 | 'deconv2',
121 |
122 | 'Convolution1',
123 | 'Convolution2',
124 | 'Convolution3',
125 | 'Convolution4',
126 | 'Convolution5',
127 |
128 | 'upsample_flow6to5',
129 | 'upsample_flow5to4',
130 | 'upsample_flow4to3',
131 | 'upsample_flow3to2',
132 | ]
133 | for i, k in enumerate(keys):
134 | if 'upsample' in k:
135 | keys[i] = param_prefix + param_prefix + k
136 | else:
137 | keys[i] = param_prefix + k
138 | i = 0
139 | for m in modules:
140 | if isinstance(m, nn.Conv2d) or isinstance(m, nn.ConvTranspose2d):
141 | weight = weights[keys[i]].copy()
142 | bias = biases[keys[i]].copy()
143 | if keys[i] == param_prefix+'conv1':
144 | # print ("%s :"%(keys[i]), m.weight.size(), m.bias.size(), tf_w[keys[i]].shape[::-1])
145 | m.weight.data[:,0:3,:,:] = torch.from_numpy(np.flip(weight[:,0:3,:,:], axis=1).copy())
146 | m.weight.data[:,3:6,:,:] = torch.from_numpy(np.flip(weight[:,3:6,:,:], axis=1).copy())
147 | if m.bias is not None:
148 | m.bias.data[:] = torch.from_numpy(bias)
149 | else:
150 | m.weight.data[:,:,:,:] = torch.from_numpy(weight)
151 | if m.bias is not None:
152 | m.bias.data[:] = torch.from_numpy(bias)
153 | i = i + 1
154 | return
155 |
156 | def parse_flownetsd(modules, weights, biases, param_prefix='netsd_'):
157 | keys = [
158 | 'conv0',
159 | 'conv1',
160 | 'conv1_1',
161 | 'conv2',
162 | 'conv2_1',
163 | 'conv3',
164 | 'conv3_1',
165 | 'conv4',
166 | 'conv4_1',
167 | 'conv5',
168 | 'conv5_1',
169 | 'conv6',
170 | 'conv6_1',
171 |
172 | 'deconv5',
173 | 'deconv4',
174 | 'deconv3',
175 | 'deconv2',
176 |
177 | 'interconv5',
178 | 'interconv4',
179 | 'interconv3',
180 | 'interconv2',
181 |
182 | 'Convolution1',
183 | 'Convolution2',
184 | 'Convolution3',
185 | 'Convolution4',
186 | 'Convolution5',
187 |
188 | 'upsample_flow6to5',
189 | 'upsample_flow5to4',
190 | 'upsample_flow4to3',
191 | 'upsample_flow3to2',
192 | ]
193 | for i, k in enumerate(keys):
194 | keys[i] = param_prefix + k
195 |
196 | i = 0
197 | for m in modules:
198 | if isinstance(m, nn.Conv2d) or isinstance(m, nn.ConvTranspose2d):
199 | weight = weights[keys[i]].copy()
200 | bias = biases[keys[i]].copy()
201 | if keys[i] == param_prefix+'conv0':
202 | m.weight.data[:,0:3,:,:] = torch.from_numpy(np.flip(weight[:,0:3,:,:], axis=1).copy())
203 | m.weight.data[:,3:6,:,:] = torch.from_numpy(np.flip(weight[:,3:6,:,:], axis=1).copy())
204 | if m.bias is not None:
205 | m.bias.data[:] = torch.from_numpy(bias)
206 | else:
207 | m.weight.data[:,:,:,:] = torch.from_numpy(weight)
208 | if m.bias is not None:
209 | m.bias.data[:] = torch.from_numpy(bias)
210 | i = i + 1
211 |
212 | return
213 |
214 | def parse_flownetfusion(modules, weights, biases, param_prefix='fuse_'):
215 | keys = [
216 | 'conv0',
217 | 'conv1',
218 | 'conv1_1',
219 | 'conv2',
220 | 'conv2_1',
221 |
222 | 'deconv1',
223 | 'deconv0',
224 |
225 | 'interconv1',
226 | 'interconv0',
227 |
228 | '_Convolution5',
229 | '_Convolution6',
230 | '_Convolution7',
231 |
232 | 'upsample_flow2to1',
233 | 'upsample_flow1to0',
234 | ]
235 | for i, k in enumerate(keys):
236 | keys[i] = param_prefix + k
237 |
238 | i = 0
239 | for m in modules:
240 | if isinstance(m, nn.Conv2d) or isinstance(m, nn.ConvTranspose2d):
241 | weight = weights[keys[i]].copy()
242 | bias = biases[keys[i]].copy()
243 | if keys[i] == param_prefix+'conv0':
244 | m.weight.data[:,0:3,:,:] = torch.from_numpy(np.flip(weight[:,0:3,:,:], axis=1).copy())
245 | m.weight.data[:,3::,:,:] = torch.from_numpy(weight[:,3:,:,:].copy())
246 | if m.bias is not None:
247 | m.bias.data[:] = torch.from_numpy(bias)
248 | else:
249 | m.weight.data[:,:,:,:] = torch.from_numpy(weight)
250 | if m.bias is not None:
251 | m.bias.data[:] = torch.from_numpy(bias)
252 | i = i + 1
253 |
254 | return
255 |
--------------------------------------------------------------------------------
/dvs/flownet2/utils/tools.py:
--------------------------------------------------------------------------------
1 | # freda (todo) :
2 |
3 | import os, time, sys, math
4 | import subprocess, shutil
5 | from os.path import *
6 | import numpy as np
7 | from inspect import isclass
8 | from pytz import timezone
9 | from datetime import datetime
10 | import inspect
11 | import torch
12 |
13 | def datestr():
14 | pacific = timezone('US/Pacific')
15 | now = datetime.now(pacific)
16 | return '{}{:02}{:02}_{:02}{:02}'.format(now.year, now.month, now.day, now.hour, now.minute)
17 |
18 | def module_to_dict(module, exclude=[]):
19 | return dict([(x, getattr(module, x)) for x in dir(module)
20 | if isclass(getattr(module, x))
21 | and x not in exclude
22 | and getattr(module, x) not in exclude])
23 |
24 | class TimerBlock:
25 | def __init__(self, title):
26 | print(("{}".format(title)))
27 |
28 | def __enter__(self):
29 |         self.start = time.perf_counter()
30 | return self
31 |
32 | def __exit__(self, exc_type, exc_value, traceback):
33 |         self.end = time.perf_counter()
34 | self.interval = self.end - self.start
35 |
36 | if exc_type is not None:
37 | self.log("Operation failed\n")
38 | else:
39 | self.log("Operation finished\n")
40 |
41 |
42 | def log(self, string):
43 |         duration = time.perf_counter() - self.start
44 | units = 's'
45 | if duration > 60:
46 | duration = duration / 60.
47 | units = 'm'
48 | print((" [{:.3f}{}] {}".format(duration, units, string)))
49 |
50 | def log2file(self, fid, string):
51 | fid = open(fid, 'a')
52 | fid.write("%s\n"%(string))
53 | fid.close()
54 |
55 | def add_arguments_for_module(parser, module, argument_for_class, default, skip_params=[], parameter_defaults={}):
56 | argument_group = parser.add_argument_group(argument_for_class.capitalize())
57 |
58 | module_dict = module_to_dict(module)
59 | argument_group.add_argument('--' + argument_for_class, type=str, default=default, choices=list(module_dict.keys()))
60 |
61 | args, unknown_args = parser.parse_known_args()
62 | class_obj = module_dict[vars(args)[argument_for_class]]
63 |
64 | argspec = inspect.getargspec(class_obj.__init__)
65 |
66 | defaults = argspec.defaults[::-1] if argspec.defaults else None
67 |
68 | args = argspec.args[::-1]
69 | for i, arg in enumerate(args):
70 | cmd_arg = '{}_{}'.format(argument_for_class, arg)
71 | if arg not in skip_params + ['self', 'args']:
72 | if arg in list(parameter_defaults.keys()):
73 | argument_group.add_argument('--{}'.format(cmd_arg), type=type(parameter_defaults[arg]), default=parameter_defaults[arg])
74 | elif (defaults is not None and i < len(defaults)):
75 | argument_group.add_argument('--{}'.format(cmd_arg), type=type(defaults[i]), default=defaults[i])
76 | else:
77 | print(("[Warning]: non-default argument '{}' detected on class '{}'. This argument cannot be modified via the command line"
78 | .format(arg, module.__class__.__name__)))
79 | # We don't have a good way of dealing with inferring the type of the argument
80 | # TODO: try creating a custom action and using ast's infer type?
81 | # else:
82 | # argument_group.add_argument('--{}'.format(cmd_arg), required=True)
83 |
84 | def kwargs_from_args(args, argument_for_class):
85 | argument_for_class = argument_for_class + '_'
86 | return {key[len(argument_for_class):]: value for key, value in list(vars(args).items()) if argument_for_class in key and key != argument_for_class + 'class'}
87 |
88 | def format_dictionary_of_losses(labels, values):
89 | try:
90 | string = ', '.join([('{}: {:' + ('.3f' if value >= 0.001 else '.1e') +'}').format(name, value) for name, value in zip(labels, values)])
91 | except (TypeError, ValueError) as e:
92 | print((list(zip(labels, values))))
93 | string = '[Log Error] ' + str(e)
94 |
95 | return string
96 |
97 |
98 | class IteratorTimer():
99 | def __init__(self, iterable):
100 | self.iterable = iterable
101 | self.iterator = self.iterable.__iter__()
102 |
103 | def __iter__(self):
104 | return self
105 |
106 | def __len__(self):
107 | return len(self.iterable)
108 |
109 | def __next__(self):
110 | start = time.time()
111 | n = next(self.iterator)
112 | self.last_duration = (time.time() - start)
113 | return n
114 |
115 | next = __next__
116 |
117 | def gpumemusage():
118 |     gpu_mem = subprocess.check_output("nvidia-smi | grep MiB | cut -f 3 -d '|'", shell=True).decode().replace(' ', '').replace('\n', '').replace('i', '')
119 | all_stat = [float(a) for a in gpu_mem.replace('/','').split('MB')[:-1]]
120 |
121 | gpu_mem = ''
122 |     for i in range(len(all_stat)//2):
123 | curr, tot = all_stat[2*i], all_stat[2*i+1]
124 | util = "%1.2f"%(100*curr/tot)+'%'
125 | cmem = str(int(math.ceil(curr/1024.)))+'GB'
126 | gmem = str(int(math.ceil(tot/1024.)))+'GB'
127 | gpu_mem += util + '--' + join(cmem, gmem) + ' '
128 | return gpu_mem
129 |
130 |
131 | def update_hyperparameter_schedule(args, epoch, global_iteration, optimizer):
132 | if args.schedule_lr_frequency > 0:
133 | for param_group in optimizer.param_groups:
134 | if (global_iteration + 1) % args.schedule_lr_frequency == 0:
135 | param_group['lr'] /= float(args.schedule_lr_fraction)
136 | param_group['lr'] = float(np.maximum(param_group['lr'], 0.000001))
137 |
138 | def save_checkpoint(state, is_best, path, prefix, filename='checkpoint.pth.tar'):
139 | prefix_save = os.path.join(path, prefix)
140 | name = prefix_save + '_' + filename
141 | torch.save(state, name)
142 | if is_best:
143 | shutil.copyfile(name, prefix_save + '_model_best.pth.tar')
144 |
145 |
--------------------------------------------------------------------------------
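TimerBlock in tools.py is a context manager: entering it records a start time, log() prefixes each message with the elapsed time (switching from seconds to minutes past 60 s), and exiting prints whether the operation finished or failed. Typical usage looks like this (a minimal sketch, not taken from main.py; assumes tools.py and its pytz dependency are importable):

from tools import TimerBlock

with TimerBlock("Building the model") as block:
    block.log("parsing arguments")
    # ... expensive setup work ...
    block.log("initializing weights")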
/dvs/gyro/__init__.py:
--------------------------------------------------------------------------------
1 | from .gyro_function import (
2 | GetGyroAtTimeStamp,
3 | QuaternionProduct,
4 | QuaternionReciprocal,
5 | ConvertQuaternionToAxisAngle,
6 | FindOISAtTimeStamp,
7 | GetMetadata,
8 | GetProjections,
9 | GetVirtualProjection,
10 | GetForwardGrid,
11 | CenterZoom,
12 | GetWarpingFlow,
13 | torch_norm_quat,
14 | torch_QuaternionProduct,
15 | torch_QuaternionReciprocal,
16 | torch_GetVirtualProjection,
17 | get_static,
18 | torch_GetForwardGrid,
19 | torch_GetWarpingFlow,
20 | train_GetGyroAtTimeStamp,
21 | train_ConvertQuaternionToAxisAngle,
22 | ConvertAxisAngleToQuaternion,
23 | torch_ConvertAxisAngleToQuaternion,
24 | torch_ConvertQuaternionToAxisAngle,
25 | ConvertAxisAngleToQuaternion_no_angle,
26 | ConvertQuaternionToAxisAngle_no_angle,
27 | torch_GetHomographyTransformFromProjections,
28 | torch_ApplyTransform,
29 | norm_quat,
30 | SlerpWithDefault
31 | )
32 | from .gyro_io import (
33 | LoadGyroData,
34 | LoadOISData,
35 | LoadFrameData,
36 | LoadStabResult,
37 | get_grid,
38 | get_rotations,
39 | visual_rotation
40 | )
--------------------------------------------------------------------------------
/dvs/gyro/gyro_io.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | from numpy import linalg as LA
3 | import matplotlib.pyplot as plt
4 | import scipy.io as sio
5 | from .gyro_function import (
6 | ProcessGyroData, QuaternionProduct, QuaternionReciprocal,
7 | ConvertQuaternionToAxisAngle, FindOISAtTimeStamp, GetMetadata,
8 | GetProjections, GetVirtualProjection, GetForwardGrid,
9 | CenterZoom, GetGyroAtTimeStamp, get_static, ConvertAxisAngleToQuaternion,
10 | ConvertAxisAngleToQuaternion_no_angle, ConvertQuaternionToAxisAngle_no_angle
11 | )
12 |
13 | def load_gyro_mesh(input_name):
14 | data = LoadStabResult(input_name)
15 | w, h = data["vertex_grid_size"][0]
16 | data["warping grid"] = np.reshape(data["warping grid"],(-1,int(w),int(h),4))
17 | return data
18 |
19 | def get_grid(static_options, frame_data, quats_data, ois_data, virtual_data, no_shutter = False):
20 | grid = []
21 | result_poses = {}
22 | result_poses['virtual pose'] = virtual_data
23 | for i in range(len(virtual_data)):
24 | metadata = GetMetadata(frame_data, i)
25 | real_projections = GetProjections(static_options, metadata, quats_data, ois_data, no_shutter = no_shutter)
26 | virtual_projection = GetVirtualProjection(static_options, result_poses, metadata, i)
27 | grid.append(GetForwardGrid(static_options, real_projections, virtual_projection))
28 | grid = np.array(grid)
29 | zoom_ratio = 1 / (1 - 2 * static_options["cropping_ratio"])
30 | curr_grid = CenterZoom(grid, zoom_ratio)
31 | curr_grid = np.transpose(curr_grid,(0,3,2,1))
32 | return curr_grid
33 |
34 | def get_rotations(frame_data, quats_data, ois_data, num_frames):
35 | quats = np.zeros((num_frames, 4))
36 | for i in range(num_frames):
37 | quats[i,:] = GetGyroAtTimeStamp(quats_data, frame_data[i,0])
38 |
39 | rotations = np.zeros((num_frames,3))
40 | lens_offsets = np.zeros((num_frames, 2))
41 | for i in range(num_frames):
42 | if i != 0:
43 | quat_dif = QuaternionProduct(quats[i,:], QuaternionReciprocal(quats[i-1,:]))
44 | axis_dif_cur = ConvertQuaternionToAxisAngle_no_angle(quat_dif)
45 | rotations[i,:] = axis_dif_cur
46 | lens_offsets[i,:] = FindOISAtTimeStamp(ois_data, frame_data[i, 4])
47 |
48 | return rotations, lens_offsets
49 |
50 | def visual_rotation(rotations_real, lens_offsets_real, rotations_virtual, lens_offsets_virtual, rotations_virtual2, lens_offsets_virtual2, path):
51 | # figure('units','normalized','outerposition',[0 0 1 1])
52 | plt.clf()
53 | plt.figure(figsize=(8,16))
54 |
55 | plt.subplot(5,1,1)
56 | plt.plot(rotations_real[:,0], "g")
57 | if rotations_virtual is not None:
58 | plt.plot(rotations_virtual[:,0], "b")
59 | if rotations_virtual2 is not None:
60 | plt.plot(rotations_virtual2[:,0], "r")
61 | plt.ylim(-0.02, 0.02)
62 | plt.xlabel('frame id')
63 | plt.ylabel('gyro x')
64 |
65 | plt.subplot(5,1,2)
66 | plt.plot(rotations_real[:,1], "g")
67 | if rotations_virtual is not None:
68 | plt.plot(rotations_virtual[:,1], "b")
69 | if rotations_virtual2 is not None:
70 | plt.plot(rotations_virtual2[:,1], "r")
71 | plt.ylim(-0.02, 0.02)
72 | plt.xlabel('frame id')
73 | plt.ylabel('gyro y')
74 |
75 | plt.subplot(5,1,3)
76 | plt.plot(rotations_real[:,2], "g")
77 | if rotations_virtual is not None:
78 | plt.plot(rotations_virtual[:,2], "b")
79 | if rotations_virtual2 is not None:
80 | plt.plot(rotations_virtual2[:,2], "r")
81 | plt.ylim(-0.02, 0.02)
82 | plt.xlabel('frame id')
83 | plt.ylabel('gyro z')
84 |
85 | plt.subplot(5,1,4)
86 | plt.plot(lens_offsets_real[:,0], "g")
87 | if lens_offsets_virtual is not None:
88 | plt.plot(lens_offsets_virtual[:,0], "b")
89 | if rotations_virtual2 is not None:
90 | plt.plot(lens_offsets_virtual2[:,0], "r")
91 | plt.xlabel('frame id')
92 | plt.ylabel('ois x')
93 |
94 | plt.subplot(5,1,5)
95 | plt.plot(lens_offsets_real[:,1], "g")
96 | if lens_offsets_virtual is not None:
97 | plt.plot(lens_offsets_virtual[:,1], "b")
98 | if rotations_virtual2 is not None:
99 | plt.plot(lens_offsets_virtual2[:,1], "r")
100 | plt.xlabel('frame id')
101 | plt.ylabel('ois y')
102 |
103 | plt.savefig(path[:-4]+".jpg")
104 | return
105 |
106 | def LoadOISData(ois_name):
107 | ois_log = np.loadtxt(ois_name)
108 | ois_log = ois_log[:, -3:]
109 | return ois_log
110 |
111 | def LoadFrameData(frame_log_name):
112 | frame_data = np.loadtxt(frame_log_name)
113 | frame_data[:, [0,4]] = frame_data[:, [0,4]] - np.expand_dims(frame_data[:,1]/2, axis = 1)
114 | return frame_data
115 |
116 |
117 | def LoadGyroData(gyro_log_name):
118 | raw_gyro_data = np.loadtxt(gyro_log_name)
119 | raw_gyro_data[:,0] = raw_gyro_data[:,0] * 1000
120 | raw_gyro_data = raw_gyro_data[:,[0, 2, 1, 3]]
121 |
122 | [_, quats_data] = ProcessGyroData(raw_gyro_data)
123 | quats_data = np.concatenate((raw_gyro_data[:, 0, None], quats_data), axis = 1)
124 | return quats_data
125 |
126 | def LoadStabResult(input_name):
127 | fid = open(input_name)
128 | data = {}
129 | while True:
130 | name, val = ReadLine(fid)
131 | if name == None:
132 | break
133 | if name in data:
134 | data[name] = np.concatenate((data[name], val), axis=0)
135 | else:
136 | data[name] = val
137 | fid.close()
138 | print("Mesh length: ", len(list(data.values())[0]))
139 | return data
140 |
141 |
142 | def ReadLine(fid):
143 | name = ''
144 | val = 0
145 | tline = fid.readline()
146 | if len(tline) == 0:
147 | return None, None
148 | if tline[-1] == "\n":
149 | tline = tline[:-1]
150 | ind = tline.find(':')
151 | name = tline[:ind]
152 | tmp_val= str2num(tline[ind+1:])
153 | if len(tmp_val) > 0:
154 | val = tmp_val
155 | else:
156 | tline = fid.readline()
157 | if tline[-1] == "\n":
158 | tline = tline[:-1]
159 | val = str2num(tline)
160 | return name, np.expand_dims(np.array(val), axis=0)
161 |
162 | def str2num(string):
163 | nums = string.split(" ")
164 | nums = [float(_) for _ in nums if _ != ""]
165 | return nums
166 |
167 |
--------------------------------------------------------------------------------
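get_rotations above converts per-frame camera quaternions into frame-to-frame rotation vectors: the relative rotation is q_t composed with the reciprocal of q_{t-1}, and its axis-angle form is what visual_rotation plots as gyro x/y/z. A self-contained NumPy sketch of that step, for illustration only (it assumes unit quaternions in the repo's (x, y, z, w) ordering and does not call the gyro_function helpers):

import numpy as np

def quat_mul(a, b):
    # Hamilton product, (x, y, z, w) ordering
    x1, y1, z1, w1 = a
    x2, y2, z2, w2 = b
    return np.array([
        w1*x2 + x1*w2 + y1*z2 - z1*y2,
        w1*y2 - x1*z2 + y1*w2 + z1*x2,
        w1*z2 + x1*y2 - y1*x2 + z1*w2,
        w1*w2 - x1*x2 - y1*y2 - z1*z2,
    ])

def quat_conj(q):
    x, y, z, w = q
    return np.array([-x, -y, -z, w])

def relative_rotation_vector(q_t, q_t_1):
    q_diff = quat_mul(q_t, quat_conj(q_t_1))        # rotation from frame t-1 to frame t
    w = np.clip(q_diff[3], -1.0, 1.0)
    angle = 2.0 * np.arccos(w)
    axis = q_diff[:3] / max(np.linalg.norm(q_diff[:3]), 1e-12)
    return axis * angle                              # small per-frame rotation vector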
/dvs/inference.py:
--------------------------------------------------------------------------------
1 | import os
2 | import sys
3 | import torch
4 | import torchvision
5 | import torch.nn as nn
6 | from torch.autograd import Variable
7 |
8 | import time
9 | import yaml
10 | import argparse
11 | import numpy as np
12 | from printer import Printer
13 | from dataset import get_data_loader, get_inference_data_loader
14 | from model import Model
15 | import datetime
16 | import copy
17 | from util import make_dir, get_optimizer, norm_flow
18 | from gyro import (
19 | get_grid,
20 | get_rotations,
21 | visual_rotation,
22 | torch_QuaternionProduct,
23 | torch_norm_quat
24 | )
25 | from warp import warp_video
26 |
27 | os.environ["CUDA_VISIBLE_DEVICES"] = "0"
28 |
29 | def run(model, loader, cf, USE_CUDA=True):
30 | no_flo = False
31 | number_virtual, number_real = cf['data']["number_virtual"], cf['data']["number_real"]
32 | model.net.eval()
33 | model.unet.eval()
34 | activation = nn.Softshrink(0.0006) # 0.0036
35 | for i, data in enumerate(loader, 0):
36 | # get the inputs; data is a list of [inputs, labels]
37 | real_inputs, times, flo, flo_back, real_projections, real_postion, ois, real_queue_idx = data
38 |         print("Finish loading data")
39 |
40 | real_inputs = real_inputs.type(torch.float) #[b,60,84=21*4]
41 | real_projections = real_projections.type(torch.float)
42 | flo = flo.type(torch.float)
43 | flo_back = flo_back.type(torch.float)
44 | ois = ois.type(torch.float)
45 |
46 | batch_size, step, dim = real_inputs.size()
47 | times = times.numpy()
48 | real_queue_idx = real_queue_idx.numpy()
49 | virtual_queue = [None] * batch_size
50 |
51 | run_loss = 0
52 | model.net.init_hidden(batch_size)
53 | count = 0
54 | for j in range(step):
55 | if (j+1) % 100 == 0:
56 | print("Step: "+str(j+1)+"/"+str(step))
57 | virtual_inputs, vt_1 = loader.dataset.get_virtual_data(
58 | virtual_queue, real_queue_idx, times[:, j], times[:, j+1], times[:, 0], batch_size, number_virtual, real_postion[:,j])
59 | real_inputs_step = real_inputs[:,j,:]
60 | inputs = torch.cat((real_inputs_step,virtual_inputs), dim = 1)
61 |
62 | # inputs = Variable(real_inputs_step)
63 | if USE_CUDA:
64 | real_inputs_step = real_inputs_step.cuda()
65 | virtual_inputs = virtual_inputs.cuda()
66 | inputs = inputs.cuda()
67 | if no_flo is False:
68 | flo_step = flo[:,j].cuda()
69 | flo_back_step = flo_back[:,j].cuda()
70 | else:
71 | flo_step = None
72 | flo_back_step = None
73 | vt_1 = vt_1.cuda()
74 | real_projections_t = real_projections[:,j+1].cuda()
75 | real_projections_t_1 = real_projections[:,j].cuda()
76 | real_postion_anchor = real_postion[:,j].cuda()
77 | ois_step = ois[:,j].cuda()
78 |
79 | if no_flo is False:
80 | b, h, w, _ = flo_step.size()
81 | flo_step = norm_flow(flo_step, h, w)
82 | flo_back_step = norm_flow(flo_back_step, h, w)
83 |
84 | with torch.no_grad():
85 | if no_flo is False:
86 | flo_out = model.unet(flo_step, flo_back_step)
87 | else:
88 | flo_out = None
89 | if j < 1:
90 | for i in range(2):
91 | out = model.net(inputs, flo_out, ois_step)
92 | else:
93 | out = model.net(inputs, flo_out, ois_step)
94 |
95 | real_position = real_inputs_step[:,40:44]
96 | virtual_position = virtual_inputs[:, -4:]
97 |
98 | out[:, :3] = activation(out[:, :3])
99 | out = torch_norm_quat(out)
100 |
101 | pos = torch_QuaternionProduct(virtual_position, real_postion_anchor)
102 | loss_step = model.loss(out, vt_1, virtual_inputs, real_inputs_step, \
103 | flo_step, flo_back_step, real_projections_t, real_projections_t_1, real_postion_anchor, \
104 | follow = True, optical = True, undefine = True)
105 | run_loss += loss_step
106 |
107 | out = torch_QuaternionProduct(out, pos)
108 |
109 | if USE_CUDA:
110 | out = out.cpu().detach().numpy()
111 |
112 | virtual_queue = loader.dataset.update_virtual_queue(batch_size, virtual_queue, out, times[:,j+1])
113 |
114 | run_loss /= step
115 | print( "\nLoss: follow, angle, smooth, c2_smooth, undefine, optical")
116 | print(run_loss.cpu().numpy()[:-1], "\n")
117 | return np.squeeze(virtual_queue, axis=0)
118 |
119 |
120 | def inference(cf, data_path, USE_CUDA):
121 | checkpoints_dir = cf['data']['checkpoints_dir']
122 | checkpoints_dir = make_dir(checkpoints_dir, cf)
123 | files = os.listdir(data_path)
124 | for f in files:
125 | if f[-3:] == "mp4" and "no_ois" not in f and "no_shutter" not in f and "gimbal" not in f.lower() and "grid" not in f.lower() and "flo" not in f.lower():
126 | video_name = f[:-4]
127 |
128 | # Define the model
129 | model = Model(cf)
130 | load_model = cf["model"]["load_model"]
131 |
132 |         print("------Load Pretrained Model--------")
133 | if load_model is not None:
134 | checkpoint = torch.load(load_model)
135 | print(load_model)
136 | else:
137 | load_last = os.path.join(checkpoints_dir, cf['data']['exp']+'_last.checkpoint')
138 | checkpoint = torch.load(load_last)
139 | print(load_last)
140 | model.net.load_state_dict(checkpoint['state_dict'])
141 | model.unet.load_state_dict(checkpoint['unet'])
142 |
143 | if USE_CUDA:
144 | model.net.cuda()
145 | model.unet.cuda()
146 |
147 | print("-----------Load Dataset----------")
148 | test_loader = get_inference_data_loader(cf, data_path, no_flo = False)
149 | data = test_loader.dataset.data[0]
150 |
151 | start_time = time.time()
152 | virtual_queue= run(model, test_loader, cf, USE_CUDA=USE_CUDA)
153 |
154 | virtual_data = np.zeros((1,5))
155 | virtual_data[:,1:] = virtual_queue[0, 1:]
156 | virtual_data[:,0] = data.frame[0,0]
157 | virtual_queue = np.concatenate((virtual_data, virtual_queue), axis = 0)
158 |
159 | print(virtual_queue.shape)
160 | time_used = (time.time() - start_time) / 60
161 |
162 | print("Time_used: %.4f minutes" % (time_used))
163 |
164 |
165 | virtual_path = os.path.join("./test", cf['data']['exp'], data_path.split("/")[-1]+'.txt')
166 | np.savetxt(virtual_path, virtual_queue, delimiter=' ')
167 |
168 | print("------Start Warping Video--------")
169 | grid = get_grid(test_loader.dataset.static_options, \
170 | data.frame[:data.length], data.gyro, data.ois, virtual_queue[:data.length,1:], no_shutter = False)
171 | return data, virtual_queue, video_name, grid
172 |
173 | def visual_result(cf, data, video_name, virtual_queue, virtual_queue2 = None, compare_exp = None):
174 | print("------Start Visual Result--------")
175 | rotations_virtual, lens_offsets_virtual = get_rotations(data.frame[:data.length], virtual_queue, np.zeros(data.ois.shape), data.length)
176 | rotations_real, lens_offsets_real = get_rotations(data.frame[:data.length], data.gyro, data.ois, data.length)
177 | if virtual_queue2 is not None:
178 | rotations_virtual2, lens_offsets_virtual2 = get_rotations(data.frame[:data.length], virtual_queue2, np.zeros(data.ois.shape), data.length)
179 | path = os.path.join("./test", cf['data']['exp'], video_name+'_'+compare_exp+'.jpg')
180 | else:
181 | rotations_virtual2, lens_offsets_virtual2 = None, None
182 | path = os.path.join("./test", cf['data']['exp'], video_name+'.jpg')
183 |
184 | visual_rotation(rotations_real, lens_offsets_real, rotations_virtual, lens_offsets_virtual, rotations_virtual2, lens_offsets_virtual2, path)
185 |
186 |
187 | def main(args = None):
188 | config_file = args.config
189 | dir_path = args.dir_path
190 | cf = yaml.load(open(config_file, 'r'))
191 |
192 | USE_CUDA = cf['data']["use_cuda"]
193 |
194 | log_file = open(os.path.join(cf["data"]["log"], cf['data']['exp']+'_test.log'), 'w+')
195 | printer = Printer(sys.stdout, log_file).open()
196 |
197 | data_name = sorted(os.listdir(dir_path))
198 | for i in range(len(data_name)):
199 | print("Running Inference: " + str(i+1) + "/" + str(len(data_name)))
200 | save_path = os.path.join("./test", cf['data']['exp'], data_name[i]+'_stab.mp4')
201 |
202 | data_path = os.path.join(dir_path, data_name[i])
203 | data, virtual_queue, video_name, grid= inference(cf, data_path, USE_CUDA)
204 |
205 | virtual_queue2 = None
206 | visual_result(cf, data, data_name[i], virtual_queue, virtual_queue2 = virtual_queue2, compare_exp = None)
207 |
208 | video_path = os.path.join(data_path, video_name+".mp4")
209 | warp_video(grid, video_path, save_path, frame_number = False)
210 | return
211 |
212 | if __name__ == '__main__':
213 | parser = argparse.ArgumentParser("Training model")
214 | parser.add_argument("--config", default="./conf/stabilzation.yaml", help="Config file.")
215 | parser.add_argument("--dir_path", default="./video")
216 | args = parser.parse_args()
217 | main(args = args)
--------------------------------------------------------------------------------
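In run() above the raw network output is passed through Softshrink before renormalization, so tiny predicted corrections are snapped to zero and the virtual camera stays put unless the model is confident it should move. A small numeric illustration of that step (not part of the repo; plain normalization stands in for torch_norm_quat):

import torch

shrink = torch.nn.Softshrink(0.0006)
q = torch.tensor([[0.0003, -0.0004, 0.0020, 1.0]])   # raw (x, y, z, w) output
q[:, :3] = shrink(q[:, :3])                          # x and y fall below the threshold -> 0
q = q / q.norm(dim=1, keepdim=True)                  # renormalize to a unit quaternion
print(q)                                             # ~[[0.0000, 0.0000, 0.0014, 1.0000]]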
/dvs/load_frame_sensor_data.py:
--------------------------------------------------------------------------------
1 | import os
2 | os.environ["CUDA_VISIBLE_DEVICES"] = "0"
3 | import sys
4 | import torch
5 | import torchvision
6 | import torch.nn as nn
7 | from torch.autograd import Variable
8 |
9 | import time
10 | import yaml
11 | import argparse
12 | import numpy as np
13 | from printer import Printer
14 | from dataset import get_data_loader, get_inference_data_loader
15 | from model import Model
16 | import datetime
17 | import copy
18 | from util import make_dir, get_optimizer, norm_flow
19 | from gyro import (
20 | get_grid,
21 | get_rotations,
22 | visual_rotation,
23 | GetGyroAtTimeStamp,
24 | torch_ConvertQuaternionToAxisAngle,
25 | torch_ConvertAxisAngleToQuaternion,
26 | torch_QuaternionProduct,
27 | get_static
28 | )
29 | from warp import warp_video
30 |
31 | def run(loader, cf, USE_CUDA=True):
32 | number_virtual, number_real = cf['data']["number_virtual"], cf['data']["number_real"]
33 | for i, data in enumerate(loader, 0):
34 | # get the inputs; data is a list of [inputs, labels]
35 | real_inputs, times, flo, flo_back, real_projections, real_postion, ois, real_queue_idx = data
36 |         print("Finish loading data")
37 |
38 | real_inputs = real_inputs.type(torch.float) #[b,60,84=21*4]
39 | real_projections = real_projections.type(torch.float)
40 |
41 | batch_size, step, dim = real_inputs.size()
42 | times = times.numpy()
43 | real_queue_idx = real_queue_idx.numpy()
44 | virtual_queue = [None] * batch_size
45 |
46 | for j in range(step):
47 | virtual_inputs, vt_1 = loader.dataset.get_virtual_data(
48 | virtual_queue, real_queue_idx, times[:, j], times[:, j+1], times[:, 0], batch_size, number_virtual, real_postion[:,j])
49 | real_inputs_step = real_inputs[:,j,:]
50 | if USE_CUDA:
51 | real_inputs_step = real_inputs_step.cuda()
52 | virtual_inputs = virtual_inputs.cuda()
53 | real_postion_anchor = real_postion[:,j].cuda()
54 |
55 | out = real_inputs_step[:,40:44]
56 |
57 | virtual_position = virtual_inputs[:, -4:]
58 | pos = torch_QuaternionProduct(virtual_position, real_postion_anchor)
59 |
60 | out = torch_QuaternionProduct(out, pos)
61 |
62 | if USE_CUDA:
63 | out = out.cpu().detach().numpy()
64 |
65 | virtual_queue = loader.dataset.update_virtual_queue(batch_size, virtual_queue, out, times[:,j+1])
66 | return np.squeeze(virtual_queue, axis=0)
67 |
68 | def inference(cf, data_path, USE_CUDA):
69 | print("-----------Load Dataset----------")
70 | test_loader = get_inference_data_loader(cf, data_path)
71 | data = test_loader.dataset.data[0]
72 | test_loader.dataset.no_flo = True
73 | test_loader.dataset.static_options = get_static(ratio = 0)
74 |
75 | start_time = time.time()
76 | virtual_queue = run(test_loader, cf, USE_CUDA=USE_CUDA)
77 |
78 | virtual_data = np.zeros((1,5))
79 | virtual_data[:,1:] = virtual_queue[0, 1:]
80 | virtual_data[:,0] = data.frame[0,0]
81 | virtual_queue = np.concatenate((virtual_data, virtual_queue), axis = 0)
82 |
83 | files = os.listdir(data_path)
84 | for f in files:
85 | if f[-3:] == "mp4" and "no_ois" not in f and "gimbal" not in f.lower():
86 | video_name = f[:-4]
87 | print(video_name)
88 | virtual_path = os.path.join("./test", cf['data']['exp'], video_name+'.txt')
89 |
90 | print("------Start Visual Result--------")
91 | rotations_real, lens_offsets_real = get_rotations(data.frame[:data.length], data.gyro, data.ois, data.length)
92 | fig_path = os.path.join(data_path, video_name+"_real.jpg")
93 | visual_rotation(rotations_real, lens_offsets_real, None, None, None, None, fig_path)
94 |
95 | return
96 |
97 | def main(args = None):
98 | config_file = args.config
99 | dir_path = args.dir_path
100 | cf = yaml.load(open(config_file, 'r'))
101 |
102 | USE_CUDA = cf['data']["use_cuda"]
103 |
104 | checkpoints_dir = cf['data']['checkpoints_dir']
105 | checkpoints_dir = make_dir(checkpoints_dir, cf)
106 |
107 | data_name = sorted(os.listdir(dir_path))
108 | for i in range(len(data_name)):
109 | print("Running: " + str(i+1) + "/" + str(len(data_name)))
110 | inference(cf, os.path.join(dir_path, data_name[i]), USE_CUDA)
111 | return
112 |
113 | if __name__ == '__main__':
114 | parser = argparse.ArgumentParser("Training model")
115 | parser.add_argument("--config", default="./conf/stabilzation.yaml", help="Config file.")
116 | parser.add_argument("--dir_path", default="./video")
117 | args = parser.parse_args()
118 | main(args = args)
--------------------------------------------------------------------------------
/dvs/loss.py:
--------------------------------------------------------------------------------
1 | import torch
2 | import numpy as np
3 | from torch.autograd import Variable
4 | import operator
5 | import torch.nn.functional as F
6 | import matplotlib.pyplot as plt
7 | from gyro import (
8 | torch_QuaternionProduct,
9 | torch_QuaternionReciprocal,
10 | get_static,
11 | torch_GetVirtualProjection,
12 | torch_GetForwardGrid,
13 | torch_GetWarpingFlow,
14 | torch_ConvertAxisAngleToQuaternion,
15 | torch_ConvertQuaternionToAxisAngle,
16 | torch_norm_quat,
17 | torch_GetHomographyTransformFromProjections,
18 | torch_ApplyTransform
19 | )
20 |
21 | class C2_Smooth_loss(torch.nn.Module):
22 | def __init__(self):
23 | super(C2_Smooth_loss, self).__init__()
24 | self.MSE = torch.nn.MSELoss()
25 |
26 | def forward(self, Qt, Qt_1, Qt_2):
27 | detaQt_1 = torch_QuaternionProduct(Qt_1, torch_QuaternionReciprocal(Qt_2))
28 | return self.MSE(Qt, detaQt_1)
29 |
30 | class C1_Smooth_loss(torch.nn.Module):
31 | def __init__(self):
32 | super(C1_Smooth_loss, self).__init__()
33 | self.MSE = torch.nn.MSELoss()
34 |
35 | def forward(self, v_r_axis, v_axis_t_1 = None, real_postion = None):
36 | quat_zero = torch.zeros(v_r_axis.shape).cuda()
37 | quat_zero[:,3] = 1
38 | return self.MSE(v_r_axis, quat_zero)
39 |
40 | class Follow_loss(torch.nn.Module):
41 | def __init__(self):
42 | super(Follow_loss, self).__init__()
43 | self.MSE = torch.nn.MSELoss()
44 |
45 | def forward(self, virtual_quat, real_quat, real_postion = None):
46 | if real_postion is not None:
47 | real_quat = torch_QuaternionProduct(real_quat, real_postion)
48 | return self.MSE(virtual_quat, real_quat)
49 |
50 | class Stay_loss(torch.nn.Module):
51 | def __init__(self):
52 | super(Stay_loss, self).__init__()
53 | self.zero = torch.tensor([0.0,0.0,0.0,1.0]).cuda()
54 |
55 | def forward(self, virtual_quat):
56 | return torch.mean(torch.abs(virtual_quat - self.zero))
57 |
58 |
59 | class Angle_loss(torch.nn.Module):
60 | def __init__(self):
61 | super(Angle_loss, self).__init__()
62 | self.MSE = torch.nn.MSELoss()
63 |
64 | def forward(self, Q1, Q2, threshold = 0.5236, logistic_beta1 = 100):
65 | batch_size = Q1.shape[0]
66 | Q3 = torch_norm_quat(torch_QuaternionProduct(Q2, torch_QuaternionReciprocal(Q1)))
67 | theta = torch.zeros(batch_size).cuda()
68 | index = (Q3[:,3] < 1).nonzero()
69 | theta[index] = torch.acos(Q3[index,3]) * 2
70 | loss = torch.mean(theta * (1 / (1 + torch.exp(-logistic_beta1 * (theta - threshold)))))
71 | return loss, theta
72 |
73 | class Optical_loss(torch.nn.Module):
74 | def __init__(self):
75 | super(Optical_loss, self).__init__()
76 | self.static_options = get_static()
77 | self.mesh = get_mesh()
78 |
79 | def forward(self, Vt, Vt_1, flo, flo_back, real_projection_t, real_projection_t_1):
80 | virtual_projection_t = torch_GetVirtualProjection(self.static_options, Vt)
81 | virtual_projection_t_1 = torch_GetVirtualProjection(self.static_options, Vt_1)
82 |
83 | b, h, w = flo.size()[:3]
84 |
85 | grid_t = torch_GetForwardGrid(self.static_options, real_projection_t, virtual_projection_t)[:,:2,:,:].permute(0,1,3,2)
86 | grid_t = torch.nn.functional.upsample_bilinear(grid_t, size = (h, w)) # [B,C(xy),H,W]
87 |
88 | grid_t_1 = torch_GetForwardGrid(self.static_options, real_projection_t_1, virtual_projection_t_1)[:,:2,:,:].permute(0,1,3,2)
89 | grid_t_1 = torch.nn.functional.upsample_bilinear(grid_t_1, size = (h, w)) # [B,C(xy),H,W]
90 |
91 | mesh = self.mesh.repeat(b, 1, 1, 1)
92 | flo = flo + mesh
93 | flo_back = flo_back + mesh # [B,H,W,C]
94 |
95 | valid = (flo[:,:,:,0] > 0) * (flo[:,:,:,1] > 0) * (flo[:,:,:,0] < 1) * (flo[:,:,:,1] < 1)
96 | valid_f = torch.unsqueeze(valid, dim = 3).type(torch.cuda.FloatTensor)
97 | valid = torch.unsqueeze(valid, dim = 1).type(torch.cuda.FloatTensor)
98 |
99 | valid_back = (flo_back[:,:,:,0] > 0) * (flo_back[:,:,:,1] > 0) * (flo_back[:,:,:,0] < 1) * (flo_back[:,:,:,1] < 1)
100 | valid_back_f = torch.unsqueeze(valid_back, dim = 3).type(torch.cuda.FloatTensor)
101 | valid_back = torch.unsqueeze(valid_back, dim = 1).type(torch.cuda.FloatTensor) # [B,C,H,W]
102 |
103 | flo = (flo * 2 - 1) * valid_f
104 | flo_back = (flo_back * 2 - 1) * valid_back_f
105 |
106 | forward_t = torch.nn.functional.grid_sample(grid_t, flo, padding_mode="reflection") # default bilinear
107 | backward_t_1 = torch.nn.functional.grid_sample(grid_t_1, flo_back, padding_mode="reflection") # default bilinear
108 |
109 | forward_diff = ((forward_t - grid_t_1) * valid) ** 2
110 | backward_diff = ((backward_t_1 - grid_t) * valid_back) ** 2
111 |
112 | forward_loss = torch.sum(forward_diff, dim = (1,2,3)) / torch.sum(valid, dim = (1,2,3))
113 | backward_loss = torch.sum(backward_diff, dim = (1,2,3)) / torch.sum(valid_back, dim = (1,2,3))
114 |
115 | loss = forward_loss + backward_loss
116 | loss = torch.min(loss, loss - loss + 1) #[0]
117 | loss = torch.sum(loss) / b
118 |
119 | return loss
120 |
121 |
122 | def get_mesh(height = 270, width = 480, USE_CUDA = True):
123 | xs = np.linspace(0, 1, width, endpoint = False) + 0.5 / height
124 | ys = np.linspace(0, 1, height, endpoint = False) + 0.5 / width
125 | xmesh, ymesh = np.meshgrid(xs, ys)
126 | # Reshape the sampling positions to a H x W x 2 tensor
127 | mesh = torch.Tensor(np.expand_dims(np.moveaxis(np.array(list(zip(xmesh, ymesh))), 1, 2),axis=0))
128 | if USE_CUDA:
129 | mesh = mesh.cuda()
130 | return mesh
131 |
132 | class Undefine_loss(torch.nn.Module):
133 | def __init__(self, ratio = 0.08, inner_ratio = 0.04, USE_CUDA = True):
134 | super(Undefine_loss, self).__init__()
135 | self.static_options = get_static()
136 | self.inner_ratio = inner_ratio
137 | width = self.static_options["width"]
138 | height = self.static_options["height"]
139 | x0, x1, y0, y1 = \
140 | int(width*ratio), int(width*(1-ratio)), int(height*ratio), int(height*(1-ratio))
141 | self.norm = torch.Tensor([width, height, 1])
142 | self.p00 = torch.Tensor([x0, y0, 1])
143 | self.p01 = torch.Tensor([x0, y1, 1])
144 | self.p10 = torch.Tensor([x1, y0, 1])
145 | self.p11 = torch.Tensor([x1, y1, 1])
146 | if USE_CUDA == True:
147 | self.p00 = self.p00.cuda()
148 | self.p01 = self.p01.cuda()
149 | self.p10 = self.p10.cuda()
150 | self.p11 = self.p11.cuda()
151 | self.norm = self.norm.cuda()
152 |
153 | def forward(self, Vt, Rt, ratio = 0.04):
154 | batch_size = Vt.size()[0]
155 |
156 | row_mid = self.static_options["num_grid_rows"] // 2
157 | virtual_projection_t = torch_GetVirtualProjection(self.static_options, Vt)
158 |
159 | real_projection_t = torch_GetVirtualProjection(self.static_options, Rt)
160 |
161 | # virtual projection and real projection
162 | transform = torch_GetHomographyTransformFromProjections(real_projection_t, virtual_projection_t)
163 |
164 | p00 = (torch_ApplyTransform(transform, self.p00) / self.norm)[:,:2]
165 | p01 = (torch_ApplyTransform(transform, self.p01) / self.norm)[:,:2]
166 | p10 = (torch_ApplyTransform(transform, self.p10) / self.norm)[:,:2]
167 | p11 = (torch_ApplyTransform(transform, self.p11) / self.norm)[:,:2]
168 |
169 | loss = torch.stack((self.get_loss(p00), self.get_loss(p01), self.get_loss(p10), self.get_loss(p11)),dim = 1)
170 | loss,_ = torch.max(loss, dim = 1)
171 |
172 | loss = torch.min(loss, loss - loss + 1) #[0]
173 | loss = torch.sum(loss) / batch_size
174 |
175 | return loss
176 |
177 | def get_loss(self, p):
178 | d = (p - self.inner_ratio) * (p < self.inner_ratio).type(torch.cuda.FloatTensor) + \
179 | (1 - self.inner_ratio - p) * (p > (1 - self.inner_ratio)).type(torch.cuda.FloatTensor)
180 | return torch.sum(d**2, dim = 1)
181 |
--------------------------------------------------------------------------------
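For reference, the corner penalty in Undefine_loss.get_loss reduces to a squared hinge on how far each projected corner falls outside the inner margin. A minimal CPU-only sketch of that reduction; the helper name and example values are illustrative assumptions, not part of the repository:

import torch

def corner_margin_penalty(p, inner_ratio=0.04):
    # p: [B, 2] corner positions normalized to [0, 1]; penalize the squared
    # distance by which a corner leaves the [inner_ratio, 1 - inner_ratio] box.
    below = (p - inner_ratio) * (p < inner_ratio).float()
    above = (1 - inner_ratio - p) * (p > (1 - inner_ratio)).float()
    d = below + above
    return torch.sum(d ** 2, dim=1)

corners = torch.tensor([[0.02, 0.5], [0.5, 0.5]])
print(corner_margin_penalty(corners))  # tensor([4.0000e-04, 0.0000e+00])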
/dvs/metrics.py:
--------------------------------------------------------------------------------
1 | import os
2 | import sys
3 | import numpy as np
4 | import cv2
5 | import math
6 | import pdb
7 | import matplotlib.pyplot as plt
8 | from printer import Printer
9 | from warp import video2frame_one_seq
10 | import datetime
11 | import torch
12 | import copy
13 | import csv
14 | import copyreg
15 | import shutil
16 | import matplotlib.pyplot as plt
17 | from util import crop_video
18 |
19 | def _pickle_keypoints(point):
20 | return cv2.KeyPoint, (*point.pt, point.size, point.angle,
21 | point.response, point.octave, point.class_id)
22 |
23 | copyreg.pickle(cv2.KeyPoint().__class__, _pickle_keypoints)
24 |
25 | os.environ["CUDA_VISIBLE_DEVICES"] = "0"
26 |
27 | h_size = 480
28 | w_size = 640
29 |
30 | def crop_metric(M):
31 | points = np.array([[0,0,1],[0,h_size,1], [w_size,0,1], [w_size,h_size,1]]).T
32 | result = np.matmul(M,points).T
33 | result = result[:,:2]/result[:,2:]
34 | w_out = 1 - max(result[0,0], result[1,0], w_size - result[2,0], w_size - result[3,0], 0)/w_size
35 | h_out = 1 - max(result[0,1], result[2,1], h_size - result[1,1], h_size - result[3,1], 0)/h_size
36 | return w_out, h_out
37 |
38 | # https://stackoverflow.com/questions/34389125/how-to-get-the-scale-factor-of-getperspectivetransform-in-opencv
39 | def get_scale(M):
40 | h1 = M[0, 0]
41 | h2 = M[0, 1]
42 | h3 = M[0, 2]
43 | h4 = M[1, 0]
44 | h5 = M[1, 1]
45 | h6 = M[1, 2]
46 | h7 = M[2, 0]
47 | h8 = M[2, 1]
48 | QR = np.array([[h1-(h7*h3), h2-(h8*h3)], [h4-(h7*h6), h5-(h8*h6)]])
49 | Q, R = np.linalg.qr(QR)
50 | return abs(R[0,0]), abs(R[1,1])
51 |
52 | # https://stackoverflow.com/questions/21019338/how-to-change-the-homography-with-the-scale-of-the-image
53 | def get_rescale_matrix(M, sx, sy):
54 | S = np.eye(3, dtype = float)
55 | S[0,0] = sx
56 | S[1,1] = sy
57 |
58 | S1 = np.eye(3, dtype = float)
59 | S1[0,0] = 1/sx
60 | S1[1,1] = 1/sy
61 | return np.matmul(M, S1)
62 |
63 | # Parts of this code are adapted from https://github.com/jinsc37/DIFRINT/blob/master/metrics.py
64 | def metrics(in_src, out_src, package, crop_scale = False, re_compute = False):
65 | load_dic = None
66 | if re_compute and os.path.exists(package):
67 |         print("Start loading cached results")
68 |         load_dic = torch.load(package)
69 |         print("Finish loading cached results")
70 | dic = {
71 | 'M': None,
72 | 'CR_seq': [],
73 | 'DV_seq': [],
74 | 'SS_t': None,
75 | 'SS_r': None,
76 | 'w_crop':[],
77 | 'h_crop':[],
78 | 'distortion': [],
79 | 'count': 0,
80 | 'in_sift': {},
81 | 'out_sift': {},
82 | 'fft_t': {},
83 | 'fft_r': {}
84 | }
85 |
86 | if load_dic is not None:
87 | dic["in_sift"] = load_dic["in_sift"]
88 | dic["out_sift"] = load_dic["out_sift"]
89 |
90 | frameList_in = sorted(os.listdir(in_src))
91 | frameList = sorted(os.listdir(out_src))
92 | frameList = frameList[:min(len(frameList_in),len(frameList))]
93 |
94 | # Create brute-force matcher object
95 | bf = cv2.BFMatcher()
96 |
97 | # Apply the homography transformation if we have enough good matches
98 | MIN_MATCH_COUNT = 10 #10
99 |
100 | ratio = 0.7 #0.7
101 | thresh = 5.0 #5.0
102 |
103 | Pt = np.asarray([[1.0,0.0,0.0],[0.0,1.0,0.0],[0.0,0.0,1.0]])
104 | P_seq = []
105 | count = 1
106 | for index, f in enumerate(frameList, 0):
107 | if f.endswith('.png'):
108 | # Load the images in gray scale
109 | img1 = cv2.imread(os.path.join(in_src, f), 0)
110 | img1 = cv2.resize(img1, (w_size,h_size), interpolation = cv2.INTER_LINEAR)
111 |
112 | img1o = cv2.imread(os.path.join(out_src, f), 0)
113 | img1o = cv2.resize(img1o, (w_size,h_size), interpolation = cv2.INTER_LINEAR)
114 | sift = cv2.SIFT_create()
115 |
116 | if f in dic["in_sift"]:
117 | keyPoints1, descriptors1 = dic["in_sift"][f]
118 | else:
119 | # Detect the SIFT key points and compute the descriptors for the two images
120 | keyPoints1, descriptors1 = sift.detectAndCompute(img1, None)
121 | dic["in_sift"][f] = (keyPoints1, descriptors1)
122 |
123 | if f in dic["out_sift"]:
124 | keyPoints1o, descriptors1o = dic["out_sift"][f]
125 | else:
126 | keyPoints1o, descriptors1o = sift.detectAndCompute(img1o, None)
127 | dic["out_sift"][f] = (keyPoints1o, descriptors1o)
128 |
129 | # Match the descriptors
130 | matches = bf.knnMatch(descriptors1, descriptors1o, k=2)
131 |
132 | # Select the good matches using the ratio test
133 | goodMatches = []
134 |
135 | for m, n in matches:
136 | if m.distance < ratio * n.distance:
137 | goodMatches.append(m)
138 |
139 | if len(goodMatches) > MIN_MATCH_COUNT:
140 | # Get the good key points positions
141 | sourcePoints = np.float32([ keyPoints1[m.queryIdx].pt for m in goodMatches ]).reshape(-1, 1, 2)
142 | destinationPoints = np.float32([ keyPoints1o[m.trainIdx].pt for m in goodMatches ]).reshape(-1, 1, 2)
143 |
144 | M, mask = cv2.findHomography(sourcePoints, destinationPoints, method=cv2.RANSAC, ransacReprojThreshold=thresh)
145 | im_dst = cv2.warpPerspective(img1, M, (w_size,h_size))
146 |
147 | cm = []
148 | for i in range(6):
149 | for j in range(6):
150 | hs = int(h_size * (0.2 + 0.1 * i))
151 | he = int(h_size * (0.3 + 0.1 * i))
152 | ws = int(w_size * (0.2 + 0.1 * j))
153 | we = int(w_size * (0.3 + 0.1 * j))
154 | cm.append(np.corrcoef(img1o[hs:he, ws:we].flat, im_dst[hs:he, ws:we].flat))
155 | dic["distortion"].append(cm)
156 |
157 | if crop_scale:
158 | sx, sy = get_scale(M)
159 | M_scale = get_rescale_matrix(M, sx, sy)
160 | w_crop, h_crop = crop_metric(M_scale)
161 | else:
162 | w_crop, h_crop = crop_metric(M)
163 | dic["w_crop"].append(w_crop)
164 | dic["h_crop"].append(h_crop)
165 |
166 | # Obtain Scale, Translation, Rotation, Distortion value
167 | sx = M[0, 0]
168 | sy = M[1, 1]
169 | scaleRecovered = math.sqrt(np.abs(sx*sy))
170 |
171 | w, _ = np.linalg.eig(M[0:2,0:2])
172 | w = np.sort(w)[::-1]
173 | DV = w[1]/w[0]
174 | #pdb.set_trace()
175 |
176 | dic["CR_seq"].append(1.0/scaleRecovered)
177 | dic["DV_seq"].append(DV)
178 |
179 | # For Stability score calculation
180 | if count < len(frameList):
181 | f_path = f[:-9] + '%05d.png' % (int(f[-9:-4])+1)
182 | if f_path in dic["out_sift"]:
183 | keyPoints2o, descriptors2o = dic["out_sift"][f_path]
184 | else:
185 | img2o = cv2.imread(os.path.join(out_src, f_path), 0)
186 | img2o = cv2.resize(img2o, (w_size,h_size), interpolation = cv2.INTER_LINEAR)
187 | keyPoints2o, descriptors2o = sift.detectAndCompute(img2o, None)
188 | dic["out_sift"][f_path] = (keyPoints2o, descriptors2o)
189 |
190 | matches = bf.knnMatch(descriptors1o, descriptors2o, k=2)
191 | goodMatches = []
192 |
193 | for m, n in matches:
194 | if m.distance < ratio * n.distance:
195 | goodMatches.append(m)
196 |
197 | if len(goodMatches) > MIN_MATCH_COUNT:
198 | # Get the good key points positions
199 | sourcePoints = np.float32([ keyPoints1o[m.queryIdx].pt for m in goodMatches ]).reshape(-1, 1, 2)
200 | destinationPoints = np.float32([ keyPoints2o[m.trainIdx].pt for m in goodMatches ]).reshape(-1, 1, 2)
201 |
202 | # Obtain the homography matrix
203 | M, mask = cv2.findHomography(sourcePoints, destinationPoints, method=cv2.RANSAC, ransacReprojThreshold=thresh)
204 |
205 | P_seq.append(np.matmul(Pt, M))
206 | Pt = np.matmul(Pt, M)
207 | if count % 10 ==0:
208 | sys.stdout.write('\rFrame: ' + str(count) + '/' + str(len(frameList)))
209 | sys.stdout.flush()
210 | dic["count"] = count
211 | count += 1
212 |
213 | # Make 1D temporal signals
214 | P_seq_t = np.asarray([1])
215 | P_seq_r = np.asarray([1])
216 |
217 | #pdb.set_trace()
218 | for Mp in P_seq:
219 | sx = Mp[0, 0]
220 | sy = Mp[1, 1]
221 | c = Mp[0, 2]
222 | f = Mp[1, 2]
223 |
224 | transRecovered = math.sqrt(c*c + f*f)
225 | thetaRecovered = math.atan2(sx, sy) * 180 / math.pi
226 |
227 | P_seq_t = np.concatenate((P_seq_t, [transRecovered]), axis=0)
228 | P_seq_r = np.concatenate((P_seq_r, [thetaRecovered]), axis=0)
229 |
230 | P_seq_t = np.delete(P_seq_t, 0)
231 | P_seq_r = np.delete(P_seq_r, 0)
232 |
233 | # FFT
234 | fft_t = np.fft.fft(P_seq_t)
235 | fft_r = np.fft.fft(P_seq_r)
236 | fft_t = abs(fft_t)**2
237 | fft_r = abs(fft_r)**2
238 |
239 | fft_t = np.delete(fft_t, 0)
240 | fft_r = np.delete(fft_r, 0)
241 | fft_t = fft_t[:int(len(fft_t)/2)]
242 | fft_r = fft_r[:int(len(fft_r)/2)]
243 |
244 | dic["fft_t"] = fft_t
245 | dic["fft_r"] = fft_r
246 |
247 | SS_t = np.sum(fft_t[:5])/np.sum(fft_t)
248 | SS_r = np.sum(fft_r[:5])/np.sum(fft_r)
249 |
250 | dic["CR_seq"] = np.array(dic["CR_seq"])
251 | dic["DV_seq"] = np.array(dic["DV_seq"])
252 | dic["w_crop"] = np.array(dic["w_crop"])
253 | dic["h_crop"] = np.array(dic["h_crop"])
254 | dic["distortion"] = np.array(dic["distortion"])
255 | dic["SS_t"] = SS_t
256 | dic["SS_r"] = SS_r
257 |
258 | if not (re_compute and os.path.exists(package)):
259 | torch.save(dic, package)
260 |
261 | DV_seq = np.absolute(dic["DV_seq"])
262 | DV_seq = DV_seq[np.where((DV_seq >= 0.5) & (DV_seq <= 1))]
263 | Distortion = str.format('{0:.4f}', np.nanmin(DV_seq))
264 | Distortion_avg = str.format('{0:.4f}', np.nanmean(DV_seq))
265 |
266 | Trans = str.format('{0:.4f}', dic["SS_t"])
267 | Rot = str.format('{0:.4f}', dic["SS_r"])
268 |
269 | w_crop = crop_rm_outlier(dic["w_crop"])
270 | h_crop = crop_rm_outlier(dic["h_crop"])
271 |
272 | FOV = str.format( '{0:.4f}', min(np.nanmin(w_crop), np.nanmin(h_crop)) )
273 | FOV_avg = str.format( '{0:.4f}', (np.nanmean(w_crop)+np.nanmean(h_crop)) / 2 )
274 |
275 | Correlation_avg = str.format( '{0:.4f}', np.nanmean(dic["distortion"][10:]) )
276 | Correlation_min = str.format( '{0:.4f}', np.nanmin(dic["distortion"][10:]) )
277 |
278 | # Print results
279 | print('\n***Distortion value (Avg, Min):')
280 | print(Distortion_avg +' | '+ Distortion)
281 | print('***Stability Score (Avg, Trans, Rot):')
282 | print(str.format('{0:.4f}', (dic["SS_t"]+dic["SS_r"])/2) +' | '+ Trans +' | '+ Rot )
283 | print("=================")
284 | print('***FOV ratio (Avg, Min):')
285 | print( FOV_avg +' | '+ FOV )
286 | print('***Correlation value (Avg, Min):')
287 | print( Correlation_avg +' | '+ Correlation_min , "\n")
288 |
289 | dic['in_sift'] = 0
290 | dic['out_sift'] = 0
291 | torch.save(dic, package[:-3]+"_light.pt")
292 | return float(FOV)
293 |
294 | def crop_rm_outlier(crop):
295 | crop = np.array(crop)
296 | crop = crop[crop >= 0.5]
297 | return sorted(crop)[5:]
298 |
299 | if __name__ == '__main__':
300 | metric_path = os.path.join("./test/stabilzation/metric")
301 | if not os.path.exists(metric_path):
302 | os.makedirs(metric_path)
303 |
304 | in_video = "./video/s_114_outdoor_running_trail_daytime/ControlCam_20200930_104820.mp4"
305 | in_folder = os.path.join(metric_path, "in_frame")
306 | if not os.path.exists(in_folder):
307 | os.makedirs(in_folder)
308 | print("Convert video to frames")
309 | video2frame_one_seq(in_video, in_folder)
310 |
311 | out_video = "./test/stabilzation/s_114_outdoor_running_trail_daytime_stab.mp4"
312 | out_folder = os.path.join(metric_path, "out_frame")
313 | if not os.path.exists(out_folder):
314 | os.makedirs(out_folder)
315 | print("Convert video to frames")
316 | video2frame_one_seq(out_video, out_folder)
317 |
318 | package = os.path.join(metric_path, "stabilzation.pt")
319 | FOV = metrics(in_folder, out_folder, package)
320 |
321 | crop_path = out_video[:-4] + "_crop.mp4"
322 | crop_video(out_video, crop_path, FOV)
323 |
--------------------------------------------------------------------------------
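The stability scores SS_t and SS_r computed above are low-frequency energy ratios of the accumulated translation and rotation signals. A minimal sketch of that reduction under the same steps as metrics(); the helper name and the test signals are illustrative assumptions:

import numpy as np

def stability_score(signal):
    # Mirror the FFT step in metrics(): drop the DC term, keep the first half
    # of the power spectrum, and take the energy share of the lowest five bins.
    spectrum = np.abs(np.fft.fft(np.asarray(signal))) ** 2
    spectrum = np.delete(spectrum, 0)
    spectrum = spectrum[:len(spectrum) // 2]
    return np.sum(spectrum[:5]) / np.sum(spectrum)

t = np.linspace(0, 1, 200)
print(stability_score(np.sin(2 * np.pi * 2 * t)))   # smooth motion -> close to 1
print(stability_score(np.random.randn(200)))        # jittery motion -> much smaller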
/dvs/printer.py:
--------------------------------------------------------------------------------
1 | import sys
2 |
3 | class Printer(object):
4 | def __init__(self, *files):
5 | self.files = files
6 |
7 |     # Redirect stdout to this Printer
8 | def open(self):
9 | if not hasattr(sys, '_stdout'):
10 | sys._stdout = sys.stdout
11 | sys.stdout = self
12 | return self
13 |
14 |     # Restore the default stdout and close the log files
15 | def close(self):
16 | stdout = sys._stdout
17 | for f in self.files:
18 | if f != stdout:
19 | f.close()
20 | sys.stdout = stdout
21 |
22 |     # Write to every registered file
23 | def write(self, obj):
24 | for f in self.files:
25 | f.write(obj)
26 | f.flush()
27 |
28 | def flush(self):
29 | pass
30 |
31 | if __name__ == '__main__':
32 | print("Start testing")
33 | t = Printer(sys.stdout, open('./test.txt', 'w+')).open()
34 | print("In files")
35 | t.close()
36 | print("Not in files")
--------------------------------------------------------------------------------
/dvs/requirements.txt:
--------------------------------------------------------------------------------
1 | colorama==0.4.4
2 | ffmpeg==1.4
3 | imageio==2.9.0
4 | matplotlib==3.3.4
5 | opencv-contrib-python==4.5.1.48
6 | opencv-python==4.5.1.48
7 | pytz==2021.1
8 | PyYAML==5.4.1
9 | scipy==1.5.4
10 | tensorboardX==2.1
11 | tqdm==4.59.0
--------------------------------------------------------------------------------
/dvs/train.py:
--------------------------------------------------------------------------------
1 | import os
2 | import sys
3 | import torch
4 | import torchvision
5 | import torch.nn as nn
6 | from torch.autograd import Variable
7 |
8 | import time
9 | import yaml
10 | import argparse
11 | import numpy as np
12 | from printer import Printer
13 | from dataset import get_data_loader
14 | from model import Model
15 | import datetime
16 | import copy
17 | from util import make_dir, get_optimizer, AverageMeter, save_train_info, norm_flow
18 | from gyro import torch_QuaternionProduct, torch_QuaternionReciprocal, torch_norm_quat
19 |
20 | os.environ["CUDA_VISIBLE_DEVICES"] = "0"
21 |
22 | def run_epoch(model, loader, cf, epoch, lr, optimizer=None, is_training=True, USE_CUDA=True, clip_norm=0):
23 | no_flo = False
24 | number_virtual, number_real = cf['data']["number_virtual"], cf['data']["number_real"]
25 | avg_loss = AverageMeter()
26 | if is_training:
27 | model.net.train()
28 | model.unet.train()
29 | else:
30 | model.net.eval()
31 | model.unet.eval()
32 | for i, data in enumerate(loader, 0):
33 | # get the inputs; data is a list of [inputs, labels]
34 | real_inputs, times, flo, flo_back, real_projections, real_postion, ois, real_queue_idx = data
35 |         print("Finish loading data")
36 |
37 | real_inputs = real_inputs.type(torch.float) #[b,60,84=21*4]
38 | real_projections = real_projections.type(torch.float)
39 | flo = flo.type(torch.float)
40 | flo_back = flo_back.type(torch.float)
41 | ois = ois.type(torch.float)
42 |
43 | batch_size, step, dim = real_inputs.size()
44 | times = times.numpy()
45 | real_queue_idx = real_queue_idx.numpy()
46 | virtual_queue = loader.dataset.random_init_virtual_queue(batch_size, real_postion[:,0,:].numpy(), times[:,1]) # TODO
47 | # virtual_queue = [None] * batch_size
48 | loss = 0
49 | model.net.init_hidden(batch_size)
50 | for j in range(step):
51 | virtual_inputs, vt_1 = loader.dataset.get_virtual_data(
52 | virtual_queue, real_queue_idx, times[:, j], times[:, j+1], times[:, 0], batch_size, number_virtual, real_postion[:,j])
53 |
54 | real_inputs_step = real_inputs[:,j,:]
55 | inputs = torch.cat((real_inputs_step,virtual_inputs), dim = 1)
56 |
57 | # inputs = Variable(real_inputs_step)
58 | if USE_CUDA:
59 | real_inputs_step = real_inputs_step.cuda()
60 | virtual_inputs = virtual_inputs.cuda()
61 | inputs = inputs.cuda()
62 | if no_flo is False:
63 | flo_step = flo[:,j].cuda()
64 | flo_back_step = flo_back[:,j].cuda()
65 | else:
66 | flo_step = None
67 | flo_back_step = None
68 | vt_1 = vt_1.cuda()
69 | real_projections_t = real_projections[:,j+1].cuda()
70 | real_projections_t_1 = real_projections[:,j].cuda()
71 | real_postion_anchor = real_postion[:,j].cuda()
72 | ois_step = ois[:,j].cuda()
73 |
74 | if no_flo is False:
75 | b, h, w, _ = flo_step.size()
76 | flo_step = norm_flow(flo_step, h, w)
77 | flo_back_step = norm_flow(flo_back_step, h, w)
78 |
79 | if is_training:
80 | if no_flo is False:
81 | flo_out = model.unet(flo_step, flo_back_step)
82 | else:
83 | flo_out = None
84 |
85 | if j < 1:
86 |                     for _ in range(2): # run the first step twice
87 | out = model.net(inputs, flo_out, ois_step)
88 | else:
89 | out = model.net(inputs, flo_out, ois_step)
90 | else:
91 | with torch.no_grad():
92 | if no_flo is False:
93 | flo_out = model.unet(flo_step, flo_back_step)
94 | else:
95 | flo_out = None
96 |
97 | if j < 1:
98 |                         for _ in range(2): # run the first step twice
99 | out = model.net(inputs, flo_out, ois_step)
100 | else:
101 | out = model.net(inputs, flo_out, ois_step)
102 |
103 | if epoch <= 30:
104 | follow = True
105 | else:
106 | follow = False
107 |
108 | if epoch > 30:
109 | undefine = True
110 | else:
111 | undefine = False
112 |
113 | if epoch > 40:
114 | optical = True
115 | else:
116 | optical = False
117 |
118 | loss_step = model.loss(out, vt_1, virtual_inputs, real_inputs_step, \
119 | flo_step, flo_back_step, real_projections_t, real_projections_t_1, real_postion_anchor, \
120 | follow = follow, undefine = undefine, optical = optical, stay = optical)
121 |
122 | loss = loss_step
123 |
124 | virtual_position = virtual_inputs[:, -4:]
125 | pos = torch_QuaternionProduct(virtual_position, real_postion_anchor)
126 | out = torch_QuaternionProduct(out, pos)
127 |
128 | if USE_CUDA:
129 | out = out.cpu().detach().numpy()
130 |
131 | virtual_queue = loader.dataset.update_virtual_queue(batch_size, virtual_queue, out, times[:,j+1])
132 |
133 | if (j+1) % 10 == 0:
134 | print("Step: "+str(j+1)+"/"+str(step))
135 | print(loss)
136 | loss = torch.sum(loss)
137 | if is_training:
138 | optimizer.zero_grad()
139 | loss.backward(retain_graph=True)
140 | if clip_norm:
141 | nn.utils.clip_grad_norm_(model.net.parameters(), max_norm=clip_norm)
142 | nn.utils.clip_grad_norm_(model.unet.parameters(), max_norm=clip_norm)
143 | optimizer.step()
144 |
145 | avg_loss.update(loss.item(), batch_size)
146 |
147 | return avg_loss.avg
148 |
149 |
150 | def train(args = None):
151 | torch.autograd.set_detect_anomaly(True)
152 | config_file = args.config
153 |     cf = yaml.safe_load(open(config_file, 'r'))
154 |
155 | USE_CUDA = cf['data']["use_cuda"]
156 | seed = cf['train']["seed"]
157 |
158 | torch.manual_seed(seed)
159 | if USE_CUDA:
160 | torch.cuda.manual_seed(seed)
161 |
162 | checkpoints_dir = cf['data']['checkpoints_dir']
163 | epochs = cf["train"]["epoch"]
164 | snapshot = cf["train"]["snapshot"]
165 | decay_epoch = cf['train']['decay_epoch']
166 | init_lr = cf["train"]["init_lr"]
167 | lr_decay = cf["train"]["lr_decay"]
168 | lr_step = cf["train"]["lr_step"]
169 | clip_norm = cf["train"]["clip_norm"]
170 | load_model = cf["model"]["load_model"]
171 |
172 | checkpoints_dir = make_dir(checkpoints_dir, cf)
173 |
174 | if load_model is None:
175 | log_file = open(os.path.join(cf["data"]["log"], cf['data']['exp']+'.log'), 'w+')
176 | else:
177 | log_file = open(os.path.join(cf["data"]["log"], cf['data']['exp']+'.log'), 'a')
178 | printer = Printer(sys.stdout, log_file).open()
179 |
180 |     print('----Print Argument Settings------')
181 | for key in cf:
182 | print('{}:'.format(key))
183 | for para in cf[key]:
184 | print('{:50}:{}'.format(para,cf[key][para]))
185 | print('\n')
186 |
187 | # Define the model
188 | model = Model(cf)
189 | optimizer = get_optimizer(cf["train"]["optimizer"], model, init_lr, cf)
190 |
191 | for idx, m in enumerate(model.net.children()):
192 | print('{}:{}'.format(idx,m))
193 | for idx, m in enumerate(model.unet.children()):
194 | print('{}:{}'.format(idx,m))
195 |
196 | if load_model is not None:
197 |         print("------Load Pretrained Model--------")
198 | checkpoint = torch.load(load_model)
199 | model.net.load_state_dict(checkpoint['state_dict'])
200 | model.unet.load_state_dict(checkpoint['unet'])
201 | print("------Resume Training Process-----")
202 | optimizer.load_state_dict(checkpoint['optim_dict'])
203 | epoch_load = checkpoint['epoch']
204 | print("Epoch load: ", epoch_load)
205 | else:
206 | epoch_load = 0
207 |
208 | if USE_CUDA:
209 | model.net.cuda()
210 | model.unet.cuda()
211 | if load_model is not None:
212 | for state in optimizer.state.values():
213 | for k, v in state.items():
214 | if isinstance(v, torch.Tensor):
215 | state[k] = v.cuda()
216 | for param in optimizer.param_groups:
217 | init_lr = param['lr']
218 |
219 | print("-----------Load Dataset----------")
220 | train_loader, test_loader = get_data_loader(cf, no_flo = False)
221 |
222 | print("----------Start Training----------")
223 | currentDT = datetime.datetime.now()
224 | print(currentDT.strftime(" %Y-%m-%d %H:%M:%S"))
225 |
226 | start_time = time.time()
227 |
228 | if lr_step:
229 | decay_epoch = list(range(1+lr_step, epochs+1, lr_step))
230 |
231 | lr = init_lr
232 |
233 | for count in range(epoch_load+1, epochs+1):
234 |         if decay_epoch is not None and count in decay_epoch:
235 | lr *= lr_decay
236 | for param in optimizer.param_groups:
237 | param['lr'] *= lr_decay
238 |
239 | print("Epoch: %d, learning_rate: %.5f" % (count, lr))
240 |
241 | train_loss = run_epoch(model, train_loader, cf, count, lr, optimizer=optimizer, clip_norm=clip_norm, is_training=True, USE_CUDA=USE_CUDA)
242 |
243 | test_loss = run_epoch(model, test_loader, cf, count, lr, is_training=False, USE_CUDA=USE_CUDA)
244 |
245 | time_used = (time.time() - start_time) / 60
246 | print("Epoch %d done | TrLoss: %.4f | TestLoss: %.4f | Time_used: %.4f minutes" % (
247 | count, train_loss, test_loss, time_used))
248 |
249 | if count % snapshot == 0:
250 | save_train_info("epoch", checkpoints_dir, cf, model, count, optimizer)
251 | save_train_info("last", checkpoints_dir, cf, model, count, optimizer)
252 | print("Model stored at epoch %d"%count)
253 |
254 | currentDT = datetime.datetime.now()
255 | print(currentDT.strftime(" %Y-%m-%d %H:%M:%S"))
256 | print("------------End Training----------")
257 | return
258 |
259 | if __name__ == '__main__':
260 | parser = argparse.ArgumentParser("Training model")
261 | parser.add_argument("--config", default="./conf/stabilzation_train.yaml", help="Config file.")
262 | args = parser.parse_args()
263 | train(args = args)
--------------------------------------------------------------------------------
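When lr_step is non-zero, the decay schedule built in train() is purely multiplicative at fixed epoch intervals. A small standalone sketch of that schedule; the helper name and the example numbers are illustrative assumptions:

def lr_at_epoch(epoch, init_lr, lr_step, lr_decay, epochs):
    # Reproduce the schedule from train(): decay_epoch = [1 + lr_step, 1 + 2*lr_step, ...]
    # and the learning rate is multiplied by lr_decay at each of those epochs.
    decay_epoch = list(range(1 + lr_step, epochs + 1, lr_step))
    lr = init_lr
    for e in range(1, epoch + 1):
        if e in decay_epoch:
            lr *= lr_decay
    return lr

# e.g. init_lr=1e-4, lr_step=20, lr_decay=0.5, epochs=100: decays at epochs 21, 41, 61, 81
print(lr_at_epoch(45, 1e-4, 20, 0.5, 100))  # 2.5e-05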
/dvs/util.py:
--------------------------------------------------------------------------------
1 | import os
2 | import torch
3 | import cv2
4 | from itertools import chain
5 | from warp import load_video, save_video
6 | import numpy as np
7 | import matplotlib.pyplot as plt
8 | from gyro import get_rotations
9 | import shutil
10 |
11 | def save_train_info(name, checkpoints_dir, cf, model, count, optimizer = None):
12 | path = None
13 | if name == "last":
14 | path = os.path.join(checkpoints_dir, cf['data']['exp']+'_last.checkpoint')
15 | elif name == "best":
16 | path = os.path.join(checkpoints_dir, cf['data']['exp']+'_best.checkpoint')
17 | else:
18 | path = os.path.join(checkpoints_dir, cf['data']['exp']+'_epoch%d.checkpoint'%count)
19 | torch.save(model.save_checkpoint(epoch = count, optimizer=optimizer), path)
20 |
21 | def make_dir(checkpoints_dir ,cf):
22 | inference_path = "./test"
23 | if not os.path.exists(checkpoints_dir):
24 | os.makedirs(checkpoints_dir)
25 | if not os.path.exists(cf["data"]["log"]):
26 | os.makedirs(cf["data"]["log"])
27 | if not os.path.exists(inference_path):
28 | os.makedirs(inference_path)
29 |
30 | inference_path = os.path.join(inference_path, cf['data']['exp'])
31 | if not os.path.exists(inference_path):
32 | os.makedirs(inference_path)
33 | checkpoints_dir = os.path.join(checkpoints_dir, cf['data']['exp'])
34 | if not os.path.exists(checkpoints_dir):
35 | os.makedirs(checkpoints_dir)
36 | return checkpoints_dir
37 |
38 | def get_optimizer(optimizer, model, init_lr, cf):
39 | if optimizer == "adam":
40 | optimizer = torch.optim.Adam(chain(model.net.parameters(), model.unet.parameters()), lr=init_lr, weight_decay=cf["train"]["weight_decay"])
41 | elif optimizer == "sgd":
42 | optimizer = torch.optim.SGD(chain(model.net.parameters(), model.unet.parameters()), lr=init_lr, momentum=cf["train"]["momentum"])
43 | return optimizer
44 |
45 | def crop_video(in_path, out_path, crop_ratio): # assumes 1920x1080 input frames
46 | frame_array, fps, size = load_video(in_path)
47 | hs = int((1-crop_ratio)*1080) + 1
48 | he = int(crop_ratio*1080) - 1
49 | ws = int((1-crop_ratio)*1920) + 1
50 | we = int(crop_ratio*1920) - 1
51 | for i in range(len(frame_array)):
52 | frame_array[i] = cv2.resize(frame_array[i][hs:he,ws:we,:], size, interpolation = cv2.INTER_LINEAR)
53 | save_video(out_path, frame_array, fps, size= size)
54 |
55 | def norm_flow(flow, h, w):
56 | if flow.shape[2] == 2:
57 | flow[:,:,0] /= h
58 | flow[:,:,1] /= w
59 | else:
60 | flow[:,:,:,0] /= h
61 | flow[:,:,:,1] /= w
62 | return flow
63 |
64 | class AverageMeter(object):
65 | def __init__(self):
66 | self.reset()
67 |
68 | def reset(self):
69 | self.avg = 0
70 | self.sum = 0
71 | self.cnt = 0
72 |
73 | def update(self, val, n=1):
74 | self.sum += val * n
75 | self.cnt += n
76 | if self.cnt > 0:
77 | self.avg = self.sum / self.cnt
--------------------------------------------------------------------------------
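A quick usage sketch for AverageMeter as it is used to track the epoch loss in train.py; the values below are made up for illustration:

from util import AverageMeter

meter = AverageMeter()
meter.update(2.0, n=4)   # a batch of 4 samples with mean loss 2.0
meter.update(1.0, n=4)   # a batch of 4 samples with mean loss 1.0
print(meter.avg)         # 1.5, the sample-weighted running average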
/dvs/warp/__init__.py:
--------------------------------------------------------------------------------
1 | from .warping import (
2 | warp_video
3 | )
4 | from .read_write import (
5 | save_video,
6 | load_video,
7 | video2frame_one_seq
8 | )
--------------------------------------------------------------------------------
/dvs/warp/rasterizer.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | import matplotlib.pyplot as plt
3 | from numpy import array
4 | import torch
5 | import cv2
6 | import time
7 |
8 | device = torch.device("cuda")
9 |
10 | def Rasterization(image, grid, get_mesh_only = False):
11 |     # grid: [rows, cols, 4], normalized to [0, 1]; [:, :, :2] are the warped (x, y) mesh positions, [:, :, 2:] the source (x, y) positions
12 | shape = image.size()
13 | height = shape[1]
14 | width = shape[2]
15 | wapper_upper_triangle, wapper_lower_triangle = grid_to_triangle(grid[:,:,:2])
16 | origin_upper_triangle, origin_lower_triangle = grid_to_triangle(grid[:,:,2:])
17 |
18 |
19 | [xmax, xmin, ymax, ymin], xlength, ylength = grid_size(wapper_upper_triangle, wapper_lower_triangle, height, width)
20 |
21 | xratio = xlength / width
22 | yratio = ylength / height
23 |
24 | wapper_triangle = torch.stack((wapper_upper_triangle,wapper_lower_triangle),dim = 1).to(device) # grid * upper/lower * point * xy
25 | origin_triangle = torch.stack((origin_upper_triangle,origin_lower_triangle),dim = 1).to(device) # grid * upper/lower * point * xy
26 |
27 | tran_triangle = torch.zeros(wapper_triangle.size()).to(device)
28 |
29 | tran_triangle[:,:,:,0] = (wapper_triangle[:,:,:,0] - xmin.view(-1,1,1).to(device)/width) / xratio
30 | tran_triangle[:,:,:,1] = (wapper_triangle[:,:,:,1] - ymin.view(-1,1,1).to(device)/height) / yratio
31 |
32 | mask = triangle2mask(tran_triangle, ylength, xlength) # consuming
33 |
34 | mask = torch.unsqueeze(mask, 4)
35 | origin_triangle = torch.unsqueeze(origin_triangle, 1)
36 |
37 | grid_sample = origin_triangle * mask # consuming
38 | grid_sample = torch.sum(torch.sum(grid_sample, dim = 3), dim = 2).view(-1,ylength,xlength,2) # consuming
39 |
40 | gxmin = min(0, int(torch.min(xmin)))
41 | gxmax = int(torch.max(xmin) + xlength)
42 | gymin = min(0, int(torch.min(ymin)))
43 | gymax = int(torch.max(ymin) + ylength)
44 | grid_merge = torch.zeros((max(gymax-gymin, height, height - gymin),max(gxmax - gxmin, width, width - gxmin),2)).to(device)
45 | for i in range(grid_sample.size()[0]):
46 | x_s = int(xmin[i] - gxmin)
47 | x_e = int(xmin[i] + xlength - gxmin)
48 | y_s = int(ymin[i] - gymin)
49 | y_e = int(ymin[i] + ylength -gymin)
50 | grid_merge[ y_s:y_e, x_s:x_e, :] += grid_sample[i, :, :, :]
51 |
52 | # grid_merge = grid_merge[min(-gxmin,0):min(-gxmin,0)+height, min(-gymin,0):min(-gymin,0)+width, :]
53 | grid_merge = grid_merge[-gymin:-gymin+height, -gxmin:-gxmin+width, :]
54 | # if get_mesh_only:
55 | # grid_merge = grid_merge.cpu().numpy()
56 | # mesh_grid = generate_mesh_grid(height, width)
57 | # out = grid_merge - mesh_grid
58 | # return np.concatenate((out[:,:,1:],out[:,:,:1]),2)
59 |
60 | shift = torch.tensor([0.5/height,0.5/width])[None, None, :].to(device)
61 | grid_merge = (grid_merge + 1*shift) * 2 - 1
62 |
63 |     image[:3,:2,:2] = 0 # black out the top-left pixels so uncovered grid cells (which sample near (-1,-1)) render black
64 |
65 | image = torch.unsqueeze(image, 0).to(device)
66 | grid_merge = torch.unsqueeze(grid_merge, 0)
67 |
68 | image = torch.nn.functional.grid_sample(image, grid_merge) # default bilinear
69 |
70 | image = torch.squeeze(image, 0)
71 | return image.cpu()
72 |
73 | def grid_to_triangle(grid):
74 | grid_shape = grid.size()
75 | num = (grid_shape[0] - 1) * (grid_shape[1] - 1)
76 |
77 | upper_triangle = grid[:-1, :-1, :, None]
78 | upper_triangle = torch.cat(( upper_triangle, grid[1:, :-1, :, None]), dim = 3)
79 | upper_triangle = torch.cat(( upper_triangle, grid[:-1, 1:, :, None]), dim = 3)
80 | upper_triangle = upper_triangle.view(num, 2, 3)
81 | upper_triangle = torch.transpose(upper_triangle, 1, 2) # grid * point * xy
82 |
83 | lower_triangle = grid[:-1, 1:, :, None]
84 | lower_triangle = torch.cat(( lower_triangle, grid[1:, :-1, :, None]), dim = 3)
85 | lower_triangle = torch.cat(( lower_triangle, grid[1:, 1:, :, None]), dim = 3)
86 | lower_triangle = lower_triangle.view(num, 2, 3)
87 | lower_triangle = torch.transpose(lower_triangle, 1, 2)
88 |
89 | return upper_triangle, lower_triangle # grid * point * xy
90 |
91 | def grid_size(upper_triangle, lower_triangle, height, width):
92 | wapper_grid = torch.cat((upper_triangle, lower_triangle),dim =1)
93 | xmax = torch.floor(torch.max(wapper_grid[:,:,0]*width, 1)[0]) + 1
94 | ymax = torch.floor(torch.max(wapper_grid[:,:,1]*height, 1)[0]) + 1
95 | xmin = torch.floor(torch.min(wapper_grid[:,:,0]*width, 1)[0])
96 | ymin = torch.floor(torch.min(wapper_grid[:,:,1]*height, 1)[0])
97 |
98 | xlength = int(torch.max(xmax - xmin))
99 | ylength = int(torch.max(ymax - ymin))
100 |
101 | return [xmax, xmin, ymax, ymin], xlength, ylength
102 |
103 | def generate_mesh_grid(height, width):
104 | # Create a grid of sampling positions
105 | xs = np.linspace(0, 1, width, endpoint=False)
106 | ys = np.linspace(0, 1, height, endpoint=False)
107 | xmesh, ymesh = np.meshgrid(xs, ys)
108 | # Reshape the sampling positions to a H x W x 2 tensor
109 | return np.moveaxis(array(list(zip(xmesh, ymesh))), 1, 2)
110 |
111 | def triangle2mask(d, height, width): # d: [N x T x 3 x 2]
112 | N = d.size()[0] # batch size
113 | T = d.size()[1] # triangle number
114 | P = height * width # The number of pixels in the output image.
115 |
116 | area = edgefunc(d[:, :, 1, :], d[:, :, 2, :], d[:, :, None, 0, :])
117 |
118 | gridcpu = generate_mesh_grid(height, width)
119 |
120 | gridcpu = np.reshape(gridcpu, (height*width, 2))
121 |
122 | grid = torch.Tensor(gridcpu)
123 | grid = grid.unsqueeze(0).repeat((N, T, 1, 1)) # [N x T x P x 2]
124 |
125 | grid = grid.to(device)
126 |
127 | # Evaluate the edge functions at every position.
128 | # We should get a [N x P] vector out of each.
129 | w0 = edgefunc(d[:, :, 1, :], d[:, :, 2, :], grid) / area
130 | w1 = edgefunc(d[:, :, 2, :], d[:, :, 0, :], grid) / area
131 | w2 = edgefunc(d[:, :, 0, :], d[:, :, 1, :], grid) / area
132 |
133 | # Only pixels inside the triangles will have color
134 | # [N x P]
135 |
136 | mask = (w0 > 0) & (w1 > 0) & (w2 > 0)
137 | mask = torch.unsqueeze(mask, 3).type(torch.cuda.FloatTensor)
138 |
139 | w = torch.stack((w0,w1,w2),dim = 3) * mask
140 |
141 | return torch.transpose(w, 1, 2) # [N x P x T x 3]
142 |
143 |
144 | def edgefunc(v0, v1, p):
145 | """
146 | let P = H * W
147 | v0 and v1 have vertex positions for all T triangles.
148 | Their shapes are [N x T X 2]
149 | p is a list of sampling points as a [N x T X P x 2] tensor.
150 | Each of the T triangles has an [P x 2] matrix of sampling points.
151 | returns a [N x T x P] matrix
152 | """
153 | P = p.size()[2]
154 |
155 | # Take all the x and y coordinates of all the positions as a
156 | # [N x S] tensor
157 | py = p[:, :, :, 1]
158 | px = p[:, :, :, 0]
159 |
160 | # We need to manually broadcast the vector to cover all sample points
161 | x10 = v0[:, :, 0] - v1[:, :, 0] # [N x T]
162 | y01 = v1[:, :, 1] - v0[:, :, 1] # [N x T]
163 |
164 | x10 = x10.unsqueeze(2).repeat((1, 1, P)) # [N x T x P]
165 | y01 = y01.unsqueeze(2).repeat((1, 1, P)) # [N x T x P]
166 |
167 | cross = v0[:,:,1]*v1[:,:,0] - v0[:,:,0]*v1[:,:,1] # [N x T]
168 | cross = cross.unsqueeze(2).repeat((1, 1, P)) # [N x T x P]
169 |
170 | return y01*px + x10*py + cross
171 |
172 | if __name__ == '__main__':
173 | print(generate_mesh_grid(2,3))
--------------------------------------------------------------------------------
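triangle2mask decides pixel coverage with the classic edge-function test: a point lies inside a triangle when all three edge functions share the same sign. A scalar sketch of the same expression used in edgefunc; the triangle winding below is chosen so interior points come out positive, and the function names are illustrative:

import numpy as np

def edge(v0, v1, p):
    # Scalar form of edgefunc: (v1y - v0y)*px + (v0x - v1x)*py + (v0y*v1x - v0x*v1y)
    return (v1[1] - v0[1]) * p[0] + (v0[0] - v1[0]) * p[1] + (v0[1] * v1[0] - v0[0] * v1[1])

def inside(tri, p):
    w0 = edge(tri[1], tri[2], p)
    w1 = edge(tri[2], tri[0], p)
    w2 = edge(tri[0], tri[1], p)
    return w0 > 0 and w1 > 0 and w2 > 0

tri = np.array([[0.0, 0.0], [0.0, 1.0], [1.0, 0.0]])
print(inside(tri, np.array([0.2, 0.2])), inside(tri, np.array([0.9, 0.9])))  # True False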
/dvs/warp/read_write.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | import cv2
3 | import os
4 | from PIL import Image, ImageDraw, ImageFont
5 | import matplotlib.pyplot as plt
6 | import ffmpeg
7 | import json
8 | import torch
9 | import argparse
10 |
11 | def load_video(path, save_dir = None, resize = None, length = -1): # N x H x W x C
12 | vidcap = cv2.VideoCapture(path)
13 | fps = vidcap.get(cv2.CAP_PROP_FPS)
14 | success,image = vidcap.read()
15 | print(image.shape)
16 | height, width, layers = image.shape
17 | if resize is None:
18 | size = (width,height)
19 | elif type(resize) is int:
20 | size = (width//resize,height//resize)
21 | else:
22 | size = resize
23 | count = 0
24 | frames = []
25 | while success:
26 | if resize is not None:
27 | image = cv2.resize(image, size, interpolation = cv2.INTER_LINEAR)
28 | if save_dir != None:
29 | path = os.path.join(save_dir, "frame_" + str(count).zfill(4) + ".png")
30 | cv2.imwrite(path, image)
31 | frames.append(image)
32 | success,image = vidcap.read()
33 | count += 1
34 | if length > 0 and count >= length:
35 | break
36 | print("Video length: ", len(frames))
37 | return frames, fps, size
38 |
39 | def video2frame(path, resize = None):
40 | data_name = sorted(os.listdir(path))
41 | for i in range(len(data_name)):
42 | print(str(i+1)+" / " + str(len(data_name)))
43 | data_folder = os.path.join(path, data_name[i])
44 | print(data_folder)
45 | files = os.listdir(data_folder)
46 | for f in files:
47 | if f[-4:] == ".mp4":
48 | video_name = f
49 | video_path = os.path.join(data_folder, video_name)
50 | frame_folder = os.path.join(data_folder, "frames")
51 | if not os.path.exists(frame_folder):
52 | os.makedirs(frame_folder)
53 | load_video(video_path, save_dir = frame_folder, resize=resize)
54 |
55 | def video2frame_one_seq(path, save_dir = None, resize = None): # N x H x W x C
56 | vidcap = cv2.VideoCapture(path)
57 | fps = vidcap.get(cv2.CAP_PROP_FPS)
58 | success,image = vidcap.read()
59 | print(path)
60 | print(image.shape)
61 | height, width, layers = image.shape
62 | if resize is None:
63 | size = (width,height)
64 | elif type(resize) is int:
65 | size = (width//resize,height//resize)
66 | else:
67 | size = resize
68 | count = 0
69 | while success:
70 | if resize is not None:
71 | image = cv2.resize(image, size, interpolation = cv2.INTER_LINEAR)
72 | if save_dir != None:
73 | path = os.path.join(save_dir, "frame_" + str(count).zfill(5) + ".png")
74 | cv2.imwrite(path, image)
75 | success,image = vidcap.read()
76 | count += 1
77 | return fps, size
78 |
79 | def save_video(path,frame_array, fps, size, losses = None, frame_number = False, writer = None):
80 | if writer is None:
81 | if path[-3:] == "mp4":
82 | out = cv2.VideoWriter(path,cv2.VideoWriter_fourcc(*'mp4v'), fps, size)
83 | else:
84 | out = cv2.VideoWriter(path,cv2.VideoWriter_fourcc('M','J','P','G'), fps, size)
85 | else:
86 | out = writer
87 | for i in range(len(frame_array)):
88 | # writing to a image array
89 | if frame_number:
90 | frame_array[i] = draw_number(np.asarray(frame_array[i]), i)
91 | if losses is not None:
92 | frame_array[i] = draw_number(np.asarray(frame_array[i]), losses[i], x = 900, message = "Loss: ")
93 | out.write(frame_array[i])
94 | if writer is None:
95 | out.release()
96 |
97 | def draw_number(frame, num, x = 10, y = 10, message = "Frame: "):
98 | image=Image.fromarray(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB))
99 | draw = ImageDraw.Draw(image)
100 | font = ImageFont.truetype("./data/arial.ttf", 45)
101 |
102 | message = message + str(num)
103 | color = 'rgb(0, 0, 0)' # black color
104 |
105 | draw.text((x, y), message, fill=color, font=font)
106 | return cv2.cvtColor(np.array(image), cv2.COLOR_RGB2BGR)
107 |
108 | if __name__ == "__main__":
109 | parser = argparse.ArgumentParser("FlowNet2 Preparation")
110 | parser.add_argument("--dir_path", default="./video")
111 | args = parser.parse_args()
112 | dir_path = args.dir_path
113 | if dir_path == "./video":
114 | video2frame(dir_path, resize = 4)
115 | else:
116 | video2frame(os.path.join(dir_path, "test"), resize = 4)
117 | video2frame(os.path.join(dir_path, "training"), resize = 4)
--------------------------------------------------------------------------------
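A minimal round trip through load_video and save_video. The file names are placeholder assumptions; resize=2 halves both dimensions and frame_number=True stamps the frame index using ./data/arial.ttf:

from warp import load_video, save_video

# Hypothetical input clip; length=120 stops reading after 120 frames.
frames, fps, size = load_video("input.mp4", resize=2, length=120)
save_video("copy.mp4", frames, fps, size, frame_number=True)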
/dvs/warp/warping.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | from .read_write import load_video, save_video
3 | import torch
4 | import cv2
5 | from .rasterizer import Rasterization
6 | import time
7 | import os
8 |
9 | def warp_video(mesh_path, video_path, save_path, losses = None, frame_number = False, fps_fix = None):
10 |     if type(mesh_path) == str:
11 |         # loading a mesh from disk is not supported here; pass the mesh array directly
12 |         raise ValueError("warp_video expects a mesh array, got a path: " + mesh_path)
13 |     grid_data = mesh_path
14 |
15 | frame_array, fps, size = load_video(video_path, length = grid_data.shape[0])
16 | if fps_fix is not None:
17 | fps = fps_fix
18 | length = min(grid_data.shape[0], len(frame_array))
19 | seq_length = 100
20 | seq = length//seq_length
21 | writer = cv2.VideoWriter(save_path,cv2.VideoWriter_fourcc(*'mp4v'), fps, size)
22 | for i in range(seq+1):
23 | if seq_length*i==length:
24 | break
25 | print("Frame: "+str(i*seq_length)+"/"+str(length))
26 | frame_array_save = warpping_rast(grid_data[seq_length*i:min(seq_length*(i+1),length)], frame_array[seq_length*i:min(seq_length*(i+1),length)], losses = losses)
27 | save_video(save_path,frame_array_save, fps, size, losses = losses, frame_number = frame_number, writer = writer)
28 | writer.release()
29 |
30 | def warpping_rast(grid_data, frame_array, losses = None):
31 | output = []
32 | for i in range(0, min(len(frame_array), grid_data.shape[0])):
33 | frame = warpping_one_frame_rast(frame_array[i], grid_data[i])
34 | output.append(frame)
35 | return output
36 |
37 | def warpping_one_frame_rast(image, grid):
38 | img = torch.Tensor(image).permute(2,0,1)/255
39 | grid = torch.Tensor(grid)
40 | output_image = Rasterization(img, grid)
41 | return np.clip(output_image.permute(1,2,0).numpy() * 255, 0, 255).astype("uint8")
42 |
--------------------------------------------------------------------------------
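warp_video expects the mesh as an array rather than a path: one [rows, cols, 4] grid per frame, where the last axis stacks the warped (x, y) and source (x, y) positions in normalized coordinates. A sketch with an identity mesh, so the output should match the input; the grid size, frame count, and file names are illustrative assumptions, and Rasterization requires a CUDA device:

import numpy as np
from warp import warp_video

rows, cols, n_frames = 13, 17, 100
ys, xs = np.meshgrid(np.linspace(0, 1, rows), np.linspace(0, 1, cols), indexing="ij")
mesh = np.stack([xs, ys, xs, ys], axis=-1)        # [rows, cols, 4]: warped xy == source xy
grid = np.repeat(mesh[None], n_frames, axis=0)    # [frames, rows, cols, 4]
warp_video(grid, "input.mp4", "output_identity.mp4", frame_number=False)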