├── .gitignore ├── LICENSE ├── README.md ├── docs ├── code-of-conduct.md └── contributing.md └── dvs ├── checkpoint └── stabilzation │ └── stabilzation_last.checkpoint ├── conf ├── stabilzation.yaml └── stabilzation_train.yaml ├── data └── arial.ttf ├── dataset.py ├── flownet2 ├── LICENSE ├── README.md ├── __init__.py ├── convert.py ├── datasets.py ├── install.sh ├── losses.py ├── main.py ├── models.py ├── networks │ ├── FlowNetC.py │ ├── FlowNetFusion.py │ ├── FlowNetS.py │ ├── FlowNetSD.py │ ├── __init__.py │ ├── channelnorm_package │ │ ├── __init__.py │ │ ├── channelnorm.py │ │ ├── channelnorm_cuda.cc │ │ ├── channelnorm_kernel.cu │ │ ├── channelnorm_kernel.cuh │ │ └── setup.py │ ├── correlation_package │ │ ├── __init__.py │ │ ├── correlation.py │ │ ├── correlation_cuda.cc │ │ ├── correlation_cuda_kernel.cu │ │ ├── correlation_cuda_kernel.cuh │ │ └── setup.py │ ├── resample2d_package │ │ ├── __init__.py │ │ ├── resample2d.py │ │ ├── resample2d_cuda.cc │ │ ├── resample2d_kernel.cu │ │ ├── resample2d_kernel.cuh │ │ └── setup.py │ └── submodules.py ├── run.sh ├── run_release.sh └── utils │ ├── __init__.py │ ├── flow_utils.py │ ├── frame_utils.py │ ├── param_utils.py │ └── tools.py ├── gyro ├── __init__.py ├── gyro_function.py └── gyro_io.py ├── inference.py ├── load_frame_sensor_data.py ├── loss.py ├── metrics.py ├── model.py ├── printer.py ├── requirements.txt ├── train.py ├── util.py └── warp ├── __init__.py ├── rasterizer.py ├── read_write.py └── warping.py /.gitignore: -------------------------------------------------------------------------------- 1 | *.pyc 2 | .torch 3 | _ext 4 | *.o 5 | _ext/ 6 | *.png 7 | *.jpg 8 | *.tar 9 | log/* 10 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | 2 | Apache License 3 | Version 2.0, January 2004 4 | http://www.apache.org/licenses/ 5 | 6 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 7 | 8 | 1. Definitions. 9 | 10 | "License" shall mean the terms and conditions for use, reproduction, 11 | and distribution as defined by Sections 1 through 9 of this document. 12 | 13 | "Licensor" shall mean the copyright owner or entity authorized by 14 | the copyright owner that is granting the License. 15 | 16 | "Legal Entity" shall mean the union of the acting entity and all 17 | other entities that control, are controlled by, or are under common 18 | control with that entity. For the purposes of this definition, 19 | "control" means (i) the power, direct or indirect, to cause the 20 | direction or management of such entity, whether by contract or 21 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 22 | outstanding shares, or (iii) beneficial ownership of such entity. 23 | 24 | "You" (or "Your") shall mean an individual or Legal Entity 25 | exercising permissions granted by this License. 26 | 27 | "Source" form shall mean the preferred form for making modifications, 28 | including but not limited to software source code, documentation 29 | source, and configuration files. 30 | 31 | "Object" form shall mean any form resulting from mechanical 32 | transformation or translation of a Source form, including but 33 | not limited to compiled object code, generated documentation, 34 | and conversions to other media types. 
35 | 36 | "Work" shall mean the work of authorship, whether in Source or 37 | Object form, made available under the License, as indicated by a 38 | copyright notice that is included in or attached to the work 39 | (an example is provided in the Appendix below). 40 | 41 | "Derivative Works" shall mean any work, whether in Source or Object 42 | form, that is based on (or derived from) the Work and for which the 43 | editorial revisions, annotations, elaborations, or other modifications 44 | represent, as a whole, an original work of authorship. For the purposes 45 | of this License, Derivative Works shall not include works that remain 46 | separable from, or merely link (or bind by name) to the interfaces of, 47 | the Work and Derivative Works thereof. 48 | 49 | "Contribution" shall mean any work of authorship, including 50 | the original version of the Work and any modifications or additions 51 | to that Work or Derivative Works thereof, that is intentionally 52 | submitted to Licensor for inclusion in the Work by the copyright owner 53 | or by an individual or Legal Entity authorized to submit on behalf of 54 | the copyright owner. For the purposes of this definition, "submitted" 55 | means any form of electronic, verbal, or written communication sent 56 | to the Licensor or its representatives, including but not limited to 57 | communication on electronic mailing lists, source code control systems, 58 | and issue tracking systems that are managed by, or on behalf of, the 59 | Licensor for the purpose of discussing and improving the Work, but 60 | excluding communication that is conspicuously marked or otherwise 61 | designated in writing by the copyright owner as "Not a Contribution." 62 | 63 | "Contributor" shall mean Licensor and any individual or Legal Entity 64 | on behalf of whom a Contribution has been received by Licensor and 65 | subsequently incorporated within the Work. 66 | 67 | 2. Grant of Copyright License. Subject to the terms and conditions of 68 | this License, each Contributor hereby grants to You a perpetual, 69 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 70 | copyright license to reproduce, prepare Derivative Works of, 71 | publicly display, publicly perform, sublicense, and distribute the 72 | Work and such Derivative Works in Source or Object form. 73 | 74 | 3. Grant of Patent License. Subject to the terms and conditions of 75 | this License, each Contributor hereby grants to You a perpetual, 76 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 77 | (except as stated in this section) patent license to make, have made, 78 | use, offer to sell, sell, import, and otherwise transfer the Work, 79 | where such license applies only to those patent claims licensable 80 | by such Contributor that are necessarily infringed by their 81 | Contribution(s) alone or by combination of their Contribution(s) 82 | with the Work to which such Contribution(s) was submitted. If You 83 | institute patent litigation against any entity (including a 84 | cross-claim or counterclaim in a lawsuit) alleging that the Work 85 | or a Contribution incorporated within the Work constitutes direct 86 | or contributory patent infringement, then any patent licenses 87 | granted to You under this License for that Work shall terminate 88 | as of the date such litigation is filed. 89 | 90 | 4. Redistribution. 
You may reproduce and distribute copies of the 91 | Work or Derivative Works thereof in any medium, with or without 92 | modifications, and in Source or Object form, provided that You 93 | meet the following conditions: 94 | 95 | (a) You must give any other recipients of the Work or 96 | Derivative Works a copy of this License; and 97 | 98 | (b) You must cause any modified files to carry prominent notices 99 | stating that You changed the files; and 100 | 101 | (c) You must retain, in the Source form of any Derivative Works 102 | that You distribute, all copyright, patent, trademark, and 103 | attribution notices from the Source form of the Work, 104 | excluding those notices that do not pertain to any part of 105 | the Derivative Works; and 106 | 107 | (d) If the Work includes a "NOTICE" text file as part of its 108 | distribution, then any Derivative Works that You distribute must 109 | include a readable copy of the attribution notices contained 110 | within such NOTICE file, excluding those notices that do not 111 | pertain to any part of the Derivative Works, in at least one 112 | of the following places: within a NOTICE text file distributed 113 | as part of the Derivative Works; within the Source form or 114 | documentation, if provided along with the Derivative Works; or, 115 | within a display generated by the Derivative Works, if and 116 | wherever such third-party notices normally appear. The contents 117 | of the NOTICE file are for informational purposes only and 118 | do not modify the License. You may add Your own attribution 119 | notices within Derivative Works that You distribute, alongside 120 | or as an addendum to the NOTICE text from the Work, provided 121 | that such additional attribution notices cannot be construed 122 | as modifying the License. 123 | 124 | You may add Your own copyright statement to Your modifications and 125 | may provide additional or different license terms and conditions 126 | for use, reproduction, or distribution of Your modifications, or 127 | for any such Derivative Works as a whole, provided Your use, 128 | reproduction, and distribution of the Work otherwise complies with 129 | the conditions stated in this License. 130 | 131 | 5. Submission of Contributions. Unless You explicitly state otherwise, 132 | any Contribution intentionally submitted for inclusion in the Work 133 | by You to the Licensor shall be under the terms and conditions of 134 | this License, without any additional terms or conditions. 135 | Notwithstanding the above, nothing herein shall supersede or modify 136 | the terms of any separate license agreement you may have executed 137 | with Licensor regarding such Contributions. 138 | 139 | 6. Trademarks. This License does not grant permission to use the trade 140 | names, trademarks, service marks, or product names of the Licensor, 141 | except as required for reasonable and customary use in describing the 142 | origin of the Work and reproducing the content of the NOTICE file. 143 | 144 | 7. Disclaimer of Warranty. Unless required by applicable law or 145 | agreed to in writing, Licensor provides the Work (and each 146 | Contributor provides its Contributions) on an "AS IS" BASIS, 147 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 148 | implied, including, without limitation, any warranties or conditions 149 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 150 | PARTICULAR PURPOSE. 
You are solely responsible for determining the 151 | appropriateness of using or redistributing the Work and assume any 152 | risks associated with Your exercise of permissions under this License. 153 | 154 | 8. Limitation of Liability. In no event and under no legal theory, 155 | whether in tort (including negligence), contract, or otherwise, 156 | unless required by applicable law (such as deliberate and grossly 157 | negligent acts) or agreed to in writing, shall any Contributor be 158 | liable to You for damages, including any direct, indirect, special, 159 | incidental, or consequential damages of any character arising as a 160 | result of this License or out of the use or inability to use the 161 | Work (including but not limited to damages for loss of goodwill, 162 | work stoppage, computer failure or malfunction, or any and all 163 | other commercial damages or losses), even if such Contributor 164 | has been advised of the possibility of such damages. 165 | 166 | 9. Accepting Warranty or Additional Liability. While redistributing 167 | the Work or Derivative Works thereof, You may choose to offer, 168 | and charge a fee for, acceptance of support, warranty, indemnity, 169 | or other liability obligations and/or rights consistent with this 170 | License. However, in accepting such obligations, You may act only 171 | on Your own behalf and on Your sole responsibility, not on behalf 172 | of any other Contributor, and only if You agree to indemnify, 173 | defend, and hold each Contributor harmless for any liability 174 | incurred by, or claims asserted against, such Contributor by reason 175 | of your accepting any such warranty or additional liability. 176 | 177 | END OF TERMS AND CONDITIONS 178 | 179 | APPENDIX: How to apply the Apache License to your work. 180 | 181 | To apply the Apache License to your work, attach the following 182 | boilerplate notice, with the fields enclosed by brackets "[]" 183 | replaced with your own identifying information. (Don't include 184 | the brackets!) The text should be enclosed in the appropriate 185 | comment syntax for the file format. We also recommend that a 186 | file or class name and description of purpose be included on the 187 | same "printed page" as the copyright notice for easier 188 | identification within third-party archives. 189 | 190 | Copyright [yyyy] [name of copyright owner] 191 | 192 | Licensed under the Apache License, Version 2.0 (the "License"); 193 | you may not use this file except in compliance with the License. 194 | You may obtain a copy of the License at 195 | 196 | http://www.apache.org/licenses/LICENSE-2.0 197 | 198 | Unless required by applicable law or agreed to in writing, software 199 | distributed under the License is distributed on an "AS IS" BASIS, 200 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 201 | See the License for the specific language governing permissions and 202 | limitations under the License. 
203 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Deep Online Fused Video Stabilization 2 | 3 | [[Paper]](https://openaccess.thecvf.com/content/WACV2022/papers/Shi_Deep_Online_Fused_Video_Stabilization_WACV_2022_paper.pdf) [[Supplementary]](https://zhmeishi.github.io/dvs/paper/dvs_supp.pdf) [[Project Page]](https://zhmeishi.github.io/dvs/) [[Dataset]](https://storage.googleapis.com/dataset_release/all.zip) [[Our Result]](https://storage.googleapis.com/dataset_release/inference_result_release.zip) [[More Results]](https://zhmeishi.github.io/dvs/supp/results.html) 4 | 5 | This repository contains the PyTorch implementation of our method in the paper "Deep Online Fused Video Stabilization". 6 | 7 | ## Environment Setting 8 | Python version >= 3.6 9 | PyTorch >= 1.0.0 with CUDA (installation guide [here](https://pytorch.org/get-started/locally/)) 10 | Install the other required packages: 11 | ``` 12 | cd dvs 13 | pip install -r requirements.txt --ignore-installed 14 | ``` 15 | 16 | ## Data Preparation 17 | Download the sample video [here](https://drive.google.com/file/d/1PpF3-6BbQKy9fldjIfwa5AlbtQflx3sG/view?usp=sharing). 18 | Uncompress the *video* folder under the *dvs* folder. 19 | ``` 20 | python load_frame_sensor_data.py 21 | ``` 22 | Demo of curve visualization: 23 | The **gyro/OIS curve visualization** can be found at *dvs/video/s_114_outdoor_running_trail_daytime/ControlCam_20200930_104820_real.jpg*. 24 | 25 | 26 | ## FlowNet2 Preparation 27 | Note: we provide the optical flow results for one test video in Data Preparation. If you would like to generate them for all test videos, please follow the [FlowNet2 official repository](https://github.com/NVIDIA/flownet2-pytorch) and the guide below. Otherwise, you can skip this section. 28 | 29 | Note: FlowNet2 installation is tricky. Please use Python 3.6 and PyTorch 1.0.0. More details are [here](https://github.com/NVIDIA/flownet2-pytorch/issues/156), or contact us with any questions. 30 | 31 | Download the FlowNet2 model *FlowNet2_checkpoint.pth.tar* [here](https://drive.google.com/file/d/1hF8vS6YeHkx3j2pfCeQqqZGwA_PJq_Da/view) and move it under the *dvs/flownet2* folder. 32 | ``` 33 | python warp/read_write.py # video2frames 34 | cd flownet2 35 | bash install.sh # install custom-layer packages 36 | bash run.sh # generate optical flow files for the dataset 37 | ``` 38 | 39 | ## Running Inference 40 | ``` 41 | python inference.py 42 | python metrics.py 43 | ``` 44 | The loss and metric information will be printed in the terminal. The metric numbers can differ slightly depending on your OpenCV/PyTorch versions. 45 | 46 | The results are under *dvs/test/stabilzation*. 47 | In *s_114_outdoor_running_trail_daytime.jpg*, the blue curve is the output of our model, and the green curve is the input. 48 | *s_114_outdoor_running_trail_daytime_stab.mp4* is the uncropped stabilized video. 49 | *s_114_outdoor_running_trail_daytime_stab_crop.mp4* is the cropped stabilized video. Note: the cropped video is generated after running the metrics code. 50 | 51 | ## Training 52 | Download the dataset for training and testing [here](https://storage.googleapis.com/dataset_release/all.zip). 53 | Uncompress *all.zip* and move the *dataset_release* folder under the *dvs* folder. 54 | 55 | Follow the FlowNet2 Preparation section, then run:
56 | ``` 57 | python warp/read_write.py --dir_path ./dataset_release # video2frames 58 | cd flownet2 59 | bash run_release.sh # generate optical flow file for dataset 60 | ``` 61 | 62 | Run training code. 63 | ``` 64 | python train.py 65 | ``` 66 | The model is saved in *checkpoint/stabilzation_train*. 67 | 68 | ## Citation 69 | If you use this code or dataset for your research, please cite our paper. 70 | ``` 71 | @inproceedings{shi2022deep, 72 | title={Deep Online Fused Video Stabilization}, 73 | author={Shi, Zhenmei and Shi, Fuhao and Lai, Wei-Sheng and Liang, Chia-Kai and Liang, Yingyu}, 74 | booktitle={Proceedings of the IEEE/CVF Winter Conference on Applications of Computer Vision}, 75 | pages={1250--1258}, 76 | year={2022} 77 | } 78 | ``` 79 | -------------------------------------------------------------------------------- /docs/code-of-conduct.md: -------------------------------------------------------------------------------- 1 | # Google Open Source Community Guidelines 2 | 3 | At Google, we recognize and celebrate the creativity and collaboration of open 4 | source contributors and the diversity of skills, experiences, cultures, and 5 | opinions they bring to the projects and communities they participate in. 6 | 7 | Every one of Google's open source projects and communities are inclusive 8 | environments, based on treating all individuals respectfully, regardless of 9 | gender identity and expression, sexual orientation, disabilities, 10 | neurodiversity, physical appearance, body size, ethnicity, nationality, race, 11 | age, religion, or similar personal characteristic. 12 | 13 | We value diverse opinions, but we value respectful behavior more. 14 | 15 | Respectful behavior includes: 16 | 17 | * Being considerate, kind, constructive, and helpful. 18 | * Not engaging in demeaning, discriminatory, harassing, hateful, sexualized, or 19 | physically threatening behavior, speech, and imagery. 20 | * Not engaging in unwanted physical contact. 21 | 22 | Some Google open source projects [may adopt][] an explicit project code of 23 | conduct, which may have additional detailed expectations for participants. Most 24 | of those projects will use our [modified Contributor Covenant][]. 25 | 26 | [may adopt]: https://opensource.google/docs/releasing/preparing/#conduct 27 | [modified Contributor Covenant]: https://opensource.google/docs/releasing/template/CODE_OF_CONDUCT/ 28 | 29 | ## Resolve peacefully 30 | 31 | We do not believe that all conflict is necessarily bad; healthy debate and 32 | disagreement often yields positive results. However, it is never okay to be 33 | disrespectful. 34 | 35 | If you see someone behaving disrespectfully, you are encouraged to address the 36 | behavior directly with those involved. Many issues can be resolved quickly and 37 | easily, and this gives people more control over the outcome of their dispute. 38 | If you are unable to resolve the matter for any reason, or if the behavior is 39 | threatening or harassing, report it. We are dedicated to providing an 40 | environment where participants feel welcome and safe. 41 | 42 | ## Reporting problems 43 | 44 | Some Google open source projects may adopt a project-specific code of conduct. 45 | In those cases, a Google employee will be identified as the Project Steward, 46 | who will receive and handle reports of code of conduct violations. In the event 47 | that a project hasn’t identified a Project Steward, you can report problems by 48 | emailing opensource@google.com. 
49 | 50 | We will investigate every complaint, but you may not receive a direct response. 51 | We will use our discretion in determining when and how to follow up on reported 52 | incidents, which may range from not taking action to permanent expulsion from 53 | the project and project-sponsored spaces. We will notify the accused of the 54 | report and provide them an opportunity to discuss it before any action is 55 | taken. The identity of the reporter will be omitted from the details of the 56 | report supplied to the accused. In potentially harmful situations, such as 57 | ongoing harassment or threats to anyone's safety, we may take action without 58 | notice. 59 | 60 | *This document was adapted from the [IndieWeb Code of Conduct][] and can also 61 | be found at .* 62 | 63 | [IndieWeb Code of Conduct]: https://indieweb.org/code-of-conduct 64 | -------------------------------------------------------------------------------- /docs/contributing.md: -------------------------------------------------------------------------------- 1 | # How to Contribute 2 | 3 | We'd love to accept your patches and contributions to this project. There are 4 | just a few small guidelines you need to follow. 5 | 6 | ## Contributor License Agreement 7 | 8 | Contributions to this project must be accompanied by a Contributor License 9 | Agreement. You (or your employer) retain the copyright to your contribution; 10 | this simply gives us permission to use and redistribute your contributions as 11 | part of the project. Head over to to see 12 | your current agreements on file or to sign a new one. 13 | 14 | You generally only need to submit a CLA once, so if you've already submitted one 15 | (even if it was for a different project), you probably don't need to do it 16 | again. 17 | 18 | ## Code reviews 19 | 20 | All submissions, including submissions by project members, require review. We 21 | use GitHub pull requests for this purpose. Consult 22 | [GitHub Help](https://help.github.com/articles/about-pull-requests/) for more 23 | information on using pull requests. 24 | 25 | ## Community Guidelines 26 | 27 | This project follows [Google's Open Source Community 28 | Guidelines](https://opensource.google/conduct/). 
29 | -------------------------------------------------------------------------------- /dvs/checkpoint/stabilzation/stabilzation_last.checkpoint: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/googleinterns/deep-stabilization/7159c09d21aee3fc2098c64698c1300e40e3a8ea/dvs/checkpoint/stabilzation/stabilzation_last.checkpoint -------------------------------------------------------------------------------- /dvs/conf/stabilzation.yaml: -------------------------------------------------------------------------------- 1 | data: 2 | exp: 'stabilzation' 3 | checkpoints_dir: './checkpoint' 4 | log: './log' 5 | data_dir: './video' 6 | use_cuda: true 7 | batch_size: 16 8 | resize_ratio: 0.25 9 | number_real: 10 10 | number_virtual: 2 11 | time_train: 2000 # ms 12 | sample_freq: 40 # ms 13 | channel_size: 1 14 | num_workers: 16 # num_workers for data_loader 15 | model: 16 | load_model: null 17 | cnn: 18 | activate_function: relu # sigmoid, relu, tanh, quadratic 19 | batch_norm: true 20 | gap: false 21 | layers: 22 | rnn: 23 | layers: 24 | - - 512 25 | - true 26 | - - 512 27 | - true 28 | fc: 29 | activate_function: relu 30 | batch_norm: false # (batch_norm and drop_out) is False 31 | layers: 32 | - - 256 33 | - true 34 | - - 4 # last layer should be equal to nr_class 35 | - true 36 | drop_out: 0 37 | train: 38 | optimizer: "adam" # adam or sgd 39 | momentum: 0.9 # for sgd 40 | decay_epoch: null 41 | epoch: 400 42 | snapshot: 2 43 | init_lr: 0.0001 44 | lr_decay: 0.5 45 | lr_step: 200 # if > 0 decay_epoch should be null 46 | seed: 1 47 | weight_decay: 0.0001 48 | clip_norm: False 49 | init: "xavier_uniform" # xavier_uniform or xavier_normal 50 | loss: 51 | follow: 10 52 | angle: 1 53 | smooth: 10 #10 54 | c2_smooth: 200 #20 55 | undefine: 2.0 56 | opt: 0.1 57 | stay: 0 -------------------------------------------------------------------------------- /dvs/conf/stabilzation_train.yaml: -------------------------------------------------------------------------------- 1 | data: 2 | exp: 'stabilzation_train' 3 | checkpoints_dir: './checkpoint' 4 | log: './log' 5 | data_dir: './dataset_release' 6 | use_cuda: true 7 | batch_size: 16 8 | resize_ratio: 0.25 9 | number_real: 10 10 | number_virtual: 2 11 | time_train: 2000 # ms 12 | sample_freq: 40 # ms 13 | channel_size: 1 14 | num_workers: 16 # num_workers for data_loader 15 | model: 16 | load_model: null 17 | cnn: 18 | activate_function: relu # sigmoid, relu, tanh, quadratic 19 | batch_norm: true 20 | gap: false 21 | layers: 22 | rnn: 23 | layers: 24 | - - 512 25 | - true 26 | - - 512 27 | - true 28 | fc: 29 | activate_function: relu 30 | batch_norm: false # (batch_norm and drop_out) is False 31 | layers: 32 | - - 256 33 | - true 34 | - - 4 # last layer should be equal to nr_class 35 | - true 36 | drop_out: 0 37 | train: 38 | optimizer: "adam" # adam or sgd 39 | momentum: 0.9 # for sgd 40 | decay_epoch: null 41 | epoch: 400 42 | snapshot: 2 43 | init_lr: 0.0001 44 | lr_decay: 0.5 45 | lr_step: 200 # if > 0 decay_epoch should be null 46 | seed: 1 47 | weight_decay: 0.0001 48 | clip_norm: False 49 | init: "xavier_uniform" # xavier_uniform or xavier_normal 50 | loss: 51 | follow: 10 52 | angle: 1 53 | smooth: 10 #10 54 | c2_smooth: 200 #20 55 | undefine: 2.0 56 | opt: 0.1 57 | stay: 0 -------------------------------------------------------------------------------- /dvs/data/arial.ttf: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/googleinterns/deep-stabilization/7159c09d21aee3fc2098c64698c1300e40e3a8ea/dvs/data/arial.ttf -------------------------------------------------------------------------------- /dvs/dataset.py: -------------------------------------------------------------------------------- 1 | from torch.utils.data import Dataset 2 | import os 3 | import collections 4 | from gyro import ( 5 | LoadGyroData, 6 | LoadOISData, 7 | LoadFrameData, 8 | GetGyroAtTimeStamp, 9 | get_static, 10 | GetMetadata, 11 | GetProjections, 12 | train_GetGyroAtTimeStamp, 13 | QuaternionProduct, 14 | QuaternionReciprocal, 15 | FindOISAtTimeStamp, 16 | norm_quat 17 | ) 18 | import random 19 | import numpy as np 20 | import torchvision.transforms as transforms 21 | import torch 22 | from flownet2 import flow_utils 23 | from scipy import ndimage, misc 24 | from numpy import linalg as LA 25 | 26 | def get_data_loader(cf, no_flo = False): 27 | size = cf["data"]["batch_size"] 28 | num_workers = cf["data"]["num_workers"] 29 | train_data, test_data = get_dataset(cf, no_flo) 30 | trainloader = torch.utils.data.DataLoader(train_data, batch_size=size,shuffle=True, pin_memory=True, num_workers=num_workers) 31 | testloader = torch.utils.data.DataLoader(test_data, batch_size=size,shuffle=False, pin_memory=True, num_workers=num_workers) 32 | return trainloader,testloader 33 | 34 | def get_dataset(cf, no_flo = False): 35 | resize_ratio = cf["data"]["resize_ratio"] 36 | train_transform, test_transform = _data_transforms() 37 | train_path = os.path.join(cf["data"]["data_dir"], "training") 38 | test_path = os.path.join(cf["data"]["data_dir"], "test") 39 | if not os.path.exists(train_path): 40 | train_path = cf["data"]["data_dir"] 41 | if not os.path.exists(test_path): 42 | test_path = cf["data"]["data_dir"] 43 | 44 | train_data = Dataset_Gyro( 45 | train_path, sample_freq = cf["data"]["sample_freq"]*1000000, number_real = cf["data"]["number_real"], 46 | time_train = cf["data"]["time_train"]*1000000, transform = train_transform, resize_ratio = resize_ratio, no_flo = no_flo) 47 | test_data = Dataset_Gyro( 48 | test_path, sample_freq = cf["data"]["sample_freq"]*1000000, number_real = cf["data"]["number_real"], 49 | time_train = cf["data"]["time_train"]*1000000, transform = test_transform, resize_ratio = resize_ratio, no_flo = no_flo) 50 | return train_data, test_data 51 | 52 | def get_inference_data_loader(cf, data_path, no_flo = False): 53 | test_data = get_inference_dataset(cf, data_path, no_flo) 54 | testloader = torch.utils.data.DataLoader(test_data, batch_size=1,shuffle=False, pin_memory=True, num_workers=1) 55 | return testloader 56 | 57 | def get_inference_dataset(cf, data_path, no_flo = False): 58 | resize_ratio = cf["data"]["resize_ratio"] 59 | _, test_transform = _data_transforms() 60 | test_data = Dataset_Gyro( 61 | data_path, sample_freq = cf["data"]["sample_freq"]*1000000, number_real = cf["data"]["number_real"], 62 | time_train = cf["data"]["time_train"]*1000000, transform = test_transform, resize_ratio = resize_ratio, 63 | inference_only = True, no_flo = no_flo) 64 | return test_data 65 | 66 | def _data_transforms(): 67 | 68 | test_transform = transforms.Compose( 69 | [transforms.ToTensor(), 70 | ]) 71 | train_transform = transforms.Compose( 72 | [transforms.ToTensor(), 73 | ]) 74 | 75 | return train_transform, test_transform 76 | 77 | class DVS_data(): 78 | def __init__(self): 79 | self.gyro = None 80 | self.ois = None 81 | self.frame = None 82 | self.length = 0 83 | self.flo_path = None 84 | 
self.flo_shape = None 85 | self.flo_back_path = None 86 | 87 | class Dataset_Gyro(Dataset): 88 | def __init__(self, path, sample_freq = 33*1000000, number_real = 10, time_train = 2000*1000000, \ 89 | transform = None, inference_only = False, no_flo = False, resize_ratio = 1): 90 | r""" 91 | Arguments: 92 | sample_freq: real quaternions [t-sample_freq*number_real, t+sample_freq*number_real] ns 93 | number_real: real gyro num in half time_interval 94 | time_train: time for a batch ns 95 | """ 96 | self.sample_freq = sample_freq 97 | self.number_real = number_real 98 | self.no_flo = no_flo 99 | self.resize_ratio = resize_ratio 100 | self.static_options = get_static() 101 | self.inference_only = inference_only 102 | 103 | self.ois_ratio = np.array([self.static_options["crop_window_width"] / self.static_options["width"], \ 104 | self.static_options["crop_window_height"] / self.static_options["height"]]) * 0.01 105 | self.unit_size = 4 106 | 107 | if inference_only: 108 | self.length = 1 109 | self.data = [self.process_one_video(path)] 110 | self.number_train = self.data[0].length 111 | return 112 | 113 | self.time_train = time_train 114 | self.number_train = time_train//self.sample_freq 115 | 116 | self.data_name = sorted(os.listdir(path)) 117 | self.length = len(self.data_name) 118 | self.data = [] 119 | for i in range(self.length): 120 | self.data.append(self.process_one_video(os.path.join(path,self.data_name[i]))) 121 | 122 | def process_one_video(self, path): 123 | dvs_data = DVS_data() 124 | files = sorted(os.listdir(path)) 125 | print(path) 126 | for f in files: 127 | file_path = os.path.join(path,f) 128 | if "gimbal" in file_path.lower(): 129 | continue 130 | if "frame" in f and "txt" in f: 131 | dvs_data.frame = LoadFrameData(file_path) 132 | print("frame:", dvs_data.frame.shape, end=" ") 133 | elif "gyro" in f: 134 | dvs_data.gyro = LoadGyroData(file_path) 135 | dvs_data.gyro = preprocess_gyro(dvs_data.gyro) 136 | print("gyro:", dvs_data.gyro.shape, end=" ") 137 | elif "ois" in f and "txt" in f: 138 | dvs_data.ois = LoadOISData(file_path) 139 | print("ois:", dvs_data.ois.shape, end=" ") 140 | elif f == "flo": 141 | dvs_data.flo_path, dvs_data.flo_shape = LoadFlow(file_path) 142 | print("flo_path:", len(dvs_data.flo_path), end=" ") 143 | print("flo_shape:", dvs_data.flo_shape, end=" ") 144 | elif f == "flo_back": 145 | dvs_data.flo_back_path, _ = LoadFlow(file_path) 146 | 147 | print() 148 | if dvs_data.flo_path is not None: 149 | dvs_data.length = min(dvs_data.frame.shape[0] - 1, len(dvs_data.flo_path)) 150 | else: 151 | dvs_data.length = dvs_data.frame.shape[0] - 1 152 | return dvs_data 153 | 154 | def generate_quaternions(self, dvs_data): 155 | first_id = random.randint(0, dvs_data.length - self.number_train) + 1 # skip the first frame 156 | 157 | sample_data = np.zeros((self.number_train, 2 * self.number_real + 1, self.unit_size), dtype=np.float32) 158 | sample_ois = np.zeros((self.number_train, 2), dtype=np.float32) 159 | 160 | sample_time = np.zeros((self.number_train+1), dtype=np.float32) 161 | sample_time[0] = get_timestamp(dvs_data.frame, first_id - 1) 162 | 163 | real_postion = np.zeros((self.number_train, 4), dtype=np.float32) 164 | 165 | time_start = sample_time[0] 166 | 167 | for i in range(self.number_train): 168 | sample_time[i+1] = get_timestamp(dvs_data.frame, first_id + i) 169 | real_postion[i] = GetGyroAtTimeStamp(dvs_data.gyro, sample_time[i+1] - self.sample_freq) 170 | sample_ois[i] = self.get_ois_at_timestamp(dvs_data.ois, sample_time[i+1]) 171 | for j in 
range(-self.number_real, self.number_real+1): 172 | index = j + self.number_real 173 | time_stamp = sample_time[i+1] + self.sample_freq * j 174 | sample_data[i, index] = self.get_data_at_timestamp(dvs_data.gyro, dvs_data.ois, time_stamp, real_postion[i]) 175 | 176 | sample_data = np.reshape(sample_data, (self.number_train, (2*self.number_real+1) * self.unit_size)) 177 | return sample_data, sample_time, first_id, real_postion, sample_ois 178 | 179 | def load_flo(self, idx, first_id): 180 | shape = self.data[idx].flo_shape 181 | h, w = shape[0], shape[1] 182 | flo = np.zeros((self.number_train, h, w, 2)) 183 | flo_back = np.zeros((self.number_train, h, w, 2)) 184 | 185 | for i in range(self.number_train): 186 | frame_id = i + first_id 187 | f = flow_utils.readFlow(self.data[idx].flo_path[frame_id-1]).astype(np.float32) 188 | flo[i] = f 189 | 190 | f_b = flow_utils.readFlow(self.data[idx].flo_back_path[frame_id-1]).astype(np.float32) 191 | flo_back[i] = f_b 192 | 193 | return flo, flo_back 194 | 195 | def load_real_projections(self, idx, first_id): 196 | real_projections = np.zeros((self.number_train + 1, self.static_options["num_grid_rows"], 3, 3)) 197 | for i in range(self.number_train + 1): 198 | frame_id = i + first_id 199 | metadata = GetMetadata(self.data[idx].frame, frame_id - 1) 200 | real_projections[i] = np.array(GetProjections(self.static_options, metadata, self.data[idx].gyro, np.zeros(self.data[idx].ois.shape), no_shutter = True)) 201 | return real_projections 202 | 203 | def __getitem__(self, idx): 204 | inputs, times, first_id, real_postion, ois = self.generate_quaternions(self.data[idx]) 205 | real_projections = self.load_real_projections(idx, first_id) 206 | if self.no_flo: 207 | flo, flo_back = 0, 0 208 | else: 209 | flo, flo_back = self.load_flo(idx, first_id) 210 | return inputs, times, flo, flo_back, real_projections, real_postion, ois, idx 211 | 212 | def __len__(self): 213 | return self.length 214 | 215 | def get_virtual_data(self, virtual_queue, real_queue_idx, pre_times, cur_times, time_start, batch_size, number_virtual, quat_t_1): 216 | # virtual_queue: [batch_size, num, 5 (timestamp, quats)] 217 | # eular angle, 218 | # deta R angular velocity [Q't-1, Q't-2] 219 | # output virtual angular velocity, x, x*dtime => detaQt 220 | virtual_data = np.zeros((batch_size, number_virtual, 4), dtype=np.float32) 221 | vt_1 = np.zeros((batch_size, 4), dtype=np.float32) 222 | quat_t_1 = quat_t_1.numpy() 223 | for i in range(batch_size): 224 | sample_time = cur_times[i] 225 | for j in range(number_virtual): 226 | time_stamp = sample_time - self.sample_freq * (number_virtual - j) 227 | virtual_data[i, j] = get_virtual_at_timestamp(virtual_queue[i], self.data[real_queue_idx[i]].gyro, time_stamp, time_start[i], quat_t_1[i]) 228 | vt_1[i] = get_virtual_at_timestamp(virtual_queue[i], self.data[real_queue_idx[i]].gyro, pre_times[i], time_start[i], None) 229 | virtual_data = np.reshape(virtual_data, (batch_size, number_virtual * 4)) 230 | return torch.tensor(virtual_data, dtype=torch.float), torch.tensor(vt_1, dtype=torch.float) 231 | 232 | def update_virtual_queue(self, batch_size, virtual_queue, out, times): 233 | virtual_data = np.zeros((batch_size, 5)) 234 | virtual_data[:,0] = times 235 | virtual_data[:, 1:] = out 236 | virtual_data = np.expand_dims(virtual_data, axis = 1) 237 | 238 | if None in virtual_queue: 239 | virtual_queue = virtual_data 240 | else: 241 | virtual_queue = np.concatenate((virtual_queue, virtual_data), axis = 1) 242 | return virtual_queue 243 | 244 | def 
random_init_virtual_queue(self, batch_size, real_postion, times): 245 | virtual_queue = np.zeros((batch_size, 3, 5)) 246 | virtual_queue[:, 2, 0] = times - 0.1 * self.sample_freq 247 | virtual_queue[:, 1, 0] = times - 1.1 * self.sample_freq 248 | virtual_queue[:, 0, 0] = times - 2.1 * self.sample_freq 249 | for i in range(batch_size): 250 | quat = np.random.uniform(low=-0.06, high= 0.06, size=4) # transfer to angle # 0.05 251 | quat[3] = 1 252 | quat = quat / LA.norm(quat) 253 | quat = norm_quat(QuaternionProduct(real_postion[i], quat)) 254 | virtual_queue[i, 2, 1:] = quat 255 | virtual_queue[i, 1, 1:] = quat 256 | virtual_queue[i, 0, 1:] = quat 257 | return virtual_queue 258 | 259 | def get_data_at_timestamp(self, gyro_data, ois_data, time_stamp, quat_t_1): 260 | quat_t = GetGyroAtTimeStamp(gyro_data, time_stamp) 261 | quat_dif = QuaternionProduct(quat_t, QuaternionReciprocal(quat_t_1)) 262 | return quat_dif 263 | 264 | def get_ois_at_timestamp(self, ois_data, time_stamp): 265 | ois_t = FindOISAtTimeStamp(ois_data, time_stamp) 266 | ois_t = np.array(ois_t) / self.ois_ratio 267 | return ois_t 268 | 269 | def get_timestamp(frame_data, idx): 270 | sample_time = frame_data[idx, 0] 271 | metadata = GetMetadata(frame_data, idx) 272 | timestmap_ns = metadata["timestamp_ns"] + metadata["rs_time_ns"] * 0.5 273 | return timestmap_ns 274 | 275 | def preprocess_gyro(gyro, extend = 200): 276 | fake_gyro = np.zeros((extend, 5)) 277 | time_start = gyro[0,0] 278 | for i in range(extend): 279 | fake_gyro[-i-1, 0] = time_start - (gyro[i+1, 0] - time_start) 280 | fake_gyro[-i-1, 4] = gyro[i+1, 4] 281 | fake_gyro[-i-1, 1:4] = -gyro[i+1, 1:4] 282 | 283 | new_gyro = np.concatenate((fake_gyro, gyro), axis = 0) 284 | return new_gyro 285 | 286 | def LoadFlow(path): 287 | file_names = sorted(os.listdir(path)) 288 | file_path =[] 289 | for n in file_names: 290 | file_path.append(os.path.join(path, n)) 291 | return file_path, flow_utils.readFlow(file_path[0]).shape 292 | 293 | def get_virtual_at_timestamp(virtual_queue, real_queue, time_stamp, time_start, quat_t_1 = None, sample_freq = None): 294 | if virtual_queue is None: 295 | quat_t = GetGyroAtTimeStamp(real_queue, time_stamp) 296 | else: 297 | quat_t = train_GetGyroAtTimeStamp(virtual_queue, time_stamp) 298 | if quat_t is None: 299 | quat_t = GetGyroAtTimeStamp(real_queue, time_stamp) 300 | 301 | if quat_t_1 is None: 302 | return quat_t 303 | else: 304 | quat_dif = QuaternionProduct(quat_t, QuaternionReciprocal(quat_t_1)) 305 | return quat_dif 306 | -------------------------------------------------------------------------------- /dvs/flownet2/LICENSE: -------------------------------------------------------------------------------- 1 | Copyright 2017 NVIDIA CORPORATION 2 | 3 | Licensed under the Apache License, Version 2.0 (the "License"); 4 | you may not use this file except in compliance with the License. 5 | You may obtain a copy of the License at 6 | 7 | http://www.apache.org/licenses/LICENSE-2.0 8 | 9 | Unless required by applicable law or agreed to in writing, software 10 | distributed under the License is distributed on an "AS IS" BASIS, 11 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | See the License for the specific language governing permissions and 13 | limitations under the License. 
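Referring back to *dvs/dataset.py* above: the sketch below shows how its inference loader might be driven, assuming the *conf/stabilzation.yaml* configuration shown earlier and a prepared clip folder such as *video/s_114_outdoor_running_trail_daytime*. This is an illustrative sketch, not the project's actual *inference.py*.

```python
import yaml
from dataset import get_inference_data_loader  # dvs/dataset.py; run from the dvs/ folder

# Load the YAML configuration (assumes the stabilzation.yaml layout shown above).
with open("conf/stabilzation.yaml") as f:
    cf = yaml.safe_load(f)

# Build a batch-size-1 loader over a single video folder; no_flo=True skips the
# precomputed .flo files, so only the gyro/OIS/frame metadata needs to be present.
loader = get_inference_data_loader(cf, "./video/s_114_outdoor_running_trail_daytime", no_flo=True)

# Each item covers the whole clip: gyro/OIS features, frame timestamps, optional
# forward/backward flow, per-frame real-camera projections, real pose and OIS readings.
for inputs, times, flo, flo_back, real_projections, real_position, ois, idx in loader:
    print(inputs.shape, times.shape, real_projections.shape, ois.shape)
    break
```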
-------------------------------------------------------------------------------- /dvs/flownet2/README.md: -------------------------------------------------------------------------------- 1 | # flownet2-pytorch 2 | 3 | Pytorch implementation of [FlowNet 2.0: Evolution of Optical Flow Estimation with Deep Networks](https://arxiv.org/abs/1612.01925). 4 | 5 | Multiple GPU training is supported, and the code provides examples for training or inference on [MPI-Sintel](http://sintel.is.tue.mpg.de/) clean and final datasets. The same commands can be used for training or inference with other datasets. See below for more detail. 6 | 7 | Inference using fp16 (half-precision) is also supported. 8 | 9 | For more help, type
10 | 11 | python main.py --help 12 | 13 | ## Network architectures 14 | Below are the different flownet neural network architectures that are provided.
15 | A batchnorm version for each network is also available. 16 | 17 | - **FlowNet2S** 18 | - **FlowNet2C** 19 | - **FlowNet2CS** 20 | - **FlowNet2CSS** 21 | - **FlowNet2SD** 22 | - **FlowNet2** 23 | 24 | ## Custom layers 25 | 26 | The `FlowNet2` and `FlowNet2C*` architectures rely on the custom layers `Resample2d` and `Correlation`. 27 | A PyTorch implementation of these layers with CUDA kernels is available at [./networks](./networks). 28 | Note: currently, half-precision kernels are not available for these layers. 29 | 30 | ## Data Loaders 31 | 32 | Dataloaders for FlyingChairs, FlyingThings, ChairsSDHom and ImagesFromFolder are available in [datasets.py](./datasets.py).
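The flow targets and outputs handled by these loaders use the Middlebury `.flo` format, which the bundled `flow_utils` module reads and writes. A small sketch, based on the calls made in `main.py` here and in `dvs/dataset.py`; the file path is a placeholder:

```python
import numpy as np
from utils import flow_utils   # from the dvs/ folder, use: from flownet2 import flow_utils

flow = flow_utils.readFlow("flo/000000.flo").astype(np.float32)  # (H, W, 2) array of per-pixel (dx, dy)
print(flow.shape)
flow_utils.writeFlow("copy.flo", flow)                           # write it back out in the same .flo format
```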
33 | 34 | ## Loss Functions 35 | 36 | L1 and L2 losses with multi-scale support are available in [losses.py](./losses.py).
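For example, these loss modules can be used standalone on predicted and ground-truth flow tensors of shape `(N, 2, H, W)`; each forward call returns the loss value together with the end-point error (EPE). A short sketch (the `args` argument is stored but not used by `forward`, so `None` is passed here):

```python
import torch
from losses import L1Loss, MultiScale

pred = torch.randn(4, 2, 64, 64)    # predicted flow
gt   = torch.randn(4, 2, 64, 64)    # ground-truth flow

l1 = L1Loss(args=None)              # loss_labels = ['L1', 'EPE']
loss, epe = l1(pred, gt)            # forward returns [loss, EPE]

ms = MultiScale(args=None, startScale=4, numScales=5, norm='L1')
loss, epe = ms(pred, gt)            # single-tensor input: plain loss + EPE
print(loss.item(), epe.item())
```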
37 | 38 | ## Installation 39 | 40 | # get flownet2-pytorch source 41 | git clone https://github.com/NVIDIA/flownet2-pytorch.git 42 | cd flownet2-pytorch 43 | 44 | # install custom layers 45 | bash install.sh 46 | 47 | ### Python requirements 48 | Currently, the code supports python 3 49 | * numpy 50 | * PyTorch ( == 0.4.1, for <= 0.4.0 see branch [python36-PyTorch0.4](https://github.com/NVIDIA/flownet2-pytorch/tree/python36-PyTorch0.4)) 51 | * scipy 52 | * scikit-image 53 | * tensorboardX 54 | * colorama, tqdm, setproctitle 55 | 56 | ## Converted Caffe Pre-trained Models 57 | We've included caffe pre-trained models. Should you use these pre-trained weights, please adhere to the [license agreements](https://drive.google.com/file/d/1TVv0BnNFh3rpHZvD-easMb9jYrPE2Eqd/view?usp=sharing). 58 | 59 | * [FlowNet2](https://drive.google.com/file/d/1hF8vS6YeHkx3j2pfCeQqqZGwA_PJq_Da/view?usp=sharing)[620MB] 60 | * [FlowNet2-C](https://drive.google.com/file/d/1BFT6b7KgKJC8rA59RmOVAXRM_S7aSfKE/view?usp=sharing)[149MB] 61 | * [FlowNet2-CS](https://drive.google.com/file/d/1iBJ1_o7PloaINpa8m7u_7TsLCX0Dt_jS/view?usp=sharing)[297MB] 62 | * [FlowNet2-CSS](https://drive.google.com/file/d/157zuzVf4YMN6ABAQgZc8rRmR5cgWzSu8/view?usp=sharing)[445MB] 63 | * [FlowNet2-CSS-ft-sd](https://drive.google.com/file/d/1R5xafCIzJCXc8ia4TGfC65irmTNiMg6u/view?usp=sharing)[445MB] 64 | * [FlowNet2-S](https://drive.google.com/file/d/1V61dZjFomwlynwlYklJHC-TLfdFom3Lg/view?usp=sharing)[148MB] 65 | * [FlowNet2-SD](https://drive.google.com/file/d/1QW03eyYG_vD-dT-Mx4wopYvtPu_msTKn/view?usp=sharing)[173MB] 66 | 67 | ## Inference 68 | # Example on MPISintel Clean 69 | python main.py --inference --model FlowNet2 --save_flow --inference_dataset MpiSintelClean \ 70 | --inference_dataset_root /path/to/mpi-sintel/clean/dataset \ 71 | --resume /path/to/checkpoints 72 | 73 | ## Training and validation 74 | 75 | # Example on MPISintel Final and Clean, with L1Loss on FlowNet2 model 76 | python main.py --batch_size 8 --model FlowNet2 --loss=L1Loss --optimizer=Adam --optimizer_lr=1e-4 \ 77 | --training_dataset MpiSintelFinal --training_dataset_root /path/to/mpi-sintel/final/dataset \ 78 | --validation_dataset MpiSintelClean --validation_dataset_root /path/to/mpi-sintel/clean/dataset 79 | 80 | # Example on MPISintel Final and Clean, with MultiScale loss on FlowNet2C model 81 | python main.py --batch_size 8 --model FlowNet2C --optimizer=Adam --optimizer_lr=1e-4 --loss=MultiScale --loss_norm=L1 \ 82 | --loss_numScales=5 --loss_startScale=4 --optimizer_lr=1e-4 --crop_size 384 512 \ 83 | --training_dataset FlyingChairs --training_dataset_root /path/to/flying-chairs/dataset \ 84 | --validation_dataset MpiSintelClean --validation_dataset_root /path/to/mpi-sintel/clean/dataset 85 | 86 | ## Results on MPI-Sintel 87 | [![Predicted flows on MPI-Sintel](./image.png)](https://www.youtube.com/watch?v=HtBmabY8aeU "Predicted flows on MPI-Sintel") 88 | 89 | ## Reference 90 | If you find this implementation useful in your work, please acknowledge it appropriately and cite the paper: 91 | ```` 92 | @InProceedings{IMKDB17, 93 | author = "E. Ilg and N. Mayer and T. Saikia and M. Keuper and A. Dosovitskiy and T. 
Brox", 94 | title = "FlowNet 2.0: Evolution of Optical Flow Estimation with Deep Networks", 95 | booktitle = "IEEE Conference on Computer Vision and Pattern Recognition (CVPR)", 96 | month = "Jul", 97 | year = "2017", 98 | url = "http://lmb.informatik.uni-freiburg.de//Publications/2017/IMKDB17" 99 | } 100 | ```` 101 | ``` 102 | @misc{flownet2-pytorch, 103 | author = {Fitsum Reda and Robert Pottorff and Jon Barker and Bryan Catanzaro}, 104 | title = {flownet2-pytorch: Pytorch implementation of FlowNet 2.0: Evolution of Optical Flow Estimation with Deep Networks}, 105 | year = {2017}, 106 | publisher = {GitHub}, 107 | journal = {GitHub repository}, 108 | howpublished = {\url{https://github.com/NVIDIA/flownet2-pytorch}} 109 | } 110 | ``` 111 | ## Related Optical Flow Work from Nvidia 112 | Code (in Caffe and Pytorch): [PWC-Net](https://github.com/NVlabs/PWC-Net)
113 | Paper : [PWC-Net: CNNs for Optical Flow Using Pyramid, Warping, and Cost Volume](https://arxiv.org/abs/1709.02371). 114 | 115 | ## Acknowledgments 116 | Parts of this code were derived, as noted in the code, from [ClementPinard/FlowNetPytorch](https://github.com/ClementPinard/FlowNetPytorch). 117 | -------------------------------------------------------------------------------- /dvs/flownet2/__init__.py: -------------------------------------------------------------------------------- 1 | from .utils import flow_utils, tools -------------------------------------------------------------------------------- /dvs/flownet2/convert.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python2.7 2 | 3 | import caffe 4 | from caffe.proto import caffe_pb2 5 | import sys, os 6 | 7 | import torch 8 | import torch.nn as nn 9 | 10 | import argparse, tempfile 11 | import numpy as np 12 | 13 | parser = argparse.ArgumentParser() 14 | parser.add_argument('caffe_model', help='input model in hdf5 or caffemodel format') 15 | parser.add_argument('prototxt_template',help='prototxt template') 16 | parser.add_argument('flownet2_pytorch', help='path to flownet2-pytorch') 17 | 18 | args = parser.parse_args() 19 | 20 | args.rgb_max = 255 21 | args.fp16 = False 22 | args.grads = {} 23 | 24 | # load models 25 | sys.path.append(args.flownet2_pytorch) 26 | 27 | import models 28 | from utils.param_utils import * 29 | 30 | width = 256 31 | height = 256 32 | keys = {'TARGET_WIDTH': width, 33 | 'TARGET_HEIGHT': height, 34 | 'ADAPTED_WIDTH':width, 35 | 'ADAPTED_HEIGHT':height, 36 | 'SCALE_WIDTH':1., 37 | 'SCALE_HEIGHT':1.,} 38 | 39 | template = '\n'.join(np.loadtxt(args.prototxt_template, dtype=str, delimiter='\n')) 40 | for k in keys: 41 | template = template.replace('$%s$'%(k),str(keys[k])) 42 | 43 | prototxt = tempfile.NamedTemporaryFile(mode='w', delete=True) 44 | prototxt.write(template) 45 | prototxt.flush() 46 | 47 | net = caffe.Net(prototxt.name, args.caffe_model, caffe.TEST) 48 | 49 | weights = {} 50 | biases = {} 51 | 52 | for k, v in list(net.params.items()): 53 | weights[k] = np.array(v[0].data).reshape(v[0].data.shape) 54 | biases[k] = np.array(v[1].data).reshape(v[1].data.shape) 55 | print((k, weights[k].shape, biases[k].shape)) 56 | 57 | if 'FlowNet2/' in args.caffe_model: 58 | model = models.FlowNet2(args) 59 | 60 | parse_flownetc(model.flownetc.modules(), weights, biases) 61 | parse_flownets(model.flownets_1.modules(), weights, biases, param_prefix='net2_') 62 | parse_flownets(model.flownets_2.modules(), weights, biases, param_prefix='net3_') 63 | parse_flownetsd(model.flownets_d.modules(), weights, biases, param_prefix='netsd_') 64 | parse_flownetfusion(model.flownetfusion.modules(), weights, biases, param_prefix='fuse_') 65 | 66 | state = {'epoch': 0, 67 | 'state_dict': model.state_dict(), 68 | 'best_EPE': 1e10} 69 | torch.save(state, os.path.join(args.flownet2_pytorch, 'FlowNet2_checkpoint.pth.tar')) 70 | 71 | elif 'FlowNet2-C/' in args.caffe_model: 72 | model = models.FlowNet2C(args) 73 | 74 | parse_flownetc(model.modules(), weights, biases) 75 | state = {'epoch': 0, 76 | 'state_dict': model.state_dict(), 77 | 'best_EPE': 1e10} 78 | torch.save(state, os.path.join(args.flownet2_pytorch, 'FlowNet2-C_checkpoint.pth.tar')) 79 | 80 | elif 'FlowNet2-CS/' in args.caffe_model: 81 | model = models.FlowNet2CS(args) 82 | 83 | parse_flownetc(model.flownetc.modules(), weights, biases) 84 | parse_flownets(model.flownets_1.modules(), weights, biases, 
param_prefix='net2_') 85 | 86 | state = {'epoch': 0, 87 | 'state_dict': model.state_dict(), 88 | 'best_EPE': 1e10} 89 | torch.save(state, os.path.join(args.flownet2_pytorch, 'FlowNet2-CS_checkpoint.pth.tar')) 90 | 91 | elif 'FlowNet2-CSS/' in args.caffe_model: 92 | model = models.FlowNet2CSS(args) 93 | 94 | parse_flownetc(model.flownetc.modules(), weights, biases) 95 | parse_flownets(model.flownets_1.modules(), weights, biases, param_prefix='net2_') 96 | parse_flownets(model.flownets_2.modules(), weights, biases, param_prefix='net3_') 97 | 98 | state = {'epoch': 0, 99 | 'state_dict': model.state_dict(), 100 | 'best_EPE': 1e10} 101 | torch.save(state, os.path.join(args.flownet2_pytorch, 'FlowNet2-CSS_checkpoint.pth.tar')) 102 | 103 | elif 'FlowNet2-CSS-ft-sd/' in args.caffe_model: 104 | model = models.FlowNet2CSS(args) 105 | 106 | parse_flownetc(model.flownetc.modules(), weights, biases) 107 | parse_flownets(model.flownets_1.modules(), weights, biases, param_prefix='net2_') 108 | parse_flownets(model.flownets_2.modules(), weights, biases, param_prefix='net3_') 109 | 110 | state = {'epoch': 0, 111 | 'state_dict': model.state_dict(), 112 | 'best_EPE': 1e10} 113 | torch.save(state, os.path.join(args.flownet2_pytorch, 'FlowNet2-CSS-ft-sd_checkpoint.pth.tar')) 114 | 115 | elif 'FlowNet2-S/' in args.caffe_model: 116 | model = models.FlowNet2S(args) 117 | 118 | parse_flownetsonly(model.modules(), weights, biases, param_prefix='') 119 | state = {'epoch': 0, 120 | 'state_dict': model.state_dict(), 121 | 'best_EPE': 1e10} 122 | torch.save(state, os.path.join(args.flownet2_pytorch, 'FlowNet2-S_checkpoint.pth.tar')) 123 | 124 | elif 'FlowNet2-SD/' in args.caffe_model: 125 | model = models.FlowNet2SD(args) 126 | 127 | parse_flownetsd(model.modules(), weights, biases, param_prefix='') 128 | 129 | state = {'epoch': 0, 130 | 'state_dict': model.state_dict(), 131 | 'best_EPE': 1e10} 132 | torch.save(state, os.path.join(args.flownet2_pytorch, 'FlowNet2-SD_checkpoint.pth.tar')) 133 | 134 | else: 135 | print(('model type cound not be determined from input caffe model %s'%(args.caffe_model))) 136 | quit() 137 | print(("done converting ", args.caffe_model)) -------------------------------------------------------------------------------- /dvs/flownet2/install.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | cd ./networks/correlation_package 3 | rm -rf *_cuda.egg-info build dist __pycache__ 4 | python3 setup.py install --user 5 | 6 | cd ../resample2d_package 7 | rm -rf *_cuda.egg-info build dist __pycache__ 8 | python3 setup.py install --user 9 | 10 | cd ../channelnorm_package 11 | rm -rf *_cuda.egg-info build dist __pycache__ 12 | python3 setup.py install --user 13 | 14 | cd .. 
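# Optional sanity check (a sketch; the module names below are assumed from the
# *_cuda.cc sources and the *_cuda.egg-info pattern cleaned above):
# python3 -c "import torch, correlation_cuda, resample2d_cuda, channelnorm_cuda; print('custom CUDA layers OK')"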
15 | -------------------------------------------------------------------------------- /dvs/flownet2/losses.py: -------------------------------------------------------------------------------- 1 | ''' 2 | Portions of this code copyright 2017, Clement Pinard 3 | ''' 4 | 5 | # freda (todo) : adversarial loss 6 | 7 | import torch 8 | import torch.nn as nn 9 | import math 10 | 11 | def EPE(input_flow, target_flow): 12 | return torch.norm(target_flow-input_flow,p=2,dim=1).mean() 13 | 14 | class L1(nn.Module): 15 | def __init__(self): 16 | super(L1, self).__init__() 17 | def forward(self, output, target): 18 | lossvalue = torch.abs(output - target).mean() 19 | return lossvalue 20 | 21 | class L2(nn.Module): 22 | def __init__(self): 23 | super(L2, self).__init__() 24 | def forward(self, output, target): 25 | lossvalue = torch.norm(output-target,p=2,dim=1).mean() 26 | return lossvalue 27 | 28 | class L1Loss(nn.Module): 29 | def __init__(self, args): 30 | super(L1Loss, self).__init__() 31 | self.args = args 32 | self.loss = L1() 33 | self.loss_labels = ['L1', 'EPE'] 34 | 35 | def forward(self, output, target): 36 | lossvalue = self.loss(output, target) 37 | epevalue = EPE(output, target) 38 | return [lossvalue, epevalue] 39 | 40 | class L2Loss(nn.Module): 41 | def __init__(self, args): 42 | super(L2Loss, self).__init__() 43 | self.args = args 44 | self.loss = L2() 45 | self.loss_labels = ['L2', 'EPE'] 46 | 47 | def forward(self, output, target): 48 | lossvalue = self.loss(output, target) 49 | epevalue = EPE(output, target) 50 | return [lossvalue, epevalue] 51 | 52 | class MultiScale(nn.Module): 53 | def __init__(self, args, startScale = 4, numScales = 5, l_weight= 0.32, norm= 'L1'): 54 | super(MultiScale,self).__init__() 55 | 56 | self.startScale = startScale 57 | self.numScales = numScales 58 | self.loss_weights = torch.FloatTensor([(l_weight / 2 ** scale) for scale in range(self.numScales)]) 59 | self.args = args 60 | self.l_type = norm 61 | self.div_flow = 0.05 62 | assert(len(self.loss_weights) == self.numScales) 63 | 64 | if self.l_type == 'L1': 65 | self.loss = L1() 66 | else: 67 | self.loss = L2() 68 | 69 | self.multiScales = [nn.AvgPool2d(self.startScale * (2**scale), self.startScale * (2**scale)) for scale in range(self.numScales)] 70 | self.loss_labels = ['MultiScale-'+self.l_type, 'EPE'], 71 | 72 | def forward(self, output, target): 73 | lossvalue = 0 74 | epevalue = 0 75 | 76 | if type(output) is tuple: 77 | target = self.div_flow * target 78 | for i, output_ in enumerate(output): 79 | target_ = self.multiScales[i](target) 80 | epevalue += self.loss_weights[i]*EPE(output_, target_) 81 | lossvalue += self.loss_weights[i]*self.loss(output_, target_) 82 | return [lossvalue, epevalue] 83 | else: 84 | epevalue += EPE(output, target) 85 | lossvalue += self.loss(output, target) 86 | return [lossvalue, epevalue] 87 | 88 | -------------------------------------------------------------------------------- /dvs/flownet2/main.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | import os 3 | os.environ["CUDA_VISIBLE_DEVICES"] = "0" 4 | import torch 5 | import torch.nn as nn 6 | from torch.utils.data import DataLoader 7 | from torch.autograd import Variable 8 | from tensorboardX import SummaryWriter 9 | 10 | import argparse, os, sys, subprocess 11 | import colorama 12 | import numpy as np 13 | from tqdm import tqdm 14 | from glob import glob 15 | from os.path import * 16 | 17 | import models, datasets 18 | from utils import flow_utils, tools 
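# Note: in the dvs pipeline this script is used for inference only. For every video
# folder found under --inference_dataset_root it writes forward optical flow to
# <video>/flo and backward optical flow to <video>/flo_back as .flo files
# (the README's run.sh / run_release.sh steps generate these for the sample video
# and the released dataset, respectively).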
19 | import time 20 | 21 | # Reusable function for inference 22 | def inference(args, epoch, data_path, data_loader, model, offset=0): 23 | 24 | model.eval() 25 | 26 | if args.save_flow or args.render_validation: 27 | flow_folder = "{}/flo".format(data_path) 28 | flow_back_folder = "{}/flo_back".format(data_path) 29 | if not os.path.exists(flow_folder): 30 | os.makedirs(flow_folder) 31 | if not os.path.exists(flow_back_folder): 32 | os.makedirs(flow_back_folder) 33 | 34 | # visualization folder 35 | if args.inference_visualize: 36 | flow_vis_folder = "{}/flo_vis".format(data_path) 37 | if not os.path.exists(flow_vis_folder): 38 | os.makedirs(flow_vis_folder) 39 | flow_back_vis_folder = "{}/flo_back_vis".format(data_path) 40 | if not os.path.exists(flow_back_vis_folder): 41 | os.makedirs(flow_back_vis_folder) 42 | 43 | args.inference_n_batches = np.inf if args.inference_n_batches < 0 else args.inference_n_batches 44 | 45 | progress = tqdm(data_loader, ncols=100, total=np.minimum(len(data_loader), args.inference_n_batches), desc='Inferencing ', 46 | leave=True, position=offset) 47 | 48 | for batch_idx, (data) in enumerate(progress): 49 | data = data[0] 50 | data_back = torch.cat((data[:,:,1:,:,:], data[:,:,:1,:,:]), dim = 2) 51 | if args.cuda: 52 | data_forward = data.cuda(non_blocking=True) 53 | data_back = data_back.cuda(non_blocking=True) 54 | data_forward = Variable(data_forward) 55 | data_back = Variable(data_back) 56 | 57 | flo_path = join(flow_folder, '%06d.flo'%(batch_idx)) 58 | flo_back_path = join(flow_back_folder, '%06d.flo'%(batch_idx)) 59 | frame_size = data_loader.dataset.frame_size 60 | if not os.path.exists(flo_path): 61 | with torch.no_grad(): 62 | output = model(data_forward)[:,:,:frame_size[0], :frame_size[1]] 63 | if args.save_flow or args.render_validation: 64 | _pflow = output[0].data.cpu().numpy().transpose(1, 2, 0) 65 | flow_utils.writeFlow( flo_path, _pflow) 66 | if args.inference_visualize: 67 | flow_utils.visulize_flow_file( 68 | join(flow_folder, '%06d.flo' % (batch_idx)),flow_vis_folder) 69 | 70 | if not os.path.exists(flo_back_path): 71 | with torch.no_grad(): 72 | output = model(data_back)[:,:,:frame_size[0], :frame_size[1]] 73 | if args.save_flow or args.render_validation: 74 | _pflow = output[0].data.cpu().numpy().transpose(1, 2, 0) 75 | flow_utils.writeFlow( flo_back_path, _pflow) 76 | if args.inference_visualize: 77 | flow_utils.visulize_flow_file( 78 | join(flow_back_folder, '%06d.flo' % (batch_idx)), flow_back_vis_folder) 79 | 80 | progress.update(1) 81 | 82 | if batch_idx == (args.inference_n_batches - 1): 83 | break 84 | progress.close() 85 | return 86 | 87 | if __name__ == '__main__': 88 | parser = argparse.ArgumentParser() 89 | parser.add_argument('--fp16', action='store_true', help='Run model in pseudo-fp16 mode (fp16 storage fp32 math).') 90 | parser.add_argument('--fp16_scale', type=float, default=1024., help='Loss scaling, positive power of 2 values can improve fp16 convergence.') 91 | 92 | parser.add_argument('--start_epoch', type=int, default=1) 93 | parser.add_argument('--batch_size', '-b', type=int, default=8, help="Batch size") 94 | parser.add_argument('--crop_size', type=int, nargs='+', default = [256, 256], help="Spatial dimension to crop training samples for training") 95 | parser.add_argument("--rgb_max", type=float, default = 255.) 
96 | 97 | parser.add_argument('--number_workers', '-nw', '--num_workers', type=int, default=8) 98 | parser.add_argument('--number_gpus', '-ng', type=int, default=-1, help='number of GPUs to use') 99 | parser.add_argument('--no_cuda', action='store_true') 100 | 101 | parser.add_argument('--save', '-s', default='./Google', type=str, help='directory for saving') 102 | 103 | parser.add_argument('--inference', action='store_true') 104 | parser.add_argument('--inference_visualize', action='store_true', 105 | help="visualize the optical flow during inference") 106 | parser.add_argument('--inference_size', type=int, nargs='+', default = [-1,-1], help='spatial size divisible by 64. default (-1,-1) - largest possible valid size would be used') 107 | parser.add_argument('--inference_batch_size', type=int, default=1) 108 | parser.add_argument('--inference_n_batches', type=int, default=-1) 109 | parser.add_argument('--save_flow', action='store_true', help='save predicted flows to file') 110 | 111 | parser.add_argument('--resume', default='', type=str, metavar='PATH', help='path to latest checkpoint (default: none)') 112 | parser.add_argument('--log_frequency', '--summ_iter', type=int, default=1, help="Log every n batches") 113 | 114 | tools.add_arguments_for_module(parser, models, argument_for_class='model', default='FlowNet2') 115 | 116 | tools.add_arguments_for_module(parser, datasets, argument_for_class='inference_dataset', default='Google', 117 | skip_params=['is_cropped'], 118 | parameter_defaults={'root': './Google/train', 119 | 'replicates': 1}) 120 | 121 | main_dir = os.path.dirname(os.path.realpath(__file__)) 122 | os.chdir(main_dir) 123 | 124 | # Parse the official arguments 125 | with tools.TimerBlock("Parsing Arguments") as block: 126 | args = parser.parse_args() 127 | if args.number_gpus < 0 : args.number_gpus = torch.cuda.device_count() 128 | 129 | # Get argument defaults (hastag #thisisahack) 130 | parser.add_argument('--IGNORE', action='store_true') 131 | defaults = vars(parser.parse_args(['--IGNORE'])) 132 | 133 | # Print all arguments, color the non-defaults 134 | for argument, value in sorted(vars(args).items()): 135 | reset = colorama.Style.RESET_ALL 136 | color = reset if value == defaults[argument] else colorama.Fore.MAGENTA 137 | block.log('{}{}: {}{}'.format(color, argument, value, reset)) 138 | 139 | args.model_class = tools.module_to_dict(models)[args.model] 140 | 141 | args.inference_dataset_class = tools.module_to_dict(datasets)[args.inference_dataset] 142 | 143 | args.cuda = not args.no_cuda and torch.cuda.is_available() 144 | # args.current_hash = subprocess.check_output(["git", "rev-parse", "HEAD"]).rstrip() 145 | args.log_file = join(args.save, 'args.txt') 146 | 147 | # dict to collect activation gradients (for training debug purpose) 148 | args.grads = {} 149 | 150 | args.total_epochs = 1 151 | args.inference_dir = "{}/inference".format(args.save) 152 | 153 | print('Source Code') 154 | # print((' Current Git Hash: {}\n'.format(args.current_hash))) 155 | 156 | # Dynamically load the dataset class with parameters passed in via "--argument_[param]=[value]" arguments 157 | with tools.TimerBlock("Initializing Datasets") as block: 158 | args.effective_batch_size = args.batch_size * args.number_gpus 159 | args.effective_inference_batch_size = args.inference_batch_size * args.number_gpus 160 | args.effective_number_workers = args.number_workers * args.number_gpus 161 | gpuargs = {'num_workers': args.effective_number_workers, 162 | 'pin_memory': True, 163 | 'drop_last' : True} 
if args.cuda else {} 164 | inf_gpuargs = gpuargs.copy() 165 | inf_gpuargs['num_workers'] = args.number_workers 166 | 167 | block.log('Inference Dataset: {}'.format(args.inference_dataset)) 168 | 169 | dataset_root = args.inference_dataset_root 170 | data_name = sorted(os.listdir(dataset_root)) 171 | 172 | block.log(data_name) 173 | inference_loaders = {} 174 | for i in range(len(data_name)): 175 | dataset_path = os.path.join(dataset_root, data_name[i]) 176 | args.inference_dataset_root = dataset_path 177 | inference_dataset = args.inference_dataset_class(args, False, **tools.kwargs_from_args(args, 'inference_dataset')) 178 | inference_loaders[dataset_path] = DataLoader(inference_dataset, batch_size=args.effective_inference_batch_size, shuffle=False, **inf_gpuargs) 179 | block.log('Inference Input: {}'.format(' '.join([str([d for d in x.size()]) for x in inference_dataset[0][0]]))) 180 | 181 | # Dynamically load model and loss class with parameters passed in via "--model_[param]=[value]" or "--loss_[param]=[value]" arguments 182 | with tools.TimerBlock("Building {} model".format(args.model)) as block: 183 | class Model(nn.Module): 184 | def __init__(self, args): 185 | super(Model, self).__init__() 186 | kwargs = tools.kwargs_from_args(args, 'model') 187 | self.model = args.model_class(args, **kwargs) 188 | 189 | def forward(self, data): 190 | output = self.model(data) 191 | return output 192 | 193 | model = Model(args) 194 | 195 | block.log('Effective Batch Size: {}'.format(args.effective_batch_size)) 196 | block.log('Number of parameters: {}'.format(sum([p.data.nelement() if p.requires_grad else 0 for p in model.parameters()]))) 197 | 198 | if args.cuda and args.number_gpus > 0: 199 | block.log('Initializing CUDA') 200 | model = model.cuda() 201 | block.log('Parallelizing') 202 | model = nn.parallel.DataParallel(model, device_ids=list(range(args.number_gpus))) 203 | 204 | # Load weights if needed, otherwise randomly initialize 205 | if args.resume and os.path.isfile(args.resume): 206 | block.log("Loading checkpoint '{}'".format(args.resume)) 207 | checkpoint = torch.load(args.resume) 208 | model.module.model.load_state_dict(checkpoint['state_dict']) 209 | block.log("Loaded checkpoint '{}' (at epoch {})".format(args.resume, checkpoint['epoch'])) 210 | 211 | elif args.resume and args.inference: 212 | block.log("No checkpoint found at '{}'".format(args.resume)) 213 | quit() 214 | 215 | else: 216 | block.log("Random initialization") 217 | 218 | block.log("Initializing save directory: {}".format(args.save)) 219 | if not os.path.exists(args.save): 220 | os.makedirs(args.save) 221 | 222 | # Log all arguments to file 223 | for argument, value in sorted(vars(args).items()): 224 | block.log2file(args.log_file, '{}: {}'.format(argument, value)) 225 | 226 | for data_path in inference_loaders: 227 | # Primary epoch loop 228 | progress = tqdm(list(range(args.start_epoch, args.total_epochs + 1)), miniters=1, ncols=100, desc='Overall Progress', leave=True, position=0) 229 | offset = 1 230 | 231 | for epoch in progress: 232 | stats = inference(args=args, epoch=epoch - 1, data_path = data_path, data_loader=inference_loaders[data_path], model=model, offset=offset) 233 | offset += 1 234 | print("\n") -------------------------------------------------------------------------------- /dvs/flownet2/networks/FlowNetC.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | from torch.nn import init 4 | 5 | import math 6 | import numpy as np 
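# Input contract (descriptive note): forward() below expects the two RGB frames
# stacked along the channel axis, i.e. an input of shape (B, 6, H, W), with
# channels 0:3 holding the first frame and 3:6 the second (see the x1/x2 split
# at the top of forward()). In eval mode only the finest prediction, flow2, is
# returned; it is produced at the conv2 feature scale, i.e. 1/4 of the input
# resolution, which is why callers expect spatial sizes divisible by 64.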
7 | 8 | from .correlation_package.correlation import Correlation 9 | 10 | from .submodules import * 11 | 'Parameter count , 39,175,298 ' 12 | 13 | class FlowNetC(nn.Module): 14 | def __init__(self,args, batchNorm=True, div_flow = 20): 15 | super(FlowNetC,self).__init__() 16 | 17 | self.batchNorm = batchNorm 18 | self.div_flow = div_flow 19 | 20 | self.conv1 = conv(self.batchNorm, 3, 64, kernel_size=7, stride=2) 21 | self.conv2 = conv(self.batchNorm, 64, 128, kernel_size=5, stride=2) 22 | self.conv3 = conv(self.batchNorm, 128, 256, kernel_size=5, stride=2) 23 | self.conv_redir = conv(self.batchNorm, 256, 32, kernel_size=1, stride=1) 24 | 25 | if args.fp16: 26 | self.corr = nn.Sequential( 27 | tofp32(), 28 | Correlation(pad_size=20, kernel_size=1, max_displacement=20, stride1=1, stride2=2, corr_multiply=1), 29 | tofp16()) 30 | else: 31 | self.corr = Correlation(pad_size=20, kernel_size=1, max_displacement=20, stride1=1, stride2=2, corr_multiply=1) 32 | 33 | self.corr_activation = nn.LeakyReLU(0.1,inplace=True) 34 | self.conv3_1 = conv(self.batchNorm, 473, 256) 35 | self.conv4 = conv(self.batchNorm, 256, 512, stride=2) 36 | self.conv4_1 = conv(self.batchNorm, 512, 512) 37 | self.conv5 = conv(self.batchNorm, 512, 512, stride=2) 38 | self.conv5_1 = conv(self.batchNorm, 512, 512) 39 | self.conv6 = conv(self.batchNorm, 512, 1024, stride=2) 40 | self.conv6_1 = conv(self.batchNorm,1024, 1024) 41 | 42 | self.deconv5 = deconv(1024,512) 43 | self.deconv4 = deconv(1026,256) 44 | self.deconv3 = deconv(770,128) 45 | self.deconv2 = deconv(386,64) 46 | 47 | self.predict_flow6 = predict_flow(1024) 48 | self.predict_flow5 = predict_flow(1026) 49 | self.predict_flow4 = predict_flow(770) 50 | self.predict_flow3 = predict_flow(386) 51 | self.predict_flow2 = predict_flow(194) 52 | 53 | self.upsampled_flow6_to_5 = nn.ConvTranspose2d(2, 2, 4, 2, 1, bias=True) 54 | self.upsampled_flow5_to_4 = nn.ConvTranspose2d(2, 2, 4, 2, 1, bias=True) 55 | self.upsampled_flow4_to_3 = nn.ConvTranspose2d(2, 2, 4, 2, 1, bias=True) 56 | self.upsampled_flow3_to_2 = nn.ConvTranspose2d(2, 2, 4, 2, 1, bias=True) 57 | 58 | for m in self.modules(): 59 | if isinstance(m, nn.Conv2d): 60 | if m.bias is not None: 61 | init.uniform_(m.bias) 62 | init.xavier_uniform_(m.weight) 63 | 64 | if isinstance(m, nn.ConvTranspose2d): 65 | if m.bias is not None: 66 | init.uniform_(m.bias) 67 | init.xavier_uniform_(m.weight) 68 | # init_deconv_bilinear(m.weight) 69 | self.upsample1 = nn.Upsample(scale_factor=4, mode='bilinear') 70 | 71 | def forward(self, x): 72 | x1 = x[:,0:3,:,:] 73 | x2 = x[:,3::,:,:] 74 | 75 | out_conv1a = self.conv1(x1) 76 | out_conv2a = self.conv2(out_conv1a) 77 | out_conv3a = self.conv3(out_conv2a) 78 | 79 | # FlownetC bottom input stream 80 | out_conv1b = self.conv1(x2) 81 | 82 | out_conv2b = self.conv2(out_conv1b) 83 | out_conv3b = self.conv3(out_conv2b) 84 | 85 | # Merge streams 86 | out_corr = self.corr(out_conv3a, out_conv3b) # False 87 | out_corr = self.corr_activation(out_corr) 88 | 89 | # Redirect top input stream and concatenate 90 | out_conv_redir = self.conv_redir(out_conv3a) 91 | 92 | in_conv3_1 = torch.cat((out_conv_redir, out_corr), 1) 93 | 94 | # Merged conv layers 95 | out_conv3_1 = self.conv3_1(in_conv3_1) 96 | 97 | out_conv4 = self.conv4_1(self.conv4(out_conv3_1)) 98 | 99 | out_conv5 = self.conv5_1(self.conv5(out_conv4)) 100 | out_conv6 = self.conv6_1(self.conv6(out_conv5)) 101 | 102 | flow6 = self.predict_flow6(out_conv6) 103 | flow6_up = self.upsampled_flow6_to_5(flow6) 104 | out_deconv5 = self.deconv5(out_conv6) 
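# Decoder note: each stage below repeats the same coarse-to-fine pattern --
# predict a flow field at the current scale, upsample that flow with a
# transposed convolution, deconvolve the feature map, then concatenate both
# with the matching encoder activation (skip connection) before making the
# next, finer flow prediction.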
105 | 106 | concat5 = torch.cat((out_conv5,out_deconv5,flow6_up),1) 107 | 108 | flow5 = self.predict_flow5(concat5) 109 | flow5_up = self.upsampled_flow5_to_4(flow5) 110 | out_deconv4 = self.deconv4(concat5) 111 | concat4 = torch.cat((out_conv4,out_deconv4,flow5_up),1) 112 | 113 | flow4 = self.predict_flow4(concat4) 114 | flow4_up = self.upsampled_flow4_to_3(flow4) 115 | out_deconv3 = self.deconv3(concat4) 116 | concat3 = torch.cat((out_conv3_1,out_deconv3,flow4_up),1) 117 | 118 | flow3 = self.predict_flow3(concat3) 119 | flow3_up = self.upsampled_flow3_to_2(flow3) 120 | out_deconv2 = self.deconv2(concat3) 121 | concat2 = torch.cat((out_conv2a,out_deconv2,flow3_up),1) 122 | 123 | flow2 = self.predict_flow2(concat2) 124 | 125 | if self.training: 126 | return flow2,flow3,flow4,flow5,flow6 127 | else: 128 | return flow2, 129 | -------------------------------------------------------------------------------- /dvs/flownet2/networks/FlowNetFusion.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | from torch.nn import init 4 | 5 | import math 6 | import numpy as np 7 | 8 | from .submodules import * 9 | 'Parameter count = 581,226' 10 | 11 | class FlowNetFusion(nn.Module): 12 | def __init__(self,args, batchNorm=True): 13 | super(FlowNetFusion,self).__init__() 14 | 15 | self.batchNorm = batchNorm 16 | self.conv0 = conv(self.batchNorm, 11, 64) 17 | self.conv1 = conv(self.batchNorm, 64, 64, stride=2) 18 | self.conv1_1 = conv(self.batchNorm, 64, 128) 19 | self.conv2 = conv(self.batchNorm, 128, 128, stride=2) 20 | self.conv2_1 = conv(self.batchNorm, 128, 128) 21 | 22 | self.deconv1 = deconv(128,32) 23 | self.deconv0 = deconv(162,16) 24 | 25 | self.inter_conv1 = i_conv(self.batchNorm, 162, 32) 26 | self.inter_conv0 = i_conv(self.batchNorm, 82, 16) 27 | 28 | self.predict_flow2 = predict_flow(128) 29 | self.predict_flow1 = predict_flow(32) 30 | self.predict_flow0 = predict_flow(16) 31 | 32 | self.upsampled_flow2_to_1 = nn.ConvTranspose2d(2, 2, 4, 2, 1) 33 | self.upsampled_flow1_to_0 = nn.ConvTranspose2d(2, 2, 4, 2, 1) 34 | 35 | for m in self.modules(): 36 | if isinstance(m, nn.Conv2d): 37 | if m.bias is not None: 38 | init.uniform_(m.bias) 39 | init.xavier_uniform_(m.weight) 40 | 41 | if isinstance(m, nn.ConvTranspose2d): 42 | if m.bias is not None: 43 | init.uniform_(m.bias) 44 | init.xavier_uniform_(m.weight) 45 | # init_deconv_bilinear(m.weight) 46 | 47 | def forward(self, x): 48 | out_conv0 = self.conv0(x) 49 | out_conv1 = self.conv1_1(self.conv1(out_conv0)) 50 | out_conv2 = self.conv2_1(self.conv2(out_conv1)) 51 | 52 | flow2 = self.predict_flow2(out_conv2) 53 | flow2_up = self.upsampled_flow2_to_1(flow2) 54 | out_deconv1 = self.deconv1(out_conv2) 55 | 56 | concat1 = torch.cat((out_conv1,out_deconv1,flow2_up),1) 57 | out_interconv1 = self.inter_conv1(concat1) 58 | flow1 = self.predict_flow1(out_interconv1) 59 | flow1_up = self.upsampled_flow1_to_0(flow1) 60 | out_deconv0 = self.deconv0(concat1) 61 | 62 | concat0 = torch.cat((out_conv0,out_deconv0,flow1_up),1) 63 | out_interconv0 = self.inter_conv0(concat0) 64 | flow0 = self.predict_flow0(out_interconv0) 65 | 66 | return flow0 67 | 68 | -------------------------------------------------------------------------------- /dvs/flownet2/networks/FlowNetS.py: -------------------------------------------------------------------------------- 1 | ''' 2 | Portions of this code copyright 2017, Clement Pinard 3 | ''' 4 | 5 | import torch 6 | import torch.nn as nn 7 | from torch.nn import 
init 8 | 9 | import math 10 | import numpy as np 11 | 12 | from .submodules import * 13 | 'Parameter count : 38,676,504 ' 14 | 15 | class FlowNetS(nn.Module): 16 | def __init__(self, args, input_channels = 12, batchNorm=True): 17 | super(FlowNetS,self).__init__() 18 | 19 | self.batchNorm = batchNorm 20 | self.conv1 = conv(self.batchNorm, input_channels, 64, kernel_size=7, stride=2) 21 | self.conv2 = conv(self.batchNorm, 64, 128, kernel_size=5, stride=2) 22 | self.conv3 = conv(self.batchNorm, 128, 256, kernel_size=5, stride=2) 23 | self.conv3_1 = conv(self.batchNorm, 256, 256) 24 | self.conv4 = conv(self.batchNorm, 256, 512, stride=2) 25 | self.conv4_1 = conv(self.batchNorm, 512, 512) 26 | self.conv5 = conv(self.batchNorm, 512, 512, stride=2) 27 | self.conv5_1 = conv(self.batchNorm, 512, 512) 28 | self.conv6 = conv(self.batchNorm, 512, 1024, stride=2) 29 | self.conv6_1 = conv(self.batchNorm,1024, 1024) 30 | 31 | self.deconv5 = deconv(1024,512) 32 | self.deconv4 = deconv(1026,256) 33 | self.deconv3 = deconv(770,128) 34 | self.deconv2 = deconv(386,64) 35 | 36 | self.predict_flow6 = predict_flow(1024) 37 | self.predict_flow5 = predict_flow(1026) 38 | self.predict_flow4 = predict_flow(770) 39 | self.predict_flow3 = predict_flow(386) 40 | self.predict_flow2 = predict_flow(194) 41 | 42 | self.upsampled_flow6_to_5 = nn.ConvTranspose2d(2, 2, 4, 2, 1, bias=False) 43 | self.upsampled_flow5_to_4 = nn.ConvTranspose2d(2, 2, 4, 2, 1, bias=False) 44 | self.upsampled_flow4_to_3 = nn.ConvTranspose2d(2, 2, 4, 2, 1, bias=False) 45 | self.upsampled_flow3_to_2 = nn.ConvTranspose2d(2, 2, 4, 2, 1, bias=False) 46 | 47 | for m in self.modules(): 48 | if isinstance(m, nn.Conv2d): 49 | if m.bias is not None: 50 | init.uniform_(m.bias) 51 | init.xavier_uniform_(m.weight) 52 | 53 | if isinstance(m, nn.ConvTranspose2d): 54 | if m.bias is not None: 55 | init.uniform_(m.bias) 56 | init.xavier_uniform_(m.weight) 57 | # init_deconv_bilinear(m.weight) 58 | self.upsample1 = nn.Upsample(scale_factor=4, mode='bilinear') 59 | 60 | def forward(self, x): 61 | out_conv1 = self.conv1(x) 62 | 63 | out_conv2 = self.conv2(out_conv1) 64 | out_conv3 = self.conv3_1(self.conv3(out_conv2)) 65 | out_conv4 = self.conv4_1(self.conv4(out_conv3)) 66 | out_conv5 = self.conv5_1(self.conv5(out_conv4)) 67 | out_conv6 = self.conv6_1(self.conv6(out_conv5)) 68 | 69 | flow6 = self.predict_flow6(out_conv6) 70 | flow6_up = self.upsampled_flow6_to_5(flow6) 71 | out_deconv5 = self.deconv5(out_conv6) 72 | 73 | concat5 = torch.cat((out_conv5,out_deconv5,flow6_up),1) 74 | flow5 = self.predict_flow5(concat5) 75 | flow5_up = self.upsampled_flow5_to_4(flow5) 76 | out_deconv4 = self.deconv4(concat5) 77 | 78 | concat4 = torch.cat((out_conv4,out_deconv4,flow5_up),1) 79 | flow4 = self.predict_flow4(concat4) 80 | flow4_up = self.upsampled_flow4_to_3(flow4) 81 | out_deconv3 = self.deconv3(concat4) 82 | 83 | concat3 = torch.cat((out_conv3,out_deconv3,flow4_up),1) 84 | flow3 = self.predict_flow3(concat3) 85 | flow3_up = self.upsampled_flow3_to_2(flow3) 86 | out_deconv2 = self.deconv2(concat3) 87 | 88 | concat2 = torch.cat((out_conv2,out_deconv2,flow3_up),1) 89 | flow2 = self.predict_flow2(concat2) 90 | 91 | if self.training: 92 | return flow2,flow3,flow4,flow5,flow6 93 | else: 94 | return flow2, 95 | 96 | -------------------------------------------------------------------------------- /dvs/flownet2/networks/FlowNetSD.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | from torch.nn import 
init 4 | 5 | import math 6 | import numpy as np 7 | 8 | from .submodules import * 9 | 'Parameter count = 45,371,666' 10 | 11 | class FlowNetSD(nn.Module): 12 | def __init__(self, args, batchNorm=True): 13 | super(FlowNetSD,self).__init__() 14 | 15 | self.batchNorm = batchNorm 16 | self.conv0 = conv(self.batchNorm, 6, 64) 17 | self.conv1 = conv(self.batchNorm, 64, 64, stride=2) 18 | self.conv1_1 = conv(self.batchNorm, 64, 128) 19 | self.conv2 = conv(self.batchNorm, 128, 128, stride=2) 20 | self.conv2_1 = conv(self.batchNorm, 128, 128) 21 | self.conv3 = conv(self.batchNorm, 128, 256, stride=2) 22 | self.conv3_1 = conv(self.batchNorm, 256, 256) 23 | self.conv4 = conv(self.batchNorm, 256, 512, stride=2) 24 | self.conv4_1 = conv(self.batchNorm, 512, 512) 25 | self.conv5 = conv(self.batchNorm, 512, 512, stride=2) 26 | self.conv5_1 = conv(self.batchNorm, 512, 512) 27 | self.conv6 = conv(self.batchNorm, 512, 1024, stride=2) 28 | self.conv6_1 = conv(self.batchNorm,1024, 1024) 29 | 30 | self.deconv5 = deconv(1024,512) 31 | self.deconv4 = deconv(1026,256) 32 | self.deconv3 = deconv(770,128) 33 | self.deconv2 = deconv(386,64) 34 | 35 | self.inter_conv5 = i_conv(self.batchNorm, 1026, 512) 36 | self.inter_conv4 = i_conv(self.batchNorm, 770, 256) 37 | self.inter_conv3 = i_conv(self.batchNorm, 386, 128) 38 | self.inter_conv2 = i_conv(self.batchNorm, 194, 64) 39 | 40 | self.predict_flow6 = predict_flow(1024) 41 | self.predict_flow5 = predict_flow(512) 42 | self.predict_flow4 = predict_flow(256) 43 | self.predict_flow3 = predict_flow(128) 44 | self.predict_flow2 = predict_flow(64) 45 | 46 | self.upsampled_flow6_to_5 = nn.ConvTranspose2d(2, 2, 4, 2, 1) 47 | self.upsampled_flow5_to_4 = nn.ConvTranspose2d(2, 2, 4, 2, 1) 48 | self.upsampled_flow4_to_3 = nn.ConvTranspose2d(2, 2, 4, 2, 1) 49 | self.upsampled_flow3_to_2 = nn.ConvTranspose2d(2, 2, 4, 2, 1) 50 | 51 | for m in self.modules(): 52 | if isinstance(m, nn.Conv2d): 53 | if m.bias is not None: 54 | init.uniform_(m.bias) 55 | init.xavier_uniform_(m.weight) 56 | 57 | if isinstance(m, nn.ConvTranspose2d): 58 | if m.bias is not None: 59 | init.uniform_(m.bias) 60 | init.xavier_uniform_(m.weight) 61 | # init_deconv_bilinear(m.weight) 62 | self.upsample1 = nn.Upsample(scale_factor=4, mode='bilinear') 63 | 64 | 65 | 66 | def forward(self, x): 67 | out_conv0 = self.conv0(x) 68 | out_conv1 = self.conv1_1(self.conv1(out_conv0)) 69 | out_conv2 = self.conv2_1(self.conv2(out_conv1)) 70 | 71 | out_conv3 = self.conv3_1(self.conv3(out_conv2)) 72 | out_conv4 = self.conv4_1(self.conv4(out_conv3)) 73 | out_conv5 = self.conv5_1(self.conv5(out_conv4)) 74 | out_conv6 = self.conv6_1(self.conv6(out_conv5)) 75 | 76 | flow6 = self.predict_flow6(out_conv6) 77 | flow6_up = self.upsampled_flow6_to_5(flow6) 78 | out_deconv5 = self.deconv5(out_conv6) 79 | 80 | concat5 = torch.cat((out_conv5,out_deconv5,flow6_up),1) 81 | out_interconv5 = self.inter_conv5(concat5) 82 | flow5 = self.predict_flow5(out_interconv5) 83 | 84 | flow5_up = self.upsampled_flow5_to_4(flow5) 85 | out_deconv4 = self.deconv4(concat5) 86 | 87 | concat4 = torch.cat((out_conv4,out_deconv4,flow5_up),1) 88 | out_interconv4 = self.inter_conv4(concat4) 89 | flow4 = self.predict_flow4(out_interconv4) 90 | flow4_up = self.upsampled_flow4_to_3(flow4) 91 | out_deconv3 = self.deconv3(concat4) 92 | 93 | concat3 = torch.cat((out_conv3,out_deconv3,flow4_up),1) 94 | out_interconv3 = self.inter_conv3(concat3) 95 | flow3 = self.predict_flow3(out_interconv3) 96 | flow3_up = self.upsampled_flow3_to_2(flow3) 97 | out_deconv2 = 
self.deconv2(concat3) 98 | 99 | concat2 = torch.cat((out_conv2,out_deconv2,flow3_up),1) 100 | out_interconv2 = self.inter_conv2(concat2) 101 | flow2 = self.predict_flow2(out_interconv2) 102 | 103 | if self.training: 104 | return flow2,flow3,flow4,flow5,flow6 105 | else: 106 | return flow2, 107 | -------------------------------------------------------------------------------- /dvs/flownet2/networks/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/googleinterns/deep-stabilization/7159c09d21aee3fc2098c64698c1300e40e3a8ea/dvs/flownet2/networks/__init__.py -------------------------------------------------------------------------------- /dvs/flownet2/networks/channelnorm_package/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/googleinterns/deep-stabilization/7159c09d21aee3fc2098c64698c1300e40e3a8ea/dvs/flownet2/networks/channelnorm_package/__init__.py -------------------------------------------------------------------------------- /dvs/flownet2/networks/channelnorm_package/channelnorm.py: -------------------------------------------------------------------------------- 1 | from torch.autograd import Function, Variable 2 | from torch.nn.modules.module import Module 3 | import channelnorm_cuda 4 | 5 | class ChannelNormFunction(Function): 6 | 7 | @staticmethod 8 | def forward(ctx, input1, norm_deg=2): 9 | assert input1.is_contiguous() 10 | b, _, h, w = input1.size() 11 | output = input1.new(b, 1, h, w).zero_() 12 | 13 | channelnorm_cuda.forward(input1, output, norm_deg) 14 | ctx.save_for_backward(input1, output) 15 | ctx.norm_deg = norm_deg 16 | 17 | return output 18 | 19 | @staticmethod 20 | def backward(ctx, grad_output): 21 | input1, output = ctx.saved_tensors 22 | 23 | grad_input1 = Variable(input1.new(input1.size()).zero_()) 24 | 25 | channelnorm_cuda.backward(input1, output, grad_output.data, 26 | grad_input1.data, ctx.norm_deg) 27 | 28 | return grad_input1, None 29 | 30 | 31 | class ChannelNorm(Module): 32 | 33 | def __init__(self, norm_deg=2): 34 | super(ChannelNorm, self).__init__() 35 | self.norm_deg = norm_deg 36 | 37 | def forward(self, input1): 38 | return ChannelNormFunction.apply(input1, self.norm_deg) 39 | 40 | -------------------------------------------------------------------------------- /dvs/flownet2/networks/channelnorm_package/channelnorm_cuda.cc: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | #include "channelnorm_kernel.cuh" 5 | 6 | int channelnorm_cuda_forward( 7 | at::Tensor& input1, 8 | at::Tensor& output, 9 | int norm_deg) { 10 | 11 | channelnorm_kernel_forward(input1, output, norm_deg); 12 | return 1; 13 | } 14 | 15 | 16 | int channelnorm_cuda_backward( 17 | at::Tensor& input1, 18 | at::Tensor& output, 19 | at::Tensor& gradOutput, 20 | at::Tensor& gradInput1, 21 | int norm_deg) { 22 | 23 | channelnorm_kernel_backward(input1, output, gradOutput, gradInput1, norm_deg); 24 | return 1; 25 | } 26 | 27 | PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) { 28 | m.def("forward", &channelnorm_cuda_forward, "Channel norm forward (CUDA)"); 29 | m.def("backward", &channelnorm_cuda_backward, "Channel norm backward (CUDA)"); 30 | } 31 | 32 | -------------------------------------------------------------------------------- /dvs/flownet2/networks/channelnorm_package/channelnorm_kernel.cu: -------------------------------------------------------------------------------- 1 | 
#include 2 | #include 3 | #include 4 | 5 | #include "channelnorm_kernel.cuh" 6 | 7 | #define CUDA_NUM_THREADS 512 8 | 9 | #define DIM0(TENSOR) ((TENSOR).x) 10 | #define DIM1(TENSOR) ((TENSOR).y) 11 | #define DIM2(TENSOR) ((TENSOR).z) 12 | #define DIM3(TENSOR) ((TENSOR).w) 13 | 14 | #define DIM3_INDEX(TENSOR, xx, yy, zz, ww) ((TENSOR)[((xx) * (TENSOR##_stride.x)) + ((yy) * (TENSOR##_stride.y)) + ((zz) * (TENSOR##_stride.z)) + ((ww) * (TENSOR##_stride.w))]) 15 | 16 | using at::Half; 17 | 18 | template 19 | __global__ void kernel_channelnorm_update_output( 20 | const int n, 21 | const scalar_t* __restrict__ input1, 22 | const long4 input1_size, 23 | const long4 input1_stride, 24 | scalar_t* __restrict__ output, 25 | const long4 output_size, 26 | const long4 output_stride, 27 | int norm_deg) { 28 | 29 | int index = blockIdx.x * blockDim.x + threadIdx.x; 30 | 31 | if (index >= n) { 32 | return; 33 | } 34 | 35 | int dim_b = DIM0(output_size); 36 | int dim_c = DIM1(output_size); 37 | int dim_h = DIM2(output_size); 38 | int dim_w = DIM3(output_size); 39 | int dim_chw = dim_c * dim_h * dim_w; 40 | 41 | int b = ( index / dim_chw ) % dim_b; 42 | int y = ( index / dim_w ) % dim_h; 43 | int x = ( index ) % dim_w; 44 | 45 | int i1dim_c = DIM1(input1_size); 46 | int i1dim_h = DIM2(input1_size); 47 | int i1dim_w = DIM3(input1_size); 48 | int i1dim_chw = i1dim_c * i1dim_h * i1dim_w; 49 | int i1dim_hw = i1dim_h * i1dim_w; 50 | 51 | float result = 0.0; 52 | 53 | for (int c = 0; c < i1dim_c; ++c) { 54 | int i1Index = b * i1dim_chw + c * i1dim_hw + y * i1dim_w + x; 55 | scalar_t val = input1[i1Index]; 56 | result += static_cast(val * val); 57 | } 58 | result = sqrt(result); 59 | output[index] = static_cast(result); 60 | } 61 | 62 | 63 | template 64 | __global__ void kernel_channelnorm_backward_input1( 65 | const int n, 66 | const scalar_t* __restrict__ input1, const long4 input1_size, const long4 input1_stride, 67 | const scalar_t* __restrict__ output, const long4 output_size, const long4 output_stride, 68 | const scalar_t* __restrict__ gradOutput, const long4 gradOutput_size, const long4 gradOutput_stride, 69 | scalar_t* __restrict__ gradInput, const long4 gradInput_size, const long4 gradInput_stride, 70 | int norm_deg) { 71 | 72 | int index = blockIdx.x * blockDim.x + threadIdx.x; 73 | 74 | if (index >= n) { 75 | return; 76 | } 77 | 78 | float val = 0.0; 79 | 80 | int dim_b = DIM0(gradInput_size); 81 | int dim_c = DIM1(gradInput_size); 82 | int dim_h = DIM2(gradInput_size); 83 | int dim_w = DIM3(gradInput_size); 84 | int dim_chw = dim_c * dim_h * dim_w; 85 | int dim_hw = dim_h * dim_w; 86 | 87 | int b = ( index / dim_chw ) % dim_b; 88 | int y = ( index / dim_w ) % dim_h; 89 | int x = ( index ) % dim_w; 90 | 91 | 92 | int outIndex = b * dim_hw + y * dim_w + x; 93 | val = static_cast(gradOutput[outIndex]) * static_cast(input1[index]) / (static_cast(output[outIndex])+1e-9); 94 | gradInput[index] = static_cast(val); 95 | 96 | } 97 | 98 | void channelnorm_kernel_forward( 99 | at::Tensor& input1, 100 | at::Tensor& output, 101 | int norm_deg) { 102 | 103 | const long4 input1_size = make_long4(input1.size(0), input1.size(1), input1.size(2), input1.size(3)); 104 | const long4 input1_stride = make_long4(input1.stride(0), input1.stride(1), input1.stride(2), input1.stride(3)); 105 | 106 | const long4 output_size = make_long4(output.size(0), output.size(1), output.size(2), output.size(3)); 107 | const long4 output_stride = make_long4(output.stride(0), output.stride(1), output.stride(2), output.stride(3)); 108 | 109 | int 
n = output.numel(); 110 | 111 | AT_DISPATCH_FLOATING_TYPES_AND_HALF(input1.type(), "channelnorm_forward", ([&] { 112 | 113 | kernel_channelnorm_update_output<<< (n + CUDA_NUM_THREADS - 1)/CUDA_NUM_THREADS, CUDA_NUM_THREADS, 0, at::cuda::getCurrentCUDAStream() >>>( 114 | //at::globalContext().getCurrentCUDAStream() >>>( 115 | n, 116 | input1.data(), 117 | input1_size, 118 | input1_stride, 119 | output.data(), 120 | output_size, 121 | output_stride, 122 | norm_deg); 123 | 124 | })); 125 | 126 | // TODO: ATen-equivalent check 127 | 128 | // THCudaCheck(cudaGetLastError()); 129 | } 130 | 131 | void channelnorm_kernel_backward( 132 | at::Tensor& input1, 133 | at::Tensor& output, 134 | at::Tensor& gradOutput, 135 | at::Tensor& gradInput1, 136 | int norm_deg) { 137 | 138 | const long4 input1_size = make_long4(input1.size(0), input1.size(1), input1.size(2), input1.size(3)); 139 | const long4 input1_stride = make_long4(input1.stride(0), input1.stride(1), input1.stride(2), input1.stride(3)); 140 | 141 | const long4 output_size = make_long4(output.size(0), output.size(1), output.size(2), output.size(3)); 142 | const long4 output_stride = make_long4(output.stride(0), output.stride(1), output.stride(2), output.stride(3)); 143 | 144 | const long4 gradOutput_size = make_long4(gradOutput.size(0), gradOutput.size(1), gradOutput.size(2), gradOutput.size(3)); 145 | const long4 gradOutput_stride = make_long4(gradOutput.stride(0), gradOutput.stride(1), gradOutput.stride(2), gradOutput.stride(3)); 146 | 147 | const long4 gradInput1_size = make_long4(gradInput1.size(0), gradInput1.size(1), gradInput1.size(2), gradInput1.size(3)); 148 | const long4 gradInput1_stride = make_long4(gradInput1.stride(0), gradInput1.stride(1), gradInput1.stride(2), gradInput1.stride(3)); 149 | 150 | int n = gradInput1.numel(); 151 | 152 | AT_DISPATCH_FLOATING_TYPES_AND_HALF(input1.type(), "channelnorm_backward_input1", ([&] { 153 | 154 | kernel_channelnorm_backward_input1<<< (n + CUDA_NUM_THREADS - 1)/CUDA_NUM_THREADS, CUDA_NUM_THREADS, 0, at::cuda::getCurrentCUDAStream() >>>( 155 | //at::globalContext().getCurrentCUDAStream() >>>( 156 | n, 157 | input1.data(), 158 | input1_size, 159 | input1_stride, 160 | output.data(), 161 | output_size, 162 | output_stride, 163 | gradOutput.data(), 164 | gradOutput_size, 165 | gradOutput_stride, 166 | gradInput1.data(), 167 | gradInput1_size, 168 | gradInput1_stride, 169 | norm_deg 170 | ); 171 | 172 | })); 173 | 174 | // TODO: Add ATen-equivalent check 175 | 176 | // THCudaCheck(cudaGetLastError()); 177 | } 178 | -------------------------------------------------------------------------------- /dvs/flownet2/networks/channelnorm_package/channelnorm_kernel.cuh: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | 5 | void channelnorm_kernel_forward( 6 | at::Tensor& input1, 7 | at::Tensor& output, 8 | int norm_deg); 9 | 10 | 11 | void channelnorm_kernel_backward( 12 | at::Tensor& input1, 13 | at::Tensor& output, 14 | at::Tensor& gradOutput, 15 | at::Tensor& gradInput1, 16 | int norm_deg); 17 | -------------------------------------------------------------------------------- /dvs/flownet2/networks/channelnorm_package/setup.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | import os 3 | import torch 4 | 5 | from setuptools import setup 6 | from torch.utils.cpp_extension import BuildExtension, CUDAExtension 7 | 8 | cxx_args = ['-std=c++11'] 9 | 10 | nvcc_args = [ 11 | 
'-gencode', 'arch=compute_52,code=sm_52', 12 | '-gencode', 'arch=compute_60,code=sm_60', 13 | '-gencode', 'arch=compute_61,code=sm_61', 14 | '-gencode', 'arch=compute_70,code=sm_70', 15 | '-gencode', 'arch=compute_70,code=compute_70' 16 | ] 17 | 18 | setup( 19 | name='channelnorm_cuda', 20 | ext_modules=[ 21 | CUDAExtension('channelnorm_cuda', [ 22 | 'channelnorm_cuda.cc', 23 | 'channelnorm_kernel.cu' 24 | ], extra_compile_args={'cxx': cxx_args, 'nvcc': nvcc_args}) 25 | ], 26 | cmdclass={ 27 | 'build_ext': BuildExtension 28 | }) 29 | -------------------------------------------------------------------------------- /dvs/flownet2/networks/correlation_package/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/googleinterns/deep-stabilization/7159c09d21aee3fc2098c64698c1300e40e3a8ea/dvs/flownet2/networks/correlation_package/__init__.py -------------------------------------------------------------------------------- /dvs/flownet2/networks/correlation_package/correlation.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch.nn.modules.module import Module 3 | from torch.autograd import Function 4 | import correlation_cuda 5 | 6 | class CorrelationFunction(Function): 7 | 8 | @staticmethod 9 | def forward(ctx, input1, input2, pad_size=3, kernel_size=3, max_displacement=20, stride1=1, stride2=2, corr_multiply=1): 10 | ctx.save_for_backward(input1, input2) 11 | 12 | ctx.pad_size = pad_size 13 | ctx.kernel_size = kernel_size 14 | ctx.max_displacement = max_displacement 15 | ctx.stride1 = stride1 16 | ctx.stride2 = stride2 17 | ctx.corr_multiply = corr_multiply 18 | 19 | with torch.cuda.device_of(input1): 20 | rbot1 = input1.new() 21 | rbot2 = input2.new() 22 | output = input1.new() 23 | 24 | correlation_cuda.forward(input1, input2, rbot1, rbot2, output, 25 | ctx.pad_size, ctx.kernel_size, ctx.max_displacement, ctx.stride1, ctx.stride2, ctx.corr_multiply) 26 | 27 | return output 28 | 29 | @staticmethod 30 | def backward(ctx, grad_output): 31 | input1, input2 = ctx.saved_tensors 32 | 33 | with torch.cuda.device_of(input1): 34 | rbot1 = input1.new() 35 | rbot2 = input2.new() 36 | 37 | grad_input1 = input1.new() 38 | grad_input2 = input2.new() 39 | 40 | correlation_cuda.backward(input1, input2, rbot1, rbot2, grad_output, grad_input1, grad_input2, 41 | ctx.pad_size, ctx.kernel_size, ctx.max_displacement, ctx.stride1, ctx.stride2, ctx.corr_multiply) 42 | 43 | return grad_input1, grad_input2, None, None, None, None, None, None 44 | 45 | 46 | class Correlation(Module): 47 | def __init__(self, pad_size=0, kernel_size=0, max_displacement=0, stride1=1, stride2=2, corr_multiply=1): 48 | super(Correlation, self).__init__() 49 | self.pad_size = pad_size 50 | self.kernel_size = kernel_size 51 | self.max_displacement = max_displacement 52 | self.stride1 = stride1 53 | self.stride2 = stride2 54 | self.corr_multiply = corr_multiply 55 | 56 | def forward(self, input1, input2): 57 | 58 | result = CorrelationFunction.apply(input1, input2, self.pad_size, self.kernel_size, self.max_displacement, self.stride1, self.stride2, self.corr_multiply) 59 | 60 | return result 61 | 62 | -------------------------------------------------------------------------------- /dvs/flownet2/networks/correlation_package/correlation_cuda.cc: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | 8 | 
#include "correlation_cuda_kernel.cuh" 9 | 10 | int correlation_forward_cuda(at::Tensor& input1, at::Tensor& input2, at::Tensor& rInput1, at::Tensor& rInput2, at::Tensor& output, 11 | int pad_size, 12 | int kernel_size, 13 | int max_displacement, 14 | int stride1, 15 | int stride2, 16 | int corr_type_multiply) 17 | { 18 | 19 | int batchSize = input1.size(0); 20 | 21 | int nInputChannels = input1.size(1); 22 | int inputHeight = input1.size(2); 23 | int inputWidth = input1.size(3); 24 | 25 | int kernel_radius = (kernel_size - 1) / 2; 26 | int border_radius = kernel_radius + max_displacement; 27 | 28 | int paddedInputHeight = inputHeight + 2 * pad_size; 29 | int paddedInputWidth = inputWidth + 2 * pad_size; 30 | 31 | int nOutputChannels = ((max_displacement/stride2)*2 + 1) * ((max_displacement/stride2)*2 + 1); 32 | 33 | int outputHeight = ceil(static_cast(paddedInputHeight - 2 * border_radius) / static_cast(stride1)); 34 | int outputwidth = ceil(static_cast(paddedInputWidth - 2 * border_radius) / static_cast(stride1)); 35 | 36 | rInput1.resize_({batchSize, paddedInputHeight, paddedInputWidth, nInputChannels}); 37 | rInput2.resize_({batchSize, paddedInputHeight, paddedInputWidth, nInputChannels}); 38 | output.resize_({batchSize, nOutputChannels, outputHeight, outputwidth}); 39 | 40 | rInput1.fill_(0); 41 | rInput2.fill_(0); 42 | output.fill_(0); 43 | 44 | int success = correlation_forward_cuda_kernel( 45 | output, 46 | output.size(0), 47 | output.size(1), 48 | output.size(2), 49 | output.size(3), 50 | output.stride(0), 51 | output.stride(1), 52 | output.stride(2), 53 | output.stride(3), 54 | input1, 55 | input1.size(1), 56 | input1.size(2), 57 | input1.size(3), 58 | input1.stride(0), 59 | input1.stride(1), 60 | input1.stride(2), 61 | input1.stride(3), 62 | input2, 63 | input2.size(1), 64 | input2.stride(0), 65 | input2.stride(1), 66 | input2.stride(2), 67 | input2.stride(3), 68 | rInput1, 69 | rInput2, 70 | pad_size, 71 | kernel_size, 72 | max_displacement, 73 | stride1, 74 | stride2, 75 | corr_type_multiply, 76 | at::cuda::getCurrentCUDAStream() 77 | //at::globalContext().getCurrentCUDAStream() 78 | ); 79 | 80 | //check for errors 81 | if (!success) { 82 | AT_ERROR("CUDA call failed"); 83 | } 84 | 85 | return 1; 86 | 87 | } 88 | 89 | int correlation_backward_cuda(at::Tensor& input1, at::Tensor& input2, at::Tensor& rInput1, at::Tensor& rInput2, at::Tensor& gradOutput, 90 | at::Tensor& gradInput1, at::Tensor& gradInput2, 91 | int pad_size, 92 | int kernel_size, 93 | int max_displacement, 94 | int stride1, 95 | int stride2, 96 | int corr_type_multiply) 97 | { 98 | 99 | int batchSize = input1.size(0); 100 | int nInputChannels = input1.size(1); 101 | int paddedInputHeight = input1.size(2)+ 2 * pad_size; 102 | int paddedInputWidth = input1.size(3)+ 2 * pad_size; 103 | 104 | int height = input1.size(2); 105 | int width = input1.size(3); 106 | 107 | rInput1.resize_({batchSize, paddedInputHeight, paddedInputWidth, nInputChannels}); 108 | rInput2.resize_({batchSize, paddedInputHeight, paddedInputWidth, nInputChannels}); 109 | gradInput1.resize_({batchSize, nInputChannels, height, width}); 110 | gradInput2.resize_({batchSize, nInputChannels, height, width}); 111 | 112 | rInput1.fill_(0); 113 | rInput2.fill_(0); 114 | gradInput1.fill_(0); 115 | gradInput2.fill_(0); 116 | 117 | int success = correlation_backward_cuda_kernel(gradOutput, 118 | gradOutput.size(0), 119 | gradOutput.size(1), 120 | gradOutput.size(2), 121 | gradOutput.size(3), 122 | gradOutput.stride(0), 123 | gradOutput.stride(1), 124 | 
gradOutput.stride(2), 125 | gradOutput.stride(3), 126 | input1, 127 | input1.size(1), 128 | input1.size(2), 129 | input1.size(3), 130 | input1.stride(0), 131 | input1.stride(1), 132 | input1.stride(2), 133 | input1.stride(3), 134 | input2, 135 | input2.stride(0), 136 | input2.stride(1), 137 | input2.stride(2), 138 | input2.stride(3), 139 | gradInput1, 140 | gradInput1.stride(0), 141 | gradInput1.stride(1), 142 | gradInput1.stride(2), 143 | gradInput1.stride(3), 144 | gradInput2, 145 | gradInput2.size(1), 146 | gradInput2.stride(0), 147 | gradInput2.stride(1), 148 | gradInput2.stride(2), 149 | gradInput2.stride(3), 150 | rInput1, 151 | rInput2, 152 | pad_size, 153 | kernel_size, 154 | max_displacement, 155 | stride1, 156 | stride2, 157 | corr_type_multiply, 158 | at::cuda::getCurrentCUDAStream() 159 | //at::globalContext().getCurrentCUDAStream() 160 | ); 161 | 162 | if (!success) { 163 | AT_ERROR("CUDA call failed"); 164 | } 165 | 166 | return 1; 167 | } 168 | 169 | PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) { 170 | m.def("forward", &correlation_forward_cuda, "Correlation forward (CUDA)"); 171 | m.def("backward", &correlation_backward_cuda, "Correlation backward (CUDA)"); 172 | } 173 | 174 | -------------------------------------------------------------------------------- /dvs/flownet2/networks/correlation_package/correlation_cuda_kernel.cuh: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | #include 5 | #include 6 | 7 | int correlation_forward_cuda_kernel(at::Tensor& output, 8 | int ob, 9 | int oc, 10 | int oh, 11 | int ow, 12 | int osb, 13 | int osc, 14 | int osh, 15 | int osw, 16 | 17 | at::Tensor& input1, 18 | int ic, 19 | int ih, 20 | int iw, 21 | int isb, 22 | int isc, 23 | int ish, 24 | int isw, 25 | 26 | at::Tensor& input2, 27 | int gc, 28 | int gsb, 29 | int gsc, 30 | int gsh, 31 | int gsw, 32 | 33 | at::Tensor& rInput1, 34 | at::Tensor& rInput2, 35 | int pad_size, 36 | int kernel_size, 37 | int max_displacement, 38 | int stride1, 39 | int stride2, 40 | int corr_type_multiply, 41 | cudaStream_t stream); 42 | 43 | 44 | int correlation_backward_cuda_kernel( 45 | at::Tensor& gradOutput, 46 | int gob, 47 | int goc, 48 | int goh, 49 | int gow, 50 | int gosb, 51 | int gosc, 52 | int gosh, 53 | int gosw, 54 | 55 | at::Tensor& input1, 56 | int ic, 57 | int ih, 58 | int iw, 59 | int isb, 60 | int isc, 61 | int ish, 62 | int isw, 63 | 64 | at::Tensor& input2, 65 | int gsb, 66 | int gsc, 67 | int gsh, 68 | int gsw, 69 | 70 | at::Tensor& gradInput1, 71 | int gisb, 72 | int gisc, 73 | int gish, 74 | int gisw, 75 | 76 | at::Tensor& gradInput2, 77 | int ggc, 78 | int ggsb, 79 | int ggsc, 80 | int ggsh, 81 | int ggsw, 82 | 83 | at::Tensor& rInput1, 84 | at::Tensor& rInput2, 85 | int pad_size, 86 | int kernel_size, 87 | int max_displacement, 88 | int stride1, 89 | int stride2, 90 | int corr_type_multiply, 91 | cudaStream_t stream); 92 | -------------------------------------------------------------------------------- /dvs/flownet2/networks/correlation_package/setup.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | import os 3 | import torch 4 | 5 | from setuptools import setup, find_packages 6 | from torch.utils.cpp_extension import BuildExtension, CUDAExtension 7 | 8 | cxx_args = ['-std=c++11'] 9 | 10 | nvcc_args = [ 11 | '-gencode', 'arch=compute_50,code=sm_50', 12 | '-gencode', 'arch=compute_52,code=sm_52', 13 | '-gencode', 'arch=compute_60,code=sm_60', 14 | 
'-gencode', 'arch=compute_61,code=sm_61', 15 | '-gencode', 'arch=compute_70,code=sm_70', 16 | '-gencode', 'arch=compute_70,code=compute_70' 17 | ] 18 | 19 | setup( 20 | name='correlation_cuda', 21 | ext_modules=[ 22 | CUDAExtension('correlation_cuda', [ 23 | 'correlation_cuda.cc', 24 | 'correlation_cuda_kernel.cu' 25 | ], extra_compile_args={'cxx': cxx_args, 'nvcc': nvcc_args}) 26 | ], 27 | cmdclass={ 28 | 'build_ext': BuildExtension 29 | }) 30 | -------------------------------------------------------------------------------- /dvs/flownet2/networks/resample2d_package/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/googleinterns/deep-stabilization/7159c09d21aee3fc2098c64698c1300e40e3a8ea/dvs/flownet2/networks/resample2d_package/__init__.py -------------------------------------------------------------------------------- /dvs/flownet2/networks/resample2d_package/resample2d.py: -------------------------------------------------------------------------------- 1 | from torch.nn.modules.module import Module 2 | from torch.autograd import Function, Variable 3 | import resample2d_cuda 4 | 5 | class Resample2dFunction(Function): 6 | 7 | @staticmethod 8 | def forward(ctx, input1, input2, kernel_size=1, bilinear= True): 9 | assert input1.is_contiguous() 10 | assert input2.is_contiguous() 11 | 12 | ctx.save_for_backward(input1, input2) 13 | ctx.kernel_size = kernel_size 14 | ctx.bilinear = bilinear 15 | 16 | _, d, _, _ = input1.size() 17 | b, _, h, w = input2.size() 18 | output = input1.new(b, d, h, w).zero_() 19 | 20 | resample2d_cuda.forward(input1, input2, output, kernel_size, bilinear) 21 | 22 | return output 23 | 24 | @staticmethod 25 | def backward(ctx, grad_output): 26 | grad_output = grad_output.contiguous() 27 | assert grad_output.is_contiguous() 28 | 29 | input1, input2 = ctx.saved_tensors 30 | 31 | grad_input1 = Variable(input1.new(input1.size()).zero_()) 32 | grad_input2 = Variable(input1.new(input2.size()).zero_()) 33 | 34 | resample2d_cuda.backward(input1, input2, grad_output.data, 35 | grad_input1.data, grad_input2.data, 36 | ctx.kernel_size, ctx.bilinear) 37 | 38 | return grad_input1, grad_input2, None, None 39 | 40 | class Resample2d(Module): 41 | 42 | def __init__(self, kernel_size=1, bilinear = True): 43 | super(Resample2d, self).__init__() 44 | self.kernel_size = kernel_size 45 | self.bilinear = bilinear 46 | 47 | def forward(self, input1, input2): 48 | input1_c = input1.contiguous() 49 | return Resample2dFunction.apply(input1_c, input2, self.kernel_size, self.bilinear) 50 | -------------------------------------------------------------------------------- /dvs/flownet2/networks/resample2d_package/resample2d_cuda.cc: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | #include "resample2d_kernel.cuh" 5 | 6 | int resample2d_cuda_forward( 7 | at::Tensor& input1, 8 | at::Tensor& input2, 9 | at::Tensor& output, 10 | int kernel_size, bool bilinear) { 11 | resample2d_kernel_forward(input1, input2, output, kernel_size, bilinear); 12 | return 1; 13 | } 14 | 15 | int resample2d_cuda_backward( 16 | at::Tensor& input1, 17 | at::Tensor& input2, 18 | at::Tensor& gradOutput, 19 | at::Tensor& gradInput1, 20 | at::Tensor& gradInput2, 21 | int kernel_size, bool bilinear) { 22 | resample2d_kernel_backward(input1, input2, gradOutput, gradInput1, gradInput2, kernel_size, bilinear); 23 | return 1; 24 | } 25 | 26 | 27 | 28 | PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) { 
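// Binding note: these two functions are what resample2d.py imports as the
// `resample2d_cuda` extension module; Resample2dFunction.forward/backward call
// them with pre-allocated output/gradient tensors plus the kernel_size and
// bilinear options.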
29 | m.def("forward", &resample2d_cuda_forward, "Resample2D forward (CUDA)"); 30 | m.def("backward", &resample2d_cuda_backward, "Resample2D backward (CUDA)"); 31 | } 32 | 33 | -------------------------------------------------------------------------------- /dvs/flownet2/networks/resample2d_package/resample2d_kernel.cu: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | 5 | #define CUDA_NUM_THREADS 512 6 | #define THREADS_PER_BLOCK 64 7 | 8 | #define DIM0(TENSOR) ((TENSOR).x) 9 | #define DIM1(TENSOR) ((TENSOR).y) 10 | #define DIM2(TENSOR) ((TENSOR).z) 11 | #define DIM3(TENSOR) ((TENSOR).w) 12 | 13 | #define DIM3_INDEX(TENSOR, xx, yy, zz, ww) ((TENSOR)[((xx) * (TENSOR##_stride.x)) + ((yy) * (TENSOR##_stride.y)) + ((zz) * (TENSOR##_stride.z)) + ((ww) * (TENSOR##_stride.w))]) 14 | 15 | template 16 | __global__ void kernel_resample2d_update_output(const int n, 17 | const scalar_t* __restrict__ input1, const long4 input1_size, const long4 input1_stride, 18 | const scalar_t* __restrict__ input2, const long4 input2_size, const long4 input2_stride, 19 | scalar_t* __restrict__ output, const long4 output_size, const long4 output_stride, int kernel_size, bool bilinear) { 20 | int index = blockIdx.x * blockDim.x + threadIdx.x; 21 | 22 | if (index >= n) { 23 | return; 24 | } 25 | 26 | scalar_t val = 0.0f; 27 | 28 | int dim_b = DIM0(output_size); 29 | int dim_c = DIM1(output_size); 30 | int dim_h = DIM2(output_size); 31 | int dim_w = DIM3(output_size); 32 | int dim_chw = dim_c * dim_h * dim_w; 33 | int dim_hw = dim_h * dim_w; 34 | 35 | int b = ( index / dim_chw ) % dim_b; 36 | int c = ( index / dim_hw ) % dim_c; 37 | int y = ( index / dim_w ) % dim_h; 38 | int x = ( index ) % dim_w; 39 | 40 | scalar_t dx = DIM3_INDEX(input2, b, 0, y, x); 41 | scalar_t dy = DIM3_INDEX(input2, b, 1, y, x); 42 | 43 | scalar_t xf = static_cast(x) + dx; 44 | scalar_t yf = static_cast(y) + dy; 45 | scalar_t alpha = xf - floor(xf); // alpha 46 | scalar_t beta = yf - floor(yf); // beta 47 | 48 | if (bilinear) { 49 | int xL = max(min( int (floor(xf)), dim_w-1), 0); 50 | int xR = max(min( int (floor(xf)+1), dim_w -1), 0); 51 | int yT = max(min( int (floor(yf)), dim_h-1), 0); 52 | int yB = max(min( int (floor(yf)+1), dim_h-1), 0); 53 | 54 | for (int fy = 0; fy < kernel_size; fy += 1) { 55 | for (int fx = 0; fx < kernel_size; fx += 1) { 56 | val += static_cast((1. - alpha)*(1. - beta) * DIM3_INDEX(input1, b, c, yT + fy, xL + fx)); 57 | val += static_cast((alpha)*(1. - beta) * DIM3_INDEX(input1, b, c, yT + fy, xR + fx)); 58 | val += static_cast((1. 
- alpha)*(beta) * DIM3_INDEX(input1, b, c, yB + fy, xL + fx)); 59 | val += static_cast((alpha)*(beta) * DIM3_INDEX(input1, b, c, yB + fy, xR + fx)); 60 | } 61 | } 62 | 63 | output[index] = val; 64 | } 65 | else { 66 | int xN = max(min( int (floor(xf + 0.5)), dim_w - 1), 0); 67 | int yN = max(min( int (floor(yf + 0.5)), dim_h - 1), 0); 68 | 69 | output[index] = static_cast ( DIM3_INDEX(input1, b, c, yN, xN) ); 70 | } 71 | 72 | } 73 | 74 | 75 | template 76 | __global__ void kernel_resample2d_backward_input1( 77 | const int n, const scalar_t* __restrict__ input1, const long4 input1_size, const long4 input1_stride, 78 | const scalar_t* __restrict__ input2, const long4 input2_size, const long4 input2_stride, 79 | const scalar_t* __restrict__ gradOutput, const long4 gradOutput_size, const long4 gradOutput_stride, 80 | scalar_t* __restrict__ gradInput, const long4 gradInput_size, const long4 gradInput_stride, int kernel_size, bool bilinear) { 81 | 82 | int index = blockIdx.x * blockDim.x + threadIdx.x; 83 | 84 | if (index >= n) { 85 | return; 86 | } 87 | 88 | int dim_b = DIM0(gradOutput_size); 89 | int dim_c = DIM1(gradOutput_size); 90 | int dim_h = DIM2(gradOutput_size); 91 | int dim_w = DIM3(gradOutput_size); 92 | int dim_chw = dim_c * dim_h * dim_w; 93 | int dim_hw = dim_h * dim_w; 94 | 95 | int b = ( index / dim_chw ) % dim_b; 96 | int c = ( index / dim_hw ) % dim_c; 97 | int y = ( index / dim_w ) % dim_h; 98 | int x = ( index ) % dim_w; 99 | 100 | scalar_t dx = DIM3_INDEX(input2, b, 0, y, x); 101 | scalar_t dy = DIM3_INDEX(input2, b, 1, y, x); 102 | 103 | scalar_t xf = static_cast(x) + dx; 104 | scalar_t yf = static_cast(y) + dy; 105 | scalar_t alpha = xf - int(xf); // alpha 106 | scalar_t beta = yf - int(yf); // beta 107 | 108 | int idim_h = DIM2(input1_size); 109 | int idim_w = DIM3(input1_size); 110 | 111 | int xL = max(min( int (floor(xf)), idim_w-1), 0); 112 | int xR = max(min( int (floor(xf)+1), idim_w -1), 0); 113 | int yT = max(min( int (floor(yf)), idim_h-1), 0); 114 | int yB = max(min( int (floor(yf)+1), idim_h-1), 0); 115 | 116 | for (int fy = 0; fy < kernel_size; fy += 1) { 117 | for (int fx = 0; fx < kernel_size; fx += 1) { 118 | atomicAdd(&DIM3_INDEX(gradInput, b, c, (yT + fy), (xL + fx)), (1-alpha)*(1-beta) * DIM3_INDEX(gradOutput, b, c, y, x)); 119 | atomicAdd(&DIM3_INDEX(gradInput, b, c, (yT + fy), (xR + fx)), (alpha)*(1-beta) * DIM3_INDEX(gradOutput, b, c, y, x)); 120 | atomicAdd(&DIM3_INDEX(gradInput, b, c, (yB + fy), (xL + fx)), (1-alpha)*(beta) * DIM3_INDEX(gradOutput, b, c, y, x)); 121 | atomicAdd(&DIM3_INDEX(gradInput, b, c, (yB + fy), (xR + fx)), (alpha)*(beta) * DIM3_INDEX(gradOutput, b, c, y, x)); 122 | } 123 | } 124 | 125 | } 126 | 127 | template 128 | __global__ void kernel_resample2d_backward_input2( 129 | const int n, const scalar_t* __restrict__ input1, const long4 input1_size, const long4 input1_stride, 130 | const scalar_t* __restrict__ input2, const long4 input2_size, const long4 input2_stride, 131 | const scalar_t* __restrict__ gradOutput, const long4 gradOutput_size, const long4 gradOutput_stride, 132 | scalar_t* __restrict__ gradInput, const long4 gradInput_size, const long4 gradInput_stride, int kernel_size, bool bilinear) { 133 | 134 | int index = blockIdx.x * blockDim.x + threadIdx.x; 135 | 136 | if (index >= n) { 137 | return; 138 | } 139 | 140 | scalar_t output = 0.0; 141 | int kernel_rad = (kernel_size - 1)/2; 142 | 143 | int dim_b = DIM0(gradInput_size); 144 | int dim_c = DIM1(gradInput_size); 145 | int dim_h = DIM2(gradInput_size); 146 | int dim_w 
= DIM3(gradInput_size); 147 | int dim_chw = dim_c * dim_h * dim_w; 148 | int dim_hw = dim_h * dim_w; 149 | 150 | int b = ( index / dim_chw ) % dim_b; 151 | int c = ( index / dim_hw ) % dim_c; 152 | int y = ( index / dim_w ) % dim_h; 153 | int x = ( index ) % dim_w; 154 | 155 | int odim_c = DIM1(gradOutput_size); 156 | 157 | scalar_t dx = DIM3_INDEX(input2, b, 0, y, x); 158 | scalar_t dy = DIM3_INDEX(input2, b, 1, y, x); 159 | 160 | scalar_t xf = static_cast(x) + dx; 161 | scalar_t yf = static_cast(y) + dy; 162 | 163 | int xL = max(min( int (floor(xf)), dim_w-1), 0); 164 | int xR = max(min( int (floor(xf)+1), dim_w -1), 0); 165 | int yT = max(min( int (floor(yf)), dim_h-1), 0); 166 | int yB = max(min( int (floor(yf)+1), dim_h-1), 0); 167 | 168 | if (c % 2) { 169 | float gamma = 1 - (xf - floor(xf)); // alpha 170 | for (int i = 0; i <= 2*kernel_rad; ++i) { 171 | for (int j = 0; j <= 2*kernel_rad; ++j) { 172 | for (int ch = 0; ch < odim_c; ++ch) { 173 | output += (gamma) * DIM3_INDEX(gradOutput, b, ch, y, x) * DIM3_INDEX(input1, b, ch, (yB + j), (xL + i)); 174 | output -= (gamma) * DIM3_INDEX(gradOutput, b, ch, y, x) * DIM3_INDEX(input1, b, ch, (yT + j), (xL + i)); 175 | output += (1-gamma) * DIM3_INDEX(gradOutput, b, ch, y, x) * DIM3_INDEX(input1, b, ch, (yB + j), (xR + i)); 176 | output -= (1-gamma) * DIM3_INDEX(gradOutput, b, ch, y, x) * DIM3_INDEX(input1, b, ch, (yT + j), (xR + i)); 177 | } 178 | } 179 | } 180 | } 181 | else { 182 | float gamma = 1 - (yf - floor(yf)); // alpha 183 | for (int i = 0; i <= 2*kernel_rad; ++i) { 184 | for (int j = 0; j <= 2*kernel_rad; ++j) { 185 | for (int ch = 0; ch < odim_c; ++ch) { 186 | output += (gamma) * DIM3_INDEX(gradOutput, b, ch, y, x) * DIM3_INDEX(input1, b, ch, (yT + j), (xR + i)); 187 | output -= (gamma) * DIM3_INDEX(gradOutput, b, ch, y, x) * DIM3_INDEX(input1, b, ch, (yT + j), (xL + i)); 188 | output += (1-gamma) * DIM3_INDEX(gradOutput, b, ch, y, x) * DIM3_INDEX(input1, b, ch, (yB + j), (xR + i)); 189 | output -= (1-gamma) * DIM3_INDEX(gradOutput, b, ch, y, x) * DIM3_INDEX(input1, b, ch, (yB + j), (xL + i)); 190 | } 191 | } 192 | } 193 | 194 | } 195 | 196 | gradInput[index] = output; 197 | 198 | } 199 | 200 | void resample2d_kernel_forward( 201 | at::Tensor& input1, 202 | at::Tensor& input2, 203 | at::Tensor& output, 204 | int kernel_size, 205 | bool bilinear) { 206 | 207 | int n = output.numel(); 208 | 209 | const long4 input1_size = make_long4(input1.size(0), input1.size(1), input1.size(2), input1.size(3)); 210 | const long4 input1_stride = make_long4(input1.stride(0), input1.stride(1), input1.stride(2), input1.stride(3)); 211 | 212 | const long4 input2_size = make_long4(input2.size(0), input2.size(1), input2.size(2), input2.size(3)); 213 | const long4 input2_stride = make_long4(input2.stride(0), input2.stride(1), input2.stride(2), input2.stride(3)); 214 | 215 | const long4 output_size = make_long4(output.size(0), output.size(1), output.size(2), output.size(3)); 216 | const long4 output_stride = make_long4(output.stride(0), output.stride(1), output.stride(2), output.stride(3)); 217 | 218 | // TODO: when atomicAdd gets resolved, change to AT_DISPATCH_FLOATING_TYPES_AND_HALF 219 | // AT_DISPATCH_FLOATING_TYPES(input1.type(), "resample_forward_kernel", ([&] { 220 | 221 | kernel_resample2d_update_output<<< (n + CUDA_NUM_THREADS - 1)/CUDA_NUM_THREADS, CUDA_NUM_THREADS, 0, at::cuda::getCurrentCUDAStream() >>>( 222 | //at::globalContext().getCurrentCUDAStream() >>>( 223 | n, 224 | input1.data(), 225 | input1_size, 226 | input1_stride, 227 | 
input2.data(), 228 | input2_size, 229 | input2_stride, 230 | output.data(), 231 | output_size, 232 | output_stride, 233 | kernel_size, 234 | bilinear); 235 | 236 | // })); 237 | 238 | // TODO: ATen-equivalent check 239 | 240 | // THCudaCheck(cudaGetLastError()); 241 | 242 | } 243 | 244 | void resample2d_kernel_backward( 245 | at::Tensor& input1, 246 | at::Tensor& input2, 247 | at::Tensor& gradOutput, 248 | at::Tensor& gradInput1, 249 | at::Tensor& gradInput2, 250 | int kernel_size, 251 | bool bilinear) { 252 | 253 | int n = gradOutput.numel(); 254 | 255 | const long4 input1_size = make_long4(input1.size(0), input1.size(1), input1.size(2), input1.size(3)); 256 | const long4 input1_stride = make_long4(input1.stride(0), input1.stride(1), input1.stride(2), input1.stride(3)); 257 | 258 | const long4 input2_size = make_long4(input2.size(0), input2.size(1), input2.size(2), input2.size(3)); 259 | const long4 input2_stride = make_long4(input2.stride(0), input2.stride(1), input2.stride(2), input2.stride(3)); 260 | 261 | const long4 gradOutput_size = make_long4(gradOutput.size(0), gradOutput.size(1), gradOutput.size(2), gradOutput.size(3)); 262 | const long4 gradOutput_stride = make_long4(gradOutput.stride(0), gradOutput.stride(1), gradOutput.stride(2), gradOutput.stride(3)); 263 | 264 | const long4 gradInput1_size = make_long4(gradInput1.size(0), gradInput1.size(1), gradInput1.size(2), gradInput1.size(3)); 265 | const long4 gradInput1_stride = make_long4(gradInput1.stride(0), gradInput1.stride(1), gradInput1.stride(2), gradInput1.stride(3)); 266 | 267 | // AT_DISPATCH_FLOATING_TYPES(input1.type(), "resample_backward_input1", ([&] { 268 | 269 | kernel_resample2d_backward_input1<<< (n + CUDA_NUM_THREADS - 1)/CUDA_NUM_THREADS, CUDA_NUM_THREADS, 0, at::cuda::getCurrentCUDAStream() >>>( 270 | //at::globalContext().getCurrentCUDAStream() >>>( 271 | n, 272 | input1.data(), 273 | input1_size, 274 | input1_stride, 275 | input2.data(), 276 | input2_size, 277 | input2_stride, 278 | gradOutput.data(), 279 | gradOutput_size, 280 | gradOutput_stride, 281 | gradInput1.data(), 282 | gradInput1_size, 283 | gradInput1_stride, 284 | kernel_size, 285 | bilinear 286 | ); 287 | 288 | // })); 289 | 290 | const long4 gradInput2_size = make_long4(gradInput2.size(0), gradInput2.size(1), gradInput2.size(2), gradInput2.size(3)); 291 | const long4 gradInput2_stride = make_long4(gradInput2.stride(0), gradInput2.stride(1), gradInput2.stride(2), gradInput2.stride(3)); 292 | 293 | n = gradInput2.numel(); 294 | 295 | // AT_DISPATCH_FLOATING_TYPES(gradInput2.type(), "resample_backward_input2", ([&] { 296 | 297 | 298 | kernel_resample2d_backward_input2<<< (n + CUDA_NUM_THREADS - 1)/CUDA_NUM_THREADS, CUDA_NUM_THREADS, 0, at::cuda::getCurrentCUDAStream() >>>( 299 | //at::globalContext().getCurrentCUDAStream() >>>( 300 | n, 301 | input1.data(), 302 | input1_size, 303 | input1_stride, 304 | input2.data(), 305 | input2_size, 306 | input2_stride, 307 | gradOutput.data(), 308 | gradOutput_size, 309 | gradOutput_stride, 310 | gradInput2.data(), 311 | gradInput2_size, 312 | gradInput2_stride, 313 | kernel_size, 314 | bilinear 315 | ); 316 | 317 | // })); 318 | 319 | // TODO: Use the ATen equivalent to get last error 320 | 321 | // THCudaCheck(cudaGetLastError()); 322 | 323 | } 324 | -------------------------------------------------------------------------------- /dvs/flownet2/networks/resample2d_package/resample2d_kernel.cuh: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | 
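// Declaration note: the two functions declared below are implemented in
// resample2d_kernel.cu and wrapped by the pybind11 bindings in
// resample2d_cuda.cc, so Python code only ever touches the `resample2d_cuda`
// module built by this package's setup.py.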
5 | void resample2d_kernel_forward( 6 | at::Tensor& input1, 7 | at::Tensor& input2, 8 | at::Tensor& output, 9 | int kernel_size, 10 | bool bilinear); 11 | 12 | void resample2d_kernel_backward( 13 | at::Tensor& input1, 14 | at::Tensor& input2, 15 | at::Tensor& gradOutput, 16 | at::Tensor& gradInput1, 17 | at::Tensor& gradInput2, 18 | int kernel_size, 19 | bool bilinear); -------------------------------------------------------------------------------- /dvs/flownet2/networks/resample2d_package/setup.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | import os 3 | import torch 4 | 5 | from setuptools import setup 6 | from torch.utils.cpp_extension import BuildExtension, CUDAExtension 7 | 8 | cxx_args = ['-std=c++11'] 9 | 10 | nvcc_args = [ 11 | '-gencode', 'arch=compute_50,code=sm_50', 12 | '-gencode', 'arch=compute_52,code=sm_52', 13 | '-gencode', 'arch=compute_60,code=sm_60', 14 | '-gencode', 'arch=compute_61,code=sm_61', 15 | '-gencode', 'arch=compute_70,code=sm_70', 16 | '-gencode', 'arch=compute_70,code=compute_70' 17 | ] 18 | 19 | setup( 20 | name='resample2d_cuda', 21 | ext_modules=[ 22 | CUDAExtension('resample2d_cuda', [ 23 | 'resample2d_cuda.cc', 24 | 'resample2d_kernel.cu' 25 | ], extra_compile_args={'cxx': cxx_args, 'nvcc': nvcc_args}) 26 | ], 27 | cmdclass={ 28 | 'build_ext': BuildExtension 29 | }) 30 | -------------------------------------------------------------------------------- /dvs/flownet2/networks/submodules.py: -------------------------------------------------------------------------------- 1 | # freda (todo) : 2 | 3 | import torch.nn as nn 4 | import torch 5 | import numpy as np 6 | 7 | def conv(batchNorm, in_planes, out_planes, kernel_size=3, stride=1): 8 | if batchNorm: 9 | return nn.Sequential( 10 | nn.Conv2d(in_planes, out_planes, kernel_size=kernel_size, stride=stride, padding=(kernel_size-1)//2, bias=False), 11 | nn.BatchNorm2d(out_planes), 12 | nn.LeakyReLU(0.1,inplace=True) 13 | ) 14 | else: 15 | return nn.Sequential( 16 | nn.Conv2d(in_planes, out_planes, kernel_size=kernel_size, stride=stride, padding=(kernel_size-1)//2, bias=True), 17 | nn.LeakyReLU(0.1,inplace=True) 18 | ) 19 | 20 | def i_conv(batchNorm, in_planes, out_planes, kernel_size=3, stride=1, bias = True): 21 | if batchNorm: 22 | return nn.Sequential( 23 | nn.Conv2d(in_planes, out_planes, kernel_size=kernel_size, stride=stride, padding=(kernel_size-1)//2, bias=bias), 24 | nn.BatchNorm2d(out_planes), 25 | ) 26 | else: 27 | return nn.Sequential( 28 | nn.Conv2d(in_planes, out_planes, kernel_size=kernel_size, stride=stride, padding=(kernel_size-1)//2, bias=bias), 29 | ) 30 | 31 | def predict_flow(in_planes): 32 | return nn.Conv2d(in_planes,2,kernel_size=3,stride=1,padding=1,bias=True) 33 | 34 | def deconv(in_planes, out_planes): 35 | return nn.Sequential( 36 | nn.ConvTranspose2d(in_planes, out_planes, kernel_size=4, stride=2, padding=1, bias=True), 37 | nn.LeakyReLU(0.1,inplace=True) 38 | ) 39 | 40 | class tofp16(nn.Module): 41 | def __init__(self): 42 | super(tofp16, self).__init__() 43 | 44 | def forward(self, input): 45 | return input.half() 46 | 47 | 48 | class tofp32(nn.Module): 49 | def __init__(self): 50 | super(tofp32, self).__init__() 51 | 52 | def forward(self, input): 53 | return input.float() 54 | 55 | 56 | def init_deconv_bilinear(weight): 57 | f_shape = weight.size() 58 | heigh, width = f_shape[-2], f_shape[-1] 59 | f = np.ceil(width/2.0) 60 | c = (2 * f - 1 - f % 2) / (2.0 * f) 61 | bilinear = np.zeros([heigh, width]) 62 | for 
x in range(width): 63 | for y in range(heigh): 64 | value = (1 - abs(x / f - c)) * (1 - abs(y / f - c)) 65 | bilinear[x, y] = value 66 | weight.data.fill_(0.) 67 | for i in range(f_shape[0]): 68 | for j in range(f_shape[1]): 69 | weight.data[i,j,:,:] = torch.from_numpy(bilinear) 70 | 71 | 72 | def save_grad(grads, name): 73 | def hook(grad): 74 | grads[name] = grad 75 | return hook 76 | 77 | ''' 78 | def save_grad(grads, name): 79 | def hook(grad): 80 | grads[name] = grad 81 | return hook 82 | import torch 83 | from channelnorm_package.modules.channelnorm import ChannelNorm 84 | model = ChannelNorm().cuda() 85 | grads = {} 86 | a = 100*torch.autograd.Variable(torch.randn((1,3,5,5)).cuda(), requires_grad=True) 87 | a.register_hook(save_grad(grads, 'a')) 88 | b = model(a) 89 | y = torch.mean(b) 90 | y.backward() 91 | 92 | ''' 93 | -------------------------------------------------------------------------------- /dvs/flownet2/run.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | python main.py --inference --model FlowNet2 --save_flow --inference_dataset Google \ 3 | --inference_dataset_root ./../video \ 4 | --resume ./FlowNet2_checkpoint.pth.tar \ 5 | --inference_visualize 6 | -------------------------------------------------------------------------------- /dvs/flownet2/run_release.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | python main.py --inference --model FlowNet2 --save_flow --inference_dataset Google \ 3 | --inference_dataset_root ./../dataset_release/test \ 4 | --resume ./FlowNet2_checkpoint.pth.tar \ 5 | --inference_visualize 6 | 7 | python main.py --inference --model FlowNet2 --save_flow --inference_dataset Google \ 8 | --inference_dataset_root ./../dataset_release/training \ 9 | --resume ./FlowNet2_checkpoint.pth.tar \ 10 | --inference_visualize -------------------------------------------------------------------------------- /dvs/flownet2/utils/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/googleinterns/deep-stabilization/7159c09d21aee3fc2098c64698c1300e40e3a8ea/dvs/flownet2/utils/__init__.py -------------------------------------------------------------------------------- /dvs/flownet2/utils/flow_utils.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import matplotlib.pyplot as plt 3 | import os.path 4 | 5 | TAG_CHAR = np.array([202021.25], np.float32) 6 | 7 | def readFlow(fn): 8 | """ Read .flo file in Middlebury format""" 9 | # Code adapted from: 10 | # http://stackoverflow.com/questions/28013200/reading-middlebury-flow-files-with-python-bytes-array-numpy 11 | 12 | # WARNING: this will work on little-endian architectures (eg Intel x86) only! 13 | # print 'fn = %s'%(fn) 14 | with open(fn, 'rb') as f: 15 | magic = np.fromfile(f, np.float32, count=1) 16 | if 202021.25 != magic: 17 | print('Magic number incorrect. Invalid .flo file') 18 | return None 19 | else: 20 | w = np.fromfile(f, np.int32, count=1) 21 | h = np.fromfile(f, np.int32, count=1) 22 | # print 'Reading %d x %d flo file\n' % (w, h) 23 | data = np.fromfile(f, np.float32, count=2*int(w)*int(h)) 24 | # Reshape data into 3D array (columns, rows, bands) 25 | # The reshape here is for visualization, the original code is (w,h,2) 26 | return np.resize(data, (int(h), int(w), 2)) 27 | 28 | def writeFlow(filename,uv,v=None): 29 | """ Write optical flow to file. 
30 | 31 | If v is None, uv is assumed to contain both u and v channels, 32 | stacked in depth. 33 | Original code by Deqing Sun, adapted from Daniel Scharstein. 34 | """ 35 | nBands = 2 36 | 37 | if v is None: 38 | assert(uv.ndim == 3) 39 | assert(uv.shape[2] == 2) 40 | u = uv[:,:,0] 41 | v = uv[:,:,1] 42 | else: 43 | u = uv 44 | 45 | assert(u.shape == v.shape) 46 | height,width = u.shape 47 | f = open(filename,'wb') 48 | # write the header 49 | f.write(TAG_CHAR) 50 | np.array(width).astype(np.int32).tofile(f) 51 | np.array(height).astype(np.int32).tofile(f) 52 | # arrange into matrix form 53 | tmp = np.zeros((height, width*nBands)) 54 | tmp[:,np.arange(width)*2] = u 55 | tmp[:,np.arange(width)*2 + 1] = v 56 | tmp.astype(np.float32).tofile(f) 57 | f.close() 58 | 59 | 60 | # ref: https://github.com/sampepose/flownet2-tf/ 61 | # blob/18f87081db44939414fc4a48834f9e0da3e69f4c/src/flowlib.py#L240 62 | def visulize_flow_file(flow_filename, save_dir=None): 63 | flow_data = readFlow(flow_filename) 64 | img = flow2img(flow_data) 65 | # plt.imshow(img) 66 | # plt.show() 67 | if save_dir: 68 | idx = flow_filename.rfind("/") + 1 69 | plt.imsave(os.path.join(save_dir, "%s-vis.png" % flow_filename[idx:-4]), img) 70 | 71 | 72 | def flow2img(flow_data): 73 | """ 74 | convert optical flow into color image 75 | :param flow_data: 76 | :return: color image 77 | """ 78 | # print(flow_data.shape) 79 | # print(type(flow_data)) 80 | u = flow_data[:, :, 0] 81 | v = flow_data[:, :, 1] 82 | 83 | UNKNOW_FLOW_THRESHOLD = 1e7 84 | pr1 = abs(u) > UNKNOW_FLOW_THRESHOLD 85 | pr2 = abs(v) > UNKNOW_FLOW_THRESHOLD 86 | idx_unknown = (pr1 | pr2) 87 | u[idx_unknown] = v[idx_unknown] = 0 88 | 89 | # get max value in each direction 90 | maxu = -999. 91 | maxv = -999. 92 | minu = 999. 93 | minv = 999. 
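        # maxrad (computed below) is the largest flow magnitude; u and v are
        # scaled by it so that compute_color() can map flow direction to hue and
        # relative magnitude to saturation via the Middlebury color wheel.
        # Pixels flagged as unknown above are zeroed out again after the mapping.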
94 | maxu = max(maxu, np.max(u)) 95 | maxv = max(maxv, np.max(v)) 96 | minu = min(minu, np.min(u)) 97 | minv = min(minv, np.min(v)) 98 | 99 | rad = np.sqrt(u ** 2 + v ** 2) 100 | maxrad = max(-1, np.max(rad)) 101 | u = u / maxrad + np.finfo(float).eps 102 | v = v / maxrad + np.finfo(float).eps 103 | 104 | img = compute_color(u, v) 105 | 106 | idx = np.repeat(idx_unknown[:, :, np.newaxis], 3, axis=2) 107 | img[idx] = 0 108 | 109 | return np.uint8(img) 110 | 111 | 112 | def compute_color(u, v): 113 | """ 114 | compute optical flow color map 115 | :param u: horizontal optical flow 116 | :param v: vertical optical flow 117 | :return: 118 | """ 119 | 120 | height, width = u.shape 121 | img = np.zeros((height, width, 3)) 122 | 123 | NAN_idx = np.isnan(u) | np.isnan(v) 124 | u[NAN_idx] = v[NAN_idx] = 0 125 | 126 | colorwheel = make_color_wheel() 127 | ncols = np.size(colorwheel, 0) 128 | 129 | rad = np.sqrt(u ** 2 + v ** 2) 130 | 131 | a = np.arctan2(-v, -u) / np.pi 132 | 133 | fk = (a + 1) / 2 * (ncols - 1) + 1 134 | 135 | k0 = np.floor(fk).astype(int) 136 | 137 | k1 = k0 + 1 138 | k1[k1 == ncols + 1] = 1 139 | f = fk - k0 140 | 141 | for i in range(0, np.size(colorwheel, 1)): 142 | tmp = colorwheel[:, i] 143 | col0 = tmp[k0 - 1] / 255 144 | col1 = tmp[k1 - 1] / 255 145 | col = (1 - f) * col0 + f * col1 146 | 147 | idx = rad <= 1 148 | col[idx] = 1 - rad[idx] * (1 - col[idx]) 149 | notidx = np.logical_not(idx) 150 | 151 | col[notidx] *= 0.75 152 | img[:, :, i] = np.uint8(np.floor(255 * col * (1 - NAN_idx))) 153 | 154 | return img 155 | 156 | 157 | def make_color_wheel(): 158 | """ 159 | Generate color wheel according Middlebury color code 160 | :return: Color wheel 161 | """ 162 | RY = 15 163 | YG = 6 164 | GC = 4 165 | CB = 11 166 | BM = 13 167 | MR = 6 168 | 169 | ncols = RY + YG + GC + CB + BM + MR 170 | 171 | colorwheel = np.zeros([ncols, 3]) 172 | 173 | col = 0 174 | 175 | # RY 176 | colorwheel[0:RY, 0] = 255 177 | colorwheel[0:RY, 1] = np.transpose(np.floor(255 * np.arange(0, RY) / RY)) 178 | col += RY 179 | 180 | # YG 181 | colorwheel[col:col + YG, 0] = 255 - np.transpose(np.floor(255 * np.arange(0, YG) / YG)) 182 | colorwheel[col:col + YG, 1] = 255 183 | col += YG 184 | 185 | # GC 186 | colorwheel[col:col + GC, 1] = 255 187 | colorwheel[col:col + GC, 2] = np.transpose(np.floor(255 * np.arange(0, GC) / GC)) 188 | col += GC 189 | 190 | # CB 191 | colorwheel[col:col + CB, 1] = 255 - np.transpose(np.floor(255 * np.arange(0, CB) / CB)) 192 | colorwheel[col:col + CB, 2] = 255 193 | col += CB 194 | 195 | # BM 196 | colorwheel[col:col + BM, 2] = 255 197 | colorwheel[col:col + BM, 0] = np.transpose(np.floor(255 * np.arange(0, BM) / BM)) 198 | col += + BM 199 | 200 | # MR 201 | colorwheel[col:col + MR, 2] = 255 - np.transpose(np.floor(255 * np.arange(0, MR) / MR)) 202 | colorwheel[col:col + MR, 0] = 255 203 | 204 | return colorwheel 205 | -------------------------------------------------------------------------------- /dvs/flownet2/utils/frame_utils.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from os.path import * 3 | from imageio import imread 4 | from . 
import flow_utils 5 | 6 | def read_gen(file_name): 7 | ext = splitext(file_name)[-1] 8 | if ext == '.png' or ext == '.jpeg' or ext == '.ppm' or ext == '.jpg': 9 | im = imread(file_name) 10 | if im.shape[2] > 3: 11 | return im[:,:,:3] 12 | else: 13 | return im 14 | elif ext == '.bin' or ext == '.raw': 15 | return np.load(file_name) 16 | elif ext == '.flo': 17 | return flow_utils.readFlow(file_name).astype(np.float32) 18 | return [] 19 | -------------------------------------------------------------------------------- /dvs/flownet2/utils/param_utils.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import numpy as np 4 | 5 | def parse_flownetc(modules, weights, biases): 6 | keys = [ 7 | 'conv1', 8 | 'conv2', 9 | 'conv3', 10 | 'conv_redir', 11 | 'conv3_1', 12 | 'conv4', 13 | 'conv4_1', 14 | 'conv5', 15 | 'conv5_1', 16 | 'conv6', 17 | 'conv6_1', 18 | 19 | 'deconv5', 20 | 'deconv4', 21 | 'deconv3', 22 | 'deconv2', 23 | 24 | 'Convolution1', 25 | 'Convolution2', 26 | 'Convolution3', 27 | 'Convolution4', 28 | 'Convolution5', 29 | 30 | 'upsample_flow6to5', 31 | 'upsample_flow5to4', 32 | 'upsample_flow4to3', 33 | 'upsample_flow3to2', 34 | 35 | ] 36 | i = 0 37 | for m in modules: 38 | if isinstance(m, nn.Conv2d) or isinstance(m, nn.ConvTranspose2d): 39 | weight = weights[keys[i]].copy() 40 | bias = biases[keys[i]].copy() 41 | if keys[i] == 'conv1': 42 | m.weight.data[:,:,:,:] = torch.from_numpy(np.flip(weight, axis=1).copy()) 43 | m.bias.data[:] = torch.from_numpy(bias) 44 | else: 45 | m.weight.data[:,:,:,:] = torch.from_numpy(weight) 46 | m.bias.data[:] = torch.from_numpy(bias) 47 | 48 | i = i + 1 49 | return 50 | 51 | def parse_flownets(modules, weights, biases, param_prefix='net2_'): 52 | keys = [ 53 | 'conv1', 54 | 'conv2', 55 | 'conv3', 56 | 'conv3_1', 57 | 'conv4', 58 | 'conv4_1', 59 | 'conv5', 60 | 'conv5_1', 61 | 'conv6', 62 | 'conv6_1', 63 | 64 | 'deconv5', 65 | 'deconv4', 66 | 'deconv3', 67 | 'deconv2', 68 | 69 | 'predict_conv6', 70 | 'predict_conv5', 71 | 'predict_conv4', 72 | 'predict_conv3', 73 | 'predict_conv2', 74 | 75 | 'upsample_flow6to5', 76 | 'upsample_flow5to4', 77 | 'upsample_flow4to3', 78 | 'upsample_flow3to2', 79 | ] 80 | for i, k in enumerate(keys): 81 | if 'upsample' in k: 82 | keys[i] = param_prefix + param_prefix + k 83 | else: 84 | keys[i] = param_prefix + k 85 | i = 0 86 | for m in modules: 87 | if isinstance(m, nn.Conv2d) or isinstance(m, nn.ConvTranspose2d): 88 | weight = weights[keys[i]].copy() 89 | bias = biases[keys[i]].copy() 90 | if keys[i] == param_prefix+'conv1': 91 | m.weight.data[:,0:3,:,:] = torch.from_numpy(np.flip(weight[:,0:3,:,:], axis=1).copy()) 92 | m.weight.data[:,3:6,:,:] = torch.from_numpy(np.flip(weight[:,3:6,:,:], axis=1).copy()) 93 | m.weight.data[:,6:9,:,:] = torch.from_numpy(np.flip(weight[:,6:9,:,:], axis=1).copy()) 94 | m.weight.data[:,9::,:,:] = torch.from_numpy(weight[:,9:,:,:].copy()) 95 | if m.bias is not None: 96 | m.bias.data[:] = torch.from_numpy(bias) 97 | else: 98 | m.weight.data[:,:,:,:] = torch.from_numpy(weight) 99 | if m.bias is not None: 100 | m.bias.data[:] = torch.from_numpy(bias) 101 | i = i + 1 102 | return 103 | 104 | def parse_flownetsonly(modules, weights, biases, param_prefix=''): 105 | keys = [ 106 | 'conv1', 107 | 'conv2', 108 | 'conv3', 109 | 'conv3_1', 110 | 'conv4', 111 | 'conv4_1', 112 | 'conv5', 113 | 'conv5_1', 114 | 'conv6', 115 | 'conv6_1', 116 | 117 | 'deconv5', 118 | 'deconv4', 119 | 'deconv3', 120 | 'deconv2', 121 | 122 | 
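        # (these appear to be the Caffe layer names of the per-level flow
        # prediction convolutions, analogous to predict_conv6..predict_conv2
        # in parse_flownets above)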
'Convolution1', 123 | 'Convolution2', 124 | 'Convolution3', 125 | 'Convolution4', 126 | 'Convolution5', 127 | 128 | 'upsample_flow6to5', 129 | 'upsample_flow5to4', 130 | 'upsample_flow4to3', 131 | 'upsample_flow3to2', 132 | ] 133 | for i, k in enumerate(keys): 134 | if 'upsample' in k: 135 | keys[i] = param_prefix + param_prefix + k 136 | else: 137 | keys[i] = param_prefix + k 138 | i = 0 139 | for m in modules: 140 | if isinstance(m, nn.Conv2d) or isinstance(m, nn.ConvTranspose2d): 141 | weight = weights[keys[i]].copy() 142 | bias = biases[keys[i]].copy() 143 | if keys[i] == param_prefix+'conv1': 144 | # print ("%s :"%(keys[i]), m.weight.size(), m.bias.size(), tf_w[keys[i]].shape[::-1]) 145 | m.weight.data[:,0:3,:,:] = torch.from_numpy(np.flip(weight[:,0:3,:,:], axis=1).copy()) 146 | m.weight.data[:,3:6,:,:] = torch.from_numpy(np.flip(weight[:,3:6,:,:], axis=1).copy()) 147 | if m.bias is not None: 148 | m.bias.data[:] = torch.from_numpy(bias) 149 | else: 150 | m.weight.data[:,:,:,:] = torch.from_numpy(weight) 151 | if m.bias is not None: 152 | m.bias.data[:] = torch.from_numpy(bias) 153 | i = i + 1 154 | return 155 | 156 | def parse_flownetsd(modules, weights, biases, param_prefix='netsd_'): 157 | keys = [ 158 | 'conv0', 159 | 'conv1', 160 | 'conv1_1', 161 | 'conv2', 162 | 'conv2_1', 163 | 'conv3', 164 | 'conv3_1', 165 | 'conv4', 166 | 'conv4_1', 167 | 'conv5', 168 | 'conv5_1', 169 | 'conv6', 170 | 'conv6_1', 171 | 172 | 'deconv5', 173 | 'deconv4', 174 | 'deconv3', 175 | 'deconv2', 176 | 177 | 'interconv5', 178 | 'interconv4', 179 | 'interconv3', 180 | 'interconv2', 181 | 182 | 'Convolution1', 183 | 'Convolution2', 184 | 'Convolution3', 185 | 'Convolution4', 186 | 'Convolution5', 187 | 188 | 'upsample_flow6to5', 189 | 'upsample_flow5to4', 190 | 'upsample_flow4to3', 191 | 'upsample_flow3to2', 192 | ] 193 | for i, k in enumerate(keys): 194 | keys[i] = param_prefix + k 195 | 196 | i = 0 197 | for m in modules: 198 | if isinstance(m, nn.Conv2d) or isinstance(m, nn.ConvTranspose2d): 199 | weight = weights[keys[i]].copy() 200 | bias = biases[keys[i]].copy() 201 | if keys[i] == param_prefix+'conv0': 202 | m.weight.data[:,0:3,:,:] = torch.from_numpy(np.flip(weight[:,0:3,:,:], axis=1).copy()) 203 | m.weight.data[:,3:6,:,:] = torch.from_numpy(np.flip(weight[:,3:6,:,:], axis=1).copy()) 204 | if m.bias is not None: 205 | m.bias.data[:] = torch.from_numpy(bias) 206 | else: 207 | m.weight.data[:,:,:,:] = torch.from_numpy(weight) 208 | if m.bias is not None: 209 | m.bias.data[:] = torch.from_numpy(bias) 210 | i = i + 1 211 | 212 | return 213 | 214 | def parse_flownetfusion(modules, weights, biases, param_prefix='fuse_'): 215 | keys = [ 216 | 'conv0', 217 | 'conv1', 218 | 'conv1_1', 219 | 'conv2', 220 | 'conv2_1', 221 | 222 | 'deconv1', 223 | 'deconv0', 224 | 225 | 'interconv1', 226 | 'interconv0', 227 | 228 | '_Convolution5', 229 | '_Convolution6', 230 | '_Convolution7', 231 | 232 | 'upsample_flow2to1', 233 | 'upsample_flow1to0', 234 | ] 235 | for i, k in enumerate(keys): 236 | keys[i] = param_prefix + k 237 | 238 | i = 0 239 | for m in modules: 240 | if isinstance(m, nn.Conv2d) or isinstance(m, nn.ConvTranspose2d): 241 | weight = weights[keys[i]].copy() 242 | bias = biases[keys[i]].copy() 243 | if keys[i] == param_prefix+'conv0': 244 | m.weight.data[:,0:3,:,:] = torch.from_numpy(np.flip(weight[:,0:3,:,:], axis=1).copy()) 245 | m.weight.data[:,3::,:,:] = torch.from_numpy(weight[:,3:,:,:].copy()) 246 | if m.bias is not None: 247 | m.bias.data[:] = torch.from_numpy(bias) 248 | else: 249 | 
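                # every layer other than <prefix>conv0 is copied verbatim; only
                # conv0 needs the first-three-channel flip above (presumably to
                # convert the pretrained Caffe BGR ordering to RGB)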
m.weight.data[:,:,:,:] = torch.from_numpy(weight) 250 | if m.bias is not None: 251 | m.bias.data[:] = torch.from_numpy(bias) 252 | i = i + 1 253 | 254 | return 255 | -------------------------------------------------------------------------------- /dvs/flownet2/utils/tools.py: -------------------------------------------------------------------------------- 1 | # freda (todo) : 2 | 3 | import os, time, sys, math 4 | import subprocess, shutil 5 | from os.path import * 6 | import numpy as np 7 | from inspect import isclass 8 | from pytz import timezone 9 | from datetime import datetime 10 | import inspect 11 | import torch 12 | 13 | def datestr(): 14 | pacific = timezone('US/Pacific') 15 | now = datetime.now(pacific) 16 | return '{}{:02}{:02}_{:02}{:02}'.format(now.year, now.month, now.day, now.hour, now.minute) 17 | 18 | def module_to_dict(module, exclude=[]): 19 | return dict([(x, getattr(module, x)) for x in dir(module) 20 | if isclass(getattr(module, x)) 21 | and x not in exclude 22 | and getattr(module, x) not in exclude]) 23 | 24 | class TimerBlock: 25 | def __init__(self, title): 26 | print(("{}".format(title))) 27 | 28 | def __enter__(self): 29 | self.start = time.clock() 30 | return self 31 | 32 | def __exit__(self, exc_type, exc_value, traceback): 33 | self.end = time.clock() 34 | self.interval = self.end - self.start 35 | 36 | if exc_type is not None: 37 | self.log("Operation failed\n") 38 | else: 39 | self.log("Operation finished\n") 40 | 41 | 42 | def log(self, string): 43 | duration = time.clock() - self.start 44 | units = 's' 45 | if duration > 60: 46 | duration = duration / 60. 47 | units = 'm' 48 | print((" [{:.3f}{}] {}".format(duration, units, string))) 49 | 50 | def log2file(self, fid, string): 51 | fid = open(fid, 'a') 52 | fid.write("%s\n"%(string)) 53 | fid.close() 54 | 55 | def add_arguments_for_module(parser, module, argument_for_class, default, skip_params=[], parameter_defaults={}): 56 | argument_group = parser.add_argument_group(argument_for_class.capitalize()) 57 | 58 | module_dict = module_to_dict(module) 59 | argument_group.add_argument('--' + argument_for_class, type=str, default=default, choices=list(module_dict.keys())) 60 | 61 | args, unknown_args = parser.parse_known_args() 62 | class_obj = module_dict[vars(args)[argument_for_class]] 63 | 64 | argspec = inspect.getargspec(class_obj.__init__) 65 | 66 | defaults = argspec.defaults[::-1] if argspec.defaults else None 67 | 68 | args = argspec.args[::-1] 69 | for i, arg in enumerate(args): 70 | cmd_arg = '{}_{}'.format(argument_for_class, arg) 71 | if arg not in skip_params + ['self', 'args']: 72 | if arg in list(parameter_defaults.keys()): 73 | argument_group.add_argument('--{}'.format(cmd_arg), type=type(parameter_defaults[arg]), default=parameter_defaults[arg]) 74 | elif (defaults is not None and i < len(defaults)): 75 | argument_group.add_argument('--{}'.format(cmd_arg), type=type(defaults[i]), default=defaults[i]) 76 | else: 77 | print(("[Warning]: non-default argument '{}' detected on class '{}'. This argument cannot be modified via the command line" 78 | .format(arg, module.__class__.__name__))) 79 | # We don't have a good way of dealing with inferring the type of the argument 80 | # TODO: try creating a custom action and using ast's infer type? 
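            # For example, running with "--model FlowNet2" would expose that
            # class's constructor defaults as extra flags such as
            # --model_batchNorm and --model_div_flow (the exact names depend on
            # the selected class's __init__ signature).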
81 | # else: 82 | # argument_group.add_argument('--{}'.format(cmd_arg), required=True) 83 | 84 | def kwargs_from_args(args, argument_for_class): 85 | argument_for_class = argument_for_class + '_' 86 | return {key[len(argument_for_class):]: value for key, value in list(vars(args).items()) if argument_for_class in key and key != argument_for_class + 'class'} 87 | 88 | def format_dictionary_of_losses(labels, values): 89 | try: 90 | string = ', '.join([('{}: {:' + ('.3f' if value >= 0.001 else '.1e') +'}').format(name, value) for name, value in zip(labels, values)]) 91 | except (TypeError, ValueError) as e: 92 | print((list(zip(labels, values)))) 93 | string = '[Log Error] ' + str(e) 94 | 95 | return string 96 | 97 | 98 | class IteratorTimer(): 99 | def __init__(self, iterable): 100 | self.iterable = iterable 101 | self.iterator = self.iterable.__iter__() 102 | 103 | def __iter__(self): 104 | return self 105 | 106 | def __len__(self): 107 | return len(self.iterable) 108 | 109 | def __next__(self): 110 | start = time.time() 111 | n = next(self.iterator) 112 | self.last_duration = (time.time() - start) 113 | return n 114 | 115 | next = __next__ 116 | 117 | def gpumemusage(): 118 | gpu_mem = subprocess.check_output("nvidia-smi | grep MiB | cut -f 3 -d '|'", shell=True).replace(' ', '').replace('\n', '').replace('i', '') 119 | all_stat = [float(a) for a in gpu_mem.replace('/','').split('MB')[:-1]] 120 | 121 | gpu_mem = '' 122 | for i in range(len(all_stat)/2): 123 | curr, tot = all_stat[2*i], all_stat[2*i+1] 124 | util = "%1.2f"%(100*curr/tot)+'%' 125 | cmem = str(int(math.ceil(curr/1024.)))+'GB' 126 | gmem = str(int(math.ceil(tot/1024.)))+'GB' 127 | gpu_mem += util + '--' + join(cmem, gmem) + ' ' 128 | return gpu_mem 129 | 130 | 131 | def update_hyperparameter_schedule(args, epoch, global_iteration, optimizer): 132 | if args.schedule_lr_frequency > 0: 133 | for param_group in optimizer.param_groups: 134 | if (global_iteration + 1) % args.schedule_lr_frequency == 0: 135 | param_group['lr'] /= float(args.schedule_lr_fraction) 136 | param_group['lr'] = float(np.maximum(param_group['lr'], 0.000001)) 137 | 138 | def save_checkpoint(state, is_best, path, prefix, filename='checkpoint.pth.tar'): 139 | prefix_save = os.path.join(path, prefix) 140 | name = prefix_save + '_' + filename 141 | torch.save(state, name) 142 | if is_best: 143 | shutil.copyfile(name, prefix_save + '_model_best.pth.tar') 144 | 145 | -------------------------------------------------------------------------------- /dvs/gyro/__init__.py: -------------------------------------------------------------------------------- 1 | from .gyro_function import ( 2 | GetGyroAtTimeStamp, 3 | QuaternionProduct, 4 | QuaternionReciprocal, 5 | ConvertQuaternionToAxisAngle, 6 | FindOISAtTimeStamp, 7 | GetMetadata, 8 | GetProjections, 9 | GetVirtualProjection, 10 | GetForwardGrid, 11 | CenterZoom, 12 | GetWarpingFlow, 13 | torch_norm_quat, 14 | torch_QuaternionProduct, 15 | torch_QuaternionReciprocal, 16 | torch_GetVirtualProjection, 17 | get_static, 18 | torch_GetForwardGrid, 19 | torch_GetWarpingFlow, 20 | train_GetGyroAtTimeStamp, 21 | train_ConvertQuaternionToAxisAngle, 22 | ConvertAxisAngleToQuaternion, 23 | torch_ConvertAxisAngleToQuaternion, 24 | torch_ConvertQuaternionToAxisAngle, 25 | ConvertAxisAngleToQuaternion_no_angle, 26 | ConvertQuaternionToAxisAngle_no_angle, 27 | torch_GetHomographyTransformFromProjections, 28 | torch_ApplyTransform, 29 | norm_quat, 30 | SlerpWithDefault 31 | ) 32 | from .gyro_io import ( 33 | LoadGyroData, 34 | 
LoadOISData, 35 | LoadFrameData, 36 | LoadStabResult, 37 | get_grid, 38 | get_rotations, 39 | visual_rotation 40 | ) -------------------------------------------------------------------------------- /dvs/gyro/gyro_io.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from numpy import linalg as LA 3 | import matplotlib.pyplot as plt 4 | import scipy.io as sio 5 | from .gyro_function import ( 6 | ProcessGyroData, QuaternionProduct, QuaternionReciprocal, 7 | ConvertQuaternionToAxisAngle, FindOISAtTimeStamp, GetMetadata, 8 | GetProjections, GetVirtualProjection, GetForwardGrid, 9 | CenterZoom, GetGyroAtTimeStamp, get_static, ConvertAxisAngleToQuaternion, 10 | ConvertAxisAngleToQuaternion_no_angle, ConvertQuaternionToAxisAngle_no_angle 11 | ) 12 | 13 | def load_gyro_mesh(input_name): 14 | data = LoadStabResult(input_name) 15 | w, h = data["vertex_grid_size"][0] 16 | data["warping grid"] = np.reshape(data["warping grid"],(-1,int(w),int(h),4)) 17 | return data 18 | 19 | def get_grid(static_options, frame_data, quats_data, ois_data, virtual_data, no_shutter = False): 20 | grid = [] 21 | result_poses = {} 22 | result_poses['virtual pose'] = virtual_data 23 | for i in range(len(virtual_data)): 24 | metadata = GetMetadata(frame_data, i) 25 | real_projections = GetProjections(static_options, metadata, quats_data, ois_data, no_shutter = no_shutter) 26 | virtual_projection = GetVirtualProjection(static_options, result_poses, metadata, i) 27 | grid.append(GetForwardGrid(static_options, real_projections, virtual_projection)) 28 | grid = np.array(grid) 29 | zoom_ratio = 1 / (1 - 2 * static_options["cropping_ratio"]) 30 | curr_grid = CenterZoom(grid, zoom_ratio) 31 | curr_grid = np.transpose(curr_grid,(0,3,2,1)) 32 | return curr_grid 33 | 34 | def get_rotations(frame_data, quats_data, ois_data, num_frames): 35 | quats = np.zeros((num_frames, 4)) 36 | for i in range(num_frames): 37 | quats[i,:] = GetGyroAtTimeStamp(quats_data, frame_data[i,0]) 38 | 39 | rotations = np.zeros((num_frames,3)) 40 | lens_offsets = np.zeros((num_frames, 2)) 41 | for i in range(num_frames): 42 | if i != 0: 43 | quat_dif = QuaternionProduct(quats[i,:], QuaternionReciprocal(quats[i-1,:])) 44 | axis_dif_cur = ConvertQuaternionToAxisAngle_no_angle(quat_dif) 45 | rotations[i,:] = axis_dif_cur 46 | lens_offsets[i,:] = FindOISAtTimeStamp(ois_data, frame_data[i, 4]) 47 | 48 | return rotations, lens_offsets 49 | 50 | def visual_rotation(rotations_real, lens_offsets_real, rotations_virtual, lens_offsets_virtual, rotations_virtual2, lens_offsets_virtual2, path): 51 | # figure('units','normalized','outerposition',[0 0 1 1]) 52 | plt.clf() 53 | plt.figure(figsize=(8,16)) 54 | 55 | plt.subplot(5,1,1) 56 | plt.plot(rotations_real[:,0], "g") 57 | if rotations_virtual is not None: 58 | plt.plot(rotations_virtual[:,0], "b") 59 | if rotations_virtual2 is not None: 60 | plt.plot(rotations_virtual2[:,0], "r") 61 | plt.ylim(-0.02, 0.02) 62 | plt.xlabel('frame id') 63 | plt.ylabel('gyro x') 64 | 65 | plt.subplot(5,1,2) 66 | plt.plot(rotations_real[:,1], "g") 67 | if rotations_virtual is not None: 68 | plt.plot(rotations_virtual[:,1], "b") 69 | if rotations_virtual2 is not None: 70 | plt.plot(rotations_virtual2[:,1], "r") 71 | plt.ylim(-0.02, 0.02) 72 | plt.xlabel('frame id') 73 | plt.ylabel('gyro y') 74 | 75 | plt.subplot(5,1,3) 76 | plt.plot(rotations_real[:,2], "g") 77 | if rotations_virtual is not None: 78 | plt.plot(rotations_virtual[:,2], "b") 79 | if rotations_virtual2 is not None: 80 | 
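        # color convention used for every subplot in this figure:
        # green = real camera trajectory, blue = virtual trajectory,
        # red = optional second virtual trajectory passed in for comparison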
plt.plot(rotations_virtual2[:,2], "r") 81 | plt.ylim(-0.02, 0.02) 82 | plt.xlabel('frame id') 83 | plt.ylabel('gyro z') 84 | 85 | plt.subplot(5,1,4) 86 | plt.plot(lens_offsets_real[:,0], "g") 87 | if lens_offsets_virtual is not None: 88 | plt.plot(lens_offsets_virtual[:,0], "b") 89 | if rotations_virtual2 is not None: 90 | plt.plot(lens_offsets_virtual2[:,0], "r") 91 | plt.xlabel('frame id') 92 | plt.ylabel('ois x') 93 | 94 | plt.subplot(5,1,5) 95 | plt.plot(lens_offsets_real[:,1], "g") 96 | if lens_offsets_virtual is not None: 97 | plt.plot(lens_offsets_virtual[:,1], "b") 98 | if rotations_virtual2 is not None: 99 | plt.plot(lens_offsets_virtual2[:,1], "r") 100 | plt.xlabel('frame id') 101 | plt.ylabel('ois y') 102 | 103 | plt.savefig(path[:-4]+".jpg") 104 | return 105 | 106 | def LoadOISData(ois_name): 107 | ois_log = np.loadtxt(ois_name) 108 | ois_log = ois_log[:, -3:] 109 | return ois_log 110 | 111 | def LoadFrameData(frame_log_name): 112 | frame_data = np.loadtxt(frame_log_name) 113 | frame_data[:, [0,4]] = frame_data[:, [0,4]] - np.expand_dims(frame_data[:,1]/2, axis = 1) 114 | return frame_data 115 | 116 | 117 | def LoadGyroData(gyro_log_name): 118 | raw_gyro_data = np.loadtxt(gyro_log_name) 119 | raw_gyro_data[:,0] = raw_gyro_data[:,0] * 1000 120 | raw_gyro_data = raw_gyro_data[:,[0, 2, 1, 3]] 121 | 122 | [_, quats_data] = ProcessGyroData(raw_gyro_data) 123 | quats_data = np.concatenate((raw_gyro_data[:, 0, None], quats_data), axis = 1) 124 | return quats_data 125 | 126 | def LoadStabResult(input_name): 127 | fid = open(input_name) 128 | data = {} 129 | while True: 130 | name, val = ReadLine(fid) 131 | if name == None: 132 | break 133 | if name in data: 134 | data[name] = np.concatenate((data[name], val), axis=0) 135 | else: 136 | data[name] = val 137 | fid.close() 138 | print("Mesh length: ", len(list(data.values())[0])) 139 | return data 140 | 141 | 142 | def ReadLine(fid): 143 | name = '' 144 | val = 0 145 | tline = fid.readline() 146 | if len(tline) == 0: 147 | return None, None 148 | if tline[-1] == "\n": 149 | tline = tline[:-1] 150 | ind = tline.find(':') 151 | name = tline[:ind] 152 | tmp_val= str2num(tline[ind+1:]) 153 | if len(tmp_val) > 0: 154 | val = tmp_val 155 | else: 156 | tline = fid.readline() 157 | if tline[-1] == "\n": 158 | tline = tline[:-1] 159 | val = str2num(tline) 160 | return name, np.expand_dims(np.array(val), axis=0) 161 | 162 | def str2num(string): 163 | nums = string.split(" ") 164 | nums = [float(_) for _ in nums if _ != ""] 165 | return nums 166 | 167 | -------------------------------------------------------------------------------- /dvs/inference.py: -------------------------------------------------------------------------------- 1 | import os 2 | import sys 3 | import torch 4 | import torchvision 5 | import torch.nn as nn 6 | from torch.autograd import Variable 7 | 8 | import time 9 | import yaml 10 | import argparse 11 | import numpy as np 12 | from printer import Printer 13 | from dataset import get_data_loader, get_inference_data_loader 14 | from model import Model 15 | import datetime 16 | import copy 17 | from util import make_dir, get_optimizer, norm_flow 18 | from gyro import ( 19 | get_grid, 20 | get_rotations, 21 | visual_rotation, 22 | torch_QuaternionProduct, 23 | torch_norm_quat 24 | ) 25 | from warp import warp_video 26 | 27 | os.environ["CUDA_VISIBLE_DEVICES"] = "0" 28 | 29 | def run(model, loader, cf, USE_CUDA=True): 30 | no_flo = False 31 | number_virtual, number_real = cf['data']["number_virtual"], cf['data']["number_real"] 32 | 
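    # What follows is the per-frame inference loop: both sub-networks are put
    # in eval mode, the hidden state is initialized once per clip, and for each
    # time step the real gyro/OIS readings are concatenated with the virtual
    # pose history from get_virtual_data(); the (optional) forward/backward
    # optical flow is encoded by model.unet and everything is fed to the
    # recurrent model.net, whose output quaternion is normalized and composed
    # with the current virtual and anchor poses before being appended to
    # virtual_queue.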
model.net.eval() 33 | model.unet.eval() 34 | activation = nn.Softshrink(0.0006) # 0.0036 35 | for i, data in enumerate(loader, 0): 36 | # get the inputs; data is a list of [inputs, labels] 37 | real_inputs, times, flo, flo_back, real_projections, real_postion, ois, real_queue_idx = data 38 | print("Fininsh Load data") 39 | 40 | real_inputs = real_inputs.type(torch.float) #[b,60,84=21*4] 41 | real_projections = real_projections.type(torch.float) 42 | flo = flo.type(torch.float) 43 | flo_back = flo_back.type(torch.float) 44 | ois = ois.type(torch.float) 45 | 46 | batch_size, step, dim = real_inputs.size() 47 | times = times.numpy() 48 | real_queue_idx = real_queue_idx.numpy() 49 | virtual_queue = [None] * batch_size 50 | 51 | run_loss = 0 52 | model.net.init_hidden(batch_size) 53 | count = 0 54 | for j in range(step): 55 | if (j+1) % 100 == 0: 56 | print("Step: "+str(j+1)+"/"+str(step)) 57 | virtual_inputs, vt_1 = loader.dataset.get_virtual_data( 58 | virtual_queue, real_queue_idx, times[:, j], times[:, j+1], times[:, 0], batch_size, number_virtual, real_postion[:,j]) 59 | real_inputs_step = real_inputs[:,j,:] 60 | inputs = torch.cat((real_inputs_step,virtual_inputs), dim = 1) 61 | 62 | # inputs = Variable(real_inputs_step) 63 | if USE_CUDA: 64 | real_inputs_step = real_inputs_step.cuda() 65 | virtual_inputs = virtual_inputs.cuda() 66 | inputs = inputs.cuda() 67 | if no_flo is False: 68 | flo_step = flo[:,j].cuda() 69 | flo_back_step = flo_back[:,j].cuda() 70 | else: 71 | flo_step = None 72 | flo_back_step = None 73 | vt_1 = vt_1.cuda() 74 | real_projections_t = real_projections[:,j+1].cuda() 75 | real_projections_t_1 = real_projections[:,j].cuda() 76 | real_postion_anchor = real_postion[:,j].cuda() 77 | ois_step = ois[:,j].cuda() 78 | 79 | if no_flo is False: 80 | b, h, w, _ = flo_step.size() 81 | flo_step = norm_flow(flo_step, h, w) 82 | flo_back_step = norm_flow(flo_back_step, h, w) 83 | 84 | with torch.no_grad(): 85 | if no_flo is False: 86 | flo_out = model.unet(flo_step, flo_back_step) 87 | else: 88 | flo_out = None 89 | if j < 1: 90 | for i in range(2): 91 | out = model.net(inputs, flo_out, ois_step) 92 | else: 93 | out = model.net(inputs, flo_out, ois_step) 94 | 95 | real_position = real_inputs_step[:,40:44] 96 | virtual_position = virtual_inputs[:, -4:] 97 | 98 | out[:, :3] = activation(out[:, :3]) 99 | out = torch_norm_quat(out) 100 | 101 | pos = torch_QuaternionProduct(virtual_position, real_postion_anchor) 102 | loss_step = model.loss(out, vt_1, virtual_inputs, real_inputs_step, \ 103 | flo_step, flo_back_step, real_projections_t, real_projections_t_1, real_postion_anchor, \ 104 | follow = True, optical = True, undefine = True) 105 | run_loss += loss_step 106 | 107 | out = torch_QuaternionProduct(out, pos) 108 | 109 | if USE_CUDA: 110 | out = out.cpu().detach().numpy() 111 | 112 | virtual_queue = loader.dataset.update_virtual_queue(batch_size, virtual_queue, out, times[:,j+1]) 113 | 114 | run_loss /= step 115 | print( "\nLoss: follow, angle, smooth, c2_smooth, undefine, optical") 116 | print(run_loss.cpu().numpy()[:-1], "\n") 117 | return np.squeeze(virtual_queue, axis=0) 118 | 119 | 120 | def inference(cf, data_path, USE_CUDA): 121 | checkpoints_dir = cf['data']['checkpoints_dir'] 122 | checkpoints_dir = make_dir(checkpoints_dir, cf) 123 | files = os.listdir(data_path) 124 | for f in files: 125 | if f[-3:] == "mp4" and "no_ois" not in f and "no_shutter" not in f and "gimbal" not in f.lower() and "grid" not in f.lower() and "flo" not in f.lower(): 126 | video_name = f[:-4] 127 | 
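            # f is taken as the raw input clip; the filter above skips the
            # companion files stored in the same folder (the *_no_ois /
            # no_shutter variants and any gimbal / grid / flow renderings),
            # so video_name ends up naming the clip that will be stabilized.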
128 | # Define the model 129 | model = Model(cf) 130 | load_model = cf["model"]["load_model"] 131 | 132 | print("------Load Pretrined Model--------") 133 | if load_model is not None: 134 | checkpoint = torch.load(load_model) 135 | print(load_model) 136 | else: 137 | load_last = os.path.join(checkpoints_dir, cf['data']['exp']+'_last.checkpoint') 138 | checkpoint = torch.load(load_last) 139 | print(load_last) 140 | model.net.load_state_dict(checkpoint['state_dict']) 141 | model.unet.load_state_dict(checkpoint['unet']) 142 | 143 | if USE_CUDA: 144 | model.net.cuda() 145 | model.unet.cuda() 146 | 147 | print("-----------Load Dataset----------") 148 | test_loader = get_inference_data_loader(cf, data_path, no_flo = False) 149 | data = test_loader.dataset.data[0] 150 | 151 | start_time = time.time() 152 | virtual_queue= run(model, test_loader, cf, USE_CUDA=USE_CUDA) 153 | 154 | virtual_data = np.zeros((1,5)) 155 | virtual_data[:,1:] = virtual_queue[0, 1:] 156 | virtual_data[:,0] = data.frame[0,0] 157 | virtual_queue = np.concatenate((virtual_data, virtual_queue), axis = 0) 158 | 159 | print(virtual_queue.shape) 160 | time_used = (time.time() - start_time) / 60 161 | 162 | print("Time_used: %.4f minutes" % (time_used)) 163 | 164 | 165 | virtual_path = os.path.join("./test", cf['data']['exp'], data_path.split("/")[-1]+'.txt') 166 | np.savetxt(virtual_path, virtual_queue, delimiter=' ') 167 | 168 | print("------Start Warping Video--------") 169 | grid = get_grid(test_loader.dataset.static_options, \ 170 | data.frame[:data.length], data.gyro, data.ois, virtual_queue[:data.length,1:], no_shutter = False) 171 | return data, virtual_queue, video_name, grid 172 | 173 | def visual_result(cf, data, video_name, virtual_queue, virtual_queue2 = None, compare_exp = None): 174 | print("------Start Visual Result--------") 175 | rotations_virtual, lens_offsets_virtual = get_rotations(data.frame[:data.length], virtual_queue, np.zeros(data.ois.shape), data.length) 176 | rotations_real, lens_offsets_real = get_rotations(data.frame[:data.length], data.gyro, data.ois, data.length) 177 | if virtual_queue2 is not None: 178 | rotations_virtual2, lens_offsets_virtual2 = get_rotations(data.frame[:data.length], virtual_queue2, np.zeros(data.ois.shape), data.length) 179 | path = os.path.join("./test", cf['data']['exp'], video_name+'_'+compare_exp+'.jpg') 180 | else: 181 | rotations_virtual2, lens_offsets_virtual2 = None, None 182 | path = os.path.join("./test", cf['data']['exp'], video_name+'.jpg') 183 | 184 | visual_rotation(rotations_real, lens_offsets_real, rotations_virtual, lens_offsets_virtual, rotations_virtual2, lens_offsets_virtual2, path) 185 | 186 | 187 | def main(args = None): 188 | config_file = args.config 189 | dir_path = args.dir_path 190 | cf = yaml.load(open(config_file, 'r')) 191 | 192 | USE_CUDA = cf['data']["use_cuda"] 193 | 194 | log_file = open(os.path.join(cf["data"]["log"], cf['data']['exp']+'_test.log'), 'w+') 195 | printer = Printer(sys.stdout, log_file).open() 196 | 197 | data_name = sorted(os.listdir(dir_path)) 198 | for i in range(len(data_name)): 199 | print("Running Inference: " + str(i+1) + "/" + str(len(data_name))) 200 | save_path = os.path.join("./test", cf['data']['exp'], data_name[i]+'_stab.mp4') 201 | 202 | data_path = os.path.join(dir_path, data_name[i]) 203 | data, virtual_queue, video_name, grid= inference(cf, data_path, USE_CUDA) 204 | 205 | virtual_queue2 = None 206 | visual_result(cf, data, data_name[i], virtual_queue, virtual_queue2 = virtual_queue2, compare_exp = None) 207 | 208 
| video_path = os.path.join(data_path, video_name+".mp4") 209 | warp_video(grid, video_path, save_path, frame_number = False) 210 | return 211 | 212 | if __name__ == '__main__': 213 | parser = argparse.ArgumentParser("Training model") 214 | parser.add_argument("--config", default="./conf/stabilzation.yaml", help="Config file.") 215 | parser.add_argument("--dir_path", default="./video") 216 | args = parser.parse_args() 217 | main(args = args) -------------------------------------------------------------------------------- /dvs/load_frame_sensor_data.py: -------------------------------------------------------------------------------- 1 | import os 2 | os.environ["CUDA_VISIBLE_DEVICES"] = "0" 3 | import sys 4 | import torch 5 | import torchvision 6 | import torch.nn as nn 7 | from torch.autograd import Variable 8 | 9 | import time 10 | import yaml 11 | import argparse 12 | import numpy as np 13 | from printer import Printer 14 | from dataset import get_data_loader, get_inference_data_loader 15 | from model import Model 16 | import datetime 17 | import copy 18 | from util import make_dir, get_optimizer, norm_flow 19 | from gyro import ( 20 | get_grid, 21 | get_rotations, 22 | visual_rotation, 23 | GetGyroAtTimeStamp, 24 | torch_ConvertQuaternionToAxisAngle, 25 | torch_ConvertAxisAngleToQuaternion, 26 | torch_QuaternionProduct, 27 | get_static 28 | ) 29 | from warp import warp_video 30 | 31 | def run(loader, cf, USE_CUDA=True): 32 | number_virtual, number_real = cf['data']["number_virtual"], cf['data']["number_real"] 33 | for i, data in enumerate(loader, 0): 34 | # get the inputs; data is a list of [inputs, labels] 35 | real_inputs, times, flo, flo_back, real_projections, real_postion, ois, real_queue_idx = data 36 | print("Fininsh Load data") 37 | 38 | real_inputs = real_inputs.type(torch.float) #[b,60,84=21*4] 39 | real_projections = real_projections.type(torch.float) 40 | 41 | batch_size, step, dim = real_inputs.size() 42 | times = times.numpy() 43 | real_queue_idx = real_queue_idx.numpy() 44 | virtual_queue = [None] * batch_size 45 | 46 | for j in range(step): 47 | virtual_inputs, vt_1 = loader.dataset.get_virtual_data( 48 | virtual_queue, real_queue_idx, times[:, j], times[:, j+1], times[:, 0], batch_size, number_virtual, real_postion[:,j]) 49 | real_inputs_step = real_inputs[:,j,:] 50 | if USE_CUDA: 51 | real_inputs_step = real_inputs_step.cuda() 52 | virtual_inputs = virtual_inputs.cuda() 53 | real_postion_anchor = real_postion[:,j].cuda() 54 | 55 | out = real_inputs_step[:,40:44] 56 | 57 | virtual_position = virtual_inputs[:, -4:] 58 | pos = torch_QuaternionProduct(virtual_position, real_postion_anchor) 59 | 60 | out = torch_QuaternionProduct(out, pos) 61 | 62 | if USE_CUDA: 63 | out = out.cpu().detach().numpy() 64 | 65 | virtual_queue = loader.dataset.update_virtual_queue(batch_size, virtual_queue, out, times[:,j+1]) 66 | return np.squeeze(virtual_queue, axis=0) 67 | 68 | def inference(cf, data_path, USE_CUDA): 69 | print("-----------Load Dataset----------") 70 | test_loader = get_inference_data_loader(cf, data_path) 71 | data = test_loader.dataset.data[0] 72 | test_loader.dataset.no_flo = True 73 | test_loader.dataset.static_options = get_static(ratio = 0) 74 | 75 | start_time = time.time() 76 | virtual_queue = run(test_loader, cf, USE_CUDA=USE_CUDA) 77 | 78 | virtual_data = np.zeros((1,5)) 79 | virtual_data[:,1:] = virtual_queue[0, 1:] 80 | virtual_data[:,0] = data.frame[0,0] 81 | virtual_queue = np.concatenate((virtual_data, virtual_queue), axis = 0) 82 | 83 | files = 
os.listdir(data_path) 84 | for f in files: 85 | if f[-3:] == "mp4" and "no_ois" not in f and "gimbal" not in f.lower(): 86 | video_name = f[:-4] 87 | print(video_name) 88 | virtual_path = os.path.join("./test", cf['data']['exp'], video_name+'.txt') 89 | 90 | print("------Start Visual Result--------") 91 | rotations_real, lens_offsets_real = get_rotations(data.frame[:data.length], data.gyro, data.ois, data.length) 92 | fig_path = os.path.join(data_path, video_name+"_real.jpg") 93 | visual_rotation(rotations_real, lens_offsets_real, None, None, None, None, fig_path) 94 | 95 | return 96 | 97 | def main(args = None): 98 | config_file = args.config 99 | dir_path = args.dir_path 100 | cf = yaml.load(open(config_file, 'r')) 101 | 102 | USE_CUDA = cf['data']["use_cuda"] 103 | 104 | checkpoints_dir = cf['data']['checkpoints_dir'] 105 | checkpoints_dir = make_dir(checkpoints_dir, cf) 106 | 107 | data_name = sorted(os.listdir(dir_path)) 108 | for i in range(len(data_name)): 109 | print("Running: " + str(i+1) + "/" + str(len(data_name))) 110 | inference(cf, os.path.join(dir_path, data_name[i]), USE_CUDA) 111 | return 112 | 113 | if __name__ == '__main__': 114 | parser = argparse.ArgumentParser("Training model") 115 | parser.add_argument("--config", default="./conf/stabilzation.yaml", help="Config file.") 116 | parser.add_argument("--dir_path", default="./video") 117 | args = parser.parse_args() 118 | main(args = args) -------------------------------------------------------------------------------- /dvs/loss.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import numpy as np 3 | from torch.autograd import Variable 4 | import operator 5 | import torch.nn.functional as F 6 | import matplotlib.pyplot as plt 7 | from gyro import ( 8 | torch_QuaternionProduct, 9 | torch_QuaternionReciprocal, 10 | get_static, 11 | torch_GetVirtualProjection, 12 | torch_GetForwardGrid, 13 | torch_GetWarpingFlow, 14 | torch_ConvertAxisAngleToQuaternion, 15 | torch_ConvertQuaternionToAxisAngle, 16 | torch_norm_quat, 17 | torch_GetHomographyTransformFromProjections, 18 | torch_ApplyTransform 19 | ) 20 | 21 | class C2_Smooth_loss(torch.nn.Module): 22 | def __init__(self): 23 | super(C2_Smooth_loss, self).__init__() 24 | self.MSE = torch.nn.MSELoss() 25 | 26 | def forward(self, Qt, Qt_1, Qt_2): 27 | detaQt_1 = torch_QuaternionProduct(Qt_1, torch_QuaternionReciprocal(Qt_2)) 28 | return self.MSE(Qt, detaQt_1) 29 | 30 | class C1_Smooth_loss(torch.nn.Module): 31 | def __init__(self): 32 | super(C1_Smooth_loss, self).__init__() 33 | self.MSE = torch.nn.MSELoss() 34 | 35 | def forward(self, v_r_axis, v_axis_t_1 = None, real_postion = None): 36 | quat_zero = torch.zeros(v_r_axis.shape).cuda() 37 | quat_zero[:,3] = 1 38 | return self.MSE(v_r_axis, quat_zero) 39 | 40 | class Follow_loss(torch.nn.Module): 41 | def __init__(self): 42 | super(Follow_loss, self).__init__() 43 | self.MSE = torch.nn.MSELoss() 44 | 45 | def forward(self, virtual_quat, real_quat, real_postion = None): 46 | if real_postion is not None: 47 | real_quat = torch_QuaternionProduct(real_quat, real_postion) 48 | return self.MSE(virtual_quat, real_quat) 49 | 50 | class Stay_loss(torch.nn.Module): 51 | def __init__(self): 52 | super(Stay_loss, self).__init__() 53 | self.zero = torch.tensor([0.0,0.0,0.0,1.0]).cuda() 54 | 55 | def forward(self, virtual_quat): 56 | return torch.mean(torch.abs(virtual_quat - self.zero)) 57 | 58 | 59 | class Angle_loss(torch.nn.Module): 60 | def __init__(self): 61 | super(Angle_loss, 
self).__init__() 62 | self.MSE = torch.nn.MSELoss() 63 | 64 | def forward(self, Q1, Q2, threshold = 0.5236, logistic_beta1 = 100): 65 | batch_size = Q1.shape[0] 66 | Q3 = torch_norm_quat(torch_QuaternionProduct(Q2, torch_QuaternionReciprocal(Q1))) 67 | theta = torch.zeros(batch_size).cuda() 68 | index = (Q3[:,3] < 1).nonzero() 69 | theta[index] = torch.acos(Q3[index,3]) * 2 70 | loss = torch.mean(theta * (1 / (1 + torch.exp(-logistic_beta1 * (theta - threshold))))) 71 | return loss, theta 72 | 73 | class Optical_loss(torch.nn.Module): 74 | def __init__(self): 75 | super(Optical_loss, self).__init__() 76 | self.static_options = get_static() 77 | self.mesh = get_mesh() 78 | 79 | def forward(self, Vt, Vt_1, flo, flo_back, real_projection_t, real_projection_t_1): 80 | virtual_projection_t = torch_GetVirtualProjection(self.static_options, Vt) 81 | virtual_projection_t_1 = torch_GetVirtualProjection(self.static_options, Vt_1) 82 | 83 | b, h, w = flo.size()[:3] 84 | 85 | grid_t = torch_GetForwardGrid(self.static_options, real_projection_t, virtual_projection_t)[:,:2,:,:].permute(0,1,3,2) 86 | grid_t = torch.nn.functional.upsample_bilinear(grid_t, size = (h, w)) # [B,C(xy),H,W] 87 | 88 | grid_t_1 = torch_GetForwardGrid(self.static_options, real_projection_t_1, virtual_projection_t_1)[:,:2,:,:].permute(0,1,3,2) 89 | grid_t_1 = torch.nn.functional.upsample_bilinear(grid_t_1, size = (h, w)) # [B,C(xy),H,W] 90 | 91 | mesh = self.mesh.repeat(b, 1, 1, 1) 92 | flo = flo + mesh 93 | flo_back = flo_back + mesh # [B,H,W,C] 94 | 95 | valid = (flo[:,:,:,0] > 0) * (flo[:,:,:,1] > 0) * (flo[:,:,:,0] < 1) * (flo[:,:,:,1] < 1) 96 | valid_f = torch.unsqueeze(valid, dim = 3).type(torch.cuda.FloatTensor) 97 | valid = torch.unsqueeze(valid, dim = 1).type(torch.cuda.FloatTensor) 98 | 99 | valid_back = (flo_back[:,:,:,0] > 0) * (flo_back[:,:,:,1] > 0) * (flo_back[:,:,:,0] < 1) * (flo_back[:,:,:,1] < 1) 100 | valid_back_f = torch.unsqueeze(valid_back, dim = 3).type(torch.cuda.FloatTensor) 101 | valid_back = torch.unsqueeze(valid_back, dim = 1).type(torch.cuda.FloatTensor) # [B,C,H,W] 102 | 103 | flo = (flo * 2 - 1) * valid_f 104 | flo_back = (flo_back * 2 - 1) * valid_back_f 105 | 106 | forward_t = torch.nn.functional.grid_sample(grid_t, flo, padding_mode="reflection") # default bilinear 107 | backward_t_1 = torch.nn.functional.grid_sample(grid_t_1, flo_back, padding_mode="reflection") # default bilinear 108 | 109 | forward_diff = ((forward_t - grid_t_1) * valid) ** 2 110 | backward_diff = ((backward_t_1 - grid_t) * valid_back) ** 2 111 | 112 | forward_loss = torch.sum(forward_diff, dim = (1,2,3)) / torch.sum(valid, dim = (1,2,3)) 113 | backward_loss = torch.sum(backward_diff, dim = (1,2,3)) / torch.sum(valid_back, dim = (1,2,3)) 114 | 115 | loss = forward_loss + backward_loss 116 | loss = torch.min(loss, loss - loss + 1) #[0] 117 | loss = torch.sum(loss) / b 118 | 119 | return loss 120 | 121 | 122 | def get_mesh(height = 270, width = 480, USE_CUDA = True): 123 | xs = np.linspace(0, 1, width, endpoint = False) + 0.5 / height 124 | ys = np.linspace(0, 1, height, endpoint = False) + 0.5 / width 125 | xmesh, ymesh = np.meshgrid(xs, ys) 126 | # Reshape the sampling positions to a H x W x 2 tensor 127 | mesh = torch.Tensor(np.expand_dims(np.moveaxis(np.array(list(zip(xmesh, ymesh))), 1, 2),axis=0)) 128 | if USE_CUDA: 129 | mesh = mesh.cuda() 130 | return mesh 131 | 132 | class Undefine_loss(torch.nn.Module): 133 | def __init__(self, ratio = 0.08, inner_ratio = 0.04, USE_CUDA = True): 134 | super(Undefine_loss, 
self).__init__() 135 | self.static_options = get_static() 136 | self.inner_ratio = inner_ratio 137 | width = self.static_options["width"] 138 | height = self.static_options["height"] 139 | x0, x1, y0, y1 = \ 140 | int(width*ratio), int(width*(1-ratio)), int(height*ratio), int(height*(1-ratio)) 141 | self.norm = torch.Tensor([width, height, 1]) 142 | self.p00 = torch.Tensor([x0, y0, 1]) 143 | self.p01 = torch.Tensor([x0, y1, 1]) 144 | self.p10 = torch.Tensor([x1, y0, 1]) 145 | self.p11 = torch.Tensor([x1, y1, 1]) 146 | if USE_CUDA == True: 147 | self.p00 = self.p00.cuda() 148 | self.p01 = self.p01.cuda() 149 | self.p10 = self.p10.cuda() 150 | self.p11 = self.p11.cuda() 151 | self.norm = self.norm.cuda() 152 | 153 | def forward(self, Vt, Rt, ratio = 0.04): 154 | batch_size = Vt.size()[0] 155 | 156 | row_mid = self.static_options["num_grid_rows"] // 2 157 | virtual_projection_t = torch_GetVirtualProjection(self.static_options, Vt) 158 | 159 | real_projection_t = torch_GetVirtualProjection(self.static_options, Rt) 160 | 161 | # virtual projection and real projection 162 | transform = torch_GetHomographyTransformFromProjections(real_projection_t, virtual_projection_t) 163 | 164 | p00 = (torch_ApplyTransform(transform, self.p00) / self.norm)[:,:2] 165 | p01 = (torch_ApplyTransform(transform, self.p01) / self.norm)[:,:2] 166 | p10 = (torch_ApplyTransform(transform, self.p10) / self.norm)[:,:2] 167 | p11 = (torch_ApplyTransform(transform, self.p11) / self.norm)[:,:2] 168 | 169 | loss = torch.stack((self.get_loss(p00), self.get_loss(p01), self.get_loss(p10), self.get_loss(p11)),dim = 1) 170 | loss,_ = torch.max(loss, dim = 1) 171 | 172 | loss = torch.min(loss, loss - loss + 1) #[0] 173 | loss = torch.sum(loss) / batch_size 174 | 175 | return loss 176 | 177 | def get_loss(self, p): 178 | d = (p - self.inner_ratio) * (p < self.inner_ratio).type(torch.cuda.FloatTensor) + \ 179 | (1 - self.inner_ratio - p) * (p > (1 - self.inner_ratio)).type(torch.cuda.FloatTensor) 180 | return torch.sum(d**2, dim = 1) 181 | -------------------------------------------------------------------------------- /dvs/metrics.py: -------------------------------------------------------------------------------- 1 | import os 2 | import sys 3 | import numpy as np 4 | import cv2 5 | import math 6 | import pdb 7 | import matplotlib.pyplot as plt 8 | from printer import Printer 9 | from warp import video2frame_one_seq 10 | import datetime 11 | import torch 12 | import copy 13 | import csv 14 | import copyreg 15 | import shutil 16 | import matplotlib.pyplot as plt 17 | from util import crop_video 18 | 19 | def _pickle_keypoints(point): 20 | return cv2.KeyPoint, (*point.pt, point.size, point.angle, 21 | point.response, point.octave, point.class_id) 22 | 23 | copyreg.pickle(cv2.KeyPoint().__class__, _pickle_keypoints) 24 | 25 | os.environ["CUDA_VISIBLE_DEVICES"] = "0" 26 | 27 | h_size = 480 28 | w_size = 640 29 | 30 | def crop_metric(M): 31 | points = np.array([[0,0,1],[0,h_size,1], [w_size,0,1], [w_size,h_size,1]]).T 32 | result = np.matmul(M,points).T 33 | result = result[:,:2]/result[:,2:] 34 | w_out = 1 - max(result[0,0], result[1,0], w_size - result[2,0], w_size - result[3,0], 0)/w_size 35 | h_out = 1 - max(result[0,1], result[2,1], h_size - result[1,1], h_size - result[3,1], 0)/h_size 36 | return w_out, h_out 37 | 38 | # https://stackoverflow.com/questions/34389125/how-to-get-the-scale-factor-of-getperspectivetransform-in-opencv 39 | def get_scale(M): 40 | h1 = M[0, 0] 41 | h2 = M[0, 1] 42 | h3 = M[0, 2] 43 | h4 = M[1, 0] 44 | h5 
= M[1, 1] 45 | h6 = M[1, 2] 46 | h7 = M[2, 0] 47 | h8 = M[2, 1] 48 | QR = np.array([[h1-(h7*h3), h2-(h8*h3)], [h4-(h7*h6), h5-(h8*h6)]]) 49 | Q, R = np.linalg.qr(QR) 50 | return abs(R[0,0]), abs(R[1,1]) 51 | 52 | # https://stackoverflow.com/questions/21019338/how-to-change-the-homography-with-the-scale-of-the-image 53 | def get_rescale_matrix(M, sx, sy): 54 | S = np.eye(3, dtype = float) 55 | S[0,0] = sx 56 | S[1,1] = sy 57 | 58 | S1 = np.eye(3, dtype = float) 59 | S1[0,0] = 1/sx 60 | S1[1,1] = 1/sy 61 | return np.matmul(M, S1) 62 | 63 | # Part of code reference from https://github.com/jinsc37/DIFRINT/blob/master/metrics.py 64 | def metrics(in_src, out_src, package, crop_scale = False, re_compute = False): 65 | load_dic = None 66 | if re_compute and os.path.exists(package): 67 | print("Start load") 68 | load_dic = torch.load(package) 69 | print("Finish load") 70 | dic = { 71 | 'M': None, 72 | 'CR_seq': [], 73 | 'DV_seq': [], 74 | 'SS_t': None, 75 | 'SS_r': None, 76 | 'w_crop':[], 77 | 'h_crop':[], 78 | 'distortion': [], 79 | 'count': 0, 80 | 'in_sift': {}, 81 | 'out_sift': {}, 82 | 'fft_t': {}, 83 | 'fft_r': {} 84 | } 85 | 86 | if load_dic is not None: 87 | dic["in_sift"] = load_dic["in_sift"] 88 | dic["out_sift"] = load_dic["out_sift"] 89 | 90 | frameList_in = sorted(os.listdir(in_src)) 91 | frameList = sorted(os.listdir(out_src)) 92 | frameList = frameList[:min(len(frameList_in),len(frameList))] 93 | 94 | # Create brute-force matcher object 95 | bf = cv2.BFMatcher() 96 | 97 | # Apply the homography transformation if we have enough good matches 98 | MIN_MATCH_COUNT = 10 #10 99 | 100 | ratio = 0.7 #0.7 101 | thresh = 5.0 #5.0 102 | 103 | Pt = np.asarray([[1.0,0.0,0.0],[0.0,1.0,0.0],[0.0,0.0,1.0]]) 104 | P_seq = [] 105 | count = 1 106 | for index, f in enumerate(frameList, 0): 107 | if f.endswith('.png'): 108 | # Load the images in gray scale 109 | img1 = cv2.imread(os.path.join(in_src, f), 0) 110 | img1 = cv2.resize(img1, (w_size,h_size), interpolation = cv2.INTER_LINEAR) 111 | 112 | img1o = cv2.imread(os.path.join(out_src, f), 0) 113 | img1o = cv2.resize(img1o, (w_size,h_size), interpolation = cv2.INTER_LINEAR) 114 | sift = cv2.SIFT_create() 115 | 116 | if f in dic["in_sift"]: 117 | keyPoints1, descriptors1 = dic["in_sift"][f] 118 | else: 119 | # Detect the SIFT key points and compute the descriptors for the two images 120 | keyPoints1, descriptors1 = sift.detectAndCompute(img1, None) 121 | dic["in_sift"][f] = (keyPoints1, descriptors1) 122 | 123 | if f in dic["out_sift"]: 124 | keyPoints1o, descriptors1o = dic["out_sift"][f] 125 | else: 126 | keyPoints1o, descriptors1o = sift.detectAndCompute(img1o, None) 127 | dic["out_sift"][f] = (keyPoints1o, descriptors1o) 128 | 129 | # Match the descriptors 130 | matches = bf.knnMatch(descriptors1, descriptors1o, k=2) 131 | 132 | # Select the good matches using the ratio test 133 | goodMatches = [] 134 | 135 | for m, n in matches: 136 | if m.distance < ratio * n.distance: 137 | goodMatches.append(m) 138 | 139 | if len(goodMatches) > MIN_MATCH_COUNT: 140 | # Get the good key points positions 141 | sourcePoints = np.float32([ keyPoints1[m.queryIdx].pt for m in goodMatches ]).reshape(-1, 1, 2) 142 | destinationPoints = np.float32([ keyPoints1o[m.trainIdx].pt for m in goodMatches ]).reshape(-1, 1, 2) 143 | 144 | M, mask = cv2.findHomography(sourcePoints, destinationPoints, method=cv2.RANSAC, ransacReprojThreshold=thresh) 145 | im_dst = cv2.warpPerspective(img1, M, (w_size,h_size)) 146 | 147 | cm = [] 148 | for i in range(6): 149 | for j in range(6): 150 
| hs = int(h_size * (0.2 + 0.1 * i)) 151 | he = int(h_size * (0.3 + 0.1 * i)) 152 | ws = int(w_size * (0.2 + 0.1 * j)) 153 | we = int(w_size * (0.3 + 0.1 * j)) 154 | cm.append(np.corrcoef(img1o[hs:he, ws:we].flat, im_dst[hs:he, ws:we].flat)) 155 | dic["distortion"].append(cm) 156 | 157 | if crop_scale: 158 | sx, sy = get_scale(M) 159 | M_scale = get_rescale_matrix(M, sx, sy) 160 | w_crop, h_crop = crop_metric(M_scale) 161 | else: 162 | w_crop, h_crop = crop_metric(M) 163 | dic["w_crop"].append(w_crop) 164 | dic["h_crop"].append(h_crop) 165 | 166 | # Obtain Scale, Translation, Rotation, Distortion value 167 | sx = M[0, 0] 168 | sy = M[1, 1] 169 | scaleRecovered = math.sqrt(np.abs(sx*sy)) 170 | 171 | w, _ = np.linalg.eig(M[0:2,0:2]) 172 | w = np.sort(w)[::-1] 173 | DV = w[1]/w[0] 174 | #pdb.set_trace() 175 | 176 | dic["CR_seq"].append(1.0/scaleRecovered) 177 | dic["DV_seq"].append(DV) 178 | 179 | # For Stability score calculation 180 | if count < len(frameList): 181 | f_path = f[:-9] + '%05d.png' % (int(f[-9:-4])+1) 182 | if f_path in dic["out_sift"]: 183 | keyPoints2o, descriptors2o = dic["out_sift"][f_path] 184 | else: 185 | img2o = cv2.imread(os.path.join(out_src, f_path), 0) 186 | img2o = cv2.resize(img2o, (w_size,h_size), interpolation = cv2.INTER_LINEAR) 187 | keyPoints2o, descriptors2o = sift.detectAndCompute(img2o, None) 188 | dic["out_sift"][f_path] = (keyPoints2o, descriptors2o) 189 | 190 | matches = bf.knnMatch(descriptors1o, descriptors2o, k=2) 191 | goodMatches = [] 192 | 193 | for m, n in matches: 194 | if m.distance < ratio * n.distance: 195 | goodMatches.append(m) 196 | 197 | if len(goodMatches) > MIN_MATCH_COUNT: 198 | # Get the good key points positions 199 | sourcePoints = np.float32([ keyPoints1o[m.queryIdx].pt for m in goodMatches ]).reshape(-1, 1, 2) 200 | destinationPoints = np.float32([ keyPoints2o[m.trainIdx].pt for m in goodMatches ]).reshape(-1, 1, 2) 201 | 202 | # Obtain the homography matrix 203 | M, mask = cv2.findHomography(sourcePoints, destinationPoints, method=cv2.RANSAC, ransacReprojThreshold=thresh) 204 | 205 | P_seq.append(np.matmul(Pt, M)) 206 | Pt = np.matmul(Pt, M) 207 | if count % 10 ==0: 208 | sys.stdout.write('\rFrame: ' + str(count) + '/' + str(len(frameList))) 209 | sys.stdout.flush() 210 | dic["count"] = count 211 | count += 1 212 | 213 | # Make 1D temporal signals 214 | P_seq_t = np.asarray([1]) 215 | P_seq_r = np.asarray([1]) 216 | 217 | #pdb.set_trace() 218 | for Mp in P_seq: 219 | sx = Mp[0, 0] 220 | sy = Mp[1, 1] 221 | c = Mp[0, 2] 222 | f = Mp[1, 2] 223 | 224 | transRecovered = math.sqrt(c*c + f*f) 225 | thetaRecovered = math.atan2(sx, sy) * 180 / math.pi 226 | 227 | P_seq_t = np.concatenate((P_seq_t, [transRecovered]), axis=0) 228 | P_seq_r = np.concatenate((P_seq_r, [thetaRecovered]), axis=0) 229 | 230 | P_seq_t = np.delete(P_seq_t, 0) 231 | P_seq_r = np.delete(P_seq_r, 0) 232 | 233 | # FFT 234 | fft_t = np.fft.fft(P_seq_t) 235 | fft_r = np.fft.fft(P_seq_r) 236 | fft_t = abs(fft_t)**2 237 | fft_r = abs(fft_r)**2 238 | 239 | fft_t = np.delete(fft_t, 0) 240 | fft_r = np.delete(fft_r, 0) 241 | fft_t = fft_t[:int(len(fft_t)/2)] 242 | fft_r = fft_r[:int(len(fft_r)/2)] 243 | 244 | dic["fft_t"] = fft_t 245 | dic["fft_r"] = fft_r 246 | 247 | SS_t = np.sum(fft_t[:5])/np.sum(fft_t) 248 | SS_r = np.sum(fft_r[:5])/np.sum(fft_r) 249 | 250 | dic["CR_seq"] = np.array(dic["CR_seq"]) 251 | dic["DV_seq"] = np.array(dic["DV_seq"]) 252 | dic["w_crop"] = np.array(dic["w_crop"]) 253 | dic["h_crop"] = np.array(dic["h_crop"]) 254 | dic["distortion"] = 
np.array(dic["distortion"]) 255 | dic["SS_t"] = SS_t 256 | dic["SS_r"] = SS_r 257 | 258 | if not (re_compute and os.path.exists(package)): 259 | torch.save(dic, package) 260 | 261 | DV_seq = np.absolute(dic["DV_seq"]) 262 | DV_seq = DV_seq[np.where((DV_seq >= 0.5) & (DV_seq <= 1))] 263 | Distortion = str.format('{0:.4f}', np.nanmin(DV_seq)) 264 | Distortion_avg = str.format('{0:.4f}', np.nanmean(DV_seq)) 265 | 266 | Trans = str.format('{0:.4f}', dic["SS_t"]) 267 | Rot = str.format('{0:.4f}', dic["SS_r"]) 268 | 269 | w_crop = crop_rm_outlier(dic["w_crop"]) 270 | h_crop = crop_rm_outlier(dic["h_crop"]) 271 | 272 | FOV = str.format( '{0:.4f}', min(np.nanmin(w_crop), np.nanmin(h_crop)) ) 273 | FOV_avg = str.format( '{0:.4f}', (np.nanmean(w_crop)+np.nanmean(h_crop)) / 2 ) 274 | 275 | Correlation_avg = str.format( '{0:.4f}', np.nanmean(dic["distortion"][10:]) ) 276 | Correlation_min = str.format( '{0:.4f}', np.nanmin(dic["distortion"][10:]) ) 277 | 278 | # Print results 279 | print('\n***Distortion value (Avg, Min):') 280 | print(Distortion_avg +' | '+ Distortion) 281 | print('***Stability Score (Avg, Trans, Rot):') 282 | print(str.format('{0:.4f}', (dic["SS_t"]+dic["SS_r"])/2) +' | '+ Trans +' | '+ Rot ) 283 | print("=================") 284 | print('***FOV ratio (Avg, Min):') 285 | print( FOV_avg +' | '+ FOV ) 286 | print('***Correlation value (Avg, Min):') 287 | print( Correlation_avg +' | '+ Correlation_min , "\n") 288 | 289 | dic['in_sift'] = 0 290 | dic['out_sift'] = 0 291 | torch.save(dic, package[:-3]+"_light.pt") 292 | return float(FOV) 293 | 294 | def crop_rm_outlier(crop): 295 | crop = np.array(crop) 296 | crop = crop[crop >= 0.5] 297 | return sorted(crop)[5:] 298 | 299 | if __name__ == '__main__': 300 | metric_path = os.path.join("./test/stabilzation/metric") 301 | if not os.path.exists(metric_path): 302 | os.makedirs(metric_path) 303 | 304 | in_video = "./video/s_114_outdoor_running_trail_daytime/ControlCam_20200930_104820.mp4" 305 | in_folder = os.path.join(metric_path, "in_frame") 306 | if not os.path.exists(in_folder): 307 | os.makedirs(in_folder) 308 | print("Convert video to frames") 309 | video2frame_one_seq(in_video, in_folder) 310 | 311 | out_video = "./test/stabilzation/s_114_outdoor_running_trail_daytime_stab.mp4" 312 | out_folder = os.path.join(metric_path, "out_frame") 313 | if not os.path.exists(out_folder): 314 | os.makedirs(out_folder) 315 | print("Convert video to frames") 316 | video2frame_one_seq(out_video, out_folder) 317 | 318 | package = os.path.join(metric_path, "stabilzation.pt") 319 | FOV = metrics(in_folder, out_folder, package) 320 | 321 | crop_path = out_video[:-4] + "_crop.mp4" 322 | crop_video(out_video, crop_path, FOV) 323 | -------------------------------------------------------------------------------- /dvs/printer.py: -------------------------------------------------------------------------------- 1 | import sys 2 | 3 | class Printer(object): 4 | def __init__(self, *files): 5 | self.files = files 6 | 7 | #Redirect Printer 8 | def open(self): 9 | if not hasattr(sys, '_stdout'): 10 | sys._stdout = sys.stdout 11 | sys.stdout = self 12 | return self 13 | 14 | #Restore the Default Printer 15 | def close(self): 16 | stdout = sys._stdout 17 | for f in self.files: 18 | if f != stdout: 19 | f.close() 20 | sys.stdout = stdout 21 | 22 | #Overloading write() Function 23 | def write(self, obj): 24 | for f in self.files: 25 | f.write(obj) 26 | f.flush() 27 | 28 | def flush(self): 29 | pass 30 | 31 | if __name__ == '__main__': 32 | print("Start testing") 33 | t = 
Printer(sys.stdout, open('./test.txt', 'w+')).open() 34 | print("In files") 35 | t.close() 36 | print("Not in files") -------------------------------------------------------------------------------- /dvs/requirements.txt: -------------------------------------------------------------------------------- 1 | colorama==0.4.4 2 | ffmpeg==1.4 3 | imageio==2.9.0 4 | matplotlib==3.3.4 5 | opencv-contrib-python==4.5.1.48 6 | opencv-python==4.5.1.48 7 | pytz==2021.1 8 | PyYAML==5.4.1 9 | scipy==1.5.4 10 | tensorboardX==2.1 11 | tqdm==4.59.0 -------------------------------------------------------------------------------- /dvs/train.py: -------------------------------------------------------------------------------- 1 | import os 2 | import sys 3 | import torch 4 | import torchvision 5 | import torch.nn as nn 6 | from torch.autograd import Variable 7 | 8 | import time 9 | import yaml 10 | import argparse 11 | import numpy as np 12 | from printer import Printer 13 | from dataset import get_data_loader 14 | from model import Model 15 | import datetime 16 | import copy 17 | from util import make_dir, get_optimizer, AverageMeter, save_train_info, norm_flow 18 | from gyro import torch_QuaternionProduct, torch_QuaternionReciprocal, torch_norm_quat 19 | 20 | os.environ["CUDA_VISIBLE_DEVICES"] = "0" 21 | 22 | def run_epoch(model, loader, cf, epoch, lr, optimizer=None, is_training=True, USE_CUDA=True, clip_norm=0): 23 | no_flo = False 24 | number_virtual, number_real = cf['data']["number_virtual"], cf['data']["number_real"] 25 | avg_loss = AverageMeter() 26 | if is_training: 27 | model.net.train() 28 | model.unet.train() 29 | else: 30 | model.net.eval() 31 | model.unet.eval() 32 | for i, data in enumerate(loader, 0): 33 | # get the inputs; data is a list of [inputs, labels] 34 | real_inputs, times, flo, flo_back, real_projections, real_postion, ois, real_queue_idx = data 35 | print("Fininsh Load data") 36 | 37 | real_inputs = real_inputs.type(torch.float) #[b,60,84=21*4] 38 | real_projections = real_projections.type(torch.float) 39 | flo = flo.type(torch.float) 40 | flo_back = flo_back.type(torch.float) 41 | ois = ois.type(torch.float) 42 | 43 | batch_size, step, dim = real_inputs.size() 44 | times = times.numpy() 45 | real_queue_idx = real_queue_idx.numpy() 46 | virtual_queue = loader.dataset.random_init_virtual_queue(batch_size, real_postion[:,0,:].numpy(), times[:,1]) # TODO 47 | # virtual_queue = [None] * batch_size 48 | loss = 0 49 | model.net.init_hidden(batch_size) 50 | for j in range(step): 51 | virtual_inputs, vt_1 = loader.dataset.get_virtual_data( 52 | virtual_queue, real_queue_idx, times[:, j], times[:, j+1], times[:, 0], batch_size, number_virtual, real_postion[:,j]) 53 | 54 | real_inputs_step = real_inputs[:,j,:] 55 | inputs = torch.cat((real_inputs_step,virtual_inputs), dim = 1) 56 | 57 | # inputs = Variable(real_inputs_step) 58 | if USE_CUDA: 59 | real_inputs_step = real_inputs_step.cuda() 60 | virtual_inputs = virtual_inputs.cuda() 61 | inputs = inputs.cuda() 62 | if no_flo is False: 63 | flo_step = flo[:,j].cuda() 64 | flo_back_step = flo_back[:,j].cuda() 65 | else: 66 | flo_step = None 67 | flo_back_step = None 68 | vt_1 = vt_1.cuda() 69 | real_projections_t = real_projections[:,j+1].cuda() 70 | real_projections_t_1 = real_projections[:,j].cuda() 71 | real_postion_anchor = real_postion[:,j].cuda() 72 | ois_step = ois[:,j].cuda() 73 | 74 | if no_flo is False: 75 | b, h, w, _ = flo_step.size() 76 | flo_step = norm_flow(flo_step, h, w) 77 | flo_back_step = norm_flow(flo_back_step, h, w) 78 | 79 
| if is_training: 80 | if no_flo is False: 81 | flo_out = model.unet(flo_step, flo_back_step) 82 | else: 83 | flo_out = None 84 | 85 | if j < 1: 86 | for i in range(2): 87 | out = model.net(inputs, flo_out, ois_step) 88 | else: 89 | out = model.net(inputs, flo_out, ois_step) 90 | else: 91 | with torch.no_grad(): 92 | if no_flo is False: 93 | flo_out = model.unet(flo_step, flo_back_step) 94 | else: 95 | flo_out = None 96 | 97 | if j < 1: 98 | for i in range(2): 99 | out = model.net(inputs, flo_out, ois_step) 100 | else: 101 | out = model.net(inputs, flo_out, ois_step) 102 | 103 | if epoch <= 30: 104 | follow = True 105 | else: 106 | follow = False 107 | 108 | if epoch > 30: 109 | undefine = True 110 | else: 111 | undefine = False 112 | 113 | if epoch > 40: 114 | optical = True 115 | else: 116 | optical = False 117 | 118 | loss_step = model.loss(out, vt_1, virtual_inputs, real_inputs_step, \ 119 | flo_step, flo_back_step, real_projections_t, real_projections_t_1, real_postion_anchor, \ 120 | follow = follow, undefine = undefine, optical = optical, stay = optical) 121 | 122 | loss = loss_step 123 | 124 | virtual_position = virtual_inputs[:, -4:] 125 | pos = torch_QuaternionProduct(virtual_position, real_postion_anchor) 126 | out = torch_QuaternionProduct(out, pos) 127 | 128 | if USE_CUDA: 129 | out = out.cpu().detach().numpy() 130 | 131 | virtual_queue = loader.dataset.update_virtual_queue(batch_size, virtual_queue, out, times[:,j+1]) 132 | 133 | if (j+1) % 10 == 0: 134 | print("Step: "+str(j+1)+"/"+str(step)) 135 | print(loss) 136 | loss = torch.sum(loss) 137 | if is_training: 138 | optimizer.zero_grad() 139 | loss.backward(retain_graph=True) 140 | if clip_norm: 141 | nn.utils.clip_grad_norm_(model.net.parameters(), max_norm=clip_norm) 142 | nn.utils.clip_grad_norm_(model.unet.parameters(), max_norm=clip_norm) 143 | optimizer.step() 144 | 145 | avg_loss.update(loss.item(), batch_size) 146 | 147 | return avg_loss.avg 148 | 149 | 150 | def train(args = None): 151 | torch.autograd.set_detect_anomaly(True) 152 | config_file = args.config 153 | cf = yaml.load(open(config_file, 'r')) 154 | 155 | USE_CUDA = cf['data']["use_cuda"] 156 | seed = cf['train']["seed"] 157 | 158 | torch.manual_seed(seed) 159 | if USE_CUDA: 160 | torch.cuda.manual_seed(seed) 161 | 162 | checkpoints_dir = cf['data']['checkpoints_dir'] 163 | epochs = cf["train"]["epoch"] 164 | snapshot = cf["train"]["snapshot"] 165 | decay_epoch = cf['train']['decay_epoch'] 166 | init_lr = cf["train"]["init_lr"] 167 | lr_decay = cf["train"]["lr_decay"] 168 | lr_step = cf["train"]["lr_step"] 169 | clip_norm = cf["train"]["clip_norm"] 170 | load_model = cf["model"]["load_model"] 171 | 172 | checkpoints_dir = make_dir(checkpoints_dir, cf) 173 | 174 | if load_model is None: 175 | log_file = open(os.path.join(cf["data"]["log"], cf['data']['exp']+'.log'), 'w+') 176 | else: 177 | log_file = open(os.path.join(cf["data"]["log"], cf['data']['exp']+'.log'), 'a') 178 | printer = Printer(sys.stdout, log_file).open() 179 | 180 | print('----Print Arguments Setting------') 181 | for key in cf: 182 | print('{}:'.format(key)) 183 | for para in cf[key]: 184 | print('{:50}:{}'.format(para,cf[key][para])) 185 | print('\n') 186 | 187 | # Define the model 188 | model = Model(cf) 189 | optimizer = get_optimizer(cf["train"]["optimizer"], model, init_lr, cf) 190 | 191 | for idx, m in enumerate(model.net.children()): 192 | print('{}:{}'.format(idx,m)) 193 | for idx, m in enumerate(model.unet.children()): 194 | print('{}:{}'.format(idx,m)) 195 | 196 | if load_model is 
not None: 197 | print("------Load Pretrined Model--------") 198 | checkpoint = torch.load(load_model) 199 | model.net.load_state_dict(checkpoint['state_dict']) 200 | model.unet.load_state_dict(checkpoint['unet']) 201 | print("------Resume Training Process-----") 202 | optimizer.load_state_dict(checkpoint['optim_dict']) 203 | epoch_load = checkpoint['epoch'] 204 | print("Epoch load: ", epoch_load) 205 | else: 206 | epoch_load = 0 207 | 208 | if USE_CUDA: 209 | model.net.cuda() 210 | model.unet.cuda() 211 | if load_model is not None: 212 | for state in optimizer.state.values(): 213 | for k, v in state.items(): 214 | if isinstance(v, torch.Tensor): 215 | state[k] = v.cuda() 216 | for param in optimizer.param_groups: 217 | init_lr = param['lr'] 218 | 219 | print("-----------Load Dataset----------") 220 | train_loader, test_loader = get_data_loader(cf, no_flo = False) 221 | 222 | print("----------Start Training----------") 223 | currentDT = datetime.datetime.now() 224 | print(currentDT.strftime(" %Y-%m-%d %H:%M:%S")) 225 | 226 | start_time = time.time() 227 | 228 | if lr_step: 229 | decay_epoch = list(range(1+lr_step, epochs+1, lr_step)) 230 | 231 | lr = init_lr 232 | 233 | for count in range(epoch_load+1, epochs+1): 234 | if decay_epoch != None and count in decay_epoch: 235 | lr *= lr_decay 236 | for param in optimizer.param_groups: 237 | param['lr'] *= lr_decay 238 | 239 | print("Epoch: %d, learning_rate: %.5f" % (count, lr)) 240 | 241 | train_loss = run_epoch(model, train_loader, cf, count, lr, optimizer=optimizer, clip_norm=clip_norm, is_training=True, USE_CUDA=USE_CUDA) 242 | 243 | test_loss = run_epoch(model, test_loader, cf, count, lr, is_training=False, USE_CUDA=USE_CUDA) 244 | 245 | time_used = (time.time() - start_time) / 60 246 | print("Epoch %d done | TrLoss: %.4f | TestLoss: %.4f | Time_used: %.4f minutes" % ( 247 | count, train_loss, test_loss, time_used)) 248 | 249 | if count % snapshot == 0: 250 | save_train_info("epoch", checkpoints_dir, cf, model, count, optimizer) 251 | save_train_info("last", checkpoints_dir, cf, model, count, optimizer) 252 | print("Model stored at epoch %d"%count) 253 | 254 | currentDT = datetime.datetime.now() 255 | print(currentDT.strftime(" %Y-%m-%d %H:%M:%S")) 256 | print("------------End Training----------") 257 | return 258 | 259 | if __name__ == '__main__': 260 | parser = argparse.ArgumentParser("Training model") 261 | parser.add_argument("--config", default="./conf/stabilzation_train.yaml", help="Config file.") 262 | args = parser.parse_args() 263 | train(args = args) -------------------------------------------------------------------------------- /dvs/util.py: -------------------------------------------------------------------------------- 1 | import os 2 | import torch 3 | import cv2 4 | from itertools import chain 5 | from warp import load_video, save_video 6 | import numpy as np 7 | import matplotlib.pyplot as plt 8 | from gyro import get_rotations 9 | import shutil 10 | 11 | def save_train_info(name, checkpoints_dir, cf, model, count, optimizer = None): 12 | path = None 13 | if name == "last": 14 | path = os.path.join(checkpoints_dir, cf['data']['exp']+'_last.checkpoint') 15 | elif name == "best": 16 | path = os.path.join(checkpoints_dir, cf['data']['exp']+'_best.checkpoint') 17 | else: 18 | path = os.path.join(checkpoints_dir, cf['data']['exp']+'_epoch%d.checkpoint'%count) 19 | torch.save(model.save_checkpoint(epoch = count, optimizer=optimizer), path) 20 | 21 | def make_dir(checkpoints_dir ,cf): 22 | inference_path = "./test" 23 | if not 
os.path.exists(checkpoints_dir): 24 | os.makedirs(checkpoints_dir) 25 | if not os.path.exists(cf["data"]["log"]): 26 | os.makedirs(cf["data"]["log"]) 27 | if not os.path.exists(inference_path): 28 | os.makedirs(inference_path) 29 | 30 | inference_path = os.path.join(inference_path, cf['data']['exp']) 31 | if not os.path.exists(inference_path): 32 | os.makedirs(inference_path) 33 | checkpoints_dir = os.path.join(checkpoints_dir, cf['data']['exp']) 34 | if not os.path.exists(checkpoints_dir): 35 | os.makedirs(checkpoints_dir) 36 | return checkpoints_dir 37 | 38 | def get_optimizer(optimizer, model, init_lr, cf): 39 | if optimizer == "adam": 40 | optimizer = torch.optim.Adam(chain(model.net.parameters(), model.unet.parameters()), lr=init_lr, weight_decay=cf["train"]["weight_decay"]) 41 | elif optimizer == "sgd": 42 | optimizer = torch.optim.SGD(chain(model.net.parameters(), model.unet.parameters()), lr=init_lr, momentum=cf["train"]["momentum"]) 43 | return optimizer 44 | 45 | def crop_video(in_path, out_path, crop_ratio): 46 | frame_array, fps, size = load_video(in_path) 47 | hs = int((1-crop_ratio)*1080) + 1 48 | he = int(crop_ratio*1080) - 1 49 | ws = int((1-crop_ratio)*1920) + 1 50 | we = int(crop_ratio*1920) - 1 51 | for i in range(len(frame_array)): 52 | frame_array[i] = cv2.resize(frame_array[i][hs:he,ws:we,:], size, interpolation = cv2.INTER_LINEAR) 53 | save_video(out_path, frame_array, fps, size= size) 54 | 55 | def norm_flow(flow, h, w): 56 | if flow.shape[2] == 2: 57 | flow[:,:,0] /= h 58 | flow[:,:,1] /= w 59 | else: 60 | flow[:,:,:,0] /= h 61 | flow[:,:,:,1] /= w 62 | return flow 63 | 64 | class AverageMeter(object): 65 | def __init__(self): 66 | self.reset() 67 | 68 | def reset(self): 69 | self.avg = 0 70 | self.sum = 0 71 | self.cnt = 0 72 | 73 | def update(self, val, n=1): 74 | self.sum += val * n 75 | self.cnt += n 76 | if self.cnt > 0: 77 | self.avg = self.sum / self.cnt -------------------------------------------------------------------------------- /dvs/warp/__init__.py: -------------------------------------------------------------------------------- 1 | from .warping import ( 2 | warp_video 3 | ) 4 | from .read_write import ( 5 | save_video, 6 | load_video, 7 | video2frame_one_seq 8 | ) -------------------------------------------------------------------------------- /dvs/warp/rasterizer.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import matplotlib.pyplot as plt 3 | from numpy import array 4 | import torch 5 | import cv2 6 | import time 7 | 8 | device = torch.device("cuda") 9 | 10 | def Rasterization(image, grid, get_mesh_only = False): 11 | # grid xy WH 12 | shape = image.size() 13 | height = shape[1] 14 | width = shape[2] 15 | wapper_upper_triangle, wapper_lower_triangle = grid_to_triangle(grid[:,:,:2]) 16 | origin_upper_triangle, origin_lower_triangle = grid_to_triangle(grid[:,:,2:]) 17 | 18 | 19 | [xmax, xmin, ymax, ymin], xlength, ylength = grid_size(wapper_upper_triangle, wapper_lower_triangle, height, width) 20 | 21 | xratio = xlength / width 22 | yratio = ylength / height 23 | 24 | wapper_triangle = torch.stack((wapper_upper_triangle,wapper_lower_triangle),dim = 1).to(device) # grid * upper/lower * point * xy 25 | origin_triangle = torch.stack((origin_upper_triangle,origin_lower_triangle),dim = 1).to(device) # grid * upper/lower * point * xy 26 | 27 | tran_triangle = torch.zeros(wapper_triangle.size()).to(device) 28 | 29 | tran_triangle[:,:,:,0] = (wapper_triangle[:,:,:,0] - 
xmin.view(-1,1,1).to(device)/width) / xratio 30 | tran_triangle[:,:,:,1] = (wapper_triangle[:,:,:,1] - ymin.view(-1,1,1).to(device)/height) / yratio 31 | 32 | mask = triangle2mask(tran_triangle, ylength, xlength) # consuming 33 | 34 | mask = torch.unsqueeze(mask, 4) 35 | origin_triangle = torch.unsqueeze(origin_triangle, 1) 36 | 37 | grid_sample = origin_triangle * mask # consuming 38 | grid_sample = torch.sum(torch.sum(grid_sample, dim = 3), dim = 2).view(-1,ylength,xlength,2) # consuming 39 | 40 | gxmin = min(0, int(torch.min(xmin))) 41 | gxmax = int(torch.max(xmin) + xlength) 42 | gymin = min(0, int(torch.min(ymin))) 43 | gymax = int(torch.max(ymin) + ylength) 44 | grid_merge = torch.zeros((max(gymax-gymin, height, height - gymin),max(gxmax - gxmin, width, width - gxmin),2)).to(device) 45 | for i in range(grid_sample.size()[0]): 46 | x_s = int(xmin[i] - gxmin) 47 | x_e = int(xmin[i] + xlength - gxmin) 48 | y_s = int(ymin[i] - gymin) 49 | y_e = int(ymin[i] + ylength -gymin) 50 | grid_merge[ y_s:y_e, x_s:x_e, :] += grid_sample[i, :, :, :] 51 | 52 | # grid_merge = grid_merge[min(-gxmin,0):min(-gxmin,0)+height, min(-gymin,0):min(-gymin,0)+width, :] 53 | grid_merge = grid_merge[-gymin:-gymin+height, -gxmin:-gxmin+width, :] 54 | # if get_mesh_only: 55 | # grid_merge = grid_merge.cpu().numpy() 56 | # mesh_grid = generate_mesh_grid(height, width) 57 | # out = grid_merge - mesh_grid 58 | # return np.concatenate((out[:,:,1:],out[:,:,:1]),2) 59 | 60 | shift = torch.tensor([0.5/height,0.5/width])[None, None, :].to(device) 61 | grid_merge = (grid_merge + 1*shift) * 2 - 1 62 | 63 | image[:3,:2,:2] = 0 64 | 65 | image = torch.unsqueeze(image, 0).to(device) 66 | grid_merge = torch.unsqueeze(grid_merge, 0) 67 | 68 | image = torch.nn.functional.grid_sample(image, grid_merge) # default bilinear 69 | 70 | image = torch.squeeze(image, 0) 71 | return image.cpu() 72 | 73 | def grid_to_triangle(grid): 74 | grid_shape = grid.size() 75 | num = (grid_shape[0] - 1) * (grid_shape[1] - 1) 76 | 77 | upper_triangle = grid[:-1, :-1, :, None] 78 | upper_triangle = torch.cat(( upper_triangle, grid[1:, :-1, :, None]), dim = 3) 79 | upper_triangle = torch.cat(( upper_triangle, grid[:-1, 1:, :, None]), dim = 3) 80 | upper_triangle = upper_triangle.view(num, 2, 3) 81 | upper_triangle = torch.transpose(upper_triangle, 1, 2) # grid * point * xy 82 | 83 | lower_triangle = grid[:-1, 1:, :, None] 84 | lower_triangle = torch.cat(( lower_triangle, grid[1:, :-1, :, None]), dim = 3) 85 | lower_triangle = torch.cat(( lower_triangle, grid[1:, 1:, :, None]), dim = 3) 86 | lower_triangle = lower_triangle.view(num, 2, 3) 87 | lower_triangle = torch.transpose(lower_triangle, 1, 2) 88 | 89 | return upper_triangle, lower_triangle # grid * point * xy 90 | 91 | def grid_size(upper_triangle, lower_triangle, height, width): 92 | wapper_grid = torch.cat((upper_triangle, lower_triangle),dim =1) 93 | xmax = torch.floor(torch.max(wapper_grid[:,:,0]*width, 1)[0]) + 1 94 | ymax = torch.floor(torch.max(wapper_grid[:,:,1]*height, 1)[0]) + 1 95 | xmin = torch.floor(torch.min(wapper_grid[:,:,0]*width, 1)[0]) 96 | ymin = torch.floor(torch.min(wapper_grid[:,:,1]*height, 1)[0]) 97 | 98 | xlength = int(torch.max(xmax - xmin)) 99 | ylength = int(torch.max(ymax - ymin)) 100 | 101 | return [xmax, xmin, ymax, ymin], xlength, ylength 102 | 103 | def generate_mesh_grid(height, width): 104 | # Create a grid of sampling positions 105 | xs = np.linspace(0, 1, width, endpoint=False) 106 | ys = np.linspace(0, 1, height, endpoint=False) 107 | xmesh, ymesh = 
np.meshgrid(xs, ys) 108 | # Reshape the sampling positions to a H x W x 2 tensor 109 | return np.moveaxis(array(list(zip(xmesh, ymesh))), 1, 2) 110 | 111 | def triangle2mask(d, height, width): # d: [N x T x 3 x 2] 112 | N = d.size()[0] # batch size 113 | T = d.size()[1] # triangle number 114 | P = height * width # The number of pixels in the output image. 115 | 116 | area = edgefunc(d[:, :, 1, :], d[:, :, 2, :], d[:, :, None, 0, :]) 117 | 118 | gridcpu = generate_mesh_grid(height, width) 119 | 120 | gridcpu = np.reshape(gridcpu, (height*width, 2)) 121 | 122 | grid = torch.Tensor(gridcpu) 123 | grid = grid.unsqueeze(0).repeat((N, T, 1, 1)) # [N x T x P x 2] 124 | 125 | grid = grid.to(device) 126 | 127 | # Evaluate the edge functions at every position. 128 | # We should get a [N x P] vector out of each. 129 | w0 = edgefunc(d[:, :, 1, :], d[:, :, 2, :], grid) / area 130 | w1 = edgefunc(d[:, :, 2, :], d[:, :, 0, :], grid) / area 131 | w2 = edgefunc(d[:, :, 0, :], d[:, :, 1, :], grid) / area 132 | 133 | # Only pixels inside the triangles will have color 134 | # [N x P] 135 | 136 | mask = (w0 > 0) & (w1 > 0) & (w2 > 0) 137 | mask = torch.unsqueeze(mask, 3).type(torch.cuda.FloatTensor) 138 | 139 | w = torch.stack((w0,w1,w2),dim = 3) * mask 140 | 141 | return torch.transpose(w, 1, 2) # [N x P x T x 3] 142 | 143 | 144 | def edgefunc(v0, v1, p): 145 | """ 146 | let P = H * W 147 | v0 and v1 have vertex positions for all T triangles. 148 | Their shapes are [N x T X 2] 149 | p is a list of sampling points as a [N x T X P x 2] tensor. 150 | Each of the T triangles has an [P x 2] matrix of sampling points. 151 | returns a [N x T x P] matrix 152 | """ 153 | P = p.size()[2] 154 | 155 | # Take all the x and y coordinates of all the positions as a 156 | # [N x S] tensor 157 | py = p[:, :, :, 1] 158 | px = p[:, :, :, 0] 159 | 160 | # We need to manually broadcast the vector to cover all sample points 161 | x10 = v0[:, :, 0] - v1[:, :, 0] # [N x T] 162 | y01 = v1[:, :, 1] - v0[:, :, 1] # [N x T] 163 | 164 | x10 = x10.unsqueeze(2).repeat((1, 1, P)) # [N x T x P] 165 | y01 = y01.unsqueeze(2).repeat((1, 1, P)) # [N x T x P] 166 | 167 | cross = v0[:,:,1]*v1[:,:,0] - v0[:,:,0]*v1[:,:,1] # [N x T] 168 | cross = cross.unsqueeze(2).repeat((1, 1, P)) # [N x T x P] 169 | 170 | return y01*px + x10*py + cross 171 | 172 | if __name__ == '__main__': 173 | print(generate_mesh_grid(2,3)) -------------------------------------------------------------------------------- /dvs/warp/read_write.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import cv2 3 | import os 4 | from PIL import Image, ImageDraw, ImageFont 5 | import matplotlib.pyplot as plt 6 | import ffmpeg 7 | import json 8 | import torch 9 | import argparse 10 | 11 | def load_video(path, save_dir = None, resize = None, length = -1): # N x H x W x C 12 | vidcap = cv2.VideoCapture(path) 13 | fps = vidcap.get(cv2.CAP_PROP_FPS) 14 | success,image = vidcap.read() 15 | print(image.shape) 16 | height, width, layers = image.shape 17 | if resize is None: 18 | size = (width,height) 19 | elif type(resize) is int: 20 | size = (width//resize,height//resize) 21 | else: 22 | size = resize 23 | count = 0 24 | frames = [] 25 | while success: 26 | if resize is not None: 27 | image = cv2.resize(image, size, interpolation = cv2.INTER_LINEAR) 28 | if save_dir != None: 29 | path = os.path.join(save_dir, "frame_" + str(count).zfill(4) + ".png") 30 | cv2.imwrite(path, image) 31 | frames.append(image) 32 | success,image = 
vidcap.read() 33 | count += 1 34 | if length > 0 and count >= length: 35 | break 36 | print("Video length: ", len(frames)) 37 | return frames, fps, size 38 | 39 | def video2frame(path, resize = None): 40 | data_name = sorted(os.listdir(path)) 41 | for i in range(len(data_name)): 42 | print(str(i+1)+" / " + str(len(data_name))) 43 | data_folder = os.path.join(path, data_name[i]) 44 | print(data_folder) 45 | files = os.listdir(data_folder) 46 | for f in files: 47 | if f[-4:] == ".mp4": 48 | video_name = f 49 | video_path = os.path.join(data_folder, video_name) 50 | frame_folder = os.path.join(data_folder, "frames") 51 | if not os.path.exists(frame_folder): 52 | os.makedirs(frame_folder) 53 | load_video(video_path, save_dir = frame_folder, resize=resize) 54 | 55 | def video2frame_one_seq(path, save_dir = None, resize = None): # N x H x W x C 56 | vidcap = cv2.VideoCapture(path) 57 | fps = vidcap.get(cv2.CAP_PROP_FPS) 58 | success,image = vidcap.read() 59 | print(path) 60 | print(image.shape) 61 | height, width, layers = image.shape 62 | if resize is None: 63 | size = (width,height) 64 | elif type(resize) is int: 65 | size = (width//resize,height//resize) 66 | else: 67 | size = resize 68 | count = 0 69 | while success: 70 | if resize is not None: 71 | image = cv2.resize(image, size, interpolation = cv2.INTER_LINEAR) 72 | if save_dir != None: 73 | path = os.path.join(save_dir, "frame_" + str(count).zfill(5) + ".png") 74 | cv2.imwrite(path, image) 75 | success,image = vidcap.read() 76 | count += 1 77 | return fps, size 78 | 79 | def save_video(path,frame_array, fps, size, losses = None, frame_number = False, writer = None): 80 | if writer is None: 81 | if path[-3:] == "mp4": 82 | out = cv2.VideoWriter(path,cv2.VideoWriter_fourcc(*'mp4v'), fps, size) 83 | else: 84 | out = cv2.VideoWriter(path,cv2.VideoWriter_fourcc('M','J','P','G'), fps, size) 85 | else: 86 | out = writer 87 | for i in range(len(frame_array)): 88 | # writing to a image array 89 | if frame_number: 90 | frame_array[i] = draw_number(np.asarray(frame_array[i]), i) 91 | if losses is not None: 92 | frame_array[i] = draw_number(np.asarray(frame_array[i]), losses[i], x = 900, message = "Loss: ") 93 | out.write(frame_array[i]) 94 | if writer is None: 95 | out.release() 96 | 97 | def draw_number(frame, num, x = 10, y = 10, message = "Frame: "): 98 | image=Image.fromarray(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)) 99 | draw = ImageDraw.Draw(image) 100 | font = ImageFont.truetype("./data/arial.ttf", 45) 101 | 102 | message = message + str(num) 103 | color = 'rgb(0, 0, 0)' # black color 104 | 105 | draw.text((x, y), message, fill=color, font=font) 106 | return cv2.cvtColor(np.array(image), cv2.COLOR_RGB2BGR) 107 | 108 | if __name__ == "__main__": 109 | parser = argparse.ArgumentParser("FlowNet2 Preparation") 110 | parser.add_argument("--dir_path", default="./video") 111 | args = parser.parse_args() 112 | dir_path = args.dir_path 113 | if dir_path == "./video": 114 | video2frame(dir_path, resize = 4) 115 | else: 116 | video2frame(os.path.join(dir_path, "test"), resize = 4) 117 | video2frame(os.path.join(dir_path, "training"), resize = 4) -------------------------------------------------------------------------------- /dvs/warp/warping.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from .read_write import load_video, save_video 3 | import torch 4 | import cv2 5 | from .rasterizer import Rasterization 6 | import time 7 | import os 8 | 9 | def warp_video(mesh_path, video_path, 
save_path, losses = None, frame_number = False, fps_fix = None): 10 | if type(mesh_path) == str: 11 | raise TypeError("mesh_path must be an ndarray of per-frame mesh grids; loading a mesh from a file path is not implemented") 12 | else: 13 | grid_data = mesh_path 14 | 15 | frame_array, fps, size = load_video(video_path, length = grid_data.shape[0]) 16 | if fps_fix is not None: 17 | fps = fps_fix 18 | length = min(grid_data.shape[0], len(frame_array)) 19 | seq_length = 100 20 | seq = length//seq_length 21 | writer = cv2.VideoWriter(save_path,cv2.VideoWriter_fourcc(*'mp4v'), fps, size) 22 | for i in range(seq+1): 23 | if seq_length*i==length: 24 | break 25 | print("Frame: "+str(i*seq_length)+"/"+str(length)) 26 | frame_array_save = warpping_rast(grid_data[seq_length*i:min(seq_length*(i+1),length)], frame_array[seq_length*i:min(seq_length*(i+1),length)], losses = losses) 27 | save_video(save_path,frame_array_save, fps, size, losses = losses, frame_number = frame_number, writer = writer) 28 | writer.release() 29 | 30 | def warpping_rast(grid_data, frame_array, losses = None): 31 | output = [] 32 | for i in range(0, min(len(frame_array), grid_data.shape[0])): 33 | frame = warpping_one_frame_rast(frame_array[i], grid_data[i]) 34 | output.append(frame) 35 | return output 36 | 37 | def warpping_one_frame_rast(image, grid): 38 | img = torch.Tensor(image).permute(2,0,1)/255 39 | grid = torch.Tensor(grid) 40 | output_image = Rasterization(img, grid) 41 | return np.clip(output_image.permute(1,2,0).numpy() * 255, 0, 255).astype("uint8") 42 | --------------------------------------------------------------------------------
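
Editor's note: the mesh format consumed by warp_video is easiest to see with a small sanity check. The sketch below is not part of the repository; it builds an identity mesh, with the warped coordinates in channels 0:2 equal to the source coordinates in channels 2:4 and both normalized to [0, 1], as inferred from Rasterization/grid_to_triangle in rasterizer.py, and re-renders a clip through the rasterizer, so the output video should closely match the input. The 13x13 grid resolution, the clip length, and the file paths are placeholder assumptions, and rasterizer.py requires a CUDA device.

import numpy as np
from warp import warp_video  # run from the dvs/ directory so the warp package resolves

# Identity mesh: warped vertex positions (channels 0:2) equal the source positions
# (channels 2:4), both normalized to [0, 1].
gh, gw, n_frames = 13, 13, 30               # assumed mesh resolution and clip length
gx, gy = np.meshgrid(np.linspace(0.0, 1.0, gw), np.linspace(0.0, 1.0, gh))
identity = np.stack([gx, gy, gx, gy], axis=-1)        # [gh, gw, 4], warped == source
mesh = np.repeat(identity[None], n_frames, axis=0)    # one grid per frame: [N, gh, gw, 4]

# Placeholder paths; memory use grows with frame size, so a short,
# low-resolution clip is assumed here.
warp_video(mesh, "./video/example_small.mp4", "./test/identity_warp.mp4")

With a non-trivial mesh, only the first two channels change per frame; the last two stay on the regular source lattice, which is how the stabilization network's per-frame warp grids are expected to be laid out.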