├── LICENSE ├── README.md ├── data ├── README.md └── data_prepare.py ├── data_gen.py ├── docs └── cover.png ├── eval.py ├── eval_cmp.py ├── models ├── __init__.py ├── baseline.py ├── bmwnet.py ├── dronet.py ├── flowdrivenet.py ├── pilotnet.py └── pointnet.py ├── tf_ops ├── 3d_interpolation │ ├── __pycache__ │ │ └── tf_interpolate.cpython-35.pyc │ ├── interpolate.cpp │ ├── tf_interpolate.cpp │ ├── tf_interpolate.py │ ├── tf_interpolate_compile.sh │ ├── tf_interpolate_op_test.py │ ├── tf_interpolate_so.so │ └── visu_interpolation.py ├── grouping │ ├── __pycache__ │ │ └── tf_grouping.cpython-35.pyc │ ├── test │ │ ├── compile.sh │ │ ├── query_ball_point.cpp │ │ ├── query_ball_point.cu │ │ ├── query_ball_point_block.cu │ │ ├── query_ball_point_grid.cu │ │ ├── selection_sort.cpp │ │ ├── selection_sort.cu │ │ └── selection_sort_const.cu │ ├── tf_grouping.cpp │ ├── tf_grouping.py │ ├── tf_grouping_compile.sh │ ├── tf_grouping_g.cu │ ├── tf_grouping_g.cu.o │ ├── tf_grouping_op_test.py │ └── tf_grouping_so.so └── sampling │ ├── __pycache__ │ └── tf_sampling.cpython-35.pyc │ ├── tf_sampling.cpp │ ├── tf_sampling.py │ ├── tf_sampling_compile.sh │ ├── tf_sampling_g.cu │ ├── tf_sampling_g.cu.o │ └── tf_sampling_so.so ├── train.py ├── train_cmp.py ├── train_multi_gpus.py └── utils ├── README.md ├── __init__.py ├── compile_render_balls_so.sh ├── pc_util.py ├── pointnet_util.py ├── provider.py ├── render_balls_so.cpp ├── render_balls_so.so ├── show3d_balls.py └── tf_util.py /LICENSE: -------------------------------------------------------------------------------- 1 | Apache License 2 | Version 2.0, January 2004 3 | http://www.apache.org/licenses/ 4 | 5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 6 | 7 | 1. Definitions. 8 | 9 | "License" shall mean the terms and conditions for use, reproduction, 10 | and distribution as defined by Sections 1 through 9 of this document. 11 | 12 | "Licensor" shall mean the copyright owner or entity authorized by 13 | the copyright owner that is granting the License. 14 | 15 | "Legal Entity" shall mean the union of the acting entity and all 16 | other entities that control, are controlled by, or are under common 17 | control with that entity. For the purposes of this definition, 18 | "control" means (i) the power, direct or indirect, to cause the 19 | direction or management of such entity, whether by contract or 20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 21 | outstanding shares, or (iii) beneficial ownership of such entity. 22 | 23 | "You" (or "Your") shall mean an individual or Legal Entity 24 | exercising permissions granted by this License. 25 | 26 | "Source" form shall mean the preferred form for making modifications, 27 | including but not limited to software source code, documentation 28 | source, and configuration files. 29 | 30 | "Object" form shall mean any form resulting from mechanical 31 | transformation or translation of a Source form, including but 32 | not limited to compiled object code, generated documentation, 33 | and conversions to other media types. 34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 
39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. 
You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. 
You are solely responsible for determining the 150 | appropriateness of using or redistributing the Work and assume any 151 | risks associated with Your exercise of permissions under this License. 152 | 153 | 8. Limitation of Liability. In no event and under no legal theory, 154 | whether in tort (including negligence), contract, or otherwise, 155 | unless required by applicable law (such as deliberate and grossly 156 | negligent acts) or agreed to in writing, shall any Contributor be 157 | liable to You for damages, including any direct, indirect, special, 158 | incidental, or consequential damages of any character arising as a 159 | result of this License or out of the use or inability to use the 160 | Work (including but not limited to damages for loss of goodwill, 161 | work stoppage, computer failure or malfunction, or any and all 162 | other commercial damages or losses), even if such Contributor 163 | has been advised of the possibility of such damages. 164 | 165 | 9. Accepting Warranty or Additional Liability. While redistributing 166 | the Work or Derivative Works thereof, You may choose to offer, 167 | and charge a fee for, acceptance of support, warranty, indemnity, 168 | or other liability obligations and/or rights consistent with this 169 | License. However, in accepting such obligations, You may act only 170 | on Your own behalf and on Your sole responsibility, not on behalf 171 | of any other Contributor, and only if You agree to indemnify, 172 | defend, and hold each Contributor harmless for any liability 173 | incurred by, or claims asserted against, such Contributor by reason 174 | of your accepting any such warranty or additional liability. 175 | 176 | END OF TERMS AND CONDITIONS 177 | 178 | APPENDIX: How to apply the Apache License to your work. 179 | 180 | To apply the Apache License to your work, attach the following 181 | boilerplate notice, with the fields enclosed by brackets "[]" 182 | replaced with your own identifying information. (Don't include 183 | the brackets!) The text should be enclosed in the appropriate 184 | comment syntax for the file format. We also recommend that a 185 | file or class name and description of purpose be included on the 186 | same "printed page" as the copyright notice for easier 187 | identification within third-party archives. 188 | 189 | Copyright [yyyy] [name of copyright owner] 190 | 191 | Licensed under the Apache License, Version 2.0 (the "License"); 192 | you may not use this file except in compliance with the License. 193 | You may obtain a copy of the License at 194 | 195 | http://www.apache.org/licenses/LICENSE-2.0 196 | 197 | Unless required by applicable law or agreed to in writing, software 198 | distributed under the License is distributed on an "AS IS" BASIS, 199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 200 | See the License for the specific language governing permissions and 201 | limitations under the License. 202 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | 8 | # FlowDriveNet 9 | 10 | ## 1. Introduction 11 |
12 | 
13 | Learning driving policies with an end-to-end network has proved to be a promising solution for autonomous driving. However, because no benchmark driver-behavior dataset contains both visual and LiDAR data, existing works focus solely on learning to drive from visual sensors. Moreover, most works only predict the steering angle and neglect the more challenging problem of vehicle speed control.
14 | 
15 | In this project, we release our __Code__ and __Dataset__ for training FlowDriveNet, a novel end-to-end network that jointly exploits sequential visual data and LiDAR data to predict the steering angle and vehicle speed.
16 | 
17 | ## 2. Requirements
18 | * Python 3.x
19 | * TensorFlow 1.x
20 | * Python libraries: numpy, scipy and __laspy__
21 | 
22 | ## 3. Dataset
23 | ### 3.1 Udacity CH2-LiDAR
24 | The dataset used in this project is derived from the Udacity `CH2` dataset, a popular open-source driving dataset for vision-based driving-learning methods. However, the original dataset does not contain LiDAR data, so we extract the raw point cloud data from the original ROS bag file and remove its distortions.
25 | 
26 | **Pipeline for creating this data:**
27 | - Extracting images (center, right, left) and the corresponding label file from the bag file. (6311, 6311, 6288)
28 | - Extracting point cloud data from the bag file and saving it to separate pcd files. (3096)
29 |   - fix the calibration file
30 |   - convert the raw Velodyne packet topic to a PointCloud2 topic
31 |   - save to pcd files using a ROS node
32 | - Registering the point clouds with images and labels using timestamps. (3096)
33 | 
34 | **Related tools:**
35 | - Data extraction tool: [udacity launch](https://github.com/wsustcid/self-driving-car/tree/master/datasets/udacity_launch)
36 | - Optical Flow and Point Flow extraction tool: see `./data/data_prepare.py` for implementation details.
37 | 
38 | 
39 | ## 4. Train & Evaluation
40 | ```bash
41 | # single GPU
42 | python train.py --data_root xx --input_config xx ...
43 | 
44 | # multiple GPUs
45 | python train_multi_gpus.py
46 | 
47 | # evaluation
48 | python eval.py
49 | ```
50 | 
51 | ## 5. Citation
52 | ```
53 | @inproceedings{wang,
54 |   title={FlowDriveNet: An End-to-End Network for Learning Driving Policies from Image Optical Flow and LiDAR Point Flow},
55 |   author={Shuai Wang and Jiahu Qin and Menglin Li and Yaonan Wang},
56 |   booktitle={IEEE International Conference on Robotics and Automation (ICRA)},
57 |   year={2021}
58 | }
59 | ```
60 | 
61 | ## 6. License
62 | This project is released under the [Apache License](LICENSE).
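
As a quick illustration of the optical-flow step described in Section 3.1, the sketch below computes a dense flow field between two consecutive grayscale frames and dumps it to a raw `.bin` file. This is only a minimal sketch: the actual implementation lives in `ImageProcessor.compute_flow` / `save_flow` inside `./data/data_prepare.py`, and both the Farneback algorithm and the file names here are assumptions, not the project's confirmed choices.

```python
import cv2
import numpy as np

# Two consecutive center-camera frames, loaded as (480, 640) uint8 grayscale
# (hypothetical file names).
prev_img = cv2.imread('center/frame_0001.jpg', cv2.IMREAD_GRAYSCALE)
next_img = cv2.imread('center/frame_0002.jpg', cv2.IMREAD_GRAYSCALE)

# Farneback dense optical flow: one (dx, dy) vector per pixel -> (480, 640, 2) float32
flow = cv2.calcOpticalFlowFarneback(prev_img, next_img, None,
                                    0.5, 3, 15, 3, 5, 1.2, 0)

# Store the raw float32 buffer, mirroring the '<name>.bin' files written by data_prepare.py
flow.astype(np.float32).tofile('center_bin/frame_0002.bin')
```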
63 | 
64 | 
-------------------------------------------------------------------------------- /data/README.md: --------------------------------------------------------------------------------
1 | 
9 | # DATASET
10 | 
11 | ## Udacity CH2-LiDAR
12 | 
13 | ```
14 | CH2 folder tree:
15 | ├── CH2_001            # test set
16 | │   ├── center
17 | │   ├── center.csv
18 | │   ├── left
19 | │   ├── left.csv
20 | │   ├── points
21 | │   ├── right
22 | │   └── right.csv
23 | └── CH2_002            # train set
24 |     ├── HMB_1
25 |     │   ├── center
26 |     │   ├── center.csv
27 |     │   ├── left
28 |     │   ├── left.csv
29 |     │   ├── points
30 |     │   ├── right
31 |     │   └── right.csv
32 |     └── HMB_2; _4, _5, _6
33 | ```
34 | 
35 | ## LiVi-Set
-------------------------------------------------------------------------------- /data/data_prepare.py: --------------------------------------------------------------------------------
1 | '''
2 | @Author: Shuai Wang
3 | @Github: https://github.com/wsustcid
4 | @Version: 1.0.0
5 | @Date: 2020-09-07 21:08:55
6 | @LastEditTime: 2020-09-09 15:19:37
7 | @Description:
8 | '''
9 | import os, sys
10 | import time
11 | import numpy as np
12 | import open3d as o3d  # point cloud I/O and processing used in create_point_flow
13 | base_dir = os.path.dirname(os.path.abspath(__file__))
14 | sys.path.append(os.path.join(base_dir, '../tools/'))
15 | from label_loading import load_image_path, load_image_path_all, load_clouds_path
16 | from data_processor import ImageProcessor, CloudProcessor
17 | 
18 | def create_optical_flow(data_root, set_type, dtype):
19 |     '''
20 |     Processing Procedure:
21 |       1. Load two consecutive images in grayscale format (uint8, 480, 640)
22 |       2. Compute the optical flow (float32, 480, 640)
23 |       3. Save the optical flow to a bin file
24 |     '''
25 |     # load image paths
26 |     if set_type == 'train':
27 |         data_path = os.path.join(data_root, 'CH2_002')
28 |         images_path = load_image_path_all(data_path, dtype)
29 |     elif set_type == 'test':
30 |         HMB_path = os.path.join(data_root, "CH2_001")
31 |         images_path = load_image_path(HMB_path, dtype)
32 | 
33 |     imp = ImageProcessor()
34 |     start = time.time()
35 |     for i in range(len(images_path)):
36 |         # load image
37 |         if i == 0:
38 |             pre_img_path = images_path[0]
39 |             pre_img = imp.load_image(pre_img_path)
40 | 
41 |         end_img_path = images_path[i]
42 |         end_img = imp.load_image(end_img_path)
43 | 
44 |         # compute flow
45 |         flow = imp.compute_flow(pre_img, end_img)
46 | 
47 |         # save to bin
48 |         flow_name = end_img_path.split('/')[-1]
49 |         bin_path = os.path.join(os.path.dirname(end_img_path), '../'+dtype+'_bin', flow_name[:-4]+'.bin')
50 |         imp.save_flow(flow, bin_path)
51 | 
52 |         pre_img = end_img
53 | 
54 |     end = time.time()
55 |     print("Procedure Duration: {} s".format(end-start))
56 | 
57 | 
58 | def create_point_flow(HMB_path):
59 |     """
60 |     Input: the HMB path
61 |     Pipeline:
62 |       1. load point cloud from pcd file
63 |       2. cloud process
64 |       3. 
compute flow and save to .bin file 65 | """ 66 | start = time.time() 67 | 68 | # load cloud path 69 | clouds_path = load_clouds_path(HMB_path) 70 | 71 | clp = CloudProcessor() 72 | 73 | for i in range(len(clouds_path)): 74 | if i == 0: 75 | cloud_pre_path = clouds_path[0] 76 | cloud_pre = o3d.io.read_point_cloud(cloud_pre_path) 77 | cloud_pre = clp.cloud_process(cloud_pre) 78 | 79 | cloud_path = clouds_path[i] 80 | cloud = o3d.io.read_point_cloud(cloud_path) 81 | cloud = clp.cloud_process(cloud) 82 | 83 | # save to bin 84 | cloud_name = cloud_path.split('/')[-1] 85 | bin_path = os.path.join(os.path.dirname(cloud_path), '../points_bin',cloud_name[:-4]+'.bin') 86 | clp.save_cloud_flow(cloud, cloud_pre, bin_path) 87 | 88 | cloud_pre = o3d.geometry.PointCloud() 89 | cloud_pre.points = cloud.points 90 | 91 | end = time.time() 92 | print("Procedure Duration: {} s".format(end-start)) 93 | 94 | if __name__ == '__main__': 95 | data_root = '/media/ubuntu16/Documents/Datasets/Udacity/CH2' 96 | 97 | create_optical_flow(data_root, set_type='train', dtype='right') 98 | 99 | 100 | -------------------------------------------------------------------------------- /docs/cover.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wsustcid/FlowDriveNet/3604495269ae45e5b43964046104f685ec66e383/docs/cover.png -------------------------------------------------------------------------------- /eval.py: -------------------------------------------------------------------------------- 1 | ''' 2 | @Author: Shuai Wang 3 | @Github: https://github.com/wsustcid 4 | @Version: 1.0.0 5 | @Date: 2020-09-11 23:42:23 6 | @LastEditTime: 2020-10-13 22:32:20 7 | ''' 8 | 9 | import os 10 | import sys 11 | import argparse 12 | from datetime import datetime 13 | import time 14 | from tqdm import tqdm 15 | import time 16 | 17 | import numpy as np 18 | import tensorflow as tf 19 | 20 | base_dir = os.path.dirname(os.path.abspath(__file__)) 21 | sys.path.append(base_dir) 22 | from utils.tf_util import log_string 23 | 24 | from data_gen import DataLoader 25 | from models.flowdrivenet import FlowDriveNet 26 | 27 | parser = argparse.ArgumentParser() 28 | parser.add_argument('--data_root', default='/media/ubuntu16/Documents/Datasets/Udacity/CH2', 29 | help='data_root path [default: local path]') 30 | parser.add_argument('--input_cfg', default='GRAY', 31 | help='Input type: GRAY, GRAYF, GRAYF-T, XYZ, XYZF, XYZF-T, GRAYF-XYZF-T') 32 | parser.add_argument('--model_cfg', default='VFE', 33 | help='Model type: VFE, VFE-TFP, PFE, PFE-TFP, VFE-PFE-TFP') 34 | parser.add_argument('--height', type=int, default=200, help='img height') 35 | parser.add_argument('--width', type=int, default=200, help='img width') 36 | parser.add_argument('--seq_len', type=int, default=5, help='sel length') 37 | parser.add_argument('--aug_cfg', default='None', help='None, IA, RP, SC, BA, BS') 38 | #parser.add_argument('--use_side_cam', default=False, action='store_true') 39 | parser.add_argument('--num_point', type=int, default=10000, help='Point N') 40 | parser.add_argument('--log_dir', default='test', 41 | help='Log dir [default: test]') 42 | parser.add_argument('--batch_size', type=int, default=1, 43 | help='Batch Size during training [default: 16]') 44 | parser.add_argument('--decay_steps', type=int, default=300000, 45 | help='Decay step for lr decay [default: 200000]') # decay_steps = n_train * epochs 46 | parser.add_argument('--decay_rate', type=float, default=0.7, 47 | help='Decay rate for lr decay 
[default: 0.7]') 48 | parser.add_argument('--model_file', default='/media/ubuntu16/F/FlowDriveNet/logs/VFE/gray_base/model_best.ckpt', 49 | help='the model path to be evaluated') 50 | 51 | 52 | FLAGS = parser.parse_args() 53 | 54 | BATCH_SIZE = FLAGS.batch_size 55 | 56 | log_dir = os.path.join(base_dir, 'logs', FLAGS.log_dir) 57 | os.makedirs(log_dir, exist_ok=True) 58 | test_log_dir = os.path.join(log_dir, 'log_test.txt') 59 | log_string(test_log_dir, str(FLAGS)+'\n') 60 | 61 | # 62 | dataloader = DataLoader(FLAGS.data_root, FLAGS.input_cfg, 63 | FLAGS.height, FLAGS.width, 64 | FLAGS.seq_len, 65 | FLAGS.num_point, 66 | FLAGS.aug_cfg) 67 | model = FlowDriveNet(FLAGS.input_cfg, FLAGS.model_cfg, 68 | FLAGS.height, FLAGS.width, FLAGS.seq_len, FLAGS.num_point) 69 | 70 | def get_bn_decay(batch): 71 | bn_momentum = tf.train.exponential_decay( 72 | 0.5, 73 | batch*BATCH_SIZE, 74 | float(FLAGS.decay_steps), 75 | 0.5, 76 | staircase=True) 77 | bn_decay = tf.minimum(0.99, 1 - bn_momentum) 78 | return bn_decay 79 | 80 | def eval(): 81 | with tf.Graph().as_default(): 82 | image_pl, points_pl, _ = model.get_inputs_pl(BATCH_SIZE) 83 | is_training_pl = tf.placeholder(tf.bool, shape=()) 84 | # define global_step; optimizer will increase it in every training loop 85 | batch = tf.get_variable('batch', [], 86 | initializer=tf.constant_initializer(0), 87 | trainable=False) 88 | bn_decay = get_bn_decay(batch) 89 | 90 | pred = model.get_model(image_pl, points_pl, is_training_pl, bn_decay) 91 | 92 | # Create a session 93 | config = tf.ConfigProto() 94 | config.gpu_options.allow_growth = True 95 | config.allow_soft_placement = True 96 | config.log_device_placement = False 97 | sess = tf.Session(config=config) 98 | 99 | # Init variables 100 | init = tf.global_variables_initializer() 101 | sess.run(init) 102 | 103 | # restore model 104 | saver = tf.train.Saver() 105 | saver.restore(sess, FLAGS.model_file) 106 | 107 | # save all tensor 108 | ops = {'image_pl': image_pl, 109 | 'points_pl': points_pl, 110 | 'is_training_pl': is_training_pl, 111 | 'pred': pred} 112 | 113 | ## evaluation 114 | is_training = False 115 | num_batches = dataloader.num_test // BATCH_SIZE 116 | rmse_angle_sum = 0.0 117 | rmse_speed_sum = 0.0 118 | result_all = np.zeros((0,4)) # pred_a, pred_s, label_a, label_s 119 | 120 | time_sum = 0.0 121 | for i in tqdm(range(num_batches)): 122 | X_image_batch, X_cloud_batch, y_batch = dataloader.load_test_batch(BATCH_SIZE) 123 | 124 | feed_dict = {ops['image_pl']: X_image_batch, 125 | ops['points_pl']: X_cloud_batch, 126 | ops['is_training_pl']: is_training} 127 | t1 = time.time() 128 | pred_batch = sess.run(ops['pred'],feed_dict=feed_dict) 129 | t2 = time.time() 130 | time_sum += (t2-t1) 131 | result_batch = np.hstack((pred_batch, y_batch)) 132 | result_all = np.concatenate((result_all, result_batch), axis=0) 133 | 134 | 135 | np.savetxt(os.path.join(log_dir, 'results.csv'), result_all, delimiter=",") 136 | # b = np.loadtxt("temp.csv", delimiter=",") 137 | 138 | rmse_angle = np.sqrt(np.mean(np.square(result_all[:,0] - result_all[:,2]))) 139 | rmse_speed = np.sqrt(np.mean(np.square(result_all[:,1] - result_all[:,3]))) 140 | log_string(test_log_dir, 'Test rmse_angle: %f' % (rmse_angle)) 141 | log_string(test_log_dir, 'Test rmse_speed: %f' % (rmse_speed)) 142 | log_string(test_log_dir, 'Test rmse_average: %f' % ((rmse_angle+rmse_speed)/2)) 143 | log_string(test_log_dir, 'Test FPS: %f' % (1/(time_sum/num_batches))) 144 | 145 | 146 | if __name__ == "__main__": 147 | eval() 148 | 
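
A note on the metrics above: `eval()` dumps every prediction/label pair to `results.csv` with the columns `[pred_angle, pred_speed, label_angle, label_speed]`, so the reported RMSE values can be recomputed offline. A minimal sketch, assuming the default `--log_dir test`:

```python
import numpy as np

# Columns: pred_angle, pred_speed, label_angle, label_speed
result_all = np.loadtxt('logs/test/results.csv', delimiter=',')

rmse_angle = np.sqrt(np.mean(np.square(result_all[:, 0] - result_all[:, 2])))
rmse_speed = np.sqrt(np.mean(np.square(result_all[:, 1] - result_all[:, 3])))

print('rmse_angle:   %f' % rmse_angle)
print('rmse_speed:   %f' % rmse_speed)
print('rmse_average: %f' % ((rmse_angle + rmse_speed) / 2))
```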
-------------------------------------------------------------------------------- /eval_cmp.py: -------------------------------------------------------------------------------- 1 | ''' 2 | @Author: Shuai Wang 3 | @Github: https://github.com/wsustcid 4 | @Version: 1.0.0 5 | @Date: 2020-09-11 23:42:23 6 | @LastEditTime: 2020-09-28 22:43:54 7 | ''' 8 | 9 | import os 10 | import sys 11 | import argparse 12 | from datetime import datetime 13 | import time 14 | from tqdm import tqdm 15 | 16 | import numpy as np 17 | import tensorflow as tf 18 | 19 | base_dir = os.path.dirname(os.path.abspath(__file__)) 20 | sys.path.append(base_dir) 21 | from utils.tf_util import log_string 22 | 23 | from data_gen import DataLoader 24 | from models.flowdrivenet import FlowDriveNet 25 | 26 | parser = argparse.ArgumentParser() 27 | parser.add_argument('--data_root', default='/media/ubuntu16/Documents/Datasets/Udacity/CH2', 28 | help='data_root path [default: local path]') 29 | parser.add_argument('--input_cfg', default='BGR', 30 | help='Input type: BGR, GRAYF-T, XYZ, GRAY') 31 | parser.add_argument('--model_cfg', default='PilotNet', 32 | help='Model type: PilotNet, BMWNet, PointNet, DroNet') 33 | parser.add_argument('--use_side_cam', default=False, action='store_true') 34 | parser.add_argument('--log_dir', default='test', 35 | help='Log dir [default: test]') 36 | parser.add_argument('--batch_size', type=int, default=1, 37 | help='Batch Size during training [default: 16]') 38 | parser.add_argument('--decay_steps', type=int, default=300000, 39 | help='Decay step for lr decay [default: 200000]') # decay_steps = n_train * epochs 40 | parser.add_argument('--decay_rate', type=float, default=0.7, 41 | help='Decay rate for lr decay [default: 0.7]') 42 | parser.add_argument('--model_file', default='/media/ubuntu16/F/FlowDriveNet/logs/VFE/gray_base/model_best.ckpt', 43 | help='the model path to be evaluated') 44 | 45 | 46 | FLAGS = parser.parse_args() 47 | 48 | BATCH_SIZE = FLAGS.batch_size 49 | 50 | log_dir = os.path.join(base_dir, 'logs', FLAGS.log_dir) 51 | os.makedirs(log_dir, exist_ok=True) 52 | test_log_dir = os.path.join(log_dir, 'log_test.txt') 53 | log_string(test_log_dir, str(FLAGS)+'\n') 54 | 55 | # 56 | if FLAGS.model_cfg == 'PilotNet': 57 | from models.pilotnet import PilotNet 58 | dataloader = DataLoader(FLAGS.data_root, "BGR", 59 | height=66, width=200, 60 | seq_len=None, 61 | num_point=None, 62 | use_side_cam=FLAGS.use_side_cam) 63 | model = PilotNet() 64 | elif FLAGS.model_cfg == 'BMWNet': 65 | from models.bmwnet import BMWNet 66 | # TODO add seq_len 67 | dataloader = DataLoader(FLAGS.data_root, 'GRAYF-T', 68 | height=66, width=200, 69 | seq_len=10, 70 | num_point=None, 71 | use_side_cam=FLAGS.use_side_cam) 72 | model = BMWNet() 73 | elif FLAGS.model_cfg == 'PointNet': 74 | from models.pointnet import PointNet 75 | dataloader = DataLoader(FLAGS.data_root, 'XYZ', 76 | height=None, width=None, 77 | seq_len=None, 78 | num_point=10000, 79 | use_side_cam=FLAGS.use_side_cam) 80 | model = PointNet(num_point=10000) 81 | elif FLAGS.model_cfg == 'DroNet': 82 | from models.dronet import DroNet 83 | dataloader = DataLoader(FLAGS.data_root, 'GRAY', 84 | height=200, width=200, 85 | seq_len=None, 86 | num_point=None, 87 | use_side_cam=FLAGS.use_side_cam) 88 | model = DroNet() 89 | else: 90 | raise TypeError 91 | 92 | def get_bn_decay(batch): 93 | bn_momentum = tf.train.exponential_decay( 94 | 0.5, 95 | batch*BATCH_SIZE, 96 | float(FLAGS.decay_steps), 97 | 0.5, 98 | staircase=True) 99 | bn_decay = tf.minimum(0.99, 1 - 
bn_momentum) 100 | return bn_decay 101 | 102 | def eval(): 103 | with tf.Graph().as_default(): 104 | feature_pl, _ = model.get_inputs_pl(BATCH_SIZE) 105 | is_training_pl = tf.placeholder(tf.bool, shape=()) 106 | # define global_step; optimizer will increase it in every training loop 107 | batch = tf.get_variable('batch', [], 108 | initializer=tf.constant_initializer(0), 109 | trainable=False) 110 | bn_decay = get_bn_decay(batch) 111 | 112 | pred = model.get_model(feature_pl, is_training_pl, bn_decay) 113 | 114 | # Create a session 115 | config = tf.ConfigProto() 116 | config.gpu_options.allow_growth = True 117 | config.allow_soft_placement = True 118 | config.log_device_placement = False 119 | sess = tf.Session(config=config) 120 | 121 | # Init variables 122 | init = tf.global_variables_initializer() 123 | sess.run(init) 124 | 125 | # restore model 126 | saver = tf.train.Saver() 127 | saver.restore(sess, FLAGS.model_file) 128 | 129 | # save all tensor 130 | ops = {'feature_pl': feature_pl, 131 | 'is_training_pl': is_training_pl, 132 | 'pred': pred} 133 | 134 | ## evaluation 135 | is_training = False 136 | num_batches = dataloader.num_test // BATCH_SIZE 137 | rmse_angle_sum = 0.0 138 | rmse_speed_sum = 0.0 139 | if FLAGS.model_cfg in ['PilotNet', 'BMWNet', 'PointNet']: 140 | result_all = np.zeros((0,2)) 141 | elif FLAGS.model_cfg == 'DroNet': 142 | result_all = np.zeros((0,4)) # pred_a, pred_s, label_a, label_s 143 | else: 144 | raise TypeError 145 | 146 | time_sum = 0.0 147 | for i in tqdm(range(num_batches)): 148 | if FLAGS.model_cfg in ['PilotNet', 'BMWNet']: 149 | X_batch, y = dataloader.load_image_test_batch(BATCH_SIZE) 150 | y_batch = y[:,0:1] 151 | elif FLAGS.model_cfg == 'PointNet': 152 | X_batch, y = dataloader.load_cloud_test_batch(BATCH_SIZE) 153 | y_batch = y[:,1:2] 154 | elif FLAGS.model_cfg == 'DroNet': 155 | X_batch, y_batch = dataloader.load_image_test_batch(BATCH_SIZE) 156 | else: 157 | raise TypeError 158 | 159 | feed_dict = {ops['feature_pl']: X_batch, 160 | ops['is_training_pl']: is_training} 161 | t1 = time.time() 162 | pred_batch = sess.run(ops['pred'],feed_dict=feed_dict) 163 | t2 = time.time() 164 | time_sum += (t2-t1) 165 | result_batch = np.hstack((pred_batch, y_batch)) 166 | result_all = np.concatenate((result_all, result_batch), axis=0) 167 | 168 | 169 | 170 | np.savetxt(os.path.join(log_dir, 'results.csv'), result_all, delimiter=",") 171 | # b = np.loadtxt("temp.csv", delimiter=",") 172 | if FLAGS.model_cfg in ['PilotNet', 'BMWNet']: 173 | rmse_angle = np.sqrt(np.mean(np.square(result_all[:,0] - result_all[:,1]))) 174 | log_string(test_log_dir, 'Test rmse_angle: %f' % (rmse_angle)) 175 | log_string(test_log_dir, 'Test FPS: %f' % (1/(time_sum/num_batches))) 176 | elif FLAGS.model_cfg == 'PointNet': 177 | rmse_speed = np.sqrt(np.mean(np.square(result_all[:,0] - result_all[:,1]))) 178 | log_string(test_log_dir, 'Test rmse_speed: %f' % (rmse_speed)) 179 | log_string(test_log_dir, 'Test FPS: %f' % (1/(time_sum/num_batches))) 180 | elif FLAGS.model_cfg == 'DroNet': 181 | rmse_angle = np.sqrt(np.mean(np.square(result_all[:,0] - result_all[:,2]))) 182 | rmse_speed = np.sqrt(np.mean(np.square(result_all[:,1] - result_all[:,3]))) 183 | log_string(test_log_dir, 'Test rmse_angle: %f' % (rmse_angle)) 184 | log_string(test_log_dir, 'Test rmse_speed: %f' % (rmse_speed)) 185 | log_string(test_log_dir, 'Test rmse_average: %f' % ((rmse_angle+rmse_speed)/2)) 186 | log_string(test_log_dir, 'Test FPS: %f' % (1/(time_sum/num_batches))) 187 | else: 188 | raise TypeError 189 | 190 | 
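# Example invocations (a sketch -- the checkpoint paths are placeholders, not the
# authors' actual ones; --data_root defaults to the path hard-coded above):
#   python eval_cmp.py --model_cfg PilotNet --model_file logs/pilotnet/model_best.ckpt
#   python eval_cmp.py --model_cfg PointNet --model_file logs/pointnet/model_best.ckpt
#   python eval_cmp.py --model_cfg DroNet   --model_file logs/dronet/model_best.ckpt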
191 | if __name__ == "__main__": 192 | eval() 193 | -------------------------------------------------------------------------------- /models/__init__.py: -------------------------------------------------------------------------------- 1 | ''' 2 | @Author: Shuai Wang 3 | @Github: https://github.com/wsustcid 4 | @Version: 1.0.0 5 | @Date: 1970-01-01 08:00:00 6 | @LastEditTime: 2020-06-09 17:43:19 7 | @Description: 8 | ''' 9 | -------------------------------------------------------------------------------- /models/baseline.py: -------------------------------------------------------------------------------- 1 | ''' 2 | @Author: Shuai Wang 3 | @Github: https://github.com/wsustcid 4 | @Version: 1.0.0 5 | @Date: 2020-08-04 19:43:43 6 | @LastEditTime: 2020-09-13 22:39:59 7 | @Description: 8 | ''' 9 | import os 10 | import sys 11 | import numpy as np 12 | 13 | base_dir = os.path.dirname(os.path.abspath(__file__)) 14 | sys.path.append(os.path.join(base_dir, '../')) 15 | 16 | from data_gen import DataLoader 17 | 18 | 19 | if __name__ == '__main__': 20 | data_root='/media/ubuntu16/Documents/Datasets/Udacity/CH2' 21 | dataloader = DataLoader(data_root, input_cfg='GRAY', height=200, width=200, use_side_cam=False) 22 | 23 | label_train = dataloader.y_train 24 | label_val = dataloader.y_val 25 | label_test = dataloader.y_test 26 | 27 | pred_train = np.zeros(label_train.shape, dtype=np.float32) 28 | pred_val = np.zeros(label_val.shape, dtype=np.float32) 29 | pred_test = np.zeros(label_test.shape, dtype=np.float32) 30 | 31 | rmse_train = np.sqrt(np.mean(np.square(label_train-pred_train), axis=0)) 32 | rmse_val = np.sqrt(np.mean(np.square(label_val-pred_val), axis=0)) 33 | rmse_test = np.sqrt(np.mean(np.square(label_test-pred_test), axis=0)) 34 | 35 | print("Train: rmse_angle: {}; rmse_speed: {}; rmse_all: {}".format(rmse_train[0], rmse_train[1], np.mean(rmse_train))) 36 | print("Val: rmse_angle: {}; rmse_speed: {}; rmse_all: {}".format(rmse_val[0], rmse_val[1], np.mean(rmse_val))) 37 | print("Test: rmse_angle: {}; rmse_speed: {}; rmse_all: {}".format(rmse_test[0], rmse_test[1], np.mean(rmse_test))) -------------------------------------------------------------------------------- /models/bmwnet.py: -------------------------------------------------------------------------------- 1 | ''' 2 | @Author: Shuai Wang 3 | @Github: https://github.com/wsustcid 4 | @Version: 1.0.0 5 | @Date: 2020-05-28 22:19:51 6 | @LastEditTime: 2020-09-28 22:02:40 7 | ''' 8 | 9 | import os 10 | import sys 11 | import numpy as np 12 | import tensorflow as tf 13 | BASE_DIR = os.path.dirname(os.path.abspath(__file__)) 14 | sys.path.append(os.path.join(BASE_DIR, '..')) 15 | 16 | from utils import tf_util 17 | 18 | 19 | class BMWNet(object): 20 | def __init__(self, height=66, width=200, seq_len=10): 21 | self.height = height 22 | self.width = width 23 | self.seq_len = seq_len 24 | 25 | self.output_dim = 1 26 | 27 | self.drop_rate = 0.5 28 | 29 | 30 | def get_inputs_pl(self, batch_size): 31 | image_pl = tf.placeholder(tf.float32, shape=(batch_size, self.seq_len, self.height, self.width, 3)) 32 | label_pl = tf.placeholder(tf.float32, shape=(batch_size, self.output_dim)) 33 | 34 | return image_pl, label_pl 35 | 36 | 37 | def get_model(self, image_pl, is_training, bn_decay=None): 38 | """ BMWNet model 39 | Args: 40 | - input: input Tensor: (B,T,66,200,3) 41 | - is_training: A bool flag used for dropout 42 | Return: 43 | - output: output Tensor (B,2) 44 | """ 45 | 46 | img_shape = image_pl.get_shape().as_list()[2:] 47 | input_img = 
tf.reshape(image_pl, (-1,*img_shape)) # (B*T,H,W,C) 48 | 49 | X = tf.layers.conv2d(input_img, 24, [5,5], strides=[2,2], 50 | activation=tf.nn.relu, 51 | padding='VALID', name='conv1') # (B*T,31,98,24) 52 | X = tf.layers.conv2d(X, 36, [5,5], strides=[2,2], 53 | activation=tf.nn.relu, 54 | padding='VALID', name='conv2') # (B*T,14,47,36) 55 | X = tf.layers.conv2d(X, 48, [5,5], strides=[2,2], 56 | activation=tf.nn.relu, 57 | padding='VALID', name='conv3') # (B*T,5,22,48) 58 | X = tf.layers.conv2d(X, 64, [3,3], strides=[1,1], 59 | activation=tf.nn.relu, 60 | padding='VALID', name='conv4') # (B*T,3,20,64) 61 | X = tf.layers.conv2d(X, 64, [3,3], strides=[1,1], 62 | activation=tf.nn.relu, 63 | padding='VALID', name='conv5') # (B*T,1,18,64) 64 | 65 | X = tf.contrib.layers.flatten(X) # (B*T, 1152) 66 | 67 | X = tf.layers.dense(X, 1152, activation=tf.nn.relu, name='fc1') 68 | X = tf.layers.dropout(X, rate=self.drop_rate, training=is_training, name='dp1') 69 | 70 | X = tf.layers.dense(X, 512, activation=tf.nn.relu, name='fc2') 71 | X = tf.layers.dropout(X, rate=self.drop_rate, training=is_training, name='dp2') 72 | 73 | # add lstm 74 | X = tf.reshape(X, (-1, self.seq_len, X.get_shape().as_list()[-1])) # (B,T,D) 75 | X = tf_util.lstm(X, hidden_size=128, scope='lstm') 76 | 77 | output = tf.layers.dense(X, self.output_dim, activation=None, name='output') 78 | # y = tf.mul(tf.atan(tf.matmul(h_fc4_drop, W_fc5) + b_fc5), 2, name='y') # [-pi,pi] 79 | 80 | 81 | return output 82 | 83 | def get_loss(self, pred, label, name='Loss'): 84 | """ Return loss for a batch of data 85 | Args: 86 | Return: MSE loss 87 | """ 88 | 89 | loss = tf.reduce_mean(tf.square(tf.subtract(pred, label))) 90 | 91 | tf.summary.scalar(name, loss) 92 | 93 | return loss 94 | 95 | def get_rmse(self, pred, label, name='rmse'): 96 | """Return rmse as evalation metrics for a batch of data 97 | """ 98 | rmse = tf.sqrt(tf.reduce_mean(tf.square(tf.subtract(pred, label)))) 99 | 100 | tf.summary.scalar(name, rmse) 101 | 102 | return rmse 103 | 104 | 105 | if __name__ == '__main__': 106 | with tf.Graph().as_default(): 107 | is_training_pl = tf.placeholder(tf.bool, shape=()) 108 | 109 | model = BMWNet() 110 | 111 | img_pl, label_pl = model.get_inputs_pl(batch_size=32) 112 | outputs = model.get_model(img_pl, is_training=is_training_pl) 113 | 114 | tf_util.model_summary() 115 | #Total size of variables: 2378261 116 | # Total bytes of variables: 9513044 -------------------------------------------------------------------------------- /models/dronet.py: -------------------------------------------------------------------------------- 1 | ''' 2 | @Author: Shuai Wang 3 | @Github: https://github.com/wsustcid 4 | @Version: 1.0.0 5 | @Date: 2020-09-15 19:34:29 6 | @LastEditTime: 2020-09-28 22:17:54 7 | ''' 8 | 9 | import os 10 | import sys 11 | import numpy as np 12 | import tensorflow as tf 13 | 14 | base_dir = os.path.dirname(os.path.abspath(__file__)) 15 | sys.path.append(os.path.join(base_dir, '..')) 16 | from utils import tf_util 17 | 18 | class DroNet(object): 19 | def __init__(self, height=200, width=200, channels=1): 20 | self.height = height 21 | self.width = width 22 | self.channels = channels 23 | self.output_dim = 2 24 | 25 | self.drop_rate = 0.5 26 | 27 | def get_inputs_pl(self, batch_size): 28 | 29 | image_pl = tf.placeholder(tf.float32, shape=(batch_size, self.height, self.width, self.channels)) 30 | label_pl = tf.placeholder(tf.float32, shape=(batch_size, self.output_dim)) 31 | 32 | return image_pl, label_pl 33 | 34 | 35 | def get_model(self, 
image_pl, is_training, bn_decay=None): 36 | """ DroNet model (ResNet8) 37 | Args: 38 | - input: input Tensor: (B,200,200,1) 39 | - is_training: A bool flag used for dropout 40 | Return: 41 | - output: output Tensor (B,1) 42 | TODO: in conv2d set 43 | kernel_initializer="he_normal", 44 | kernel_regularizer=regularizers.l2(1e-4) 45 | """ 46 | # Input 47 | X1 = tf.layers.conv2d(image_pl, 32, [5,5], strides=[2,2], 48 | padding='same', name='conv1') 49 | X1 = tf.layers.max_pooling2d(X1, pool_size=(3,3), strides=[2,2]) 50 | 51 | # First residual block 52 | X2 = tf.layers.batch_normalization(X1, training=is_training, name='res1_bn1') 53 | X2 = tf.nn.relu(X2, name='res1_relu1') 54 | X2 = tf.layers.conv2d(X2, 32, [3,3], strides=[2,2], 55 | padding='same', name='res1_conv1') 56 | 57 | X2 = tf.layers.batch_normalization(X2, training=is_training, name='res1_bn2') 58 | X2 = tf.nn.relu(X2, name='res1_relu2') 59 | X2 = tf.layers.conv2d(X2, 32, [3,3], strides=[1,1], 60 | padding='same', name='res1_conv2') 61 | 62 | X1 = tf.layers.conv2d(X1, 32, [1,1], strides=[2,2], 63 | padding='same', name='res1_skip') 64 | X3 = tf.add(X1, X2, name='res1_add') 65 | 66 | # Second residual block 67 | X4 = tf.layers.batch_normalization(X3, training=is_training, name='res2_bn1') 68 | X4 = tf.nn.relu(X4, name='res2_relu1') 69 | X4 = tf.layers.conv2d(X4, 64, [3,3], strides=[2,2], 70 | padding='same', name='res2_conv1') 71 | 72 | X4 = tf.layers.batch_normalization(X4, training=is_training, name='res2_bn2') 73 | X4 = tf.nn.relu(X4, name='res2_relu2') 74 | X4 = tf.layers.conv2d(X4, 64, [3,3], strides=[1,1], 75 | padding='same', name='res2_conv2') 76 | 77 | X3 = tf.layers.conv2d(X3, 64, [1,1], strides=[2,2], 78 | padding='same', name='res2_skip') 79 | X5 = tf.add(X3, X4, name='res2_add') 80 | 81 | # Third residual block 82 | X6 = tf.layers.batch_normalization(X5, training=is_training, name='res3_bn1') 83 | X6 = tf.nn.relu(X6, name='res3_relu1') 84 | X6 = tf.layers.conv2d(X6, 128, [3,3], strides=[2,2], 85 | padding='same', name='res3_conv1') 86 | 87 | X6 = tf.layers.batch_normalization(X6, training=is_training, name='res3_bn2') 88 | X6 = tf.nn.relu(X6, name='res3_relu2') 89 | X6 = tf.layers.conv2d(X6, 128, [3,3], strides=[1,1], 90 | padding='same', name='res3_conv2') 91 | 92 | X5 = tf.layers.conv2d(X5, 128, [1,1], strides=[2,2], 93 | padding='same', name='res3_skip') 94 | X7 = tf.add(X5, X6, name='res3_add') 95 | 96 | X = tf.contrib.layers.flatten(X7) # (B, 1152) 97 | X = tf.nn.relu(X) 98 | X = tf.layers.dropout(X, rate=self.drop_rate) 99 | 100 | output = tf.layers.dense(X, self.output_dim, activation=None, name='output') 101 | 102 | return output 103 | 104 | def get_loss(self, pred, label, name='Loss'): 105 | """ Return loss for a batch of data 106 | Args: 107 | Return: MSE loss 108 | """ 109 | 110 | loss = tf.reduce_mean(tf.square(tf.subtract(pred, label))) 111 | 112 | tf.summary.scalar(name, loss) 113 | 114 | return loss 115 | 116 | def get_rmse(self, pred, label, name='rmse'): 117 | """Return rmse as evalation metrics for a batch of data 118 | """ 119 | rmse = tf.sqrt(tf.reduce_mean(tf.square(tf.subtract(pred, label)), axis=0)) 120 | 121 | tf.summary.scalar(name+'_angle', rmse[0]) 122 | tf.summary.scalar(name+'_speed', rmse[1]) 123 | 124 | return rmse[0], rmse[1] 125 | 126 | def get_acc(self, pred, label, delta_a=5.0, delta_s=1.0, name='acc'): 127 | """ 128 | TODO: Tolerance is to large! 
angle < 0.01; 129 | - delta_a: tolerance of angle: 5 degrees 130 | - delta_s: tolerance of speed: 1 ft/s = 0.3048 m/s = 1.09728 km/h 131 | """ 132 | acc_a = tf.abs(tf.subtract(pred[:, 0], label[:, 0])) < (delta_a/180*np.pi) 133 | acc_a = tf.reduce_mean(tf.cast(acc_a,tf.float32)) 134 | 135 | acc_s = tf.abs(tf.subtract(pred[:, 1], label[:, 1])) < delta_s 136 | acc_s = tf.reduce_mean(tf.cast(acc_s, tf.float32)) 137 | 138 | tf.summary.scalar(name+'_angle', acc_a) 139 | tf.summary.scalar(name+'_speed', acc_s) 140 | 141 | return acc_a, acc_s 142 | 143 | if __name__ == '__main__': 144 | with tf.Graph().as_default(): 145 | is_training = tf.cast(True, tf.bool) 146 | batch_size = 16 147 | 148 | model = DroNet() 149 | 150 | image_pl, label_pl = model.get_inputs_pl(batch_size) 151 | pred = model.get_model(image_pl, is_training) 152 | loss = model.get_loss(pred, label_pl) 153 | 154 | tf_util.model_summary() 155 | # Total size of variables: 320930 156 | # Total bytes of variables: 1283720 157 | 158 | -------------------------------------------------------------------------------- /models/pilotnet.py: -------------------------------------------------------------------------------- 1 | ''' 2 | @Author: Shuai Wang 3 | @Github: https://github.com/wsustcid 4 | @Version: 1.0.0 5 | @Date: 2020-05-28 22:19:51 6 | @LastEditTime: 2020-09-15 23:32:47 7 | ''' 8 | 9 | import os 10 | import sys 11 | import numpy as np 12 | import tensorflow as tf 13 | 14 | base_dir = os.path.dirname(os.path.abspath(__file__)) 15 | sys.path.append(os.path.join(base_dir, '..')) 16 | from utils import tf_util 17 | 18 | class PilotNet(object): 19 | def __init__(self, height=66, width=200): 20 | self.height = height 21 | self.width = width 22 | self.output_dim = 1 23 | 24 | self.drop_rate = 0.5 25 | 26 | def get_inputs_pl(self, batch_size): 27 | """ Create placeholders for the PilotNet 28 | """ 29 | image_pl = tf.placeholder(tf.float32, shape=(batch_size, self.height, self.width, 3)) 30 | label_pl = tf.placeholder(tf.float32, shape=(batch_size, self.output_dim)) 31 | 32 | return image_pl, label_pl 33 | 34 | 35 | def get_model(self, image_pl, is_training, bn_decay=None): 36 | """ Nvidia PilotNet model 37 | Args: 38 | - input: input Tensor: (B,66,200,3) 39 | - is_training: A bool flag used for dropout 40 | Return: 41 | - output: output Tensor (B,1) 42 | """ 43 | X = tf.layers.conv2d(image_pl, 24, [5,5], strides=[2,2], 44 | activation=tf.nn.relu, 45 | padding='VALID', name='conv1') # (B,31,98,24) 46 | X = tf.layers.conv2d(X, 36, [5,5], strides=[2,2], 47 | activation=tf.nn.relu, 48 | padding='VALID', name='conv2') # (B,14,47,36) 49 | X = tf.layers.conv2d(X, 48, [5,5], strides=[2,2], 50 | activation=tf.nn.relu, 51 | padding='VALID', name='conv3') # (B,5,22,48) 52 | X = tf.layers.conv2d(X, 64, [3,3], strides=[1,1], 53 | activation=tf.nn.relu, 54 | padding='VALID', name='conv4') # (B,3,20,64) 55 | X = tf.layers.conv2d(X, 64, [3,3], strides=[1,1], 56 | activation=tf.nn.relu, 57 | padding='VALID', name='conv5') # (B,1,18,64) 58 | 59 | X = tf.contrib.layers.flatten(X) # (B, 1152) 60 | 61 | X = tf.layers.dense(X, 1164, activation=tf.nn.relu, name='fc1') 62 | X = tf.layers.dropout(X, rate=self.drop_rate, training=is_training, name='dp1') 63 | 64 | X = tf.layers.dense(X, 100, activation=tf.nn.relu, name='fc2') 65 | X = tf.layers.dropout(X, rate=self.drop_rate, training=is_training, name='dp2') 66 | 67 | X = tf.layers.dense(X, 50, activation=tf.nn.relu, name='fc3') 68 | X = tf.layers.dropout(X, rate=self.drop_rate, training=is_training, name='dp4') 69 
| 70 | X = tf.layers.dense(X, 10, activation=tf.nn.relu, name='fc4') 71 | X = tf.layers.dropout(X, rate=self.drop_rate, training=is_training, name='dp4') 72 | 73 | output = tf.layers.dense(X, self.output_dim, activation=None, name='output') 74 | 75 | # y = tf.mul(tf.atan(tf.matmul(h_fc4_drop, W_fc5) + b_fc5), 2, name='y') # [-pi,pi] 76 | 77 | return output 78 | 79 | def get_loss(self, pred, label, name='Loss'): 80 | """ Return loss for a batch of data 81 | Args: 82 | Return: MSE loss 83 | """ 84 | 85 | loss = tf.reduce_mean(tf.square(tf.subtract(pred, label))) 86 | 87 | tf.summary.scalar(name, loss) 88 | 89 | return loss 90 | 91 | def get_rmse(self, pred, label, name='rmse'): 92 | """Return rmse as evalation metrics for a batch of data 93 | """ 94 | rmse = tf.sqrt(tf.reduce_mean(tf.square(tf.subtract(pred, label)))) 95 | 96 | tf.summary.scalar(name, rmse) 97 | 98 | return rmse 99 | 100 | def get_acc(self, pred, label, delta_a=5.0, delta_s=1.0, name='acc'): 101 | """ 102 | TODO: Tolerance is to large! angle < 0.01; 103 | - delta_a: tolerance of angle: 5 degrees 104 | - delta_s: tolerance of speed: 1 ft/s = 0.3048 m/s = 1.09728 km/h 105 | """ 106 | acc_a = tf.abs(tf.subtract(pred[:, 0], label[:, 0])) < (delta_a/180*np.pi) 107 | acc_a = tf.reduce_mean(tf.cast(acc_a,tf.float32)) 108 | 109 | acc_s = tf.abs(tf.subtract(pred[:, 1], label[:, 1])) < delta_s 110 | acc_s = tf.reduce_mean(tf.cast(acc_s, tf.float32)) 111 | 112 | tf.summary.scalar(name+'_angle', acc_a) 113 | tf.summary.scalar(name+'_speed', acc_s) 114 | 115 | return acc_a, acc_s 116 | 117 | if __name__ == '__main__': 118 | with tf.Graph().as_default(): 119 | is_training = tf.cast(True, tf.bool) 120 | batch_size = 16 121 | 122 | model = PilotNet() 123 | 124 | image_pl, label_pl = model.get_inputs_pl(batch_size) 125 | pred = model.get_model(image_pl, is_training) 126 | loss = model.get_loss(pred, label_pl) 127 | rmse = model.get_rmse(pred, label_pl) 128 | 129 | tf_util.model_summary() 130 | 131 | print('loss:', loss) 132 | print('rmse:', rmse) 133 | # Total size of variables: 1595511 134 | # Total bytes of variables: 6382044 135 | -------------------------------------------------------------------------------- /models/pointnet.py: -------------------------------------------------------------------------------- 1 | ''' 2 | @Author: Shuai Wang 3 | @Github: https://github.com/wsustcid 4 | @Version: 1.0.0 5 | @Date: 2020-06-28 11:01:02 6 | @LastEditTime: 2020-09-28 22:06:02 7 | @Description: 8 | ''' 9 | 10 | ''' 11 | PointNet version 1 Model 12 | Reference: https://github.com/charlesq34/pointnet 13 | ''' 14 | 15 | import os 16 | import sys 17 | import numpy as np 18 | import tensorflow as tf 19 | BASE_DIR = os.path.dirname(os.path.abspath(__file__)) 20 | sys.path.append(os.path.join(BASE_DIR, '../utils')) 21 | import tf_util 22 | 23 | 24 | class PointNet(object): 25 | def __init__(self, num_point): 26 | 27 | self.num_point = num_point 28 | self.output_dim = 1 29 | 30 | 31 | def get_inputs_pl(self, batch_size): 32 | points_pl = tf.placeholder(tf.float32, shape=(batch_size, self.num_point, 3)) 33 | label_pl = tf.placeholder(tf.float32, shape=(batch_size, self.output_dim)) 34 | 35 | return points_pl, label_pl 36 | 37 | 38 | def get_model(self, points_pl, is_training, bn_decay=None): 39 | """ Regression PointNet, input is BxNx3, output Bx1 """ 40 | input_image = tf.expand_dims(points_pl, -1) # (B,N,3,1) 41 | 42 | # Point functions (MLP implemented as conv2d) 43 | net = tf_util.conv2d(input_image, 64, [1,3], 44 | padding='VALID', stride=[1,1], 45 
| bn=True, is_training=is_training, 46 | scope='conv1', bn_decay=bn_decay) 47 | net = tf_util.conv2d(net, 64, [1,1], 48 | padding='VALID', stride=[1,1], 49 | bn=True, is_training=is_training, 50 | scope='conv2', bn_decay=bn_decay) 51 | net = tf_util.conv2d(net, 64, [1,1], 52 | padding='VALID', stride=[1,1], 53 | bn=True, is_training=is_training, 54 | scope='conv3', bn_decay=bn_decay) 55 | net = tf_util.conv2d(net, 128, [1,1], 56 | padding='VALID', stride=[1,1], 57 | bn=True, is_training=is_training, 58 | scope='conv4', bn_decay=bn_decay) 59 | net = tf_util.conv2d(net, 1024, [1,1], 60 | padding='VALID', stride=[1,1], 61 | bn=True, is_training=is_training, 62 | scope='conv5', bn_decay=bn_decay) 63 | 64 | # Symmetric function: max pooling 65 | net = tf_util.max_pool2d(net, [self.num_point, 1], 66 | padding='VALID', scope='maxpool') # (B,1,1,1024) 67 | 68 | # MLP on global point cloud vector 69 | #net = tf.reshape(net, [batch_size, -1]) 70 | net = tf.squeeze(net, axis=[1,2]) # (B,1024) 71 | 72 | net = tf_util.fully_connected(net, 512, bn=True, is_training=is_training, 73 | scope='fc1', bn_decay=bn_decay) 74 | net = tf_util.fully_connected(net, 256, bn=True, is_training=is_training, 75 | scope='fc2', bn_decay=bn_decay) 76 | net = tf_util.dropout(net, keep_prob=0.7, is_training=is_training, 77 | scope='dp1') 78 | net = tf_util.fully_connected(net, self.output_dim, activation_fn=None, scope='fc3') 79 | 80 | return net 81 | 82 | 83 | def get_loss(self, pred, label, name='Loss'): 84 | """ Return mse loss for a batch of data 85 | """ 86 | loss = tf.reduce_mean(tf.square(tf.subtract(pred, label))) 87 | 88 | tf.summary.scalar(name, loss) 89 | 90 | return loss 91 | 92 | 93 | def get_rmse(self,pred, label, name='rmse'): 94 | """Return rmse as evalation metrics for a batch of data 95 | """ 96 | rmse = tf.sqrt(tf.reduce_mean(tf.square(tf.subtract(pred, label)))) 97 | 98 | tf.summary.scalar(name, rmse) 99 | 100 | return rmse 101 | 102 | if __name__=='__main__': 103 | with tf.Graph().as_default(): 104 | batch_size = 16 105 | is_training = tf.constant(True) 106 | 107 | model = PointNet(num_point=20000) 108 | 109 | points_pl, label_pl = model.get_inputs_pl(batch_size) 110 | pred = model.get_model(points_pl, is_training) 111 | loss = model.get_loss(pred, label_pl) 112 | print(pred) 113 | print(loss) 114 | tf_util.model_summary() 115 | # Total size of variables: 809601 116 | # Total bytes of variables: 3238404 117 | -------------------------------------------------------------------------------- /tf_ops/3d_interpolation/__pycache__/tf_interpolate.cpython-35.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wsustcid/FlowDriveNet/3604495269ae45e5b43964046104f685ec66e383/tf_ops/3d_interpolation/__pycache__/tf_interpolate.cpython-35.pyc -------------------------------------------------------------------------------- /tf_ops/3d_interpolation/interpolate.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include // memset 4 | #include // rand, RAND_MAX 5 | #include // sqrtf 6 | #include 7 | #include 8 | using namespace std; 9 | float randomf(){ 10 | return (rand()+0.5)/(RAND_MAX+1.0); 11 | } 12 | static double get_time(){ 13 | timespec tp; 14 | clock_gettime(CLOCK_MONOTONIC,&tp); 15 | return tp.tv_sec+tp.tv_nsec*1e-9; 16 | } 17 | 18 | // Find three nearest neigbors with square distance 19 | // input: xyz1 (b,n,3), xyz2(b,m,3) 20 | // output: dist (b,n,3), idx (b,n,3) 21 | void 
threenn_cpu(int b, int n, int m, const float *xyz1, const float *xyz2, float *dist, int *idx) { 22 | for (int i=0;i 2 | #include 3 | #include // memset 4 | #include // rand, RAND_MAX 5 | #include // sqrtf 6 | #include "tensorflow/core/framework/op.h" 7 | #include "tensorflow/core/framework/op_kernel.h" 8 | #include "tensorflow/core/framework/shape_inference.h" 9 | #include "tensorflow/core/framework/common_shape_fns.h" 10 | using namespace tensorflow; 11 | 12 | REGISTER_OP("ThreeNN") 13 | .Input("xyz1: float32") 14 | .Input("xyz2: float32") 15 | .Output("dist: float32") 16 | .Output("idx: int32") 17 | .SetShapeFn([](::tensorflow::shape_inference::InferenceContext* c) { 18 | c->set_output(0, c->input(0)); 19 | c->set_output(1, c->input(0)); 20 | return Status::OK(); 21 | }); 22 | REGISTER_OP("ThreeInterpolate") 23 | .Input("points: float32") 24 | .Input("idx: int32") 25 | .Input("weight: float32") 26 | .Output("out: float32") 27 | .SetShapeFn([](::tensorflow::shape_inference::InferenceContext* c) { 28 | ::tensorflow::shape_inference::ShapeHandle dims1; // (b,m,c) 29 | c->WithRank(c->input(0), 3, &dims1); 30 | ::tensorflow::shape_inference::ShapeHandle dims2; // (b,n,3) 31 | c->WithRank(c->input(1), 3, &dims2); 32 | // (b,n,c) 33 | ::tensorflow::shape_inference::ShapeHandle output = c->MakeShape({c->Dim(dims1, 0), c->Dim(dims2, 1), c->Dim(dims1, 2)}); 34 | c->set_output(0, output); 35 | return Status::OK(); 36 | }); 37 | REGISTER_OP("ThreeInterpolateGrad") 38 | .Input("points: float32") 39 | .Input("idx: int32") 40 | .Input("weight: float32") 41 | .Input("grad_out: float32") 42 | .Output("grad_points: float32") 43 | .SetShapeFn([](::tensorflow::shape_inference::InferenceContext* c) { 44 | c->set_output(0, c->input(0)); 45 | return Status::OK(); 46 | }); 47 | 48 | float randomf(){ 49 | return (rand()+0.5)/(RAND_MAX+1.0); 50 | } 51 | static double get_time(){ 52 | timespec tp; 53 | clock_gettime(CLOCK_MONOTONIC,&tp); 54 | return tp.tv_sec+tp.tv_nsec*1e-9; 55 | } 56 | 57 | // Find three nearest neigbors with square distance 58 | // input: xyz1 (b,n,3), xyz2(b,m,3) 59 | // output: dist (b,n,3), idx (b,n,3) 60 | void threenn_cpu(int b, int n, int m, const float *xyz1, const float *xyz2, float *dist, int *idx) { 61 | for (int i=0;iinput(0); 163 | OP_REQUIRES(context, xyz1_tensor.dims()==3 && xyz1_tensor.shape().dim_size(2)==3, errors::InvalidArgument("ThreeNN expects (b,n,3) xyz1 shape.")); 164 | int b = xyz1_tensor.shape().dim_size(0); 165 | int n = xyz1_tensor.shape().dim_size(1); 166 | 167 | const Tensor& xyz2_tensor = context->input(1); 168 | OP_REQUIRES(context, xyz2_tensor.dims()==3 && xyz2_tensor.shape().dim_size(2)==3, errors::InvalidArgument("ThreeNN expects (b,m,3) xyz2 shape.")); 169 | int m = xyz2_tensor.shape().dim_size(1); 170 | 171 | Tensor *dist_tensor = nullptr; 172 | OP_REQUIRES_OK(context, context->allocate_output(0, TensorShape{b,n,3}, &dist_tensor)); 173 | Tensor *idx_tensor = nullptr; 174 | OP_REQUIRES_OK(context, context->allocate_output(1, TensorShape{b,n,3}, &idx_tensor)); 175 | 176 | auto xyz1_flat = xyz1_tensor.flat(); 177 | const float *xyz1 = &(xyz1_flat(0)); 178 | auto xyz2_flat = xyz2_tensor.flat(); 179 | const float *xyz2 = &(xyz2_flat(0)); 180 | auto dist_flat = dist_tensor->flat(); 181 | float *dist = &(dist_flat(0)); 182 | auto idx_flat = idx_tensor->flat(); 183 | int *idx = &(idx_flat(0)); 184 | threenn_cpu(b,n,m,xyz1,xyz2,dist,idx); 185 | } 186 | }; 187 | REGISTER_KERNEL_BUILDER(Name("ThreeNN").Device(DEVICE_CPU), ThreeNNOp); 188 | 189 | 190 | 191 | class 
ThreeInterpolateOp: public OpKernel{ 192 | public: 193 | explicit ThreeInterpolateOp(OpKernelConstruction * context):OpKernel(context){} 194 | 195 | void Compute(OpKernelContext * context) override { 196 | const Tensor& points_tensor=context->input(0); 197 | OP_REQUIRES(context, points_tensor.dims()==3, errors::InvalidArgument("ThreeInterpolate expects (b,m,c) points shape")); 198 | int b = points_tensor.shape().dim_size(0); 199 | int m = points_tensor.shape().dim_size(1); 200 | int c = points_tensor.shape().dim_size(2); 201 | 202 | const Tensor& idx_tensor=context->input(1); 203 | OP_REQUIRES(context,idx_tensor.dims()==3 && idx_tensor.shape().dim_size(0)==b && idx_tensor.shape().dim_size(2)==3, errors::InvalidArgument("ThreeInterpolate expects (b,n,3) idx shape")); 204 | int n = idx_tensor.shape().dim_size(1); 205 | const Tensor& weight_tensor=context->input(2); 206 | OP_REQUIRES(context,weight_tensor.dims()==3 && weight_tensor.shape().dim_size(0)==b && weight_tensor.shape().dim_size(1)==n && weight_tensor.shape().dim_size(2)==3, errors::InvalidArgument("ThreeInterpolate expects (b,n,3) weight shape")); 207 | 208 | Tensor * out_tensor = nullptr; 209 | OP_REQUIRES_OK(context, context->allocate_output(0,TensorShape{b,n,c}, &out_tensor)); 210 | 211 | auto points_flat = points_tensor.flat(); 212 | const float *points = &(points_flat(0)); 213 | auto idx_flat = idx_tensor.flat(); 214 | const int *idx = &(idx_flat(0)); 215 | auto weight_flat = weight_tensor.flat(); 216 | const float *weight = &(weight_flat(0)); 217 | auto out_flat = out_tensor->flat(); 218 | float *out = &(out_flat(0)); 219 | threeinterpolate_cpu(b,m,c,n,points,idx,weight,out); 220 | } 221 | }; 222 | REGISTER_KERNEL_BUILDER(Name("ThreeInterpolate").Device(DEVICE_CPU),ThreeInterpolateOp); 223 | 224 | 225 | class ThreeInterpolateGradOp: public OpKernel{ 226 | public: 227 | explicit ThreeInterpolateGradOp(OpKernelConstruction * context):OpKernel(context){} 228 | 229 | void Compute(OpKernelContext * context) override { 230 | const Tensor& points_tensor=context->input(0); 231 | OP_REQUIRES(context, points_tensor.dims()==3, errors::InvalidArgument("ThreeInterpolateGrad expects (b,m,c) points shape")); 232 | int b = points_tensor.shape().dim_size(0); 233 | int m = points_tensor.shape().dim_size(1); 234 | int c = points_tensor.shape().dim_size(2); 235 | 236 | const Tensor& idx_tensor=context->input(1); 237 | OP_REQUIRES(context,idx_tensor.dims()==3 && idx_tensor.shape().dim_size(0)==b, errors::InvalidArgument("ThreeInterpolateGrad expects (b,n,3) idx shape")); 238 | int n = idx_tensor.shape().dim_size(1); 239 | const Tensor& weight_tensor=context->input(2); 240 | OP_REQUIRES(context,weight_tensor.dims()==3 && weight_tensor.shape().dim_size(0)==b && weight_tensor.shape().dim_size(1)==n && weight_tensor.shape().dim_size(2)==3, errors::InvalidArgument("ThreeInterpolateGrad expects (b,n,3) weight shape")); 241 | 242 | const Tensor& grad_out_tensor=context->input(3); 243 | OP_REQUIRES(context,grad_out_tensor.dims()==3 && grad_out_tensor.shape().dim_size(0)==b && grad_out_tensor.shape().dim_size(1)==n && grad_out_tensor.shape().dim_size(2)==c, errors::InvalidArgument("ThreeInterpolateGrad expects (b,n,c) grad_out shape")); 244 | 245 | Tensor * grad_points_tensor = nullptr; 246 | OP_REQUIRES_OK(context, context->allocate_output(0,TensorShape{b,m,c}, &grad_points_tensor)); 247 | 248 | auto points_flat = points_tensor.flat(); 249 | const float *points = &(points_flat(0)); 250 | auto idx_flat = idx_tensor.flat(); 251 | const int *idx = 
&(idx_flat(0)); 252 | auto weight_flat = weight_tensor.flat(); 253 | const float *weight = &(weight_flat(0)); 254 | auto grad_out_flat = grad_out_tensor.flat(); 255 | const float *grad_out = &(grad_out_flat(0)); 256 | auto grad_points_flat = grad_points_tensor->flat(); 257 | float *grad_points = &(grad_points_flat(0)); 258 | memset(grad_points, 0, sizeof(float)*b*m*c); 259 | threeinterpolate_grad_cpu(b,n,c,m,grad_out,idx,weight,grad_points); 260 | } 261 | }; 262 | REGISTER_KERNEL_BUILDER(Name("ThreeInterpolateGrad").Device(DEVICE_CPU),ThreeInterpolateGradOp); 263 | 264 | 265 | -------------------------------------------------------------------------------- /tf_ops/3d_interpolation/tf_interpolate.py: -------------------------------------------------------------------------------- 1 | ''' 2 | @Author: Shuai Wang 3 | @Github: https://github.com/wsustcid 4 | @Version: 1.0.0 5 | @Date: 1970-01-01 08:00:00 6 | @LastEditTime: 2020-06-16 22:22:43 7 | @Description: 8 | ''' 9 | import tensorflow as tf 10 | from tensorflow.python.framework import ops 11 | import sys 12 | import os 13 | BASE_DIR = os.path.dirname(__file__) 14 | sys.path.append(BASE_DIR) 15 | interpolate_module=tf.load_op_library(os.path.join(BASE_DIR, 'tf_interpolate_so.so')) 16 | def three_nn(xyz1, xyz2): 17 | ''' 18 | Input: 19 | xyz1: (b,n,3) float32 array, unknown points 20 | xyz2: (b,m,3) float32 array, known points 21 | Output: 22 | dist: (b,n,3) float32 array, distances to known points 23 | idx: (b,n,3) int32 array, indices to known points 24 | ''' 25 | return interpolate_module.three_nn(xyz1, xyz2) 26 | ops.NoGradient('ThreeNN') 27 | def three_interpolate(points, idx, weight): 28 | ''' 29 | Input: 30 | points: (b,m,c) float32 array, known points 31 | idx: (b,n,3) int32 array, indices to known points 32 | weight: (b,n,3) float32 array, weights on known points 33 | Output: 34 | out: (b,n,c) float32 array, interpolated point values 35 | ''' 36 | return interpolate_module.three_interpolate(points, idx, weight) 37 | @tf.RegisterGradient('ThreeInterpolate') 38 | def _three_interpolate_grad(op, grad_out): 39 | points = op.inputs[0] 40 | idx = op.inputs[1] 41 | weight = op.inputs[2] 42 | return [interpolate_module.three_interpolate_grad(points, idx, weight, grad_out), None, None] 43 | 44 | if __name__=='__main__': 45 | import numpy as np 46 | import time 47 | np.random.seed(100) 48 | pts = np.random.random((32,128,64)).astype('float32') 49 | tmp1 = np.random.random((32,512,3)).astype('float32') 50 | tmp2 = np.random.random((32,128,3)).astype('float32') 51 | with tf.device('/cpu:0'): 52 | points = tf.constant(pts) 53 | xyz1 = tf.constant(tmp1) 54 | xyz2 = tf.constant(tmp2) 55 | dist, idx = three_nn(xyz1, xyz2) 56 | weight = tf.ones_like(dist)/3.0 57 | interpolated_points = three_interpolate(points, idx, weight) 58 | with tf.Session('') as sess: 59 | now = time.time() 60 | for _ in range(100): 61 | ret = sess.run(interpolated_points) 62 | print(time.time() - now) 63 | print(ret.shape, ret.dtype) 64 | #print ret 65 | 66 | 67 | 68 | -------------------------------------------------------------------------------- /tf_ops/3d_interpolation/tf_interpolate_compile.sh: -------------------------------------------------------------------------------- 1 | # TF1.2 2 | #g++ -std=c++11 tf_interpolate.cpp -o tf_interpolate_so.so -shared -fPIC -I /usr/local/lib/python2.7/dist-packages/tensorflow/include -I /usr/local/cuda-8.0/include -lcudart -L /usr/local/cuda-8.0/lib64/ -O2 -D_GLIBCXX_USE_CXX11_ABI=0 3 | 4 | # TF1.4 5 | #g++ -std=c++11 
tf_interpolate.cpp -o tf_interpolate_so.so -shared -fPIC -I /usr/local/lib/python2.7/dist-packages/tensorflow/include -I /usr/local/cuda-8.0/include -I /usr/local/lib/python2.7/dist-packages/tensorflow/include/external/nsync/public -lcudart -L /usr/local/cuda-8.0/lib64/ -L/usr/local/lib/python2.7/dist-packages/tensorflow -ltensorflow_framework -O2 -D_GLIBCXX_USE_CXX11_ABI=0
6 | 
7 | # TF1.12 (local venv)
8 | g++ -std=c++11 tf_interpolate.cpp -o tf_interpolate_so.so -shared -fPIC -I /home/ubuntu16/venv/lib/python3.5/site-packages/tensorflow/include -I /usr/local/cuda-9.0/include -I /home/ubuntu16/venv/lib/python3.5/site-packages/tensorflow/include/external/nsync/public -lcudart -L /usr/local/cuda-9.0/lib64/ -L/home/ubuntu16/venv/lib/python3.5/site-packages/tensorflow -ltensorflow_framework -O2 -D_GLIBCXX_USE_CXX11_ABI=0 -------------------------------------------------------------------------------- /tf_ops/3d_interpolation/tf_interpolate_op_test.py: --------------------------------------------------------------------------------
1 | import tensorflow as tf
2 | import numpy as np
3 | from tf_interpolate import three_nn, three_interpolate
4 | 
5 | class GroupPointTest(tf.test.TestCase):
6 | def test(self):
7 | pass
8 | 
9 | def test_grad(self):
10 | with self.test_session():
11 | points = tf.constant(np.random.random((1,8,16)).astype('float32'))
12 | print(points)
13 | xyz1 = tf.constant(np.random.random((1,128,3)).astype('float32'))
14 | xyz2 = tf.constant(np.random.random((1,8,3)).astype('float32'))
15 | dist, idx = three_nn(xyz1, xyz2)
16 | weight = tf.ones_like(dist)/3.0
17 | interpolated_points = three_interpolate(points, idx, weight)
18 | print(interpolated_points)
19 | err = tf.test.compute_gradient_error(points, (1,8,16), interpolated_points, (1,128,16))
20 | print(err)
21 | self.assertLess(err, 1e-4)
22 | 
23 | if __name__=='__main__':
24 | tf.test.main()
25 | 
-------------------------------------------------------------------------------- /tf_ops/3d_interpolation/tf_interpolate_so.so: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wsustcid/FlowDriveNet/3604495269ae45e5b43964046104f685ec66e383/tf_ops/3d_interpolation/tf_interpolate_so.so -------------------------------------------------------------------------------- /tf_ops/3d_interpolation/visu_interpolation.py: --------------------------------------------------------------------------------
1 | ''' Visualize part segmentation '''
2 | import os
3 | import sys
4 | ROOT_DIR = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
5 | sys.path.append('/home/rqi/Projects/toolkits/visualization')
6 | from show3d_balls import showpoints
7 | import numpy as np
8 | from tf_interpolate import three_nn, three_interpolate
9 | import tensorflow as tf
10 | 
11 | 
12 | pts2 = np.array([[0,0,1],[1,0,0],[0,1,0],[1,1,0]]).astype('float32')
13 | xyz1 = np.random.random((100,3)).astype('float32')
14 | xyz2 = np.array([[0,0,0],[1,0,0],[0,1,0],[1,1,1]]).astype('float32')
15 | 
16 | def fun(xyz1,xyz2,pts2):
17 | with tf.device('/cpu:0'):
18 | points = tf.constant(np.expand_dims(pts2,0))
19 | xyz1 = tf.constant(np.expand_dims(xyz1,0))
20 | xyz2 = tf.constant(np.expand_dims(xyz2,0))
21 | dist, idx = three_nn(xyz1, xyz2)
22 | #weight = tf.ones_like(dist)/3.0
23 | dist = tf.maximum(dist, 1e-10)
24 | norm = tf.reduce_sum((1.0/dist),axis=2,keep_dims=True)
25 | norm = tf.tile(norm, [1,1,3])
26 | print(norm)
27 | weight = (1.0/dist) / norm # inverse-distance weights: w_k = (1/d_k) / sum_j(1/d_j)
28 | interpolated_points = three_interpolate(points, idx, 
weight) 29 | with tf.Session('') as sess: 30 | tmp,pts1,d,w = sess.run([xyz1, interpolated_points, dist, weight]) 31 | #print w 32 | pts1 = pts1.squeeze() 33 | return pts1 34 | 35 | pts1 = fun(xyz1,xyz2,pts2) 36 | all_pts = np.zeros((104,3)) 37 | all_pts[0:100,:] = pts1 38 | all_pts[100:,:] = pts2 39 | all_xyz = np.zeros((104,3)) 40 | all_xyz[0:100,:]=xyz1 41 | all_xyz[100:,:]=xyz2 42 | showpoints(xyz2, pts2, ballradius=8) 43 | showpoints(xyz1, pts1, ballradius=8) 44 | showpoints(all_xyz, all_pts, ballradius=8) 45 | -------------------------------------------------------------------------------- /tf_ops/grouping/__pycache__/tf_grouping.cpython-35.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wsustcid/FlowDriveNet/3604495269ae45e5b43964046104f685ec66e383/tf_ops/grouping/__pycache__/tf_grouping.cpython-35.pyc -------------------------------------------------------------------------------- /tf_ops/grouping/test/compile.sh: -------------------------------------------------------------------------------- 1 | g++ query_ball_point.cpp -o query_ball_point 2 | nvcc query_ball_point.cu -o query_ball_point_cuda 3 | nvcc query_ball_point_block.cu -o query_ball_point_block 4 | nvcc query_ball_point_grid.cu -o query_ball_point_grid 5 | g++ -Wall selection_sort.cpp -o selection_sort 6 | nvcc selection_sort.cu -o selection_sort_cuda 7 | -------------------------------------------------------------------------------- /tf_ops/grouping/test/query_ball_point.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include // memset 4 | #include // rand, RAND_MAX 5 | #include // sqrtf 6 | #include 7 | #include 8 | using namespace std; 9 | float randomf(){ 10 | return (rand()+0.5)/(RAND_MAX+1.0); 11 | } 12 | static double get_time(){ 13 | timespec tp; 14 | clock_gettime(CLOCK_MONOTONIC,&tp); 15 | return tp.tv_sec+tp.tv_nsec*1e-9; 16 | } 17 | // input: radius (1), nsample (1), xyz1 (b,n,3), xyz2 (b,m,3) 18 | // output: idx (b,m,nsample) 19 | void query_ball_point_cpu(int b, int n, int m, float radius, int nsample, const float *xyz1, const float *xyz2, int *idx) { 20 | for (int i=0;i 2 | #include 3 | #include // memset 4 | #include // rand, RAND_MAX 5 | #include // sqrtf 6 | #include 7 | #include 8 | using namespace std; 9 | float randomf(){ 10 | return (rand()+0.5)/(RAND_MAX+1.0); 11 | } 12 | static double get_time(){ 13 | timespec tp; 14 | clock_gettime(CLOCK_MONOTONIC,&tp); 15 | return tp.tv_sec+tp.tv_nsec*1e-9; 16 | } 17 | // input: radius (1), nsample (1), xyz1 (b,n,3), xyz2 (b,m,3) 18 | // output: idx (b,m,nsample) 19 | __global__ void query_ball_point_gpu(int b, int n, int m, float radius, int nsample, const float *xyz1, const float *xyz2, int *idx) { 20 | for (int i=0;i>>(b,n,m,radius,nsample,xyz1,xyz2,idx); 113 | cudaDeviceSynchronize(); 114 | printf("query_ball_point gpu time %f\n",get_time()-t0); 115 | 116 | t0=get_time(); 117 | group_point_gpu<<<1,1>>>(b,n,c,m,nsample,points,idx,out); 118 | cudaDeviceSynchronize(); 119 | printf("grou_point gpu time %f\n",get_time()-t0); 120 | 121 | t0=get_time(); 122 | group_point_grad_gpu<<<1,1>>>(b,n,c,m,nsample,grad_out,idx,grad_points); 123 | cudaDeviceSynchronize(); 124 | printf("grou_point_grad gpu time %f\n",get_time()-t0); 125 | 126 | cudaFree(xyz1); 127 | cudaFree(xyz2); 128 | cudaFree(points); 129 | cudaFree(idx); 130 | cudaFree(out); 131 | cudaFree(grad_out); 132 | cudaFree(grad_points); 133 | return 0; 134 | } 135 | 
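// Summary of the three GPU variants in this test folder: this file uses a
// single-thread launch looping over the whole batch (see the <<<1,1>>>
// group_point calls above); query_ball_point_block.cu assigns one thread per
// batch element (<<<1,b>>>); query_ball_point_grid.cu assigns one block per
// batch element with the block's threads striding over query points, which
// matches the layout of the production kernels in ../tf_grouping_g.cu.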
-------------------------------------------------------------------------------- /tf_ops/grouping/test/query_ball_point_block.cu: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include // memset 4 | #include // rand, RAND_MAX 5 | #include // sqrtf 6 | #include 7 | #include 8 | using namespace std; 9 | float randomf(){ 10 | return (rand()+0.5)/(RAND_MAX+1.0); 11 | } 12 | static double get_time(){ 13 | timespec tp; 14 | clock_gettime(CLOCK_MONOTONIC,&tp); 15 | return tp.tv_sec+tp.tv_nsec*1e-9; 16 | } 17 | // input: radius (1), nsample (1), xyz1 (b,n,3), xyz2 (b,m,3) 18 | // output: idx (b,m,nsample) 19 | __global__ void query_ball_point_gpu(int b, int n, int m, float radius, int nsample, const float *xyz1, const float *xyz2, int *idx) { 20 | int index = threadIdx.x; 21 | xyz1 += n*3*index; 22 | xyz2 += m*3*index; 23 | idx += m*nsample*index; 24 | 25 | for (int j=0;j>>(b,n,m,radius,nsample,xyz1,xyz2,idx); 113 | cudaDeviceSynchronize(); 114 | printf("query_ball_point gpu time %f\n",get_time()-t0); 115 | 116 | t0=get_time(); 117 | group_point_gpu<<<1,b>>>(b,n,c,m,nsample,points,idx,out); 118 | cudaDeviceSynchronize(); 119 | printf("grou_point gpu time %f\n",get_time()-t0); 120 | 121 | t0=get_time(); 122 | group_point_grad_gpu<<<1,b>>>(b,n,c,m,nsample,grad_out,idx,grad_points); 123 | cudaDeviceSynchronize(); 124 | printf("grou_point_grad gpu time %f\n",get_time()-t0); 125 | 126 | cudaFree(xyz1); 127 | cudaFree(xyz2); 128 | cudaFree(points); 129 | cudaFree(idx); 130 | cudaFree(out); 131 | cudaFree(grad_out); 132 | cudaFree(grad_points); 133 | return 0; 134 | } 135 | -------------------------------------------------------------------------------- /tf_ops/grouping/test/query_ball_point_grid.cu: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include // memset 4 | #include // rand, RAND_MAX 5 | #include // sqrtf 6 | #include 7 | #include 8 | using namespace std; 9 | float randomf(){ 10 | return (rand()+0.5)/(RAND_MAX+1.0); 11 | } 12 | static double get_time(){ 13 | timespec tp; 14 | clock_gettime(CLOCK_MONOTONIC,&tp); 15 | return tp.tv_sec+tp.tv_nsec*1e-9; 16 | } 17 | // input: radius (1), nsample (1), xyz1 (b,n,3), xyz2 (b,m,3) 18 | // output: idx (b,m,nsample) 19 | __global__ void query_ball_point_gpu(int b, int n, int m, float radius, int nsample, const float *xyz1, const float *xyz2, int *idx) { 20 | int batch_index = blockIdx.x; 21 | xyz1 += n*3*batch_index; 22 | xyz2 += m*3*batch_index; 23 | idx += m*nsample*batch_index; 24 | 25 | int index = threadIdx.x; 26 | int stride = blockDim.x; 27 | 28 | for (int j=index;j>>(b,n,m,radius,nsample,xyz1,xyz2,idx); 123 | cudaDeviceSynchronize(); 124 | printf("query_ball_point gpu time %f\n",get_time()-t0); 125 | 126 | t0=get_time(); 127 | group_point_gpu<<>>(b,n,c,m,nsample,points,idx,out); 128 | cudaDeviceSynchronize(); 129 | printf("grou_point gpu time %f\n",get_time()-t0); 130 | 131 | t0=get_time(); 132 | group_point_grad_gpu<<>>(b,n,c,m,nsample,grad_out,idx,grad_points); 133 | cudaDeviceSynchronize(); 134 | printf("grou_point_grad gpu time %f\n",get_time()-t0); 135 | 136 | cudaFree(xyz1); 137 | cudaFree(xyz2); 138 | cudaFree(points); 139 | cudaFree(idx); 140 | cudaFree(out); 141 | cudaFree(grad_out); 142 | cudaFree(grad_points); 143 | return 0; 144 | } 145 | -------------------------------------------------------------------------------- /tf_ops/grouping/test/selection_sort.cpp: 
-------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include // memset 4 | #include // rand, RAND_MAX 5 | #include // sqrtf 6 | #include 7 | #include 8 | using namespace std; 9 | float randomf(){ 10 | return (rand()+0.5)/(RAND_MAX+1.0); 11 | } 12 | static double get_time(){ 13 | timespec tp; 14 | clock_gettime(CLOCK_MONOTONIC,&tp); 15 | return tp.tv_sec+tp.tv_nsec*1e-9; 16 | } 17 | 18 | // input: k (1), distance matrix dist (b,m,n) 19 | // output: idx (b,m,n), val (b,m,n) 20 | void selection_sort_cpu(int b, int n, int m, int k, const float *dist, int *idx, float *val) { 21 | float *p_dist; 22 | float tmp; 23 | int tmpi; 24 | for (int i=0;i 2 | #include 3 | #include // memset 4 | #include // rand, RAND_MAX 5 | #include // sqrtf 6 | #include 7 | #include 8 | using namespace std; 9 | float randomf(){ 10 | return (rand()+0.5)/(RAND_MAX+1.0); 11 | } 12 | static double get_time(){ 13 | timespec tp; 14 | clock_gettime(CLOCK_MONOTONIC,&tp); 15 | return tp.tv_sec+tp.tv_nsec*1e-9; 16 | } 17 | 18 | // input: k (1), distance matrix dist (b,m,n) 19 | // output: idx (b,m,k), val (b,m,k) 20 | __global__ void selection_sort_gpu(int b, int n, int m, int k, float *dist, int *idx, float *val) { 21 | int batch_index = blockIdx.x; 22 | dist+=m*n*batch_index; 23 | idx+=m*k*batch_index; 24 | val+=m*k*batch_index; 25 | 26 | int index = threadIdx.x; 27 | int stride = blockDim.x; 28 | 29 | float *p_dist; 30 | for (int j=index;j>>(b,n,m,k,dist,idx,val); 68 | cudaDeviceSynchronize(); 69 | printf("selection sort cpu time %f\n",get_time()-t0); 70 | 71 | return 0; 72 | } 73 | -------------------------------------------------------------------------------- /tf_ops/grouping/test/selection_sort_const.cu: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include // memset 4 | #include // rand, RAND_MAX 5 | #include // sqrtf 6 | #include 7 | #include 8 | using namespace std; 9 | float randomf(){ 10 | return (rand()+0.5)/(RAND_MAX+1.0); 11 | } 12 | static double get_time(){ 13 | timespec tp; 14 | clock_gettime(CLOCK_MONOTONIC,&tp); 15 | return tp.tv_sec+tp.tv_nsec*1e-9; 16 | } 17 | 18 | // input: k (1), distance matrix dist (b,m,n) 19 | // output: idx (b,m,n), dist_out (b,m,n) 20 | __global__ void selection_sort_gpu(int b, int n, int m, int k, const float *dist, int *outi, float *out) { 21 | int batch_index = blockIdx.x; 22 | dist+=m*n*batch_index; 23 | outi+=m*n*batch_index; 24 | out+=m*n*batch_index; 25 | 26 | int index = threadIdx.x; 27 | int stride = blockDim.x; 28 | 29 | // copy from dist to dist_out 30 | for (int j=index;j>>(b,n,m,k,dist,idx,dist_out); 84 | cudaDeviceSynchronize(); 85 | printf("selection sort cpu time %f\n",get_time()-t0); 86 | 87 | //for (int i=0;i 2 | #include 3 | #include // memset 4 | #include // rand, RAND_MAX 5 | #include // sqrtf 6 | #include "tensorflow/core/framework/op.h" 7 | #include "tensorflow/core/framework/op_kernel.h" 8 | #include "tensorflow/core/framework/shape_inference.h" 9 | #include "tensorflow/core/framework/common_shape_fns.h" 10 | #include 11 | using namespace tensorflow; 12 | 13 | REGISTER_OP("QueryBallPoint") 14 | .Attr("radius: float") 15 | .Attr("nsample: int") 16 | .Input("xyz1: float32") 17 | .Input("xyz2: float32") 18 | .Output("idx: int32") 19 | .Output("pts_cnt: int32") 20 | .SetShapeFn([](::tensorflow::shape_inference::InferenceContext* c) { 21 | ::tensorflow::shape_inference::ShapeHandle dims2; // batch_size * npoint * 3 22 | 
c->WithRank(c->input(1), 3, &dims2); 23 | int nsample; 24 | TF_RETURN_IF_ERROR(c->GetAttr("nsample", &nsample)); 25 | ::tensorflow::shape_inference::ShapeHandle output1 = c->MakeShape({c->Dim(dims2, 0), c->Dim(dims2, 1), nsample}); 26 | c->set_output(0, output1); 27 | ::tensorflow::shape_inference::ShapeHandle output2 = c->MakeShape({c->Dim(dims2, 0), c->Dim(dims2, 1)}); 28 | c->set_output(1, output2); 29 | return Status::OK(); 30 | }); 31 | REGISTER_OP("SelectionSort") 32 | .Attr("k: int") 33 | .Input("dist: float32") 34 | .Output("outi: int32") 35 | .Output("out: float32") 36 | .SetShapeFn([](::tensorflow::shape_inference::InferenceContext* c) { 37 | c->set_output(0, c->input(0)); 38 | c->set_output(1, c->input(0)); 39 | return Status::OK(); 40 | }); 41 | REGISTER_OP("GroupPoint") 42 | .Input("points: float32") 43 | .Input("idx: int32") 44 | .Output("out: float32") 45 | .SetShapeFn([](::tensorflow::shape_inference::InferenceContext* c) { 46 | ::tensorflow::shape_inference::ShapeHandle dims1; // batch_size * ndataset * channels 47 | c->WithRank(c->input(0), 3, &dims1); 48 | ::tensorflow::shape_inference::ShapeHandle dims2; // batch_size * npoints * nsample 49 | c->WithRank(c->input(1), 3, &dims2); 50 | // batch_size * npoints * nsample * channels 51 | ::tensorflow::shape_inference::ShapeHandle output = c->MakeShape({c->Dim(dims2, 0), c->Dim(dims2, 1), c->Dim(dims2, 2), c->Dim(dims1, 2)}); 52 | c->set_output(0, output); 53 | return Status::OK(); 54 | }); 55 | REGISTER_OP("GroupPointGrad") 56 | .Input("points: float32") 57 | .Input("idx: int32") 58 | .Input("grad_out: float32") 59 | .Output("grad_points: float32") 60 | .SetShapeFn([](::tensorflow::shape_inference::InferenceContext* c) { 61 | c->set_output(0, c->input(0)); 62 | return Status::OK(); 63 | }); 64 | 65 | 66 | void queryBallPointLauncher(int b, int n, int m, float radius, int nsample, const float *xyz1, const float *xyz2, int *idx, int *pts_cnt); 67 | class QueryBallPointGpuOp : public OpKernel { 68 | public: 69 | explicit QueryBallPointGpuOp(OpKernelConstruction* context) : OpKernel(context) { 70 | OP_REQUIRES_OK(context, context->GetAttr("radius", &radius_)); 71 | OP_REQUIRES(context, radius_ > 0, errors::InvalidArgument("QueryBallPoint expects positive radius")); 72 | 73 | OP_REQUIRES_OK(context, context->GetAttr("nsample", &nsample_)); 74 | OP_REQUIRES(context, nsample_ > 0, errors::InvalidArgument("QueryBallPoint expects positive nsample")); 75 | } 76 | 77 | void Compute(OpKernelContext* context) override { 78 | const Tensor& xyz1_tensor = context->input(0); 79 | OP_REQUIRES(context, xyz1_tensor.dims()==3 && xyz1_tensor.shape().dim_size(2)==3, errors::InvalidArgument("QueryBallPoint expects (batch_size, ndataset, 3) xyz1 shape.")); 80 | int b = xyz1_tensor.shape().dim_size(0); 81 | int n = xyz1_tensor.shape().dim_size(1); 82 | 83 | const Tensor& xyz2_tensor = context->input(1); 84 | OP_REQUIRES(context, xyz2_tensor.dims()==3 && xyz2_tensor.shape().dim_size(2)==3, errors::InvalidArgument("QueryBallPoint expects (batch_size, npoint, 3) xyz2 shape.")); 85 | int m = xyz2_tensor.shape().dim_size(1); 86 | 87 | Tensor *idx_tensor = nullptr; 88 | OP_REQUIRES_OK(context, context->allocate_output(0, TensorShape{b,m,nsample_}, &idx_tensor)); 89 | Tensor *pts_cnt_tensor = nullptr; 90 | OP_REQUIRES_OK(context, context->allocate_output(1, TensorShape{b,m}, &pts_cnt_tensor)); 91 | 92 | auto xyz1_flat = xyz1_tensor.flat(); 93 | const float *xyz1 = &(xyz1_flat(0)); 94 | auto xyz2_flat = xyz2_tensor.flat(); 95 | const float *xyz2 = 
&(xyz2_flat(0)); 96 | auto idx_flat = idx_tensor->flat(); 97 | int *idx = &(idx_flat(0)); 98 | auto pts_cnt_flat = pts_cnt_tensor->flat(); 99 | int *pts_cnt = &(pts_cnt_flat(0)); 100 | queryBallPointLauncher(b,n,m,radius_,nsample_,xyz1,xyz2,idx,pts_cnt); 101 | } 102 | private: 103 | float radius_; 104 | int nsample_; 105 | }; 106 | REGISTER_KERNEL_BUILDER(Name("QueryBallPoint").Device(DEVICE_GPU), QueryBallPointGpuOp); 107 | 108 | void selectionSortLauncher(int b, int n, int m, int k, const float *dist, int *outi, float *out); 109 | class SelectionSortGpuOp : public OpKernel { 110 | public: 111 | explicit SelectionSortGpuOp(OpKernelConstruction* context) : OpKernel(context) { 112 | OP_REQUIRES_OK(context, context->GetAttr("k", &k_)); 113 | OP_REQUIRES(context, k_ > 0, errors::InvalidArgument("SelectionSort expects positive k")); 114 | } 115 | 116 | void Compute(OpKernelContext* context) override { 117 | const Tensor& dist_tensor = context->input(0); 118 | OP_REQUIRES(context, dist_tensor.dims()==3, errors::InvalidArgument("SelectionSort expects (b,m,n) dist shape.")); 119 | int b = dist_tensor.shape().dim_size(0); 120 | int m = dist_tensor.shape().dim_size(1); 121 | int n = dist_tensor.shape().dim_size(2); 122 | 123 | Tensor *outi_tensor = nullptr; 124 | OP_REQUIRES_OK(context, context->allocate_output(0, TensorShape{b,m,n}, &outi_tensor)); 125 | Tensor *out_tensor = nullptr; 126 | OP_REQUIRES_OK(context, context->allocate_output(1, TensorShape{b,m,n}, &out_tensor)); 127 | 128 | auto dist_flat = dist_tensor.flat(); 129 | const float *dist = &(dist_flat(0)); 130 | auto outi_flat = outi_tensor->flat(); 131 | int *outi = &(outi_flat(0)); 132 | auto out_flat = out_tensor->flat(); 133 | float *out = &(out_flat(0)); 134 | selectionSortLauncher(b,n,m,k_,dist,outi,out); 135 | } 136 | private: 137 | int k_; 138 | }; 139 | REGISTER_KERNEL_BUILDER(Name("SelectionSort").Device(DEVICE_GPU), SelectionSortGpuOp); 140 | 141 | 142 | void groupPointLauncher(int b, int n, int c, int m, int nsample, const float *points, const int *idx, float *out); 143 | class GroupPointGpuOp: public OpKernel{ 144 | public: 145 | explicit GroupPointGpuOp(OpKernelConstruction * context):OpKernel(context){} 146 | 147 | void Compute(OpKernelContext * context) override { 148 | const Tensor& points_tensor=context->input(0); 149 | OP_REQUIRES(context, points_tensor.dims()==3, errors::InvalidArgument("GroupPoint expects (batch_size, num_points, channel) points shape")); 150 | int b = points_tensor.shape().dim_size(0); 151 | int n = points_tensor.shape().dim_size(1); 152 | int c = points_tensor.shape().dim_size(2); 153 | 154 | const Tensor& idx_tensor=context->input(1); 155 | OP_REQUIRES(context,idx_tensor.dims()==3 && idx_tensor.shape().dim_size(0)==b, errors::InvalidArgument("GroupPoint expects (batch_size, npoints, nsample) idx shape")); 156 | int m = idx_tensor.shape().dim_size(1); 157 | int nsample = idx_tensor.shape().dim_size(2); 158 | 159 | Tensor * out_tensor = nullptr; 160 | OP_REQUIRES_OK(context, context->allocate_output(0,TensorShape{b,m,nsample,c}, &out_tensor)); 161 | 162 | auto points_flat = points_tensor.flat(); 163 | const float *points = &(points_flat(0)); 164 | auto idx_flat = idx_tensor.flat(); 165 | const int *idx = &(idx_flat(0)); 166 | auto out_flat = out_tensor->flat(); 167 | float *out = &(out_flat(0)); 168 | groupPointLauncher(b,n,c,m,nsample,points,idx,out); 169 | } 170 | }; 171 | REGISTER_KERNEL_BUILDER(Name("GroupPoint").Device(DEVICE_GPU),GroupPointGpuOp); 172 | 173 | void groupPointGradLauncher(int b, 
int n, int c, int m, int nsample, const float *grad_out, const int *idx, float *grad_points); 174 | class GroupPointGradGpuOp: public OpKernel{ 175 | public: 176 | explicit GroupPointGradGpuOp(OpKernelConstruction * context):OpKernel(context){} 177 | 178 | void Compute(OpKernelContext * context) override { 179 | const Tensor& points_tensor=context->input(0); 180 | OP_REQUIRES(context, points_tensor.dims()==3, errors::InvalidArgument("GroupPointGrad expects (batch_size, num_points, channel) points shape")); 181 | int b = points_tensor.shape().dim_size(0); 182 | int n = points_tensor.shape().dim_size(1); 183 | int c = points_tensor.shape().dim_size(2); 184 | 185 | const Tensor& idx_tensor=context->input(1); 186 | OP_REQUIRES(context,idx_tensor.dims()==3 && idx_tensor.shape().dim_size(0)==b, errors::InvalidArgument("GroupPointGrad expects (batch_size, npoints, nsample) idx shape")); 187 | int m = idx_tensor.shape().dim_size(1); 188 | int nsample = idx_tensor.shape().dim_size(2); 189 | 190 | const Tensor& grad_out_tensor=context->input(2); 191 | OP_REQUIRES(context,grad_out_tensor.dims()==4 && grad_out_tensor.shape().dim_size(0)==b && grad_out_tensor.shape().dim_size(1)==m && grad_out_tensor.shape().dim_size(2)==nsample && grad_out_tensor.shape().dim_size(3)==c, errors::InvalidArgument("GroupPointGrad expects (batch_size, npoints, nsample, channel) grad_out shape")); 192 | 193 | Tensor * grad_points_tensor = nullptr; 194 | OP_REQUIRES_OK(context, context->allocate_output(0,TensorShape{b,n,c}, &grad_points_tensor)); 195 | 196 | auto points_flat = points_tensor.flat(); 197 | const float *points = &(points_flat(0)); 198 | auto idx_flat = idx_tensor.flat(); 199 | const int *idx = &(idx_flat(0)); 200 | auto grad_out_flat = grad_out_tensor.flat(); 201 | const float *grad_out = &(grad_out_flat(0)); 202 | auto grad_points_flat = grad_points_tensor->flat(); 203 | float *grad_points = &(grad_points_flat(0)); 204 | cudaMemset(grad_points, 0, sizeof(float)*b*n*c); 205 | groupPointGradLauncher(b,n,c,m,nsample,grad_out,idx,grad_points); 206 | } 207 | }; 208 | REGISTER_KERNEL_BUILDER(Name("GroupPointGrad").Device(DEVICE_GPU),GroupPointGradGpuOp); 209 | 210 | 211 | -------------------------------------------------------------------------------- /tf_ops/grouping/tf_grouping.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | from tensorflow.python.framework import ops 3 | import sys 4 | import os 5 | BASE_DIR = os.path.dirname(os.path.abspath(__file__)) 6 | sys.path.append(BASE_DIR) 7 | grouping_module=tf.load_op_library(os.path.join(BASE_DIR, 'tf_grouping_so.so')) 8 | def query_ball_point(radius, nsample, xyz1, xyz2): 9 | ''' 10 | Input: 11 | radius: float32, ball search radius 12 | nsample: int32, number of points selected in each ball region 13 | xyz1: (batch_size, ndataset, 3) float32 array, input points 14 | xyz2: (batch_size, npoint, 3) float32 array, query points 15 | Output: 16 | idx: (batch_size, npoint, nsample) int32 array, indices to input points 17 | pts_cnt: (batch_size, npoint) int32 array, number of unique points in each local region 18 | ''' 19 | #return grouping_module.query_ball_point(radius, nsample, xyz1, xyz2) 20 | return grouping_module.query_ball_point(xyz1, xyz2, radius, nsample) 21 | ops.NoGradient('QueryBallPoint') 22 | def select_top_k(k, dist): 23 | ''' 24 | Input: 25 | k: int32, number of k SMALLEST elements selected 26 | dist: (b,m,n) float32 array, distance matrix, m query points, n dataset points 27 | Output: 28 | 
idx: (b,m,n) int32 array, first k in n are indices to the top k 29 | dist_out: (b,m,n) float32 array, first k in n are the top k 30 | ''' 31 | return grouping_module.selection_sort(dist, k) 32 | ops.NoGradient('SelectionSort') 33 | def group_point(points, idx): 34 | ''' 35 | Input: 36 | points: (batch_size, ndataset, channel) float32 array, points to sample from 37 | idx: (batch_size, npoint, nsample) int32 array, indices to points 38 | Output: 39 | out: (batch_size, npoint, nsample, channel) float32 array, values sampled from points 40 | ''' 41 | return grouping_module.group_point(points, idx) 42 | @tf.RegisterGradient('GroupPoint') 43 | def _group_point_grad(op, grad_out): 44 | points = op.inputs[0] 45 | idx = op.inputs[1] 46 | return [grouping_module.group_point_grad(points, idx, grad_out), None] 47 | 48 | def knn_point(k, xyz1, xyz2): 49 | ''' 50 | Input: 51 | k: int32, number of k in k-nn search 52 | xyz1: (batch_size, ndataset, c) float32 array, input points 53 | xyz2: (batch_size, npoint, c) float32 array, query points 54 | Output: 55 | val: (batch_size, npoint, k) float32 array, L2 distances 56 | idx: (batch_size, npoint, k) int32 array, indices to input points 57 | ''' 58 | b = xyz1.get_shape()[0].value 59 | n = xyz1.get_shape()[1].value 60 | c = xyz1.get_shape()[2].value 61 | m = xyz2.get_shape()[1].value 62 | #print(b, n, c, m) 63 | #print(xyz1, (b,1,n,c)) 64 | xyz1 = tf.tile(tf.reshape(xyz1, (b,1,n,c)), [1,m,1,1]) 65 | xyz2 = tf.tile(tf.reshape(xyz2, (b,m,1,c)), [1,1,n,1]) 66 | dist = tf.reduce_sum((xyz1-xyz2)**2, -1) 67 | #print(dist, k) 68 | outi, out = select_top_k(k, dist) 69 | idx = tf.slice(outi, [0,0,0], [-1,-1,k]) 70 | val = tf.slice(out, [0,0,0], [-1,-1,k]) 71 | #print(idx, val) 72 | #val, idx = tf.nn.top_k(-dist, k=k) # ONLY SUPPORT CPU 73 | return val, idx 74 | 75 | if __name__=='__main__': 76 | knn=True 77 | import numpy as np 78 | import time 79 | np.random.seed(100) 80 | pts = np.random.random((32,512,64)).astype('float32') 81 | tmp1 = np.random.random((32,512,3)).astype('float32') 82 | tmp2 = np.random.random((32,128,3)).astype('float32') 83 | with tf.device('/gpu:1'): 84 | points = tf.constant(pts) 85 | xyz1 = tf.constant(tmp1) 86 | xyz2 = tf.constant(tmp2) 87 | radius = 0.1 88 | nsample = 64 89 | if knn: 90 | _, idx = knn_point(nsample, xyz1, xyz2) 91 | grouped_points = group_point(points, idx) 92 | else: 93 | idx, _ = query_ball_point(radius, nsample, xyz1, xyz2) 94 | grouped_points = group_point(points, idx) 95 | #grouped_points_grad = tf.ones_like(grouped_points) 96 | #points_grad = tf.gradients(grouped_points, points, grouped_points_grad) 97 | with tf.Session('') as sess: 98 | now = time.time() 99 | for _ in range(100): 100 | ret = sess.run(grouped_points) 101 | print(time.time() - now) 102 | print(ret.shape, ret.dtype) 103 | print(ret) 104 | 105 | 106 | -------------------------------------------------------------------------------- /tf_ops/grouping/tf_grouping_compile.sh: -------------------------------------------------------------------------------- 1 | #/bin/bash 2 | #/usr/local/cuda-9.0/bin/nvcc tf_grouping_g.cu -o tf_grouping_g.cu.o -c -O2 -DGOOGLE_CUDA=1 -x cu -Xcompiler -fPIC 3 | 4 | # TF1.2 5 | #g++ -std=c++11 tf_grouping.cpp tf_grouping_g.cu.o -o tf_grouping_so.so -shared -fPIC -I /usr/local/lib/python2.7/dist-packages/tensorflow/include -I /usr/local/cuda-8.0/include -lcudart -L /usr/local/cuda-8.0/lib64/ -O2 -D_GLIBCXX_USE_CXX11_ABI=0 6 | 7 | # TF1.4 8 | #g++ -std=c++11 tf_grouping.cpp tf_grouping_g.cu.o -o tf_grouping_so.so -shared -fPIC -I 
/usr/local/lib/python2.7/dist-packages/tensorflow/include -I /usr/local/cuda-8.0/include -I /usr/local/lib/python2.7/dist-packages/tensorflow/include/external/nsync/public -lcudart -L /usr/local/cuda-8.0/lib64/ -L/usr/local/lib/python2.7/dist-packages/tensorflow -ltensorflow_framework -O2 -D_GLIBCXX_USE_CXX11_ABI=0 9 | 10 | # TF1.12 (venv local) 11 | #g++ -std=c++11 tf_grouping.cpp tf_grouping_g.cu.o -o tf_grouping_so.so -shared -fPIC -I /home/ubuntu16/venv/lib/python3.5/site-packages/tensorflow/include -I /usr/local/cuda-9.0/include -I /home/ubuntu16/venv/lib/python3.5/site-packages/tensorflow/include/external/nsync/public -lcudart -L /usr/local/cuda-9.0/lib64/ -L/home/ubuntu16/venv/lib/python3.5/site-packages/tensorflow -ltensorflow_framework -O2 -D_GLIBCXX_USE_CXX11_ABI=0 12 | 13 | # TF1.8 cluster version 14 | CUDA_ROOT=/usr/local/cuda-9.0 15 | TF_INC=$(python -c 'import tensorflow as tf; print(tf.sysconfig.get_include())') 16 | TF_LIB=$(python -c 'import tensorflow as tf; print(tf.sysconfig.get_lib())') 17 | 18 | echo $CUDA_ROOT 19 | echo $TF_INC 20 | echo $TF_LIB 21 | 22 | $CUDA_ROOT/bin/nvcc tf_grouping_g.cu -o tf_grouping_g.cu.o -c -O2 -DGOOGLE_CUDA=1 -x cu -Xcompiler -fPIC 23 | 24 | # TF>=1.4.0 25 | g++ -std=c++11 tf_grouping.cpp tf_grouping_g.cu.o -o tf_grouping_so.so -shared -fPIC -I$TF_INC/ -I$TF_INC/external/nsync/public -L$TF_LIB -ltensorflow_framework -I$CUDA_ROOT/include -lcudart -L$CUDA_ROOT/lib64/ -O2 -D_GLIBCXX_USE_CXX11_ABI=0 -------------------------------------------------------------------------------- /tf_ops/grouping/tf_grouping_g.cu: -------------------------------------------------------------------------------- 1 | // input: radius (1), nsample (1), xyz1 (b,n,3), xyz2 (b,m,3) 2 | // output: idx (b,m,nsample), pts_cnt (b,m) 3 | __global__ void query_ball_point_gpu(int b, int n, int m, float radius, int nsample, const float *xyz1, const float *xyz2, int *idx, int *pts_cnt) { 4 | int batch_index = blockIdx.x; 5 | xyz1 += n*3*batch_index; 6 | xyz2 += m*3*batch_index; 7 | idx += m*nsample*batch_index; 8 | pts_cnt += m*batch_index; // counting how many unique points selected in local region 9 | 10 | int index = threadIdx.x; 11 | int stride = blockDim.x; 12 | 13 | for (int j=index;j>>(b,n,m,radius,nsample,xyz1,xyz2,idx,pts_cnt); 127 | //cudaDeviceSynchronize(); 128 | } 129 | void selectionSortLauncher(int b, int n, int m, int k, const float *dist, int *outi, float *out) { 130 | selection_sort_gpu<<>>(b,n,m,k,dist,outi,out); 131 | //cudaDeviceSynchronize(); 132 | } 133 | void groupPointLauncher(int b, int n, int c, int m, int nsample, const float *points, const int *idx, float *out){ 134 | group_point_gpu<<>>(b,n,c,m,nsample,points,idx,out); 135 | //cudaDeviceSynchronize(); 136 | } 137 | void groupPointGradLauncher(int b, int n, int c, int m, int nsample, const float *grad_out, const int *idx, float *grad_points){ 138 | group_point_grad_gpu<<>>(b,n,c,m,nsample,grad_out,idx,grad_points); 139 | //group_point_grad_gpu<<<1,1>>>(b,n,c,m,nsample,grad_out,idx,grad_points); 140 | //cudaDeviceSynchronize(); 141 | } 142 | -------------------------------------------------------------------------------- /tf_ops/grouping/tf_grouping_g.cu.o: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wsustcid/FlowDriveNet/3604495269ae45e5b43964046104f685ec66e383/tf_ops/grouping/tf_grouping_g.cu.o -------------------------------------------------------------------------------- /tf_ops/grouping/tf_grouping_op_test.py: 
-------------------------------------------------------------------------------- 
1 | import tensorflow as tf
2 | import numpy as np
3 | from tf_grouping import query_ball_point, group_point
4 | 
5 | class GroupPointTest(tf.test.TestCase):
6 | def test(self):
7 | pass
8 | 
9 | def test_grad(self):
10 | with tf.device('/gpu:0'):
11 | points = tf.constant(np.random.random((1,128,16)).astype('float32'))
12 | print(points)
13 | xyz1 = tf.constant(np.random.random((1,128,3)).astype('float32'))
14 | xyz2 = tf.constant(np.random.random((1,8,3)).astype('float32'))
15 | radius = 0.3
16 | nsample = 32
17 | idx, pts_cnt = query_ball_point(radius, nsample, xyz1, xyz2)
18 | grouped_points = group_point(points, idx)
19 | print(grouped_points)
20 | 
21 | with self.test_session():
22 | print("---- Going to compute gradient error")
23 | err = tf.test.compute_gradient_error(points, (1,128,16), grouped_points, (1,8,32,16))
24 | print(err)
25 | self.assertLess(err, 1e-4)
26 | 
27 | if __name__=='__main__':
28 | tf.test.main()
29 | 
-------------------------------------------------------------------------------- /tf_ops/grouping/tf_grouping_so.so: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wsustcid/FlowDriveNet/3604495269ae45e5b43964046104f685ec66e383/tf_ops/grouping/tf_grouping_so.so -------------------------------------------------------------------------------- /tf_ops/sampling/__pycache__/tf_sampling.cpython-35.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wsustcid/FlowDriveNet/3604495269ae45e5b43964046104f685ec66e383/tf_ops/sampling/__pycache__/tf_sampling.cpython-35.pyc -------------------------------------------------------------------------------- /tf_ops/sampling/tf_sampling.cpp: --------------------------------------------------------------------------------
1 | /* Furthest point sampling
2 | * Original author: Haoqiang Fan
3 | * Modified by Charles R. Qi
4 | * All Rights Reserved. 2017. 
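 *
 * Overview: this file registers four GPU ops. ProbSample draws category ids
 * by inverse-CDF sampling (a per-row cumulative sum followed by binary
 * search against the supplied random numbers); FarthestPointSample greedily
 * picks the point farthest from the already-chosen set; GatherPoint indexes
 * rows out of the input, and GatherPointGrad scatter-adds gradients back.
 * FarthestPointSample allocates a fixed (32,n) float temp buffer, sized to
 * match the 32-block kernel launch in tf_sampling_g.cu.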
5 | */ 6 | #include "tensorflow/core/framework/op.h" 7 | #include "tensorflow/core/framework/op_kernel.h" 8 | #include "tensorflow/core/framework/shape_inference.h" 9 | #include "tensorflow/core/framework/common_shape_fns.h" 10 | #include 11 | 12 | using namespace tensorflow; 13 | 14 | REGISTER_OP("ProbSample") 15 | .Input("inp: float32") 16 | .Input("inpr: float32") 17 | .Output("out: int32") 18 | .SetShapeFn([](::tensorflow::shape_inference::InferenceContext* c) { 19 | ::tensorflow::shape_inference::ShapeHandle dims1; // batch_size * ncategory 20 | c->WithRank(c->input(0), 2, &dims1); 21 | ::tensorflow::shape_inference::ShapeHandle dims2; // batch_size * npoints 22 | c->WithRank(c->input(1), 2, &dims2); 23 | // batch_size * npoints 24 | ::tensorflow::shape_inference::ShapeHandle output = c->MakeShape({c->Dim(dims2, 0), c->Dim(dims2, 1)}); 25 | c->set_output(0, output); 26 | return Status::OK(); 27 | }); 28 | REGISTER_OP("FarthestPointSample") 29 | .Attr("npoint: int") 30 | .Input("inp: float32") 31 | .Output("out: int32") 32 | .SetShapeFn([](::tensorflow::shape_inference::InferenceContext* c) { 33 | ::tensorflow::shape_inference::ShapeHandle dims1; // batch_size * npoint * 3 34 | c->WithRank(c->input(0), 3, &dims1); 35 | int npoint; 36 | TF_RETURN_IF_ERROR(c->GetAttr("npoint", &npoint)); 37 | ::tensorflow::shape_inference::ShapeHandle output = c->MakeShape({c->Dim(dims1, 0), npoint}); 38 | c->set_output(0, output); 39 | return Status::OK(); 40 | }); 41 | REGISTER_OP("GatherPoint") 42 | .Input("inp: float32") 43 | .Input("idx: int32") 44 | .Output("out: float32") 45 | .SetShapeFn([](::tensorflow::shape_inference::InferenceContext* c) { 46 | ::tensorflow::shape_inference::ShapeHandle dims1; // batch_size * ndataset * 3 47 | c->WithRank(c->input(0), 3, &dims1); 48 | ::tensorflow::shape_inference::ShapeHandle dims2; // batch_size * npoints 49 | c->WithRank(c->input(1), 2, &dims2); 50 | // batch_size * npoints * 3 51 | ::tensorflow::shape_inference::ShapeHandle output = c->MakeShape({c->Dim(dims1, 0), c->Dim(dims2, 1), c->Dim(dims1, 2)}); 52 | c->set_output(0, output); 53 | return Status::OK(); 54 | }); 55 | REGISTER_OP("GatherPointGrad") 56 | .Input("inp: float32") 57 | .Input("idx: int32") 58 | .Input("out_g: float32") 59 | .Output("inp_g: float32") 60 | .SetShapeFn([](::tensorflow::shape_inference::InferenceContext* c) { 61 | c->set_output(0, c->input(0)); 62 | return Status::OK(); 63 | }); 64 | 65 | void probsampleLauncher(int b,int n,int m,const float * inp_p,const float * inp_r,float * temp,int * out); 66 | class ProbSampleGpuOp: public OpKernel{ 67 | public: 68 | explicit ProbSampleGpuOp(OpKernelConstruction* context):OpKernel(context){} 69 | void Compute(OpKernelContext * context)override{ 70 | const Tensor& inp_tensor=context->input(0); 71 | const Tensor& inpr_tensor=context->input(1); 72 | auto inp_flat=inp_tensor.flat(); 73 | auto inpr_flat=inpr_tensor.flat(); 74 | const float * inp=&(inp_flat(0)); 75 | const float * inpr=&(inpr_flat(0)); 76 | OP_REQUIRES(context,inp_tensor.dims()==2,errors::InvalidArgument("ProbSample expects (batch_size,num_choices) inp shape")); 77 | int b=inp_tensor.shape().dim_size(0); 78 | int n=inp_tensor.shape().dim_size(1); 79 | OP_REQUIRES(context,inpr_tensor.dims()==2 && inpr_tensor.shape().dim_size(0)==b,errors::InvalidArgument("ProbSample expects (batch_size,num_points) inpr shape")); 80 | int m=inpr_tensor.shape().dim_size(1); 81 | Tensor * out_tensor=NULL; 82 | OP_REQUIRES_OK(context,context->allocate_output(0,TensorShape{b,m},&out_tensor)); 83 | 
auto out_flat=out_tensor->flat(); 84 | int * out=&(out_flat(0)); 85 | Tensor temp_tensor; 86 | OP_REQUIRES_OK(context,context->allocate_temp(DataTypeToEnum::value,TensorShape{b,n},&temp_tensor)); 87 | auto temp_flat=temp_tensor.flat(); 88 | float * temp=&(temp_flat(0)); 89 | probsampleLauncher(b,n,m,inp,inpr,temp,out); 90 | } 91 | }; 92 | REGISTER_KERNEL_BUILDER(Name("ProbSample").Device(DEVICE_GPU), ProbSampleGpuOp); 93 | 94 | void farthestpointsamplingLauncher(int b,int n,int m,const float * inp,float * temp,int * out); 95 | class FarthestPointSampleGpuOp: public OpKernel{ 96 | public: 97 | explicit FarthestPointSampleGpuOp(OpKernelConstruction* context):OpKernel(context) { 98 | OP_REQUIRES_OK(context, context->GetAttr("npoint", &npoint_)); 99 | OP_REQUIRES(context, npoint_ > 0, errors::InvalidArgument("FarthestPointSample expects positive npoint")); 100 | } 101 | void Compute(OpKernelContext * context)override{ 102 | int m = npoint_; 103 | 104 | const Tensor& inp_tensor=context->input(0); 105 | OP_REQUIRES(context,inp_tensor.dims()==3 && inp_tensor.shape().dim_size(2)==3,errors::InvalidArgument("FarthestPointSample expects (batch_size,num_points,3) inp shape")); 106 | int b=inp_tensor.shape().dim_size(0); 107 | int n=inp_tensor.shape().dim_size(1); 108 | auto inp_flat=inp_tensor.flat(); 109 | const float * inp=&(inp_flat(0)); 110 | Tensor * out_tensor; 111 | OP_REQUIRES_OK(context,context->allocate_output(0,TensorShape{b,m},&out_tensor)); 112 | auto out_flat=out_tensor->flat(); 113 | int * out=&(out_flat(0)); 114 | Tensor temp_tensor; 115 | OP_REQUIRES_OK(context,context->allocate_temp(DataTypeToEnum::value,TensorShape{32,n},&temp_tensor)); 116 | auto temp_flat=temp_tensor.flat(); 117 | float * temp=&(temp_flat(0)); 118 | farthestpointsamplingLauncher(b,n,m,inp,temp,out); 119 | } 120 | private: 121 | int npoint_; 122 | }; 123 | REGISTER_KERNEL_BUILDER(Name("FarthestPointSample").Device(DEVICE_GPU),FarthestPointSampleGpuOp); 124 | 125 | void gatherpointLauncher(int b,int n,int m,const float * inp,const int * idx,float * out); 126 | class GatherPointGpuOp: public OpKernel{ 127 | public: 128 | explicit GatherPointGpuOp(OpKernelConstruction * context):OpKernel(context){} 129 | void Compute(OpKernelContext * context)override{ 130 | const Tensor& inp_tensor=context->input(0); 131 | OP_REQUIRES(context,inp_tensor.dims()==3 && inp_tensor.shape().dim_size(2)==3,errors::InvalidArgument("GatherPoint expects (batch_size,num_points,3) inp shape")); 132 | int b=inp_tensor.shape().dim_size(0); 133 | int n=inp_tensor.shape().dim_size(1); 134 | const Tensor& idx_tensor=context->input(1); 135 | OP_REQUIRES(context,idx_tensor.dims()==2 && idx_tensor.shape().dim_size(0)==b,errors::InvalidArgument("GatherPoint expects (batch_size,num_result) idx shape")); 136 | int m=idx_tensor.shape().dim_size(1); 137 | auto inp_flat=inp_tensor.flat(); 138 | const float * inp=&(inp_flat(0)); 139 | auto idx_flat=idx_tensor.flat(); 140 | const int * idx=&(idx_flat(0)); 141 | Tensor * out_tensor=NULL; 142 | OP_REQUIRES_OK(context,context->allocate_output(0,TensorShape{b,m,3},&out_tensor)); 143 | auto out_flat=out_tensor->flat(); 144 | float * out=&(out_flat(0)); 145 | gatherpointLauncher(b,n,m,inp,idx,out); 146 | } 147 | }; 148 | REGISTER_KERNEL_BUILDER(Name("GatherPoint").Device(DEVICE_GPU),GatherPointGpuOp); 149 | 150 | void scatteraddpointLauncher(int b,int n,int m,const float * out_g,const int * idx,float * inp_g); 151 | class GatherPointGradGpuOp: public OpKernel{ 152 | public: 153 | explicit 
GatherPointGradGpuOp(OpKernelConstruction * context):OpKernel(context){} 154 | void Compute(OpKernelContext * context)override{ 155 | const Tensor& inp_tensor=context->input(0); 156 | OP_REQUIRES(context,inp_tensor.dims()==3 && inp_tensor.shape().dim_size(2)==3,errors::InvalidArgument("GatherPointGradGpuOp expects (batch_size,num_points,3) inp")); 157 | int b=inp_tensor.shape().dim_size(0); 158 | int n=inp_tensor.shape().dim_size(1); 159 | const Tensor& idx_tensor=context->input(1); 160 | OP_REQUIRES(context,idx_tensor.dims()==2 && idx_tensor.shape().dim_size(0)==b,errors::InvalidArgument("GatherPointGradGpuOp expects (batch_size,num_result) idx shape")); 161 | int m=idx_tensor.shape().dim_size(1); 162 | auto inp_flat=inp_tensor.flat(); 163 | const float * inp=&(inp_flat(0)); 164 | auto idx_flat=idx_tensor.flat(); 165 | const int * idx=&(idx_flat(0)); 166 | const Tensor& out_g_tensor=context->input(2); 167 | OP_REQUIRES(context,out_g_tensor.dims()==3 && out_g_tensor.shape().dim_size(0)==b && out_g_tensor.shape().dim_size(1)==m && out_g_tensor.shape().dim_size(2)==3,errors::InvalidArgument("GatherPointGradGpuOp expects (batch_size,num_result,3) out_g shape")); 168 | auto out_g_flat=out_g_tensor.flat(); 169 | const float * out_g=&(out_g_flat(0)); 170 | Tensor * inp_g_tensor=NULL; 171 | OP_REQUIRES_OK(context,context->allocate_output(0,TensorShape{b,n,3},&inp_g_tensor)); 172 | auto inp_g_flat=inp_g_tensor->flat(); 173 | float * inp_g=&(inp_g_flat(0)); 174 | cudaMemset(inp_g,0,b*n*3*4); 175 | scatteraddpointLauncher(b,n,m,out_g,idx,inp_g); 176 | } 177 | }; 178 | REGISTER_KERNEL_BUILDER(Name("GatherPointGrad").Device(DEVICE_GPU),GatherPointGradGpuOp); 179 | 180 | -------------------------------------------------------------------------------- /tf_ops/sampling/tf_sampling.py: -------------------------------------------------------------------------------- 1 | ''' 2 | @Author: Shuai Wang 3 | @Github: https://github.com/wsustcid 4 | @Version: 1.0.0 5 | @Date: 1970-01-01 08:00:00 6 | @LastEditTime: 2020-06-16 22:21:11 7 | @Description: 8 | ''' 9 | ''' Furthest point sampling 10 | Original author: Haoqiang Fan 11 | Modified by Charles R. Qi 12 | All Rights Reserved. 2017. 
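
Typical use of these wrappers (a sketch; shapes follow the docstrings below):

    idx = farthest_point_sample(npoint, xyz)  # xyz: (b,ndataset,3) float32 -> idx: (b,npoint) int32
    new_xyz = gather_point(xyz, idx)          # -> (b,npoint,3) float32
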
13 | ''' 14 | import tensorflow as tf 15 | from tensorflow.python.framework import ops 16 | import sys 17 | import os 18 | BASE_DIR = os.path.dirname(os.path.abspath(__file__)) 19 | sys.path.append(BASE_DIR) 20 | sampling_module=tf.load_op_library(os.path.join(BASE_DIR, 'tf_sampling_so.so')) 21 | def prob_sample(inp,inpr): 22 | ''' 23 | input: 24 | batch_size * ncategory float32 25 | batch_size * npoints float32 26 | returns: 27 | batch_size * npoints int32 28 | ''' 29 | return sampling_module.prob_sample(inp,inpr) 30 | ops.NoGradient('ProbSample') 31 | # TF1.0 API requires set shape in C++ 32 | #@tf.RegisterShape('ProbSample') 33 | #def _prob_sample_shape(op): 34 | # shape1=op.inputs[0].get_shape().with_rank(2) 35 | # shape2=op.inputs[1].get_shape().with_rank(2) 36 | # return [tf.TensorShape([shape2.dims[0],shape2.dims[1]])] 37 | def gather_point(inp,idx): 38 | ''' 39 | input: 40 | batch_size * ndataset * 3 float32 41 | batch_size * npoints int32 42 | returns: 43 | batch_size * npoints * 3 float32 44 | ''' 45 | return sampling_module.gather_point(inp,idx) 46 | #@tf.RegisterShape('GatherPoint') 47 | #def _gather_point_shape(op): 48 | # shape1=op.inputs[0].get_shape().with_rank(3) 49 | # shape2=op.inputs[1].get_shape().with_rank(2) 50 | # return [tf.TensorShape([shape1.dims[0],shape2.dims[1],shape1.dims[2]])] 51 | @tf.RegisterGradient('GatherPoint') 52 | def _gather_point_grad(op,out_g): 53 | inp=op.inputs[0] 54 | idx=op.inputs[1] 55 | return [sampling_module.gather_point_grad(inp,idx,out_g),None] 56 | def farthest_point_sample(npoint,inp): 57 | ''' 58 | input: 59 | int32 60 | batch_size * ndataset * 3 float32 61 | returns: 62 | batch_size * npoint int32 63 | ''' 64 | return sampling_module.farthest_point_sample(inp, npoint) 65 | ops.NoGradient('FarthestPointSample') 66 | 67 | 68 | if __name__=='__main__': 69 | import numpy as np 70 | np.random.seed(100) 71 | triangles=np.random.rand(1,5,3,3).astype('float32') 72 | with tf.device('/gpu:1'): 73 | inp=tf.constant(triangles) 74 | tria=inp[:,:,0,:] 75 | trib=inp[:,:,1,:] 76 | tric=inp[:,:,2,:] 77 | areas=tf.sqrt(tf.reduce_sum(tf.cross(trib-tria,tric-tria)**2,2)+1e-9) 78 | randomnumbers=tf.random_uniform((1,8192)) 79 | triids=prob_sample(areas,randomnumbers) 80 | tria_sample=gather_point(tria,triids) 81 | trib_sample=gather_point(trib,triids) 82 | tric_sample=gather_point(tric,triids) 83 | us=tf.random_uniform((1,8192)) 84 | vs=tf.random_uniform((1,8192)) 85 | uplusv=1-tf.abs(us+vs-1) 86 | uminusv=us-vs 87 | us=(uplusv+uminusv)*0.5 88 | vs=(uplusv-uminusv)*0.5 89 | pt_sample=tria_sample+(trib_sample-tria_sample)*tf.expand_dims(us,-1)+(tric_sample-tria_sample)*tf.expand_dims(vs,-1) 90 | print('pt_sample: ', pt_sample) 91 | reduced_sample=gather_point(pt_sample,farthest_point_sample(1024,pt_sample)) 92 | print(reduced_sample) 93 | with tf.Session('') as sess: 94 | ret=sess.run(reduced_sample) 95 | print(ret.shape,ret.dtype) 96 | import cPickle as pickle 97 | pickle.dump(ret,open('1.pkl','wb'),-1) 98 | -------------------------------------------------------------------------------- /tf_ops/sampling/tf_sampling_compile.sh: -------------------------------------------------------------------------------- 1 | #/bin/bash 2 | #/usr/local/cuda-9.0/bin/nvcc tf_sampling_g.cu -o tf_sampling_g.cu.o -c -O2 -DGOOGLE_CUDA=1 -x cu -Xcompiler -fPIC 3 | 4 | # TF1.2 5 | #g++ -std=c++11 tf_sampling.cpp tf_sampling_g.cu.o -o tf_sampling_so.so -shared -fPIC -I /usr/local/lib/python2.7/dist-packages/tensorflow/include -I /usr/local/cuda-8.0/include -lcudart -L 
/usr/local/cuda-8.0/lib64/ -O2 -D_GLIBCXX_USE_CXX11_ABI=0 6 | 7 | # TF1.4 8 | #g++ -std=c++11 tf_sampling.cpp tf_sampling_g.cu.o -o tf_sampling_so.so -shared -fPIC -I /usr/local/lib/python2.7/dist-packages/tensorflow/include -I /usr/local/cuda-8.0/include -I /usr/local/lib/python2.7/dist-packages/tensorflow/include/external/nsync/public -lcudart -L /usr/local/cuda-8.0/lib64/ -L/usr/local/lib/python2.7/dist-packages/tensorflow -ltensorflow_framework -O2 -D_GLIBCXX_USE_CXX11_ABI=0 9 | 10 | # TF1.12 (local venv) 11 | #g++ -std=c++11 tf_sampling.cpp tf_sampling_g.cu.o -o tf_sampling_so.so -shared -fPIC -I /home/ubuntu16/venv/lib/python3.5/site-packages/tensorflow/include -I /usr/local/cuda-9.0/include -I /home/ubuntu16/venv/lib/python3.5/site-packages/tensorflow/include/external/nsync/public -lcudart -L /usr/local/cuda-9.0/lib64/ -L/home/ubuntu16/venv/lib/python3.5/site-packages/tensorflow -ltensorflow_framework -O2 -D_GLIBCXX_USE_CXX11_ABI=0 12 | 13 | # ==== cluster version ==== 14 | CUDA_ROOT=/usr/local/cuda-9.0 15 | TF_INC=$(python -c 'import tensorflow as tf; print(tf.sysconfig.get_include())') 16 | TF_LIB=$(python -c 'import tensorflow as tf; print(tf.sysconfig.get_lib())') 17 | 18 | echo $CUDA_ROOT 19 | echo $TF_INC 20 | echo $TF_LIB 21 | 22 | $CUDA_ROOT/bin/nvcc tf_sampling_g.cu -o tf_sampling_g.cu.o -c -O2 -DGOOGLE_CUDA=1 -x cu -Xcompiler -fPIC 23 | 24 | # TF>=1.4.0 25 | g++ -std=c++11 tf_sampling.cpp tf_sampling_g.cu.o -o tf_sampling_so.so -shared -fPIC -I$TF_INC/ -I$TF_INC/external/nsync/public -L$TF_LIB -ltensorflow_framework -I$CUDA_ROOT/include -lcudart -L$CUDA_ROOT/lib64/ -O2 -D_GLIBCXX_USE_CXX11_ABI=0 -------------------------------------------------------------------------------- /tf_ops/sampling/tf_sampling_g.cu: -------------------------------------------------------------------------------- 1 | /* Furthest point sampling GPU implementation 2 | * Original author: Haoqiang Fan 3 | * Modified by Charles R. Qi 4 | * All Rights Reserved. 2017. 
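 *
 * Kernel layout (per the launchers at the bottom of this file): cumsumKernel
 * and farthestpointsamplingKernel run on fixed <<<32,512>>> grids with
 * shared-memory reductions, hence the "b*n" and "32*n" working-space notes
 * below; binarysearchKernel inverts the per-row cumulative distribution that
 * cumsumKernel builds for ProbSample; gatherpointKernel and
 * scatteraddpointKernel implement GatherPoint and its gradient.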
5 | */ 6 | 7 | __global__ void cumsumKernel(int b,int n,const float * __restrict__ inp,float * __restrict__ out){ 8 | const int BlockSize=2048; 9 | const int paddingLevel=5; 10 | __shared__ float buffer4[BlockSize*4]; 11 | __shared__ float buffer[BlockSize+(BlockSize>>paddingLevel)]; 12 | for (int i=blockIdx.x;i>2; 18 | for (int k=threadIdx.x*4;k>2)+(k>>(2+paddingLevel))]=v4; 33 | }else{ 34 | float v=0; 35 | for (int k2=k;k2>2)+(k>>(2+paddingLevel))]=v; 43 | } 44 | } 45 | int u=0; 46 | for (;(2<>(u+1));k+=blockDim.x){ 49 | int i1=(((k<<1)+2)<>paddingLevel; 52 | i2+=i2>>paddingLevel; 53 | buffer[i1]+=buffer[i2]; 54 | } 55 | } 56 | u--; 57 | for (;u>=0;u--){ 58 | __syncthreads(); 59 | for (int k=threadIdx.x;k>(u+1));k+=blockDim.x){ 60 | int i1=(((k<<1)+3)<>paddingLevel; 63 | i2+=i2>>paddingLevel; 64 | buffer[i1]+=buffer[i2]; 65 | } 66 | } 67 | __syncthreads(); 68 | for (int k=threadIdx.x*4;k>2)-1)+(((k>>2)-1)>>paddingLevel); 71 | buffer4[k]+=buffer[k2]; 72 | buffer4[k+1]+=buffer[k2]; 73 | buffer4[k+2]+=buffer[k2]; 74 | buffer4[k+3]+=buffer[k2]; 75 | } 76 | } 77 | __syncthreads(); 78 | for (int k=threadIdx.x;k>paddingLevel)]+runningsum2; 82 | float r2=runningsum+t; 83 | runningsum2=t-(r2-runningsum); 84 | runningsum=r2; 85 | __syncthreads(); 86 | } 87 | } 88 | } 89 | 90 | __global__ void binarysearchKernel(int b,int n,int m,const float * __restrict__ dataset,const float * __restrict__ query, int * __restrict__ result){ 91 | int base=1; 92 | while (base=1;k>>=1) 99 | if (r>=k && dataset[i*n+r-k]>=q) 100 | r-=k; 101 | result[i*m+j]=r; 102 | } 103 | } 104 | } 105 | __global__ void farthestpointsamplingKernel(int b,int n,int m,const float * __restrict__ dataset,float * __restrict__ temp,int * __restrict__ idxs){ 106 | if (m<=0) 107 | return; 108 | const int BlockSize=512; 109 | __shared__ float dists[BlockSize]; 110 | __shared__ int dists_i[BlockSize]; 111 | const int BufferSize=3072; 112 | __shared__ float buf[BufferSize*3]; 113 | for (int i=blockIdx.x;ibest){ 147 | best=d2; 148 | besti=k; 149 | } 150 | } 151 | dists[threadIdx.x]=best; 152 | dists_i[threadIdx.x]=besti; 153 | for (int u=0;(1<>(u+1))){ 156 | int i1=(threadIdx.x*2)<>>(b,n,inp,out); 196 | } 197 | //require b*n working space 198 | void probsampleLauncher(int b,int n,int m,const float * inp_p,const float * inp_r,float * temp,int * out){ 199 | cumsumKernel<<<32,512>>>(b,n,inp_p,temp); 200 | binarysearchKernel<<>>(b,n,m,temp,inp_r,out); 201 | } 202 | //require 32*n working space 203 | void farthestpointsamplingLauncher(int b,int n,int m,const float * inp,float * temp,int * out){ 204 | farthestpointsamplingKernel<<<32,512>>>(b,n,m,inp,temp,out); 205 | } 206 | void gatherpointLauncher(int b,int n,int m,const float * inp,const int * idx,float * out){ 207 | gatherpointKernel<<>>(b,n,m,inp,idx,out); 208 | } 209 | void scatteraddpointLauncher(int b,int n,int m,const float * out_g,const int * idx,float * inp_g){ 210 | scatteraddpointKernel<<>>(b,n,m,out_g,idx,inp_g); 211 | } 212 | 213 | -------------------------------------------------------------------------------- /tf_ops/sampling/tf_sampling_g.cu.o: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wsustcid/FlowDriveNet/3604495269ae45e5b43964046104f685ec66e383/tf_ops/sampling/tf_sampling_g.cu.o -------------------------------------------------------------------------------- /tf_ops/sampling/tf_sampling_so.so: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/wsustcid/FlowDriveNet/3604495269ae45e5b43964046104f685ec66e383/tf_ops/sampling/tf_sampling_so.so
--------------------------------------------------------------------------------
/train.py:
--------------------------------------------------------------------------------
1 | '''
2 | @Author: Shuai Wang
3 | @Github: https://github.com/wsustcid
4 | @Version: 1.0.0
5 | @Date: 2020-06-03 16:56:56
6 | @LastEditTime: 2020-10-07 16:14:17
7 | '''
8 | 
9 | import os
10 | import sys
11 | import argparse
12 | from datetime import datetime
13 | import time
14 | 
15 | import numpy as np
16 | import tensorflow as tf
17 | 
18 | base_dir = os.path.dirname(os.path.abspath(__file__))
19 | sys.path.append(base_dir)
20 | from utils.tf_util import log_string
21 | 
22 | from data_gen import DataLoader
23 | from models.flowdrivenet import FlowDriveNet
24 | 
25 | parser = argparse.ArgumentParser()
26 | parser.add_argument('--data_root', default='/gdata/wangshuai/Udacity/CH2',
27 |                     help='data root path [default: /gdata/wangshuai/Udacity/CH2]')
28 | parser.add_argument('--input_cfg', default='GRAY',
29 |                     help='Input type: GRAY, GRAYF, GRAYF-T, XYZ, XYZF, XYZF-T, GRAYF-XYZF-T')
30 | parser.add_argument('--model_cfg', default='VFE',
31 |                     help='Model type: VFE, VFE-TFP, PFE, PFE-TFP, VFE-PFE-TFP')
32 | parser.add_argument('--loss_cfg', default='MSE',
33 |                     help='loss type: MSE, weighted, step, exp')
34 | parser.add_argument('--height', type=int, default=200, help='img height')
35 | parser.add_argument('--width', type=int, default=200, help='img width')
36 | parser.add_argument('--seq_len', type=int, default=5, help='seq length')
37 | parser.add_argument('--aug_cfg', default='None', help='None, IA, RP, SC, BA, BS')
38 | #parser.add_argument('--use_side_cam', default=False, action='store_true')
39 | parser.add_argument('--num_point', type=int, default=10000, help='number of input points')
40 | parser.add_argument('--log_dir', default='test',
41 |                     help='Log dir [default: test]')
42 | parser.add_argument('--max_epoch', type=int, default=300,
43 |                     help='Epoch to run [default: 300]')
44 | parser.add_argument('--early_stop', type=int, default=10,
45 |                     help='stop training when the validation error stops decreasing [default: 10]')
46 | parser.add_argument('--batch_size', type=int, default=16,
47 |                     help='Batch Size during training [default: 16]')
48 | parser.add_argument('--learning_rate', type=float, default=0.0001,
49 |                     help='Learning rate during training [default: 0.0001]')
50 | parser.add_argument('--optimizer', default='adam',
51 |                     help='adam or momentum [default: adam]')
52 | parser.add_argument('--decay_steps', type=int, default=300000,
53 |                     help='Decay step for lr decay [default: 300000]') # decay_steps = n_train * epochs
54 | parser.add_argument('--decay_rate', type=float, default=0.7,
55 |                     help='Decay rate for lr decay [default: 0.7]')
56 | 
57 | FLAGS = parser.parse_args()
58 | 
59 | BATCH_SIZE = FLAGS.batch_size
60 | 
61 | log_dir = os.path.join(base_dir, 'logs', FLAGS.log_dir)
62 | os.makedirs(log_dir, exist_ok=True)
63 | train_log_dir = os.path.join(log_dir, 'log_train.txt')
64 | log_string(train_log_dir, str(FLAGS)+'\n')
65 | 
66 | # snapshot the model and training script into the log dir for reproducibility
67 | model_file = os.path.join(base_dir, 'models/flowdrivenet.py')
68 | train_file = os.path.join(base_dir, 'train.py')
69 | os.system('cp %s %s' % (model_file, log_dir))
70 | os.system('cp %s %s' % (train_file, log_dir))
71 | 
72 | dataloader = DataLoader(FLAGS.data_root, FLAGS.input_cfg,
73 |                         FLAGS.height, FLAGS.width,
74 |                         FLAGS.seq_len,
75 |                         FLAGS.num_point,
76 |                         FLAGS.aug_cfg)
77 | model = 
FlowDriveNet(FLAGS.input_cfg, FLAGS.model_cfg, 78 | FLAGS.height, FLAGS.width, 79 | FLAGS.seq_len, FLAGS.num_point) 80 | 81 | def get_bn_decay(batch): 82 | bn_momentum = tf.train.exponential_decay( 83 | 0.5, 84 | batch*BATCH_SIZE, 85 | float(FLAGS.decay_steps), 86 | 0.5, 87 | staircase=True) 88 | bn_decay = tf.minimum(0.99, 1 - bn_momentum) 89 | return bn_decay 90 | 91 | #def get_lr(batch): 92 | # lr = tf.train.exponential_decay(learning_rate=FLAGS.learning_rate, 93 | # global_step=batch*BATCH_SIZE, 94 | # decay_steps=FLAGS.decay_steps, 95 | # decay_rate=FLAGS.decay_rate, 96 | # staircase=True) 97 | # lr = tf.maximum(lr, 0.00001) 98 | # return lr 99 | 100 | def train(): 101 | with tf.Graph().as_default(): 102 | image_pl, points_pl, label_pl = model.get_inputs_pl(BATCH_SIZE) 103 | is_training_pl = tf.placeholder(tf.bool, shape=()) 104 | # define global_step; optimizer will increase it in every training loop 105 | batch = tf.get_variable('batch', [], 106 | initializer=tf.constant_initializer(0), 107 | trainable=False) 108 | bn_decay = get_bn_decay(batch) 109 | tf.summary.scalar('bn_decay', bn_decay) 110 | 111 | pred = model.get_model(image_pl, points_pl, is_training_pl, bn_decay) 112 | loss = model.get_loss(pred, label_pl, batch, FLAGS.loss_cfg) 113 | rmse_angle, rmse_speed = model.get_rmse(pred, label_pl) 114 | 115 | #learning_rate = get_lr(batch) 116 | if FLAGS.optimizer == 'adam': 117 | optimizer = tf.train.AdamOptimizer(FLAGS.learning_rate) 118 | tf.summary.scalar('learning_rate', FLAGS.learning_rate) 119 | 120 | train_op = optimizer.minimize(loss, global_step=batch) 121 | 122 | # Add ops to save and restore all the variables. 123 | saver = tf.train.Saver() 124 | 125 | # Create a session 126 | config = tf.ConfigProto() 127 | config.gpu_options.allow_growth = True 128 | config.allow_soft_placement = True 129 | config.log_device_placement = False 130 | sess = tf.Session(config=config) 131 | 132 | # Add summary writers 133 | merged = tf.summary.merge_all() 134 | train_writer = tf.summary.FileWriter(os.path.join(log_dir, 'train'), sess.graph) 135 | test_writer = tf.summary.FileWriter(os.path.join(log_dir, 'test'), sess.graph) 136 | 137 | # Init variables 138 | init = tf.global_variables_initializer() 139 | sess.run(init) 140 | 141 | # save all tensor 142 | ops = {'image_pl': image_pl, 143 | 'points_pl': points_pl, 144 | 'label_pl': label_pl, 145 | 'is_training_pl': is_training_pl, 146 | 'train_op': train_op, 147 | 'loss': loss, 148 | 'rmse_angle': rmse_angle, 149 | 'rmse_speed':rmse_speed, 150 | 'merged': merged, 151 | 'batch': batch} 152 | 153 | test_err_min = 100000 154 | for epoch in range(FLAGS.max_epoch): 155 | log_string(train_log_dir, '**** EPOCH %03d ****' % (epoch)) 156 | 157 | train_one_epoch(sess, ops, train_writer) 158 | test_err = test_one_epoch(sess, ops, test_writer) 159 | # save best 160 | if test_err < test_err_min: 161 | es_count = 0 162 | test_err_min = test_err 163 | save_path = saver.save(sess, os.path.join(log_dir, "model_best.ckpt")) 164 | log_string(train_log_dir, "Best model saved in : %s" % save_path) 165 | else: 166 | es_count +=1 167 | 168 | #if epoch % 10 == 0: 169 | # save_path = saver.save(sess, os.path.join(log_dir, "model.ckpt")) 170 | # log_string(train_log_dir, "Model saved in file: %s" % save_path) 171 | 172 | # Early Stopping 173 | if es_count >= FLAGS.early_stop: 174 | break 175 | 176 | 177 | def train_one_epoch(sess, ops, train_writer): 178 | """ ops: dict mapping from string to tf ops """ 179 | # shuffle data 180 | #data_loader.Xs_train, 
data_loader.y_train = shuffle(data_loader.Xs_train, data_loader.y_train) 181 | is_training = True 182 | log_string(train_log_dir, datetime.now().strftime("%Y-%m-%d %H:%M:%S")) 183 | 184 | num_batches = dataloader.num_train // BATCH_SIZE 185 | loss_sum = 0.0 186 | rmse_angle_sum = 0.0 187 | rmse_speed_sum = 0.0 188 | 189 | for i in range(num_batches): 190 | #t1 = time.time() 191 | X_image_batch, X_cloud_batch, y_batch = dataloader.load_train_batch(BATCH_SIZE) 192 | #t2 = time.time() 193 | 194 | feed_dict = {ops['image_pl']: X_image_batch, 195 | ops['points_pl']: X_cloud_batch, 196 | ops['label_pl']: y_batch, 197 | ops['is_training_pl']: is_training} 198 | 199 | summary, step, _, loss_batch, rmse_angle_batch, rmse_speed_batch = sess.run([ops['merged'], ops['batch'], ops['train_op'], ops['loss'], ops['rmse_angle'], ops['rmse_speed']], feed_dict=feed_dict) 200 | #t3 = time.time() 201 | #print("data time: {}; train time: {}".format(t2-t1, t3-t2)) 202 | 203 | train_writer.add_summary(summary, step) 204 | 205 | loss_sum += loss_batch 206 | rmse_angle_sum += rmse_angle_batch 207 | rmse_speed_sum += rmse_speed_batch 208 | 209 | log_string(train_log_dir, 'Train loss: %f' % (loss_sum / num_batches)) 210 | log_string(train_log_dir, 'Train rmse_angle: %f' % (rmse_angle_sum / num_batches)) 211 | log_string(train_log_dir, 'Train rmse_speed: %f' % (rmse_speed_sum / num_batches)) 212 | log_string(train_log_dir, 'Train rmse_average: %f' % ((rmse_angle_sum+rmse_speed_sum)/num_batches/2)) 213 | 214 | def test_one_epoch(sess, ops, test_writer): 215 | """ ops: dict mapping from string to tf ops """ 216 | 217 | is_training = False 218 | num_batches = dataloader.num_val // BATCH_SIZE 219 | loss_sum = 0.0 220 | rmse_angle_sum = 0.0 221 | rmse_speed_sum = 0.0 222 | 223 | for i in range(num_batches): 224 | X_image_batch, X_cloud_batch, y_batch = dataloader.load_val_batch(BATCH_SIZE) 225 | 226 | feed_dict = {ops['image_pl']: X_image_batch, 227 | ops['points_pl']: X_cloud_batch, 228 | ops['label_pl']: y_batch, 229 | ops['is_training_pl']: is_training} 230 | 231 | summary, step, loss_batch, rmse_angle_batch, rmse_speed_batch = sess.run([ops['merged'], ops['batch'], ops['loss'], ops['rmse_angle'], ops['rmse_speed']],feed_dict=feed_dict) 232 | 233 | test_writer.add_summary(summary, step) 234 | 235 | loss_sum += loss_batch 236 | rmse_angle_sum += rmse_angle_batch 237 | rmse_speed_sum += rmse_speed_batch 238 | 239 | log_string(train_log_dir, 'Val loss: %f' % (loss_sum / num_batches)) 240 | log_string(train_log_dir, 'Val rmse_angle: %f' % (rmse_angle_sum / num_batches)) 241 | log_string(train_log_dir, 'Val rmse_speed: %f' % (rmse_speed_sum / num_batches)) 242 | log_string(train_log_dir, 'Val rmse_average: %f' % ((rmse_angle_sum+rmse_speed_sum)/num_batches/2)) 243 | 244 | return (rmse_angle_sum+rmse_speed_sum)/num_batches/2 245 | 246 | if __name__ == "__main__": 247 | train() 248 | -------------------------------------------------------------------------------- /train_cmp.py: -------------------------------------------------------------------------------- 1 | ''' 2 | @Author: Shuai Wang 3 | @Github: https://github.com/wsustcid 4 | @Version: 1.0.0 5 | @Date: 2020-09-15 21:58:38 6 | @LastEditTime: 2020-09-16 20:57:19 7 | @Description: Training the comparision models 8 | ''' 9 | 10 | import os 11 | import sys 12 | import argparse 13 | from datetime import datetime 14 | import time 15 | 16 | import numpy as np 17 | import tensorflow as tf 18 | 19 | base_dir = os.path.dirname(os.path.abspath(__file__)) 20 | 
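# make the repo root importable so utils/, models/ and data_gen resolve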
sys.path.append(base_dir) 21 | from utils.tf_util import log_string 22 | 23 | from data_gen import DataLoader 24 | 25 | parser = argparse.ArgumentParser() 26 | parser.add_argument('--data_root', default='/gdata/wangshuai/Udacity/CH2', 27 | help='data_root path [default: local path]') 28 | parser.add_argument('--input_cfg', default='BGR', 29 | help='Input type: BGR, GRAYF-T, XYZ, GRAY') 30 | parser.add_argument('--model_cfg', default='PilotNet', 31 | help='Model type: PilotNet, BMWNet, PointNet, DroNet') 32 | parser.add_argument('--use_side_cam', default=False, action='store_true') 33 | parser.add_argument('--log_dir', default='test', 34 | help='Log dir [default: test]') 35 | parser.add_argument('--max_epoch', type=int, default=300, 36 | help='Epoch to run [default: 1000]') 37 | parser.add_argument('--early_stop', type=int, default=20, 38 | help='stop training when loss stop decreasing [default: 20]') 39 | parser.add_argument('--batch_size', type=int, default=8, 40 | help='Batch Size during training [default: 16]') 41 | parser.add_argument('--learning_rate', type=float, default=0.0001, 42 | help='Learning rate during training [default: 0.001]') 43 | parser.add_argument('--optimizer', default='adam', 44 | help='adam or momentum [default: adam]') 45 | parser.add_argument('--decay_steps', type=int, default=300000, 46 | help='Decay step for lr decay [default: 200000]') # decay_steps = n_train * epochs 47 | parser.add_argument('--decay_rate', type=float, default=0.7, 48 | help='Decay rate for lr decay [default: 0.7]') 49 | 50 | FLAGS = parser.parse_args() 51 | 52 | BATCH_SIZE = FLAGS.batch_size 53 | 54 | log_dir = os.path.join(base_dir, 'logs', FLAGS.log_dir) 55 | os.makedirs(log_dir, exist_ok=True) 56 | train_log_dir = os.path.join(log_dir, 'log_train.txt') 57 | log_string(train_log_dir, str(FLAGS)+'\n') 58 | 59 | 60 | if FLAGS.model_cfg == 'PilotNet': 61 | from models.pilotnet import PilotNet 62 | dataloader = DataLoader(FLAGS.data_root, "BGR", 63 | height=66, width=200, 64 | seq_len=None, 65 | num_point=None, 66 | use_side_cam=FLAGS.use_side_cam) 67 | model = PilotNet() 68 | elif FLAGS.model_cfg == 'BMWNet': 69 | from models.bmwnet import BMWNet 70 | # TODO add seq_len 71 | dataloader = DataLoader(FLAGS.data_root, 'GRAYF-T', 72 | height=66, width=200, 73 | seq_len=10, 74 | num_point=None, 75 | use_side_cam=FLAGS.use_side_cam) 76 | model = BMWNet() 77 | elif FLAGS.model_cfg == 'PointNet': 78 | from models.pointnet import PointNet 79 | dataloader = DataLoader(FLAGS.data_root, 'XYZ', 80 | height=None, width=None, 81 | seq_len=None, 82 | num_point=10000, 83 | use_side_cam=FLAGS.use_side_cam) 84 | model = PointNet(num_point=10000) 85 | elif FLAGS.model_cfg == 'DroNet': 86 | from models.dronet import DroNet 87 | dataloader = DataLoader(FLAGS.data_root, 'GRAY', 88 | height=200, width=200, 89 | seq_len=None, 90 | num_point=None, 91 | use_side_cam=FLAGS.use_side_cam) 92 | model = DroNet() 93 | else: 94 | raise TypeError 95 | 96 | def get_bn_decay(batch): 97 | bn_momentum = tf.train.exponential_decay( 98 | 0.5, 99 | batch*BATCH_SIZE, 100 | float(FLAGS.decay_steps), 101 | 0.5, 102 | staircase=True) 103 | bn_decay = tf.minimum(0.99, 1 - bn_momentum) 104 | return bn_decay 105 | 106 | #def get_lr(batch): 107 | # lr = tf.train.exponential_decay(learning_rate=FLAGS.learning_rate, 108 | # global_step=batch*BATCH_SIZE, 109 | # decay_steps=FLAGS.decay_steps, 110 | # decay_rate=FLAGS.decay_rate, 111 | # staircase=True) 112 | # lr = tf.maximum(lr, 0.00001) 113 | # return lr 114 | 115 | def train(): 116 | with 
tf.Graph().as_default(): 117 | feature_pl, label_pl = model.get_inputs_pl(BATCH_SIZE) 118 | is_training_pl = tf.placeholder(tf.bool, shape=()) 119 | # define global_step; optimizer will increase it in every training loop 120 | batch = tf.get_variable('batch', [], 121 | initializer=tf.constant_initializer(0), 122 | trainable=False) 123 | bn_decay = get_bn_decay(batch) 124 | 125 | pred = model.get_model(feature_pl, is_training_pl, bn_decay) 126 | loss = model.get_loss(pred, label_pl) 127 | 128 | #learning_rate = get_lr(batch) 129 | if FLAGS.optimizer == 'adam': 130 | optimizer = tf.train.AdamOptimizer(FLAGS.learning_rate) 131 | tf.summary.scalar('learning_rate', FLAGS.learning_rate) 132 | 133 | train_op = optimizer.minimize(loss, global_step=batch) 134 | 135 | # Add ops to save and restore all the variables. 136 | saver = tf.train.Saver() 137 | 138 | # Create a session 139 | config = tf.ConfigProto() 140 | config.gpu_options.allow_growth = True 141 | config.allow_soft_placement = True 142 | config.log_device_placement = False 143 | sess = tf.Session(config=config) 144 | 145 | # Add summary writers 146 | merged = tf.summary.merge_all() 147 | train_writer = tf.summary.FileWriter(os.path.join(log_dir, 'train'), sess.graph) 148 | test_writer = tf.summary.FileWriter(os.path.join(log_dir, 'test'), sess.graph) 149 | 150 | # Init variables 151 | init = tf.global_variables_initializer() 152 | sess.run(init) 153 | 154 | # save all tensor 155 | ops = {'feature_pl': feature_pl, 156 | 'label_pl': label_pl, 157 | 'is_training_pl': is_training_pl, 158 | 'train_op': train_op, 159 | 'loss': loss, 160 | 'pred': pred, 161 | 'merged': merged, 162 | 'batch': batch} 163 | 164 | test_err_min = 100000 165 | for epoch in range(FLAGS.max_epoch): 166 | log_string(train_log_dir, '**** EPOCH %03d ****' % (epoch)) 167 | 168 | train_one_epoch(sess, ops, train_writer) 169 | test_err = test_one_epoch(sess, ops, test_writer) 170 | # save best 171 | if test_err < test_err_min: 172 | es_count = 0 173 | test_err_min = test_err 174 | save_path = saver.save(sess, os.path.join(log_dir, "model_best.ckpt")) 175 | log_string(train_log_dir, "Best model saved in : %s" % save_path) 176 | else: 177 | es_count +=1 178 | 179 | #if epoch % 10 == 0: 180 | # save_path = saver.save(sess, os.path.join(log_dir, "model.ckpt")) 181 | # log_string(train_log_dir, "Model saved in file: %s" % save_path) 182 | 183 | # Early Stopping 184 | if es_count >= FLAGS.early_stop: 185 | break 186 | 187 | 188 | def train_one_epoch(sess, ops, train_writer): 189 | """ ops: dict mapping from string to tf ops """ 190 | # shuffle data 191 | #data_loader.Xs_train, data_loader.y_train = shuffle(data_loader.Xs_train, data_loader.y_train) 192 | is_training = True 193 | log_string(train_log_dir, datetime.now().strftime("%Y-%m-%d %H:%M:%S")) 194 | 195 | num_batches = dataloader.num_train // BATCH_SIZE 196 | loss_sum = 0.0 197 | rmse_angle_sum = 0.0 198 | rmse_speed_sum = 0.0 199 | 200 | for i in range(num_batches): 201 | #t1 = time.time() 202 | if FLAGS.model_cfg in ['PilotNet', 'BMWNet']: 203 | X_batch, y = dataloader.load_image_train_batch(BATCH_SIZE) 204 | y_batch = y[:,0:1] 205 | elif FLAGS.model_cfg == 'PointNet': 206 | X_batch, y = dataloader.load_cloud_train_batch(BATCH_SIZE) 207 | y_batch = y[:,1:2] 208 | elif FLAGS.model_cfg == 'DroNet': 209 | X_batch, y_batch = dataloader.load_image_train_batch(BATCH_SIZE) 210 | else: 211 | raise TypeError 212 | #t2 = time.time() 213 | 214 | feed_dict = {ops['feature_pl']: X_batch, 215 | ops['label_pl']: y_batch, 216 | 
                     ops['is_training_pl']: is_training}
217 | 
218 |         summary, step, _, loss_batch, pred_batch = sess.run([ops['merged'], ops['batch'], ops['train_op'], ops['loss'], ops['pred']], feed_dict=feed_dict)
219 |         #t3 = time.time()
220 |         #print("data time: {}; train time: {}".format(t2-t1, t3-t2))
221 | 
222 |         train_writer.add_summary(summary, step)
223 | 
224 |         loss_sum += loss_batch
225 |         if FLAGS.model_cfg in ['PilotNet', 'BMWNet']:
226 |             rmse_angle_batch = np.sqrt(np.mean(np.square(pred_batch-y_batch)))
227 |             rmse_angle_sum += rmse_angle_batch
228 |         elif FLAGS.model_cfg == 'PointNet':
229 |             rmse_speed_batch = np.sqrt(np.mean(np.square(pred_batch-y_batch)))
230 |             rmse_speed_sum += rmse_speed_batch
231 |         elif FLAGS.model_cfg == 'DroNet':
232 |             rmse_batch = np.sqrt(np.mean(np.square(pred_batch-y_batch), axis=0))
233 |             rmse_angle_sum += rmse_batch[0]
234 |             rmse_speed_sum += rmse_batch[1]
235 |         else:
236 |             raise TypeError
237 | 
238 |     log_string(train_log_dir, 'Train loss: %f' % (loss_sum / num_batches))
239 |     if FLAGS.model_cfg in ['PilotNet', 'BMWNet']:
240 |         log_string(train_log_dir, 'Train rmse_angle: %f' % (rmse_angle_sum / num_batches))
241 |     elif FLAGS.model_cfg == 'PointNet':
242 |         log_string(train_log_dir, 'Train rmse_speed: %f' % (rmse_speed_sum / num_batches))
243 |     elif FLAGS.model_cfg == 'DroNet':
244 |         log_string(train_log_dir, 'Train rmse_angle: %f' % (rmse_angle_sum / num_batches))
245 |         log_string(train_log_dir, 'Train rmse_speed: %f' % (rmse_speed_sum / num_batches))
246 |     else:
247 |         raise TypeError
248 | 
249 | def test_one_epoch(sess, ops, test_writer):
250 |     """ ops: dict mapping from string to tf ops """
251 | 
252 |     is_training = False
253 |     log_string(train_log_dir, datetime.now().strftime("%Y-%m-%d %H:%M:%S"))
254 | 
255 |     num_batches = dataloader.num_val // BATCH_SIZE
256 |     loss_sum = 0.0
257 |     rmse_angle_sum = 0.0
258 |     rmse_speed_sum = 0.0
259 | 
260 |     for i in range(num_batches):
261 |         if FLAGS.model_cfg in ['PilotNet', 'BMWNet']:
262 |             X_batch, y = dataloader.load_image_val_batch(BATCH_SIZE)
263 |             y_batch = y[:,0:1]
264 |         elif FLAGS.model_cfg == 'PointNet':
265 |             X_batch, y = dataloader.load_cloud_val_batch(BATCH_SIZE)
266 |             y_batch = y[:,1:2]
267 |         elif FLAGS.model_cfg == 'DroNet':
268 |             X_batch, y_batch = dataloader.load_image_val_batch(BATCH_SIZE)
269 |         else:
270 |             raise TypeError
271 | 
272 |         feed_dict = {ops['feature_pl']: X_batch,
273 |                      ops['label_pl']: y_batch,
274 |                      ops['is_training_pl']: is_training}
275 | 
276 |         # evaluation only: unlike train_one_epoch, train_op is NOT run here,
277 |         # so the model does not keep training on the validation set
278 |         summary, step, loss_batch, pred_batch = sess.run([ops['merged'], ops['batch'], ops['loss'], ops['pred']], feed_dict=feed_dict)
279 | 
280 |         test_writer.add_summary(summary, step)
281 | 
282 |         loss_sum += loss_batch
283 |         if FLAGS.model_cfg in ['PilotNet', 'BMWNet']:
284 |             rmse_angle_batch = np.sqrt(np.mean(np.square(pred_batch-y_batch)))
285 |             rmse_angle_sum += rmse_angle_batch
286 |         elif FLAGS.model_cfg == 'PointNet':
287 |             rmse_speed_batch = np.sqrt(np.mean(np.square(pred_batch-y_batch)))
288 |             rmse_speed_sum += rmse_speed_batch
289 |         elif FLAGS.model_cfg == 'DroNet':
290 |             rmse_batch = np.sqrt(np.mean(np.square(pred_batch-y_batch), axis=0))
291 |             rmse_angle_sum += rmse_batch[0]
292 |             rmse_speed_sum += rmse_batch[1]
293 |         else:
294 |             raise TypeError
295 | 
296 |     log_string(train_log_dir, 'Val loss: %f' % (loss_sum / num_batches))
297 |     if FLAGS.model_cfg in ['PilotNet', 'BMWNet']:
298 |         log_string(train_log_dir, 'Val rmse_angle: %f' % (rmse_angle_sum / num_batches))
299 |         return rmse_angle_sum/num_batches
300 |     elif FLAGS.model_cfg == 'PointNet':
301 |         log_string(train_log_dir, 'Val rmse_speed: %f' % (rmse_speed_sum / num_batches))
302 |         return rmse_speed_sum/num_batches
303 |     elif FLAGS.model_cfg == 'DroNet':
304 |         log_string(train_log_dir, 'Val rmse_angle: %f' % (rmse_angle_sum / num_batches))
305 |         log_string(train_log_dir, 'Val rmse_speed: %f' % (rmse_speed_sum / num_batches))
306 |         return (rmse_angle_sum+rmse_speed_sum)/num_batches/2
307 |     else:
308 |         raise TypeError
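# NOTE: PilotNet and BMWNet regress the steering angle only (label column 0),
# PointNet regresses speed only (column 1), and DroNet predicts both, so the
# value returned above is the angle RMSE, the speed RMSE, or their average.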
309 | 
310 | 
311 | 
312 | if __name__ == "__main__":
313 |     train()
314 | 
--------------------------------------------------------------------------------
/train_multi_gpus.py:
--------------------------------------------------------------------------------
1 | '''
2 | @Author: Shuai Wang
3 | @Github: https://github.com/wsustcid
4 | @Version: 1.0.0
5 | @Date: 2020-06-03 16:56:56
6 | @LastEditTime: 2020-09-10 23:38:05
7 | '''
8 | 
9 | import os
10 | import sys
11 | import argparse
12 | from datetime import datetime
13 | import time
14 | 
15 | import numpy as np
16 | import tensorflow as tf
17 | 
18 | base_dir = os.path.dirname(os.path.abspath(__file__))
19 | sys.path.append(base_dir)
20 | from utils.tf_util import log_string
21 | 
22 | from data_gen import DataLoader
23 | from models.flowdrivenet import FlowDriveNet
24 | 
25 | parser = argparse.ArgumentParser()
26 | parser.add_argument('--num_gpus', type=int, default=2,
27 |                     help='number of GPUs to use [default: 2]')
28 | parser.add_argument('--data_root', default='/gdata/wangshuai/Udacity/CH2',
29 |                     help='data root path [default: /gdata/wangshuai/Udacity/CH2]')
30 | parser.add_argument('--input_cfg', default='GRAY',
31 |                     help='Input type: GRAY, GRAYF, GRAYF-T, XYZ, XYZF, XYZF-T, GRAYF-XYZF-T')
32 | parser.add_argument('--model_cfg', default='VFE',
33 |                     help='Model type: VFE, VFE-TFP, PFE, PFE-TFP, VFE-PFE-TFP')
34 | parser.add_argument('--loss_cfg', default='MSE',
35 |                     help='loss type: MSE, weighted, step, exp')
36 | parser.add_argument('--height', type=int, default=200, help='img height')
37 | parser.add_argument('--width', type=int, default=200, help='img width')
38 | parser.add_argument('--seq_len', type=int, default=5, help='seq length')
39 | parser.add_argument('--use_side_cam', default=False, action='store_true')
40 | parser.add_argument('--num_point', type=int, default=20000, help='number of input points')
41 | parser.add_argument('--log_dir', default='test',
42 |                     help='Log dir [default: test]')
43 | parser.add_argument('--max_epoch', type=int, default=300,
44 |                     help='Epoch to run [default: 300]')
45 | parser.add_argument('--early_stop', type=int, default=20,
46 |                     help='stop training when loss stops decreasing [default: 20]')
47 | parser.add_argument('--batch_size', type=int, default=1,
48 |                     help='Batch Size per GPU during training [default: 1]')
49 | parser.add_argument('--learning_rate', type=float, default=0.0001,
50 |                     help='Learning rate during training [default: 0.0001]')
51 | parser.add_argument('--optimizer', default='adam',
52 |                     help='adam or momentum [default: adam]')
53 | parser.add_argument('--decay_steps', type=int, default=300000,
54 |                     help='Decay step for lr decay [default: 300000]') # decay_steps = n_train * epochs
55 | parser.add_argument('--decay_rate', type=float, default=0.7,
56 |                     help='Decay rate for lr decay [default: 0.7]')
57 | 
58 | FLAGS = parser.parse_args()
59 | 
60 | BATCH_SIZE = FLAGS.num_gpus * FLAGS.batch_size
61 | DEVICE_BATCH_SIZE = FLAGS.batch_size
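# each GPU tower consumes DEVICE_BATCH_SIZE samples per step; train() below
# slices the global BATCH_SIZE batch across the towers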
62 | 
63 | log_dir = os.path.join(base_dir, 'logs', FLAGS.log_dir)
64 | os.makedirs(log_dir, exist_ok=True)
65 | train_log_dir = os.path.join(log_dir, 'log_train.txt')
66 | log_string(train_log_dir, datetime.now().strftime("%Y-%m-%d %H:%M:%S"))
67 | log_string(train_log_dir, str(FLAGS)+'\n')
68 | 
69 | model_file = os.path.join(base_dir, 'models/flowdrivenet.py')
70 | train_file = os.path.join(base_dir, 'train_multi_gpus.py')
71 | os.system('cp %s %s' % (model_file, log_dir))
72 | os.system('cp %s %s' % (train_file, log_dir))
73 | 
74 | # build the data loader and the model with the same arguments as in train.py
75 | dataloader = DataLoader(FLAGS.data_root, FLAGS.input_cfg, FLAGS.height, FLAGS.width,
76 |                         FLAGS.seq_len, FLAGS.num_point, use_side_cam=FLAGS.use_side_cam)
77 | model = FlowDriveNet(FLAGS.input_cfg, FLAGS.model_cfg, FLAGS.height,
78 |                      FLAGS.width, FLAGS.seq_len, FLAGS.num_point)
79 | 
80 | def average_gradients(tower_grads):
81 |     """Calculate the average gradient for each shared variable across all towers.
82 |     Note that this function provides a synchronization point across all towers.
83 |     From tensorflow tutorial: cifar10/cifar10_multi_gpu_train.py
84 |     Args:
85 |         tower_grads: List of lists of (gradient, variable) tuples. The outer list
86 |             is over individual gradients. The inner list is over the gradient
87 |             calculation for each tower.
88 |     Returns:
89 |         List of pairs of (gradient, variable) where the gradient has been averaged
90 |         across all towers.
91 |     """
92 |     average_grads = []
93 |     for grad_and_vars in zip(*tower_grads):
94 |         # Note that each grad_and_vars looks like the following:
95 |         #   ((grad0_gpu0, var0_gpu0), ... , (grad0_gpuN, var0_gpuN))
96 |         grads = []
97 |         #for g, _ in grad_and_vars:
98 |         for g, v in grad_and_vars:
99 |             # Add 0 dimension to the gradients to represent the tower.
100 |             expanded_g = tf.expand_dims(g, 0)
101 | 
102 |             # Append on a 'tower' dimension which we will average over below.
103 |             grads.append(expanded_g)
104 | 
105 |         # Average over the 'tower' dimension.
106 |         grad = tf.concat(axis=0, values=grads)
107 |         grad = tf.reduce_mean(grad, 0)
108 | 
109 |         # Keep in mind that the Variables are redundant because they are shared
110 |         # across towers. So .. we will just return the first tower's pointer to
111 |         # the Variable.
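        # e.g. with two towers, grad_and_vars is ((g_gpu0, w), (g_gpu1, w)):
        # the gradients were stacked on a new leading axis and mean-reduced,
        # and the shared variable w is taken from the first tower.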
112 | v = grad_and_vars[0][1] 113 | grad_and_var = (grad, v) 114 | average_grads.append(grad_and_var) 115 | return average_grads 116 | 117 | def get_bn_decay(batch): 118 | bn_momentum = tf.train.exponential_decay( 119 | 0.5, 120 | batch*BATCH_SIZE, 121 | float(FLAGS.decay_steps), 122 | 0.5, 123 | staircase=True) 124 | bn_decay = tf.minimum(0.99, 1 - bn_momentum) 125 | return bn_decay 126 | 127 | #def get_lr(batch): 128 | # lr = tf.train.exponential_decay(learning_rate=FLAGS.learning_rate, 129 | # global_step=batch*BATCH_SIZE, 130 | # decay_steps=FLAGS.decay_steps, 131 | # decay_rate=FLAGS.decay_rate, 132 | # staircase=True) 133 | # lr = tf.maximum(lr, 0.00001) 134 | # return lr 135 | 136 | def train(): 137 | with tf.Graph().as_default(): 138 | with tf.device('/cpu:0'): 139 | image_pl, points_pl, label_pl = model.get_inputs_pl(BATCH_SIZE) 140 | is_training_pl = tf.placeholder(tf.bool, shape=()) 141 | # define global_step; optimizer will increase it in every training loop 142 | batch = tf.get_variable('batch', [], 143 | initializer=tf.constant_initializer(0), 144 | trainable=False) 145 | bn_decay = get_bn_decay(batch) 146 | tf.summary.scalar('bn_decay', bn_decay) 147 | 148 | if FLAGS.optimizer == 'adam': 149 | optimizer = tf.train.AdamOptimizer(FLAGS.learning_rate) 150 | tf.summary.scalar('learning_rate', FLAGS.learning_rate) 151 | 152 | # Allocating variables on CPU first to accelerate multi-gpu training. 153 | model.get_model(image_pl, points_pl, is_training_pl, bn_decay) 154 | 155 | # ------------------------------------------- 156 | # Get model and loss on multiple GPU devices 157 | # Data Parallel 158 | # ------------------------------------------- 159 | tower_grads = [] 160 | pred_gpus = [] 161 | loss_gpus = [] 162 | for i in range(FLAGS.num_gpus): 163 | with tf.variable_scope(tf.get_variable_scope(), reuse=True): 164 | with tf.device('/gpu:%d'%(i)), tf.name_scope('gpu_%d'%(i)) as scope: 165 | # split data to each gpu 166 | image_pl_gpu = image_pl[i*DEVICE_BATCH_SIZE: (i+1)*DEVICE_BATCH_SIZE] 167 | points_pl_gpu = points_pl[i*DEVICE_BATCH_SIZE: (i+1)*DEVICE_BATCH_SIZE] 168 | label_pl_gpu = label_pl[i*DEVICE_BATCH_SIZE: (i+1)*DEVICE_BATCH_SIZE] 169 | 170 | pred_gpu = model.get_model(image_pl_gpu, points_pl_gpu, 171 | is_training_pl, bn_decay) 172 | loss_gpu = model.get_loss(pred_gpu, label_pl_gpu, 173 | batch, FLAGS.loss_cfg, name=scope) 174 | 175 | grads_gpu = optimizer.compute_gradients(loss_gpu) 176 | tower_grads.append(grads_gpu) 177 | pred_gpus.append(pred_gpu) 178 | loss_gpus.append(loss_gpu) 179 | 180 | # merge pred and losses from multiple gpus 181 | grads = average_gradients(tower_grads) 182 | pred = tf.concat(pred_gpus, 0) 183 | loss = tf.reduce_mean(loss_gpus) 184 | 185 | train_op = optimizer.apply_gradients(grads, global_step=batch) 186 | 187 | rmse_angle, rmse_speed = model.get_rmse(pred, label_pl) 188 | 189 | 190 | # Add ops to save and restore all the variables. 
191 | saver = tf.train.Saver() 192 | 193 | # Create a session 194 | config = tf.ConfigProto() 195 | config.gpu_options.allow_growth = True 196 | config.allow_soft_placement = True 197 | config.log_device_placement = False 198 | sess = tf.Session(config=config) 199 | 200 | # Add summary writers 201 | merged = tf.summary.merge_all() 202 | train_writer = tf.summary.FileWriter(os.path.join(log_dir, 'train'), sess.graph) 203 | test_writer = tf.summary.FileWriter(os.path.join(log_dir, 'test'), sess.graph) 204 | 205 | # Init variables 206 | init = tf.global_variables_initializer() 207 | sess.run(init, feed_dict={is_training_pl: True}) 208 | 209 | # save all tensor 210 | ops = {'image_pl': image_pl, 211 | 'points_pl': points_pl, 212 | 'label_pl': label_pl, 213 | 'is_training_pl': is_training_pl, 214 | 'train_op': train_op, 215 | 'loss': loss, 216 | 'rmse_angle': rmse_angle, 217 | 'rmse_speed':rmse_speed, 218 | 'merged': merged, 219 | 'batch': batch} 220 | 221 | test_err_min = 100000 222 | for epoch in range(FLAGS.max_epoch): 223 | log_string(train_log_dir, '**** EPOCH %03d ****' % (epoch)) 224 | 225 | train_one_epoch(sess, ops, train_writer) 226 | test_err = test_one_epoch(sess, ops, test_writer) 227 | # save best 228 | if test_err < test_err_min: 229 | es_count = 0 230 | test_err_min = test_err 231 | save_path = saver.save(sess, os.path.join(log_dir, "model_best.ckpt")) 232 | log_string(train_log_dir, "Best model saved in : %s" % save_path) 233 | else: 234 | es_count +=1 235 | 236 | #if epoch % 10 == 0: 237 | # save_path = saver.save(sess, os.path.join(log_dir, "model.ckpt")) 238 | # log_string(train_log_dir, "Model saved in file: %s" % save_path) 239 | 240 | # Early Stopping 241 | if es_count >= FLAGS.early_stop: 242 | break 243 | 244 | 245 | def train_one_epoch(sess, ops, train_writer): 246 | """ ops: dict mapping from string to tf ops """ 247 | # shuffle data 248 | #data_loader.Xs_train, data_loader.y_train = shuffle(data_loader.Xs_train, data_loader.y_train) 249 | is_training = True 250 | num_batches = dataloader.num_train // BATCH_SIZE 251 | loss_sum = 0.0 252 | rmse_angle_sum = 0.0 253 | rmse_speed_sum = 0.0 254 | 255 | for i in range(num_batches): 256 | #t1 = time.time() 257 | X_image_batch, X_cloud_batch, y_batch = dataloader.load_train_batch(BATCH_SIZE) 258 | #t2 = time.time() 259 | 260 | feed_dict = {ops['image_pl']: X_image_batch, 261 | ops['points_pl']: X_cloud_batch, 262 | ops['label_pl']: y_batch, 263 | ops['is_training_pl']: is_training} 264 | 265 | summary, step, _, loss_batch, rmse_angle_batch, rmse_speed_batch = sess.run([ops['merged'], ops['batch'], ops['train_op'], ops['loss'], ops['rmse_angle'], ops['rmse_speed']], feed_dict=feed_dict) 266 | #t3 = time.time() 267 | #print("data time: {}; train time: {}".format(t2-t1, t3-t2)) 268 | 269 | train_writer.add_summary(summary, step) 270 | 271 | loss_sum += loss_batch 272 | rmse_angle_sum += rmse_angle_batch 273 | rmse_speed_sum += rmse_speed_batch 274 | 275 | log_string(train_log_dir, 'Train loss: %f' % (loss_sum / num_batches)) 276 | log_string(train_log_dir, 'Train rmse_angle: %f' % (rmse_angle_sum / num_batches)) 277 | log_string(train_log_dir, 'Train rmse_speed: %f' % (rmse_speed_sum / num_batches)) 278 | log_string(train_log_dir, 'Train rmse_average: %f' % ((rmse_angle_sum+rmse_speed_sum)/ num_batches/2)) 279 | 280 | def test_one_epoch(sess, ops, test_writer): 281 | """ ops: dict mapping from string to tf ops """ 282 | 283 | is_training = False 284 | num_batches = dataloader.num_val // BATCH_SIZE 285 | loss_sum = 0.0 286 | 
rmse_angle_sum = 0.0
287 |     rmse_speed_sum = 0.0
288 | 
289 |     for i in range(num_batches):
290 |         X_image_batch, X_cloud_batch, y_batch = dataloader.load_val_batch(BATCH_SIZE)
291 | 
292 |         feed_dict = {ops['image_pl']: X_image_batch,
293 |                      ops['points_pl']: X_cloud_batch,
294 |                      ops['label_pl']: y_batch,
295 |                      ops['is_training_pl']: is_training}
296 | 
297 |         summary, step, loss_batch, rmse_angle_batch, rmse_speed_batch = sess.run([ops['merged'], ops['batch'], ops['loss'], ops['rmse_angle'], ops['rmse_speed']], feed_dict=feed_dict)
298 | 
299 |         test_writer.add_summary(summary, step)
300 | 
301 |         loss_sum += loss_batch
302 |         rmse_angle_sum += rmse_angle_batch
303 |         rmse_speed_sum += rmse_speed_batch
304 | 
305 |     log_string(train_log_dir, 'Val loss: %f' % (loss_sum / num_batches))
306 |     log_string(train_log_dir, 'Val rmse_angle: %f' % (rmse_angle_sum / num_batches))
307 |     log_string(train_log_dir, 'Val rmse_speed: %f' % (rmse_speed_sum / num_batches))
308 |     log_string(train_log_dir, 'Val rmse_average: %f' % ((rmse_angle_sum+rmse_speed_sum)/num_batches/2))
309 | 
310 |     return (rmse_angle_sum+rmse_speed_sum)/num_batches/2
311 | 
312 | if __name__ == "__main__":
313 |     train()
314 | 
--------------------------------------------------------------------------------
/utils/README.md:
--------------------------------------------------------------------------------
1 | ## Utility Functions for 3D Point Cloud Deep Learning
2 | 
3 | ### visualization tool
4 | 
5 | sh compile_render_balls_so.sh
6 | python show3d_balls.py
7 | 
--------------------------------------------------------------------------------
/utils/__init__.py:
--------------------------------------------------------------------------------
1 | 
2 | 
--------------------------------------------------------------------------------
/utils/compile_render_balls_so.sh:
--------------------------------------------------------------------------------
1 | g++ -std=c++11 render_balls_so.cpp -o render_balls_so.so -shared -fPIC -O2 -D_GLIBCXX_USE_CXX11_ABI=0
2 | 
3 | 
--------------------------------------------------------------------------------
/utils/pc_util.py:
--------------------------------------------------------------------------------
1 | '''
2 | @Author: Shuai Wang
3 | @Github: https://github.com/wsustcid
4 | @Version: 1.0.0
5 | @Date: 2020-06-18 23:32:49
6 | @LastEditTime: 2020-08-07 21:40:49
7 | @Description:
8 | '''
9 | 
10 | import os
11 | 
12 | BASE_DIR = os.path.dirname(os.path.abspath(__file__))
13 | 
14 | import time
15 | import glob
16 | import numpy as np
17 | import open3d as o3d
18 | import pcl
19 | import pcl.pcl_visualization
20 | import pandas as pd
21 | 
22 | from pypcd import pypcd
23 | import pprint # for pretty printing the metadata of the PCD
24 | import matplotlib.pyplot as plt
25 | from mpl_toolkits.mplot3d import Axes3D
26 | from matplotlib.patches import Circle
27 | 
28 | 
29 | # ----------------------------------------
30 | # Load pcd file and visualize clouds
31 | # Time needed to load 1000 clouds:
32 | # - pypcd: 165s
33 | # - open3d: 18s
34 | # - np.fromfile: 5.6s
35 | # ----------------------------------------
36 | 
37 | def load_pcd_pypcd(filepath):
38 |     """
39 |     load a point cloud from a pcd file using pypcd and
40 |     show the point cloud as a scatter plot
41 | 
42 |     - The metadata of the cloud saved in the pcd file:
43 |         {'count': [1, 1, 1, 1, 1, 1],
44 |          'data': 'ascii',
45 |          'fields': ['x', 'y', 'z', 'intensity', 'ring', 'time'],
46 |          'height': 1,
47 |          'points': 23633,
48 |          'size': [4, 4, 4, 4, 2, 4],
49 |          'type': ['F', 'F', 'F', 'F', 'U', 'F'],
50 | 
'version': '0.7', 51 | 'viewpoint': [0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0], 52 | 'width': 23633} 53 | """ 54 | cloud = pypcd.PointCloud.from_path(filepath) 55 | 56 | pprint.pprint(cloud.get_metadata()) 57 | 58 | # access the point cloud as a numpy structured array 59 | print("Raw shape: {}".format(cloud.points)) 60 | print(cloud.pc_data[:5]) 61 | 62 | new_cloud = np.stack([cloud.pc_data['x'], cloud.pc_data['y'], cloud.pc_data['z']], axis=1) 63 | 64 | print("New shape: {}".format(new_cloud.shape)) 65 | print(new_cloud[:5]) 66 | 67 | 68 | fig = plt.figure(figsize=(30,30)) # create a figure object 69 | ax = fig.add_subplot(111,projection='3d') 70 | ax.scatter(new_cloud[:,0],new_cloud[:,1], new_cloud[:,2]) 71 | ax.axis('scaled') 72 | ax.set_zlabel('Z') 73 | ax.set_ylabel('Y') 74 | ax.set_xlabel('X') 75 | 76 | plt.show() 77 | 78 | def load_pcd_o3d(file_path): 79 | """ load and visualize pcd file use o3d 80 | 81 | Hint: 82 | 1. some useful methods in o3d.geometry (http://www.open3d.org/docs/release/python_api/open3d.geometry.html) 83 | - o3d.geometry.PointCloud.rotate() scale, transform, translate 84 | - o3d.geometry.voxel_down_sample() 85 | - o3d.geometry.compute_point_cloud_mean_and_covariance 86 | - o3d.geometry.crop_point_cloud() 87 | """ 88 | 89 | cloud = o3d.io.read_point_cloud(file_path) # return a o3d.geometry.PointCloud 90 | print(cloud.get_max_bound()) 91 | print(cloud.get_min_bound()) 92 | o3d.visualization.draw_geometries([cloud]) 93 | 94 | cloud_np = np.asarray(cloud.points, np.float32) 95 | print(cloud_np.shape) 96 | print(np.max(cloud_np, axis=0)) 97 | print(np.min(cloud_np, axis=0)) 98 | 99 | def load_pcd_pcl(file_path): 100 | # load 101 | cloud_pcl = pcl.load(file_path) 102 | cloud_np = np.asarray(cloud_pcl, np.float32) 103 | print(cloud_np.shape) 104 | # or convert narray to pcl cloud 105 | #cloud_pcl = pcl.PointCloud() 106 | #cloud_pcl.from_array(cloud_np) 107 | 108 | # show cloud 109 | visual = pcl.pcl_visualization.CloudViewing() 110 | #visual.ShowGrayCloud(cloud_pcl, b'cloud') 111 | #visual.ShowColorACloud() 112 | #visual.ShowColorCloud() 113 | for i in range(1): 114 | visual.ShowMonochromeCloud(cloud_pcl, b'cloud') 115 | time.sleep(100) 116 | #if visual.WasStopped(): 117 | 118 | def load_cloud_seq(label_path): 119 | data = pd.read_csv(label_path) 120 | clouds_name = data['point_filename'].values 121 | num_cloud = len(clouds_name) 122 | print("Total cloud: {}".format(num_cloud)) 123 | 124 | path_prefix = os.path.dirname(label_path) 125 | visual = pcl.pcl_visualization.CloudViewing() 126 | for i in range(num_cloud): 127 | cloud_path = os.path.join(path_prefix, 'points', clouds_name[i]) 128 | cloud_pcl = pcl.load(cloud_path) 129 | visual.ShowMonochromeCloud(cloud_pcl, b'cloud') 130 | print(cloud_path) 131 | 132 | cloud_np = np.asarray(cloud_pcl, np.float32) 133 | #print('Max range: {}; MIn range: {}'.format(np.max(cloud_np, axis=0), 134 | # np.min(cloud_np, axis=0))) 135 | 136 | 137 | time.sleep(0.01) 138 | #if visual.WasStopped(): 139 | 140 | def load_cloud_bin_seq(label_path): 141 | data = pd.read_csv(label_path) 142 | clouds_name = data['point_filename'].values 143 | num_cloud = len(clouds_name) 144 | print("Total cloud: {}".format(num_cloud)) 145 | 146 | path_prefix = os.path.dirname(label_path) 147 | cloud_pcl = pcl.PointCloud() 148 | visual = pcl.pcl_visualization.CloudViewing() 149 | for i in range(num_cloud): 150 | cloud_path = os.path.join(path_prefix, 'points_bin', clouds_name[i][:-3]+'bin') 151 | cloud_np = np.fromfile(cloud_path, np.float32).reshape(-1,3) 152 | 
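        # note: a single pcl.PointCloud object is reused across iterations;
        # from_array() overwrites its contents with each new frame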
cloud_pcl.from_array(cloud_np) 153 | visual.ShowMonochromeCloud(cloud_pcl, b'cloud') 154 | print(cloud_path) 155 | 156 | time.sleep(0.1) 157 | #if visual.WasStopped(): 158 | 159 | def cloud_crop(file_path): 160 | """ Crop the pointcloud and visulaize it 161 | 162 | Hint: o3d.geometry.crop_point_cloud() 163 | Args: 164 | input (open3d.geometry.PointCloud): The input point cloud. 165 | min_bound (numpy.ndarray[float64[3, 1]]): Minimum bound for point coordinate 166 | max_bound (numpy.ndarray[float64[3, 1]]): Maximum bound for point coordinate 167 | Returns: 168 | open3d.geometry.PointCloud 169 | """ 170 | # show origin cloud 171 | cloud = o3d.io.read_point_cloud(file_path) 172 | print("Origin Points: ", cloud.dimension) 173 | print("Origin Bound: ", cloud.get_min_bound(), cloud.get_max_bound()) 174 | o3d.visualization.draw_geometries([cloud]) 175 | 176 | # crop 177 | min_bound = np.array([-70, -40, -5]) 178 | max_bound = np.array([70,40,15]) 179 | cloud_crop = o3d.geometry.crop_point_cloud(cloud, min_bound,max_bound) 180 | 181 | # show cropped cloud 182 | cloud_np = np.asarray(cloud_crop.points, np.float32) 183 | print("Cropped Points: ", cloud_np.shape) 184 | print("Cropped Bound: ", np.min(cloud_np, axis=0), np.max(cloud_np, axis=0)) 185 | o3d.visualization.draw_geometries([cloud_crop]) 186 | 187 | def clouds_info(train_path, test_path, 188 | show_distribute=False, 189 | play_clouds=False): 190 | """ plot pointclouds distribution in train set and test set 191 | 192 | 1. plot original point set distribution 193 | 2. get the proper crop bounds 194 | 3. crop the point and visualize the sequential cloud 195 | """ 196 | # load train and test clouds file path 197 | train_files = glob.glob(os.path.join(train_path, '*/center.csv')) 198 | test_file = os.path.join(test_path, 'center.csv') 199 | 200 | clouds_path_train = [] 201 | for train_file in train_files: 202 | clouds_name = pd.read_csv(train_file)['point_filename'].values 203 | path_prefix = os.path.dirname(train_file) 204 | for cloud_name in clouds_name: 205 | cloud_path = os.path.join(path_prefix, 'points', cloud_name) 206 | clouds_path_train.append(cloud_path) 207 | num_train = len(clouds_path_train) 208 | 209 | clouds_path_test = [] 210 | clouds_name = pd.read_csv(test_file)['point_filename'].values 211 | path_prefix = os.path.dirname(test_file) 212 | for cloud_name in clouds_name: 213 | cloud_path = os.path.join(path_prefix, 'points', cloud_name) 214 | clouds_path_test.append(cloud_path) 215 | num_test = len(clouds_path_test) 216 | 217 | print("Total train: ", num_train) 218 | print("Total test: ", num_test) 219 | 220 | # get clouds bound distribution 221 | if show_distribute == True: 222 | x_bound, y_bound, z_bound = [], [], [] 223 | for i in range(num_test): 224 | cloud = o3d.io.read_point_cloud(clouds_path_test[i]) 225 | x_bound.append(cloud.get_min_bound()[0]) 226 | x_bound.append(cloud.get_max_bound()[0]) 227 | 228 | y_bound.append(cloud.get_min_bound()[1]) 229 | y_bound.append(cloud.get_max_bound()[1]) 230 | 231 | z_bound.append(cloud.get_min_bound()[2]) 232 | z_bound.append(cloud.get_max_bound()[2]) 233 | 234 | fig, axes = plt.subplots(1,3,figsize=(12,2),dpi=300) 235 | axes[0].hist(x_bound, rwidth=0.8) 236 | axes[1].hist(y_bound, rwidth=0.8) 237 | axes[2].hist(z_bound, rwidth=0.8) 238 | plt.tight_layout() 239 | #plt.show() 240 | fig.savefig(os.path.join(BASE_DIR,'../docs/figs/clouds_bound_test.png')) 241 | 242 | x_bound, y_bound, z_bound = [], [], [] 243 | for i in range(num_train): 244 | cloud = 
o3d.io.read_point_cloud(clouds_path_train[i]) 245 | x_bound.append(cloud.get_min_bound()[0]) 246 | x_bound.append(cloud.get_max_bound()[0]) 247 | 248 | y_bound.append(cloud.get_min_bound()[1]) 249 | y_bound.append(cloud.get_max_bound()[1]) 250 | 251 | z_bound.append(cloud.get_min_bound()[2]) 252 | z_bound.append(cloud.get_max_bound()[2]) 253 | 254 | fig, axes = plt.subplots(1,3,figsize=(12,2),dpi=300) 255 | axes[0].hist(x_bound, rwidth=0.8) 256 | axes[1].hist(y_bound, rwidth=0.8) 257 | axes[2].hist(z_bound, rwidth=0.8) 258 | plt.tight_layout() 259 | #plt.show() 260 | fig.savefig(os.path.join(BASE_DIR,'../docs/figs/clouds_bound_train.png')) 261 | 262 | # paly clouds 263 | # crop # [-100,-75,-10] [100,75,30] 264 | min_bound = np.array([-70, -40, -10]) 265 | max_bound = np.array([70,40,20]) 266 | 267 | if play_clouds == True: 268 | visual = pcl.pcl_visualization.CloudViewing() 269 | cloud_pcl = pcl.PointCloud() 270 | for i in range(num_train): 271 | cloud = o3d.io.read_point_cloud(clouds_path_train[i]) 272 | cloud_crop = o3d.geometry.crop_point_cloud(cloud, min_bound,max_bound) 273 | 274 | cloud_np = np.asarray(cloud.points, np.float32) 275 | 276 | cloud_pcl.from_array(cloud_np) 277 | visual.ShowMonochromeCloud(cloud_pcl, b'cloud') 278 | 279 | print('{}: Max range: {}; MIn range: {}'.format(cloud_np.shape[0], np.max(cloud_np, axis=0), 280 | np.min(cloud_np, axis=0))) 281 | 282 | 283 | time.sleep(0.5) 284 | 285 | def cloud_show(cloud): 286 | fig, axes = plt.subplots(1,2, figsize=(100,25)) # create a figure object 287 | axes[0].axis('equal') 288 | axes[0].scatter(cloud[:,0],cloud[:,1]) # BEV 289 | axes[0].set_xlabel('X') 290 | axes[0].set_ylabel('Y') 291 | 292 | axes[1].axis('equal') 293 | axes[1].scatter(cloud[:,0],cloud[:,2]) # side view 294 | axes[1].set_xlabel('X') 295 | axes[1].set_ylabel('Z') 296 | 297 | #fig.savefig(path) 298 | plt.show() 299 | 300 | def cloud_show_3d(cloud): 301 | fig = plt.figure(figsize=(100,30)) # create a figure object 302 | ax = fig.add_subplot(111, projection='3d') 303 | ax.set_xlabel('X') 304 | ax.set_ylabel('Y') 305 | ax.set_zlabel('Z') 306 | 307 | ax.scatter(cloud[:,0],cloud[:,1], cloud[:,2]) 308 | plt.show() 309 | 310 | 311 | def cloud_plot_circle(cloud, radius=0.5): 312 | fig, axes = plt.subplots(1,2, figsize=(100,25)) # create a figure object 313 | axes[0].axis('equal') 314 | axes[1].axis('equal') 315 | 316 | axes[0].scatter(cloud[:,0],cloud[:,1]) # BEV 317 | axes[1].scatter(cloud[:,0],cloud[:,2]) # side view 318 | for i in range(len(cloud)): 319 | cir1 = Circle((cloud[i][0], cloud[i][1]), radius, color='r', fill=False) 320 | axes[0].add_patch(cir1) 321 | 322 | cir2 = Circle((cloud[i][0], cloud[i][2]), radius, color='r', fill=False) 323 | axes[1].add_patch(cir2) 324 | 325 | axes[0].set_xlabel('X') 326 | axes[0].set_ylabel('Y') 327 | axes[1].set_xlabel('X') 328 | axes[1].set_ylabel('Z') 329 | 330 | #fig.savefig(path) 331 | plt.show() 332 | 333 | def cloud_plot_circle_center(cloud, path, radius=10): 334 | fig, axes = plt.subplots(1,2, figsize=(100,25)) # create a figure object 335 | axes[0].axis('equal') 336 | axes[1].axis('equal') 337 | 338 | axes[0].scatter(cloud[:,0],cloud[:,1]) # BEV 339 | axes[1].scatter(cloud[:,0],cloud[:,2]) # side view 340 | 341 | cir1 = Circle((0,0), radius, color='r', fill=False) 342 | axes[0].add_patch(cir1) 343 | 344 | cir2 = Circle((0,0), radius, color='r', fill=False) 345 | axes[1].add_patch(cir2) 346 | 347 | axes[0].set_xlabel('X') 348 | axes[0].set_ylabel('Y') 349 | axes[1].set_xlabel('X') 350 | axes[1].set_ylabel('Z') 351 | 
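    # unlike cloud_plot_circle above, this draws one circle of the given
    # radius at the origin and saves the figure to `path` instead of showing it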
352 | fig.savefig(path) 353 | 354 | if __name__ == '__main__': 355 | train_path = '/media/ubuntu16/Documents/Datasets/Udacity/CH2/CH2_002' 356 | test_path = '/media/ubuntu16/Documents/Datasets/Udacity/CH2/CH2_001' 357 | 358 | file_name = 'points/1479425440130861.pcd' 359 | #load_pcd_pypcd(os.path.join(test_path,file_name)) 360 | #load_pcd_o3d(os.path.join(data_path,file_name)) 361 | #load_pcd_pcl(os.path.join(data_path,file_name)) 362 | #load_cloud_seq(os.path.join(data_path, 'center.csv')) 363 | #cloud_crop(os.path.join(data_path,file_name)) 364 | 365 | #clouds_info(train_path, test_path, show_distribute=False, play_clouds=True) 366 | 367 | load_cloud_bin_seq(os.path.join(test_path, 'center.csv')) 368 | -------------------------------------------------------------------------------- /utils/provider.py: -------------------------------------------------------------------------------- 1 | import os 2 | import sys 3 | import numpy as np 4 | import h5py 5 | BASE_DIR = os.path.dirname(os.path.abspath(__file__)) 6 | sys.path.append(BASE_DIR) 7 | 8 | def shuffle_data(data, labels): 9 | """ Shuffle data and labels. 10 | Input: 11 | data: B,N,... numpy array 12 | label: B,... numpy array 13 | Return: 14 | shuffled data, label and shuffle indices 15 | """ 16 | idx = np.arange(len(labels)) 17 | np.random.shuffle(idx) 18 | return data[idx, ...], labels[idx], idx 19 | 20 | def shuffle_points(batch_data): 21 | """ Shuffle orders of points in each point cloud -- changes FPS behavior. 22 | Use the same shuffling idx for the entire batch. 23 | Input: 24 | BxNxC array 25 | Output: 26 | BxNxC array 27 | """ 28 | idx = np.arange(batch_data.shape[1]) 29 | np.random.shuffle(idx) 30 | return batch_data[:,idx,:] 31 | 32 | def rotate_point_cloud(batch_data): 33 | """ Randomly rotate the point clouds to augument the dataset 34 | rotation is per shape based along up direction 35 | Input: 36 | BxNx3 array, original batch of point clouds 37 | Return: 38 | BxNx3 array, rotated batch of point clouds 39 | """ 40 | rotated_data = np.zeros(batch_data.shape, dtype=np.float32) 41 | for k in range(batch_data.shape[0]): 42 | rotation_angle = np.random.uniform() * 2 * np.pi 43 | cosval = np.cos(rotation_angle) 44 | sinval = np.sin(rotation_angle) 45 | rotation_matrix = np.array([[cosval, 0, sinval], 46 | [0, 1, 0], 47 | [-sinval, 0, cosval]]) 48 | shape_pc = batch_data[k, ...] 49 | rotated_data[k, ...] = np.dot(shape_pc.reshape((-1, 3)), rotation_matrix) 50 | return rotated_data 51 | 52 | def rotate_point_cloud_z(batch_data): 53 | """ Randomly rotate the point clouds to augument the dataset 54 | rotation is per shape based along up direction 55 | Input: 56 | BxNx3 array, original batch of point clouds 57 | Return: 58 | BxNx3 array, rotated batch of point clouds 59 | """ 60 | rotated_data = np.zeros(batch_data.shape, dtype=np.float32) 61 | for k in range(batch_data.shape[0]): 62 | rotation_angle = np.random.uniform() * 2 * np.pi 63 | cosval = np.cos(rotation_angle) 64 | sinval = np.sin(rotation_angle) 65 | rotation_matrix = np.array([[cosval, sinval, 0], 66 | [-sinval, cosval, 0], 67 | [0, 0, 1]]) 68 | shape_pc = batch_data[k, ...] 69 | rotated_data[k, ...] = np.dot(shape_pc.reshape((-1, 3)), rotation_matrix) 70 | return rotated_data 71 | 72 | def rotate_point_cloud_with_normal(batch_xyz_normal): 73 | ''' Randomly rotate XYZ, normal point cloud. 
74 | Input: 75 | batch_xyz_normal: B,N,6, first three channels are XYZ, last 3 all normal 76 | Output: 77 | B,N,6, rotated XYZ, normal point cloud 78 | ''' 79 | for k in range(batch_xyz_normal.shape[0]): 80 | rotation_angle = np.random.uniform() * 2 * np.pi 81 | cosval = np.cos(rotation_angle) 82 | sinval = np.sin(rotation_angle) 83 | rotation_matrix = np.array([[cosval, 0, sinval], 84 | [0, 1, 0], 85 | [-sinval, 0, cosval]]) 86 | shape_pc = batch_xyz_normal[k,:,0:3] 87 | shape_normal = batch_xyz_normal[k,:,3:6] 88 | batch_xyz_normal[k,:,0:3] = np.dot(shape_pc.reshape((-1, 3)), rotation_matrix) 89 | batch_xyz_normal[k,:,3:6] = np.dot(shape_normal.reshape((-1, 3)), rotation_matrix) 90 | return batch_xyz_normal 91 | 92 | def rotate_perturbation_point_cloud_with_normal(batch_data, angle_sigma=0.06, angle_clip=0.18): 93 | """ Randomly perturb the point clouds by small rotations 94 | Input: 95 | BxNx6 array, original batch of point clouds and point normals 96 | Return: 97 | BxNx3 array, rotated batch of point clouds 98 | """ 99 | rotated_data = np.zeros(batch_data.shape, dtype=np.float32) 100 | for k in range(batch_data.shape[0]): 101 | angles = np.clip(angle_sigma*np.random.randn(3), -angle_clip, angle_clip) 102 | Rx = np.array([[1,0,0], 103 | [0,np.cos(angles[0]),-np.sin(angles[0])], 104 | [0,np.sin(angles[0]),np.cos(angles[0])]]) 105 | Ry = np.array([[np.cos(angles[1]),0,np.sin(angles[1])], 106 | [0,1,0], 107 | [-np.sin(angles[1]),0,np.cos(angles[1])]]) 108 | Rz = np.array([[np.cos(angles[2]),-np.sin(angles[2]),0], 109 | [np.sin(angles[2]),np.cos(angles[2]),0], 110 | [0,0,1]]) 111 | R = np.dot(Rz, np.dot(Ry,Rx)) 112 | shape_pc = batch_data[k,:,0:3] 113 | shape_normal = batch_data[k,:,3:6] 114 | rotated_data[k,:,0:3] = np.dot(shape_pc.reshape((-1, 3)), R) 115 | rotated_data[k,:,3:6] = np.dot(shape_normal.reshape((-1, 3)), R) 116 | return rotated_data 117 | 118 | 119 | def rotate_point_cloud_by_angle(batch_data, rotation_angle): 120 | """ Rotate the point cloud along up direction with certain angle. 121 | Input: 122 | BxNx3 array, original batch of point clouds 123 | Return: 124 | BxNx3 array, rotated batch of point clouds 125 | """ 126 | rotated_data = np.zeros(batch_data.shape, dtype=np.float32) 127 | for k in range(batch_data.shape[0]): 128 | #rotation_angle = np.random.uniform() * 2 * np.pi 129 | cosval = np.cos(rotation_angle) 130 | sinval = np.sin(rotation_angle) 131 | rotation_matrix = np.array([[cosval, 0, sinval], 132 | [0, 1, 0], 133 | [-sinval, 0, cosval]]) 134 | shape_pc = batch_data[k,:,0:3] 135 | rotated_data[k,:,0:3] = np.dot(shape_pc.reshape((-1, 3)), rotation_matrix) 136 | return rotated_data 137 | 138 | def rotate_point_cloud_by_angle_with_normal(batch_data, rotation_angle): 139 | """ Rotate the point cloud along up direction with certain angle. 
140 | Input: 141 | BxNx6 array, original batch of point clouds with normal 142 | scalar, angle of rotation 143 | Return: 144 | BxNx6 array, rotated batch of point clouds iwth normal 145 | """ 146 | rotated_data = np.zeros(batch_data.shape, dtype=np.float32) 147 | for k in range(batch_data.shape[0]): 148 | #rotation_angle = np.random.uniform() * 2 * np.pi 149 | cosval = np.cos(rotation_angle) 150 | sinval = np.sin(rotation_angle) 151 | rotation_matrix = np.array([[cosval, 0, sinval], 152 | [0, 1, 0], 153 | [-sinval, 0, cosval]]) 154 | shape_pc = batch_data[k,:,0:3] 155 | shape_normal = batch_data[k,:,3:6] 156 | rotated_data[k,:,0:3] = np.dot(shape_pc.reshape((-1, 3)), rotation_matrix) 157 | rotated_data[k,:,3:6] = np.dot(shape_normal.reshape((-1,3)), rotation_matrix) 158 | return rotated_data 159 | 160 | 161 | 162 | def rotate_perturbation_point_cloud(batch_data, angle_sigma=0.06, angle_clip=0.18): 163 | """ Randomly perturb the point clouds by small rotations 164 | Input: 165 | BxNx3 array, original batch of point clouds 166 | Return: 167 | BxNx3 array, rotated batch of point clouds 168 | """ 169 | rotated_data = np.zeros(batch_data.shape, dtype=np.float32) 170 | for k in range(batch_data.shape[0]): 171 | angles = np.clip(angle_sigma*np.random.randn(3), -angle_clip, angle_clip) 172 | Rx = np.array([[1,0,0], 173 | [0,np.cos(angles[0]),-np.sin(angles[0])], 174 | [0,np.sin(angles[0]),np.cos(angles[0])]]) 175 | Ry = np.array([[np.cos(angles[1]),0,np.sin(angles[1])], 176 | [0,1,0], 177 | [-np.sin(angles[1]),0,np.cos(angles[1])]]) 178 | Rz = np.array([[np.cos(angles[2]),-np.sin(angles[2]),0], 179 | [np.sin(angles[2]),np.cos(angles[2]),0], 180 | [0,0,1]]) 181 | R = np.dot(Rz, np.dot(Ry,Rx)) 182 | shape_pc = batch_data[k, ...] 183 | rotated_data[k, ...] = np.dot(shape_pc.reshape((-1, 3)), R) 184 | return rotated_data 185 | 186 | 187 | def jitter_point_cloud(batch_data, sigma=0.01, clip=0.05): 188 | """ Randomly jitter points. jittering is per point. 189 | Input: 190 | BxNx3 array, original batch of point clouds 191 | Return: 192 | BxNx3 array, jittered batch of point clouds 193 | """ 194 | B, N, C = batch_data.shape 195 | assert(clip > 0) 196 | jittered_data = np.clip(sigma * np.random.randn(B, N, C), -1*clip, clip) 197 | jittered_data += batch_data 198 | return jittered_data 199 | 200 | def shift_point_cloud(batch_data, shift_range=0.1): 201 | """ Randomly shift point cloud. Shift is per point cloud. 202 | Input: 203 | BxNx3 array, original batch of point clouds 204 | Return: 205 | BxNx3 array, shifted batch of point clouds 206 | """ 207 | B, N, C = batch_data.shape 208 | shifts = np.random.uniform(-shift_range, shift_range, (B,3)) 209 | for batch_index in range(B): 210 | batch_data[batch_index,:,:] += shifts[batch_index,:] 211 | return batch_data 212 | 213 | 214 | def random_scale_point_cloud(batch_data, scale_low=0.8, scale_high=1.25): 215 | """ Randomly scale the point cloud. Scale is per point cloud. 
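        (e.g. with the defaults, every cloud in the batch is multiplied by
        one factor drawn uniformly from [0.8, 1.25])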
216 |     Input:
217 |         BxNx3 array, original batch of point clouds
218 |     Return:
219 |         BxNx3 array, scaled batch of point clouds
220 |     """
221 |     B, N, C = batch_data.shape
222 |     scales = np.random.uniform(scale_low, scale_high, B)
223 |     for batch_index in range(B):
224 |         batch_data[batch_index,:,:] *= scales[batch_index]
225 |     return batch_data
226 | 
227 | def random_point_dropout(batch_pc, max_dropout_ratio=0.875):
228 |     ''' batch_pc: BxNx3 '''
229 |     for b in range(batch_pc.shape[0]):
230 |         dropout_ratio = np.random.random()*max_dropout_ratio # 0~0.875
231 |         drop_idx = np.where(np.random.random((batch_pc.shape[1]))<=dropout_ratio)[0]
232 |         if len(drop_idx)>0:
233 |             batch_pc[b,drop_idx,:] = batch_pc[b,0,:] # set to the first point
234 |     return batch_pc
235 | 
236 | 
237 | def getDataFiles(list_filename):
238 |     return [line.rstrip() for line in open(list_filename)]
239 | 
240 | def load_h5(h5_filename):
241 |     f = h5py.File(h5_filename, 'r')
242 |     data = f['data'][:]
243 |     label = f['label'][:]
244 |     return (data, label)
245 | 
246 | def loadDataFile(filename):
247 |     return load_h5(filename)
248 | 
--------------------------------------------------------------------------------
/utils/render_balls_so.cpp:
--------------------------------------------------------------------------------
1 | #include <cstdio>
2 | #include <vector>
3 | #include <algorithm>
4 | #include <math.h>
5 | using namespace std;
6 | 
7 | struct PointInfo{
8 |     int x,y,z;
9 |     float r,g,b;
10 | };
11 | 
12 | extern "C"{
13 | 
14 | void render_ball(int h,int w,unsigned char * show,int n,int * xyzs,float * c0,float * c1,float * c2,int r){
15 |     r=max(r,1);
16 |     vector<int> depth(h*w,-2100000000);
17 |     vector<PointInfo> pattern;
18 |     // precompute the pixel pattern of one shaded ball of radius r
19 |     for (int dx=-r;dx<=r;dx++)
20 |         for (int dy=-r;dy<=r;dy++)
21 |             if (dx*dx+dy*dy<r*r){
22 |                 double dz=sqrt(double(r*r-dx*dx-dy*dy));
23 |                 PointInfo pinfo;
24 |                 pinfo.x=dx;
25 |                 pinfo.y=dy;
26 |                 pinfo.z=int(dz);
27 |                 pinfo.r=float(dz/r);
28 |                 pinfo.g=float(dz/r);
29 |                 pinfo.b=float(dz/r);
30 |                 pattern.push_back(pinfo);
31 |             }
32 |     // stamp the pattern at every point, keeping the nearest ball per pixel
33 |     for (int i=0;i<n;i++){
34 |         int x=xyzs[i*3+0],y=xyzs[i*3+1],z=xyzs[i*3+2];
35 |         for (int j=0;j<int(pattern.size());j++){
36 |             int x2=x+pattern[j].x;
37 |             int y2=y+pattern[j].y;
38 |             int z2=z+pattern[j].z;
39 |             if (!(x2<0 || x2>=h || y2<0 || y2>=w) && depth[x2*w+y2]<z2){
40 |                 depth[x2*w+y2]=z2;
41 |                 show[(x2*w+y2)*3+0]=(unsigned char)(c0[i]*pattern[j].r);
42 |                 show[(x2*w+y2)*3+1]=(unsigned char)(c1[i]*pattern[j].g);
43 |                 show[(x2*w+y2)*3+2]=(unsigned char)(c2[i]*pattern[j].b);
44 |             }
45 |         }
46 |     }
47 | }
48 | 
49 | }
--------------------------------------------------------------------------------
/utils/show3d_balls.py:
--------------------------------------------------------------------------------
1 | """ Original Author: Haoqiang Fan """
2 | import numpy as np
3 | import ctypes as ct
4 | import cv2
5 | import sys
6 | import os
7 | 
8 | BASE_DIR = os.path.dirname(os.path.abspath(__file__))
9 | 
10 | showsz=800
11 | mousex,mousey=0.5,0.5
12 | zoom=1.0
13 | changed=True
14 | 
15 | def onmouse(*args):
16 |     global mousex,mousey,changed
17 |     y=args[1]
18 |     x=args[2]
19 |     mousex=x/float(showsz)
20 |     mousey=y/float(showsz)
21 |     changed=True
22 | 
23 | cv2.namedWindow('show3d')
24 | cv2.moveWindow('show3d',0,0)
25 | cv2.setMouseCallback('show3d',onmouse)
26 | 
27 | dll=np.ctypeslib.load_library(os.path.join(BASE_DIR, 'render_balls_so'), '.')
28 | 
29 | def showpoints(xyz, c_gt=None, c_pred=None, waittime=0, showrot=False,
30 |                magnifyBlue=0, freezerot=False, background=(0,0,0),
31 |                normalizecolor=True, ballradius=10):
32 |     global showsz,mousex,mousey,zoom,changed
33 |     # center the cloud and scale it to fit the window
34 |     xyz=xyz-xyz.mean(axis=0)
35 |     radius=((xyz**2).sum(axis=-1)**0.5).max()
36 |     xyz/=(radius*2.2)/showsz
37 |     if c_gt is None:
38 |         c0=np.zeros((len(xyz),),dtype='float32')+255
39 |         c1=np.zeros((len(xyz),),dtype='float32')+255
40 |         c2=np.zeros((len(xyz),),dtype='float32')+255
41 |     else:
42 |         c0=c_gt[:,0]
43 |         c1=c_gt[:,1]
44 |         c2=c_gt[:,2]
45 |     if normalizecolor:
46 |         c0/=(c0.max()+1e-14)/255.0
47 |         c1/=(c1.max()+1e-14)/255.0
48 |         c2/=(c2.max()+1e-14)/255.0
49 |     c0=np.require(c0,'float32','C')
50 |     c1=np.require(c1,'float32','C')
51 |     c2=np.require(c2,'float32','C')
52 | 
53 |     show=np.zeros((showsz,showsz,3),dtype='uint8')
54 |     def render():
55 |         # build a rotation matrix from the current mouse position
56 |         rotmat=np.eye(3)
57 |         if not freezerot:
58 |             xangle=(mousey-0.5)*np.pi*1.2
59 |         else:
60 |             xangle=0
61 |         rotmat=rotmat.dot(np.array([
62 |             [1.0,0.0,0.0],
63 |             [0.0,np.cos(xangle),-np.sin(xangle)],
64 |             [0.0,np.sin(xangle),np.cos(xangle)],
65 |             ]))
66 |         if not freezerot:
67 |             yangle=(mousex-0.5)*np.pi*1.2
68 |         else:
69 |             yangle=0
70 |         rotmat=rotmat.dot(np.array([
71 |             [np.cos(yangle),0.0,-np.sin(yangle)],
72 |             [0.0,1.0,0.0],
73 |             [np.sin(yangle),0.0,np.cos(yangle)],
74 |             ]))
75 |         rotmat*=zoom
76 |         nxyz=xyz.dot(rotmat)+[showsz/2,showsz/2,0]
77 |         ixyz=nxyz.astype('int32')
78 |         show[:]=background
79 |         dll.render_ball(
80 |             ct.c_int(show.shape[0]),
81 |             ct.c_int(show.shape[1]),
82 |             show.ctypes.data_as(ct.c_void_p),
83 |             ct.c_int(ixyz.shape[0]),
84 |             ixyz.ctypes.data_as(ct.c_void_p),
85 |             c0.ctypes.data_as(ct.c_void_p),
86 |             c1.ctypes.data_as(ct.c_void_p),
87 |             c2.ctypes.data_as(ct.c_void_p),
88 |             ct.c_int(ballradius)
89 |         )
90 |         if magnifyBlue>0:
91 |             show[:,:,0]=np.maximum(show[:,:,0],np.roll(show[:,:,0],1,axis=0))
92 |             if magnifyBlue>=2:
93 |                 show[:,:,0]=np.maximum(show[:,:,0],np.roll(show[:,:,0],-1,axis=0))
94 |             show[:,:,0]=np.maximum(show[:,:,0],np.roll(show[:,:,0],1,axis=1))
95 |             if magnifyBlue>=2:
96 |                 show[:,:,0]=np.maximum(show[:,:,0],np.roll(show[:,:,0],-1,axis=1))
97 |         if showrot:
98 |             cv2.putText(show,'xangle %d'%(int(xangle/np.pi*180)),(30,showsz-30),0,0.5,(0,0,255))
99 |             cv2.putText(show,'yangle %d'%(int(yangle/np.pi*180)),(30,showsz-50),0,0.5,(0,0,255))
100 |             cv2.putText(show,'zoom %d%%'%(int(zoom*100)),(30,showsz-70),0,0.5,(0,0,255))
101 |     changed=True
102 |     while True:
103 |         if changed:
104 |             render()
105 |             changed=False
106 |         cv2.imshow('show3d',show)
107 |         if waittime==0:
108 |             cmd=cv2.waitKey(10)%256
109 |         else:
110 |             cmd=cv2.waitKey(waittime)%256
111 |         if cmd==ord('q'):
112 |             break
113 |         elif cmd==ord('Q'):
114 |             sys.exit(0)
115 | 
116 |         if cmd==ord('t') or cmd==ord('p'):
117 |             if cmd==ord('t'):
118 |                 if c_gt is None:
119 |                     c0=np.zeros((len(xyz),),dtype='float32')+255
120 |                     c1=np.zeros((len(xyz),),dtype='float32')+255
121 |                     c2=np.zeros((len(xyz),),dtype='float32')+255
122 |                 else:
123 |                     c0=c_gt[:,0]
124 |                     c1=c_gt[:,1]
125 |                     c2=c_gt[:,2]
126 |             else:
127 |                 if c_pred is None:
128 |                     c0=np.zeros((len(xyz),),dtype='float32')+255
129 |                     c1=np.zeros((len(xyz),),dtype='float32')+255
130 |                     c2=np.zeros((len(xyz),),dtype='float32')+255
131 |                 else:
132 |                     c0=c_pred[:,0]
133 |                     c1=c_pred[:,1]
134 |                     c2=c_pred[:,2]
135 |             if normalizecolor:
136 |                 c0/=(c0.max()+1e-14)/255.0
137 |                 c1/=(c1.max()+1e-14)/255.0
138 |                 c2/=(c2.max()+1e-14)/255.0
139 |             c0=np.require(c0,'float32','C')
140 |             c1=np.require(c1,'float32','C')
141 |             c2=np.require(c2,'float32','C')
142 |             changed = True
143 | 
144 |         if cmd==ord('n'):
145 |             zoom*=1.1
146 |             changed=True
147 |         elif cmd==ord('m'):
148 |             zoom/=1.1
149 |             changed=True
150 |         elif cmd==ord('r'):
151 |             zoom=1.0
152 |             changed=True
153 |         elif cmd==ord('s'):
154 |             cv2.imwrite('show3d.png',show)
155 |         if waittime!=0:
156 |             break
157 |     return cmd
158 | 
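# Usage sketch (assumes render_balls_so.so has been built with
# compile_render_balls_so.sh in this directory), e.g.:
#   pts = np.random.uniform(-1, 1, (2048, 3)).astype(np.float32)
#   showpoints(pts, ballradius=8)
# Controls: drag the mouse to rotate; n/m zoom in/out, r resets the zoom,
# s saves a screenshot to show3d.png, q quits.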
159 | if __name__=='__main__':
160 |     import open3d as o3d
161 |     file_path = '/media/ubuntu16/Documents/Datasets/Udacity/CH2/CH2_001/points/1479425440130861.pcd'
162 |     cloud = o3d.io.read_point_cloud(file_path)
163 |     cloud_np = np.asarray(cloud.points, np.float32)
164 | 
165 |     showpoints(cloud_np)
166 | 
--------------------------------------------------------------------------------