├── LICENSE.md ├── README.md ├── images └── overview_car.png ├── make_tf_ops.sh ├── requirements.txt ├── src ├── __init__.py ├── __pycache__ │ ├── model_concat_upsa_1nn_cycle_nuscenes_iterative.cpython-36.pyc │ └── nuscenes_dataset_self_supervised_cycle_ordered_and_same_corrected_rgb.cpython-36.pyc ├── commands │ ├── command_evaluate_kitti.sh │ ├── command_train_cycle_fine_tune_kitti.sh │ ├── command_train_cycle_kitti.sh │ └── command_train_cycle_nuscenes.sh ├── evaluate_cycle_kitti.py ├── evaluate_kitti.py ├── kitti_dataset_self_supervised_cycle.py ├── model_concat_upsa.py ├── model_concat_upsa_cycle.py ├── nuscenes_dataset_self_supervised_cycle.py ├── tf_ops │ ├── 3d_interpolation │ │ ├── Makefile │ │ ├── __init__.py │ │ ├── __pycache__ │ │ │ ├── tf_interpolate.cpython-35.pyc │ │ │ └── tf_interpolate.cpython-36.pyc │ │ ├── interpolate.cpp │ │ ├── tf_interpolate.cpp │ │ ├── tf_interpolate.py │ │ ├── tf_interpolate_op_test.py │ │ ├── tf_interpolate_so.so │ │ └── visu_interpolation.py │ ├── __init__.py │ ├── grouping │ │ ├── Makefile │ │ ├── __init__.py │ │ ├── __pycache__ │ │ │ ├── tf_grouping.cpython-35.pyc │ │ │ └── tf_grouping.cpython-36.pyc │ │ ├── tf_grouping.cpp │ │ ├── tf_grouping.py │ │ ├── tf_grouping_g.cu │ │ ├── tf_grouping_g.cu.o │ │ ├── tf_grouping_op_test.py │ │ └── tf_grouping_so.so │ └── sampling │ │ ├── 1.pkl │ │ ├── Makefile │ │ ├── __init__.py │ │ ├── __pycache__ │ │ ├── tf_sampling.cpython-35.pyc │ │ ├── tf_sampling.cpython-36.pyc │ │ └── tf_sampling.cpython-37.pyc │ │ ├── tf_sampling.cpp │ │ ├── tf_sampling.py │ │ ├── tf_sampling_g.cu │ │ ├── tf_sampling_g.cu.o │ │ └── tf_sampling_so.so ├── train_1nn_cycle_nuscenes.py ├── train_cycle_fine_tune_kitti.py └── utils │ ├── __init__.py │ ├── __pycache__ │ ├── __init__.cpython-35.pyc │ ├── __init__.cpython-36.pyc │ ├── __init__.cpython-37.pyc │ ├── pointnet_util.cpython-35.pyc │ ├── pointnet_util.cpython-36.pyc │ ├── pointnet_util.cpython-37.pyc │ ├── tf_util.cpython-35.pyc │ ├── tf_util.cpython-36.pyc │ └── tf_util.cpython-37.pyc │ ├── pointnet_util.py │ └── tf_util.py └── visualization.py /LICENSE.md: -------------------------------------------------------------------------------- 1 | Copyright 2019 Carnegie Mellon University 2 | 3 | Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: 4 | 5 | 1. Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. 6 | 7 | 2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. 8 | 9 | 3. Neither the name of the copyright holder nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. 10 | 11 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 12 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Just Go with the Flow: Self-Supervised Scene Flow Estimation, CVPR 2020 (Oral) 2 | 3 | Authors: [Himangi Mittal](https://github.com/HimangiM), [Brian Okorn](https://github.com/bokorn), [David Held](https://github.com/davheld) 4 | 5 | [[arxiv](https://arxiv.org/pdf/1912.00497.pdf)] [[Project Page](https://just-go-with-the-flow.github.io/)] 6 | 7 | ### Citation 8 | If you find our work useful in your research, please cite: 9 | ``` 10 | @InProceedings{Mittal_2020_CVPR, 11 | author = {Mittal, Himangi and Okorn, Brian and Held, David}, 12 | title = {Just Go With the Flow: Self-Supervised Scene Flow Estimation}, 13 | booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, 14 | month = {June}, 15 | year = {2020} 16 | } 17 | ``` 18 | 19 | ### Introduction 20 | In this work, we propose a method of scene flow estimation using two self-supervised losses, based on nearest neighbors and cycle consistency. These self-supervised losses allow us to train our method on large unlabeled autonomous driving datasets; the resulting method matches current state-of-the-art supervised performance using no real-world annotations and exceeds state-of-the-art performance when combining our self-supervised approach with supervised learning on a smaller labeled dataset. 21 | 22 | For more details, please refer to our [paper](https://arxiv.org/pdf/1912.00497.pdf) or [project page](https://just-go-with-the-flow.github.io/). 23 | 24 | ### Installation 25 | #### Requirements 26 | ``` 27 | CUDA 9.0 28 | Tensorflow-gpu 1.9 29 | Python 3.5 30 | g++ 5.4.0 31 | ``` 32 | #### Steps 33 | (a). Clone the repository. 34 | ``` 35 | git clone https://github.com/HimangiM/Self-Supervised-Scene-Flow-Estimation.git 36 | ``` 37 | (b). Install the dependencies. 38 | ``` 39 | # Create a virtualenv 40 | python3 -m venv sceneflowvenv 41 | source sceneflowvenv/bin/activate 42 | cd Self-Supervised-Scene-Flow-Estimation 43 | pip install -r requirements.txt 44 | ``` 45 | ``` 46 | # Check that CUDA-9.0 is installed 47 | ``` 48 | (c). Compile the operations. 49 | The ```TF operators``` are included under src/tf_ops. Check the [CUDA compatibility](https://en.wikipedia.org/wiki/CUDA#GPUs_supported) and ```edit the architecture``` accordingly in the ```Makefiles``` of each folder (tf_ops/sampling, tf_ops/grouping, tf_ops/3d_interpolation). The authors used ```sm_61``` as the architecture for CUDA-9.0. Finally, move into each directory and run ```make```. Also, check the CUDA-9.0 path in the Makefile of each folder and edit it if necessary. If this method throws an error, run ```bash make_tf_ops.sh sm_61```. 50 | 51 | ### Datasets 52 | Download the KITTI dataset from the Google Drive [link](https://drive.google.com/drive/u/1/folders/1WNqrfUBR-EdN2ns_0D3FIdJBAPmFkaOo). Each file is in the .npz format and has three keys: ```pos1, pos2 and gt```, representing the first point cloud frame, the second point cloud frame, and the ground truth scene flow vectors for the points in the first frame.
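To sanity-check a downloaded file, a short NumPy snippet like the following can be used (a minimal sketch; the file name is illustrative):
```
import numpy as np

# illustrative file name; any .npz from the train/ or test/ folder works
data = np.load('data_preprocessing/kitti_self_supervised_flow/train/000000.npz')
pos1, pos2, gt = data['pos1'], data['pos2'], data['gt']
print(pos1.shape, pos2.shape, gt.shape)
# the first three columns of pos1/pos2 are XYZ; gt holds the per-point flow
# for frame 1, so pos1[:, :3] + gt[:, :3] approximates positions in frame 2
```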
Create a folder named data_preprocessing and download the KITTI dataset into it. The dataset directory should look as follows: 53 | ``` 54 | Self-Supervised-Scene-Flow-Estimation 55 | |--data_preprocessing 56 | | |--kitti_self_supervised_flow 57 | | | |--train 58 | | | |--test 59 | ``` 60 | The data preprocessing file used to run the code on KITTI is in the src folder: kitti_dataset_self_supervised_cycle.py. 61 | To create a dataloader for your own dataset, refer to the script: 62 | ``` 63 | nuscenes_dataset_self_supervised_cycle.py 64 | ``` 65 | 66 | ### Training and Evaluation 67 | To train on your own dataset, refer to the scripts: 68 | ``` 69 | train_1nn_cycle_nuscenes.py 70 | bash src/commands/command_train_cycle_nuscenes.sh 71 | ``` 72 | To evaluate on the KITTI dataset, execute the shell script: 73 | ``` 74 | bash src/commands/command_evaluate_kitti.sh 75 | ``` 76 | [Link](https://drive.google.com/drive/folders/1ldakyGw4QxfhAQeykyoJG2iv8yNuAy5_?usp=sharing) to the pretrained model. 77 | 78 | ### Visualization 79 | You can use ```Open3d``` to visualize the results. A sample script is given in visualization.py. 80 | 81 | 82 | 83 | -------------------------------------------------------------------------------- /images/overview_car.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HimangiM/Just-Go-with-the-Flow-Self-Supervised-Scene-Flow-Estimation/7052587bd5c238a0b9c3f4bb3334ab923cee402e/images/overview_car.png -------------------------------------------------------------------------------- /make_tf_ops.sh: -------------------------------------------------------------------------------- 1 | make -C src/tf_ops/3d_interpolation clean 2 | make -C src/tf_ops/3d_interpolation ARCHI=$1 3 | make -C src/tf_ops/grouping clean 4 | make -C src/tf_ops/grouping ARCHI=$1 5 | make -C src/tf_ops/sampling clean 6 | make -C src/tf_ops/sampling ARCHI=$1 7 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | absl-py==0.8.0 2 | apptools==4.5.0 3 | astor==0.8.0 4 | attrs==19.2.0 5 | backcall==0.1.0 6 | bleach==3.1.0 7 | cachetools==4.0.0 8 | certifi==2019.11.28 9 | cffi==1.12.3 10 | chardet==3.0.4 11 | configobj==5.0.6 12 | cvlib==0.2.4 13 | cycler==0.10.0 14 | Cython==0.29.16 15 | decorator==4.4.0 16 | defusedxml==0.6.0 17 | entrypoints==0.3 18 | envisage==4.9.2 19 | filelock==3.0.12 20 | gast==0.3.0 21 | grpcio==1.23.0 22 | h5py==2.10.0 23 | idna==2.9 24 | imageio==2.6.1 25 | imutils==0.5.3 26 | ipdb==0.12.2 27 | ipydatawidgets==4.0.1 28 | ipykernel==5.1.2 29 | ipython==7.8.0 30 | ipython-genutils==0.2.0 31 | ipyvolume==0.5.2 32 | ipywebrtc==0.5.0 33 | ipywidgets==7.5.1 34 | jedi==0.15.1 35 | Jinja2==2.10.1 36 | joblib==0.14.1 37 | json5==0.8.5 38 | jsonschema==3.0.2 39 | jupyter-client==5.3.3 40 | jupyter-core==4.5.0 41 | jupyterlab==1.1.4 42 | jupyterlab-server==1.0.6 43 | Keras-Applications==1.0.8 44 | Keras-Preprocessing==1.1.0 45 | kiwisolver==1.1.0 46 | llvmlite==0.29.0 47 | Markdown==3.1.1 48 | MarkupSafe==1.1.1 49 | matplotlib==3.1.1 50 | mayavi==4.7.1 51 | mistune==0.8.4 52 | nbconvert==5.6.0 53 | nbformat==4.4.0 54 | networkx==2.4 55 | nose==1.3.7 56 | notebook==6.0.1 57 |
numba==0.38.1 58 | numpy==1.17.2 59 | open3d==0.8.0.0 60 | open3d-python==0.7.0.0 61 | opencv-python==4.1.1.26 62 | pandas==0.25.2 63 | pandocfilters==1.4.2 64 | parso==0.5.1 65 | pexpect==4.7.0 66 | pickleshare==0.7.5 67 | Pillow==6.1.0 68 | pkg-resources==0.0.0 69 | pptk==0.1.0 70 | progressbar==2.5 71 | prometheus-client==0.7.1 72 | prompt-toolkit==2.0.9 73 | protobuf==3.9.1 74 | ptyprocess==0.6.0 75 | pycparser==2.19 76 | pyface==6.1.2 77 | Pygments==2.4.2 78 | pyparsing==2.4.2 79 | pypng==0.0.20 80 | pyquaternion==0.9.5 81 | pyrsistent==0.15.4 82 | python-dateutil==2.8.0 83 | python-pcl==0.3.0a1 84 | pythreejs==2.2.0 85 | pytz==2019.3 86 | PyWavelets==1.1.1 87 | PyYAML==5.1.2 88 | pyzmq==18.1.0 89 | requests==2.23.0 90 | scikit-image==0.16.2 91 | scikit-learn==0.22.1 92 | scipy==1.1.0 93 | seaborn==0.9.0 94 | Send2Trash==1.5.0 95 | six==1.12.0 96 | tensorboard==1.9.0 97 | tensorflow-gpu==1.9.0 98 | termcolor==1.1.0 99 | terminado==0.8.2 100 | testpath==0.4.2 101 | threevis==0.1.0.post25 102 | torch==1.4.0 103 | torchvision==0.5.0 104 | tornado==6.0.3 105 | tqdm==4.36.1 106 | traitlets==4.3.2 107 | traits==6.0.0 108 | traitsui==6.1.3 109 | traittypes==0.2.1 110 | transforms==0.1 111 | urllib3==1.25.8 112 | vtk==8.1.2 113 | wcwidth==0.1.7 114 | webencodings==0.5.1 115 | Werkzeug==0.15.6 116 | widgetsnbextension==3.5.1 117 | -------------------------------------------------------------------------------- /src/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HimangiM/Just-Go-with-the-Flow-Self-Supervised-Scene-Flow-Estimation/7052587bd5c238a0b9c3f4bb3334ab923cee402e/src/__init__.py -------------------------------------------------------------------------------- /src/__pycache__/model_concat_upsa_1nn_cycle_nuscenes_iterative.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HimangiM/Just-Go-with-the-Flow-Self-Supervised-Scene-Flow-Estimation/7052587bd5c238a0b9c3f4bb3334ab923cee402e/src/__pycache__/model_concat_upsa_1nn_cycle_nuscenes_iterative.cpython-36.pyc -------------------------------------------------------------------------------- /src/__pycache__/nuscenes_dataset_self_supervised_cycle_ordered_and_same_corrected_rgb.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HimangiM/Just-Go-with-the-Flow-Self-Supervised-Scene-Flow-Estimation/7052587bd5c238a0b9c3f4bb3334ab923cee402e/src/__pycache__/nuscenes_dataset_self_supervised_cycle_ordered_and_same_corrected_rgb.cpython-36.pyc -------------------------------------------------------------------------------- /src/commands/command_evaluate_kitti.sh: -------------------------------------------------------------------------------- 1 | python src/evaluate_cycle_kitti.py \ 2 | --model model_concat_upsa \ 3 | --data data_preprocessing/kitti_self_supervised_flow \ 4 | --model_path log_train_pretrained/model.ckpt \ 5 | --kitti_dataset data_preprocessing/kitti_self_supervised_flow \ 6 | --num_point 2048 \ 7 | --batch_size 8 \ 8 | --radius 5 \ 9 | --layer pointnet \ 10 | --gpu 2 \ 11 | --num_frames 2 12 | 13 | 14 | -------------------------------------------------------------------------------- /src/commands/command_train_cycle_fine_tune_kitti.sh: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | python src/train_cycle_fine_tune_kitti.py \ 5 | --model 
model_concat_upsa_cycle \ 6 | --data data_preprocessing/kitti_self_supervised_flow \ 7 | --log_dir log_train_cycle_fine_tune_kitti \ 8 | --num_point 2048 \ 9 | --batch_size 8 \ 10 | --radius 5 \ 11 | --layer pointnet \ 12 | --cache_size 30000 \ 13 | --gpu 2 \ 14 | --learning_rate 0.0001 \ 15 | --dataset kitti_dataset_self_supervised_cycle \ 16 | --num_frames 2 \ 17 | --max_epoch 10000 \ 18 | --fine_tune \ 19 | --model_path log_train_pretrained/model.ckpt \ 20 | --kitti_dataset data_preprocessing/kitti_self_supervised_flow \ 21 | --sample_start_idx 0 22 | -------------------------------------------------------------------------------- /src/commands/command_train_cycle_kitti.sh: -------------------------------------------------------------------------------- 1 | python src/train_cycle_fine_tune_kitti.py \ 2 | --model model_concat_upsa_cycle \ 3 | --data data_preprocessing/kitti_self_supervised_flow \ 4 | --log_dir log_train_cycle_fine_tune_kitti \ 5 | --num_point 2048 \ 6 | --batch_size 8 \ 7 | --radius 5 \ 8 | --layer pointnet \ 9 | --cache_size 30000 \ 10 | --gpu 2 \ 11 | --learning_rate 0.0001 \ 12 | --dataset kitti_dataset_self_supervised_cycle \ 13 | --num_frames 2 \ 14 | --max_epoch 10000 \ 15 | --kitti_dataset data_preprocessing/kitti_self_supervised_flow \ 16 | --sample_start_idx 0 17 | -------------------------------------------------------------------------------- /src/commands/command_train_cycle_nuscenes.sh: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | python train_1nn_cycle_nuscenes.py \ 5 | --model model_concat_upsa_1nn_cycle_nuscenes \ 6 | --data data_preprocessing/nuscenes_trainval_rgb_pkl_600_full \ 7 | --log_dir logs/log_train_cycle_nuscenes \ 8 | --num_point 2048 \ 9 | --batch_size 8 \ 10 | --radius 5 \ 11 | --layer pointnet \ 12 | --cache_size 0 \ 13 | --gpu 3 \ 14 | --learning_rate 0.001 \ 15 | --dataset nuscenes_dataset_self_supervised_cycle \ 16 | --num_frames 2 \ 17 | --fine_tune \ 18 | --model_path pretrained_models/log_train_pretrained/model.ckpt \ 19 | --max_epoch 10000 \ 20 | --flip_prob 0.5 21 | -------------------------------------------------------------------------------- /src/evaluate_cycle_kitti.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import math 3 | from datetime import datetime 4 | import numpy as np 5 | import tensorflow as tf 6 | import socket 7 | import importlib 8 | import os 9 | import sys 10 | import glob 11 | BASE_DIR = os.path.dirname(os.path.abspath(__file__)) 12 | sys.path.append(BASE_DIR) 13 | import pickle 14 | import pdb 15 | import utils.tf_util 16 | from utils.pointnet_util import * 17 | from tf_grouping import query_ball_point, group_point, knn_point 18 | 19 | len_cloud = 100000 20 | 21 | parser = argparse.ArgumentParser() 22 | parser.add_argument('--gpu', type=int, default=3, help='GPU to use [default: GPU 0]') 23 | parser.add_argument('--model', default='model_concat_upsa', help='Model name [default: model_concat_upsa]') 24 | parser.add_argument('--data', default='data_preprocessing/data_processed_maxcut_35_20k_2k_8192', help='Dataset directory [default: /data_preprocessing/data_processed_maxcut_35_20k_2k_8192]') 25 | parser.add_argument('--model_path', default='log_train/model.ckpt', help='model checkpoint file path [default: log_train/model.ckpt]') 26 | parser.add_argument('--num_point', type=int, default=2048, help='Point Number [default: 2048]') 27 | parser.add_argument('--batch_size', type=int, default=16, 
help='Batch Size during training [default: 16]') 28 | parser.add_argument('--num_frames', type=int, default=2, help='Number of frames to run cycle') 29 | parser.add_argument('--decay_step', type=int, default=200000, help='Decay step for lr decay [default: 200000]') 30 | parser.add_argument('--decay_rate', type=float, default=0.7, help='Decay rate for lr decay [default: 0.7]') 31 | parser.add_argument('--radius', type=float, default=5.0, help='Radius of flow embedding layer') 32 | parser.add_argument('--layer', type=str, default='pointnet', help='Last layer for upconv') 33 | parser.add_argument('--knn', action='store_true', help='knn or query ball point') 34 | parser.add_argument('--flow', type=str, default='default', help='flow embedding module type') 35 | parser.add_argument('--kitti_dataset', default='data_preprocessing/kitti_self_supervised_flow', help='Dataset directory [default: /data_preprocessing/kitti_self_supervised_flow]') 36 | 37 | 38 | FLAGS = parser.parse_args() 39 | 40 | os.environ['CUDA_VISIBLE_DEVICES'] = str(FLAGS.gpu) 41 | 42 | BATCH_SIZE = FLAGS.batch_size 43 | NUM_POINT = FLAGS.num_point 44 | DATA = FLAGS.data 45 | GPU_INDEX = FLAGS.gpu 46 | NUM_FRAMES = FLAGS.num_frames 47 | DECAY_STEP = FLAGS.decay_step 48 | DECAY_RATE = FLAGS.decay_rate 49 | RADIUS = FLAGS.radius 50 | 51 | print (FLAGS) 52 | 53 | MODEL = importlib.import_module(FLAGS.model) # import network module 54 | MODEL_FILE = os.path.join(BASE_DIR, FLAGS.model+'.py') 55 | MODEL_PATH = FLAGS.model_path 56 | LAYER = FLAGS.layer 57 | KNN = FLAGS.knn 58 | FLOW_MODULE = FLAGS.flow 59 | KITTI_DATASET = FLAGS.kitti_dataset 60 | 61 | BN_INIT_DECAY = 0.5 62 | BN_DECAY_DECAY_RATE = 0.5 63 | BN_DECAY_DECAY_STEP = float(DECAY_STEP) 64 | BN_DECAY_CLIP = 0.99 65 | 66 | 67 | def scene_flow_EPE_np(pred, labels, mask): 68 | error = np.sqrt(np.sum((pred - labels)**2, 2) + 1e-20) 69 | 70 | gtflow_len = np.sqrt(np.sum(labels*labels, 2) + 1e-20) # B,N 71 | acc1 = np.sum(np.logical_or((error <= 0.05)*mask, (error/gtflow_len <= 0.05)*mask), axis=1) 72 | acc2 = np.sum(np.logical_or((error <= 0.1)*mask, (error/gtflow_len <= 0.1)*mask), axis=1) 73 | 74 | mask_sum = np.sum(mask, 1) 75 | acc1 = acc1[mask_sum > 0] / mask_sum[mask_sum > 0] 76 | acc1 = np.sum(acc1) 77 | acc2 = acc2[mask_sum > 0] / mask_sum[mask_sum > 0] 78 | acc2 = np.sum(acc2) 79 | 80 | EPE = np.sum(error) 81 | 82 | # EPE = np.sum(error * mask, 1)[mask_sum > 0] / mask_sum[mask_sum > 0] 83 | # EPE = np.sum(EPE) 84 | return EPE, acc1, acc2, error, gtflow_len 85 | 86 | def get_bn_decay(batch): 87 | bn_momentum = tf.train.exponential_decay( 88 | BN_INIT_DECAY, 89 | batch * BATCH_SIZE, 90 | BN_DECAY_DECAY_STEP, 91 | BN_DECAY_DECAY_RATE, 92 | staircase=True) 93 | bn_decay = tf.minimum(BN_DECAY_CLIP, 1 - bn_momentum) 94 | return bn_decay 95 | 96 | with tf.Graph().as_default(): 97 | with tf.device('/gpu:'+str(GPU_INDEX)): 98 | pointclouds_pl, labels_pl, masks_pl = MODEL.placeholder_inputs(None, NUM_POINT) 99 | 100 | is_training_pl = tf.placeholder(tf.bool, shape=()) 101 | 102 | batch = tf.Variable(0) # batch = 0 103 | bn_decay = get_bn_decay(batch) # bn_decay = 0.5 104 | print("--- Get model and loss") 105 | # Get model and loss 106 | pred, end_points = MODEL.get_model(RADIUS, LAYER, pointclouds_pl, is_training_pl, bn_decay=bn_decay, 107 | knn=KNN, flow_module=FLOW_MODULE) 108 | 109 | saver = tf.train.Saver() 110 | 111 | # Create a session 112 | config = tf.ConfigProto() 113 | config.gpu_options.allow_growth = True 114 | config.allow_soft_placement = True 115 | 
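# allow_growth stops TensorFlow from reserving all GPU memory up front, and
# soft placement falls back to the CPU for ops without a GPU kernel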
config.log_device_placement = False 116 | sess = tf.Session(config=config) 117 | 118 | saver.restore(sess, MODEL_PATH) 119 | 120 | ops = {'pointclouds_pl': pointclouds_pl, 121 | 'label': labels_pl, 122 | 'is_training_pl': is_training_pl, 123 | 'pred': pred, 124 | 'end_points': end_points} 125 | is_training = False 126 | 127 | # all_kitti = glob.glob('/home/gaurav/himangi/flownet3d_research/cvpr/after_cvpr/rebuttal/kitti_final_points_estimate/*.npz') 128 | all_kitti = glob.glob(os.path.join(KITTI_DATASET, 'test/*.npz')) 129 | l_error_all = [] 130 | l_gt_all = [] 131 | l_dist_center = [] 132 | l_query_ball = [] 133 | l_dist_count_all = [] 134 | 135 | num_frame = 40 136 | epe_total = 0 137 | epe_count = 0 138 | batch_count = 0 139 | sample_count = 0 140 | 141 | all_pred = [] 142 | all_label = [] 143 | all_points = [] 144 | 145 | for ki in all_kitti: 146 | # try: 147 | x = np.load(ki) 148 | 149 | batch_label = [] 150 | batch_data = [] 151 | batch_mask = [] 152 | 153 | ref_pc = x['pos1'][:, :3] 154 | ref_center = np.mean(ref_pc, 0) 155 | print(len(x['pos1']), len(x['pos2'])) 156 | for i in range(0, len_cloud, 2048): 157 | if i+2048 < len(x['pos1']) and i+2048 < len(x['pos2']): 158 | 159 | pc1 = x['pos1'][i:i+2048, :3] 160 | pc2 = x['pos2'][i:i+2048, :3] 161 | gt = x['gt'][i:i+2048, :3] 162 | 163 | pc1 = pc1 - ref_center 164 | pc2 = pc2 - ref_center 165 | 166 | batch_data.append(np.concatenate([np.concatenate([pc1, 167 | pc2], axis=0), 168 | np.zeros((4096, 3))], axis=1)) # 4096, 6 169 | 170 | 171 | batch_label.append(gt) 172 | 173 | batch_data = np.array(batch_data) # 10 x 4096 x 6 174 | 175 | batch_label = np.array(batch_label) 176 | 177 | feed_dict = {ops['pointclouds_pl']: batch_data, 178 | ops['is_training_pl']: is_training,} 179 | 180 | pred_val, end_points_val = sess.run([ops['pred'], ops['end_points']], feed_dict=feed_dict) 181 | epe, acc1, acc2, error, gt_label = scene_flow_EPE_np(pred_val, batch_label, 182 | np.ones(pred_val.shape, dtype=np.int32)[:,:,0]) 183 | 184 | epe_total += epe 185 | sample_count += batch_data.shape[0]*(batch_data.shape[1]/2) 186 | batch_count += batch_data.shape[0] 187 | 188 | all_pred.append(pred_val) 189 | all_points.append(batch_data) 190 | all_label.append(batch_label) 191 | 192 | 193 | all_pred = np.array(all_pred) 194 | all_points = np.array(all_points) 195 | all_label = np.array(all_label) 196 | 197 | print (all_pred.shape, all_points.shape, all_label.shape) 198 | 199 | print('Num points {} Average EPE {}'.format(sample_count,epe_total/sample_count)) 200 | print ('eval mean EPE 3D: %f' % (epe_total / sample_count)) 201 | 202 | -------------------------------------------------------------------------------- /src/evaluate_kitti.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import math 3 | from datetime import datetime 4 | import numpy as np 5 | import tensorflow as tf 6 | import socket 7 | import importlib 8 | import os 9 | import sys 10 | import glob 11 | BASE_DIR = 'Self-Supervised-Scene-Flow-Estimation/src'  # hard-coded path to src; adjust if the repo is cloned elsewhere 12 | sys.path.append(BASE_DIR) 13 | import pickle 14 | import pdb 15 | import utils.tf_util 16 | from utils.pointnet_util import * 17 | from tf_grouping import query_ball_point, group_point, knn_point 18 | from scipy.spatial import distance_matrix 19 | 20 | sys.argv = sys.argv[:1]  # discard CLI args; the evaluation settings are hard-coded below 21 | sys.argv += ['--model', 'model_concat_upsa_cycle'] 22 | sys.argv += ['--model_path', 'log_train_pretrained/model.ckpt'] 23 | sys.argv += ['--num_point', '2048'] 24 | 25 | len_cloud = 100000 26 | 27 | parser = 
argparse.ArgumentParser() 28 | parser.add_argument('--gpu', type=int, default=0, help='GPU to use [default: GPU 0]') 29 | parser.add_argument('--model', default='model_concat_upsa', help='Model name [default: model_concat_upsa]') 30 | parser.add_argument('--data', default='data_preprocessing/data_processed_maxcut_35_20k_2k_8192', help='Dataset directory [default: /data_preprocessing/data_processed_maxcut_35_20k_2k_8192]') 31 | parser.add_argument('--model_path', default='log_train/model.ckpt', help='model checkpoint file path [default: log_train/model.ckpt]') 32 | parser.add_argument('--num_point', type=int, default=2048, help='Point Number [default: 2048]') 33 | parser.add_argument('--batch_size', type=int, default=16, help='Batch Size during training [default: 16]') 34 | parser.add_argument('--num_frames', type=int, default=2, help='Number of frames to run cycle') 35 | parser.add_argument('--decay_step', type=int, default=200000, help='Decay step for lr decay [default: 200000]') 36 | parser.add_argument('--decay_rate', type=float, default=0.7, help='Decay rate for lr decay [default: 0.7]') 37 | parser.add_argument('--radius', type=float, default=5.0, help='Radius of flow embedding layer') 38 | parser.add_argument('--layer', type=str, default='pointnet', help='Last layer for upconv') 39 | parser.add_argument('--knn', action='store_true', help='knn or query ball point') 40 | parser.add_argument('--flow', type=str, default='default', help='flow embedding module type') 41 | 42 | FLAGS = parser.parse_args() 43 | 44 | os.environ['CUDA_VISIBLE_DEVICES'] = str(FLAGS.gpu) 45 | 46 | BATCH_SIZE = FLAGS.batch_size 47 | NUM_POINT = FLAGS.num_point 48 | DATA = FLAGS.data 49 | GPU_INDEX = FLAGS.gpu 50 | NUM_FRAMES = FLAGS.num_frames 51 | DECAY_STEP = FLAGS.decay_step 52 | DECAY_RATE = FLAGS.decay_rate 53 | RADIUS = FLAGS.radius 54 | MODEL = importlib.import_module(FLAGS.model) # import network module 55 | MODEL_FILE = os.path.join(BASE_DIR, FLAGS.model+'.py') 56 | MODEL_PATH = FLAGS.model_path 57 | LAYER = FLAGS.layer 58 | KNN = FLAGS.knn 59 | FLOW_MODULE = FLAGS.flow 60 | 61 | BN_INIT_DECAY = 0.5 62 | BN_DECAY_DECAY_RATE = 0.5 63 | BN_DECAY_DECAY_STEP = float(DECAY_STEP) 64 | BN_DECAY_CLIP = 0.99 65 | 66 | def scene_flow_EPE_np(pred, labels, mask): 67 | error = np.sqrt(np.sum((pred - labels)**2, 2) + 1e-20) 68 | 69 | gtflow_len = np.sqrt(np.sum(labels*labels, 2) + 1e-20) # B,N 70 | acc1 = np.sum(np.logical_or((error <= 0.05)*mask, (error/gtflow_len <= 0.05)*mask), axis=1) 71 | acc2 = np.sum(np.logical_or((error <= 0.1)*mask, (error/gtflow_len <= 0.1)*mask), axis=1) 72 | 73 | mask_sum = np.sum(mask, 1) 74 | acc1 = acc1[mask_sum > 0] / mask_sum[mask_sum > 0] 75 | acc1 = np.sum(acc1) 76 | acc2 = acc2[mask_sum > 0] / mask_sum[mask_sum > 0] 77 | acc2 = np.sum(acc2) 78 | 79 | EPE = np.sum(error * mask, 1)[mask_sum > 0] / mask_sum[mask_sum > 0] 80 | EPE = np.sum(EPE) 81 | return EPE, acc1, acc2, error, gtflow_len 82 | 83 | def get_bn_decay(batch): 84 | bn_momentum = tf.train.exponential_decay( 85 | BN_INIT_DECAY, 86 | batch * BATCH_SIZE, 87 | BN_DECAY_DECAY_STEP, 88 | BN_DECAY_DECAY_RATE, 89 | staircase=True) 90 | bn_decay = tf.minimum(BN_DECAY_CLIP, 1 - bn_momentum) 91 | return bn_decay 92 | 93 | def return_dist_threshold(pc1, threshold=1.0): 94 | all_dist_count = [] 95 | diff_matrix = distance_matrix(pc1, pc1) 96 | r, c = np.where(diff_matrix <= threshold) 97 | _, counts_elements = np.unique(r, return_counts = True) 98 | for i in counts_elements: 99 | all_dist_count.append(i) 100 | 101 | return 
all_dist_count 102 | 103 | with tf.Graph().as_default(): 104 | with tf.device('/gpu:'+str(GPU_INDEX)): 105 | pointclouds_pl, labels_pl, masks_pl = MODEL.placeholder_inputs(None, NUM_POINT) 106 | 107 | is_training_pl = tf.placeholder(tf.bool, shape=()) 108 | 109 | batch = tf.Variable(0) # batch = 0 110 | bn_decay = get_bn_decay(batch) # bn_decay = 0.5 111 | print("--- Get model and loss") 112 | # Get model and loss 113 | pred, end_points = MODEL.get_model(RADIUS, LAYER, pointclouds_pl, is_training_pl, bn_decay=bn_decay, 114 | knn=KNN, flow_module=FLOW_MODULE) 115 | 116 | saver = tf.train.Saver() 117 | 118 | # Create a session 119 | config = tf.ConfigProto() 120 | config.gpu_options.allow_growth = True 121 | config.allow_soft_placement = True 122 | config.log_device_placement = False 123 | sess = tf.Session(config=config) 124 | 125 | saver.restore(sess, MODEL_PATH) 126 | 127 | ops = {'pointclouds_pl': pointclouds_pl, 128 | 'label': labels_pl, 129 | 'is_training_pl': is_training_pl, 130 | 'pred': pred, 131 | 'end_points': end_points} 132 | is_training = False 133 | 134 | all_kitti = glob.glob('kitti_self_supervised_flow/test/*.npz') 135 | 136 | l_error_all = [] 137 | 138 | num_frame = 40 139 | epe_total = 0 140 | epe_count = 0 141 | sample_count = 0 142 | for ki in all_kitti: 143 | x = np.load(ki) 144 | 145 | all_pred = [] 146 | all_label = [] 147 | all_points = [] 148 | 149 | batch_label = [] 150 | batch_data = [] 151 | batch_mask = [] 152 | 153 | ref_pc = x['pos1'][:, :3] 154 | ref_center = np.mean(ref_pc, 0) 155 | print(len(x['pos1']), len(x['pos2'])) 156 | for i in range(0, len_cloud, 2048): 157 | if i+2048 < len(x['pos1']) and i+2048 < len(x['pos2']): 158 | 159 | pc1 = x['pos1'][i:i+2048, :3] 160 | pc2 = x['pos2'][i:i+2048, :3] 161 | gt = x['gt'][i:i+2048, :3] 162 | 163 | pc1 = pc1 - ref_center 164 | pc2 = pc2 - ref_center 165 | batch_data.append(np.concatenate([np.concatenate([pc1, 166 | pc2], axis=0), 167 | np.zeros((4096, 3))], axis=1)) # 4096, 6 168 | 169 | batch_label.append(gt) 170 | 171 | batch_data = np.array(batch_data) 172 | batch_label = np.array(batch_label) 173 | 174 | feed_dict = {ops['pointclouds_pl']: batch_data, 175 | ops['is_training_pl']: is_training,} 176 | 177 | pred_val, end_points_val = sess.run([ops['pred'], ops['end_points']], feed_dict=feed_dict) 178 | epe, acc1, acc2, error, gt_label = scene_flow_EPE_np(pred_val, batch_label, 179 | np.ones(pred_val.shape, dtype=np.int32)[:,:,0]) 180 | 181 | epe_total += epe 182 | sample_count += batch_data.shape[0] 183 | 184 | all_pred.append(pred_val) 185 | all_points.append(batch_data) 186 | all_label.append(batch_label) 187 | 188 | all_pred = np.array(all_pred) 189 | all_points = np.array(all_points) 190 | all_label = np.array(all_label) 191 | 192 | f_name = ki.split('/')[-1] + "_40" 193 | 194 | print(all_pred.shape, all_points.shape, all_label.shape) 195 | 196 | print('Num batches {} Average EPE {}'.format(sample_count,epe_total/sample_count)) 197 | print ('eval mean EPE 3D: %f' % (epe_total / sample_count)) 198 | -------------------------------------------------------------------------------- /src/kitti_dataset_self_supervised_cycle.py: -------------------------------------------------------------------------------- 1 | import os 2 | import os.path 3 | import json 4 | import numpy as np 5 | import sys 6 | import pickle 7 | import glob 8 | import random 9 | # import mayavi.mlab as mlab 10 | 11 | class SceneflowDataset(): 12 | def __init__(self, root = './data_preprocessing/kitti_self_supervised_flow', 13 | cache_size = 
30000, npoints=2048, train=True, 14 | softmax_dist = False, num_frames=3, flip_prob=0, 15 | sample_start_idx=-1): 16 | self.npoints = npoints 17 | self.train = train 18 | self.root = root 19 | if self.train: 20 | self.datapath = glob.glob(os.path.join(self.root, 'train/*.npz')) 21 | else: 22 | self.datapath = glob.glob(os.path.join(self.root, 'test/*.npz')) 23 | self.cache = {} 24 | self.cache_size = cache_size 25 | self.softmax_dist = softmax_dist 26 | self.num_frames = num_frames 27 | self.flip_prob = flip_prob 28 | self.sample_start_idx = sample_start_idx 29 | 30 | def __getitem__(self, index): 31 | if index in self.cache: 32 | pos_list, color_list = self.cache[index] 33 | else: 34 | fn = self.datapath[index] 35 | pc_np_list = np.load(fn) 36 | pc_list = [] 37 | pc_list.append(pc_np_list['pos1']) 38 | pc_list.append(pc_np_list['pos2']) 39 | 40 | start_idx = np.random.choice(np.arange(len(pc_list)-self.num_frames+1), 41 | size=1)[0] 42 | pos_list = [] 43 | color_list = [] 44 | min_length = np.min([len(x) for x in pc_list]) 45 | # print (min_length, min_length-self.npoints+1) 46 | if self.sample_start_idx == -1: 47 | sample_start_idx = np.random.choice(min_length-self.npoints+1, 48 | size=1)[0] 49 | else: 50 | sample_start_idx = self.sample_start_idx 51 | sample_idx = np.arange(sample_start_idx, 52 | sample_start_idx+self.npoints) 53 | for frame_idx in range(start_idx, start_idx + self.num_frames): 54 | data = pc_list[frame_idx] # num_point x 4 55 | pos = data[sample_idx, :3] 56 | color = np.zeros((len(sample_idx), 3)) 57 | 58 | pos_list.append(pos) 59 | color_list.append(color) 60 | 61 | prob = random.uniform(0, 1) 62 | if prob < self.flip_prob: 63 | pos_list = pos_list[::-1] 64 | color_list = color_list[::-1] 65 | 66 | if len(self.cache) < self.cache_size: 67 | self.cache[index] = (pos_list, color_list) 68 | 69 | return np.array(pos_list), np.array(color_list) 70 | 71 | def __len__(self): 72 | return len(self.datapath) 73 | 74 | 75 | if __name__ == '__main__': 76 | d = SceneflowDataset(npoints=2048, train = False) 77 | print('Len of dataset:', len(d)) 78 | 79 | 80 | -------------------------------------------------------------------------------- /src/model_concat_upsa.py: -------------------------------------------------------------------------------- 1 | 2 | import tensorflow as tf 3 | import numpy as np 4 | import math 5 | import sys 6 | import os 7 | 8 | BASE_DIR = os.path.dirname(os.path.abspath(__file__)) 9 | sys.path.append(os.path.join(BASE_DIR, 'utils')) 10 | import utils.tf_util 11 | from utils.pointnet_util import * 12 | 13 | 14 | def placeholder_inputs(batch_size, num_point): 15 | # change here, num_point*2 -> numpoint*5 16 | pointclouds_pl = tf.placeholder(tf.float32, 17 | shape=(batch_size, num_point * 2, 6)) 18 | labels_pl = tf.placeholder(tf.float32, shape=(batch_size, num_point, 3)) 19 | masks_pl = tf.placeholder(tf.float32, shape=(batch_size, num_point)) 20 | return pointclouds_pl, labels_pl, masks_pl 21 | 22 | 23 | def get_model(radius, layer, point_cloud, is_training, bn_decay=None, knn=False, flow_module='default'): 24 | """ Scene flow network: input is B x (2N) x 6 (two stacked frames of XYZ + features), output is B x N x 3 flow for the first frame """ 25 | 26 | end_points = {} 27 | batch_size = point_cloud.get_shape()[0].value # batch_size = 16 28 | num_point = point_cloud.get_shape()[1].value // 2 29 | # change here, num_point hard coded to 2048 30 | # num_point = 2048 31 | 32 | l0_xyz_f1 = point_cloud[:, :num_point, 0:3] 33 | l0_points_f1 = point_cloud[:, :num_point, 3:] 34 | l0_xyz_f2 = point_cloud[:, num_point:, 0:3] 35
| l0_points_f2 = point_cloud[:, num_point:, 3:] 36 | 37 | RADIUS1 = 0.5 38 | RADIUS2 = 1.0 39 | RADIUS3 = 2.0 40 | RADIUS4 = 4.0 41 | 42 | with tf.variable_scope('sa1') as scope: 43 | # radius, npoints, nlayers, mlp size, sampling technique 44 | # Set conv layers, POINT FEATURE LEARNING 45 | # Frame 1, Layer 1 (with radius = 0.5) 46 | l1_xyz_f1, l1_points_f1, l1_indices_f1 = pointnet_sa_module(l0_xyz_f1, 47 | l0_points_f1, 48 | npoint=1024, 49 | radius=RADIUS1, 50 | nsample=16, 51 | mlp=[32, 32, 52 | 64], 53 | mlp2=None, 54 | group_all=False, 55 | is_training=is_training, 56 | bn_decay=bn_decay, 57 | scope='layer1', 58 | knn=knn) 59 | end_points['l1_indices_f1'] = l1_indices_f1 60 | end_points['l1_xyz_f1'] = l1_points_f1 61 | end_points['l1_input_f1'] = l0_xyz_f1 62 | 63 | # Frame 1, Layer 2 (with radius = 1.0), Inputs are the above function's output 64 | l2_xyz_f1, l2_points_f1, l2_indices_f1 = pointnet_sa_module(l1_xyz_f1, 65 | l1_points_f1, 66 | npoint=256, 67 | radius=RADIUS2, 68 | nsample=16, 69 | mlp=[64, 64, 70 | 128], 71 | mlp2=None, 72 | group_all=False, 73 | is_training=is_training, 74 | bn_decay=bn_decay, 75 | scope='layer2', 76 | knn=knn) 77 | end_points['l2_indices_f1'] = l2_indices_f1 78 | end_points['l2_xyz_f1'] = l2_points_f1 79 | end_points['l2_input_f1'] = l1_xyz_f1 80 | 81 | scope.reuse_variables() 82 | # Frame 2, Layer 1 (with radius = 0.5) 83 | l1_xyz_f2, l1_points_f2, l1_indices_f2 = pointnet_sa_module(l0_xyz_f2, 84 | l0_points_f2, 85 | npoint=1024, 86 | radius=RADIUS1, 87 | nsample=16, 88 | mlp=[32, 32, 89 | 64], 90 | mlp2=None, 91 | group_all=False, 92 | is_training=is_training, 93 | bn_decay=bn_decay, 94 | scope='layer1', 95 | knn=knn) 96 | end_points['l1_points_f2'] = l1_points_f2 97 | end_points['l1_xyz_f2'] = l1_indices_f2 98 | end_points['l1_input_f2'] = l0_xyz_f2 99 | # Tensor("sa1/layer1_1/GatherPoint:0", shape=(16, 1024, 3), dtype=float32, device= / device: GPU:0) 100 | # Tensor("sa1/layer1_1/Squeeze:0", shape=(16, 1024, 64), dtype=float32, device= / device: GPU:0) 101 | # Tensor("sa1/layer1_1/QueryBallPoint:0", shape=(16, 1024, 16), dtype=int32, device= / device: GPU:0) 102 | 103 | 104 | # Frame 2, Layer 2(with radius = 1.0), input are of the above function's output 105 | l2_xyz_f2, l2_points_f2, l2_indices_f2 = pointnet_sa_module(l1_xyz_f2, 106 | l1_points_f2, 107 | npoint=256, 108 | radius=RADIUS2, 109 | nsample=16, 110 | mlp=[64, 64, 111 | 128], 112 | mlp2=None, 113 | group_all=False, 114 | is_training=is_training, 115 | bn_decay=bn_decay, 116 | scope='layer2', 117 | knn=knn) 118 | end_points['l2_points_f2'] = l2_points_f2 119 | end_points['l2_xyz_f2'] = l2_indices_f2 120 | end_points['l2_input_f2'] = l1_xyz_f2 121 | 122 | 123 | # Tensor("sa1/layer2_1/GatherPoint:0", shape=(16, 256, 3), dtype=float32, device= / device: GPU:0) 124 | # Tensor("sa1/layer2_1/Squeeze:0", shape=(16, 256, 128), dtype=float32, device= / device: GPU:0) 125 | # Tensor("sa1/layer2_1/QueryBallPoint:0", shape=(16, 256, 16), dtype=int32, device= / device: GPU:0) 126 | 127 | # POINT MIXTURE 128 | # embedding layer 129 | # radius = 1, 10, 50 130 | print("Radius here:", radius) 131 | print('KNN', knn) 132 | print('flow module', flow_module) 133 | if flow_module == 'default': 134 | _, l2_points_f1_new = flow_embedding_module(l2_xyz_f1, l2_xyz_f2, 135 | l2_points_f1, l2_points_f2, 136 | radius=radius, nsample=64, 137 | mlp=[128, 128, 128], 138 | is_training=is_training, 139 | bn_decay=bn_decay, 140 | scope='flow_embedding', bn=True, 141 | pooling='max', knn=True, 142 | corr_func='concat') 
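# the flow embedding mixes the two frames: for each frame-1 point at the
# 256-point level it aggregates nearby frame-2 features (kNN here, since
# knn=True), so l2_points_f1_new encodes motion correlation, not just geometry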
143 | end_points['l2_points_f1_new'] = l2_points_f1_new 144 | elif flow_module == 'all': 145 | _, l2_points_f1_new = flow_embedding_module_all(l2_xyz_f1, l2_xyz_f2, 146 | l2_points_f1, l2_points_f2, 147 | radius=radius, nsample=256, 148 | mlp=[128, 128, 128], 149 | is_training=is_training, 150 | bn_decay=bn_decay, 151 | scope='flow_embedding', bn=True, 152 | pooling='max', knn=True, 153 | corr_func='concat') 154 | end_points['l2_points_f1_new'] = l2_points_f1_new 155 | 156 | # setconv layer 157 | # Layer 3 with radius = 2.0 158 | l3_xyz_f1, l3_points_f1, l3_indices_f1 = pointnet_sa_module(l2_xyz_f1, 159 | l2_points_f1_new, 160 | npoint=64, 161 | radius=RADIUS3, 162 | nsample=8, 163 | mlp=[128, 128, 164 | 256], 165 | mlp2=None, 166 | group_all=False, 167 | is_training=is_training, 168 | bn_decay=bn_decay, 169 | scope='layer3') 170 | end_points['l3_indices_f1'] = l3_indices_f1 171 | end_points['l3_xyz_f1'] = l3_points_f1 172 | # Tensor("layer3/GatherPoint:0", shape=(16, 64, 3), dtype=float32, device=/device:GPU:0) 173 | # Tensor("layer3/Squeeze:0", shape=(16, 64, 256), dtype=float32, device=/device:GPU:0) 174 | # Tensor("layer3/QueryBallPoint:0", shape=(16, 64, 8), dtype=int32, device=/device:GPU:0) 175 | 176 | # Layer 4 with radius = 4.0 177 | l4_xyz_f1, l4_points_f1, l4_indices_f1 = pointnet_sa_module(l3_xyz_f1, 178 | l3_points_f1, 179 | npoint=16, 180 | radius=RADIUS4, 181 | nsample=8, 182 | mlp=[256, 256, 183 | 512], 184 | mlp2=None, 185 | group_all=False, 186 | is_training=is_training, 187 | bn_decay=bn_decay, 188 | scope='layer4') 189 | end_points['l4_indices_f1'] = l4_indices_f1 190 | end_points['l4_xyz_f1'] = l4_points_f1 191 | # Tensor("layer4/GatherPoint:0", shape=(16, 16, 3), dtype=float32, device=/device:GPU:0) 192 | # Tensor("layer4/Squeeze:0", shape=(16, 16, 512), dtype=float32, device=/device:GPU:0) 193 | # Tensor("layer4/QueryBallPoint:0", shape=(16, 16, 8), dtype=int32, device=/device:GPU:0) 194 | 195 | ### FLOW REFINEMENT MODULE 196 | # Feature Propagation 197 | # Frame 1, l1->l2; l2->l3; l3->l4 198 | l3_feat_f1 = set_upconv_module(l3_xyz_f1, l4_xyz_f1, l3_points_f1, 199 | l4_points_f1, nsample=8, radius=2.4, mlp=[], 200 | mlp2=[256, 256], scope='up_sa_layer1', 201 | is_training=is_training, bn_decay=bn_decay, 202 | knn=True) 203 | end_points['l3_feat_f1'] = l3_feat_f1 204 | 205 | l2_feat_f1 = set_upconv_module(l2_xyz_f1, l3_xyz_f1, tf.concat(axis=-1, 206 | values=[ 207 | l2_points_f1, 208 | l2_points_f1_new]), 209 | l3_feat_f1, nsample=8, radius=1.2, 210 | mlp=[128, 128, 256], mlp2=[256], 211 | scope='up_sa_layer2', 212 | is_training=is_training, bn_decay=bn_decay, 213 | knn=True) 214 | end_points['l2_feat_f1'] = l2_feat_f1 215 | 216 | l1_feat_f1 = set_upconv_module(l1_xyz_f1, l2_xyz_f1, l1_points_f1, 217 | l2_feat_f1, nsample=8, radius=0.6, 218 | mlp=[128, 128, 256], mlp2=[256], 219 | scope='up_sa_layer3', 220 | is_training=is_training, bn_decay=bn_decay, 221 | knn=True) 222 | end_points['l1_feat_f1'] = l1_feat_f1 223 | 224 | if layer == 'pointnet': 225 | l0_feat_f1 = pointnet_fp_module(l0_xyz_f1, l1_xyz_f1, l0_points_f1, 226 | l1_feat_f1, [256, 256], is_training, 227 | bn_decay, scope='fa_layer4') 228 | else: 229 | print ('Last set conv layer running') 230 | l0_feat_f1 = set_upconv_module(l0_xyz_f1, l1_xyz_f1, l0_points_f1, 231 | l1_feat_f1, nsample=8, radius=0.3, 232 | mlp=[128,128,256], mlp2=[256], 233 | scope='up_sa_layer4', 234 | is_training=is_training, bn_decay=bn_decay, 235 | knn=True) 236 | end_points['l0_feat_f1'] = l0_feat_f1 237 | 238 | # FC layers 239 | net = 
tf_util.conv1d(l0_feat_f1, 128, 1, padding='VALID', bn=True, 240 | is_training=is_training, scope='fc1', 241 | bn_decay=bn_decay) 242 | 243 | end_points['net1'] = net 244 | net = tf_util.conv1d(net, 3, 1, padding='VALID', activation_fn=None, 245 | scope='fc2') 246 | 247 | end_points['net'] = net 248 | return net, end_points 249 | 250 | 251 | def huber_loss(error, delta): 252 | abs_error = tf.abs(error) 253 | quadratic = tf.minimum(abs_error, delta) 254 | linear = (abs_error - quadratic) 255 | losses = 0.5 * quadratic ** 2 + delta * linear 256 | return tf.reduce_mean(losses) 257 | 258 | 259 | def get_loss(pred, label, mask, end_points): 260 | """ pred: BxNx3, 261 | label: BxNx3, 262 | mask: BxN 263 | """ 264 | batch_size = pred.get_shape()[0].value 265 | num_point = pred.get_shape()[1].value 266 | l2_loss = tf.reduce_mean( 267 | mask * tf.reduce_sum((pred - label) * (pred - label), axis=2) / 2.0) 268 | tf.summary.scalar('l2 loss', l2_loss) 269 | tf.add_to_collection('losses', l2_loss) 270 | return l2_loss 271 | 272 | 273 | if __name__ == '__main__': 274 | with tf.Graph().as_default(): 275 | inputs = tf.zeros((32, 1024 * 2, 6)) 276 | outputs = get_model(inputs, tf.constant(True)) 277 | print(outputs) 278 | -------------------------------------------------------------------------------- /src/model_concat_upsa_cycle.py: -------------------------------------------------------------------------------- 1 | 2 | import tensorflow as tf 3 | import numpy as np 4 | import math 5 | import sys 6 | import os 7 | 8 | 9 | BASE_DIR = os.path.dirname(os.path.abspath(__file__)) 10 | sys.path.append(os.path.join(BASE_DIR, 'utils')) 11 | import utils.tf_util 12 | from utils.pointnet_util import * 13 | from tf_grouping import query_ball_point, group_point, knn_point 14 | 15 | 16 | def placeholder_inputs(batch_size, num_point, num_frames=3): 17 | # change here, num_point*2 -> numpoint*5 18 | pointclouds_pl = tf.placeholder(tf.float32, 19 | shape=(batch_size, num_point * num_frames, 6)) 20 | # labels_pl = tf.placeholder(tf.float32, shape=(batch_size, num_point, 3)) 21 | # masks_pl = tf.placeholder(tf.float32, shape=(batch_size, num_point)) 22 | # return pointclouds_pl, labels_pl, masks_pl 23 | return pointclouds_pl 24 | 25 | 26 | def get_model(radius, layer, point_cloud, is_training, bn_decay=None, knn=False, 27 | flow_module='default', num_frames=2, stop_gradient=False, 28 | rigidity=False, rigidity_radius=0.5, rigidity_nsample=4, rgb=False): 29 | 30 | num_point = point_cloud.get_shape()[1].value // num_frames 31 | 32 | pred_flow, end_points = get_model_flow(radius, layer, point_cloud, is_training, 33 | bn_decay=None, knn=False, 34 | flow_module='default') 35 | 36 | pred_f = pred_flow + point_cloud[:, :num_point, :3] # flow + p1 = pred_f => pc2_hat 37 | 38 | _, idx = knn_point(1, point_cloud[:, num_point:num_point*2, :3], 39 | pred_f) 40 | 41 | 42 | grouped_xyz = group_point(point_cloud[:, num_point:num_point*2, :3], idx) 43 | 44 | 45 | grouped_xyz = tf.squeeze(grouped_xyz, axis=2) # grouped_xyz => pc2nn 46 | end_points_f = { 47 | 'idx': idx, 48 | 'pred_flow': pred_flow, 49 | 'pc2': point_cloud[:, num_point:num_point*2, :3] 50 | } 51 | 52 | if rigidity: 53 | pc1 = point_cloud[:, :2048, :3] 54 | rigid_idx, _ = query_ball_point(rigidity_radius, rigidity_nsample, pc1, 55 | pc1) 56 | rigid_grouped_flow = group_point(pred_flow, rigid_idx) 57 | end_points_f['rigid_group_flow'] = rigid_grouped_flow 58 | end_points_f['rigid_pc1_flow'] = pred_flow 59 | 60 | if rgb: 61 | pred_f_rgb, dist_f, grouped_xyz_rgb_f = 
get_interpolated_rgb(pred_f, point_cloud[:, num_point:]) 62 | end_points_f['pred_f_rgb'] = pred_f_rgb 63 | end_points_f['dist_f'] = dist_f 64 | end_points_f['grouped_xyz_rgb_f'] = grouped_xyz_rgb_f 65 | 66 | 67 | # changes from here 68 | if stop_gradient: 69 | pred_f_copy = tf.Variable(0, dtype=pred_f.dtype, trainable=False, collections=[]) 70 | pred_f_copy = tf.assign(pred_f_copy, pred_f, validate_shape=False) 71 | else: 72 | pred_f_copy = pred_f 73 | 74 | with tf.variable_scope(tf.get_variable_scope(), reuse=True): 75 | 76 | pred_fb_xyz = (pred_f_copy + grouped_xyz) / 2 77 | 78 | pred_fb = tf.concat([pred_fb_xyz, point_cloud[:, :num_point, 3:]], axis = 2) 79 | 80 | # num_point = pred_f (predicted point cloud 2), num_point:num_point*2 = point cloud 1 81 | point_cloud_back = tf.concat([pred_fb, point_cloud[:, :num_point]], axis = 1) 82 | 83 | # import ipdb; ipdb.set_trace() 84 | 85 | pred_flow_back, end_points = get_model_flow(radius, layer, point_cloud_back, is_training, 86 | bn_decay=None, knn=False, 87 | flow_module='default') 88 | 89 | pred_b = pred_flow_back + pred_fb_xyz 90 | 91 | end_points_b = { 92 | 'pred_flow_b': pred_flow_back, 93 | } 94 | 95 | if rgb: 96 | pred_b_rgb, dist_b, grouped_xyz_rgb_b = get_interpolated_rgb(pred_b, point_cloud[:, :num_point]) 97 | end_points_f['pred_b_rgb'] = pred_b_rgb 98 | end_points_f['dist_b'] = dist_b 99 | end_points_f['grouped_xyz_rgb_b'] = grouped_xyz_rgb_b 100 | 101 | return pred_f, pred_b, grouped_xyz, end_points_f, end_points_b 102 | 103 | def get_model_flow(radius, layer, point_cloud, is_training, bn_decay=None, knn=False, flow_module='default'): 104 | """ Semantic segmentation PointNet, input is BxNx3, output Bxnum_class """ 105 | 106 | end_points = {} 107 | batch_size = point_cloud.get_shape()[0].value # batch_size = 16 108 | num_point = point_cloud.get_shape()[1].value // 2 109 | # change here, num_point hard coded to 2048 110 | # num_point = 2048 111 | 112 | l0_xyz_f1 = point_cloud[:, :num_point, 0:3] 113 | l0_points_f1 = point_cloud[:, :num_point, 3:] 114 | l0_xyz_f2 = point_cloud[:, num_point:, 0:3] 115 | l0_points_f2 = point_cloud[:, num_point:, 3:] 116 | 117 | RADIUS1 = 0.5 118 | RADIUS2 = 1.0 119 | RADIUS3 = 2.0 120 | RADIUS4 = 4.0 121 | 122 | with tf.variable_scope('sa1') as scope: 123 | # radius, npoints, nlayers, mlp size, sampling technique 124 | # Set conv layers, POINT FEATURE LEARNING 125 | # Frame 1, Layer 1 (with radius = 0.5) 126 | l1_xyz_f1, l1_points_f1, l1_indices_f1 = pointnet_sa_module(l0_xyz_f1, 127 | l0_points_f1, 128 | npoint=1024, 129 | radius=RADIUS1, 130 | nsample=16, 131 | mlp=[32, 32, 132 | 64], 133 | mlp2=None, 134 | group_all=False, 135 | is_training=is_training, 136 | bn_decay=bn_decay, 137 | scope='layer1', 138 | knn=knn) 139 | end_points['l1_indices_f1'] = l1_indices_f1 140 | end_points['l1_xyz_f1'] = l1_points_f1 141 | end_points['l1_input_f1'] = l0_xyz_f1 142 | 143 | # Frame 1, Layer 2 (with radius = 1.0), Inputs are the above function's output 144 | l2_xyz_f1, l2_points_f1, l2_indices_f1 = pointnet_sa_module(l1_xyz_f1, 145 | l1_points_f1, 146 | npoint=256, 147 | radius=RADIUS2, 148 | nsample=16, 149 | mlp=[64, 64, 150 | 128], 151 | mlp2=None, 152 | group_all=False, 153 | is_training=is_training, 154 | bn_decay=bn_decay, 155 | scope='layer2', 156 | knn=knn) 157 | end_points['l2_indices_f1'] = l2_indices_f1 158 | end_points['l2_xyz_f1'] = l2_points_f1 159 | end_points['l2_input_f1'] = l1_xyz_f1 160 | 161 | scope.reuse_variables() 162 | # Frame 2, Layer 1 (with radius = 0.5) 163 | l1_xyz_f2, l1_points_f2, 
l1_indices_f2 = pointnet_sa_module(l0_xyz_f2, 164 | l0_points_f2, 165 | npoint=1024, 166 | radius=RADIUS1, 167 | nsample=16, 168 | mlp=[32, 32, 169 | 64], 170 | mlp2=None, 171 | group_all=False, 172 | is_training=is_training, 173 | bn_decay=bn_decay, 174 | scope='layer1', 175 | knn=knn) 176 | end_points['l1_points_f2'] = l1_points_f2 177 | end_points['l1_xyz_f2'] = l1_indices_f2 178 | end_points['l1_input_f2'] = l0_xyz_f2 179 | # Tensor("sa1/layer1_1/GatherPoint:0", shape=(16, 1024, 3), dtype=float32, device= / device: GPU:0) 180 | # Tensor("sa1/layer1_1/Squeeze:0", shape=(16, 1024, 64), dtype=float32, device= / device: GPU:0) 181 | # Tensor("sa1/layer1_1/QueryBallPoint:0", shape=(16, 1024, 16), dtype=int32, device= / device: GPU:0) 182 | 183 | 184 | # Frame 2, Layer 2(with radius = 1.0), input are of the above function's output 185 | l2_xyz_f2, l2_points_f2, l2_indices_f2 = pointnet_sa_module(l1_xyz_f2, 186 | l1_points_f2, 187 | npoint=256, 188 | radius=RADIUS2, 189 | nsample=16, 190 | mlp=[64, 64, 191 | 128], 192 | mlp2=None, 193 | group_all=False, 194 | is_training=is_training, 195 | bn_decay=bn_decay, 196 | scope='layer2', 197 | knn=knn) 198 | end_points['l2_points_f2'] = l2_points_f2 199 | end_points['l2_xyz_f2'] = l2_indices_f2 200 | end_points['l2_input_f2'] = l1_xyz_f2 201 | 202 | 203 | # Tensor("sa1/layer2_1/GatherPoint:0", shape=(16, 256, 3), dtype=float32, device= / device: GPU:0) 204 | # Tensor("sa1/layer2_1/Squeeze:0", shape=(16, 256, 128), dtype=float32, device= / device: GPU:0) 205 | # Tensor("sa1/layer2_1/QueryBallPoint:0", shape=(16, 256, 16), dtype=int32, device= / device: GPU:0) 206 | 207 | # POINT MIXTURE 208 | # embedding layer 209 | # radius = 1, 10, 50 210 | if flow_module == 'default': 211 | _, l2_points_f1_new = flow_embedding_module(l2_xyz_f1, l2_xyz_f2, 212 | l2_points_f1, l2_points_f2, 213 | radius=radius, nsample=64, 214 | mlp=[128, 128, 128], 215 | is_training=is_training, 216 | bn_decay=bn_decay, 217 | scope='flow_embedding', bn=True, 218 | pooling='max', knn=True, 219 | corr_func='concat') 220 | end_points['l2_points_f1_new'] = l2_points_f1_new 221 | elif flow_module == 'all': 222 | _, l2_points_f1_new = flow_embedding_module_all(l2_xyz_f1, l2_xyz_f2, 223 | l2_points_f1, l2_points_f2, 224 | radius=radius, nsample=256, 225 | mlp=[128, 128, 128], 226 | is_training=is_training, 227 | bn_decay=bn_decay, 228 | scope='flow_embedding', bn=True, 229 | pooling='max', knn=True, 230 | corr_func='concat') 231 | end_points['l2_points_f1_new'] = l2_points_f1_new 232 | 233 | # setconv layer 234 | # Layer 3 with radius = 2.0 235 | l3_xyz_f1, l3_points_f1, l3_indices_f1 = pointnet_sa_module(l2_xyz_f1, 236 | l2_points_f1_new, 237 | npoint=64, 238 | radius=RADIUS3, 239 | nsample=8, 240 | mlp=[128, 128, 241 | 256], 242 | mlp2=None, 243 | group_all=False, 244 | is_training=is_training, 245 | bn_decay=bn_decay, 246 | scope='layer3') 247 | end_points['l3_indices_f1'] = l3_indices_f1 248 | end_points['l3_xyz_f1'] = l3_points_f1 249 | # Tensor("layer3/GatherPoint:0", shape=(16, 64, 3), dtype=float32, device=/device:GPU:0) 250 | # Tensor("layer3/Squeeze:0", shape=(16, 64, 256), dtype=float32, device=/device:GPU:0) 251 | # Tensor("layer3/QueryBallPoint:0", shape=(16, 64, 8), dtype=int32, device=/device:GPU:0) 252 | 253 | # Layer 4 with radius = 4.0 254 | l4_xyz_f1, l4_points_f1, l4_indices_f1 = pointnet_sa_module(l3_xyz_f1, 255 | l3_points_f1, 256 | npoint=16, 257 | radius=RADIUS4, 258 | nsample=8, 259 | mlp=[256, 256, 260 | 512], 261 | mlp2=None, 262 | group_all=False, 263 | 
is_training=is_training, 264 | bn_decay=bn_decay, 265 | scope='layer4') 266 | end_points['l4_indices_f1'] = l4_indices_f1 267 | end_points['l4_xyz_f1'] = l4_points_f1 268 | # Tensor("layer4/GatherPoint:0", shape=(16, 16, 3), dtype=float32, device=/device:GPU:0) 269 | # Tensor("layer4/Squeeze:0", shape=(16, 16, 512), dtype=float32, device=/device:GPU:0) 270 | # Tensor("layer4/QueryBallPoint:0", shape=(16, 16, 8), dtype=int32, device=/device:GPU:0) 271 | 272 | ### FLOW REFINEMENT MODULE 273 | # Feature Propagation 274 | # Frame 1, l1->l2; l2->l3; l3->l4 275 | l3_feat_f1 = set_upconv_module(l3_xyz_f1, l4_xyz_f1, l3_points_f1, 276 | l4_points_f1, nsample=8, radius=2.4, mlp=[], 277 | mlp2=[256, 256], scope='up_sa_layer1', 278 | is_training=is_training, bn_decay=bn_decay, 279 | knn=True) 280 | end_points['l3_feat_f1'] = l3_feat_f1 281 | 282 | l2_feat_f1 = set_upconv_module(l2_xyz_f1, l3_xyz_f1, tf.concat(axis=-1, 283 | values=[ 284 | l2_points_f1, 285 | l2_points_f1_new]), 286 | l3_feat_f1, nsample=8, radius=1.2, 287 | mlp=[128, 128, 256], mlp2=[256], 288 | scope='up_sa_layer2', 289 | is_training=is_training, bn_decay=bn_decay, 290 | knn=True) 291 | end_points['l2_feat_f1'] = l2_feat_f1 292 | 293 | l1_feat_f1 = set_upconv_module(l1_xyz_f1, l2_xyz_f1, l1_points_f1, 294 | l2_feat_f1, nsample=8, radius=0.6, 295 | mlp=[128, 128, 256], mlp2=[256], 296 | scope='up_sa_layer3', 297 | is_training=is_training, bn_decay=bn_decay, 298 | knn=True) 299 | end_points['l1_feat_f1'] = l1_feat_f1 300 | 301 | if layer == 'pointnet': 302 | l0_feat_f1 = pointnet_fp_module(l0_xyz_f1, l1_xyz_f1, l0_points_f1, 303 | l1_feat_f1, [256, 256], is_training, 304 | bn_decay, scope='fa_layer4') 305 | else: 306 | l0_feat_f1 = set_upconv_module(l0_xyz_f1, l1_xyz_f1, l0_points_f1, 307 | l1_feat_f1, nsample=8, radius=0.3, 308 | mlp=[128,128,256], mlp2=[256], 309 | scope='up_sa_layer4', 310 | is_training=is_training, bn_decay=bn_decay, 311 | knn=True) 312 | end_points['l0_feat_f1'] = l0_feat_f1 313 | 314 | # FC layers 315 | net = tf_util.conv1d(l0_feat_f1, 128, 1, padding='VALID', bn=True, 316 | is_training=is_training, scope='fc1', 317 | bn_decay=bn_decay) 318 | 319 | end_points['net1'] = net 320 | net = tf_util.conv1d(net, 3, 1, padding='VALID', activation_fn=None, 321 | scope='fc2') 322 | 323 | end_points['net'] = net 324 | return net, end_points 325 | 326 | 327 | def huber_loss(error, delta): 328 | abs_error = tf.abs(error) 329 | quadratic = tf.minimum(abs_error, delta) 330 | linear = (abs_error - quadratic) 331 | losses = 0.5 * quadratic ** 2 + delta * linear 332 | return tf.reduce_mean(losses) 333 | 334 | 335 | def get_loss(pred, label): 336 | """ pred: BxNx3, 337 | label: BxNx3, 338 | mask: BxN 339 | """ 340 | batch_size = pred.get_shape()[0].value 341 | num_point = pred.get_shape()[1].value 342 | l2_loss = tf.reduce_mean( 343 | tf.reduce_sum((pred - label) * (pred - label), axis=2) / 2.0) 344 | tf.summary.scalar('l2 loss', l2_loss) 345 | tf.add_to_collection('losses', l2_loss) 346 | return l2_loss 347 | 348 | 349 | def get_cycle_loss(pred_f, grouped_xyz, pred_b, point_cloud1, end_points=None, 350 | rigidity=False, rgb=False, point_cloud1_rgb=None, flip_prefix='', cycle_loss_weight=1, 351 | knn_loss_weight=1): 352 | 353 | end_points_loss = {} 354 | 355 | knn_l2_loss = knn_loss_weight*tf.reduce_mean( 356 | tf.reduce_sum((pred_f - grouped_xyz) * (pred_f - grouped_xyz), axis=2) / 2.0) 357 | tf.summary.scalar('{}KNN L2 loss'.format(flip_prefix), knn_l2_loss) 358 | tf.add_to_collection('{}KNN losses'.format(flip_prefix), 
knn_l2_loss) 359 | 360 | end_points_loss['knn_l2_loss'] = knn_l2_loss 361 | 362 | cycle_l2_loss = cycle_loss_weight*tf.reduce_mean( 363 | tf.reduce_sum((pred_b - point_cloud1) * (pred_b - point_cloud1), axis=2) / 2.0) 364 | tf.summary.scalar('{}Cycle l2 loss'.format(flip_prefix), cycle_l2_loss) 365 | tf.add_to_collection('{}Cycle losses'.format(flip_prefix), cycle_l2_loss) 366 | 367 | end_points_loss['cycle_l2_loss'] = cycle_l2_loss 368 | 369 | l2_loss = knn_l2_loss + cycle_l2_loss 370 | 371 | avg_distance_metric = tf.reduce_mean( 372 | tf.reduce_sum((pred_f - grouped_xyz) * (pred_f - grouped_xyz), axis=2) ** 0.5) 373 | tf.summary.scalar('{}Avg Distance Metric loss'.format(flip_prefix), avg_distance_metric) 374 | tf.add_to_collection('{}Avg Distance Metric losses'.format(flip_prefix), avg_distance_metric) 375 | 376 | if rigidity: 377 | rigid_group_flow = end_points['rigid_group_flow'] 378 | rigid_pc1_flow = tf.expand_dims(end_points['rigid_pc1_flow'], 2) 379 | 380 | rigidity_loss = tf.reduce_mean( 381 | tf.reduce_sum((rigid_group_flow - rigid_pc1_flow) * (rigid_group_flow - rigid_pc1_flow), 382 | axis=3) / 2.0) 383 | tf.summary.scalar('{}Rigidity loss'.format(flip_prefix), rigidity_loss) 384 | tf.add_to_collection('{}Rigidity losses'.format(flip_prefix), rigidity_loss) 385 | 386 | end_points_loss['rigidity_loss'] = rigidity_loss 387 | 388 | l2_loss = l2_loss + rigidity_loss 389 | 390 | if rgb: 391 | pred_f_rgb = end_points['pred_f_rgb'] 392 | rgb_loss_f = 10*tf.reduce_mean( 393 | tf.reduce_sum((pred_f_rgb - point_cloud1_rgb) * (pred_f_rgb - point_cloud1_rgb), axis=2) / 2.0) 394 | 395 | end_points_loss['rgb_loss_f'] = rgb_loss_f 396 | 397 | pred_b_rgb = end_points['pred_b_rgb'] 398 | rgb_loss_b = 10*tf.reduce_mean( 399 | tf.reduce_sum((pred_b_rgb - point_cloud1_rgb) * (pred_b_rgb - point_cloud1_rgb), axis=2) / 2.0) 400 | 401 | end_points_loss['rgb_loss_b'] = rgb_loss_b 402 | 403 | rgb_loss = rgb_loss_f + rgb_loss_b 404 | tf.summary.scalar('{}RGB Loss Forward'.format(flip_prefix), rgb_loss_f) 405 | tf.add_to_collection('{}RGB Loss Forward'.format(flip_prefix), rgb_loss_f) 406 | 407 | tf.summary.scalar('{}RGB Loss Backward'.format(flip_prefix), rgb_loss_b) 408 | tf.add_to_collection('{}RGB Loss Backward'.format(flip_prefix), rgb_loss_b) 409 | 410 | tf.summary.scalar('{}RGB Loss'.format(flip_prefix), rgb_loss) 411 | tf.add_to_collection('{}RGB Loss'.format(flip_prefix), rgb_loss) 412 | 413 | end_points_loss['rgb_loss'] = rgb_loss 414 | l2_loss = l2_loss + rgb_loss 415 | 416 | end_points_loss['l2_loss'] = l2_loss 417 | tf.summary.scalar('{}Total l2 loss'.format(flip_prefix), l2_loss) 418 | tf.add_to_collection('{}Total losses'.format(flip_prefix), l2_loss) 419 | 420 | return l2_loss, end_points_loss 421 | 422 | if __name__ == '__main__': 423 | with tf.Graph().as_default(): 424 | inputs = tf.zeros((32, 1024 * 2, 6)) 425 | outputs = get_model(inputs, tf.constant(True)) 426 | print(outputs) 427 | -------------------------------------------------------------------------------- /src/nuscenes_dataset_self_supervised_cycle.py: -------------------------------------------------------------------------------- 1 | 2 | import os 3 | import os.path 4 | import json 5 | import numpy as np 6 | import sys 7 | import pickle 8 | import glob 9 | import random 10 | # import mayavi.mlab as mlab 11 | 12 | class SceneflowDataset(): 13 | def __init__(self, root = './data_preprocessing/nuscenes_trainval_rgb_pkl', 14 | cache_size = 30000, npoints=2048, train=True, 15 | softmax_dist = False, num_frames=3, flip_prob=0): 16 
|         self.npoints = npoints
17 |         self.train = train
18 |         self.root = root
19 |         if self.train:  # 20007 train .pkl files
20 |             self.datapath = glob.glob(os.path.join(self.root, 'train/*.pkl'))
21 |         else:
22 |             # print('root here:', self.root)
23 |             self.datapath = glob.glob(os.path.join(self.root, 'test/*.pkl'))
24 |         # print('path here:', sorted(self.datapath))
25 |         self.cache = {}
26 |         self.cache_size = cache_size
27 |         self.softmax_dist = softmax_dist
28 |         self.num_frames = num_frames
29 |         self.flip_prob = flip_prob
30 | 
31 |     def __getitem__(self, index):
32 |         if index in self.cache:
33 |             pos_list, color_list = self.cache[index]
34 |         else:
35 |             fn = self.datapath[index]
36 |             pc_list, rgb_list = pickle.load(open(fn, 'rb'))  # list of point clouds
37 |             start_idx = np.random.choice(np.arange(len(pc_list)-self.num_frames+1),
38 |                                          size=1)[0]
39 |             pos_list = []
40 |             color_list = []
41 |             min_length = np.min([len(x) for x in pc_list])
42 |             sample_start_idx = np.random.choice(min_length-self.npoints+1,
43 |                                                 size=1)[0]
44 |             sample_idx = np.arange(sample_start_idx,
45 |                                    sample_start_idx+self.npoints)
46 |             for frame_idx in range(start_idx, start_idx + self.num_frames):
47 |                 data = pc_list[frame_idx]  # num_point x 4
48 |                 rgb_data = rgb_list[frame_idx]/255.
49 |                 # sample_idx = np.random.choice(data.shape[0], self.npoints, replace=False)
50 | 
51 |                 pos = data[sample_idx, :3]
52 |                 # color = np.tile(data[sample_idx, 3:], [1, 3])  # 2048 x 1 => 2048 x 3
53 |                 color = rgb_data[sample_idx, :3]
54 | 
55 |                 pos_list.append(pos)
56 |                 color_list.append(color)
57 | 
58 |             prob = random.uniform(0, 1)
59 |             if prob < self.flip_prob:
60 |                 pos_list = pos_list[::-1]
61 |                 color_list = color_list[::-1]
62 | 
63 |             if len(self.cache) < self.cache_size:
64 |                 self.cache[index] = (pos_list, color_list)
65 | 
66 |         return np.array(pos_list), np.array(color_list)
67 | 
68 |     def __len__(self):
69 |         return len(self.datapath)
70 | 
71 | 
72 | if __name__ == '__main__':
73 |     # import mayavi.mlab as mlab
74 |     d = SceneflowDataset(npoints=2048, train=False)
75 |     print('Len of dataset:', len(d))
76 |     import time
77 |     tic = time.time()
78 |     for i in range(100):
79 |         # pc1, pc2, c1, c2, flow, m1, m2 = d[i]
80 |         # print(i)
81 |         # pc1, pc2, c1, c2, flow, m1 = d[i]
82 |         pos_list, color_list = d[i]  # __getitem__ returns (positions, colors), not a 6-tuple
83 | 
84 |         # print (pc1.shape)
85 |         # print (pc2.shape)
86 |         # print (c1.shape)
87 |         # print (c2.shape)
88 |         # print (gt.shape)
89 |         # print (m1.shape)
90 |         # print(np.sum(m1))
91 |         # print(np.sum(m2))
92 |         # pc1_m1 = pc1[m1==1,:]
93 |         # pc1_m1_n = pc1[m1==0,:]
94 |         # print(pc1_m1.shape)
95 |         # print(pc1_m1_n.shape)
96 |         # mlab.points3d(pc1_m1[:,0], pc1_m1[:,1], pc1_m1[:,2], scale_factor=0.05, color=(1,0,0))
97 |         # mlab.points3d(pc1_m1_n[:,0], pc1_m1_n[:,1], pc1_m1_n[:,2], scale_factor=0.05, color=(0,1,0))
98 |         # raw_input()
99 | 
100 |         # mlab.points3d(pc1[:,0], pc1[:,1], pc1[:,2], scale_factor=0.05, color=(1,0,0))
101 |         # mlab.points3d(pc2[:,0], pc2[:,1], pc2[:,2], scale_factor=0.05, color=(0,1,0))
102 |         # raw_input()
103 |         # mlab.quiver3d(pc1[:,0], pc1[:,1], pc1[:,2], flow[:,0], flow[:,1], flow[:,2], scale_factor=1)
104 |         # raw_input()
105 | 
106 |     print(time.time() - tic)
107 |     print(pos_list.shape, type(pos_list))
108 | 
109 | 
110 | 
--------------------------------------------------------------------------------
/src/tf_ops/3d_interpolation/Makefile:
--------------------------------------------------------------------------------
1 | 
2 | 
3 | 
4 | 
5 | CUDA_HOME := /home/hmittal/cuda-9.0
6 | 
7 | TF_INC = `python3 -c "import tensorflow; print(tensorflow.sysconfig.get_include())"`
8 | TF_LIB
= `python3 -c "import tensorflow as tf; print(tf.sysconfig.get_lib())"` 9 | 10 | LIBFLAGS = -L$(CUDA_HOME)/lib -L$(CUDA_HOME)/lib64 -lcudart -L $(TF_LIB) -ltensorflow_framework -lcublas 11 | 12 | ifndef ARCHI 13 | ARCHI=sm_61 14 | endif 15 | 16 | arch = $(ARCHI) 17 | 18 | LD_LIBRARY_PATH=$(CUDA_HOME)/lib64 19 | INCLUDE_PATH=$(CUDA_HOME)/include 20 | 21 | all: tf_interpolate_so.so 22 | 23 | tf_interpolate_so.so: tf_interpolate.cpp 24 | g++ -std=c++11 -shared -fPIC -o tf_interpolate_so.so tf_interpolate.cpp -I$(TF_INC) -I$(INCLUDE_PATH) -L$(LIBFLAGS) -O2 -D_GLIBCXX_USE_CXX11_ABI=0 25 | 26 | clean: 27 | rm *.so 28 | -------------------------------------------------------------------------------- /src/tf_ops/3d_interpolation/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HimangiM/Just-Go-with-the-Flow-Self-Supervised-Scene-Flow-Estimation/7052587bd5c238a0b9c3f4bb3334ab923cee402e/src/tf_ops/3d_interpolation/__init__.py -------------------------------------------------------------------------------- /src/tf_ops/3d_interpolation/__pycache__/tf_interpolate.cpython-35.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HimangiM/Just-Go-with-the-Flow-Self-Supervised-Scene-Flow-Estimation/7052587bd5c238a0b9c3f4bb3334ab923cee402e/src/tf_ops/3d_interpolation/__pycache__/tf_interpolate.cpython-35.pyc -------------------------------------------------------------------------------- /src/tf_ops/3d_interpolation/__pycache__/tf_interpolate.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HimangiM/Just-Go-with-the-Flow-Self-Supervised-Scene-Flow-Estimation/7052587bd5c238a0b9c3f4bb3334ab923cee402e/src/tf_ops/3d_interpolation/__pycache__/tf_interpolate.cpython-36.pyc -------------------------------------------------------------------------------- /src/tf_ops/3d_interpolation/interpolate.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include // memset 4 | #include // rand, RAND_MAX 5 | #include // sqrtf 6 | #include 7 | #include 8 | using namespace std; 9 | float randomf(){ 10 | return (rand()+0.5)/(RAND_MAX+1.0); 11 | } 12 | static double get_time(){ 13 | timespec tp; 14 | clock_gettime(CLOCK_MONOTONIC,&tp); 15 | return tp.tv_sec+tp.tv_nsec*1e-9; 16 | } 17 | 18 | // Find three nearest neigbors with square distance 19 | // input: xyz1 (b,n,3), xyz2(b,m,3) 20 | // output: dist (b,n,3), idx (b,n,3) 21 | void threenn_cpu(int b, int n, int m, const float *xyz1, const float *xyz2, float *dist, int *idx) { 22 | for (int i=0;i 2 | #include 3 | #include // memset 4 | #include // rand, RAND_MAX 5 | #include // sqrtf 6 | #include "tensorflow/core/framework/op.h" 7 | #include "tensorflow/core/framework/op_kernel.h" 8 | #include "tensorflow/core/framework/shape_inference.h" 9 | #include "tensorflow/core/framework/common_shape_fns.h" 10 | using namespace tensorflow; 11 | 12 | REGISTER_OP("ThreeNN") 13 | .Input("xyz1: float32") 14 | .Input("xyz2: float32") 15 | .Output("dist: float32") 16 | .Output("idx: int32") 17 | .SetShapeFn([](::tensorflow::shape_inference::InferenceContext* c) { 18 | c->set_output(0, c->input(0)); 19 | c->set_output(1, c->input(0)); 20 | return Status::OK(); 21 | }); 22 | REGISTER_OP("ThreeInterpolate") 23 | .Input("points: float32") 24 | .Input("idx: int32") 25 | .Input("weight: float32") 26 | .Output("out: 
float32") 27 | .SetShapeFn([](::tensorflow::shape_inference::InferenceContext* c) { 28 | ::tensorflow::shape_inference::ShapeHandle dims1; // (b,m,c) 29 | c->WithRank(c->input(0), 3, &dims1); 30 | ::tensorflow::shape_inference::ShapeHandle dims2; // (b,n,3) 31 | c->WithRank(c->input(1), 3, &dims2); 32 | // (b,n,c) 33 | ::tensorflow::shape_inference::ShapeHandle output = c->MakeShape({c->Dim(dims1, 0), c->Dim(dims2, 1), c->Dim(dims1, 2)}); 34 | c->set_output(0, output); 35 | return Status::OK(); 36 | }); 37 | REGISTER_OP("ThreeInterpolateGrad") 38 | .Input("points: float32") 39 | .Input("idx: int32") 40 | .Input("weight: float32") 41 | .Input("grad_out: float32") 42 | .Output("grad_points: float32") 43 | .SetShapeFn([](::tensorflow::shape_inference::InferenceContext* c) { 44 | c->set_output(0, c->input(0)); 45 | return Status::OK(); 46 | }); 47 | 48 | float randomf(){ 49 | return (rand()+0.5)/(RAND_MAX+1.0); 50 | } 51 | static double get_time(){ 52 | timespec tp; 53 | clock_gettime(CLOCK_MONOTONIC,&tp); 54 | return tp.tv_sec+tp.tv_nsec*1e-9; 55 | } 56 | 57 | // Find three nearest neigbors with square distance 58 | // input: xyz1 (b,n,3), xyz2(b,m,3) 59 | // output: dist (b,n,3), idx (b,n,3) 60 | void threenn_cpu(int b, int n, int m, const float *xyz1, const float *xyz2, float *dist, int *idx) { 61 | for (int i=0;iinput(0); 163 | OP_REQUIRES(context, xyz1_tensor.dims()==3 && xyz1_tensor.shape().dim_size(2)==3, errors::InvalidArgument("ThreeNN expects (b,n,3) xyz1 shape.")); 164 | int b = xyz1_tensor.shape().dim_size(0); 165 | int n = xyz1_tensor.shape().dim_size(1); 166 | 167 | const Tensor& xyz2_tensor = context->input(1); 168 | OP_REQUIRES(context, xyz2_tensor.dims()==3 && xyz2_tensor.shape().dim_size(2)==3, errors::InvalidArgument("ThreeNN expects (b,m,3) xyz2 shape.")); 169 | int m = xyz2_tensor.shape().dim_size(1); 170 | 171 | Tensor *dist_tensor = nullptr; 172 | OP_REQUIRES_OK(context, context->allocate_output(0, TensorShape{b,n,3}, &dist_tensor)); 173 | Tensor *idx_tensor = nullptr; 174 | OP_REQUIRES_OK(context, context->allocate_output(1, TensorShape{b,n,3}, &idx_tensor)); 175 | 176 | auto xyz1_flat = xyz1_tensor.flat(); 177 | const float *xyz1 = &(xyz1_flat(0)); 178 | auto xyz2_flat = xyz2_tensor.flat(); 179 | const float *xyz2 = &(xyz2_flat(0)); 180 | auto dist_flat = dist_tensor->flat(); 181 | float *dist = &(dist_flat(0)); 182 | auto idx_flat = idx_tensor->flat(); 183 | int *idx = &(idx_flat(0)); 184 | threenn_cpu(b,n,m,xyz1,xyz2,dist,idx); 185 | } 186 | }; 187 | REGISTER_KERNEL_BUILDER(Name("ThreeNN").Device(DEVICE_CPU), ThreeNNOp); 188 | 189 | 190 | 191 | class ThreeInterpolateOp: public OpKernel{ 192 | public: 193 | explicit ThreeInterpolateOp(OpKernelConstruction * context):OpKernel(context){} 194 | 195 | void Compute(OpKernelContext * context) override { 196 | const Tensor& points_tensor=context->input(0); 197 | OP_REQUIRES(context, points_tensor.dims()==3, errors::InvalidArgument("ThreeInterpolate expects (b,m,c) points shape")); 198 | int b = points_tensor.shape().dim_size(0); 199 | int m = points_tensor.shape().dim_size(1); 200 | int c = points_tensor.shape().dim_size(2); 201 | 202 | const Tensor& idx_tensor=context->input(1); 203 | OP_REQUIRES(context,idx_tensor.dims()==3 && idx_tensor.shape().dim_size(0)==b && idx_tensor.shape().dim_size(2)==3, errors::InvalidArgument("ThreeInterpolate expects (b,n,3) idx shape")); 204 | int n = idx_tensor.shape().dim_size(1); 205 | const Tensor& weight_tensor=context->input(2); 206 | OP_REQUIRES(context,weight_tensor.dims()==3 && 
weight_tensor.shape().dim_size(0)==b && weight_tensor.shape().dim_size(1)==n && weight_tensor.shape().dim_size(2)==3, errors::InvalidArgument("ThreeInterpolate expects (b,n,3) weight shape")); 207 | 208 | Tensor * out_tensor = nullptr; 209 | OP_REQUIRES_OK(context, context->allocate_output(0,TensorShape{b,n,c}, &out_tensor)); 210 | 211 | auto points_flat = points_tensor.flat(); 212 | const float *points = &(points_flat(0)); 213 | auto idx_flat = idx_tensor.flat(); 214 | const int *idx = &(idx_flat(0)); 215 | auto weight_flat = weight_tensor.flat(); 216 | const float *weight = &(weight_flat(0)); 217 | auto out_flat = out_tensor->flat(); 218 | float *out = &(out_flat(0)); 219 | threeinterpolate_cpu(b,m,c,n,points,idx,weight,out); 220 | } 221 | }; 222 | REGISTER_KERNEL_BUILDER(Name("ThreeInterpolate").Device(DEVICE_CPU),ThreeInterpolateOp); 223 | 224 | 225 | class ThreeInterpolateGradOp: public OpKernel{ 226 | public: 227 | explicit ThreeInterpolateGradOp(OpKernelConstruction * context):OpKernel(context){} 228 | 229 | void Compute(OpKernelContext * context) override { 230 | const Tensor& points_tensor=context->input(0); 231 | OP_REQUIRES(context, points_tensor.dims()==3, errors::InvalidArgument("ThreeInterpolateGrad expects (b,m,c) points shape")); 232 | int b = points_tensor.shape().dim_size(0); 233 | int m = points_tensor.shape().dim_size(1); 234 | int c = points_tensor.shape().dim_size(2); 235 | 236 | const Tensor& idx_tensor=context->input(1); 237 | OP_REQUIRES(context,idx_tensor.dims()==3 && idx_tensor.shape().dim_size(0)==b, errors::InvalidArgument("ThreeInterpolateGrad expects (b,n,3) idx shape")); 238 | int n = idx_tensor.shape().dim_size(1); 239 | const Tensor& weight_tensor=context->input(2); 240 | OP_REQUIRES(context,weight_tensor.dims()==3 && weight_tensor.shape().dim_size(0)==b && weight_tensor.shape().dim_size(1)==n && weight_tensor.shape().dim_size(2)==3, errors::InvalidArgument("ThreeInterpolateGrad expects (b,n,3) weight shape")); 241 | 242 | const Tensor& grad_out_tensor=context->input(3); 243 | OP_REQUIRES(context,grad_out_tensor.dims()==3 && grad_out_tensor.shape().dim_size(0)==b && grad_out_tensor.shape().dim_size(1)==n && grad_out_tensor.shape().dim_size(2)==c, errors::InvalidArgument("ThreeInterpolateGrad expects (b,n,c) grad_out shape")); 244 | 245 | Tensor * grad_points_tensor = nullptr; 246 | OP_REQUIRES_OK(context, context->allocate_output(0,TensorShape{b,m,c}, &grad_points_tensor)); 247 | 248 | auto points_flat = points_tensor.flat(); 249 | const float *points = &(points_flat(0)); 250 | auto idx_flat = idx_tensor.flat(); 251 | const int *idx = &(idx_flat(0)); 252 | auto weight_flat = weight_tensor.flat(); 253 | const float *weight = &(weight_flat(0)); 254 | auto grad_out_flat = grad_out_tensor.flat(); 255 | const float *grad_out = &(grad_out_flat(0)); 256 | auto grad_points_flat = grad_points_tensor->flat(); 257 | float *grad_points = &(grad_points_flat(0)); 258 | memset(grad_points, 0, sizeof(float)*b*m*c); 259 | threeinterpolate_grad_cpu(b,n,c,m,grad_out,idx,weight,grad_points); 260 | } 261 | }; 262 | REGISTER_KERNEL_BUILDER(Name("ThreeInterpolateGrad").Device(DEVICE_CPU),ThreeInterpolateGradOp); 263 | 264 | 265 | -------------------------------------------------------------------------------- /src/tf_ops/3d_interpolation/tf_interpolate.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | from tensorflow.python.framework import ops 3 | import sys 4 | import os 5 | BASE_DIR = os.path.dirname(__file__) 
6 | sys.path.append(BASE_DIR)
7 | interpolate_module = tf.load_op_library(os.path.join(BASE_DIR, 'tf_interpolate_so.so'))
8 | def three_nn(xyz1, xyz2):
9 |     '''
10 |     Input:
11 |         xyz1: (b,n,3) float32 array, unknown points
12 |         xyz2: (b,m,3) float32 array, known points
13 |     Output:
14 |         dist: (b,n,3) float32 array, squared distances to the three nearest known points
15 |         idx: (b,n,3) int32 array, indices to known points
16 |     '''
17 |     return interpolate_module.three_nn(xyz1, xyz2)
18 | ops.NoGradient('ThreeNN')
19 | def three_interpolate(points, idx, weight):
20 |     '''
21 |     Input:
22 |         points: (b,m,c) float32 array, known points
23 |         idx: (b,n,3) int32 array, indices to known points
24 |         weight: (b,n,3) float32 array, weights on known points
25 |     Output:
26 |         out: (b,n,c) float32 array, interpolated point values
27 |     '''
28 |     return interpolate_module.three_interpolate(points, idx, weight)
29 | @tf.RegisterGradient('ThreeInterpolate')
30 | def _three_interpolate_grad(op, grad_out):
31 |     points = op.inputs[0]
32 |     idx = op.inputs[1]
33 |     weight = op.inputs[2]
34 |     return [interpolate_module.three_interpolate_grad(points, idx, weight, grad_out), None, None]
35 | 
36 | if __name__=='__main__':
37 |     import numpy as np
38 |     import time
39 |     np.random.seed(100)
40 |     pts = np.random.random((32,128,64)).astype('float32')
41 |     tmp1 = np.random.random((32,512,3)).astype('float32')
42 |     tmp2 = np.random.random((32,128,3)).astype('float32')
43 |     with tf.device('/cpu:0'):
44 |         points = tf.constant(pts)
45 |         xyz1 = tf.constant(tmp1)
46 |         xyz2 = tf.constant(tmp2)
47 |         dist, idx = three_nn(xyz1, xyz2)
48 |         weight = tf.ones_like(dist)/3.0
49 |         interpolated_points = three_interpolate(points, idx, weight)
50 |     with tf.Session('') as sess:
51 |         now = time.time()
52 |         for _ in range(100):
53 |             ret = sess.run(interpolated_points)
54 |         print(time.time() - now)
55 |         print(ret.shape, ret.dtype)
56 |         # print(ret)
57 | 
58 | 
59 | 
60 | 
--------------------------------------------------------------------------------
/src/tf_ops/3d_interpolation/tf_interpolate_op_test.py:
--------------------------------------------------------------------------------
1 | import tensorflow as tf
2 | import numpy as np
3 | from tf_interpolate import three_nn, three_interpolate
4 | 
5 | class GroupPointTest(tf.test.TestCase):
6 |     def test(self):
7 |         pass
8 | 
9 |     def test_grad(self):
10 |         with self.test_session():
11 |             points = tf.constant(np.random.random((1,8,16)).astype('float32'))
12 |             print(points)
13 |             xyz1 = tf.constant(np.random.random((1,128,3)).astype('float32'))
14 |             xyz2 = tf.constant(np.random.random((1,8,3)).astype('float32'))
15 |             dist, idx = three_nn(xyz1, xyz2)
16 |             weight = tf.ones_like(dist)/3.0
17 |             interpolated_points = three_interpolate(points, idx, weight)
18 |             print(interpolated_points)
19 |             err = tf.test.compute_gradient_error(points, (1,8,16), interpolated_points, (1,128,16))
20 |             print(err)
21 |             self.assertLess(err, 1e-4)
22 | 
23 | if __name__=='__main__':
24 |     tf.test.main()
25 | 
--------------------------------------------------------------------------------
/src/tf_ops/3d_interpolation/tf_interpolate_so.so:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/HimangiM/Just-Go-with-the-Flow-Self-Supervised-Scene-Flow-Estimation/7052587bd5c238a0b9c3f4bb3334ab923cee402e/src/tf_ops/3d_interpolation/tf_interpolate_so.so
--------------------------------------------------------------------------------
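Note: `three_nn` returns *squared* distances, and `visu_interpolation.py` below turns them into inverse-distance weights. For readers without the compiled op, the following is a minimal NumPy sketch of these semantics; `three_nn_np` and `three_interpolate_np` are illustrative names, not part of the repository.

```
# NumPy sketch of the three-NN inverse-distance interpolation above
# (illustration only; the repository uses the compiled C++/CUDA ops).
import numpy as np

def three_nn_np(xyz1, xyz2):
    # xyz1: (n,3) unknown points; xyz2: (m,3) known points
    d2 = ((xyz1[:, None, :] - xyz2[None, :, :]) ** 2).sum(-1)  # (n,m) squared distances
    idx = np.argsort(d2, axis=1)[:, :3]                        # three nearest known points
    dist = np.take_along_axis(d2, idx, axis=1)                 # their squared distances
    return dist, idx

def three_interpolate_np(points, idx, dist):
    # points: (m,c) known features; weights as in visu_interpolation.py
    dist = np.maximum(dist, 1e-10)
    weight = (1.0 / dist) / (1.0 / dist).sum(axis=1, keepdims=True)  # rows sum to 1
    return (points[idx] * weight[..., None]).sum(axis=1)             # (n,c)
```

/src/tf_ops/3d_interpolation/visu_interpolation.py: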
--------------------------------------------------------------------------------
1 | ''' Visualize three-nearest-neighbor interpolation '''
2 | import os
3 | import sys
4 | ROOT_DIR = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
5 | sys.path.append('/home/rqi/Projects/toolkits/visualization')
6 | from show3d_balls import showpoints
7 | import numpy as np
8 | from tf_interpolate import three_nn, three_interpolate
9 | import tensorflow as tf
10 | 
11 | 
12 | pts2 = np.array([[0,0,1],[1,0,0],[0,1,0],[1,1,0]]).astype('float32')
13 | xyz1 = np.random.random((100,3)).astype('float32')
14 | xyz2 = np.array([[0,0,0],[1,0,0],[0,1,0],[1,1,1]]).astype('float32')
15 | 
16 | def fun(xyz1,xyz2,pts2):
17 |     with tf.device('/cpu:0'):
18 |         points = tf.constant(np.expand_dims(pts2,0))
19 |         xyz1 = tf.constant(np.expand_dims(xyz1,0))
20 |         xyz2 = tf.constant(np.expand_dims(xyz2,0))
21 |         dist, idx = three_nn(xyz1, xyz2)
22 |         # weight = tf.ones_like(dist)/3.0
23 |         dist = tf.maximum(dist, 1e-10)
24 |         norm = tf.reduce_sum((1.0/dist), axis=2, keep_dims=True)
25 |         norm = tf.tile(norm, [1,1,3])
26 |         print(norm)
27 |         weight = (1.0/dist) / norm
28 |         interpolated_points = three_interpolate(points, idx, weight)
29 |     with tf.Session('') as sess:
30 |         tmp,pts1,d,w = sess.run([xyz1, interpolated_points, dist, weight])
31 |         # print(w)
32 |     pts1 = pts1.squeeze()
33 |     return pts1
34 | 
35 | pts1 = fun(xyz1,xyz2,pts2)
36 | all_pts = np.zeros((104,3))
37 | all_pts[0:100,:] = pts1
38 | all_pts[100:,:] = pts2
39 | all_xyz = np.zeros((104,3))
40 | all_xyz[0:100,:] = xyz1
41 | all_xyz[100:,:] = xyz2
42 | showpoints(xyz2, pts2, ballradius=8)
43 | showpoints(xyz1, pts1, ballradius=8)
44 | showpoints(all_xyz, all_pts, ballradius=8)
45 | 
--------------------------------------------------------------------------------
/src/tf_ops/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/HimangiM/Just-Go-with-the-Flow-Self-Supervised-Scene-Flow-Estimation/7052587bd5c238a0b9c3f4bb3334ab923cee402e/src/tf_ops/__init__.py
--------------------------------------------------------------------------------
/src/tf_ops/grouping/Makefile:
--------------------------------------------------------------------------------
1 | 
2 | 
3 | 
4 | 
5 | CUDA_HOME := /home/hmittal/cuda-9.0
6 | 
7 | TF_INC = `python3 -c "import tensorflow; print(tensorflow.sysconfig.get_include())"`
8 | TF_LIB = `python3 -c "import tensorflow as tf; print(tf.sysconfig.get_lib())"`
9 | 
10 | LIBFLAGS = -L$(CUDA_HOME)/lib -L$(CUDA_HOME)/lib64 -lcudart -L $(TF_LIB) -ltensorflow_framework -lcublas
11 | 
12 | ifndef ARCHI
13 | ARCHI=sm_61
14 | endif
15 | 
16 | arch = $(ARCHI)
17 | 
18 | NVCC = $(CUDA_HOME)/bin/nvcc
19 | 
20 | LD_LIBRARY_PATH=$(CUDA_HOME)/lib64
21 | INCLUDE_PATH=$(CUDA_HOME)/include
22 | 
23 | all: tf_grouping_so.so
24 | 
25 | tf_grouping_so.so: tf_grouping.cpp tf_grouping_g.cu.o
26 | 	g++ -std=c++11 -shared -fPIC -o tf_grouping_so.so tf_grouping.cpp tf_grouping_g.cu.o -I$(TF_INC) -I$(INCLUDE_PATH) -L$(LIBFLAGS) -O2 -D_GLIBCXX_USE_CXX11_ABI=0
27 | 
28 | tf_grouping_g.cu.o: tf_grouping_g.cu
29 | 	$(NVCC) -std=c++11 -c -o tf_grouping_g.cu.o tf_grouping_g.cu -I$(TF_INC) -DGOOGLE_CUDA=1 -x cu -Xcompiler -fPIC -arch $(arch) --use_fast_math
30 | 
31 | clean:
32 | 	rm *.o *.so
33 | 
--------------------------------------------------------------------------------
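The grouping ops that follow implement ball query and kNN grouping around sampled centroids. As a point of reference, here is a minimal NumPy sketch of the ball-query semantics documented in `tf_grouping.py` below; `query_ball_point_np` is an illustrative name, and the padding behaviour is an assumption modelled on the CUDA kernel, not a drop-in replacement for it.

```
# NumPy sketch of ball-query grouping (illustration only).
import numpy as np

def query_ball_point_np(radius, nsample, xyz1, xyz2):
    # xyz1: (n,3) input points; xyz2: (m,3) query (ball-center) points
    d2 = ((xyz2[:, None, :] - xyz1[None, :, :]) ** 2).sum(-1)  # (m,n) squared dists
    idx = np.zeros((len(xyz2), nsample), dtype=np.int32)
    pts_cnt = np.zeros(len(xyz2), dtype=np.int32)
    for j in range(len(xyz2)):
        inside = np.flatnonzero(d2[j] < radius ** 2)  # points inside the ball
        pts_cnt[j] = min(len(inside), nsample)
        if len(inside) == 0:
            continue  # no neighbour found; row stays all-zero in this sketch
        picked = inside[:nsample]
        idx[j, :len(picked)] = picked
        idx[j, len(picked):] = picked[0]  # pad short rows with the first neighbour
    return idx, pts_cnt
```

/src/tf_ops/grouping/__init__.py:
--------------------------------------------------------------------------------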
https://raw.githubusercontent.com/HimangiM/Just-Go-with-the-Flow-Self-Supervised-Scene-Flow-Estimation/7052587bd5c238a0b9c3f4bb3334ab923cee402e/src/tf_ops/grouping/__init__.py -------------------------------------------------------------------------------- /src/tf_ops/grouping/__pycache__/tf_grouping.cpython-35.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HimangiM/Just-Go-with-the-Flow-Self-Supervised-Scene-Flow-Estimation/7052587bd5c238a0b9c3f4bb3334ab923cee402e/src/tf_ops/grouping/__pycache__/tf_grouping.cpython-35.pyc -------------------------------------------------------------------------------- /src/tf_ops/grouping/__pycache__/tf_grouping.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HimangiM/Just-Go-with-the-Flow-Self-Supervised-Scene-Flow-Estimation/7052587bd5c238a0b9c3f4bb3334ab923cee402e/src/tf_ops/grouping/__pycache__/tf_grouping.cpython-36.pyc -------------------------------------------------------------------------------- /src/tf_ops/grouping/tf_grouping.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include // memset 4 | #include // rand, RAND_MAX 5 | #include // sqrtf 6 | #include "tensorflow/core/framework/op.h" 7 | #include "tensorflow/core/framework/op_kernel.h" 8 | #include "tensorflow/core/framework/shape_inference.h" 9 | #include "tensorflow/core/framework/common_shape_fns.h" 10 | #include 11 | using namespace tensorflow; 12 | 13 | REGISTER_OP("QueryBallPoint") // Register operation 14 | .Attr("radius: float") // Attribute, radius of grouping 15 | .Attr("nsample: int") // Attribute, = 16 16 | .Input("xyz1: float32") // Input tensor of float32, point cloud 1 17 | .Input("xyz2: float32") // Input tensor of float32, point cloud 2 18 | .Output("idx: int32") // Output tensor of int32, indices of the points grouped 19 | .Output("pts_cnt: int32") // Output tensor of int32, points count? 
20 | .SetShapeFn([](::tensorflow::shape_inference::InferenceContext* c) { // Output tensor is same shape as input tensor 21 | ::tensorflow::shape_inference::ShapeHandle dims2; // batch_size * npoint * 3 22 | c->WithRank(c->input(1), 3, &dims2); // validates that the c->input(1) has a shape with exactly 3 dimensions 23 | int nsample; 24 | TF_RETURN_IF_ERROR(c->GetAttr("nsample", &nsample)); // Get the value of attribute nsample in variable nsample 25 | ::tensorflow::shape_inference::ShapeHandle output1 = c->MakeShape({c->Dim(dims2, 0), c->Dim(dims2, 1), nsample}); // Indices = batch size, n point, n sample, (16, 1024, 16) 26 | c->set_output(0, output1); 27 | ::tensorflow::shape_inference::ShapeHandle output2 = c->MakeShape({c->Dim(dims2, 0), c->Dim(dims2, 1)}); // Points count = batch size, npoint (16, 1024) 28 | c->set_output(1, output2); 29 | return Status::OK(); 30 | }); 31 | REGISTER_OP("SelectionSort") 32 | .Attr("k: int") 33 | .Input("dist: float32") 34 | .Output("outi: int32") 35 | .Output("out: float32") 36 | .SetShapeFn([](::tensorflow::shape_inference::InferenceContext* c) { 37 | c->set_output(0, c->input(0)); 38 | c->set_output(1, c->input(0)); 39 | return Status::OK(); 40 | }); 41 | REGISTER_OP("GroupPoint") 42 | .Input("points: float32") 43 | .Input("idx: int32") 44 | .Output("out: float32") 45 | .SetShapeFn([](::tensorflow::shape_inference::InferenceContext* c) { 46 | ::tensorflow::shape_inference::ShapeHandle dims1; // batch_size * ndataset * channels 47 | c->WithRank(c->input(0), 3, &dims1); 48 | ::tensorflow::shape_inference::ShapeHandle dims2; // batch_size * npoints * nsample 49 | c->WithRank(c->input(1), 3, &dims2); 50 | // batch_size * npoints * nsample * channels 51 | ::tensorflow::shape_inference::ShapeHandle output = c->MakeShape({c->Dim(dims2, 0), c->Dim(dims2, 1), c->Dim(dims2, 2), c->Dim(dims1, 2)}); 52 | c->set_output(0, output); 53 | return Status::OK(); 54 | }); 55 | REGISTER_OP("GroupPointGrad") 56 | .Input("points: float32") 57 | .Input("idx: int32") 58 | .Input("grad_out: float32") 59 | .Output("grad_points: float32") 60 | .SetShapeFn([](::tensorflow::shape_inference::InferenceContext* c) { 61 | c->set_output(0, c->input(0)); 62 | return Status::OK(); 63 | }); 64 | 65 | 66 | void queryBallPointLauncher(int b, int n, int m, float radius, int nsample, const float *xyz1, const float *xyz2, int *idx, int *pts_cnt); 67 | class QueryBallPointGpuOp : public OpKernel { 68 | public: 69 | explicit QueryBallPointGpuOp(OpKernelConstruction* context) : OpKernel(context) { 70 | OP_REQUIRES_OK(context, context->GetAttr("radius", &radius_)); 71 | OP_REQUIRES(context, radius_ > 0, errors::InvalidArgument("QueryBallPoint expects positive radius")); 72 | 73 | OP_REQUIRES_OK(context, context->GetAttr("nsample", &nsample_)); 74 | OP_REQUIRES(context, nsample_ > 0, errors::InvalidArgument("QueryBallPoint expects positive nsample")); 75 | } 76 | 77 | void Compute(OpKernelContext* context) override { 78 | const Tensor& xyz1_tensor = context->input(0); 79 | OP_REQUIRES(context, xyz1_tensor.dims()==3 && xyz1_tensor.shape().dim_size(2)==3, errors::InvalidArgument("QueryBallPoint expects (batch_size, ndataset, 3) xyz1 shape.")); 80 | int b = xyz1_tensor.shape().dim_size(0); 81 | int n = xyz1_tensor.shape().dim_size(1); 82 | 83 | const Tensor& xyz2_tensor = context->input(1); 84 | OP_REQUIRES(context, xyz2_tensor.dims()==3 && xyz2_tensor.shape().dim_size(2)==3, errors::InvalidArgument("QueryBallPoint expects (batch_size, npoint, 3) xyz2 shape.")); 85 | int m = 
xyz2_tensor.shape().dim_size(1); 86 | 87 | Tensor *idx_tensor = nullptr; 88 | OP_REQUIRES_OK(context, context->allocate_output(0, TensorShape{b,m,nsample_}, &idx_tensor)); 89 | Tensor *pts_cnt_tensor = nullptr; 90 | OP_REQUIRES_OK(context, context->allocate_output(1, TensorShape{b,m}, &pts_cnt_tensor)); 91 | 92 | auto xyz1_flat = xyz1_tensor.flat(); 93 | const float *xyz1 = &(xyz1_flat(0)); 94 | auto xyz2_flat = xyz2_tensor.flat(); 95 | const float *xyz2 = &(xyz2_flat(0)); 96 | auto idx_flat = idx_tensor->flat(); 97 | int *idx = &(idx_flat(0)); 98 | auto pts_cnt_flat = pts_cnt_tensor->flat(); 99 | int *pts_cnt = &(pts_cnt_flat(0)); 100 | queryBallPointLauncher(b,n,m,radius_,nsample_,xyz1,xyz2,idx,pts_cnt); 101 | } 102 | private: 103 | float radius_; 104 | int nsample_; 105 | }; 106 | REGISTER_KERNEL_BUILDER(Name("QueryBallPoint").Device(DEVICE_GPU), QueryBallPointGpuOp); 107 | 108 | void selectionSortLauncher(int b, int n, int m, int k, const float *dist, int *outi, float *out); 109 | class SelectionSortGpuOp : public OpKernel { 110 | public: 111 | explicit SelectionSortGpuOp(OpKernelConstruction* context) : OpKernel(context) { 112 | OP_REQUIRES_OK(context, context->GetAttr("k", &k_)); 113 | OP_REQUIRES(context, k_ > 0, errors::InvalidArgument("SelectionSort expects positive k")); 114 | } 115 | 116 | void Compute(OpKernelContext* context) override { 117 | const Tensor& dist_tensor = context->input(0); 118 | OP_REQUIRES(context, dist_tensor.dims()==3, errors::InvalidArgument("SelectionSort expects (b,m,n) dist shape.")); 119 | int b = dist_tensor.shape().dim_size(0); 120 | int m = dist_tensor.shape().dim_size(1); 121 | int n = dist_tensor.shape().dim_size(2); 122 | 123 | Tensor *outi_tensor = nullptr; 124 | OP_REQUIRES_OK(context, context->allocate_output(0, TensorShape{b,m,n}, &outi_tensor)); 125 | Tensor *out_tensor = nullptr; 126 | OP_REQUIRES_OK(context, context->allocate_output(1, TensorShape{b,m,n}, &out_tensor)); 127 | 128 | auto dist_flat = dist_tensor.flat(); 129 | const float *dist = &(dist_flat(0)); 130 | auto outi_flat = outi_tensor->flat(); 131 | int *outi = &(outi_flat(0)); 132 | auto out_flat = out_tensor->flat(); 133 | float *out = &(out_flat(0)); 134 | selectionSortLauncher(b,n,m,k_,dist,outi,out); 135 | } 136 | private: 137 | int k_; 138 | }; 139 | REGISTER_KERNEL_BUILDER(Name("SelectionSort").Device(DEVICE_GPU), SelectionSortGpuOp); 140 | 141 | 142 | void groupPointLauncher(int b, int n, int c, int m, int nsample, const float *points, const int *idx, float *out); 143 | class GroupPointGpuOp: public OpKernel{ 144 | public: 145 | explicit GroupPointGpuOp(OpKernelConstruction * context):OpKernel(context){} 146 | 147 | void Compute(OpKernelContext * context) override { 148 | const Tensor& points_tensor=context->input(0); 149 | OP_REQUIRES(context, points_tensor.dims()==3, errors::InvalidArgument("GroupPoint expects (batch_size, num_points, channel) points shape")); 150 | int b = points_tensor.shape().dim_size(0); 151 | int n = points_tensor.shape().dim_size(1); 152 | int c = points_tensor.shape().dim_size(2); 153 | 154 | const Tensor& idx_tensor=context->input(1); 155 | OP_REQUIRES(context,idx_tensor.dims()==3 && idx_tensor.shape().dim_size(0)==b, errors::InvalidArgument("GroupPoint expects (batch_size, npoints, nsample) idx shape")); 156 | int m = idx_tensor.shape().dim_size(1); 157 | int nsample = idx_tensor.shape().dim_size(2); 158 | 159 | Tensor * out_tensor = nullptr; 160 | OP_REQUIRES_OK(context, context->allocate_output(0,TensorShape{b,m,nsample,c}, &out_tensor)); 161 
| 162 | auto points_flat = points_tensor.flat(); 163 | const float *points = &(points_flat(0)); 164 | auto idx_flat = idx_tensor.flat(); 165 | const int *idx = &(idx_flat(0)); 166 | auto out_flat = out_tensor->flat(); 167 | float *out = &(out_flat(0)); 168 | groupPointLauncher(b,n,c,m,nsample,points,idx,out); 169 | } 170 | }; 171 | REGISTER_KERNEL_BUILDER(Name("GroupPoint").Device(DEVICE_GPU),GroupPointGpuOp); 172 | 173 | void groupPointGradLauncher(int b, int n, int c, int m, int nsample, const float *grad_out, const int *idx, float *grad_points); 174 | class GroupPointGradGpuOp: public OpKernel{ 175 | public: 176 | explicit GroupPointGradGpuOp(OpKernelConstruction * context):OpKernel(context){} 177 | 178 | void Compute(OpKernelContext * context) override { 179 | const Tensor& points_tensor=context->input(0); 180 | OP_REQUIRES(context, points_tensor.dims()==3, errors::InvalidArgument("GroupPointGrad expects (batch_size, num_points, channel) points shape")); 181 | int b = points_tensor.shape().dim_size(0); 182 | int n = points_tensor.shape().dim_size(1); 183 | int c = points_tensor.shape().dim_size(2); 184 | 185 | const Tensor& idx_tensor=context->input(1); 186 | OP_REQUIRES(context,idx_tensor.dims()==3 && idx_tensor.shape().dim_size(0)==b, errors::InvalidArgument("GroupPointGrad expects (batch_size, npoints, nsample) idx shape")); 187 | int m = idx_tensor.shape().dim_size(1); 188 | int nsample = idx_tensor.shape().dim_size(2); 189 | 190 | const Tensor& grad_out_tensor=context->input(2); 191 | OP_REQUIRES(context,grad_out_tensor.dims()==4 && grad_out_tensor.shape().dim_size(0)==b && grad_out_tensor.shape().dim_size(1)==m && grad_out_tensor.shape().dim_size(2)==nsample && grad_out_tensor.shape().dim_size(3)==c, errors::InvalidArgument("GroupPointGrad expects (batch_size, npoints, nsample, channel) grad_out shape")); 192 | 193 | Tensor * grad_points_tensor = nullptr; 194 | OP_REQUIRES_OK(context, context->allocate_output(0,TensorShape{b,n,c}, &grad_points_tensor)); 195 | 196 | auto points_flat = points_tensor.flat(); 197 | const float *points = &(points_flat(0)); 198 | auto idx_flat = idx_tensor.flat(); 199 | const int *idx = &(idx_flat(0)); 200 | auto grad_out_flat = grad_out_tensor.flat(); 201 | const float *grad_out = &(grad_out_flat(0)); 202 | auto grad_points_flat = grad_points_tensor->flat(); 203 | float *grad_points = &(grad_points_flat(0)); 204 | cudaMemset(grad_points, 0, sizeof(float)*b*n*c); 205 | groupPointGradLauncher(b,n,c,m,nsample,grad_out,idx,grad_points); 206 | } 207 | }; 208 | REGISTER_KERNEL_BUILDER(Name("GroupPointGrad").Device(DEVICE_GPU),GroupPointGradGpuOp); 209 | 210 | 211 | -------------------------------------------------------------------------------- /src/tf_ops/grouping/tf_grouping.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | from tensorflow.python.framework import ops 3 | import sys 4 | import os 5 | BASE_DIR = os.path.dirname(os.path.abspath(__file__)) 6 | sys.path.append(BASE_DIR) 7 | grouping_module=tf.load_op_library(os.path.join(BASE_DIR, 'tf_grouping_so.so')) 8 | def query_ball_point(radius, nsample, xyz1, xyz2): 9 | ''' 10 | Input: 11 | radius: float32, ball search radius 12 | nsample: int32, number of points selected in each ball region 13 | xyz1: (batch_size, ndataset, 3) float32 array, input points 14 | xyz2: (batch_size, npoint, 3) float32 array, query points 15 | Output: 16 | idx: (batch_size, npoint, nsample) int32 array, indices to input points 17 | pts_cnt: (batch_size, 
npoint) int32 array, number of unique points in each local region 18 | ''' 19 | #return grouping_module.query_ball_point(radius, nsample, xyz1, xyz2) 20 | return grouping_module.query_ball_point(xyz1, xyz2, radius, nsample) 21 | ops.NoGradient('QueryBallPoint') 22 | def select_top_k(k, dist): 23 | ''' 24 | Input: 25 | k: int32, number of k SMALLEST elements selected 26 | dist: (b,m,n) float32 array, distance matrix, m query points, n dataset points 27 | Output: 28 | idx: (b,m,n) int32 array, first k in n are indices to the top k 29 | dist_out: (b,m,n) float32 array, first k in n are the top k 30 | ''' 31 | return grouping_module.selection_sort(dist, k) 32 | ops.NoGradient('SelectionSort') 33 | def group_point(points, idx): 34 | ''' 35 | Input: 36 | points: (batch_size, ndataset, channel) float32 array, points to sample from 37 | idx: (batch_size, npoint, nsample) int32 array, indices to points 38 | Output: 39 | out: (batch_size, npoint, nsample, channel) float32 array, values sampled from points 40 | ''' 41 | return grouping_module.group_point(points, idx) 42 | @tf.RegisterGradient('GroupPoint') 43 | def _group_point_grad(op, grad_out): 44 | points = op.inputs[0] 45 | idx = op.inputs[1] 46 | return [grouping_module.group_point_grad(points, idx, grad_out), None] 47 | 48 | def knn_point(k, xyz1, xyz2): 49 | ''' 50 | Input: 51 | k: int32, number of k in k-nn search 52 | xyz1: (batch_size, ndataset, c) float32 array, input points 53 | xyz2: (batch_size, npoint, c) float32 array, query points 54 | Output: 55 | val: (batch_size, npoint, k) float32 array, L2 distances 56 | idx: (batch_size, npoint, k) int32 array, indices to input points 57 | ''' 58 | # b = xyz1.get_shape()[0].value 59 | b = tf.shape(xyz1)[0] 60 | n = xyz1.get_shape()[1].value 61 | c = xyz1.get_shape()[2].value 62 | m = xyz2.get_shape()[1].value 63 | 64 | xyz1 = tf.tile(tf.reshape(xyz1, (b,1,n,c)), [1,m,1,1]) 65 | xyz2 = tf.tile(tf.reshape(xyz2, (b,m,1,c)), [1,1,n,1]) 66 | dist = tf.reduce_sum((xyz1-xyz2)**2, -1) 67 | 68 | outi, out = select_top_k(k, dist) 69 | idx = tf.slice(outi, [0,0,0], [-1,-1,k]) 70 | val = tf.slice(out, [0,0,0], [-1,-1,k]) 71 | #val, idx = tf.nn.top_k(-dist, k=k) # ONLY SUPPORT CPU 72 | return val, idx 73 | 74 | def unique_1nn_point(xyz1, xyz2): 75 | 76 | b = xyz1.get_shape()[0].value 77 | n = xyz1.get_shape()[1].value 78 | c = xyz1.get_shape()[2].value 79 | m = xyz2.get_shape()[1].value 80 | 81 | xyz1 = tf.tile(tf.reshape(xyz1, (b,1,n,c)), [1,m,1,1]) 82 | xyz2 = tf.tile(tf.reshape(xyz2, (b,m,1,c)), [1,1,n,1]) 83 | var_output = tf.reduce_sum((xyz1-xyz2)**2, -1) 84 | 85 | dist = tf.Variable(0, dtype=var_output.dtype, 86 | trainable=False, collections=[]) 87 | 88 | dist = tf.assign(dist, var_output, validate_shape=False) 89 | 90 | idx_list = [] 91 | for new_xyz in range(m): 92 | idx = tf.to_int32(tf.argmin(dist[:, new_xyz, :], axis=-1)) 93 | if new_xyz != m-1: 94 | indices = tf.constant([[k, i] for i in range(new_xyz+1, 95 | m) for k in range(b)], dtype=tf.int32) # 96 | repeated_idx = tf.expand_dims(tf.tile(idx, [m - new_xyz - 1]), 1) 97 | new_indices = tf.concat([indices, repeated_idx], axis=1) 98 | updates = tf.constant([float('Inf') for i in range(new_xyz+1, 99 | m) for k in range(b)]) 100 | dist = tf.scatter_nd_update(dist, new_indices, updates) 101 | if new_xyz % 50 == 0: 102 | print(new_xyz) 103 | idx_list.append(idx) 104 | 105 | # m x b => b x m 106 | return tf.expand_dims(tf.transpose(tf.stack(idx_list)), -1) 107 | 108 | 109 | if __name__=='__main__': 110 | knn=True 111 | import numpy as np 112 | 
import time 113 | np.random.seed(100) 114 | pts = np.random.random((32,512,64)).astype('float32') 115 | tmp1 = np.random.random((32,512,3)).astype('float32') 116 | tmp2 = np.random.random((32,128,3)).astype('float32') 117 | with tf.device('/gpu:1'): 118 | points = tf.constant(pts) 119 | xyz1 = tf.constant(tmp1) 120 | xyz2 = tf.constant(tmp2) 121 | radius = 0.1 122 | nsample = 64 123 | if knn: 124 | _, idx = knn_point(nsample, xyz1, xyz2) 125 | grouped_points = group_point(points, idx) 126 | else: 127 | idx, _ = query_ball_point(radius, nsample, xyz1, xyz2) 128 | grouped_points = group_point(points, idx) 129 | #grouped_points_grad = tf.ones_like(grouped_points) 130 | #points_grad = tf.gradients(grouped_points, points, grouped_points_grad) 131 | with tf.Session('') as sess: 132 | now = time.time() 133 | for _ in range(100): 134 | ret = sess.run(grouped_points) 135 | print(time.time() - now) 136 | print(ret.shape, ret.dtype) 137 | print(ret) 138 | 139 | 140 | -------------------------------------------------------------------------------- /src/tf_ops/grouping/tf_grouping_g.cu: -------------------------------------------------------------------------------- 1 | // input: radius (1), nsample (1), xyz1 (b,n,3), xyz2 (b,m,3) 2 | // output: idx (b,m,nsample), pts_cnt (b,m) 3 | __global__ void query_ball_point_gpu(int b, int n, int m, float radius, int nsample, const float *xyz1, const float *xyz2, int *idx, int *pts_cnt) { 4 | int batch_index = blockIdx.x; 5 | xyz1 += n*3*batch_index; 6 | xyz2 += m*3*batch_index; 7 | idx += m*nsample*batch_index; 8 | pts_cnt += m*batch_index; // counting how many unique points selected in local region 9 | 10 | int index = threadIdx.x; 11 | int stride = blockDim.x; 12 | 13 | for (int j=index;j>>(b,n,m,radius,nsample,xyz1,xyz2,idx,pts_cnt); 127 | //cudaDeviceSynchronize(); 128 | } 129 | void selectionSortLauncher(int b, int n, int m, int k, const float *dist, int *outi, float *out) { 130 | selection_sort_gpu<<>>(b,n,m,k,dist,outi,out); 131 | //cudaDeviceSynchronize(); 132 | } 133 | void groupPointLauncher(int b, int n, int c, int m, int nsample, const float *points, const int *idx, float *out){ 134 | group_point_gpu<<>>(b,n,c,m,nsample,points,idx,out); 135 | //cudaDeviceSynchronize(); 136 | } 137 | void groupPointGradLauncher(int b, int n, int c, int m, int nsample, const float *grad_out, const int *idx, float *grad_points){ 138 | group_point_grad_gpu<<>>(b,n,c,m,nsample,grad_out,idx,grad_points); 139 | //group_point_grad_gpu<<<1,1>>>(b,n,c,m,nsample,grad_out,idx,grad_points); 140 | //cudaDeviceSynchronize(); 141 | } 142 | -------------------------------------------------------------------------------- /src/tf_ops/grouping/tf_grouping_g.cu.o: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HimangiM/Just-Go-with-the-Flow-Self-Supervised-Scene-Flow-Estimation/7052587bd5c238a0b9c3f4bb3334ab923cee402e/src/tf_ops/grouping/tf_grouping_g.cu.o -------------------------------------------------------------------------------- /src/tf_ops/grouping/tf_grouping_op_test.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | import numpy as np 3 | from tf_grouping import query_ball_point, group_point 4 | 5 | class GroupPointTest(tf.test.TestCase): 6 | def test(self): 7 | pass 8 | 9 | def test_grad(self): 10 | with tf.device('/gpu:0'): 11 | points = tf.constant(np.random.random((1,128,16)).astype('float32')) 12 | print points 13 | xyz1 = 
tf.constant(np.random.random((1,128,3)).astype('float32'))
14 |             xyz2 = tf.constant(np.random.random((1,8,3)).astype('float32'))
15 |             radius = 0.3
16 |             nsample = 32
17 |             idx, pts_cnt = query_ball_point(radius, nsample, xyz1, xyz2)
18 |             grouped_points = group_point(points, idx)
19 |             print(grouped_points)
20 | 
21 |         with self.test_session():
22 |             print("---- Going to compute gradient error")
23 |             err = tf.test.compute_gradient_error(points, (1,128,16), grouped_points, (1,8,32,16))
24 |             print(err)
25 |             self.assertLess(err, 1e-4)
26 | 
27 | if __name__=='__main__':
28 |     tf.test.main()
29 | 
--------------------------------------------------------------------------------
/src/tf_ops/grouping/tf_grouping_so.so:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/HimangiM/Just-Go-with-the-Flow-Self-Supervised-Scene-Flow-Estimation/7052587bd5c238a0b9c3f4bb3334ab923cee402e/src/tf_ops/grouping/tf_grouping_so.so
--------------------------------------------------------------------------------
/src/tf_ops/sampling/1.pkl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/HimangiM/Just-Go-with-the-Flow-Self-Supervised-Scene-Flow-Estimation/7052587bd5c238a0b9c3f4bb3334ab923cee402e/src/tf_ops/sampling/1.pkl
--------------------------------------------------------------------------------
/src/tf_ops/sampling/Makefile:
--------------------------------------------------------------------------------
1 | 
2 | 
3 | 
4 | 
5 | CUDA_HOME := /home/hmittal/cuda-9.0
6 | 
7 | TF_INC = `python3 -c "import tensorflow; print(tensorflow.sysconfig.get_include())"`
8 | TF_LIB = `python3 -c "import tensorflow as tf; print(tf.sysconfig.get_lib())"`
9 | 
10 | LIBFLAGS = -L$(CUDA_HOME)/lib -L$(CUDA_HOME)/lib64 -lcudart -L $(TF_LIB) -ltensorflow_framework -lcublas
11 | 
12 | ifndef ARCHI
13 | ARCHI=sm_61
14 | endif
15 | 
16 | arch = $(ARCHI)
17 | 
18 | NVCC = $(CUDA_HOME)/bin/nvcc
19 | 
20 | LD_LIBRARY_PATH=$(CUDA_HOME)/lib64
21 | INCLUDE_PATH=$(CUDA_HOME)/include
22 | 
23 | all: tf_sampling_so.so
24 | 
25 | tf_sampling_so.so: tf_sampling.cpp tf_sampling_g.cu.o
26 | 	g++ -std=c++11 -shared -fPIC -o tf_sampling_so.so tf_sampling.cpp tf_sampling_g.cu.o -I$(TF_INC) -I$(INCLUDE_PATH) -L$(LIBFLAGS) -O2 -D_GLIBCXX_USE_CXX11_ABI=0
27 | 
28 | tf_sampling_g.cu.o: tf_sampling_g.cu
29 | 	$(NVCC) -std=c++11 -c -o tf_sampling_g.cu.o tf_sampling_g.cu -I$(TF_INC) -DGOOGLE_CUDA=1 -x cu -Xcompiler -fPIC -arch $(arch) --use_fast_math
30 | 
31 | clean:
32 | 	rm *.o *.so
33 | 
--------------------------------------------------------------------------------
/src/tf_ops/sampling/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/HimangiM/Just-Go-with-the-Flow-Self-Supervised-Scene-Flow-Estimation/7052587bd5c238a0b9c3f4bb3334ab923cee402e/src/tf_ops/sampling/__init__.py
--------------------------------------------------------------------------------
/src/tf_ops/sampling/__pycache__/tf_sampling.cpython-35.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/HimangiM/Just-Go-with-the-Flow-Self-Supervised-Scene-Flow-Estimation/7052587bd5c238a0b9c3f4bb3334ab923cee402e/src/tf_ops/sampling/__pycache__/tf_sampling.cpython-35.pyc
--------------------------------------------------------------------------------
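The sampling ops that follow implement farthest point sampling (FPS) on the GPU. For reference, a minimal NumPy sketch of the algorithm is given here; `farthest_point_sample_np` is an illustrative name, not part of the repository, and the seed index of the real kernel is an assumption.

```
# NumPy sketch of farthest point sampling (illustration only).
import numpy as np

def farthest_point_sample_np(npoint, xyz):
    # xyz: (n,3); returns npoint indices spread maximally over the cloud
    n = xyz.shape[0]
    idxs = np.zeros(npoint, dtype=np.int32)
    dist = np.full(n, np.inf)
    farthest = 0                       # assume the kernel seeds with index 0
    for i in range(npoint):
        idxs[i] = farthest
        d = ((xyz - xyz[farthest]) ** 2).sum(-1)
        dist = np.minimum(dist, d)     # distance to the nearest chosen point
        farthest = int(dist.argmax())  # next pick: farthest from the chosen set
    return idxs
```

/src/tf_ops/sampling/__pycache__/tf_sampling.cpython-36.pyc: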
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/HimangiM/Just-Go-with-the-Flow-Self-Supervised-Scene-Flow-Estimation/7052587bd5c238a0b9c3f4bb3334ab923cee402e/src/tf_ops/sampling/__pycache__/tf_sampling.cpython-36.pyc -------------------------------------------------------------------------------- /src/tf_ops/sampling/__pycache__/tf_sampling.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HimangiM/Just-Go-with-the-Flow-Self-Supervised-Scene-Flow-Estimation/7052587bd5c238a0b9c3f4bb3334ab923cee402e/src/tf_ops/sampling/__pycache__/tf_sampling.cpython-37.pyc -------------------------------------------------------------------------------- /src/tf_ops/sampling/tf_sampling.cpp: -------------------------------------------------------------------------------- 1 | /* Furthest point sampling 2 | * Original author: Haoqiang Fan 3 | * Modified by Charles R. Qi 4 | * All Rights Reserved. 2017. 5 | */ 6 | #include "tensorflow/core/framework/op.h" 7 | #include "tensorflow/core/framework/op_kernel.h" 8 | #include "tensorflow/core/framework/shape_inference.h" 9 | #include "tensorflow/core/framework/common_shape_fns.h" 10 | #include 11 | 12 | using namespace tensorflow; 13 | 14 | REGISTER_OP("ProbSample") 15 | .Input("inp: float32") 16 | .Input("inpr: float32") 17 | .Output("out: int32") 18 | .SetShapeFn([](::tensorflow::shape_inference::InferenceContext* c) { 19 | ::tensorflow::shape_inference::ShapeHandle dims1; // batch_size * ncategory 20 | c->WithRank(c->input(0), 2, &dims1); 21 | ::tensorflow::shape_inference::ShapeHandle dims2; // batch_size * npoints 22 | c->WithRank(c->input(1), 2, &dims2); 23 | // batch_size * npoints 24 | ::tensorflow::shape_inference::ShapeHandle output = c->MakeShape({c->Dim(dims2, 0), c->Dim(dims2, 1)}); 25 | c->set_output(0, output); 26 | return Status::OK(); 27 | }); 28 | REGISTER_OP("FarthestPointSample") 29 | .Attr("npoint: int") 30 | .Input("inp: float32") 31 | .Output("out: int32") 32 | .SetShapeFn([](::tensorflow::shape_inference::InferenceContext* c) { 33 | ::tensorflow::shape_inference::ShapeHandle dims1; // batch_size * npoint * 3 34 | c->WithRank(c->input(0), 3, &dims1); 35 | int npoint; 36 | TF_RETURN_IF_ERROR(c->GetAttr("npoint", &npoint)); 37 | ::tensorflow::shape_inference::ShapeHandle output = c->MakeShape({c->Dim(dims1, 0), npoint}); 38 | c->set_output(0, output); 39 | return Status::OK(); 40 | }); 41 | REGISTER_OP("GatherPoint") 42 | .Input("inp: float32") 43 | .Input("idx: int32") 44 | .Output("out: float32") 45 | .SetShapeFn([](::tensorflow::shape_inference::InferenceContext* c) { 46 | ::tensorflow::shape_inference::ShapeHandle dims1; // batch_size * ndataset * 3 47 | c->WithRank(c->input(0), 3, &dims1); 48 | ::tensorflow::shape_inference::ShapeHandle dims2; // batch_size * npoints 49 | c->WithRank(c->input(1), 2, &dims2); 50 | // batch_size * npoints * 3 51 | ::tensorflow::shape_inference::ShapeHandle output = c->MakeShape({c->Dim(dims1, 0), c->Dim(dims2, 1), c->Dim(dims1, 2)}); // (16, 1024, 3) (batch size, npoints, 3) 52 | c->set_output(0, output); 53 | return Status::OK(); 54 | }); 55 | REGISTER_OP("GatherPointGrad") 56 | .Input("inp: float32") 57 | .Input("idx: int32") 58 | .Input("out_g: float32") 59 | .Output("inp_g: float32") 60 | .SetShapeFn([](::tensorflow::shape_inference::InferenceContext* c) { 61 | c->set_output(0, c->input(0)); 62 | return Status::OK(); 63 | }); 64 | 65 | void 
probsampleLauncher(int b,int n,int m,const float * inp_p,const float * inp_r,float * temp,int * out); 66 | class ProbSampleGpuOp: public OpKernel{ 67 | public: 68 | explicit ProbSampleGpuOp(OpKernelConstruction* context):OpKernel(context){} 69 | void Compute(OpKernelContext * context)override{ 70 | const Tensor& inp_tensor=context->input(0); 71 | const Tensor& inpr_tensor=context->input(1); 72 | auto inp_flat=inp_tensor.flat(); 73 | auto inpr_flat=inpr_tensor.flat(); 74 | const float * inp=&(inp_flat(0)); 75 | const float * inpr=&(inpr_flat(0)); 76 | OP_REQUIRES(context,inp_tensor.dims()==2,errors::InvalidArgument("ProbSample expects (batch_size,num_choices) inp shape")); 77 | int b=inp_tensor.shape().dim_size(0); 78 | int n=inp_tensor.shape().dim_size(1); 79 | OP_REQUIRES(context,inpr_tensor.dims()==2 && inpr_tensor.shape().dim_size(0)==b,errors::InvalidArgument("ProbSample expects (batch_size,num_points) inpr shape")); 80 | int m=inpr_tensor.shape().dim_size(1); 81 | Tensor * out_tensor=NULL; 82 | OP_REQUIRES_OK(context,context->allocate_output(0,TensorShape{b,m},&out_tensor)); 83 | auto out_flat=out_tensor->flat(); 84 | int * out=&(out_flat(0)); 85 | Tensor temp_tensor; 86 | OP_REQUIRES_OK(context,context->allocate_temp(DataTypeToEnum::value,TensorShape{b,n},&temp_tensor)); 87 | auto temp_flat=temp_tensor.flat(); 88 | float * temp=&(temp_flat(0)); 89 | probsampleLauncher(b,n,m,inp,inpr,temp,out); 90 | } 91 | }; 92 | REGISTER_KERNEL_BUILDER(Name("ProbSample").Device(DEVICE_GPU), ProbSampleGpuOp); 93 | 94 | void farthestpointsamplingLauncher(int b,int n,int m,const float * inp,float * temp,int * out); 95 | class FarthestPointSampleGpuOp: public OpKernel{ 96 | public: 97 | explicit FarthestPointSampleGpuOp(OpKernelConstruction* context):OpKernel(context) { 98 | OP_REQUIRES_OK(context, context->GetAttr("npoint", &npoint_)); 99 | OP_REQUIRES(context, npoint_ > 0, errors::InvalidArgument("FarthestPointSample expects positive npoint")); 100 | } 101 | void Compute(OpKernelContext * context)override{ 102 | int m = npoint_; 103 | 104 | const Tensor& inp_tensor=context->input(0); 105 | OP_REQUIRES(context,inp_tensor.dims()==3 && inp_tensor.shape().dim_size(2)==3,errors::InvalidArgument("FarthestPointSample expects (batch_size,num_points,3) inp shape")); 106 | int b=inp_tensor.shape().dim_size(0); 107 | int n=inp_tensor.shape().dim_size(1); 108 | auto inp_flat=inp_tensor.flat(); 109 | const float * inp=&(inp_flat(0)); 110 | Tensor * out_tensor; 111 | OP_REQUIRES_OK(context,context->allocate_output(0,TensorShape{b,m},&out_tensor)); 112 | auto out_flat=out_tensor->flat(); 113 | int * out=&(out_flat(0)); 114 | Tensor temp_tensor; 115 | OP_REQUIRES_OK(context,context->allocate_temp(DataTypeToEnum::value,TensorShape{32,n},&temp_tensor)); 116 | auto temp_flat=temp_tensor.flat(); 117 | float * temp=&(temp_flat(0)); 118 | farthestpointsamplingLauncher(b,n,m,inp,temp,out); 119 | } 120 | private: 121 | int npoint_; 122 | }; 123 | REGISTER_KERNEL_BUILDER(Name("FarthestPointSample").Device(DEVICE_GPU),FarthestPointSampleGpuOp); 124 | 125 | void gatherpointLauncher(int b,int n,int m,const float * inp,const int * idx,float * out); 126 | class GatherPointGpuOp: public OpKernel{ 127 | public: 128 | explicit GatherPointGpuOp(OpKernelConstruction * context):OpKernel(context){} 129 | void Compute(OpKernelContext * context)override{ 130 | const Tensor& inp_tensor=context->input(0); 131 | OP_REQUIRES(context,inp_tensor.dims()==3 && inp_tensor.shape().dim_size(2)==3,errors::InvalidArgument("GatherPoint expects 
(batch_size,num_points,3) inp shape")); 132 | int b=inp_tensor.shape().dim_size(0); 133 | int n=inp_tensor.shape().dim_size(1); 134 | const Tensor& idx_tensor=context->input(1); 135 | OP_REQUIRES(context,idx_tensor.dims()==2 && idx_tensor.shape().dim_size(0)==b,errors::InvalidArgument("GatherPoint expects (batch_size,num_result) idx shape")); 136 | int m=idx_tensor.shape().dim_size(1); 137 | auto inp_flat=inp_tensor.flat(); 138 | const float * inp=&(inp_flat(0)); 139 | auto idx_flat=idx_tensor.flat(); 140 | const int * idx=&(idx_flat(0)); 141 | Tensor * out_tensor=NULL; 142 | OP_REQUIRES_OK(context,context->allocate_output(0,TensorShape{b,m,3},&out_tensor)); 143 | auto out_flat=out_tensor->flat(); 144 | float * out=&(out_flat(0)); 145 | gatherpointLauncher(b,n,m,inp,idx,out); 146 | } 147 | }; 148 | REGISTER_KERNEL_BUILDER(Name("GatherPoint").Device(DEVICE_GPU),GatherPointGpuOp); 149 | 150 | void scatteraddpointLauncher(int b,int n,int m,const float * out_g,const int * idx,float * inp_g); 151 | class GatherPointGradGpuOp: public OpKernel{ 152 | public: 153 | explicit GatherPointGradGpuOp(OpKernelConstruction * context):OpKernel(context){} 154 | void Compute(OpKernelContext * context)override{ 155 | const Tensor& inp_tensor=context->input(0); 156 | OP_REQUIRES(context,inp_tensor.dims()==3 && inp_tensor.shape().dim_size(2)==3,errors::InvalidArgument("GatherPointGradGpuOp expects (batch_size,num_points,3) inp")); 157 | int b=inp_tensor.shape().dim_size(0); 158 | int n=inp_tensor.shape().dim_size(1); 159 | const Tensor& idx_tensor=context->input(1); 160 | OP_REQUIRES(context,idx_tensor.dims()==2 && idx_tensor.shape().dim_size(0)==b,errors::InvalidArgument("GatherPointGradGpuOp expects (batch_size,num_result) idx shape")); 161 | int m=idx_tensor.shape().dim_size(1); 162 | auto inp_flat=inp_tensor.flat(); 163 | const float * inp=&(inp_flat(0)); 164 | auto idx_flat=idx_tensor.flat(); 165 | const int * idx=&(idx_flat(0)); 166 | const Tensor& out_g_tensor=context->input(2); 167 | OP_REQUIRES(context,out_g_tensor.dims()==3 && out_g_tensor.shape().dim_size(0)==b && out_g_tensor.shape().dim_size(1)==m && out_g_tensor.shape().dim_size(2)==3,errors::InvalidArgument("GatherPointGradGpuOp expects (batch_size,num_result,3) out_g shape")); 168 | auto out_g_flat=out_g_tensor.flat(); 169 | const float * out_g=&(out_g_flat(0)); 170 | Tensor * inp_g_tensor=NULL; 171 | OP_REQUIRES_OK(context,context->allocate_output(0,TensorShape{b,n,3},&inp_g_tensor)); 172 | auto inp_g_flat=inp_g_tensor->flat(); 173 | float * inp_g=&(inp_g_flat(0)); 174 | cudaMemset(inp_g,0,b*n*3*4); 175 | scatteraddpointLauncher(b,n,m,out_g,idx,inp_g); 176 | } 177 | }; 178 | REGISTER_KERNEL_BUILDER(Name("GatherPointGrad").Device(DEVICE_GPU),GatherPointGradGpuOp); 179 | 180 | -------------------------------------------------------------------------------- /src/tf_ops/sampling/tf_sampling.py: -------------------------------------------------------------------------------- 1 | ''' Furthest point sampling 2 | Original author: Haoqiang Fan 3 | Modified by Charles R. Qi 4 | All Rights Reserved. 2017. 
5 | '''
6 | import tensorflow as tf
7 | from tensorflow.python.framework import ops
8 | import sys
9 | import os
10 | # import ipdb  # debugger import; only needed when re-enabling ipdb.set_trace() below
11 | BASE_DIR = os.path.dirname(os.path.abspath(__file__))
12 | sys.path.append(BASE_DIR)
13 | sampling_module = tf.load_op_library(os.path.join(BASE_DIR, 'tf_sampling_so.so'))
14 | # ipdb.set_trace()
15 | def prob_sample(inp,inpr):
16 |     '''
17 |     input:
18 |         batch_size * ncategory float32
19 |         batch_size * npoints float32
20 |     returns:
21 |         batch_size * npoints int32
22 |     '''
23 |     return sampling_module.prob_sample(inp,inpr)
24 | ops.NoGradient('ProbSample')
25 | # TF1.0 API requires set shape in C++
26 | #@tf.RegisterShape('ProbSample')
27 | #def _prob_sample_shape(op):
28 | #    shape1=op.inputs[0].get_shape().with_rank(2)
29 | #    shape2=op.inputs[1].get_shape().with_rank(2)
30 | #    return [tf.TensorShape([shape2.dims[0],shape2.dims[1]])]
31 | def gather_point(inp,idx):
32 |     '''
33 |     input:
34 |         batch_size * ndataset * 3 float32 (16, 2048, 3), point cloud (1 or 2)
35 |         batch_size * npoints int32 (16, 1024)
36 |     returns:
37 |         batch_size * npoints * 3 float32 (16, 1024, 3), new xyz
38 |     '''
39 |     return sampling_module.gather_point(inp,idx)
40 | #@tf.RegisterShape('GatherPoint')
41 | #def _gather_point_shape(op):
42 | #    shape1=op.inputs[0].get_shape().with_rank(3)
43 | #    shape2=op.inputs[1].get_shape().with_rank(2)
44 | #    return [tf.TensorShape([shape1.dims[0],shape2.dims[1],shape1.dims[2]])]
45 | @tf.RegisterGradient('GatherPoint')
46 | def _gather_point_grad(op,out_g):
47 |     inp=op.inputs[0]
48 |     idx=op.inputs[1]
49 |     return [sampling_module.gather_point_grad(inp,idx,out_g),None]
50 | def farthest_point_sample(npoint,inp):
51 |     '''
52 |     input:
53 |         int32
54 |         batch_size * ndataset * 3 float32
55 |     returns:
56 |         batch_size * npoint int32
57 |     '''
58 |     return sampling_module.farthest_point_sample(inp, npoint)
59 | ops.NoGradient('FarthestPointSample')
60 | 
61 | 
62 | if __name__=='__main__':
63 |     import numpy as np
64 |     np.random.seed(100)
65 |     triangles=np.random.rand(1,5,3,3).astype('float32')
66 |     with tf.device('/gpu:1'):
67 |         inp=tf.constant(triangles)
68 |         tria=inp[:,:,0,:]
69 |         trib=inp[:,:,1,:]
70 |         tric=inp[:,:,2,:]
71 |         areas=tf.sqrt(tf.reduce_sum(tf.cross(trib-tria,tric-tria)**2,2)+1e-9)
72 |         randomnumbers=tf.random_uniform((1,8192))
73 |         triids=prob_sample(areas,randomnumbers)
74 |         tria_sample=gather_point(tria,triids)
75 |         trib_sample=gather_point(trib,triids)
76 |         tric_sample=gather_point(tric,triids)
77 |         us=tf.random_uniform((1,8192))
78 |         vs=tf.random_uniform((1,8192))
79 |         uplusv=1-tf.abs(us+vs-1)
80 |         uminusv=us-vs
81 |         us=(uplusv+uminusv)*0.5
82 |         vs=(uplusv-uminusv)*0.5
83 |         pt_sample=tria_sample+(trib_sample-tria_sample)*tf.expand_dims(us,-1)+(tric_sample-tria_sample)*tf.expand_dims(vs,-1)
84 |         print('pt_sample: ', pt_sample)
85 |         reduced_sample=gather_point(pt_sample,farthest_point_sample(1024,pt_sample))
86 |         print(reduced_sample)
87 |     with tf.Session('') as sess:
88 |         ret=sess.run(reduced_sample)
89 |         print(ret.shape,ret.dtype)
90 |         import pickle
91 |         pickle.dump(ret,open('1.pkl','wb'),-1)
92 | 
--------------------------------------------------------------------------------
/src/tf_ops/sampling/tf_sampling_g.cu:
--------------------------------------------------------------------------------
1 | /* Furthest point sampling GPU implementation
2 |  * Original author: Haoqiang Fan
3 |  * Modified by Charles R. Qi
4 |  * All Rights Reserved. 2017.
5 | */ 6 | 7 | __global__ void cumsumKernel(int b,int n,const float * __restrict__ inp,float * __restrict__ out){ 8 | const int BlockSize=2048; 9 | const int paddingLevel=5; 10 | __shared__ float buffer4[BlockSize*4]; 11 | __shared__ float buffer[BlockSize+(BlockSize>>paddingLevel)]; 12 | for (int i=blockIdx.x;i>2; 18 | for (int k=threadIdx.x*4;k>2)+(k>>(2+paddingLevel))]=v4; 33 | }else{ 34 | float v=0; 35 | for (int k2=k;k2>2)+(k>>(2+paddingLevel))]=v; 43 | } 44 | } 45 | int u=0; 46 | for (;(2<>(u+1));k+=blockDim.x){ 49 | int i1=(((k<<1)+2)<>paddingLevel; 52 | i2+=i2>>paddingLevel; 53 | buffer[i1]+=buffer[i2]; 54 | } 55 | } 56 | u--; 57 | for (;u>=0;u--){ 58 | __syncthreads(); 59 | for (int k=threadIdx.x;k>(u+1));k+=blockDim.x){ 60 | int i1=(((k<<1)+3)<>paddingLevel; 63 | i2+=i2>>paddingLevel; 64 | buffer[i1]+=buffer[i2]; 65 | } 66 | } 67 | __syncthreads(); 68 | for (int k=threadIdx.x*4;k>2)-1)+(((k>>2)-1)>>paddingLevel); 71 | buffer4[k]+=buffer[k2]; 72 | buffer4[k+1]+=buffer[k2]; 73 | buffer4[k+2]+=buffer[k2]; 74 | buffer4[k+3]+=buffer[k2]; 75 | } 76 | } 77 | __syncthreads(); 78 | for (int k=threadIdx.x;k>paddingLevel)]+runningsum2; 82 | float r2=runningsum+t; 83 | runningsum2=t-(r2-runningsum); 84 | runningsum=r2; 85 | __syncthreads(); 86 | } 87 | } 88 | } 89 | 90 | __global__ void binarysearchKernel(int b,int n,int m,const float * __restrict__ dataset,const float * __restrict__ query, int * __restrict__ result){ 91 | int base=1; 92 | while (base=1;k>>=1) 99 | if (r>=k && dataset[i*n+r-k]>=q) 100 | r-=k; 101 | result[i*m+j]=r; 102 | } 103 | } 104 | } 105 | __global__ void farthestpointsamplingKernel(int b,int n,int m,const float * __restrict__ dataset,float * __restrict__ temp,int * __restrict__ idxs){ 106 | if (m<=0) 107 | return; 108 | const int BlockSize=512; 109 | __shared__ float dists[BlockSize]; 110 | __shared__ int dists_i[BlockSize]; 111 | const int BufferSize=3072; 112 | __shared__ float buf[BufferSize*3]; 113 | for (int i=blockIdx.x;ibest){ 147 | best=d2; 148 | besti=k; 149 | } 150 | } 151 | dists[threadIdx.x]=best; 152 | dists_i[threadIdx.x]=besti; 153 | for (int u=0;(1<>(u+1))){ 156 | int i1=(threadIdx.x*2)<>>(b,n,inp,out); 196 | } 197 | //require b*n working space 198 | void probsampleLauncher(int b,int n,int m,const float * inp_p,const float * inp_r,float * temp,int * out){ 199 | cumsumKernel<<<32,512>>>(b,n,inp_p,temp); 200 | binarysearchKernel<<>>(b,n,m,temp,inp_r,out); 201 | } 202 | //require 32*n working space 203 | void farthestpointsamplingLauncher(int b,int n,int m,const float * inp,float * temp,int * out){ 204 | farthestpointsamplingKernel<<<32,512>>>(b,n,m,inp,temp,out); 205 | } 206 | void gatherpointLauncher(int b,int n,int m,const float * inp,const int * idx,float * out){ 207 | gatherpointKernel<<>>(b,n,m,inp,idx,out); 208 | } 209 | void scatteraddpointLauncher(int b,int n,int m,const float * out_g,const int * idx,float * inp_g){ 210 | scatteraddpointKernel<<>>(b,n,m,out_g,idx,inp_g); 211 | } 212 | 213 | -------------------------------------------------------------------------------- /src/tf_ops/sampling/tf_sampling_g.cu.o: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HimangiM/Just-Go-with-the-Flow-Self-Supervised-Scene-Flow-Estimation/7052587bd5c238a0b9c3f4bb3334ab923cee402e/src/tf_ops/sampling/tf_sampling_g.cu.o -------------------------------------------------------------------------------- /src/tf_ops/sampling/tf_sampling_so.so: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/HimangiM/Just-Go-with-the-Flow-Self-Supervised-Scene-Flow-Estimation/7052587bd5c238a0b9c3f4bb3334ab923cee402e/src/tf_ops/sampling/tf_sampling_so.so -------------------------------------------------------------------------------- /src/train_1nn_cycle_nuscenes.py: -------------------------------------------------------------------------------- 1 | 2 | import argparse 3 | import math 4 | from datetime import datetime 5 | import numpy as np 6 | import tensorflow as tf 7 | import socket 8 | import importlib 9 | import os 10 | import sys 11 | # import ipdb 12 | from tempfile import TemporaryFile 13 | from tensorflow.python import debug as tf_debug 14 | 15 | BASE_DIR = os.path.dirname(os.path.abspath(__file__)) 16 | sys.path.append(BASE_DIR) 17 | import pickle 18 | 19 | # arguments start from here 20 | 21 | parser = argparse.ArgumentParser() 22 | parser.add_argument('--gpu', type=int, default=0, help='GPU to use [default: GPU 0]') 23 | parser.add_argument('--model', default='model_concat_upsa', help='Model name [default: model_concat_upsa]') 24 | parser.add_argument('--data', default='data_preprocessing/data_processed_maxcut_35_20k_2k_8192', help='Dataset directory [default: data_preprocessing/data_processed_maxcut_35_20k_2k_8192]') 25 | parser.add_argument('--log_dir', default='log_train', help='Log dir [default: log_train]') 26 | parser.add_argument('--model_path', default='log_train_pretrained/model.ckpt', help='model weights path') 27 | parser.add_argument('--num_point', type=int, default=2048, help='Point Number [default: 2048]') 28 | parser.add_argument('--max_epoch', type=int, default=150, help='Epochs to run [default: 150]') 29 | parser.add_argument('--batch_size', type=int, default=16, help='Batch Size during training [default: 16]') 30 | parser.add_argument('--learning_rate', type=float, default=0.001, help='Initial learning rate [default: 0.001]') 31 | parser.add_argument('--momentum', type=float, default=0.9, help='Momentum for the momentum optimizer [default: 0.9]') 32 | parser.add_argument('--optimizer', default='adam', help='adam or momentum [default: adam]') 33 | parser.add_argument('--decay_step', type=int, default=200000, help='Decay step for lr decay [default: 200000]') 34 | parser.add_argument('--decay_rate', type=float, default=0.7, help='Decay rate for lr decay [default: 0.7]') 35 | parser.add_argument('--radius', type=float, default=5.0, help='Radius of flow embedding layer') 36 | parser.add_argument('--layer', type=str, default='pointnet', help='Last layer for upconv') 37 | parser.add_argument('--flow', type=str, default='default', help='flow embedding module type') 38 | parser.add_argument('--cache_size', type=int, default=30000, help='Dataset cache size [default: 30000]') 39 | parser.add_argument('--softmax', action='store_true', help='softmax in sampling') 40 | parser.add_argument('--knn', action='store_true', help='knn or query ball point') 41 | parser.add_argument('--numfiles', type=int, default=100, help='Number of files to fine tune on') 42 | parser.add_argument('--num_frames', type=int, default=3, help='Number of frames to run cycle') 43 | parser.add_argument('--fine_tune', action='store_true', help='load trained model and resume batch') 44 | parser.add_argument('--dataset', type=str, default='flying_things_dataset', help='dataset to train') 45 | parser.add_argument('--stop_gradient', action='store_true', help='Stop gradient for predicted point cloud 2') 46 |
parser.add_argument('--flip_prob', type=float, default=0, help='Probability to flip the point cloud frames') 47 | parser.add_argument('--rigidity', action='store_true', help='Rigidity') 48 | parser.add_argument('--rgb', action='store_true', help='RGB') 49 | parser.add_argument('--cycle_loss_weight', type=float, default=1, help='Weight for cycle loss') 50 | parser.add_argument('--knn_loss_weight', type=float, default=1, help='Weight for KNN loss') 51 | 52 | FLAGS = parser.parse_args() 53 | 54 | os.environ['CUDA_VISIBLE_DEVICES'] = str(FLAGS.gpu) 55 | 56 | EPOCH_CNT = 0 57 | 58 | KNN_LOSS_WEIGHT = FLAGS.knn_loss_weight 59 | BATCH_SIZE = FLAGS.batch_size 60 | NUM_POINT = FLAGS.num_point 61 | DATA = FLAGS.data 62 | MAX_EPOCH = FLAGS.max_epoch 63 | BASE_LEARNING_RATE = FLAGS.learning_rate 64 | GPU_INDEX = FLAGS.gpu 65 | MOMENTUM = FLAGS.momentum 66 | OPTIMIZER = FLAGS.optimizer 67 | DECAY_STEP = FLAGS.decay_step 68 | DECAY_RATE = FLAGS.decay_rate 69 | RADIUS = FLAGS.radius 70 | LAYER = FLAGS.layer 71 | FLOW_MODULE = FLAGS.flow 72 | CACHE_SIZE = FLAGS.cache_size 73 | KNN = FLAGS.knn 74 | SOFTMAX_ARG = FLAGS.softmax 75 | NUM_FILES = FLAGS.numfiles 76 | NUM_FRAMES = FLAGS.num_frames 77 | FINE_TUNE = FLAGS.fine_tune 78 | STOP_GRADIENT = FLAGS.stop_gradient 79 | FLIP_PROB=FLAGS.flip_prob 80 | RIGIDITY = FLAGS.rigidity 81 | RGB = FLAGS.rgb 82 | CYCLE_LOSS_WEIGHT = FLAGS.cycle_loss_weight 83 | 84 | print(FLAGS) 85 | 86 | DATASET = importlib.import_module(FLAGS.dataset) 87 | 88 | MODEL = importlib.import_module(FLAGS.model) # import network module 89 | MODEL_FILE = os.path.join(BASE_DIR, FLAGS.model + '.py') 90 | MODEL_PATH = FLAGS.model_path 91 | LOG_DIR = FLAGS.log_dir 92 | if not os.path.exists(LOG_DIR): os.mkdir(LOG_DIR) 93 | os.system('cp %s %s' % (MODEL_FILE, LOG_DIR)) # bkp of model def 94 | TRAIN_DATASET = DATASET.SceneflowDataset(DATA, npoints=NUM_POINT, 95 | cache_size=CACHE_SIZE, softmax_dist=SOFTMAX_ARG, 96 | train=True, num_frames = NUM_FRAMES, flip_prob=FLIP_PROB) 97 | print ('len of train: ', len(TRAIN_DATASET)) 98 | 99 | os.system('cp %s %s' % (__file__, LOG_DIR)) # bkp of train procedure 100 | os.system('cp %s %s' % ('{}.py'.format(FLAGS.dataset), LOG_DIR)) # bkp of dataset file 101 | LOG_FOUT = open(os.path.join(LOG_DIR, 'log_train.txt'), 'w') 102 | LOG_FOUT.write(str(FLAGS) + '\n') 103 | 104 | BN_INIT_DECAY = 0.5 105 | BN_DECAY_DECAY_RATE = 0.5 106 | BN_DECAY_DECAY_STEP = float(DECAY_STEP) 107 | BN_DECAY_CLIP = 0.99 108 | 109 | TEST_DATASET = DATASET.SceneflowDataset(DATA, npoints=NUM_POINT, train=False, num_frames = NUM_FRAMES) 110 | print ('len of test: ', len(TEST_DATASET)) 111 | 112 | def log_string(out_str): 113 | LOG_FOUT.write(out_str + '\n') 114 | LOG_FOUT.flush() 115 | print(out_str) 116 | 117 | 118 | def get_learning_rate(batch): 119 | learning_rate = tf.train.exponential_decay( 120 | BASE_LEARNING_RATE, # Base learning rate. 121 | batch * BATCH_SIZE, # Current index into the dataset. 122 | DECAY_STEP, # Decay step. 123 | DECAY_RATE, # Decay rate. 124 | staircase=True) 125 | learning_rate = tf.maximum(learning_rate, 0.000001) # CLIP THE LEARNING RATE! 
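    # With the defaults above (base 1e-3, decay 0.7 per 200000 samples, staircase),
    # this evaluates to lr(step) = max(1e-3 * 0.7 ** floor(step * BATCH_SIZE / 200000), 1e-6):
    # roughly 7e-4 after the first 200k training samples, 4.9e-4 after 400k, and the
    # tf.maximum above stops the decay at 1e-6.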
126 | return learning_rate 127 | 128 | 129 | def get_bn_decay(batch): 130 | bn_momentum = tf.train.exponential_decay( 131 | BN_INIT_DECAY, 132 | batch * BATCH_SIZE, 133 | BN_DECAY_DECAY_STEP, 134 | BN_DECAY_DECAY_RATE, 135 | staircase=True) 136 | bn_decay = tf.minimum(BN_DECAY_CLIP, 1 - bn_momentum) 137 | return bn_decay 138 | 139 | 140 | def train(): 141 | with tf.Graph().as_default(): 142 | with tf.device('/gpu:' + str(GPU_INDEX)): 143 | # pointclouds_pl = [16, 4096, 6], labels_pl = [16, 2048, 3], masks_pl = [16, 2048] 144 | pointclouds_pl = MODEL.placeholder_inputs(BATCH_SIZE, NUM_POINT, NUM_FRAMES) 145 | # ipdb.set_trace() 146 | # a = tf.slice(pointclouds_pl, [1, 0, 0], [1, 1, 6]) 147 | is_training_pl = tf.placeholder(tf.bool, shape=()) 148 | 149 | # Note the global_step=batch parameter to minimize. 150 | # That tells the optimizer to helpfully increment the 'batch' parameter for you every time it trains. 151 | batch = tf.Variable(0) # batch = 0 152 | bn_decay = get_bn_decay(batch) # bn_decay = 0.5 153 | tf.summary.scalar('bn_decay', bn_decay) 154 | 155 | print("--- Get model and loss") 156 | # Get model and loss 157 | pred_f, pred_b, label_nn, end_points_f, end_points_b = MODEL.get_model(RADIUS, 158 | LAYER, 159 | pointclouds_pl, 160 | is_training_pl, 161 | bn_decay=bn_decay, 162 | knn=KNN, 163 | flow_module=FLOW_MODULE, 164 | num_frames=NUM_FRAMES, 165 | stop_gradient=STOP_GRADIENT, 166 | rigidity=RIGIDITY, 167 | rgb=RGB) 168 | 169 | loss, end_points_loss = MODEL.get_cycle_loss(pred_f = pred_f, grouped_xyz = label_nn, 170 | pred_b = pred_b, 171 | point_cloud1 = pointclouds_pl[:, :NUM_POINT, :3], 172 | end_points=end_points_f, rigidity=RIGIDITY, 173 | rgb=RGB, point_cloud1_rgb=pointclouds_pl[:, :NUM_POINT, 3:], 174 | cycle_loss_weight=CYCLE_LOSS_WEIGHT) ### L2 Loss 175 | tf.summary.scalar('loss', loss) 176 | 177 | print("--- Get training operator") 178 | # Get training operator 179 | learning_rate = get_learning_rate(batch) ### 0.001 in the arguments 180 | tf.summary.scalar('learning_rate', learning_rate) 181 | if OPTIMIZER == 'momentum': 182 | optimizer = tf.train.MomentumOptimizer(learning_rate, momentum=MOMENTUM) 183 | elif OPTIMIZER == 'adam': ### given in the arguments 184 | optimizer = tf.train.AdamOptimizer(learning_rate) 185 | # two step below 186 | # train_op = optimizer.minimize(loss, global_step=batch) 187 | 188 | grad_var = optimizer.compute_gradients(loss) 189 | # grads = tf.gradients(loss, tf.trainable_variables()) 190 | # grad_var = list(zip(grads, tf.trainable_variables())) 191 | train_op = optimizer.apply_gradients(grad_var, global_step=batch) 192 | 193 | # Add ops to save and restore all the variables. 
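        # A hedged reading of the graph above: pred_f is the forward flow
        # from frame 1 toward frame 2, pred_b the backward flow predicted
        # from the warped cloud, and label_nn the nearest-neighbor anchors
        # that stand in for ground-truth flow; get_cycle_loss couples a
        # nearest-neighbor term with a cycle-consistency term that asks the
        # forward-then-backward flow to land back on point cloud 1, scaled
        # by --cycle_loss_weight.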
194 | saver = tf.train.Saver() 195 | 196 | # Create a session 197 | config = tf.ConfigProto() 198 | config.gpu_options.allow_growth = True 199 | config.allow_soft_placement = True 200 | config.log_device_placement = False 201 | sess = tf.Session(config=config) 202 | # sess = tf_debug.LocalCLIDebugWrapperSession(sess, dump_root='/media/gaurav/DATADRIVE0/himangi/tf_dbg') 203 | 204 | # Add summary writers 205 | merged = tf.summary.merge_all() 206 | train_writer = tf.summary.FileWriter(os.path.join(LOG_DIR, 'train'), sess.graph) 207 | test_writer = tf.summary.FileWriter(os.path.join(LOG_DIR, 'test'), sess.graph) 208 | 209 | if FINE_TUNE: 210 | print ('fine tuning, model path:', MODEL_PATH) 211 | saver.restore(sess, MODEL_PATH) 212 | log_string('Pretrained model restored') 213 | # ipdb.set_trace() 214 | init_new_vars_op = tf.initialize_variables([batch]) 215 | sess.run(init_new_vars_op) 216 | else: 217 | # Init variables 218 | init = tf.global_variables_initializer() 219 | sess.run(init) 220 | 221 | ops = {'pointclouds_pl': pointclouds_pl, 222 | 'label': label_nn, 223 | 'is_training_pl': is_training_pl, 224 | 'pred': pred_f, 225 | 'loss': loss, 226 | 'train_op': train_op, 227 | 'merged': merged, 228 | 'step': batch, 229 | 'grad_var': grad_var, 230 | 'end_points_loss': end_points_loss, 231 | 'end_points_f': end_points_f} 232 | 233 | eval_one_epoch(sess, ops, test_writer) 234 | for epoch in range(MAX_EPOCH): 235 | log_string('**** EPOCH %03d ****' % (epoch)) 236 | sys.stdout.flush() 237 | 238 | train_one_epoch(sess, ops, train_writer) 239 | eval_one_epoch(sess, ops, test_writer) 240 | 241 | print("PROGRESS: {}%".format(((epoch+1) / MAX_EPOCH) * 100)) 242 | 243 | # Save the variables to disk. 244 | if epoch % 10 == 0: 245 | save_path = saver.save(sess, os.path.join(LOG_DIR, "model.ckpt")) 246 | log_string("Model saved in file: %s" % save_path) 247 | 248 | 249 | def get_batch(dataset, idxs, start_idx, end_idx): 250 | bsize = end_idx - start_idx 251 | # change here, numpoint *(5, 3) 252 | batch_data = np.zeros((bsize, NUM_POINT * 2, 6)) 253 | batch_label = np.zeros((bsize, NUM_POINT, 3)) 254 | batch_mask = np.zeros((bsize, NUM_POINT)) 255 | # shuffle idx to change point order (change FPS behavior) 256 | shuffle_idx = np.arange(NUM_POINT) 257 | # change here 258 | shuffle_idx2 = np.arange(NUM_POINT) 259 | np.random.shuffle(shuffle_idx) 260 | np.random.shuffle(shuffle_idx2) 261 | 262 | for i in range(bsize): 263 | # ipdb.set_trace() 264 | # if dataset[0] == None: 265 | # print (i, bsize) 266 | # import ipdb; ipdb.set_trace() 267 | pc1, pc2, color1, color2, flow, mask1 = dataset[idxs[i + start_idx]] 268 | 269 | # move pc1 to center 270 | pc1_center = np.mean(pc1, 0) 271 | pc1 -= pc1_center 272 | pc2 -= pc1_center 273 | batch_data[i, :NUM_POINT, :3] = pc1[shuffle_idx] 274 | batch_data[i, :NUM_POINT, 3:] = color1[shuffle_idx] 275 | batch_data[i, NUM_POINT:, :3] = pc2[shuffle_idx2] 276 | 277 | batch_data[i, NUM_POINT:, 3:] = color2[shuffle_idx2] 278 | batch_label[i] = flow[shuffle_idx] 279 | batch_mask[i] = mask1[shuffle_idx] 280 | 281 | return batch_data, batch_label, batch_mask 282 | 283 | 284 | def get_cycle_batch(dataset, idxs, start_idx, end_idx): 285 | bsize = end_idx - start_idx 286 | # change here, numpoint *(5, 3) 287 | batch_data = np.zeros((bsize, NUM_POINT * NUM_FRAMES, 6)) 288 | 289 | shuffle_idx = np.arange(NUM_POINT) 290 | 291 | for i in range(bsize): 292 | # ipdb.set_trace() 293 | # if dataset[0] == None: 294 | # print (i, bsize) 295 | pos, color = dataset[idxs[i + start_idx]] 296 | 297 | 
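            # Every frame below is shifted by the centroid of frame 0, so the
            # sequence is roughly origin-centered while inter-frame motion is
            # preserved; the per-frame reshuffle keeps farthest point sampling
            # from latching onto a fixed point order.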
pos1_center = np.mean(pos[0], 0) # 1 * 3 298 | 299 | for frame_idx in range(NUM_FRAMES): 300 | np.random.shuffle(shuffle_idx) 301 | batch_data[i, NUM_POINT*frame_idx:NUM_POINT*(frame_idx+1), :3] = \ 302 | pos[frame_idx, shuffle_idx, :] - pos1_center 303 | batch_data[i, NUM_POINT*frame_idx:NUM_POINT*(frame_idx+1), 3:] = \ 304 | color[frame_idx, shuffle_idx, :] 305 | 306 | return batch_data 307 | 308 | def train_one_epoch(sess, ops, train_writer): 309 | """ ops: dict mapping from string to tf ops """ 310 | is_training = True 311 | outfile = TemporaryFile() 312 | 313 | # Shuffle train samples 314 | train_idxs = np.arange(0, len(TRAIN_DATASET)) 315 | print ('length here:', len(TRAIN_DATASET)) 316 | np.random.shuffle(train_idxs) 317 | num_batches = len(TRAIN_DATASET) // BATCH_SIZE 318 | log_string('Len of dataset: %f' % len(TRAIN_DATASET)) 319 | log_string(str(datetime.now())) 320 | 321 | loss_sum = 0 322 | for batch_idx in range(num_batches): 323 | start_idx = batch_idx * BATCH_SIZE 324 | end_idx = (batch_idx + 1) * BATCH_SIZE 325 | batch_data = get_cycle_batch(TRAIN_DATASET, train_idxs, start_idx, end_idx) 326 | 327 | feed_dict = {ops['pointclouds_pl']: batch_data, 328 | ops['is_training_pl']: is_training, } 329 | summary, step, _, grad_var_val, \ 330 | loss_val, pred_val, label_val, end_points_loss_val, \ 331 | end_points_f = sess.run([ops['merged'], ops['step'], 332 | ops['train_op'], 333 | ops['grad_var'][1:], 334 | ops['loss'], 335 | ops['pred'], 336 | ops['label'], 337 | ops['end_points_loss'], 338 | ops['end_points_f']], feed_dict=feed_dict) 339 | # print('Train end points loss val losses', end_points_loss_val) 340 | for g, v in grad_var_val: 341 | if np.isnan(g).any(): 342 | print('gradient is nan') 343 | ipdb.set_trace() 344 | if np.isnan(v).any(): 345 | print('variable is nan') 346 | ipdb.set_trace() 347 | if np.isnan(loss_val): 348 | print('>>>>>> NAN <<<<<<<<') 349 | ipdb.set_trace() 350 | 351 | # x = np.arange(16) 352 | # np.save('pointcloud', pred_val) 353 | # print ('point cloud value here:', ops['pointclouds_pl']) 354 | 355 | ### OPTIC FLOW HERE 356 | # print ('pred_val: ', pred_val.shape, type(pred_val)) 357 | 358 | train_writer.add_summary(summary, step) 359 | loss_sum += loss_val 360 | 361 | if (batch_idx + 1) % 1 == 0: 362 | log_string(' -- %03d / %03d --' % (batch_idx + 1, num_batches)) 363 | log_string('Cycle Train mean loss: %f' % (loss_sum / 2)) 364 | log_string('Cycle Train all losses {}'.format(end_points_loss_val)) 365 | loss_sum = 0 366 | 367 | def scene_flow_EPE_np(pred, labels, mask): 368 | error = np.sqrt(np.sum((pred - labels)**2, 2) + 1e-20) 369 | 370 | gtflow_len = np.sqrt(np.sum(labels*labels, 2) + 1e-20) # B,N 371 | acc1 = np.sum(np.logical_or((error <= 0.05)*mask, (error/gtflow_len <= 0.05)*mask), axis=1) 372 | acc2 = np.sum(np.logical_or((error <= 0.1)*mask, (error/gtflow_len <= 0.1)*mask), axis=1) 373 | 374 | mask_sum = np.sum(mask, 1) 375 | acc1 = acc1[mask_sum > 0] / mask_sum[mask_sum > 0] 376 | acc1 = np.mean(acc1) 377 | acc2 = acc2[mask_sum > 0] / mask_sum[mask_sum > 0] 378 | acc2 = np.mean(acc2) 379 | 380 | EPE = np.sum(error * mask, 1)[mask_sum > 0] / mask_sum[mask_sum > 0] 381 | EPE = np.mean(EPE) 382 | return EPE, acc1, acc2 383 | 384 | 385 | def eval_one_epoch(sess, ops, test_writer): 386 | """ ops: dict mapping from string to tf ops """ 387 | global EPOCH_CNT 388 | is_training = False 389 | 390 | test_idxs = np.arange(0, len(TEST_DATASET)) 391 | print ('length here:', len(TEST_DATASET)) 392 | # np.random.shuffle(train_idxs) 393 | num_batches = 
len(TEST_DATASET) // BATCH_SIZE 394 | log_string('Len of dataset: %f' % len(TEST_DATASET)) 395 | log_string(str(datetime.now())) 396 | 397 | loss_sum = 0 398 | for batch_idx in range(num_batches): 399 | start_idx = batch_idx * BATCH_SIZE 400 | end_idx = (batch_idx + 1) * BATCH_SIZE 401 | batch_data = get_cycle_batch(TEST_DATASET, test_idxs, 402 | start_idx, end_idx) 403 | 404 | feed_dict = {ops['pointclouds_pl']: batch_data, 405 | ops['is_training_pl']: is_training, } 406 | summary, step, \ 407 | loss_val, pred_val, label_val, end_points_loss_val, end_points_f = sess.run([ops['merged'], ops['step'], 408 | ops['loss'], 409 | ops['pred'], 410 | ops['label'], 411 | ops['end_points_loss'], 412 | ops['end_points_f']], 413 | feed_dict=feed_dict) 414 | 415 | 416 | loss_sum += loss_val 417 | log_string(' -- %03d / %03d --' % (batch_idx + 1, num_batches)) 418 | log_string('loss: %f' % (loss_val)) 419 | log_string('Eval all losses {}'.format(end_points_loss_val)) 420 | # ipdb.set_trace() 421 | 422 | 423 | EPOCH_CNT += 1 424 | avg_loss = loss_sum / float(len(TEST_DATASET) / BATCH_SIZE) 425 | summary = tf.Summary(value=[tf.Summary.Value(tag="loss", 426 | simple_value=avg_loss)]) 427 | test_writer.add_summary(summary, step) 428 | log_string('avg loss: %f' % (avg_loss)) 429 | return loss_sum / float(len(TEST_DATASET) / BATCH_SIZE) 430 | 431 | 432 | if __name__ == "__main__": 433 | log_string('pid: %s' % (str(os.getpid()))) 434 | train() 435 | LOG_FOUT.close() 436 | -------------------------------------------------------------------------------- /src/utils/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HimangiM/Just-Go-with-the-Flow-Self-Supervised-Scene-Flow-Estimation/7052587bd5c238a0b9c3f4bb3334ab923cee402e/src/utils/__init__.py -------------------------------------------------------------------------------- /src/utils/__pycache__/__init__.cpython-35.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HimangiM/Just-Go-with-the-Flow-Self-Supervised-Scene-Flow-Estimation/7052587bd5c238a0b9c3f4bb3334ab923cee402e/src/utils/__pycache__/__init__.cpython-35.pyc -------------------------------------------------------------------------------- /src/utils/__pycache__/__init__.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HimangiM/Just-Go-with-the-Flow-Self-Supervised-Scene-Flow-Estimation/7052587bd5c238a0b9c3f4bb3334ab923cee402e/src/utils/__pycache__/__init__.cpython-36.pyc -------------------------------------------------------------------------------- /src/utils/__pycache__/__init__.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HimangiM/Just-Go-with-the-Flow-Self-Supervised-Scene-Flow-Estimation/7052587bd5c238a0b9c3f4bb3334ab923cee402e/src/utils/__pycache__/__init__.cpython-37.pyc -------------------------------------------------------------------------------- /src/utils/__pycache__/pointnet_util.cpython-35.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HimangiM/Just-Go-with-the-Flow-Self-Supervised-Scene-Flow-Estimation/7052587bd5c238a0b9c3f4bb3334ab923cee402e/src/utils/__pycache__/pointnet_util.cpython-35.pyc -------------------------------------------------------------------------------- 
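Looking back at `train_1nn_cycle_nuscenes.py` above: `scene_flow_EPE_np` fixes the metric conventions — per-point L2 end-point error averaged over masked points, plus Acc(0.05)/Acc(0.1), which count points whose absolute *or* relative error clears the threshold. A toy sanity check of those conventions (a sketch with made-up values, not project code):

```
import numpy as np

pred   = np.zeros((1, 4, 3), dtype=np.float32)        # predict zero flow
labels = np.array([[[0.03, 0, 0], [0.2, 0, 0],
                    [1.0, 0, 0], [0.5, 0, 0]]], dtype=np.float32)
mask   = np.array([[1, 1, 1, 0]], dtype=np.float32)   # 4th point masked out

error = np.sqrt(np.sum((pred - labels) ** 2, 2) + 1e-20)   # per-point L2
gtlen = np.sqrt(np.sum(labels ** 2, 2) + 1e-20)
acc1  = np.logical_or(error <= 0.05, error / gtlen <= 0.05) * mask
print(np.sum(error * mask) / np.sum(mask))   # EPE = (0.03 + 0.2 + 1.0) / 3 ~ 0.41
print(acc1.sum() / mask.sum())               # only the 0.03 error passes -> 1/3
```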
/src/utils/__pycache__/pointnet_util.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HimangiM/Just-Go-with-the-Flow-Self-Supervised-Scene-Flow-Estimation/7052587bd5c238a0b9c3f4bb3334ab923cee402e/src/utils/__pycache__/pointnet_util.cpython-36.pyc -------------------------------------------------------------------------------- /src/utils/__pycache__/pointnet_util.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HimangiM/Just-Go-with-the-Flow-Self-Supervised-Scene-Flow-Estimation/7052587bd5c238a0b9c3f4bb3334ab923cee402e/src/utils/__pycache__/pointnet_util.cpython-37.pyc -------------------------------------------------------------------------------- /src/utils/__pycache__/tf_util.cpython-35.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HimangiM/Just-Go-with-the-Flow-Self-Supervised-Scene-Flow-Estimation/7052587bd5c238a0b9c3f4bb3334ab923cee402e/src/utils/__pycache__/tf_util.cpython-35.pyc -------------------------------------------------------------------------------- /src/utils/__pycache__/tf_util.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HimangiM/Just-Go-with-the-Flow-Self-Supervised-Scene-Flow-Estimation/7052587bd5c238a0b9c3f4bb3334ab923cee402e/src/utils/__pycache__/tf_util.cpython-36.pyc -------------------------------------------------------------------------------- /src/utils/__pycache__/tf_util.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HimangiM/Just-Go-with-the-Flow-Self-Supervised-Scene-Flow-Estimation/7052587bd5c238a0b9c3f4bb3334ab923cee402e/src/utils/__pycache__/tf_util.cpython-37.pyc -------------------------------------------------------------------------------- /src/utils/pointnet_util.py: -------------------------------------------------------------------------------- 1 | """ PointNet++ Layers 2 | 3 | Original Author: Charles R. 
Qi 4 | Modified by Xingyu Liu 5 | Date: April 2019 6 | """ 7 | 8 | import os 9 | import sys 10 | 11 | BASE_DIR = os.path.dirname(os.path.abspath(__file__)) 12 | ROOT_DIR = os.path.dirname(BASE_DIR) 13 | sys.path.append(os.path.join(ROOT_DIR, 'utils')) 14 | sys.path.append(os.path.join(ROOT_DIR, 'tf_ops/sampling')) 15 | sys.path.append(os.path.join(ROOT_DIR, 'tf_ops/grouping')) 16 | sys.path.append(os.path.join(ROOT_DIR, 'tf_ops/3d_interpolation')) 17 | from tf_sampling import farthest_point_sample, gather_point 18 | from tf_grouping import query_ball_point, group_point, knn_point 19 | from tf_interpolate import three_nn, three_interpolate 20 | import tensorflow as tf 21 | import numpy as np 22 | import tf_util 23 | 24 | 25 | ### Sampling and grouping, returns new xyz, new points/features, indices from ball point query, grouped xyz 26 | def sample_and_group(npoint, radius, nsample, xyz, points, knn=False, 27 | use_xyz=True): 28 | ''' 29 | Input: 30 | npoint: int32 = 1024 31 | radius: float32 = 0.5,1,2,4 32 | nsample: int32 = 16 33 | xyz: (batch_size, ndataset, 3) TF tensor 34 | points: (batch_size, ndataset, channel) TF tensor, if None will just use xyz as points 35 | knn: bool, if True use kNN instead of radius search 36 | use_xyz: bool, if True concat XYZ with local point features, otherwise just use point features 37 | Output: 38 | new_xyz: (batch_size, npoint, 3) TF tensor 39 | new_points: (batch_size, npoint, nsample, 3+channel) TF tensor 40 | idx: (batch_size, npoint, nsample) TF tensor, indices of local points as in ndataset points 41 | grouped_xyz: (batch_size, npoint, nsample, 3) TF tensor, normalized point XYZs 42 | (subtracted by seed point XYZ) in local regions 43 | ''' 44 | new_xyz = gather_point(xyz, farthest_point_sample(npoint, 45 | xyz)) ### Sampling using farthest point sampling 46 | # import ipdb; ipdb.set_trace() 47 | print ('check for seg fault') 48 | 49 | # xyz.shape 50 | # TensorShape([Dimension(4), Dimension(2048), Dimension(3)]) 51 | # new_xyz.shape 52 | # TensorShape([Dimension(4), Dimension(1024), Dimension(3)]) 53 | if knn: 54 | _, idx = knn_point(nsample, xyz, new_xyz) 55 | 56 | else: 57 | idx, pts_cnt = query_ball_point(radius, nsample, xyz, 58 | new_xyz) ### Grouping using ball query 59 | grouped_xyz = group_point(xyz, 60 | idx) # (batch_size, npoint, nsample, 3) ### Resulting grouped coordinates 61 | grouped_xyz -= tf.tile(tf.expand_dims(new_xyz, 2), 62 | [1, 1, nsample, 1]) ### translation normalization 63 | if points is not None: 64 | grouped_points = group_point(points, 65 | idx) # (batch_size, npoint, nsample, channel) ### Resulting grouped features 66 | if use_xyz: 67 | new_points = tf.concat([grouped_xyz, grouped_points], 68 | axis=-1) # (batch_size, npoint, nample, 3+channel) 69 | else: 70 | new_points = grouped_points 71 | else: 72 | new_points = grouped_xyz 73 | 74 | return new_xyz, new_points, idx, grouped_xyz 75 | 76 | 77 | def sample_and_group_all(xyz, points, use_xyz=True): 78 | ''' 79 | Inputs: 80 | xyz: (batch_size, ndataset, 3) TF tensor 81 | points: (batch_size, ndataset, channel) TF tensor, if None will just use xyz as points 82 | use_xyz: bool, if True concat XYZ with local point features, otherwise just use point features 83 | Outputs: 84 | new_xyz: (batch_size, 1, 3) as (0,0,0) 85 | new_points: (batch_size, 1, ndataset, 3+channel) TF tensor 86 | Note: 87 | Equivalent to sample_and_group with npoint=1, radius=inf, use (0,0,0) as the centroid 88 | ''' 89 | batch_size = xyz.get_shape()[0].value 90 | nsample = xyz.get_shape()[1].value 91 | 
new_xyz = tf.constant( 92 | np.tile(np.array([0, 0, 0]).reshape((1, 1, 3)), (batch_size, 1, 1)), 93 | dtype=tf.float32) # (batch_size, 1, 3) 94 | idx = tf.constant(np.tile(np.array(range(nsample)).reshape((1, 1, nsample)), 95 | (batch_size, 1, 1))) 96 | grouped_xyz = tf.reshape(xyz, ( 97 | batch_size, 1, nsample, 3)) # (batch_size, npoint=1, nsample, 3) 98 | if points is not None: 99 | if use_xyz: 100 | new_points = tf.concat([xyz, points], 101 | axis=2) # (batch_size, 16, 259) 102 | else: 103 | new_points = points 104 | new_points = tf.expand_dims(new_points, 1) # (batch_size, 1, 16, 259) 105 | else: 106 | new_points = grouped_xyz 107 | return new_xyz, new_points, idx, grouped_xyz 108 | 109 | def get_interpolated_rgb(xyz, point_cloud): 110 | 111 | _, idx = knn_point(3, point_cloud[:, :, :3], xyz) 112 | grouped_xyz_rgb = group_point(point_cloud, idx) # (batch size, 2048, 3, 6) 113 | 114 | dist = ((tf.expand_dims(xyz, 2) - grouped_xyz_rgb[:, :, :, :3])**2) 115 | dist = tf.maximum(dist, 1e-10) 116 | 117 | norm = tf.reduce_sum((1/dist), axis=2) # (batch, 2048, 3) 118 | 119 | weighted_rgb = (1/dist)*grouped_xyz_rgb[:, :, :, 3:] 120 | weighted_norm_rgb = tf.reduce_sum(weighted_rgb, 2)/norm 121 | 122 | return weighted_norm_rgb, dist, grouped_xyz_rgb 123 | 124 | # Point Net Layer, returns new xyz, new features, indices from ball point query 125 | def pointnet_sa_module(xyz, points, npoint, radius, nsample, mlp, mlp2, 126 | group_all, is_training, bn_decay, scope, bn=True, 127 | pooling='max', knn=False, use_xyz=True, use_nchw=False): 128 | ''' PointNet Set Abstraction (SA) Module 129 | Input: 130 | xyz: (batch_size, ndataset, 3) TF tensor (16, 2048, 3) 131 | points: (batch_size, ndataset, channel) TF tensor 132 | npoint: int32 -- #points sampled in farthest point sampling 133 | radius: float32 -- search radius in local region 134 | nsample: int32 -- how many points in each local region 135 | mlp: list of int32 -- output size for MLP on each point 136 | mlp2: list of int32 -- output size for MLP on each region 137 | group_all: bool -- group all points into one PC if set true, OVERRIDE 138 | npoint, radius and nsample settings 139 | use_xyz: bool, if True concat XYZ with local point features, otherwise just use point features 140 | use_nchw: bool, if True, use NCHW data format for conv2d, which is usually faster than NHWC format 141 | Return: 142 | new_xyz: (batch_size, npoint, 3) TF tensor 143 | new_points: (batch_size, npoint, mlp[-1] or mlp2[-1]) TF tensor 144 | idx: (batch_size, npoint, nsample) int32 -- indices for local regions 145 | ''' 146 | data_format = 'NCHW' if use_nchw else 'NHWC' 147 | with tf.variable_scope(scope) as sc: 148 | # Sample and Grouping 149 | # group_all is passed as False 150 | if group_all: 151 | nsample = xyz.get_shape()[1].value 152 | new_xyz, new_points, idx, grouped_xyz = sample_and_group_all(xyz, 153 | points, 154 | use_xyz) 155 | else: 156 | new_xyz, new_points, idx, grouped_xyz = sample_and_group(npoint, 157 | radius, 158 | nsample, 159 | xyz, 160 | points, 161 | knn, 162 | use_xyz) 163 | 164 | ### POINT NET LAYER 165 | 166 | # Point Feature Embedding 167 | if use_nchw: new_points = tf.transpose(new_points, [0, 3, 1, 2]) 168 | 169 | # MLP 170 | for i, num_out_channel in enumerate(mlp): 171 | # mlp = 3; tf_util.conv2d is done executed 3 times 172 | new_points = tf_util.conv2d(new_points, num_out_channel, [1, 1], 173 | padding='VALID', stride=[1, 1], 174 | bn=bn, is_training=is_training, 175 | scope='conv%d' % (i), bn_decay=bn_decay, 176 | data_format=data_format) 
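            # Shape walk-through (NHWC path): new_points enters this loop as
            # (batch_size, npoint, nsample, channels); each 1x1 conv2d only
            # remaps the channel axis to mlp[i], so after the loop it is
            # (batch_size, npoint, nsample, mlp[-1]), and the pooling below
            # collapses the nsample neighborhood to one vector per region.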
177 | 178 | if use_nchw: new_points = tf.transpose(new_points, [0, 2, 3, 1]) 179 | 180 | # Pooling in Local Regions 181 | if pooling == 'max': 182 | new_points = tf.reduce_max(new_points, axis=[2], keep_dims=True, 183 | name='maxpool') 184 | elif pooling == 'avg': 185 | new_points = tf.reduce_mean(new_points, axis=[2], keep_dims=True, 186 | name='avgpool') 187 | elif pooling == 'weighted_avg': 188 | with tf.variable_scope('weighted_avg'): 189 | dists = tf.norm(grouped_xyz, axis=-1, ord=2, keep_dims=True) 190 | exp_dists = tf.exp(-dists * 5) 191 | weights = exp_dists / tf.reduce_sum(exp_dists, axis=2, 192 | keep_dims=True) # (batch_size, npoint, nsample, 1) 193 | new_points *= weights # (batch_size, npoint, nsample, mlp[-1]) 194 | new_points = tf.reduce_sum(new_points, axis=2, keep_dims=True) 195 | elif pooling == 'max_and_avg': 196 | max_points = tf.reduce_max(new_points, axis=[2], keep_dims=True, 197 | name='maxpool') 198 | avg_points = tf.reduce_mean(new_points, axis=[2], keep_dims=True, 199 | name='avgpool') 200 | new_points = tf.concat([avg_points, max_points], axis=-1) 201 | 202 | # [Optional] Further Processing 203 | if mlp2 is not None: 204 | if use_nchw: new_points = tf.transpose(new_points, [0, 3, 1, 2]) 205 | for i, num_out_channel in enumerate(mlp2): 206 | new_points = tf_util.conv2d(new_points, num_out_channel, [1, 1], 207 | padding='VALID', stride=[1, 1], 208 | bn=bn, is_training=is_training, 209 | scope='conv_post_%d' % (i), 210 | bn_decay=bn_decay, 211 | data_format=data_format) 212 | if use_nchw: new_points = tf.transpose(new_points, [0, 2, 3, 1]) 213 | 214 | new_points = tf.squeeze(new_points, 215 | [2]) # (batch_size, npoints, mlp2[-1]) 216 | return new_xyz, new_points, idx 217 | 218 | 219 | def pointnet_sa_module_msg(xyz, points, npoint, radius_list, nsample_list, 220 | mlp_list, is_training, bn_decay, scope, bn=True, 221 | use_xyz=True, use_nchw=False): 222 | ''' PointNet Set Abstraction (SA) module with Multi-Scale Grouping (MSG) 223 | Input: 224 | xyz: (batch_size, ndataset, 3) TF tensor 225 | points: (batch_size, ndataset, channel) TF tensor 226 | npoint: int32 -- #points sampled in farthest point sampling 227 | radius: list of float32 -- search radius in local region 228 | nsample: list of int32 -- how many points in each local region 229 | mlp: list of list of int32 -- output size for MLP on each point 230 | use_xyz: bool, if True concat XYZ with local point features, otherwise just use point features 231 | use_nchw: bool, if True, use NCHW data format for conv2d, which is usually faster than NHWC format 232 | Return: 233 | new_xyz: (batch_size, npoint, 3) TF tensor 234 | new_points: (batch_size, npoint, \sum_k{mlp[k][-1]}) TF tensor 235 | ''' 236 | data_format = 'NCHW' if use_nchw else 'NHWC' 237 | with tf.variable_scope(scope) as sc: 238 | new_xyz = gather_point(xyz, farthest_point_sample(npoint, xyz)) 239 | new_points_list = [] 240 | for i in range(len(radius_list)): 241 | radius = radius_list[i] 242 | nsample = nsample_list[i] 243 | idx, pts_cnt = query_ball_point(radius, nsample, xyz, new_xyz) 244 | grouped_xyz = group_point(xyz, idx) 245 | grouped_xyz -= tf.tile(tf.expand_dims(new_xyz, 2), 246 | [1, 1, nsample, 1]) 247 | if points is not None: 248 | grouped_points = group_point(points, idx) 249 | if use_xyz: 250 | grouped_points = tf.concat([grouped_points, grouped_xyz], 251 | axis=-1) 252 | else: 253 | grouped_points = grouped_xyz 254 | if use_nchw: grouped_points = tf.transpose(grouped_points, 255 | [0, 3, 1, 2]) 256 | for j, num_out_channel in 
enumerate(mlp_list[i]): 257 | grouped_points = tf_util.conv2d(grouped_points, num_out_channel, 258 | [1, 1], 259 | padding='VALID', stride=[1, 1], 260 | bn=bn, is_training=is_training, 261 | scope='conv%d_%d' % (i, j), 262 | bn_decay=bn_decay) 263 | if use_nchw: grouped_points = tf.transpose(grouped_points, 264 | [0, 2, 3, 1]) 265 | new_points = tf.reduce_max(grouped_points, axis=[2]) 266 | new_points_list.append(new_points) 267 | new_points_concat = tf.concat(new_points_list, axis=-1) 268 | return new_xyz, new_points_concat 269 | 270 | 271 | def pointnet_fp_module(xyz1, xyz2, points1, points2, mlp, is_training, bn_decay, 272 | scope, bn=True, last_mlp_activation=True): 273 | ''' PointNet Feature Propogation (FP) Module 274 | Input: 275 | xyz1: (batch_size, ndataset1, 3) TF tensor 276 | xyz2: (batch_size, ndataset2, 3) TF tensor, sparser than xyz1 277 | points1: (batch_size, ndataset1, nchannel1) TF tensor 278 | points2: (batch_size, ndataset2, nchannel2) TF tensor 279 | mlp: list of int32 -- output size for MLP on each point 280 | Return: 281 | new_points: (batch_size, ndataset1, mlp[-1]) TF tensor 282 | ''' 283 | with tf.variable_scope(scope) as sc: 284 | dist, idx = three_nn(xyz1, xyz2) 285 | dist = tf.maximum(dist, 1e-10) 286 | norm = tf.reduce_sum((1.0 / dist), axis=2, keep_dims=True) 287 | norm = tf.tile(norm, [1, 1, 3]) 288 | weight = (1.0 / dist) / norm 289 | ### INTERPOLATION 290 | interpolated_points = three_interpolate(points2, idx, weight) 291 | 292 | if points1 is not None: 293 | new_points1 = tf.concat(axis=2, values=[interpolated_points, 294 | points1]) # B,ndataset1,nchannel1+nchannel2 295 | else: 296 | new_points1 = interpolated_points 297 | new_points1 = tf.expand_dims(new_points1, 2) 298 | for i, num_out_channel in enumerate(mlp): 299 | if i == len(mlp) - 1 and not (last_mlp_activation): 300 | activation_fn = None 301 | else: 302 | activation_fn = tf.nn.relu 303 | new_points1 = tf_util.conv2d(new_points1, num_out_channel, [1, 1], 304 | padding='VALID', stride=[1, 1], 305 | bn=bn, is_training=is_training, 306 | scope='conv_%d' % (i), 307 | bn_decay=bn_decay, 308 | activation_fn=activation_fn) 309 | new_points1 = tf.squeeze(new_points1, [2]) # B,ndataset1,mlp[-1] 310 | return new_points1 311 | 312 | ## our flow embedding layer 313 | 314 | def flow_embedding_module_all(xyz1, xyz2, feat1, feat2, radius, nsample, mlp, 315 | is_training, bn_decay, scope, bn=True, pooling='max', 316 | knn=True, corr_func='elementwise_product'): 317 | """ 318 | Input: 319 | xyz1: (batch_size, npoint, 3) 320 | xyz2: (batch_size, npoint, 3) 321 | feat1: (batch_size, npoint, channel) 322 | feat2: (batch_size, npoint, channel) 323 | Output: 324 | xyz1: (batch_size, npoint, 3) 325 | feat1_new: (batch_size, npoint, mlp[-1]) 326 | """ 327 | 328 | feat1_expanded = tf.expand_dims(feat1, 2) # batch_size, npoint, 1, 3 329 | feat2_expanded = tf.expand_dims(feat2, 1) 330 | 331 | xyz1_expanded = tf.expand_dims(xyz1, 2) # batch_size, npoint, 1, 3 332 | xyz2_expanded = tf.expand_dims(xyz2, 1) 333 | 334 | xyz_diff = xyz2_expanded - xyz1_expanded # batch_size, npoint, nsample, 3 ### Distance vector 335 | 336 | # (16, 256, 256, 256) 337 | feat_diff = tf.concat(axis=-1, values=[tf.tile(feat2_expanded, [1, nsample, 1, 1]), 338 | tf.tile(feat1_expanded, [1, 1, nsample, 1])]) 339 | 340 | # Concat the coordinates and features difference here 341 | feat1_new = tf.concat([feat_diff, xyz_diff], axis=3) # batch_size, npoint, nsample, [channel or 1] + 3 342 | 343 | # TODO: move scope to outer indent 344 | with 
tf.variable_scope(scope) as sc: 345 | for i, num_out_channel in enumerate(mlp): 346 | feat1_new = tf_util.conv2d(feat1_new, num_out_channel, [1, 1], 347 | padding='VALID', stride=[1, 1], 348 | bn=True, is_training=is_training, 349 | scope='conv_diff_%d' % (i), 350 | bn_decay=bn_decay) 351 | 352 | # Tensor("flow_embedding/conv_diff_0/Relu:0", shape=(16, 256, 64, 128), dtype=float32, device= / device: GPU:0) 353 | # Tensor("flow_embedding/conv_diff_1/Relu:0", shape=(16, 256, 64, 128), dtype=float32, device= / device: GPU:0) 354 | # Tensor("flow_embedding/conv_diff_2/Relu:0", shape=(16, 256, 64, 128), dtype=float32, device= / device: GPU:0) 355 | 356 | if pooling == 'max': 357 | feat1_new = tf.reduce_max(feat1_new, axis=[2], keep_dims=False, 358 | name='maxpool_diff') 359 | elif pooling == 'avg': 360 | feat1_new = tf.reduce_mean(feat1_new, axis=[2], keep_dims=False, 361 | name='avgpool_diff') 362 | return xyz1, feat1_new 363 | 364 | 365 | 366 | ######## 367 | 368 | 369 | def flow_embedding_module(xyz1, xyz2, feat1, feat2, radius, nsample, mlp, 370 | is_training, bn_decay, scope, bn=True, pooling='max', 371 | knn=True, corr_func='elementwise_product'): 372 | """ 373 | Input: 374 | xyz1: (batch_size, npoint, 3) 375 | xyz2: (batch_size, npoint, 3) 376 | feat1: (batch_size, npoint, channel) (16, 256, 128) 377 | feat2: (batch_size, npoint, channel) (16, 256, 128) 378 | Output: 379 | xyz1: (batch_size, npoint, 3) 380 | feat1_new: (batch_size, npoint, mlp[-1]) 381 | """ 382 | """ 383 | our case 16 x 256 x 256 x (128 + 128) 384 | """ 385 | if knn: 386 | _, idx = knn_point(nsample, xyz2, xyz1) 387 | print('knn') 388 | else: 389 | idx, _ = query_ball_point(radius, nsample, xyz2, xyz1) 390 | print('query') 391 | # finds the indices got from knn in pc2 xyz 392 | 393 | 394 | xyz2_grouped = group_point(xyz2, 395 | idx) # batch_size, npoint, nsample, 3 ### Point cloud 2 396 | xyz1_expanded = tf.expand_dims(xyz1, 397 | 2) # batch_size, npoint, 1, 3 ### Point cloud 1 398 | xyz_diff = xyz2_grouped - xyz1_expanded # batch_size, npoint, nsample, 3 ### Distance vector 399 | 400 | feat2_grouped = group_point(feat2, 401 | idx) # batch_size, npoint, nsample, channel 402 | feat1_expanded = tf.expand_dims(feat1, 2) # batch_size, npoint, 1, channel 403 | # TODO: change distance function 404 | if corr_func == 'elementwise_product': 405 | feat_diff = feat2_grouped * feat1_expanded # batch_size, npoint, nsample, channel 406 | elif corr_func == 'concat': ### Using this one 407 | feat_diff = tf.concat(axis=-1, values=[feat2_grouped, 408 | tf.tile(feat1_expanded, 409 | [1, 1, nsample, 410 | 1])]) # batch_size, npoint, sample, channel*2 411 | elif corr_func == 'dot_product': 412 | feat_diff = tf.reduce_sum(feat2_grouped * feat1_expanded, axis=[-1], 413 | keep_dims=True) # batch_size, npoint, nsample, 1 414 | elif corr_func == 'cosine_dist': 415 | feat2_grouped = tf.nn.l2_normalize(feat2_grouped, -1) 416 | feat1_expanded = tf.nn.l2_normalize(feat1_expanded, -1) 417 | feat_diff = tf.reduce_sum(feat2_grouped * feat1_expanded, axis=[-1], 418 | keep_dims=True) # batch_size, npoint, nsample, 1 419 | elif corr_func == 'flownet_like': # assuming square patch size k = 0 as the FlowNet paper 420 | batch_size = xyz1.get_shape()[0].value 421 | npoint = xyz1.get_shape()[1].value 422 | feat_diff = tf.reduce_sum(feat2_grouped * feat1_expanded, axis=[-1], 423 | keep_dims=True) # batch_size, npoint, nsample, 1 424 | total_diff = tf.concat(axis=-1, values=[xyz_diff, 425 | feat_diff]) # batch_size, npoint, nsample, 4 426 | feat1_new = 
tf.reshape(total_diff, [batch_size, npoint, 427 | -1]) # batch_size, npoint, nsample*4 428 | # feat1_new = tf.concat(axis=[-1], values=[feat1_new, feat1]) # batch_size, npoint, nsample*4+channel 429 | return xyz1, feat1_new 430 | 431 | # Concat the coordinates and features difference here 432 | feat1_new = tf.concat([feat_diff, xyz_diff], 433 | axis=3) # batch_size, npoint, nsample, [channel or 1] + 3 434 | # TODO: move scope to outer indent 435 | with tf.variable_scope(scope) as sc: 436 | for i, num_out_channel in enumerate(mlp): 437 | feat1_new = tf_util.conv2d(feat1_new, num_out_channel, [1, 1], 438 | padding='VALID', stride=[1, 1], 439 | bn=True, is_training=is_training, 440 | scope='conv_diff_%d' % (i), 441 | bn_decay=bn_decay) 442 | 443 | # Tensor("flow_embedding/conv_diff_0/Relu:0", shape=(16, 256, 64, 128), dtype=float32, device= / device: GPU:0) 444 | # Tensor("flow_embedding/conv_diff_1/Relu:0", shape=(16, 256, 64, 128), dtype=float32, device= / device: GPU:0) 445 | # Tensor("flow_embedding/conv_diff_2/Relu:0", shape=(16, 256, 64, 128), dtype=float32, device= / device: GPU:0) 446 | 447 | if pooling == 'max': 448 | feat1_new = tf.reduce_max(feat1_new, axis=[2], keep_dims=False, 449 | name='maxpool_diff') 450 | elif pooling == 'avg': 451 | feat1_new = tf.reduce_mean(feat1_new, axis=[2], keep_dims=False, 452 | name='avgpool_diff') 453 | return xyz1, feat1_new 454 | 455 | 456 | def set_upconv_module(xyz1, xyz2, feat1, feat2, nsample, mlp, mlp2, is_training, 457 | scope, bn_decay=None, bn=True, pooling='max', radius=None, 458 | knn=True): 459 | """ 460 | Feature propagation from xyz2 (less points) to xyz1 (more points) 461 | 462 | Inputs: 463 | xyz1: (batch_size, npoint1, 3) 464 | xyz2: (batch_size, npoint2, 3) 465 | feat1: (batch_size, npoint1, channel1) features for xyz1 points (earlier layers) 466 | feat2: (batch_size, npoint2, channel2) features for xyz2 points 467 | Output: 468 | feat1_new: (batch_size, npoint2, mlp[-1] or mlp2[-1] or channel1+3) 469 | 470 | TODO: Add support for skip links. Study how delta(XYZ) plays a role in feature updating. 
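    Dataflow, as implemented below: with knn=True the module finds, for every
    xyz1 point, its nsample nearest neighbors in xyz2, concatenates their
    features with the local offsets xyz2_grouped - xyz1, runs the shared mlp
    stack, max- or avg-pools over the neighborhood, then concatenates feat1
    and applies mlp2 -- learned upsampling, in contrast to the fixed
    inverse-distance interpolation pointnet_fp_module does over three_nn
    neighbors.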
471 | """ 472 | with tf.variable_scope(scope) as sc: 473 | if knn: 474 | l2_dist, idx = knn_point(nsample, xyz2, xyz1) 475 | else: 476 | idx, pts_cnt = query_ball_point(radius, nsample, xyz2, xyz1) 477 | 478 | xyz2_grouped = group_point(xyz2, idx) # batch_size, npoint1, nsample, 3 479 | xyz1_expanded = tf.expand_dims(xyz1, 2) # batch_size, npoint1, 1, 3 480 | xyz_diff = xyz2_grouped - xyz1_expanded # batch_size, npoint1, nsample, 3 481 | 482 | feat2_grouped = group_point(feat2, 483 | idx) # batch_size, npoint1, nsample, channel2 484 | net = tf.concat([feat2_grouped, xyz_diff], 485 | axis=3) # batch_size, npoint1, nsample, channel2+3 486 | 487 | if mlp is None: mlp = [] 488 | for i, num_out_channel in enumerate(mlp): 489 | net = tf_util.conv2d(net, num_out_channel, [1, 1], 490 | padding='VALID', stride=[1, 1], 491 | bn=True, is_training=is_training, 492 | scope='conv%d' % (i), bn_decay=bn_decay) 493 | if pooling == 'max': 494 | feat1_new = tf.reduce_max(net, axis=[2], keep_dims=False, 495 | name='maxpool') # batch_size, npoint1, mlp[-1] 496 | elif pooling == 'avg': 497 | feat1_new = tf.reduce_mean(net, axis=[2], keep_dims=False, 498 | name='avgpool') # batch_size, npoint1, mlp[-1] 499 | 500 | if feat1 is not None: 501 | feat1_new = tf.concat([feat1_new, feat1], 502 | axis=2) # batch_size, npoint1, mlp[-1]+channel1 503 | 504 | feat1_new = tf.expand_dims(feat1_new, 505 | 2) # batch_size, npoint1, 1, mlp[-1]+channel2 506 | if mlp2 is None: mlp2 = [] 507 | for i, num_out_channel in enumerate(mlp2): 508 | feat1_new = tf_util.conv2d(feat1_new, num_out_channel, [1, 1], 509 | padding='VALID', stride=[1, 1], 510 | bn=True, is_training=is_training, 511 | scope='post-conv%d' % (i), 512 | bn_decay=bn_decay) 513 | feat1_new = tf.squeeze(feat1_new, [2]) # batch_size, npoint1, mlp2[-1] 514 | return feat1_new 515 | -------------------------------------------------------------------------------- /src/utils/tf_util.py: -------------------------------------------------------------------------------- 1 | """ Wrapper functions for TensorFlow layers. 2 | Author: Charles R. Qi 3 | Date: November 2017 4 | """ 5 | 6 | import numpy as np 7 | import tensorflow as tf 8 | 9 | def _variable_on_cpu(name, shape, initializer, use_fp16=False): 10 | """Helper to create a Variable stored on CPU memory. 11 | Args: 12 | name: name of the variable 13 | shape: list of ints 14 | initializer: initializer for Variable 15 | Returns: 16 | Variable Tensor 17 | """ 18 | with tf.device("/cpu:0"): 19 | dtype = tf.float16 if use_fp16 else tf.float32 20 | var = tf.get_variable(name, shape, initializer=initializer, dtype=dtype) 21 | return var 22 | 23 | def _variable_with_weight_decay(name, shape, stddev, wd, use_xavier=True): 24 | """Helper to create an initialized Variable with weight decay. 25 | 26 | Note that the Variable is initialized with a truncated normal distribution. 27 | A weight decay is added only if one is specified. 28 | 29 | Args: 30 | name: name of the variable 31 | shape: list of ints 32 | stddev: standard deviation of a truncated Gaussian 33 | wd: add L2Loss weight decay multiplied by this float. If None, weight 34 | decay is not added for this Variable. 
35 | use_xavier: bool, whether to use xavier initializer 36 | 37 | Returns: 38 | Variable Tensor 39 | """ 40 | if use_xavier: 41 | initializer = tf.contrib.layers.xavier_initializer() 42 | else: 43 | initializer = tf.truncated_normal_initializer(stddev=stddev) 44 | var = _variable_on_cpu(name, shape, initializer) 45 | if wd is not None: 46 | weight_decay = tf.multiply(tf.nn.l2_loss(var), wd, name='weight_loss') 47 | tf.add_to_collection('losses', weight_decay) 48 | return var 49 | 50 | 51 | def conv1d(inputs, 52 | num_output_channels, 53 | kernel_size, 54 | scope, 55 | stride=1, 56 | padding='SAME', 57 | data_format='NHWC', 58 | use_xavier=True, 59 | stddev=1e-3, 60 | weight_decay=None, 61 | activation_fn=tf.nn.relu, 62 | bn=False, 63 | bn_decay=None, 64 | is_training=None): 65 | """ 1D convolution with non-linear operation. 66 | 67 | Args: 68 | inputs: 3-D tensor variable BxLxC 69 | num_output_channels: int 70 | kernel_size: int 71 | scope: string 72 | stride: int 73 | padding: 'SAME' or 'VALID' 74 | data_format: 'NHWC' or 'NCHW' 75 | use_xavier: bool, use xavier_initializer if true 76 | stddev: float, stddev for truncated_normal init 77 | weight_decay: float 78 | activation_fn: function 79 | bn: bool, whether to use batch norm 80 | bn_decay: float or float tensor variable in [0,1] 81 | is_training: bool Tensor variable 82 | 83 | Returns: 84 | Variable tensor 85 | """ 86 | with tf.variable_scope(scope) as sc: 87 | assert(data_format=='NHWC' or data_format=='NCHW') 88 | if data_format == 'NHWC': 89 | num_in_channels = inputs.get_shape()[-1].value 90 | elif data_format=='NCHW': 91 | num_in_channels = inputs.get_shape()[1].value 92 | kernel_shape = [kernel_size, 93 | num_in_channels, num_output_channels] 94 | kernel = _variable_with_weight_decay('weights', 95 | shape=kernel_shape, 96 | use_xavier=use_xavier, 97 | stddev=stddev, 98 | wd=weight_decay) 99 | outputs = tf.nn.conv1d(inputs, kernel, 100 | stride=stride, 101 | padding=padding, 102 | data_format=data_format) 103 | biases = _variable_on_cpu('biases', [num_output_channels], 104 | tf.constant_initializer(0.0)) 105 | outputs = tf.nn.bias_add(outputs, biases, data_format=data_format) 106 | 107 | if bn: 108 | outputs = batch_norm_for_conv1d(outputs, is_training, 109 | bn_decay=bn_decay, scope='bn', 110 | data_format=data_format) 111 | 112 | if activation_fn is not None: 113 | outputs = activation_fn(outputs) 114 | return outputs 115 | 116 | 117 | 118 | 119 | def conv2d(inputs, 120 | num_output_channels, 121 | kernel_size, 122 | scope, 123 | stride=[1, 1], 124 | padding='SAME', 125 | data_format='NHWC', 126 | use_xavier=True, 127 | stddev=1e-3, 128 | weight_decay=None, 129 | activation_fn=tf.nn.relu, 130 | bn=False, 131 | bn_decay=None, 132 | is_training=None): 133 | """ 2D convolution with non-linear operation. 
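    Example (a typical call, mirroring how the PointNet++ layers above use
    this wrapper; the surrounding names are assumed to be in scope):

        net = conv2d(net, 64, [1, 1], scope='conv0',
                     padding='VALID', stride=[1, 1],
                     bn=True, is_training=is_training, bn_decay=bn_decay)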
134 | 135 | Args: 136 | inputs: 4-D tensor variable BxHxWxC 137 | num_output_channels: int 138 | kernel_size: a list of 2 ints 139 | scope: string 140 | stride: a list of 2 ints 141 | padding: 'SAME' or 'VALID' 142 | data_format: 'NHWC' or 'NCHW' 143 | use_xavier: bool, use xavier_initializer if true 144 | stddev: float, stddev for truncated_normal init 145 | weight_decay: float 146 | activation_fn: function 147 | bn: bool, whether to use batch norm 148 | bn_decay: float or float tensor variable in [0,1] 149 | is_training: bool Tensor variable 150 | 151 | Returns: 152 | Variable tensor 153 | """ 154 | with tf.variable_scope(scope) as sc: 155 | kernel_h, kernel_w = kernel_size 156 | assert(data_format=='NHWC' or data_format=='NCHW') 157 | if data_format == 'NHWC': 158 | num_in_channels = inputs.get_shape()[-1].value 159 | elif data_format=='NCHW': 160 | num_in_channels = inputs.get_shape()[1].value 161 | kernel_shape = [kernel_h, kernel_w, 162 | num_in_channels, num_output_channels] 163 | kernel = _variable_with_weight_decay('weights', 164 | shape=kernel_shape, 165 | use_xavier=use_xavier, 166 | stddev=stddev, 167 | wd=weight_decay) 168 | stride_h, stride_w = stride 169 | outputs = tf.nn.conv2d(inputs, kernel, 170 | [1, stride_h, stride_w, 1], 171 | padding=padding, 172 | data_format=data_format) 173 | biases = _variable_on_cpu('biases', [num_output_channels], 174 | tf.constant_initializer(0.0)) 175 | outputs = tf.nn.bias_add(outputs, biases, data_format=data_format) 176 | 177 | if bn: 178 | outputs = batch_norm_for_conv2d(outputs, is_training, 179 | bn_decay=bn_decay, scope='bn', 180 | data_format=data_format) 181 | 182 | if activation_fn is not None: 183 | outputs = activation_fn(outputs) 184 | return outputs 185 | 186 | 187 | def conv2d_transpose(inputs, 188 | num_output_channels, 189 | kernel_size, 190 | scope, 191 | stride=[1, 1], 192 | padding='SAME', 193 | use_xavier=True, 194 | stddev=1e-3, 195 | weight_decay=None, 196 | activation_fn=tf.nn.relu, 197 | bn=False, 198 | bn_decay=None, 199 | is_training=None): 200 | """ 2D convolution transpose with non-linear operation. 
201 | 202 | Args: 203 | inputs: 4-D tensor variable BxHxWxC 204 | num_output_channels: int 205 | kernel_size: a list of 2 ints 206 | scope: string 207 | stride: a list of 2 ints 208 | padding: 'SAME' or 'VALID' 209 | use_xavier: bool, use xavier_initializer if true 210 | stddev: float, stddev for truncated_normal init 211 | weight_decay: float 212 | activation_fn: function 213 | bn: bool, whether to use batch norm 214 | bn_decay: float or float tensor variable in [0,1] 215 | is_training: bool Tensor variable 216 | 217 | Returns: 218 | Variable tensor 219 | 220 | Note: conv2d(conv2d_transpose(a, num_out, ksize, stride), a.shape[-1], ksize, stride) == a 221 | """ 222 | with tf.variable_scope(scope) as sc: 223 | kernel_h, kernel_w = kernel_size 224 | num_in_channels = inputs.get_shape()[-1].value 225 | kernel_shape = [kernel_h, kernel_w, 226 | num_output_channels, num_in_channels] # reversed to conv2d 227 | kernel = _variable_with_weight_decay('weights', 228 | shape=kernel_shape, 229 | use_xavier=use_xavier, 230 | stddev=stddev, 231 | wd=weight_decay) 232 | stride_h, stride_w = stride 233 | 234 | # from slim.convolution2d_transpose 235 | def get_deconv_dim(dim_size, stride_size, kernel_size, padding): 236 | dim_size *= stride_size 237 | 238 | if padding == 'VALID' and dim_size is not None: 239 | dim_size += max(kernel_size - stride_size, 0) 240 | return dim_size 241 | 242 | # caculate output shape 243 | batch_size = inputs.get_shape()[0].value 244 | height = inputs.get_shape()[1].value 245 | width = inputs.get_shape()[2].value 246 | out_height = get_deconv_dim(height, stride_h, kernel_h, padding) 247 | out_width = get_deconv_dim(width, stride_w, kernel_w, padding) 248 | output_shape = [batch_size, out_height, out_width, num_output_channels] 249 | 250 | outputs = tf.nn.conv2d_transpose(inputs, kernel, output_shape, 251 | [1, stride_h, stride_w, 1], 252 | padding=padding) 253 | biases = _variable_on_cpu('biases', [num_output_channels], 254 | tf.constant_initializer(0.0)) 255 | outputs = tf.nn.bias_add(outputs, biases) 256 | 257 | if bn: 258 | outputs = batch_norm_for_conv2d(outputs, is_training, 259 | bn_decay=bn_decay, scope='bn') 260 | 261 | if activation_fn is not None: 262 | outputs = activation_fn(outputs) 263 | return outputs 264 | 265 | 266 | 267 | def conv3d(inputs, 268 | num_output_channels, 269 | kernel_size, 270 | scope, 271 | stride=[1, 1, 1], 272 | padding='SAME', 273 | use_xavier=True, 274 | stddev=1e-3, 275 | weight_decay=None, 276 | activation_fn=tf.nn.relu, 277 | bn=False, 278 | bn_decay=None, 279 | is_training=None): 280 | """ 3D convolution with non-linear operation. 
281 | 282 | Args: 283 | inputs: 5-D tensor variable BxDxHxWxC 284 | num_output_channels: int 285 | kernel_size: a list of 3 ints 286 | scope: string 287 | stride: a list of 3 ints 288 | padding: 'SAME' or 'VALID' 289 | use_xavier: bool, use xavier_initializer if true 290 | stddev: float, stddev for truncated_normal init 291 | weight_decay: float 292 | activation_fn: function 293 | bn: bool, whether to use batch norm 294 | bn_decay: float or float tensor variable in [0,1] 295 | is_training: bool Tensor variable 296 | 297 | Returns: 298 | Variable tensor 299 | """ 300 | with tf.variable_scope(scope) as sc: 301 | kernel_d, kernel_h, kernel_w = kernel_size 302 | num_in_channels = inputs.get_shape()[-1].value 303 | kernel_shape = [kernel_d, kernel_h, kernel_w, 304 | num_in_channels, num_output_channels] 305 | kernel = _variable_with_weight_decay('weights', 306 | shape=kernel_shape, 307 | use_xavier=use_xavier, 308 | stddev=stddev, 309 | wd=weight_decay) 310 | stride_d, stride_h, stride_w = stride 311 | outputs = tf.nn.conv3d(inputs, kernel, 312 | [1, stride_d, stride_h, stride_w, 1], 313 | padding=padding) 314 | biases = _variable_on_cpu('biases', [num_output_channels], 315 | tf.constant_initializer(0.0)) 316 | outputs = tf.nn.bias_add(outputs, biases) 317 | 318 | if bn: 319 | outputs = batch_norm_for_conv3d(outputs, is_training, 320 | bn_decay=bn_decay, scope='bn') 321 | 322 | if activation_fn is not None: 323 | outputs = activation_fn(outputs) 324 | return outputs 325 | 326 | def fully_connected(inputs, 327 | num_outputs, 328 | scope, 329 | use_xavier=True, 330 | stddev=1e-3, 331 | weight_decay=None, 332 | activation_fn=tf.nn.relu, 333 | bn=False, 334 | bn_decay=None, 335 | is_training=None): 336 | """ Fully connected layer with non-linear operation. 337 | 338 | Args: 339 | inputs: 2-D tensor BxN 340 | num_outputs: int 341 | 342 | Returns: 343 | Variable tensor of size B x num_outputs. 344 | """ 345 | with tf.variable_scope(scope) as sc: 346 | num_input_units = inputs.get_shape()[-1].value 347 | weights = _variable_with_weight_decay('weights', 348 | shape=[num_input_units, num_outputs], 349 | use_xavier=use_xavier, 350 | stddev=stddev, 351 | wd=weight_decay) 352 | outputs = tf.matmul(inputs, weights) 353 | biases = _variable_on_cpu('biases', [num_outputs], 354 | tf.constant_initializer(0.0)) 355 | outputs = tf.nn.bias_add(outputs, biases) 356 | 357 | if bn: 358 | outputs = batch_norm_for_fc(outputs, is_training, bn_decay, 'bn') 359 | 360 | if activation_fn is not None: 361 | outputs = activation_fn(outputs) 362 | return outputs 363 | 364 | 365 | def max_pool2d(inputs, 366 | kernel_size, 367 | scope, 368 | stride=[2, 2], 369 | padding='VALID'): 370 | """ 2D max pooling. 371 | 372 | Args: 373 | inputs: 4-D tensor BxHxWxC 374 | kernel_size: a list of 2 ints 375 | stride: a list of 2 ints 376 | 377 | Returns: 378 | Variable tensor 379 | """ 380 | with tf.variable_scope(scope) as sc: 381 | kernel_h, kernel_w = kernel_size 382 | stride_h, stride_w = stride 383 | outputs = tf.nn.max_pool(inputs, 384 | ksize=[1, kernel_h, kernel_w, 1], 385 | strides=[1, stride_h, stride_w, 1], 386 | padding=padding, 387 | name=sc.name) 388 | return outputs 389 | 390 | def avg_pool2d(inputs, 391 | kernel_size, 392 | scope, 393 | stride=[2, 2], 394 | padding='VALID'): 395 | """ 2D avg pooling. 
365 | def max_pool2d(inputs,
366 |                kernel_size,
367 |                scope,
368 |                stride=[2, 2],
369 |                padding='VALID'):
370 |   """ 2D max pooling.
371 |
372 |   Args:
373 |     inputs: 4-D tensor BxHxWxC
374 |     kernel_size: a list of 2 ints
375 |     stride: a list of 2 ints
376 |
377 |   Returns:
378 |     Variable tensor
379 |   """
380 |   with tf.variable_scope(scope) as sc:
381 |     kernel_h, kernel_w = kernel_size
382 |     stride_h, stride_w = stride
383 |     outputs = tf.nn.max_pool(inputs,
384 |                              ksize=[1, kernel_h, kernel_w, 1],
385 |                              strides=[1, stride_h, stride_w, 1],
386 |                              padding=padding,
387 |                              name=sc.name)
388 |     return outputs
389 |
390 | def avg_pool2d(inputs,
391 |                kernel_size,
392 |                scope,
393 |                stride=[2, 2],
394 |                padding='VALID'):
395 |   """ 2D avg pooling.
396 |
397 |   Args:
398 |     inputs: 4-D tensor BxHxWxC
399 |     kernel_size: a list of 2 ints
400 |     stride: a list of 2 ints
401 |
402 |   Returns:
403 |     Variable tensor
404 |   """
405 |   with tf.variable_scope(scope) as sc:
406 |     kernel_h, kernel_w = kernel_size
407 |     stride_h, stride_w = stride
408 |     outputs = tf.nn.avg_pool(inputs,
409 |                              ksize=[1, kernel_h, kernel_w, 1],
410 |                              strides=[1, stride_h, stride_w, 1],
411 |                              padding=padding,
412 |                              name=sc.name)
413 |     return outputs
414 |
415 |
416 | def max_pool3d(inputs,
417 |                kernel_size,
418 |                scope,
419 |                stride=[2, 2, 2],
420 |                padding='VALID'):
421 |   """ 3D max pooling.
422 |
423 |   Args:
424 |     inputs: 5-D tensor BxDxHxWxC
425 |     kernel_size: a list of 3 ints
426 |     stride: a list of 3 ints
427 |
428 |   Returns:
429 |     Variable tensor
430 |   """
431 |   with tf.variable_scope(scope) as sc:
432 |     kernel_d, kernel_h, kernel_w = kernel_size
433 |     stride_d, stride_h, stride_w = stride
434 |     outputs = tf.nn.max_pool3d(inputs,
435 |                                ksize=[1, kernel_d, kernel_h, kernel_w, 1],
436 |                                strides=[1, stride_d, stride_h, stride_w, 1],
437 |                                padding=padding,
438 |                                name=sc.name)
439 |     return outputs
440 |
441 | def avg_pool3d(inputs,
442 |                kernel_size,
443 |                scope,
444 |                stride=[2, 2, 2],
445 |                padding='VALID'):
446 |   """ 3D avg pooling.
447 |
448 |   Args:
449 |     inputs: 5-D tensor BxDxHxWxC
450 |     kernel_size: a list of 3 ints
451 |     stride: a list of 3 ints
452 |
453 |   Returns:
454 |     Variable tensor
455 |   """
456 |   with tf.variable_scope(scope) as sc:
457 |     kernel_d, kernel_h, kernel_w = kernel_size
458 |     stride_d, stride_h, stride_w = stride
459 |     outputs = tf.nn.avg_pool3d(inputs,
460 |                                ksize=[1, kernel_d, kernel_h, kernel_w, 1],
461 |                                strides=[1, stride_d, stride_h, stride_w, 1],
462 |                                padding=padding,
463 |                                name=sc.name)
464 |     return outputs
465 |
466 |
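All four pooling wrappers default to `'VALID'` padding, so each spatial dimension shrinks by the usual rule out = floor((in - kernel) / stride) + 1. A quick worked check (editor's sketch; the helper name and sizes are invented):

```
def valid_pool_dim(in_size, kernel, stride):
    # Standard 'VALID' pooling arithmetic used by tf.nn.max_pool and friends.
    return (in_size - kernel) // stride + 1

assert valid_pool_dim(16, 2, 2) == 8  # B x 16 x 16 x C -> B x 8 x 8 x C
assert valid_pool_dim(15, 2, 2) == 7  # odd sizes floor, dropping the last row/column
```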
467 | def batch_norm_template_unused(inputs, is_training, scope, moments_dims, bn_decay):
468 |   """ NOTE: this is an older version of the util function; it is deprecated.
469 |   Batch normalization on convolutional maps and beyond...
470 |   Ref.: http://stackoverflow.com/questions/33949786/how-could-i-use-batch-normalization-in-tensorflow
471 |
472 |   Args:
473 |     inputs: Tensor, k-D input ... x C could be BC or BHWC or BDHWC
474 |     is_training: boolean tf.Variable, true indicates training phase
475 |     scope: string, variable scope
476 |     moments_dims: a list of ints, indicating dimensions for moments calculation
477 |     bn_decay: float or float tensor variable, controlling moving average weight
478 |   Return:
479 |     normed: batch-normalized maps
480 |   """
481 |   with tf.variable_scope(scope) as sc:
482 |     num_channels = inputs.get_shape()[-1].value
483 |     beta = _variable_on_cpu(name='beta', shape=[num_channels],
484 |                             initializer=tf.constant_initializer(0))
485 |     gamma = _variable_on_cpu(name='gamma', shape=[num_channels],
486 |                              initializer=tf.constant_initializer(1.0))
487 |     batch_mean, batch_var = tf.nn.moments(inputs, moments_dims, name='moments')
488 |     decay = bn_decay if bn_decay is not None else 0.9
489 |     ema = tf.train.ExponentialMovingAverage(decay=decay)
490 |     # Operator that maintains moving averages of variables.
491 |     # Need to set reuse=False; otherwise, under reuse, moments_1/mean/ExponentialMovingAverage/ does not exist
492 |     # https://github.com/shekkizh/WassersteinGAN.tensorflow/issues/3
493 |     with tf.variable_scope(tf.get_variable_scope(), reuse=False):
494 |       ema_apply_op = tf.cond(is_training,
495 |                              lambda: ema.apply([batch_mean, batch_var]),
496 |                              lambda: tf.no_op())
497 |
498 |     # Update moving average and return current batch's avg and var.
499 |     def mean_var_with_update():
500 |       with tf.control_dependencies([ema_apply_op]):
501 |         return tf.identity(batch_mean), tf.identity(batch_var)
502 |
503 |     # ema.average returns the Variable holding the average of var.
504 |     mean, var = tf.cond(is_training,
505 |                         mean_var_with_update,
506 |                         lambda: (ema.average(batch_mean), ema.average(batch_var)))
507 |     normed = tf.nn.batch_normalization(inputs, mean, var, beta, gamma, 1e-3)
508 |     return normed
509 |
510 |
511 | def batch_norm_template(inputs, is_training, scope, moments_dims_unused, bn_decay, data_format='NHWC'):
512 |   """ Batch normalization on convolutional maps and beyond...
513 |   Ref.: http://stackoverflow.com/questions/33949786/how-could-i-use-batch-normalization-in-tensorflow
514 |
515 |   Args:
516 |     inputs: Tensor, k-D input ... x C could be BC or BHWC or BDHWC
517 |     is_training: boolean tf.Variable, true indicates training phase
518 |     scope: string, variable scope
519 |     moments_dims: a list of ints, indicating dimensions for moments calculation
520 |     bn_decay: float or float tensor variable, controlling moving average weight
521 |     data_format: 'NHWC' or 'NCHW'
522 |   Return:
523 |     normed: batch-normalized maps
524 |   """
525 |   bn_decay = bn_decay if bn_decay is not None else 0.9
526 |   return tf.contrib.layers.batch_norm(inputs,
527 |                                       center=True, scale=True,
528 |                                       is_training=is_training, decay=bn_decay, updates_collections=None,
529 |                                       scope=scope,
530 |                                       data_format=data_format)
531 |
532 |
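In PointNet-style training code, the `bn_decay` fed into these wrappers is usually not a constant but is annealed toward 1 as training progresses. The sketch below is a representative schedule of that kind, offered as an editor's illustration only; the constants are common defaults and are not claimed to be the values this repository's training scripts use.

```
import tensorflow as tf

# Representative PointNet-style bn_decay schedule (illustration only).
BN_INIT_DECAY = 0.5
BN_DECAY_RATE = 0.5
BN_DECAY_STEP = 200000
BN_DECAY_CLIP = 0.99

def get_bn_decay(global_step):
    # Momentum of the moving average ramps from 0.5 toward BN_DECAY_CLIP,
    # so early batches dominate the statistics less as training stabilizes.
    bn_momentum = tf.train.exponential_decay(BN_INIT_DECAY, global_step,
                                             BN_DECAY_STEP, BN_DECAY_RATE,
                                             staircase=True)
    return tf.minimum(BN_DECAY_CLIP, 1 - bn_momentum)
```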
533 | def batch_norm_for_fc(inputs, is_training, bn_decay, scope):
534 |   """ Batch normalization on FC data.
535 |
536 |   Args:
537 |     inputs: Tensor, 2D BxC input
538 |     is_training: boolean tf.Variable, true indicates training phase
539 |     bn_decay: float or float tensor variable, controlling moving average weight
540 |     scope: string, variable scope
541 |   Return:
542 |     normed: batch-normalized maps
543 |   """
544 |   return batch_norm_template(inputs, is_training, scope, [0,], bn_decay)
545 |
546 |
547 | def batch_norm_for_conv1d(inputs, is_training, bn_decay, scope, data_format='NHWC'):  # default added so callers may omit data_format
548 |   """ Batch normalization on 1D convolutional maps.
549 |
550 |   Args:
551 |     inputs: Tensor, 3D BLC input maps
552 |     is_training: boolean tf.Variable, true indicates training phase
553 |     bn_decay: float or float tensor variable, controlling moving average weight
554 |     scope: string, variable scope
555 |     data_format: 'NHWC' or 'NCHW'
556 |   Return:
557 |     normed: batch-normalized maps
558 |   """
559 |   return batch_norm_template(inputs, is_training, scope, [0,1], bn_decay, data_format)
560 |
561 |
562 |
563 |
564 | def batch_norm_for_conv2d(inputs, is_training, bn_decay, scope, data_format='NHWC'):  # default keeps conv2d_transpose's call (which omits data_format) valid
565 |   """ Batch normalization on 2D convolutional maps.
566 |
567 |   Args:
568 |     inputs: Tensor, 4D BHWC input maps
569 |     is_training: boolean tf.Variable, true indicates training phase
570 |     bn_decay: float or float tensor variable, controlling moving average weight
571 |     scope: string, variable scope
572 |     data_format: 'NHWC' or 'NCHW'
573 |   Return:
574 |     normed: batch-normalized maps
575 |   """
576 |   return batch_norm_template(inputs, is_training, scope, [0,1,2], bn_decay, data_format)
577 |
578 |
579 | def batch_norm_for_conv3d(inputs, is_training, bn_decay, scope):
580 |   """ Batch normalization on 3D convolutional maps.
581 |
582 |   Args:
583 |     inputs: Tensor, 5D BDHWC input maps
584 |     is_training: boolean tf.Variable, true indicates training phase
585 |     bn_decay: float or float tensor variable, controlling moving average weight
586 |     scope: string, variable scope
587 |   Return:
588 |     normed: batch-normalized maps
589 |   """
590 |   return batch_norm_template(inputs, is_training, scope, [0,1,2,3], bn_decay)
591 |
592 |
593 | def dropout(inputs,
594 |             is_training,
595 |             scope,
596 |             keep_prob=0.5,
597 |             noise_shape=None):
598 |   """ Dropout layer.
599 |
600 |   Args:
601 |     inputs: tensor
602 |     is_training: boolean tf.Variable
603 |     scope: string
604 |     keep_prob: float in [0,1]
605 |     noise_shape: list of ints
606 |
607 |   Returns:
608 |     tensor variable
609 |   """
610 |   with tf.variable_scope(scope) as sc:
611 |     outputs = tf.cond(is_training,
612 |                       lambda: tf.nn.dropout(inputs, keep_prob, noise_shape),
613 |                       lambda: inputs)
614 |     return outputs
615 |
--------------------------------------------------------------------------------
/visualization.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | import open3d as o3d
3 | import os
4 |
5 | _dir = 'kitti_self_supervised_flow/train'
6 | file_name = '000000.npz'
7 | point_cloud = np.load(os.path.join(_dir, file_name))
8 |
9 | pc1 = point_cloud['pos1']
10 | pc2 = point_cloud['pos2']
11 | gt_flow = point_cloud['gt'] + pc1  # flow applied to pc1: where each point lands in frame 2
12 |
13 | print('Point Cloud 1 shape: {}, Point Cloud 2 shape: {}, Flow shape: {}'.format(pc1.shape, pc2.shape, gt_flow.shape))
14 |
15 | pcd1 = o3d.geometry.PointCloud()
16 | pcd1.points = o3d.utility.Vector3dVector(pc1)
17 | pcd1.paint_uniform_color([1, 0, 0])  # frame 1: red
18 |
19 | pcd2 = o3d.geometry.PointCloud()
20 | pcd2.points = o3d.utility.Vector3dVector(pc2)
21 | pcd2.paint_uniform_color([0, 1, 0])  # frame 2: green
22 |
23 | pcd3 = o3d.geometry.PointCloud()
24 | pcd3.points = o3d.utility.Vector3dVector(gt_flow)
25 | pcd3.paint_uniform_color([0, 0, 1])  # flowed frame 1 points: blue
26 |
27 | o3d.visualization.draw_geometries([pcd1, pcd2, pcd3])
28 |
--------------------------------------------------------------------------------
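To make the flow itself visible rather than only the three colored point sets, one can also draw a segment from each point in `pc1` to its flowed position. The sketch below is an editor's addition, not part of the repository; it assumes the variables from `visualization.py` above are still in scope.

```
# Draw per-point flow vectors as line segments from pc1 to pc1 + gt flow.
import numpy as np
import open3d as o3d

points = np.concatenate([pc1, gt_flow], axis=0)        # endpoints of every segment
lines = [[i, i + len(pc1)] for i in range(len(pc1))]   # connect point i to its flowed target

line_set = o3d.geometry.LineSet()
line_set.points = o3d.utility.Vector3dVector(points)
line_set.lines = o3d.utility.Vector2iVector(lines)
line_set.paint_uniform_color([0.5, 0.5, 0.5])          # gray flow lines

o3d.visualization.draw_geometries([pcd1, pcd3, line_set])
```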