├── avod
│   ├── core
│   │   ├── __init__.py
│   │   ├── models
│   │   │   └── __init__.py
│   │   ├── avod_fc_layers
│   │   │   ├── __init__.py
│   │   │   └── avod_fc_layer_utils.py
│   │   ├── bev_generators
│   │   │   ├── __init__.py
│   │   │   └── bev_generator.py
│   │   ├── anchor_generators
│   │   │   ├── __init__.py
│   │   │   ├── grid_anchor_3d_generator_test.py
│   │   │   └── grid_anchor_3d_generator.py
│   │   ├── feature_extractors
│   │   │   ├── __init__.py
│   │   │   ├── bev_feature_extractor.py
│   │   │   ├── img_feature_extractor.py
│   │   │   └── bev_vgg_test.py
│   │   ├── minibatch_samplers
│   │   │   ├── __init__.py
│   │   │   ├── balanced_positive_negative_sampler_test.py
│   │   │   └── balanced_positive_negative_sampler.py
│   │   ├── constants.py
│   │   ├── orientation_encoder.py
│   │   ├── box_3d_projector_test.py
│   │   ├── ops.py
│   │   ├── trainer_utils.py
│   │   ├── model.py
│   │   ├── anchor_generator.py
│   │   ├── losses_test.py
│   │   ├── label_cluster_utils_test.py
│   │   ├── format_checker_test.py
│   │   ├── minibatch_sampler.py
│   │   ├── minibatch_sampler_test.py
│   │   ├── anchor_filter_test.py
│   │   ├── orientation_encoder_test.py
│   │   ├── trainer_test.py
│   │   ├── summary_utils.py
│   │   ├── ops_test.py
│   │   ├── mini_batch_utils_test.py
│   │   └── anchor_encoder_test.py
│   ├── builders
│   │   ├── __init__.py
│   │   ├── bev_generator_builder.py
│   │   ├── feature_extractor_builder.py
│   │   ├── avod_fc_layers_builder.py
│   │   ├── config_builder_util.py
│   │   └── optimizer_builder.py
│   ├── datasets
│   │   ├── __init__.py
│   │   └── kitti
│   │       ├── __init__.py
│   │       ├── kitti_aug_test.py
│   │       └── kitti_utils_test.py
│   ├── experiments
│   │   ├── __init__.py
│   │   ├── run_training.py
│   │   ├── run_evaluation.py
│   │   └── run_inference.py
│   ├── tests
│   │   ├── datasets
│   │   │   └── Kitti
│   │   │       └── object
│   │   │           ├── val.txt
│   │   │           ├── train.txt
│   │   │           ├── training
│   │   │           │   ├── label_2
│   │   │           │   │   ├── 000000.txt
│   │   │           │   │   ├── 000002.txt
│   │   │           │   │   ├── 000003.txt
│   │   │           │   │   ├── 000005.txt
│   │   │           │   │   ├── 000009.txt
│   │   │           │   │   ├── 000006.txt
│   │   │           │   │   ├── 000007.txt
│   │   │           │   │   ├── 000004.txt
│   │   │           │   │   ├── 000001.txt
│   │   │           │   │   ├── 000217.txt
│   │   │           │   │   ├── 000008.txt
│   │   │           │   │   ├── 000076.txt
│   │   │           │   │   └── 000142.txt
│   │   │           │   ├── planes
│   │   │           │   │   ├── 000000.txt
│   │   │           │   │   ├── 000001.txt
│   │   │           │   │   ├── 000002.txt
│   │   │           │   │   ├── 000003.txt
│   │   │           │   │   ├── 000004.txt
│   │   │           │   │   ├── 000005.txt
│   │   │           │   │   ├── 000006.txt
│   │   │           │   │   ├── 000007.txt
│   │   │           │   │   ├── 000008.txt
│   │   │           │   │   ├── 000009.txt
│   │   │           │   │   ├── 000076.txt
│   │   │           │   │   ├── 000142.txt
│   │   │           │   │   └── 000217.txt
│   │   │           │   ├── image_2
│   │   │           │   │   ├── 000000.png
│   │   │           │   │   ├── 000001.png
│   │   │           │   │   ├── 000002.png
│   │   │           │   │   ├── 000003.png
│   │   │           │   │   ├── 000004.png
│   │   │           │   │   ├── 000005.png
│   │   │           │   │   ├── 000006.png
│   │   │           │   │   ├── 000007.png
│   │   │           │   │   ├── 000008.png
│   │   │           │   │   ├── 000009.png
│   │   │           │   │   ├── 000076.png
│   │   │           │   │   ├── 000142.png
│   │   │           │   │   └── 000217.png
│   │   │           │   ├── velodyne
│   │   │           │   │   ├── 000000.bin
│   │   │           │   │   ├── 000001.bin
│   │   │           │   │   ├── 000002.bin
│   │   │           │   │   ├── 000003.bin
│   │   │           │   │   ├── 000004.bin
│   │   │           │   │   ├── 000005.bin
│   │   │           │   │   ├── 000006.bin
│   │   │           │   │   ├── 000007.bin
│   │   │           │   │   ├── 000008.bin
│   │   │           │   │   ├── 000009.bin
│   │   │           │   │   ├── 000076.bin
│   │   │           │   │   ├── 000142.bin
│   │   │           │   │   └── 000217.bin
│   │   │           │   └── calib
│   │   │           │       ├── 000000.txt
│   │   │           │       ├── 000001.txt
│   │   │           │       ├── 000002.txt
│   │   │           │       ├── 000003.txt
│   │   │           │       ├── 000004.txt
│   │   │           │       ├── 000005.txt
│   │   │           │       ├── 000007.txt
│   │   │           │       ├── 000008.txt
│   │   │           │       ├── 000009.txt
│   │   │           │       ├── 000142.txt
│   │   │           │       ├── 000217.txt
│   │   │           │       ├── 000006.txt
│   │   │           │       └── 000076.txt
│   │   │           ├── test.txt
│   │   │           └── trainval.txt
│   │   ├── __init__.py
│   │   └── run_unit_tests.sh
│   ├── protos
│   │   ├── run_protoc.sh
│   │   ├── clear_protos.sh
│   │   ├── pipeline.proto
│   │   ├── eval.proto
│   │   ├── kitti_utils.proto
│   │   ├── train.proto
│   │   ├── mini_batch.proto
│   │   ├── kitti_dataset.proto
│   │   ├── optimizer.proto
│   │   ├── layers.proto
│   │   └── model.proto
│   ├── __init__.py
│   ├── configs
│   │   ├── mb_preprocessing
│   │   │   ├── rpn_cars.config
│   │   │   ├── rpn_cyclists.config
│   │   │   ├── rpn_pedestrians.config
│   │   │   └── rpn_people.config
│   │   ├── unittest_model.config
│   │   └── unittest_pipeline.config
│   └── utils
│       └── demo_utils.py
├── scripts
│   ├── __init__.py
│   ├── preprocessing
│   │   ├── __init__.py
│   │   ├── travis_test_preprocessing.py
│   │   └── gen_label_clusters.py
│   ├── offline_eval
│   │   ├── .gitignore
│   │   ├── kitti_native_eval
│   │   │   ├── run_make.sh
│   │   │   ├── run_eval.sh
│   │   │   ├── run_eval_05_iou.sh
│   │   │   ├── all_eval.sh
│   │   │   ├── Makefile
│   │   │   ├── mail.h
│   │   │   └── README.md
│   │   └── plot_ap.py
│   └── install
│       ├── build_integral_image_lib.bash
│       └── travis_install.bash
├── .coveragerc
├── .gitmodules
├── requirements.txt
├── .gitignore
├── .travis.yml
├── LICENSE
└── demos
    └── dataset
        ├── data_mean.py
        ├── data_histograms.py
        └── car_clustering.py

/avod/core/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/scripts/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/avod/builders/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/avod/core/models/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/avod/datasets/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/avod/experiments/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/avod/datasets/kitti/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/scripts/preprocessing/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/avod/core/avod_fc_layers/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/avod/core/bev_generators/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/avod/core/anchor_generators/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/avod/core/feature_extractors/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/avod/core/minibatch_samplers/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/scripts/offline_eval/.gitignore:
--------------------------------------------------------------------------------
1 | merged*
2 | results*
3 |
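The empty `__init__.py` files above are plain Python package markers. The only non-trivial ones, `avod/__init__.py` and `avod/tests/__init__.py` (both reproduced later in this dump), expose small path helpers. A minimal usage sketch, assuming the repository root is on `PYTHONPATH`:

```python
import avod

# root_dir() resolves to the avod package directory,
# top_dir() to the repository checkout that contains it.
print(avod.root_dir())  # e.g. /path/to/checkout/avod
print(avod.top_dir())   # e.g. /path/to/checkout
```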
-------------------------------------------------------------------------------- /.coveragerc: -------------------------------------------------------------------------------- 1 | [report] 2 | omit = */__init__.py, */test_*.py, scripts/* 3 | -------------------------------------------------------------------------------- /avod/tests/datasets/Kitti/object/val.txt: -------------------------------------------------------------------------------- 1 | 000001 2 | 000002 3 | 000004 4 | 000005 5 | 000006 6 | 000008 -------------------------------------------------------------------------------- /.gitmodules: -------------------------------------------------------------------------------- 1 | [submodule "wavedata"] 2 | path = wavedata 3 | url = git@github.com:kujason/wavedata.git 4 | -------------------------------------------------------------------------------- /avod/tests/datasets/Kitti/object/train.txt: -------------------------------------------------------------------------------- 1 | 000000 2 | 000003 3 | 000007 4 | 000009 5 | 000076 6 | 000142 7 | 000217 -------------------------------------------------------------------------------- /avod/tests/__init__.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | 4 | def test_path(): 5 | return os.path.dirname(os.path.realpath(__file__)) 6 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | matplotlib 2 | numpy>=1.13.0 3 | opencv-python 4 | pandas 5 | pillow 6 | protobuf==3.2.0 7 | scipy 8 | sklearn 9 | -------------------------------------------------------------------------------- /scripts/offline_eval/kitti_native_eval/run_make.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | set -e 4 | 5 | cd $1 6 | make -f Makefile main eval_05_iou 7 | -------------------------------------------------------------------------------- /avod/tests/datasets/Kitti/object/training/label_2/000000.txt: -------------------------------------------------------------------------------- 1 | Pedestrian 0.00 0 -0.20 712.40 143.00 810.73 307.92 1.89 0.48 1.20 1.84 1.47 8.41 0.01 2 | -------------------------------------------------------------------------------- /avod/tests/datasets/Kitti/object/test.txt: -------------------------------------------------------------------------------- 1 | 000000 2 | 000001 3 | 000002 4 | 000003 5 | 000004 6 | 000005 7 | 000006 8 | 000007 9 | 000008 10 | 000009 -------------------------------------------------------------------------------- /avod/tests/datasets/Kitti/object/training/planes/000000.txt: -------------------------------------------------------------------------------- 1 | # Matrix 2 | WIDTH 4 3 | HEIGHT 1 4 | -7.051729e-03 -9.997791e-01 -1.980151e-02 1.680367e+00 5 | -------------------------------------------------------------------------------- /avod/tests/datasets/Kitti/object/training/planes/000001.txt: -------------------------------------------------------------------------------- 1 | # Matrix 2 | WIDTH 4 3 | HEIGHT 1 4 | -1.851372e-02 -9.998285e-01 -5.362325e-04 1.678761e+00 5 | -------------------------------------------------------------------------------- /avod/tests/datasets/Kitti/object/training/planes/000002.txt: -------------------------------------------------------------------------------- 1 | # Matrix 2 | WIDTH 4 3 | HEIGHT 1 4 | -8.587473e-03 -9.995657e-01 2.818883e-02 
1.519515e+00 5 | -------------------------------------------------------------------------------- /avod/tests/datasets/Kitti/object/training/planes/000003.txt: -------------------------------------------------------------------------------- 1 | # Matrix 2 | WIDTH 4 3 | HEIGHT 1 4 | -4.009626e-02 -9.986394e-01 3.334112e-02 1.473070e+00 5 | -------------------------------------------------------------------------------- /avod/tests/datasets/Kitti/object/training/planes/000004.txt: -------------------------------------------------------------------------------- 1 | # Matrix 2 | WIDTH 4 3 | HEIGHT 1 4 | -1.223635e-02 -9.999069e-01 6.044845e-03 1.632201e+00 5 | -------------------------------------------------------------------------------- /avod/tests/datasets/Kitti/object/training/planes/000005.txt: -------------------------------------------------------------------------------- 1 | # Matrix 2 | WIDTH 4 3 | HEIGHT 1 4 | -2.917402e-02 -9.995687e-01 3.349818e-03 1.637302e+00 5 | -------------------------------------------------------------------------------- /avod/tests/datasets/Kitti/object/training/planes/000006.txt: -------------------------------------------------------------------------------- 1 | # Matrix 2 | WIDTH 4 3 | HEIGHT 1 4 | -1.691065e-02 -9.997467e-01 -1.485037e-02 1.664847e+00 5 | -------------------------------------------------------------------------------- /avod/tests/datasets/Kitti/object/training/planes/000007.txt: -------------------------------------------------------------------------------- 1 | # Matrix 2 | WIDTH 4 3 | HEIGHT 1 4 | -1.949878e-02 -9.998097e-01 -5.575465e-04 1.721678e+00 5 | -------------------------------------------------------------------------------- /avod/tests/datasets/Kitti/object/training/planes/000008.txt: -------------------------------------------------------------------------------- 1 | # Matrix 2 | WIDTH 4 3 | HEIGHT 1 4 | 3.892676e-02 -9.991437e-01 -1.401704e-02 1.760554e+00 5 | -------------------------------------------------------------------------------- /avod/tests/datasets/Kitti/object/training/planes/000009.txt: -------------------------------------------------------------------------------- 1 | # Matrix 2 | WIDTH 4 3 | HEIGHT 1 4 | -1.492800e-02 -9.998797e-01 4.205277e-03 1.641223e+00 5 | -------------------------------------------------------------------------------- /avod/tests/datasets/Kitti/object/training/planes/000076.txt: -------------------------------------------------------------------------------- 1 | # Matrix 2 | WIDTH 4 3 | HEIGHT 1 4 | -1.848409e-02 -9.997599e-01 -1.176761e-02 1.627940e+00 5 | -------------------------------------------------------------------------------- /avod/tests/datasets/Kitti/object/training/planes/000142.txt: -------------------------------------------------------------------------------- 1 | # Matrix 2 | WIDTH 4 3 | HEIGHT 1 4 | -1.937749e-02 -9.997965e-01 -5.601527e-03 1.696127e+00 5 | -------------------------------------------------------------------------------- /avod/tests/datasets/Kitti/object/training/planes/000217.txt: -------------------------------------------------------------------------------- 1 | # Matrix 2 | WIDTH 4 3 | HEIGHT 1 4 | -4.133546e-02 -9.991421e-01 2.540240e-03 1.635158e+00 5 | -------------------------------------------------------------------------------- /avod/tests/datasets/Kitti/object/training/image_2/000000.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/kujason/avod/HEAD/avod/tests/datasets/Kitti/object/training/image_2/000000.png -------------------------------------------------------------------------------- /avod/tests/datasets/Kitti/object/training/image_2/000001.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kujason/avod/HEAD/avod/tests/datasets/Kitti/object/training/image_2/000001.png -------------------------------------------------------------------------------- /avod/tests/datasets/Kitti/object/training/image_2/000002.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kujason/avod/HEAD/avod/tests/datasets/Kitti/object/training/image_2/000002.png -------------------------------------------------------------------------------- /avod/tests/datasets/Kitti/object/training/image_2/000003.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kujason/avod/HEAD/avod/tests/datasets/Kitti/object/training/image_2/000003.png -------------------------------------------------------------------------------- /avod/tests/datasets/Kitti/object/training/image_2/000004.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kujason/avod/HEAD/avod/tests/datasets/Kitti/object/training/image_2/000004.png -------------------------------------------------------------------------------- /avod/tests/datasets/Kitti/object/training/image_2/000005.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kujason/avod/HEAD/avod/tests/datasets/Kitti/object/training/image_2/000005.png -------------------------------------------------------------------------------- /avod/tests/datasets/Kitti/object/training/image_2/000006.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kujason/avod/HEAD/avod/tests/datasets/Kitti/object/training/image_2/000006.png -------------------------------------------------------------------------------- /avod/tests/datasets/Kitti/object/training/image_2/000007.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kujason/avod/HEAD/avod/tests/datasets/Kitti/object/training/image_2/000007.png -------------------------------------------------------------------------------- /avod/tests/datasets/Kitti/object/training/image_2/000008.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kujason/avod/HEAD/avod/tests/datasets/Kitti/object/training/image_2/000008.png -------------------------------------------------------------------------------- /avod/tests/datasets/Kitti/object/training/image_2/000009.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kujason/avod/HEAD/avod/tests/datasets/Kitti/object/training/image_2/000009.png -------------------------------------------------------------------------------- /avod/tests/datasets/Kitti/object/training/image_2/000076.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kujason/avod/HEAD/avod/tests/datasets/Kitti/object/training/image_2/000076.png 
-------------------------------------------------------------------------------- /avod/tests/datasets/Kitti/object/training/image_2/000142.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kujason/avod/HEAD/avod/tests/datasets/Kitti/object/training/image_2/000142.png -------------------------------------------------------------------------------- /avod/tests/datasets/Kitti/object/training/image_2/000217.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kujason/avod/HEAD/avod/tests/datasets/Kitti/object/training/image_2/000217.png -------------------------------------------------------------------------------- /avod/tests/datasets/Kitti/object/training/velodyne/000000.bin: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kujason/avod/HEAD/avod/tests/datasets/Kitti/object/training/velodyne/000000.bin -------------------------------------------------------------------------------- /avod/tests/datasets/Kitti/object/training/velodyne/000001.bin: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kujason/avod/HEAD/avod/tests/datasets/Kitti/object/training/velodyne/000001.bin -------------------------------------------------------------------------------- /avod/tests/datasets/Kitti/object/training/velodyne/000002.bin: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kujason/avod/HEAD/avod/tests/datasets/Kitti/object/training/velodyne/000002.bin -------------------------------------------------------------------------------- /avod/tests/datasets/Kitti/object/training/velodyne/000003.bin: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kujason/avod/HEAD/avod/tests/datasets/Kitti/object/training/velodyne/000003.bin -------------------------------------------------------------------------------- /avod/tests/datasets/Kitti/object/training/velodyne/000004.bin: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kujason/avod/HEAD/avod/tests/datasets/Kitti/object/training/velodyne/000004.bin -------------------------------------------------------------------------------- /avod/tests/datasets/Kitti/object/training/velodyne/000005.bin: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kujason/avod/HEAD/avod/tests/datasets/Kitti/object/training/velodyne/000005.bin -------------------------------------------------------------------------------- /avod/tests/datasets/Kitti/object/training/velodyne/000006.bin: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kujason/avod/HEAD/avod/tests/datasets/Kitti/object/training/velodyne/000006.bin -------------------------------------------------------------------------------- /avod/tests/datasets/Kitti/object/training/velodyne/000007.bin: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kujason/avod/HEAD/avod/tests/datasets/Kitti/object/training/velodyne/000007.bin -------------------------------------------------------------------------------- /avod/tests/datasets/Kitti/object/training/velodyne/000008.bin: 
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kujason/avod/HEAD/avod/tests/datasets/Kitti/object/training/velodyne/000008.bin
--------------------------------------------------------------------------------
/avod/tests/datasets/Kitti/object/training/velodyne/000009.bin:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kujason/avod/HEAD/avod/tests/datasets/Kitti/object/training/velodyne/000009.bin
--------------------------------------------------------------------------------
/avod/tests/datasets/Kitti/object/training/velodyne/000076.bin:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kujason/avod/HEAD/avod/tests/datasets/Kitti/object/training/velodyne/000076.bin
--------------------------------------------------------------------------------
/avod/tests/datasets/Kitti/object/training/velodyne/000142.bin:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kujason/avod/HEAD/avod/tests/datasets/Kitti/object/training/velodyne/000142.bin
--------------------------------------------------------------------------------
/avod/tests/datasets/Kitti/object/training/velodyne/000217.bin:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kujason/avod/HEAD/avod/tests/datasets/Kitti/object/training/velodyne/000217.bin
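The `.bin` entries above are binary KITTI velodyne scans, shown here only as download links. In the standard KITTI format each file is a flat array of float32 values in groups of four (x, y, z, reflectance); a minimal standalone sketch for loading one, assuming only numpy:

```python
import numpy as np


def load_velodyne_scan(bin_path):
    """Reads a KITTI velodyne scan as an (N, 4) float32 array."""
    scan = np.fromfile(bin_path, dtype=np.float32)
    return scan.reshape(-1, 4)  # columns: x, y, z, reflectance


# points = load_velodyne_scan(
#     'avod/tests/datasets/Kitti/object/training/velodyne/000000.bin')
```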
--------------------------------------------------------------------------------
/avod/tests/datasets/Kitti/object/trainval.txt:
--------------------------------------------------------------------------------
1 | 000000
2 | 000001
3 | 000002
4 | 000003
5 | 000004
6 | 000005
7 | 000006
8 | 000007
9 | 000008
10 | 000009
11 | 000076
12 | 000142
13 | 000217
--------------------------------------------------------------------------------
/avod/protos/run_protoc.sh:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env bash
2 |
3 | set -e
4 |
5 | cd "$(dirname "$0")"
6 | echo "Compiling protos in $(pwd)"
7 | cd ../..
8 | protoc avod/protos/*.proto --python_out=.
9 | echo 'Done'
--------------------------------------------------------------------------------
/avod/protos/clear_protos.sh:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env bash
2 |
3 | set -e
4 |
5 | cd "$(dirname "$0")"
6 | echo "Removing old protos from $(dirname "$0")"
7 | find . -name '*_pb2.py'
8 | find . -name '*_pb2.py' -delete
9 |
--------------------------------------------------------------------------------
/avod/tests/datasets/Kitti/object/training/label_2/000002.txt:
--------------------------------------------------------------------------------
1 | Misc 0.00 0 -1.82 804.79 167.34 995.43 327.94 1.63 1.48 2.37 3.23 1.59 8.55 -1.47
2 | Car 0.00 0 -1.67 657.39 190.13 700.07 223.39 1.41 1.58 4.36 3.18 2.27 34.38 -1.58
3 |
--------------------------------------------------------------------------------
/avod/__init__.py:
--------------------------------------------------------------------------------
1 | import os
2 |
3 |
4 | def root_dir():
5 |     return os.path.dirname(os.path.realpath(__file__))
6 |
7 |
8 | def top_dir():
9 |     avod_root_dir = root_dir()
10 |     return os.path.split(avod_root_dir)[0]
11 |
--------------------------------------------------------------------------------
/scripts/install/build_integral_image_lib.bash:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | set -e  # exit on first error
3 |
4 | build_integral_image_lib()
5 | {
6 |     cd wavedata/wavedata/tools/core/lib
7 |     cmake src
8 |     make
9 | }
10 |
11 | build_integral_image_lib
12 |
--------------------------------------------------------------------------------
/scripts/offline_eval/kitti_native_eval/run_eval.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 |
3 | set -e
4 |
5 | cd $1
6 | echo "$3" | tee -a ./$4_results_$2.txt
7 | ./evaluate_object_3d_offline ~/Kitti/object/training/label_2/ $2/$3 | tee -a ./$4_results_$2.txt
8 |
9 | cp $4_results_$2.txt $5
10 |
--------------------------------------------------------------------------------
/avod/tests/datasets/Kitti/object/training/label_2/000003.txt:
--------------------------------------------------------------------------------
1 | Car 0.00 0 1.55 614.24 181.78 727.31 284.77 1.57 1.73 4.15 1.00 1.75 13.22 1.62
2 | DontCare -1 -1 -10 5.00 229.89 214.12 367.61 -1 -1 -1 -1000 -1000 -1000 -10
3 | DontCare -1 -1 -10 522.25 202.35 547.77 219.71 -1 -1 -1 -1000 -1000 -1000 -10
4 |
--------------------------------------------------------------------------------
/scripts/offline_eval/kitti_native_eval/run_eval_05_iou.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 |
3 | set -e
4 |
5 | cd $1
6 | echo "$3" | tee -a ./$4_results_05_iou_$2.txt
7 | ./evaluate_object_3d_offline_05_iou ~/Kitti/object/training/label_2/ $2/$3 | tee -a ./$4_results_05_iou_$2.txt
8 |
9 | cp $4_results_05_iou_$2.txt $5
10 |
--------------------------------------------------------------------------------
/avod/tests/run_unit_tests.sh:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env bash
2 |
3 | cd "$(dirname "$0")"
4 | cd ../..
5 | 6 | export PYTHONPATH=$PYTHONPATH:$(pwd)/wavedata 7 | echo $PYTHONPATH 8 | 9 | echo "Running unit tests in $(pwd)/avod" 10 | coverage run --source avod -m unittest discover -b --pattern "*_test.py" 11 | 12 | #coverage report -m 13 | -------------------------------------------------------------------------------- /scripts/offline_eval/kitti_native_eval/all_eval.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # set -e 4 | # set -x 5 | 6 | # Sort by step 7 | folders=$(ls ./$1/ | sort -V) 8 | 9 | for folder in $folders 10 | do 11 | echo "$folder" | tee -a ./results_$1.txt 12 | ./evaluate_object_3d_offline ~/Kitti/object/training/label_2/ $1/$folder | tee -a ./results_$1.txt 13 | done 14 | -------------------------------------------------------------------------------- /avod/builders/bev_generator_builder.py: -------------------------------------------------------------------------------- 1 | from avod.core.bev_generators import bev_slices 2 | 3 | 4 | def build(bev_maps_type_config, kitti_utils): 5 | 6 | bev_maps_type = bev_maps_type_config.WhichOneof('bev_maps_type') 7 | 8 | if bev_maps_type == 'slices': 9 | return bev_slices.BevSlices( 10 | bev_maps_type_config.slices, kitti_utils) 11 | 12 | raise ValueError('Invalid bev_maps_type', bev_maps_type) 13 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | avod/data 2 | avod/checkpoints 3 | avod/logs 4 | 5 | # Python 6 | *.pyc 7 | __pycache__ 8 | 9 | # Coverage 10 | *.coverage 11 | 12 | # Misc 13 | run.sh 14 | tags 15 | 16 | # PyCharm 17 | .idea 18 | 19 | # Rope 20 | .ropeproject 21 | 22 | # Distribution / packaging 23 | build/ 24 | dist/ 25 | avod.egg-info/ 26 | 27 | # Protobuf files 28 | *_pb2.py 29 | 30 | # line_profiler 31 | *.lprof 32 | 33 | # tf_profiler 34 | *.json 35 | -------------------------------------------------------------------------------- /avod/tests/datasets/Kitti/object/training/label_2/000005.txt: -------------------------------------------------------------------------------- 1 | Pedestrian 0.00 0 1.94 330.06 178.74 360.77 238.64 1.87 0.96 0.65 -8.50 2.07 23.02 1.59 2 | DontCare -1 -1 -10 606.64 170.67 621.06 184.34 -1 -1 -1 -1000 -1000 -1000 -10 3 | DontCare -1 -1 -10 606.00 170.91 621.35 184.28 -1 -1 -1 -1000 -1000 -1000 -10 4 | DontCare -1 -1 -10 605.68 171.21 620.77 184.34 -1 -1 -1 -1000 -1000 -1000 -10 5 | DontCare -1 -1 -10 566.39 168.89 585.07 184.56 -1 -1 -1 -1000 -1000 -1000 -10 6 | -------------------------------------------------------------------------------- /avod/tests/datasets/Kitti/object/training/label_2/000009.txt: -------------------------------------------------------------------------------- 1 | Car 0.00 0 -1.50 601.96 177.01 659.15 229.51 1.61 1.66 3.20 0.70 1.76 23.88 -1.48 2 | Car 0.00 2 1.75 600.14 177.09 624.65 193.31 1.44 1.61 3.66 0.24 1.84 66.37 1.76 3 | Car 0.00 0 1.78 574.98 178.64 598.45 194.01 1.41 1.53 3.37 -2.19 1.96 68.25 1.75 4 | DontCare -1 -1 -10 710.60 167.73 736.68 182.35 -1 -1 -1 -1000 -1000 -1000 -10 5 | DontCare -1 -1 -10 758.52 156.27 782.52 179.23 -1 -1 -1 -1000 -1000 -1000 -10 6 | -------------------------------------------------------------------------------- /scripts/offline_eval/kitti_native_eval/Makefile: -------------------------------------------------------------------------------- 1 | main: evaluate_object_3d_offline.cpp 2 | g++ -o evaluate_object_3d_offline 
evaluate_object_3d_offline.cpp -lboost_system -lboost_filesystem 3 | 4 | eval_05_iou: evaluate_object_3d_offline_05_iou.cpp 5 | g++ -o evaluate_object_3d_offline_05_iou evaluate_object_3d_offline_05_iou.cpp -lboost_system -lboost_filesystem 6 | 7 | .PHONY: clean 8 | 9 | clean: 10 | rm -f evaluate_object_3d_offline 11 | rm -f evaluate_object_3d_offline_05_iou 12 | -------------------------------------------------------------------------------- /avod/core/constants.py: -------------------------------------------------------------------------------- 1 | KEY_LABEL_BOXES_3D = 'label_boxes_3d' 2 | KEY_LABEL_ANCHORS = 'label_anchors' 3 | KEY_LABEL_CLASSES = 'label_classes' 4 | 5 | KEY_IMAGE_INPUT = 'image_input' 6 | KEY_BEV_INPUT = 'bev_input' 7 | 8 | KEY_SAMPLE_IDX = 'sample_idx' 9 | KEY_SAMPLE_NAME = 'sample_name' 10 | KEY_SAMPLE_AUGS = 'sample_augs' 11 | 12 | KEY_ANCHORS_INFO = 'anchors_info' 13 | 14 | KEY_POINT_CLOUD = 'point_cloud' 15 | KEY_GROUND_PLANE = 'ground_plane' 16 | KEY_STEREO_CALIB_P2 = 'stereo_calib_p2' 17 | -------------------------------------------------------------------------------- /avod/tests/datasets/Kitti/object/training/label_2/000006.txt: -------------------------------------------------------------------------------- 1 | Car 0.00 2 -1.55 548.00 171.33 572.40 194.42 1.48 1.56 3.62 -2.72 0.82 48.22 -1.62 2 | Car 0.00 0 -1.21 505.25 168.37 575.44 209.18 1.67 1.64 4.32 -2.61 1.13 31.73 -1.30 3 | Car 0.00 0 0.15 49.70 185.65 227.42 246.96 1.50 1.62 3.88 -12.54 1.64 19.72 -0.42 4 | Car 0.00 1 2.05 328.67 170.65 397.24 204.16 1.68 1.67 4.29 -12.66 1.13 38.44 1.73 5 | DontCare -1 -1 -10 603.36 169.62 631.06 186.56 -1 -1 -1 -1000 -1000 -1000 -10 6 | DontCare -1 -1 -10 578.97 168.88 603.78 187.56 -1 -1 -1 -1000 -1000 -1000 -10 7 | -------------------------------------------------------------------------------- /avod/tests/datasets/Kitti/object/training/label_2/000007.txt: -------------------------------------------------------------------------------- 1 | Car 0.00 0 -1.56 564.62 174.59 616.43 224.74 1.61 1.66 3.20 -0.69 1.69 25.01 -1.59 2 | Car 0.00 0 1.71 481.59 180.09 512.55 202.42 1.40 1.51 3.70 -7.43 1.88 47.55 1.55 3 | Car 0.00 0 1.64 542.05 175.55 565.27 193.79 1.46 1.66 4.05 -4.71 1.71 60.52 1.56 4 | Cyclist 0.00 0 1.89 330.60 176.09 355.61 213.60 1.72 0.50 1.95 -12.63 1.88 34.09 1.54 5 | DontCare -1 -1 -10 753.33 164.32 798.00 186.74 -1 -1 -1 -1000 -1000 -1000 -10 6 | DontCare -1 -1 -10 738.50 171.32 753.27 184.42 -1 -1 -1 -1000 -1000 -1000 -10 7 | -------------------------------------------------------------------------------- /avod/tests/datasets/Kitti/object/training/label_2/000004.txt: -------------------------------------------------------------------------------- 1 | Car 0.00 0 1.96 280.38 185.10 344.90 215.59 1.49 1.76 4.01 -15.71 2.16 38.26 1.57 2 | Car 0.00 0 1.88 365.14 184.54 406.11 205.20 1.38 1.80 3.41 -15.89 2.23 51.17 1.58 3 | DontCare -1 -1 -10 402.27 166.69 477.31 197.98 -1 -1 -1 -1000 -1000 -1000 -10 4 | DontCare -1 -1 -10 518.53 177.31 531.51 187.17 -1 -1 -1 -1000 -1000 -1000 -10 5 | DontCare -1 -1 -10 1207.50 233.35 1240.00 333.39 -1 -1 -1 -1000 -1000 -1000 -10 6 | DontCare -1 -1 -10 535.06 177.65 545.26 185.82 -1 -1 -1 -1000 -1000 -1000 -10 7 | DontCare -1 -1 -10 558.03 177.88 567.50 184.65 -1 -1 -1 -1000 -1000 -1000 -10 8 | -------------------------------------------------------------------------------- /scripts/preprocessing/travis_test_preprocessing.py: -------------------------------------------------------------------------------- 1 | from 
avod.builders.dataset_builder import DatasetBuilder 2 | 3 | from scripts.preprocessing import gen_mini_batches 4 | from scripts.preprocessing import gen_label_clusters 5 | 6 | 7 | def main(): 8 | 9 | dataset_config = DatasetBuilder.copy_config(DatasetBuilder.KITTI_UNITTEST) 10 | dataset_config.data_split = "trainval" 11 | unittest_dataset = DatasetBuilder.build_kitti_dataset(dataset_config) 12 | 13 | gen_label_clusters.main(unittest_dataset) 14 | gen_mini_batches.main(unittest_dataset) 15 | 16 | 17 | if __name__ == '__main__': 18 | main() 19 | -------------------------------------------------------------------------------- /avod/tests/datasets/Kitti/object/training/label_2/000001.txt: -------------------------------------------------------------------------------- 1 | Truck 0.00 0 -1.57 599.41 156.40 629.75 189.25 2.85 2.63 12.34 0.47 1.49 69.44 -1.56 2 | Car 0.00 0 1.85 387.63 181.54 423.81 203.12 1.67 1.87 3.69 -16.53 2.39 58.49 1.57 3 | Cyclist 0.00 3 -1.65 676.60 163.95 688.98 193.93 1.86 0.60 2.02 4.59 1.32 45.84 -1.55 4 | DontCare -1 -1 -10 503.89 169.71 590.61 190.13 -1 -1 -1 -1000 -1000 -1000 -10 5 | DontCare -1 -1 -10 511.35 174.96 527.81 187.45 -1 -1 -1 -1000 -1000 -1000 -10 6 | DontCare -1 -1 -10 532.37 176.35 542.68 185.27 -1 -1 -1 -1000 -1000 -1000 -10 7 | DontCare -1 -1 -10 559.62 175.83 575.40 183.15 -1 -1 -1 -1000 -1000 -1000 -10 8 | -------------------------------------------------------------------------------- /avod/datasets/kitti/kitti_aug_test.py: -------------------------------------------------------------------------------- 1 | import unittest 2 | 3 | import numpy as np 4 | 5 | from avod.datasets.kitti import kitti_aug 6 | 7 | 8 | class KittiAugTest(unittest.TestCase): 9 | 10 | def test_flip_boxes_3d(self): 11 | 12 | boxes_3d = np.array([ 13 | [1, 2, 3, 4, 5, 6, np.pi / 4], 14 | [1, 2, 3, 4, 5, 6, -np.pi / 4] 15 | ]) 16 | 17 | exp_flipped_boxes_3d = np.array([ 18 | [-1, 2, 3, 4, 5, 6, 3 * np.pi / 4], 19 | [-1, 2, 3, 4, 5, 6, -3 * np.pi / 4] 20 | ]) 21 | 22 | flipped_boxes_3d = kitti_aug.flip_boxes_3d(boxes_3d) 23 | 24 | np.testing.assert_almost_equal(flipped_boxes_3d, exp_flipped_boxes_3d) 25 | -------------------------------------------------------------------------------- /avod/tests/datasets/Kitti/object/training/label_2/000217.txt: -------------------------------------------------------------------------------- 1 | Car 0.65 3 2.10 0.00 196.92 358.49 374.00 1.48 1.80 4.51 -3.88 1.75 6.20 1.56 2 | Car 0.00 1 1.80 316.11 191.89 461.97 282.37 1.37 1.64 4.40 -4.07 1.78 13.88 1.52 3 | Car 0.00 0 1.72 475.46 183.89 525.42 222.67 1.40 1.45 3.30 -4.27 1.85 28.24 1.57 4 | Van 0.00 2 1.64 499.86 160.21 547.31 212.16 2.39 1.96 5.36 -4.26 1.84 36.10 1.52 5 | Car 0.00 0 1.58 562.18 178.55 594.26 206.23 1.40 1.63 3.89 -1.73 1.72 38.85 1.53 6 | Cyclist 0.00 0 0.99 979.93 140.54 1173.61 373.94 1.76 0.59 1.56 3.91 1.53 6.26 1.53 7 | Cyclist 0.08 2 -2.22 1051.55 132.89 1241.00 332.88 1.77 0.63 1.82 5.36 1.43 7.33 -1.61 8 | Car 0.00 1 1.64 537.61 179.82 561.21 200.59 1.38 1.33 4.32 -4.24 1.88 50.62 1.56 9 | -------------------------------------------------------------------------------- /avod/protos/pipeline.proto: -------------------------------------------------------------------------------- 1 | package avod.protos; 2 | 3 | import "avod/protos/model.proto"; 4 | import "avod/protos/train.proto"; 5 | import "avod/protos/eval.proto"; 6 | import "avod/protos/kitti_dataset.proto"; 7 | 8 | // Convenience message for configuring a training and eval pipeline. 
9 | // Allows all of the pipeline parameters to be configured from one file. 10 | message NetworkPipelineConfig { 11 | 12 | // Detection Model config 13 | optional ModelConfig model_config = 1; 14 | 15 | // Training config 16 | optional TrainConfig train_config = 2; 17 | 18 | // Evaluation config 19 | optional EvalConfig eval_config = 3; 20 | 21 | // KittiDataset configuration 22 | optional KittiDatasetConfig dataset_config = 4; 23 | } 24 | -------------------------------------------------------------------------------- /scripts/install/travis_install.bash: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | set -e # exit on first error 3 | 4 | install_wavedata() 5 | { 6 | echo "Cloning wavedata ..." 7 | git clone git@github.com:kujason/wavedata.git 8 | cd wavedata 9 | sudo /home/travis/virtualenv/python3.5.2/bin/python setup.py install 10 | cd ../ 11 | } 12 | 13 | install_protoc() 14 | { 15 | # Make sure you grab the latest version 16 | curl -OL https://github.com/google/protobuf/releases/download/v3.2.0/protoc-3.2.0-linux-x86_64.zip 17 | # Unzip 18 | unzip protoc-3.2.0-linux-x86_64.zip -d protoc3 19 | # Move only protoc* to /usr/bin/ 20 | sudo mv protoc3/bin/protoc /usr/bin/protoc 21 | } 22 | 23 | #install_wavedata 24 | install_protoc 25 | # install cmake 26 | sudo apt-get install cmake 27 | -------------------------------------------------------------------------------- /avod/protos/eval.proto: -------------------------------------------------------------------------------- 1 | syntax = "proto2"; 2 | 3 | package avod.protos; 4 | 5 | // Message for configuring DetectionModel evaluator. 6 | message EvalConfig { 7 | 8 | // Evaluation intervals during training 9 | optional uint32 eval_interval = 1 [default = 500]; 10 | 11 | // Evaluation mode, 'val' or 'test' 12 | optional string eval_mode = 2 [default = 'val']; 13 | 14 | // Checkpoint indices to evaluate 15 | repeated int32 ckpt_indices = 3; 16 | 17 | // Evaluate repeatedly while waiting for new checkpoints 18 | optional bool evaluate_repeatedly = 4 [default = true]; 19 | 20 | // GPU options 21 | optional bool allow_gpu_mem_growth = 5 [default = false]; 22 | 23 | // Kitti native evaluation 24 | optional float kitti_score_threshold = 6 [default = 0.1]; 25 | } 26 | -------------------------------------------------------------------------------- /scripts/preprocessing/gen_label_clusters.py: -------------------------------------------------------------------------------- 1 | from avod.builders.dataset_builder import DatasetBuilder 2 | 3 | 4 | def main(dataset=None): 5 | if not dataset: 6 | dataset = DatasetBuilder.build_kitti_dataset( 7 | DatasetBuilder.KITTI_TRAIN) 8 | 9 | label_cluster_utils = dataset.kitti_utils.label_cluster_utils 10 | 11 | print("Generating clusters in {}/{}".format( 12 | label_cluster_utils.data_dir, dataset.data_split)) 13 | clusters, std_devs = dataset.get_cluster_info() 14 | 15 | print("Clusters generated") 16 | print("classes: {}".format(dataset.classes)) 17 | print("num_clusters: {}".format(dataset.num_clusters)) 18 | print("all_clusters:\n {}".format(clusters)) 19 | print("all_std_devs:\n {}".format(std_devs)) 20 | 21 | 22 | if __name__ == '__main__': 23 | main() 24 | -------------------------------------------------------------------------------- /avod/tests/datasets/Kitti/object/training/label_2/000008.txt: -------------------------------------------------------------------------------- 1 | Car 0.88 3 -0.69 0.00 192.37 402.31 374.00 1.60 1.57 3.23 -2.70 1.74 3.68 
-1.29 2 | Car 0.00 1 2.04 334.85 178.94 624.50 372.04 1.57 1.50 3.68 -1.17 1.65 7.86 1.90 3 | Car 0.34 3 -1.84 937.29 197.39 1241.00 374.00 1.39 1.44 3.08 3.81 1.64 6.15 -1.31 4 | Car 0.00 1 -1.33 597.59 176.18 720.90 261.14 1.47 1.60 3.66 1.07 1.55 14.44 -1.25 5 | Car 0.00 0 1.74 741.18 168.83 792.25 208.43 1.70 1.63 4.08 7.24 1.55 33.20 1.95 6 | Car 0.00 0 -1.65 884.52 178.31 956.41 240.18 1.59 1.59 2.47 8.48 1.75 19.96 -1.25 7 | DontCare -1 -1 -10 800.38 163.67 825.45 184.07 -1 -1 -1 -1000 -1000 -1000 -10 8 | DontCare -1 -1 -10 859.58 172.34 886.26 194.51 -1 -1 -1 -1000 -1000 -1000 -10 9 | DontCare -1 -1 -10 801.81 163.96 825.20 183.59 -1 -1 -1 -1000 -1000 -1000 -10 10 | DontCare -1 -1 -10 826.87 162.28 845.84 178.86 -1 -1 -1 -1000 -1000 -1000 -10 11 | -------------------------------------------------------------------------------- /avod/tests/datasets/Kitti/object/training/label_2/000076.txt: -------------------------------------------------------------------------------- 1 | Pedestrian 0.28 0 2.39 90.64 186.57 329.47 373.00 1.51 1.01 0.97 -2.63 1.56 4.83 1.90 2 | Pedestrian 0.00 0 -1.35 964.98 157.64 1023.92 289.42 1.59 0.66 0.65 4.82 1.30 9.12 -0.88 3 | Pedestrian 0.00 2 0.66 236.84 169.82 288.56 339.57 1.66 0.65 0.72 -3.58 1.55 7.41 0.22 4 | Cyclist 0.00 0 1.68 413.06 169.47 489.86 291.05 1.67 0.93 1.69 -2.24 1.51 10.79 1.47 5 | Pedestrian 0.00 1 -1.90 811.24 152.52 841.74 249.83 1.75 0.82 1.14 4.05 1.23 13.53 -1.61 6 | Pedestrian 0.00 0 -1.53 583.85 164.01 608.02 237.87 1.77 0.94 0.93 -0.26 1.35 17.75 -1.55 7 | Pedestrian 0.00 0 2.63 524.52 165.79 549.51 230.67 1.73 0.67 0.83 -1.94 1.31 19.66 2.53 8 | Pedestrian 0.00 0 1.66 560.09 165.43 577.09 224.39 1.82 0.80 0.86 -1.22 1.32 22.65 1.60 9 | Pedestrian 0.00 2 1.43 708.67 157.73 729.27 222.05 1.88 0.78 1.03 3.36 1.19 21.58 1.58 10 | DontCare -1 -1 -10 614.86 162.09 693.85 210.96 -1 -1 -1 -1000 -1000 -1000 -10 11 | -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | dist: trusty 2 | sudo: required 3 | group: edge 4 | 5 | language: python 6 | 7 | python: 8 | - "3.5" 9 | 10 | cache: pip 11 | 12 | install: 13 | # install protobuf 14 | - sudo bash scripts/install/travis_install.bash 15 | 16 | # install python dependencies 17 | - pip install -r requirements.txt 18 | - pip install https://storage.googleapis.com/tensorflow/linux/cpu/tensorflow-1.3.0-cp35-cp35m-linux_x86_64.whl 19 | 20 | # install c++ lib for wavedata 21 | - sudo bash scripts/install/build_integral_image_lib.bash 22 | 23 | before_script: 24 | - git submodule init 25 | - git submodule update 26 | - export PYTHONPATH=$PYTHONPATH:`pwd`:`pwd`/wavedata 27 | - sudo protoc avod/protos/*.proto --python_out=. 
28 |
29 | script:
30 |   - python scripts/preprocessing/travis_test_preprocessing.py
31 |   - python -m unittest discover -b --pattern "*_test.py"
32 |
33 | notifications:
34 |   email: false
--------------------------------------------------------------------------------
/avod/builders/feature_extractor_builder.py:
--------------------------------------------------------------------------------
1 | from avod.core.feature_extractors.bev_vgg import BevVgg
2 | from avod.core.feature_extractors.bev_vgg_pyramid import BevVggPyr
3 |
4 | from avod.core.feature_extractors.img_vgg import ImgVgg
5 | from avod.core.feature_extractors.img_vgg_pyramid import ImgVggPyr
6 |
7 |
8 | def get_extractor(extractor_config):
9 |
10 |     extractor_type = extractor_config.WhichOneof('feature_extractor')
11 |
12 |     # BEV feature extractors
13 |     if extractor_type == 'bev_vgg':
14 |         return BevVgg(extractor_config.bev_vgg)
15 |     elif extractor_type == 'bev_vgg_pyr':
16 |         return BevVggPyr(extractor_config.bev_vgg_pyr)
17 |
18 |     # Image feature extractors
19 |     elif extractor_type == 'img_vgg':
20 |         return ImgVgg(extractor_config.img_vgg)
21 |     elif extractor_type == 'img_vgg_pyr':
22 |         return ImgVggPyr(extractor_config.img_vgg_pyr)
23 |
24 |     raise ValueError('Invalid feature extractor type', extractor_type)
25 |
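All of the builders follow the same pattern: a protobuf config selects an implementation, usually via `WhichOneof`. The top-level `NetworkPipelineConfig` from `avod/protos/pipeline.proto` (shown earlier) is what ties the individual configs together. A minimal loading sketch, assuming the protos have been compiled with `avod/protos/run_protoc.sh`; the helper name `load_pipeline_config` is illustrative only (the repo's actual config handling presumably lives in `builders/config_builder_util.py`):

```python
from google.protobuf import text_format

from avod.protos import pipeline_pb2  # generated by run_protoc.sh


def load_pipeline_config(config_path):
    """Parses a text-format .config file into a NetworkPipelineConfig."""
    pipeline_config = pipeline_pb2.NetworkPipelineConfig()
    with open(config_path, 'r') as f:
        text_format.Merge(f.read(), pipeline_config)
    return pipeline_config


# config = load_pipeline_config('avod/configs/unittest_pipeline.config')
# config.train_config.max_iterations, config.dataset_config.classes, ...
```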
--------------------------------------------------------------------------------
/avod/core/feature_extractors/bev_feature_extractor.py:
--------------------------------------------------------------------------------
1 | from abc import abstractmethod
2 |
3 | import tensorflow as tf
4 |
5 | class BevFeatureExtractor:
6 |
7 |     def __init__(self, extractor_config):
8 |         self.config = extractor_config
9 |
10 |     def preprocess_input(self, tensor_in, output_shape):
11 |         """Preprocesses the given input.
12 |
13 |         Args:
14 |             tensor_in: A `Tensor` of shape=(batch_size, height,
15 |                 width, channel) representing an input image.
16 |             output_shape: The shape of the output (H x W)
17 |
18 |         Returns:
19 |             Preprocessed tensor input, resized to the output_shape
20 |         """
21 |
22 |         # Only reshape if input shape does not match
23 |         if not tensor_in.shape[1:3] == output_shape:
24 |             return tf.image.resize_images(tensor_in, output_shape)
25 |
26 |         return tensor_in
27 |
28 |     @abstractmethod
29 |     def build(self, **kwargs):
30 |         pass
--------------------------------------------------------------------------------
/avod/protos/kitti_utils.proto:
--------------------------------------------------------------------------------
1 | package avod.protos;
2 |
3 | import "avod/protos/mini_batch.proto";
4 |
5 | message KittiUtilsConfig {
6 |     // 3D area extents [min_x, max_x, min_y, max_y, min_z, max_z]
7 |     repeated float area_extents = 1;
8 |
9 |     // Voxel grid size (for 2D and 3D)
10 |     required float voxel_size = 2;
11 |
12 |     // Anchor strides
13 |     repeated float anchor_strides = 3;
14 |
15 |     // Anchor filtering density threshold
16 |     optional int32 density_threshold = 4 [default = 1];
17 |
18 |     required BevGenerator bev_generator = 20;
19 |
20 |     required MiniBatchConfig mini_batch_config = 21;
21 | }
22 |
23 | message BevGenerator {
24 |     oneof bev_maps_type {
25 |         Slices slices = 1;
26 |     }
27 |
28 |     message Slices {
29 |         // Min and max height
30 |         required float height_lo = 1;
31 |         required float height_hi = 2;
32 |
33 |         // Number of slices to create
34 |         required int32 num_slices = 3;
35 |     }
36 | }
37 |
--------------------------------------------------------------------------------
/scripts/offline_eval/kitti_native_eval/mail.h:
--------------------------------------------------------------------------------
1 | #ifndef MAIL_H
2 | #define MAIL_H
3 |
4 | #include <stdio.h>
5 | #include <string>
6 | #include <stdarg.h>
7 |
8 | class Mail {
9 |
10 | public:
11 |
12 |   Mail (std::string email = "") {
13 |     if (email.compare("")) {
14 |       mail = popen("/usr/lib/sendmail -t -f noreply@cvlibs.net","w");
15 |       fprintf(mail,"To: %s\n", email.c_str());
16 |       fprintf(mail,"From: noreply@cvlibs.net\n");
17 |       fprintf(mail,"Subject: KITTI Evaluation Benchmark\n");
18 |       fprintf(mail,"\n\n");
19 |     } else {
20 |       mail = 0;
21 |     }
22 |   }
23 |
24 |   ~Mail() {
25 |     if (mail) {
26 |       pclose(mail);
27 |     }
28 |   }
29 |
30 |   void msg (const char *format, ...) {
31 |     va_list args;
32 |     va_start(args,format);
33 |     if (mail) {
34 |       vfprintf(mail,format,args);
35 |       fprintf(mail,"\n");
36 |     }
37 |     vprintf(format,args);
38 |     printf("\n");
39 |     va_end(args);
40 |   }
41 |
42 | private:
43 |
44 |   FILE *mail;
45 |
46 | };
47 |
48 | #endif
49 |
--------------------------------------------------------------------------------
/avod/protos/train.proto:
--------------------------------------------------------------------------------
1 | syntax = "proto2";
2 |
3 | package avod.protos;
4 | import "avod/protos/optimizer.proto";
5 |
6 | // Message for configuring DetectionModel training jobs (train.py).
7 | message TrainConfig {
8 |
9 |     // Input queue batch size.
10 |     optional uint32 batch_size = 1 [default = 1];
11 |
12 |     // Max training iteration
13 |     required uint32 max_iterations = 2 [default = 500];
14 |
15 |     // Optimizer used to train the DetectionModel.
16 | optional Optimizer optimizer = 3; 17 | 18 | // Checkpoint options 19 | optional uint32 checkpoint_interval = 4 [default = 50]; 20 | optional uint32 max_checkpoints_to_keep = 5 [default = 10]; 21 | optional bool overwrite_checkpoints = 6 [default = false]; 22 | 23 | // Summary options 24 | required uint32 summary_interval = 7 [default = 10]; 25 | required bool summary_histograms = 8; 26 | required bool summary_img_images = 9; 27 | required bool summary_bev_images = 10; 28 | 29 | // GPU options 30 | optional bool allow_gpu_mem_growth = 11 [default = false]; 31 | } 32 | -------------------------------------------------------------------------------- /avod/protos/mini_batch.proto: -------------------------------------------------------------------------------- 1 | package avod.protos; 2 | 3 | message MiniBatchConfig { 4 | 5 | // Density threshold for removing empty anchors 6 | required int32 density_threshold = 1; 7 | 8 | required MiniBatchRpnConfig rpn_config = 2; 9 | required MiniBatchAvodConfig avod_config = 3; 10 | } 11 | 12 | message MiniBatchIouThresholds { 13 | // RPN negative/positive iou ranges 14 | required float neg_iou_lo = 3; 15 | required float neg_iou_hi = 4; 16 | required float pos_iou_lo = 5; 17 | required float pos_iou_hi = 6; 18 | } 19 | 20 | message MiniBatchRpnConfig { 21 | oneof iou_type { 22 | MiniBatchIouThresholds iou_2d_thresholds = 1; 23 | MiniBatchIouThresholds iou_3d_thresholds = 2; 24 | } 25 | 26 | // Number of anchors in an RPN mini batch 27 | required int32 mini_batch_size = 3; 28 | } 29 | 30 | message MiniBatchAvodConfig { 31 | // AVOD positive/negative 2D iou ranges 32 | required MiniBatchIouThresholds iou_2d_thresholds = 1; 33 | 34 | // Number of anchors in an AVOD mini batch 35 | required int32 mini_batch_size = 2; 36 | } 37 | 38 | 39 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Copyright (c) 2018 Jason Ku, Melissa Mozifian 2 | 3 | Permission is hereby granted, free of charge, to any person obtaining a copy 4 | of this software and associated documentation files (the "Software"), to deal 5 | in the Software without restriction, including without limitation the rights 6 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 7 | copies of the Software, and to permit persons to whom the Software is 8 | furnished to do so, subject to the following conditions: 9 | 10 | The above copyright notice and this permission notice shall be included in all 11 | copies or substantial portions of the Software. 12 | 13 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 14 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 15 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 16 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 17 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 18 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 19 | SOFTWARE. 20 | -------------------------------------------------------------------------------- /avod/core/orientation_encoder.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | 3 | 4 | def tf_orientation_to_angle_vector(orientations_tensor): 5 | """ Converts orientation angles into angle unit vector representation. 6 | e.g. 
45 -> [0.707, 0.707], 90 -> [0, 1]
7 |
8 |     Args:
9 |         orientations_tensor: A tensor of shape (N,) of orientation angles
10 |
11 |     Returns:
12 |         A tensor of shape (N, 2) of angle unit vectors in the format [x, y]
13 |     """
14 |     x = tf.cos(orientations_tensor)
15 |     y = tf.sin(orientations_tensor)
16 |
17 |     return tf.stack([x, y], axis=1)
18 |
19 |
20 | def tf_angle_vector_to_orientation(angle_vectors_tensor):
21 |     """ Converts angle unit vectors into orientation angle representation.
22 |     e.g. [0.707, 0.707] -> 45, [0, 1] -> 90
23 |
24 |     Args:
25 |         angle_vectors_tensor: a tensor of shape (N, 2) of angle unit vectors
26 |             in the format [x, y]
27 |
28 |     Returns:
29 |         A tensor of shape (N,) of orientation angles
30 |     """
31 |     x = angle_vectors_tensor[:, 0]
32 |     y = angle_vectors_tensor[:, 1]
33 |
34 |     return tf.atan2(y, x)
35 |
--------------------------------------------------------------------------------
/avod/core/box_3d_projector_test.py:
--------------------------------------------------------------------------------
1 | import unittest
2 |
3 | import numpy as np
4 |
5 | from avod.core import box_3d_projector
6 |
7 |
8 | class Box3dProjectorTest(unittest.TestCase):
9 |     def test_project_to_bev(self):
10 |         boxes_3d = np.array([[0, 0, 0, 1, 0.5, 1, 0],
11 |                              [0, 0, 0, 1, 0.5, 1, np.pi / 2],
12 |                              [1, 0, 1, 1, 0.5, 1, np.pi / 2]])
13 |
14 |         box_points, box_points_norm = \
15 |             box_3d_projector.project_to_bev(boxes_3d, [[-1, 1], [-1, 1]])
16 |
17 |         expected_boxes = np.array(
18 |             [[[0.5, 0.25],
19 |               [-0.5, 0.25],
20 |               [-0.5, -0.25],
21 |               [0.5, -0.25]],
22 |              [[0.25, -0.5],
23 |               [0.25, 0.5],
24 |               [-0.25, 0.5],
25 |               [-0.25, -0.5]],
26 |              [[1.25, 0.5],
27 |               [1.25, 1.5],
28 |               [0.75, 1.5],
29 |               [0.75, 0.5]]],
30 |             dtype=np.float32)
31 |
32 |         for box, exp_box in zip(box_points, expected_boxes):
33 |             np.testing.assert_allclose(box, exp_box, rtol=1E-5)
34 |
--------------------------------------------------------------------------------
/avod/tests/datasets/Kitti/object/training/label_2/000142.txt:
--------------------------------------------------------------------------------
1 | Pedestrian 0.75 1 -2.28 1199.30 108.54 1241.00 348.03 1.67 0.70 0.44 4.69 1.22 5.12 -1.57
2 | Cyclist 0.26 0 2.11 36.57 165.55 248.54 374.00 1.81 0.51 1.71 -3.49 1.77 5.50 1.56
3 | Car 0.52 0 -2.07 835.06 166.73 1241.00 374.00 1.57 1.54 3.22 2.86 1.56 5.12 -1.59
4 | Car 0.00 1 -1.81 726.07 154.71 916.86 306.71 1.74 1.60 3.86 2.70 1.56 10.24 -1.56
5 | Car 0.00 2 -1.74 688.52 153.34 806.10 252.85 1.83 1.69 4.44 2.74 1.50 15.58 -1.57
6 | Car 0.00 2 -1.67 678.28 170.36 747.97 231.25 1.61 1.62 3.78 2.90 1.57 21.17 -1.53
7 | Pedestrian 0.00 0 0.63 656.08 160.85 672.41 199.98 1.90 0.42 1.04 2.54 1.33 35.54 0.70
8 | Pedestrian 0.00 1 0.63 661.72 162.97 676.05 200.69 1.87 0.50 0.90 2.99 1.39 36.22 0.71
9 | Car 0.00 0 1.67 530.82 167.68 560.49 197.37 1.86 1.57 3.83 -4.22 1.56 47.51 1.58
10 | Pedestrian 0.00 0 -0.04 605.72 167.06 616.06 191.92 1.78 0.53 0.95 0.06 1.37 52.31 -0.04
11 | Car 0.00 1 1.80 428.14 178.08 495.67 225.49 1.50 1.46 3.70 -5.05 1.69 24.84 1.60
12 | Car 0.53 1 2.21 0.00 197.78 273.61 374.00 1.54 1.54 3.72 -5.07 1.84 7.13 1.61
13 | Car 0.00 1 1.95 197.05 186.01 398.12 301.00 1.57 1.60 4.24 -5.08 1.82 12.30 1.57
14 | Car 0.00 1 1.88 337.15 176.21 452.69 252.15 1.65 1.61 3.73 -5.17 1.75 17.71 1.60
15 |
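The `label_2` files above follow the standard KITTI object label format: one object per line, 15 whitespace-separated fields. The repo itself presumably reads these through its `wavedata` submodule, so the parser below is only an illustrative sketch of the format:

```python
def read_kitti_labels(label_path):
    """Parses a KITTI label_2 file into a list of dicts (illustrative only)."""
    labels = []
    with open(label_path, 'r') as f:
        for line in f:
            fields = line.split()
            labels.append({
                'type': fields[0],               # e.g. 'Car', 'DontCare'
                'truncation': float(fields[1]),  # 0.0 to 1.0
                'occlusion': int(fields[2]),     # 0-3, or -1 for DontCare
                'alpha': float(fields[3]),       # observation angle
                'bbox': [float(x) for x in fields[4:8]],         # left, top, right, bottom
                'dimensions': [float(x) for x in fields[8:11]],  # h, w, l in metres
                'location': [float(x) for x in fields[11:14]],   # x, y, z in camera frame
                'rotation_y': float(fields[14]),  # yaw around the camera y-axis
            })
    return labels
```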
--------------------------------------------------------------------------------
/scripts/offline_eval/kitti_native_eval/README.md:
--------------------------------------------------------------------------------
1 | # kitti_native_eval
2 |
3 | `evaluate_object_3d_offline.cpp` evaluates your KITTI detections locally, on your own computer, against a validation set selected from the KITTI training data, with the following metrics:
4 |
5 | - Average Precision in the 2D image frame (AP)
6 | - Average Orientation Similarity (AOS)
7 | - Average Precision in BEV (AP)
8 | - Average Precision in 3D (AP)
9 |
10 | 1. Install:
11 | ```
12 | sudo apt-get install gnuplot gnuplot5
13 |
14 | cd /kitti_native_eval
15 |
16 | make
17 | ```
18 |
19 | 2. Copy the results folder into this folder. Each step should contain a 'data' folder.
20 |
21 | 3. Run the evaluation on all steps in the folder, for example:
22 | ```
23 | ./all_eval.sh 0.5
24 | ```
25 | ---
26 | Alternatively, you can run the evaluation on a single step with the following command:
27 | ```
28 | ./evaluate_object_3d_offline groundtruth_dir result_dir
29 | ```
30 |
31 | - Place the results folder in the data folder and use /kitti_native_eval as results_dir
32 | - Use ~/Kitti/object/training/label_2 as your groundtruth_dir
33 |
34 | ---
35 |
36 | Note that you do not have to run detection over all of the KITTI training data; the evaluator only evaluates samples whose result files exist.
37 |
38 | - Results will appear per class in the terminal for easy, medium and difficult data.
39 | - Precision-recall curves will be generated and saved to the 'plot' directory.
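The four `mb_preprocessing` configs that follow plug IoU ranges into the `MiniBatchConfig` message from `mini_batch.proto` above. As a rough, hedged sketch of how such lo/hi ranges are typically applied (the project's real logic presumably lives in the core mini-batch utilities, whose test `mini_batch_utils_test.py` appears in the tree): anchors whose best IoU falls in the negative range become background, those in the positive range become foreground, and everything in between is ignored.

```python
import numpy as np


def classify_anchors(max_ious, neg_iou_lo, neg_iou_hi, pos_iou_lo, pos_iou_hi):
    """Splits anchors into positives/negatives by IoU range (illustrative)."""
    # max_ious: (N,) best IoU of each anchor against any ground-truth box
    negatives = (max_ious >= neg_iou_lo) & (max_ious < neg_iou_hi)
    positives = (max_ious >= pos_iou_lo) & (max_ious <= pos_iou_hi)
    return positives, negatives  # anchors in neither mask are ignored


# With the RPN car thresholds below (0.0-0.3 negative, 0.5-1.0 positive):
# classify_anchors(np.array([0.1, 0.4, 0.7]), 0.0, 0.3, 0.5, 1.0)
# -> positives [F, F, T], negatives [T, F, F]; the 0.4 anchor is ignored.
```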
39 | # } 40 | 41 | mini_batch_size: 512 42 | } 43 | 44 | avod_config { 45 | iou_2d_thresholds { 46 | neg_iou_lo: 0.0 47 | neg_iou_hi: 0.45 48 | pos_iou_lo: 0.55 49 | pos_iou_hi: 1.0 50 | } 51 | 52 | mini_batch_size: 1024 53 | } 54 | } 55 | } 56 | -------------------------------------------------------------------------------- /avod/configs/mb_preprocessing/rpn_pedestrians.config: -------------------------------------------------------------------------------- 1 | name: 'kitti' 2 | data_split: 'trainval' 3 | data_split_dir: 'training' 4 | has_labels: True 5 | 6 | cluster_split: 'train', 7 | classes: ['Pedestrian'] 8 | num_clusters: [1] 9 | bev_source: 'lidar' 10 | 11 | kitti_utils_config { 12 | area_extents: [-40, 40, -5, 3, 0, 70] 13 | voxel_size: 0.1 14 | anchor_strides: [0.5, 0.5] 15 | 16 | bev_generator { 17 | slices { 18 | height_lo: -0.2 19 | height_hi: 2.3 20 | num_slices: 5 21 | } 22 | } 23 | 24 | mini_batch_config { 25 | density_threshold: 1 26 | 27 | rpn_config { 28 | iou_2d_thresholds { 29 | neg_iou_lo: 0.0 30 | neg_iou_hi: 0.3 31 | pos_iou_lo: 0.45 32 | pos_iou_hi: 1.0 33 | } 34 | # iou_3d_thresholds { 35 | # neg_iou_lo: 0.0 36 | # neg_iou_hi: 0.1 37 | # pos_iou_lo: 0.3 38 | # pos_iou_hi: 1.0 39 | # } 40 | 41 | mini_batch_size: 512 42 | } 43 | 44 | avod_config { 45 | iou_2d_thresholds { 46 | neg_iou_lo: 0.0 47 | neg_iou_hi: 0.45 48 | pos_iou_lo: 0.55 49 | pos_iou_hi: 1.0 50 | } 51 | 52 | mini_batch_size: 1024 53 | } 54 | } 55 | } 56 | -------------------------------------------------------------------------------- /avod/core/bev_generators/bev_generator.py: -------------------------------------------------------------------------------- 1 | import abc 2 | 3 | import numpy as np 4 | 5 | 6 | class BevGenerator: 7 | 8 | @abc.abstractmethod 9 | def generate_bev(self, **params): 10 | """Generates BEV maps 11 | 12 | Args: 13 | **params: additional keyword arguments for 14 | specific implementations of BevGenerator. 
15 | 16 | Returns: 17 | Dictionary with entries for height maps and one density map 18 | height_maps: list of height maps 19 | density_map: density map 20 | """ 21 | pass 22 | 23 | def _create_density_map(self, 24 | num_divisions, 25 | voxel_indices_2d, 26 | num_pts_per_voxel, 27 | norm_value): 28 | 29 | # Create empty density map 30 | density_map = np.zeros((num_divisions[0], 31 | num_divisions[2])) 32 | 33 | # Density is min(1.0, log(N+1)/norm_value); only update pixels at occupied voxels 34 | density_map[voxel_indices_2d[:, 0], voxel_indices_2d[:, 1]] = \ 35 | np.minimum(1.0, np.log(num_pts_per_voxel + 1) / norm_value) 36 | 37 | # norm_value = log(x): x=64 for stereo, x=16 for lidar, x=64 for depth 38 | # Rotate 90 degrees counter-clockwise (transpose then flip, same as np.rot90) 39 | density_map = np.flip(density_map.transpose(), axis=0) 40 | 41 | return density_map 42 | -------------------------------------------------------------------------------- /avod/configs/mb_preprocessing/rpn_people.config: -------------------------------------------------------------------------------- 1 | name: 'kitti' 2 | data_split: 'trainval' 3 | data_split_dir: 'training' 4 | has_labels: True 5 | 6 | cluster_split: 'train', 7 | classes: ['Pedestrian', 'Cyclist'] 8 | num_clusters: [1, 1] 9 | bev_source: 'lidar' 10 | 11 | kitti_utils_config { 12 | area_extents: [-40, 40, -5, 3, 0, 70] 13 | voxel_size: 0.1 14 | anchor_strides: [0.5, 0.5, 0.5, 0.5] 15 | 16 | bev_generator { 17 | slices { 18 | height_lo: -0.2 19 | height_hi: 2.3 20 | num_slices: 5 21 | } 22 | } 23 | 24 | mini_batch_config { 25 | density_threshold: 1 26 | 27 | rpn_config { 28 | iou_2d_thresholds { 29 | neg_iou_lo: 0.0 30 | neg_iou_hi: 0.3 31 | pos_iou_lo: 0.45 32 | pos_iou_hi: 1.0 33 | } 34 | # iou_3d_thresholds { 35 | # neg_iou_lo: 0.0 36 | # neg_iou_hi: 0.1 37 | # pos_iou_lo: 0.3 38 | # pos_iou_hi: 1.0 39 | # } 40 | 41 | mini_batch_size: 512 42 | } 43 | 44 | avod_config { 45 | iou_2d_thresholds { 46 | neg_iou_lo: 0.0 47 | neg_iou_hi: 0.45 48 | pos_iou_lo: 0.55 49 | pos_iou_hi: 1.0 50 | } 51 | 52 | mini_batch_size: 1024 53 | } 54 | } 55 | } 56 | -------------------------------------------------------------------------------- /avod/protos/kitti_dataset.proto: -------------------------------------------------------------------------------- 1 | package avod.protos; 2 | 3 | import "avod/protos/kitti_utils.proto"; 4 | 5 | message KittiDatasetConfig { 6 | 7 | // Unique name for dataset 8 | optional string name = 1 [default = "kitti"]; 9 | 10 | // Top level directory of the dataset 11 | optional string dataset_dir = 2 [default = "~/Kitti/object"]; 12 | 13 | // Split for the data (e.g. 'train', 'val') 14 | optional string data_split = 3 [default = "train"]; 15 | 16 | // Folder that holds the data for the chosen data split 17 | optional string data_split_dir = 4 [default = "training"]; 18 | 19 | // Whether the samples have labels 20 | optional bool has_labels = 5 [default = true]; 21 | 22 | // The data split to be used for calculating clusters (e.g. val split 23 | // should use the train split for clustering) 24 | optional string cluster_split = 6 [default = "train"]; 25 | 26 | // Classes to be classified (e.g. ['Car', 'Pedestrian', 'Cyclist']) 27 | repeated string classes = 7; 28 | 29 | // Number of clusters corresponding to each class (e.g. [2, 1, 2]) 30 | repeated int32 num_clusters = 8; 31 | 32 | // BEV source, e.g. 'lidar' 33 | required string bev_source = 9; 34 | 35 | // Augmentations (e.g.
[''], ['flipping'], ['flipping', 'pca_jitter']) 36 | repeated string aug_list = 10; 37 | 38 | // KittiUtils configuration 39 | optional KittiUtilsConfig kitti_utils_config = 20; 40 | } -------------------------------------------------------------------------------- /avod/core/ops.py: -------------------------------------------------------------------------------- 1 | """A module for helper tensorflow ops.""" 2 | 3 | import tensorflow as tf 4 | 5 | 6 | def indices_to_dense_vector(indices, 7 | size, 8 | indices_value=1., 9 | default_value=0, 10 | dtype=tf.float32): 11 | """Creates dense vector with indices set to specific value 12 | and rest to zeros. 13 | 14 | This function exists because it is unclear if it is safe to use 15 | tf.sparse_to_dense(indices, [size], 1, validate_indices=False) 16 | with indices which are not ordered. This function accepts a 17 | dynamic size (e.g. tf.shape(tensor)[0]) 18 | 19 | Args: 20 | indices: 1d Tensor with integer indices which are to be set to 21 | indices_values. 22 | size: scalar with size (integer) of output Tensor. 23 | indices_value: values of elements specified by indices in the output 24 | vector 25 | default_value: values of other elements in the output vector. 26 | dtype: data type. 27 | 28 | Returns: 29 | dense 1D Tensor of shape [size] with indices set to indices_values and the 30 | rest set to default_value. 31 | """ 32 | size = tf.to_int32(size) 33 | zeros = tf.ones([size], dtype=dtype) * default_value 34 | values = tf.ones_like(indices, dtype=dtype) * indices_value 35 | 36 | return tf.dynamic_stitch([tf.range(size), tf.to_int32(indices)], 37 | [zeros, values]) 38 | -------------------------------------------------------------------------------- /avod/tests/datasets/Kitti/object/training/calib/000000.txt: -------------------------------------------------------------------------------- 1 | P0: 7.070493000000e+02 0.000000000000e+00 6.040814000000e+02 0.000000000000e+00 0.000000000000e+00 7.070493000000e+02 1.805066000000e+02 0.000000000000e+00 0.000000000000e+00 0.000000000000e+00 1.000000000000e+00 0.000000000000e+00 2 | P1: 7.070493000000e+02 0.000000000000e+00 6.040814000000e+02 -3.797842000000e+02 0.000000000000e+00 7.070493000000e+02 1.805066000000e+02 0.000000000000e+00 0.000000000000e+00 0.000000000000e+00 1.000000000000e+00 0.000000000000e+00 3 | P2: 7.070493000000e+02 0.000000000000e+00 6.040814000000e+02 4.575831000000e+01 0.000000000000e+00 7.070493000000e+02 1.805066000000e+02 -3.454157000000e-01 0.000000000000e+00 0.000000000000e+00 1.000000000000e+00 4.981016000000e-03 4 | P3: 7.070493000000e+02 0.000000000000e+00 6.040814000000e+02 -3.341081000000e+02 0.000000000000e+00 7.070493000000e+02 1.805066000000e+02 2.330660000000e+00 0.000000000000e+00 0.000000000000e+00 1.000000000000e+00 3.201153000000e-03 5 | R0_rect: 9.999128000000e-01 1.009263000000e-02 -8.511932000000e-03 -1.012729000000e-02 9.999406000000e-01 -4.037671000000e-03 8.470675000000e-03 4.123522000000e-03 9.999556000000e-01 6 | Tr_velo_to_cam: 6.927964000000e-03 -9.999722000000e-01 -2.757829000000e-03 -2.457729000000e-02 -1.162982000000e-03 2.749836000000e-03 -9.999955000000e-01 -6.127237000000e-02 9.999753000000e-01 6.931141000000e-03 -1.143899000000e-03 -3.321029000000e-01 7 | Tr_imu_to_velo: 9.999976000000e-01 7.553071000000e-04 -2.035826000000e-03 -8.086759000000e-01 -7.854027000000e-04 9.998898000000e-01 -1.482298000000e-02 3.195559000000e-01 2.024406000000e-03 1.482454000000e-02 9.998881000000e-01 -7.997231000000e-01 8 | 
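The calibration files above follow the standard KITTI object-detection format: `P0`-`P3` are flattened 3x4 camera projection matrices, `R0_rect` is a 3x3 rectification matrix, and `Tr_velo_to_cam` / `Tr_imu_to_velo` are 3x4 rigid transforms, so a velodyne point projects into the left color image as x_img = P2 * R0_rect * Tr_velo_to_cam * x_velo. As a minimal, self-contained sketch of how such a file can be parsed and used (the function names here are the editor's own and are not part of this repo, which performs its calibration handling elsewhere):
```
import numpy as np


def read_kitti_calib(calib_path):
    """Parses a KITTI calib file (like 000000.txt above) into matrices."""
    mats = {}
    with open(calib_path) as f:
        for line in f:
            if ':' not in line:
                continue  # skip blank trailing lines
            key, values = line.split(':', 1)
            mats[key] = np.array([float(v) for v in values.split()])
    p2 = mats['P2'].reshape(3, 4)            # camera 2 projection matrix
    r0_rect = mats['R0_rect'].reshape(3, 3)  # rectification matrix
    tr_velo_to_cam = mats['Tr_velo_to_cam'].reshape(3, 4)
    return p2, r0_rect, tr_velo_to_cam


def project_velo_to_image(points, p2, r0_rect, tr_velo_to_cam):
    """Projects Nx3 velodyne points to Nx2 pixel coords via P2 * R0 * Tr * x."""
    num_pts = points.shape[0]
    pts_h = np.hstack([points, np.ones((num_pts, 1))])   # Nx4 homogeneous
    pts_cam = r0_rect @ (tr_velo_to_cam @ pts_h.T)       # 3xN rectified cam frame
    pts_img = p2 @ np.vstack([pts_cam, np.ones((1, num_pts))])  # 3xN
    return (pts_img[:2] / pts_img[2]).T                  # divide by depth
```
Points with non-positive depth (behind the camera) should be filtered out before the final division.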
-------------------------------------------------------------------------------- /avod/tests/datasets/Kitti/object/training/calib/000001.txt: -------------------------------------------------------------------------------- 1 | P0: 7.215377000000e+02 0.000000000000e+00 6.095593000000e+02 0.000000000000e+00 0.000000000000e+00 7.215377000000e+02 1.728540000000e+02 0.000000000000e+00 0.000000000000e+00 0.000000000000e+00 1.000000000000e+00 0.000000000000e+00 2 | P1: 7.215377000000e+02 0.000000000000e+00 6.095593000000e+02 -3.875744000000e+02 0.000000000000e+00 7.215377000000e+02 1.728540000000e+02 0.000000000000e+00 0.000000000000e+00 0.000000000000e+00 1.000000000000e+00 0.000000000000e+00 3 | P2: 7.215377000000e+02 0.000000000000e+00 6.095593000000e+02 4.485728000000e+01 0.000000000000e+00 7.215377000000e+02 1.728540000000e+02 2.163791000000e-01 0.000000000000e+00 0.000000000000e+00 1.000000000000e+00 2.745884000000e-03 4 | P3: 7.215377000000e+02 0.000000000000e+00 6.095593000000e+02 -3.395242000000e+02 0.000000000000e+00 7.215377000000e+02 1.728540000000e+02 2.199936000000e+00 0.000000000000e+00 0.000000000000e+00 1.000000000000e+00 2.729905000000e-03 5 | R0_rect: 9.999239000000e-01 9.837760000000e-03 -7.445048000000e-03 -9.869795000000e-03 9.999421000000e-01 -4.278459000000e-03 7.402527000000e-03 4.351614000000e-03 9.999631000000e-01 6 | Tr_velo_to_cam: 7.533745000000e-03 -9.999714000000e-01 -6.166020000000e-04 -4.069766000000e-03 1.480249000000e-02 7.280733000000e-04 -9.998902000000e-01 -7.631618000000e-02 9.998621000000e-01 7.523790000000e-03 1.480755000000e-02 -2.717806000000e-01 7 | Tr_imu_to_velo: 9.999976000000e-01 7.553071000000e-04 -2.035826000000e-03 -8.086759000000e-01 -7.854027000000e-04 9.998898000000e-01 -1.482298000000e-02 3.195559000000e-01 2.024406000000e-03 1.482454000000e-02 9.998881000000e-01 -7.997231000000e-01 8 | 9 | -------------------------------------------------------------------------------- /avod/tests/datasets/Kitti/object/training/calib/000002.txt: -------------------------------------------------------------------------------- 1 | P0: 7.215377000000e+02 0.000000000000e+00 6.095593000000e+02 0.000000000000e+00 0.000000000000e+00 7.215377000000e+02 1.728540000000e+02 0.000000000000e+00 0.000000000000e+00 0.000000000000e+00 1.000000000000e+00 0.000000000000e+00 2 | P1: 7.215377000000e+02 0.000000000000e+00 6.095593000000e+02 -3.875744000000e+02 0.000000000000e+00 7.215377000000e+02 1.728540000000e+02 0.000000000000e+00 0.000000000000e+00 0.000000000000e+00 1.000000000000e+00 0.000000000000e+00 3 | P2: 7.215377000000e+02 0.000000000000e+00 6.095593000000e+02 4.485728000000e+01 0.000000000000e+00 7.215377000000e+02 1.728540000000e+02 2.163791000000e-01 0.000000000000e+00 0.000000000000e+00 1.000000000000e+00 2.745884000000e-03 4 | P3: 7.215377000000e+02 0.000000000000e+00 6.095593000000e+02 -3.395242000000e+02 0.000000000000e+00 7.215377000000e+02 1.728540000000e+02 2.199936000000e+00 0.000000000000e+00 0.000000000000e+00 1.000000000000e+00 2.729905000000e-03 5 | R0_rect: 9.999239000000e-01 9.837760000000e-03 -7.445048000000e-03 -9.869795000000e-03 9.999421000000e-01 -4.278459000000e-03 7.402527000000e-03 4.351614000000e-03 9.999631000000e-01 6 | Tr_velo_to_cam: 7.533745000000e-03 -9.999714000000e-01 -6.166020000000e-04 -4.069766000000e-03 1.480249000000e-02 7.280733000000e-04 -9.998902000000e-01 -7.631618000000e-02 9.998621000000e-01 7.523790000000e-03 1.480755000000e-02 -2.717806000000e-01 7 | Tr_imu_to_velo: 9.999976000000e-01 7.553071000000e-04 
-2.035826000000e-03 -8.086759000000e-01 -7.854027000000e-04 9.998898000000e-01 -1.482298000000e-02 3.195559000000e-01 2.024406000000e-03 1.482454000000e-02 9.998881000000e-01 -7.997231000000e-01 8 | 9 | -------------------------------------------------------------------------------- /avod/tests/datasets/Kitti/object/training/calib/000003.txt: -------------------------------------------------------------------------------- 1 | P0: 7.215377000000e+02 0.000000000000e+00 6.095593000000e+02 0.000000000000e+00 0.000000000000e+00 7.215377000000e+02 1.728540000000e+02 0.000000000000e+00 0.000000000000e+00 0.000000000000e+00 1.000000000000e+00 0.000000000000e+00 2 | P1: 7.215377000000e+02 0.000000000000e+00 6.095593000000e+02 -3.875744000000e+02 0.000000000000e+00 7.215377000000e+02 1.728540000000e+02 0.000000000000e+00 0.000000000000e+00 0.000000000000e+00 1.000000000000e+00 0.000000000000e+00 3 | P2: 7.215377000000e+02 0.000000000000e+00 6.095593000000e+02 4.485728000000e+01 0.000000000000e+00 7.215377000000e+02 1.728540000000e+02 2.163791000000e-01 0.000000000000e+00 0.000000000000e+00 1.000000000000e+00 2.745884000000e-03 4 | P3: 7.215377000000e+02 0.000000000000e+00 6.095593000000e+02 -3.395242000000e+02 0.000000000000e+00 7.215377000000e+02 1.728540000000e+02 2.199936000000e+00 0.000000000000e+00 0.000000000000e+00 1.000000000000e+00 2.729905000000e-03 5 | R0_rect: 9.999239000000e-01 9.837760000000e-03 -7.445048000000e-03 -9.869795000000e-03 9.999421000000e-01 -4.278459000000e-03 7.402527000000e-03 4.351614000000e-03 9.999631000000e-01 6 | Tr_velo_to_cam: 7.533745000000e-03 -9.999714000000e-01 -6.166020000000e-04 -4.069766000000e-03 1.480249000000e-02 7.280733000000e-04 -9.998902000000e-01 -7.631618000000e-02 9.998621000000e-01 7.523790000000e-03 1.480755000000e-02 -2.717806000000e-01 7 | Tr_imu_to_velo: 9.999976000000e-01 7.553071000000e-04 -2.035826000000e-03 -8.086759000000e-01 -7.854027000000e-04 9.998898000000e-01 -1.482298000000e-02 3.195559000000e-01 2.024406000000e-03 1.482454000000e-02 9.998881000000e-01 -7.997231000000e-01 8 | 9 | -------------------------------------------------------------------------------- /avod/tests/datasets/Kitti/object/training/calib/000004.txt: -------------------------------------------------------------------------------- 1 | P0: 7.215377000000e+02 0.000000000000e+00 6.095593000000e+02 0.000000000000e+00 0.000000000000e+00 7.215377000000e+02 1.728540000000e+02 0.000000000000e+00 0.000000000000e+00 0.000000000000e+00 1.000000000000e+00 0.000000000000e+00 2 | P1: 7.215377000000e+02 0.000000000000e+00 6.095593000000e+02 -3.875744000000e+02 0.000000000000e+00 7.215377000000e+02 1.728540000000e+02 0.000000000000e+00 0.000000000000e+00 0.000000000000e+00 1.000000000000e+00 0.000000000000e+00 3 | P2: 7.215377000000e+02 0.000000000000e+00 6.095593000000e+02 4.485728000000e+01 0.000000000000e+00 7.215377000000e+02 1.728540000000e+02 2.163791000000e-01 0.000000000000e+00 0.000000000000e+00 1.000000000000e+00 2.745884000000e-03 4 | P3: 7.215377000000e+02 0.000000000000e+00 6.095593000000e+02 -3.395242000000e+02 0.000000000000e+00 7.215377000000e+02 1.728540000000e+02 2.199936000000e+00 0.000000000000e+00 0.000000000000e+00 1.000000000000e+00 2.729905000000e-03 5 | R0_rect: 9.999239000000e-01 9.837760000000e-03 -7.445048000000e-03 -9.869795000000e-03 9.999421000000e-01 -4.278459000000e-03 7.402527000000e-03 4.351614000000e-03 9.999631000000e-01 6 | Tr_velo_to_cam: 7.533745000000e-03 -9.999714000000e-01 -6.166020000000e-04 -4.069766000000e-03 1.480249000000e-02 
7.280733000000e-04 -9.998902000000e-01 -7.631618000000e-02 9.998621000000e-01 7.523790000000e-03 1.480755000000e-02 -2.717806000000e-01 7 | Tr_imu_to_velo: 9.999976000000e-01 7.553071000000e-04 -2.035826000000e-03 -8.086759000000e-01 -7.854027000000e-04 9.998898000000e-01 -1.482298000000e-02 3.195559000000e-01 2.024406000000e-03 1.482454000000e-02 9.998881000000e-01 -7.997231000000e-01 8 | 9 | -------------------------------------------------------------------------------- /avod/tests/datasets/Kitti/object/training/calib/000005.txt: -------------------------------------------------------------------------------- 1 | P0: 7.215377000000e+02 0.000000000000e+00 6.095593000000e+02 0.000000000000e+00 0.000000000000e+00 7.215377000000e+02 1.728540000000e+02 0.000000000000e+00 0.000000000000e+00 0.000000000000e+00 1.000000000000e+00 0.000000000000e+00 2 | P1: 7.215377000000e+02 0.000000000000e+00 6.095593000000e+02 -3.875744000000e+02 0.000000000000e+00 7.215377000000e+02 1.728540000000e+02 0.000000000000e+00 0.000000000000e+00 0.000000000000e+00 1.000000000000e+00 0.000000000000e+00 3 | P2: 7.215377000000e+02 0.000000000000e+00 6.095593000000e+02 4.485728000000e+01 0.000000000000e+00 7.215377000000e+02 1.728540000000e+02 2.163791000000e-01 0.000000000000e+00 0.000000000000e+00 1.000000000000e+00 2.745884000000e-03 4 | P3: 7.215377000000e+02 0.000000000000e+00 6.095593000000e+02 -3.395242000000e+02 0.000000000000e+00 7.215377000000e+02 1.728540000000e+02 2.199936000000e+00 0.000000000000e+00 0.000000000000e+00 1.000000000000e+00 2.729905000000e-03 5 | R0_rect: 9.999239000000e-01 9.837760000000e-03 -7.445048000000e-03 -9.869795000000e-03 9.999421000000e-01 -4.278459000000e-03 7.402527000000e-03 4.351614000000e-03 9.999631000000e-01 6 | Tr_velo_to_cam: 7.533745000000e-03 -9.999714000000e-01 -6.166020000000e-04 -4.069766000000e-03 1.480249000000e-02 7.280733000000e-04 -9.998902000000e-01 -7.631618000000e-02 9.998621000000e-01 7.523790000000e-03 1.480755000000e-02 -2.717806000000e-01 7 | Tr_imu_to_velo: 9.999976000000e-01 7.553071000000e-04 -2.035826000000e-03 -8.086759000000e-01 -7.854027000000e-04 9.998898000000e-01 -1.482298000000e-02 3.195559000000e-01 2.024406000000e-03 1.482454000000e-02 9.998881000000e-01 -7.997231000000e-01 8 | 9 | -------------------------------------------------------------------------------- /avod/tests/datasets/Kitti/object/training/calib/000007.txt: -------------------------------------------------------------------------------- 1 | P0: 7.215377000000e+02 0.000000000000e+00 6.095593000000e+02 0.000000000000e+00 0.000000000000e+00 7.215377000000e+02 1.728540000000e+02 0.000000000000e+00 0.000000000000e+00 0.000000000000e+00 1.000000000000e+00 0.000000000000e+00 2 | P1: 7.215377000000e+02 0.000000000000e+00 6.095593000000e+02 -3.875744000000e+02 0.000000000000e+00 7.215377000000e+02 1.728540000000e+02 0.000000000000e+00 0.000000000000e+00 0.000000000000e+00 1.000000000000e+00 0.000000000000e+00 3 | P2: 7.215377000000e+02 0.000000000000e+00 6.095593000000e+02 4.485728000000e+01 0.000000000000e+00 7.215377000000e+02 1.728540000000e+02 2.163791000000e-01 0.000000000000e+00 0.000000000000e+00 1.000000000000e+00 2.745884000000e-03 4 | P3: 7.215377000000e+02 0.000000000000e+00 6.095593000000e+02 -3.395242000000e+02 0.000000000000e+00 7.215377000000e+02 1.728540000000e+02 2.199936000000e+00 0.000000000000e+00 0.000000000000e+00 1.000000000000e+00 2.729905000000e-03 5 | R0_rect: 9.999239000000e-01 9.837760000000e-03 -7.445048000000e-03 -9.869795000000e-03 9.999421000000e-01 
-4.278459000000e-03 7.402527000000e-03 4.351614000000e-03 9.999631000000e-01 6 | Tr_velo_to_cam: 7.533745000000e-03 -9.999714000000e-01 -6.166020000000e-04 -4.069766000000e-03 1.480249000000e-02 7.280733000000e-04 -9.998902000000e-01 -7.631618000000e-02 9.998621000000e-01 7.523790000000e-03 1.480755000000e-02 -2.717806000000e-01 7 | Tr_imu_to_velo: 9.999976000000e-01 7.553071000000e-04 -2.035826000000e-03 -8.086759000000e-01 -7.854027000000e-04 9.998898000000e-01 -1.482298000000e-02 3.195559000000e-01 2.024406000000e-03 1.482454000000e-02 9.998881000000e-01 -7.997231000000e-01 8 | 9 | -------------------------------------------------------------------------------- /avod/tests/datasets/Kitti/object/training/calib/000008.txt: -------------------------------------------------------------------------------- 1 | P0: 7.215377000000e+02 0.000000000000e+00 6.095593000000e+02 0.000000000000e+00 0.000000000000e+00 7.215377000000e+02 1.728540000000e+02 0.000000000000e+00 0.000000000000e+00 0.000000000000e+00 1.000000000000e+00 0.000000000000e+00 2 | P1: 7.215377000000e+02 0.000000000000e+00 6.095593000000e+02 -3.875744000000e+02 0.000000000000e+00 7.215377000000e+02 1.728540000000e+02 0.000000000000e+00 0.000000000000e+00 0.000000000000e+00 1.000000000000e+00 0.000000000000e+00 3 | P2: 7.215377000000e+02 0.000000000000e+00 6.095593000000e+02 4.485728000000e+01 0.000000000000e+00 7.215377000000e+02 1.728540000000e+02 2.163791000000e-01 0.000000000000e+00 0.000000000000e+00 1.000000000000e+00 2.745884000000e-03 4 | P3: 7.215377000000e+02 0.000000000000e+00 6.095593000000e+02 -3.395242000000e+02 0.000000000000e+00 7.215377000000e+02 1.728540000000e+02 2.199936000000e+00 0.000000000000e+00 0.000000000000e+00 1.000000000000e+00 2.729905000000e-03 5 | R0_rect: 9.999239000000e-01 9.837760000000e-03 -7.445048000000e-03 -9.869795000000e-03 9.999421000000e-01 -4.278459000000e-03 7.402527000000e-03 4.351614000000e-03 9.999631000000e-01 6 | Tr_velo_to_cam: 7.533745000000e-03 -9.999714000000e-01 -6.166020000000e-04 -4.069766000000e-03 1.480249000000e-02 7.280733000000e-04 -9.998902000000e-01 -7.631618000000e-02 9.998621000000e-01 7.523790000000e-03 1.480755000000e-02 -2.717806000000e-01 7 | Tr_imu_to_velo: 9.999976000000e-01 7.553071000000e-04 -2.035826000000e-03 -8.086759000000e-01 -7.854027000000e-04 9.998898000000e-01 -1.482298000000e-02 3.195559000000e-01 2.024406000000e-03 1.482454000000e-02 9.998881000000e-01 -7.997231000000e-01 8 | 9 | -------------------------------------------------------------------------------- /avod/tests/datasets/Kitti/object/training/calib/000009.txt: -------------------------------------------------------------------------------- 1 | P0: 7.215377000000e+02 0.000000000000e+00 6.095593000000e+02 0.000000000000e+00 0.000000000000e+00 7.215377000000e+02 1.728540000000e+02 0.000000000000e+00 0.000000000000e+00 0.000000000000e+00 1.000000000000e+00 0.000000000000e+00 2 | P1: 7.215377000000e+02 0.000000000000e+00 6.095593000000e+02 -3.875744000000e+02 0.000000000000e+00 7.215377000000e+02 1.728540000000e+02 0.000000000000e+00 0.000000000000e+00 0.000000000000e+00 1.000000000000e+00 0.000000000000e+00 3 | P2: 7.215377000000e+02 0.000000000000e+00 6.095593000000e+02 4.485728000000e+01 0.000000000000e+00 7.215377000000e+02 1.728540000000e+02 2.163791000000e-01 0.000000000000e+00 0.000000000000e+00 1.000000000000e+00 2.745884000000e-03 4 | P3: 7.215377000000e+02 0.000000000000e+00 6.095593000000e+02 -3.395242000000e+02 0.000000000000e+00 7.215377000000e+02 1.728540000000e+02 
2.199936000000e+00 0.000000000000e+00 0.000000000000e+00 1.000000000000e+00 2.729905000000e-03 5 | R0_rect: 9.999239000000e-01 9.837760000000e-03 -7.445048000000e-03 -9.869795000000e-03 9.999421000000e-01 -4.278459000000e-03 7.402527000000e-03 4.351614000000e-03 9.999631000000e-01 6 | Tr_velo_to_cam: 7.533745000000e-03 -9.999714000000e-01 -6.166020000000e-04 -4.069766000000e-03 1.480249000000e-02 7.280733000000e-04 -9.998902000000e-01 -7.631618000000e-02 9.998621000000e-01 7.523790000000e-03 1.480755000000e-02 -2.717806000000e-01 7 | Tr_imu_to_velo: 9.999976000000e-01 7.553071000000e-04 -2.035826000000e-03 -8.086759000000e-01 -7.854027000000e-04 9.998898000000e-01 -1.482298000000e-02 3.195559000000e-01 2.024406000000e-03 1.482454000000e-02 9.998881000000e-01 -7.997231000000e-01 8 | 9 | -------------------------------------------------------------------------------- /avod/tests/datasets/Kitti/object/training/calib/000142.txt: -------------------------------------------------------------------------------- 1 | P0: 7.215377000000e+02 0.000000000000e+00 6.095593000000e+02 0.000000000000e+00 0.000000000000e+00 7.215377000000e+02 1.728540000000e+02 0.000000000000e+00 0.000000000000e+00 0.000000000000e+00 1.000000000000e+00 0.000000000000e+00 2 | P1: 7.215377000000e+02 0.000000000000e+00 6.095593000000e+02 -3.875744000000e+02 0.000000000000e+00 7.215377000000e+02 1.728540000000e+02 0.000000000000e+00 0.000000000000e+00 0.000000000000e+00 1.000000000000e+00 0.000000000000e+00 3 | P2: 7.215377000000e+02 0.000000000000e+00 6.095593000000e+02 4.485728000000e+01 0.000000000000e+00 7.215377000000e+02 1.728540000000e+02 2.163791000000e-01 0.000000000000e+00 0.000000000000e+00 1.000000000000e+00 2.745884000000e-03 4 | P3: 7.215377000000e+02 0.000000000000e+00 6.095593000000e+02 -3.395242000000e+02 0.000000000000e+00 7.215377000000e+02 1.728540000000e+02 2.199936000000e+00 0.000000000000e+00 0.000000000000e+00 1.000000000000e+00 2.729905000000e-03 5 | R0_rect: 9.999239000000e-01 9.837760000000e-03 -7.445048000000e-03 -9.869795000000e-03 9.999421000000e-01 -4.278459000000e-03 7.402527000000e-03 4.351614000000e-03 9.999631000000e-01 6 | Tr_velo_to_cam: 7.533745000000e-03 -9.999714000000e-01 -6.166020000000e-04 -4.069766000000e-03 1.480249000000e-02 7.280733000000e-04 -9.998902000000e-01 -7.631618000000e-02 9.998621000000e-01 7.523790000000e-03 1.480755000000e-02 -2.717806000000e-01 7 | Tr_imu_to_velo: 9.999976000000e-01 7.553071000000e-04 -2.035826000000e-03 -8.086759000000e-01 -7.854027000000e-04 9.998898000000e-01 -1.482298000000e-02 3.195559000000e-01 2.024406000000e-03 1.482454000000e-02 9.998881000000e-01 -7.997231000000e-01 8 | 9 | -------------------------------------------------------------------------------- /avod/tests/datasets/Kitti/object/training/calib/000217.txt: -------------------------------------------------------------------------------- 1 | P0: 7.215377000000e+02 0.000000000000e+00 6.095593000000e+02 0.000000000000e+00 0.000000000000e+00 7.215377000000e+02 1.728540000000e+02 0.000000000000e+00 0.000000000000e+00 0.000000000000e+00 1.000000000000e+00 0.000000000000e+00 2 | P1: 7.215377000000e+02 0.000000000000e+00 6.095593000000e+02 -3.875744000000e+02 0.000000000000e+00 7.215377000000e+02 1.728540000000e+02 0.000000000000e+00 0.000000000000e+00 0.000000000000e+00 1.000000000000e+00 0.000000000000e+00 3 | P2: 7.215377000000e+02 0.000000000000e+00 6.095593000000e+02 4.485728000000e+01 0.000000000000e+00 7.215377000000e+02 1.728540000000e+02 2.163791000000e-01 0.000000000000e+00 
0.000000000000e+00 1.000000000000e+00 2.745884000000e-03 4 | P3: 7.215377000000e+02 0.000000000000e+00 6.095593000000e+02 -3.395242000000e+02 0.000000000000e+00 7.215377000000e+02 1.728540000000e+02 2.199936000000e+00 0.000000000000e+00 0.000000000000e+00 1.000000000000e+00 2.729905000000e-03 5 | R0_rect: 9.999239000000e-01 9.837760000000e-03 -7.445048000000e-03 -9.869795000000e-03 9.999421000000e-01 -4.278459000000e-03 7.402527000000e-03 4.351614000000e-03 9.999631000000e-01 6 | Tr_velo_to_cam: 7.533745000000e-03 -9.999714000000e-01 -6.166020000000e-04 -4.069766000000e-03 1.480249000000e-02 7.280733000000e-04 -9.998902000000e-01 -7.631618000000e-02 9.998621000000e-01 7.523790000000e-03 1.480755000000e-02 -2.717806000000e-01 7 | Tr_imu_to_velo: 9.999976000000e-01 7.553071000000e-04 -2.035826000000e-03 -8.086759000000e-01 -7.854027000000e-04 9.998898000000e-01 -1.482298000000e-02 3.195559000000e-01 2.024406000000e-03 1.482454000000e-02 9.998881000000e-01 -7.997231000000e-01 8 | 9 | -------------------------------------------------------------------------------- /avod/tests/datasets/Kitti/object/training/calib/000006.txt: -------------------------------------------------------------------------------- 1 | P0: 7.183351000000e+02 0.000000000000e+00 6.003891000000e+02 0.000000000000e+00 0.000000000000e+00 7.183351000000e+02 1.815122000000e+02 0.000000000000e+00 0.000000000000e+00 0.000000000000e+00 1.000000000000e+00 0.000000000000e+00 2 | P1: 7.183351000000e+02 0.000000000000e+00 6.003891000000e+02 -3.858846000000e+02 0.000000000000e+00 7.183351000000e+02 1.815122000000e+02 0.000000000000e+00 0.000000000000e+00 0.000000000000e+00 1.000000000000e+00 0.000000000000e+00 3 | P2: 7.183351000000e+02 0.000000000000e+00 6.003891000000e+02 4.450382000000e+01 0.000000000000e+00 7.183351000000e+02 1.815122000000e+02 -5.951107000000e-01 0.000000000000e+00 0.000000000000e+00 1.000000000000e+00 2.616315000000e-03 4 | P3: 7.183351000000e+02 0.000000000000e+00 6.003891000000e+02 -3.363147000000e+02 0.000000000000e+00 7.183351000000e+02 1.815122000000e+02 3.159867000000e+00 0.000000000000e+00 0.000000000000e+00 1.000000000000e+00 5.323834000000e-03 5 | R0_rect: 9.999478000000e-01 9.791707000000e-03 -2.925305000000e-03 -9.806939000000e-03 9.999382000000e-01 -5.238719000000e-03 2.873828000000e-03 5.267134000000e-03 9.999820000000e-01 6 | Tr_velo_to_cam: 7.755449000000e-03 -9.999694000000e-01 -1.014303000000e-03 -7.275538000000e-03 2.294056000000e-03 1.032122000000e-03 -9.999968000000e-01 -6.324057000000e-02 9.999673000000e-01 7.753097000000e-03 2.301990000000e-03 -2.670414000000e-01 7 | Tr_imu_to_velo: 9.999976000000e-01 7.553071000000e-04 -2.035826000000e-03 -8.086759000000e-01 -7.854027000000e-04 9.998898000000e-01 -1.482298000000e-02 3.195559000000e-01 2.024406000000e-03 1.482454000000e-02 9.998881000000e-01 -7.997231000000e-01 8 | 9 | -------------------------------------------------------------------------------- /avod/tests/datasets/Kitti/object/training/calib/000076.txt: -------------------------------------------------------------------------------- 1 | P0: 7.183351000000e+02 0.000000000000e+00 6.003891000000e+02 0.000000000000e+00 0.000000000000e+00 7.183351000000e+02 1.815122000000e+02 0.000000000000e+00 0.000000000000e+00 0.000000000000e+00 1.000000000000e+00 0.000000000000e+00 2 | P1: 7.183351000000e+02 0.000000000000e+00 6.003891000000e+02 -3.858846000000e+02 0.000000000000e+00 7.183351000000e+02 1.815122000000e+02 0.000000000000e+00 0.000000000000e+00 0.000000000000e+00 1.000000000000e+00 
0.000000000000e+00 3 | P2: 7.183351000000e+02 0.000000000000e+00 6.003891000000e+02 4.450382000000e+01 0.000000000000e+00 7.183351000000e+02 1.815122000000e+02 -5.951107000000e-01 0.000000000000e+00 0.000000000000e+00 1.000000000000e+00 2.616315000000e-03 4 | P3: 7.183351000000e+02 0.000000000000e+00 6.003891000000e+02 -3.363147000000e+02 0.000000000000e+00 7.183351000000e+02 1.815122000000e+02 3.159867000000e+00 0.000000000000e+00 0.000000000000e+00 1.000000000000e+00 5.323834000000e-03 5 | R0_rect: 9.999478000000e-01 9.791707000000e-03 -2.925305000000e-03 -9.806939000000e-03 9.999382000000e-01 -5.238719000000e-03 2.873828000000e-03 5.267134000000e-03 9.999820000000e-01 6 | Tr_velo_to_cam: 7.755449000000e-03 -9.999694000000e-01 -1.014303000000e-03 -7.275538000000e-03 2.294056000000e-03 1.032122000000e-03 -9.999968000000e-01 -6.324057000000e-02 9.999673000000e-01 7.753097000000e-03 2.301990000000e-03 -2.670414000000e-01 7 | Tr_imu_to_velo: 9.999976000000e-01 7.553071000000e-04 -2.035826000000e-03 -8.086759000000e-01 -7.854027000000e-04 9.998898000000e-01 -1.482298000000e-02 3.195559000000e-01 2.024406000000e-03 1.482454000000e-02 9.998881000000e-01 -7.997231000000e-01 8 | 9 | -------------------------------------------------------------------------------- /avod/core/trainer_utils.py: -------------------------------------------------------------------------------- 1 | import os 2 | import tensorflow as tf 3 | 4 | slim = tf.contrib.slim 5 | 6 | 7 | def load_checkpoints(checkpoint_dir, saver): 8 | 9 | # Load latest checkpoint if available 10 | all_checkpoint_states = tf.train.get_checkpoint_state( 11 | checkpoint_dir) 12 | if all_checkpoint_states is not None: 13 | all_checkpoint_paths = \ 14 | all_checkpoint_states.all_model_checkpoint_paths 15 | # Save the checkpoint list into saver.last_checkpoints 16 | saver.recover_last_checkpoints(all_checkpoint_paths) 17 | else: 18 | print('No checkpoints found') 19 | 20 | 21 | def get_global_step(sess, global_step_tensor): 22 | # Read the global step if restored 23 | global_step = tf.train.global_step(sess, 24 | global_step_tensor) 25 | return global_step 26 | 27 | 28 | def create_dir(dir): 29 | """ 30 | Checks if a directory exists, and creates it if it does not 31 | 32 | Args: 33 | dir: directory to create 34 | """ 35 | if not os.path.exists(dir): 36 | os.makedirs(dir) 37 | 38 | 39 | def load_model_weights(sess, checkpoint_dir): 40 | """Restores the model weights. 41 | 42 | Loads the weights from the checkpoint dir onto the 43 | model. It ignores missing weights, since this is used 44 | to load the RPN weights onto AVOD.
45 | 46 | Args: 47 | sess: A TensorFlow session 48 | checkpoint_dir: Path to the weights to be loaded 49 | """ 50 | 51 | init_fn = slim.assign_from_checkpoint_fn( 52 | checkpoint_dir, slim.get_model_variables(), ignore_missing_vars=True) 53 | init_fn(sess) 54 | -------------------------------------------------------------------------------- /avod/core/avod_fc_layers/avod_fc_layer_utils.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | 3 | OFFSETS_OUTPUT_SIZE = { 4 | 'box_3d': 6, 5 | 'box_8c': 24, 6 | 'box_8co': 24, 7 | 'box_4c': 10, 8 | 'box_4ca': 10, 9 | } 10 | 11 | ANG_VECS_OUTPUT_SIZE = { 12 | 'box_3d': 2, 13 | 'box_8c': 0, 14 | 'box_8co': 0, 15 | 'box_4c': 0, 16 | 'box_4ca': 2, 17 | } 18 | 19 | 20 | def feature_fusion(fusion_method, inputs, input_weights): 21 | """Applies feature fusion to multiple inputs 22 | 23 | Args: 24 | fusion_method: 'mean', 'concat', or 'max' 25 | inputs: Input tensors of shape (batch_size, width, height, depth) 26 | If fusion_method is 'mean', inputs must have same dimensions. 27 | If fusion_method is 'concat', width and height must be the same; 'max' fuses the first two inputs elementwise. 28 | input_weights: Weight of each input if using 'mean' fusion method 29 | 30 | Returns: 31 | fused_features: Features after fusion 32 | """ 33 | 34 | # Feature map fusion 35 | with tf.variable_scope('fusion'): 36 | fused_features = None 37 | 38 | if fusion_method == 'mean': 39 | rois_sum = tf.reduce_sum(inputs, axis=0) 40 | rois_mean = tf.divide(rois_sum, tf.reduce_sum(input_weights)) 41 | fused_features = rois_mean 42 | 43 | elif fusion_method == 'concat': 44 | # Concatenate along last axis 45 | last_axis = len(inputs[0].get_shape()) - 1 46 | fused_features = tf.concat(inputs, axis=last_axis) 47 | 48 | elif fusion_method == 'max': 49 | fused_features = tf.maximum(inputs[0], inputs[1]) 50 | 51 | else: 52 | raise ValueError('Invalid fusion method', fusion_method) 53 | 54 | return fused_features 55 | -------------------------------------------------------------------------------- /avod/core/model.py: -------------------------------------------------------------------------------- 1 | """Abstract detection model. 2 | 3 | This file defines a generic base class for detection models. Programs that are 4 | designed to work with arbitrary detection models should only depend on this 5 | class. We intend for the functions in this class to follow tensor-in/tensor-out 6 | design, thus all functions have tensors or lists/dictionaries holding tensors as 7 | inputs and outputs. 8 | 9 | Abstractly, detection models predict output tensors given input images 10 | which can be passed to a loss function at training time or passed to a 11 | postprocessing function at eval time. The postprocessing happens outside the 12 | model. 13 | 14 | """ 15 | from abc import ABCMeta 16 | from abc import abstractmethod 17 | 18 | 19 | class DetectionModel(object): 20 | """Abstract base class for detection models.""" 21 | __metaclass__ = ABCMeta 22 | 23 | def __init__(self, model_config): 24 | """Constructor.
25 | 26 | Args: 27 | model_config: configuration for the model 28 | """ 29 | self._config = model_config 30 | 31 | @property 32 | def model_config(self): 33 | return self._config 34 | 35 | @abstractmethod 36 | def create_feed_dict(self): 37 | """ To be overridden 38 | Creates a feed_dict that can be passed into a tensorflow session 39 | 40 | Returns: a dictionary with tensors as keys and numpy arrays as values 41 | """ 42 | return dict() 43 | 44 | @abstractmethod 45 | def loss(self, prediction_dict): 46 | """Compute scalar loss tensors with respect to provided groundtruth. 47 | 48 | Calling this function requires that groundtruth tensors have been 49 | provided via the provide_groundtruth function. 50 | 51 | Args: 52 | prediction_dict: a dictionary holding predicted tensors 53 | 54 | Returns: 55 | a dictionary mapping strings (loss names) to scalar tensors 56 | representing loss values. 57 | """ 58 | pass 59 | -------------------------------------------------------------------------------- /avod/configs/unittest_model.config: -------------------------------------------------------------------------------- 1 | # This config is used for model unit tests 2 | 3 | model_name: 'avod_model' 4 | checkpoint_name: 'unittest_model' 5 | 6 | rpn_config { 7 | rpn_proposal_roi_crop_size: 3 8 | rpn_fusion_method: 'mean' 9 | rpn_train_nms_size: 128 10 | rpn_test_nms_size: 128 11 | rpn_nms_iou_thresh: 0.8 12 | } 13 | 14 | avod_config { 15 | avod_proposal_roi_crop_size: 3 16 | avod_positive_selection: 'corr_cls' 17 | avod_nms_size: 128 18 | avod_nms_iou_thresh: 0.1 19 | avod_box_representation: 'box_3d' 20 | } 21 | 22 | label_smoothing_epsilon: 0.001 23 | expand_proposals_xz: 0.0 24 | path_drop_probabilities: [0.5, 0.5] 25 | train_on_all_samples: False 26 | eval_all_samples: False 27 | 28 | layers_config { 29 | bev_feature_extractor { 30 | bev_vgg { 31 | vgg_conv1: [2, 8] 32 | vgg_conv2: [2, 16] 33 | vgg_conv3: [3, 32] 34 | vgg_conv4: [3, 64] 35 | upsampling_multiplier: 1 36 | 37 | l2_weight_decay: 0.0005 38 | } 39 | } 40 | img_feature_extractor { 41 | img_vgg { 42 | vgg_conv1: [2, 8] 43 | vgg_conv2: [2, 16] 44 | vgg_conv3: [3, 32] 45 | vgg_conv4: [3, 64] 46 | upsampling_multiplier: 1 47 | 48 | l2_weight_decay: 0.0005 49 | } 50 | } 51 | rpn_config { 52 | cls_fc6 : 16 53 | cls_fc7 : 16 54 | 55 | reg_fc6 : 16 56 | reg_fc7 : 16 57 | 58 | l2_weight_decay: 0.001 59 | keep_prob: 0.5 60 | } 61 | avod_config { 62 | basic_fc_layers { 63 | num_layers: 2 64 | layer_sizes: [32, 32] 65 | l2_weight_decay: 0.005 66 | keep_prob: 0.5 67 | fusion_method: 'mean' # 'mean' or 'concat' 68 | } 69 | } 70 | } 71 | 72 | # Loss function weights 73 | loss_config { 74 | reg_loss_weight: 10.0 75 | ang_loss_weight: 10.0 76 | cls_loss_weight: 5.0 77 | } 78 | -------------------------------------------------------------------------------- /demos/dataset/data_mean.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from PIL import Image 3 | 4 | from avod.builders.dataset_builder import DatasetBuilder 5 | 6 | 7 | def main(): 8 | """ 9 | Calculates and prints the mean values for the RGB channels in a dataset 10 | """ 11 | 12 | dataset_builder = DatasetBuilder() 13 | dataset = dataset_builder.build_kitti_dataset( 14 | dataset_builder.KITTI_TRAIN 15 | # dataset_builder.KITTI_TRAIN_MINI 16 | ) 17 | 18 | # Options 19 | debug_print = True 20 | get_bev_mean = False 21 | 22 | # Dataset values 23 | dataset_utils = dataset.kitti_utils 24 | num_samples = dataset.num_samples 25 | clusters, _ = 
dataset.get_cluster_info() 26 | num_bev_maps = len(clusters) + 1 # Height Maps + Density Map 27 | 28 | pixels_sum = np.zeros(3) # RGB 29 | bev_sum = np.zeros(num_bev_maps) 30 | 31 | for sample_idx in range(num_samples): 32 | sample_name = dataset.sample_names[sample_idx] 33 | 34 | image_path = dataset.get_rgb_image_path(sample_name) 35 | image = np.asarray(Image.open(image_path)) 36 | 37 | pixels_r = np.mean(image[:, :, 0]) 38 | pixels_g = np.mean(image[:, :, 1]) 39 | pixels_b = np.mean(image[:, :, 2]) 40 | 41 | pixel_means = np.stack((pixels_r, pixels_g, pixels_b)) 42 | pixels_sum += pixel_means 43 | 44 | if get_bev_mean: 45 | bev_images = dataset_utils.create_bev_maps(sample_name, 46 | source='lidar') 47 | height_maps = np.asarray(bev_images['height_maps']) 48 | density_map = np.asarray(bev_images['density_map']) 49 | 50 | height_means = [np.mean(height_map) for height_map in height_maps] 51 | density_mean = np.mean(density_map) 52 | 53 | bev_means = np.stack((*height_means, density_mean)) 54 | bev_sum += bev_means 55 | 56 | if debug_print: 57 | debug_string = '{} / {}, Sample {}, pixel_means {}'.format( 58 | sample_idx + 1, num_samples, sample_name, pixel_means) 59 | if get_bev_mean: 60 | debug_string += ' bev_means {}'.format(bev_means) 61 | 62 | print(debug_string) 63 | 64 | print("Dataset: {}, split: {}".format(dataset.name, dataset.data_split)) 65 | print("Image mean: {}".format(pixels_sum / num_samples)) 66 | 67 | if get_bev_mean: 68 | print("BEV mean: {}".format(bev_sum / num_samples)) 69 | 70 | 71 | if __name__ == '__main__': 72 | main() 73 | -------------------------------------------------------------------------------- /avod/core/anchor_generator.py: -------------------------------------------------------------------------------- 1 | # Copyright 2017 The TensorFlow Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | # ============================================================================== 15 | 16 | """Base anchor generator. 17 | 18 | The job of the anchor generator is to create (or load) a collection 19 | of bounding boxes to be used as anchors. 20 | 21 | Generated anchors are assumed to match some convolutional grid or list of grid 22 | shapes. For example, we might want to generate anchors matching an 8x8 23 | feature map and a 4x4 feature map. If we place 3 anchors per grid location 24 | on the first feature map and 6 anchors per grid location on the second feature 25 | map, then 3*8*8 + 6*4*4 = 288 anchors are generated in total. 26 | 27 | To support fully convolutional settings, feature map shapes are passed 28 | dynamically at generation time. The number of anchors to place at each location 29 | is static --- implementations of AnchorGenerator must always be able to return 30 | the number of anchors that they use per location for each feature map.
31 | """ 32 | from abc import ABCMeta 33 | from abc import abstractmethod 34 | 35 | import tensorflow as tf 36 | 37 | 38 | class AnchorGenerator(object): 39 | """Abstract base class for anchor generators.""" 40 | __metaclass__ = ABCMeta 41 | 42 | @abstractmethod 43 | def name_scope(self): 44 | """Name scope. 45 | 46 | Must be defined by implementations. 47 | 48 | Returns: 49 | a string representing the name scope of the anchor generation operation. 50 | """ 51 | pass 52 | 53 | def generate(self, **params): 54 | """Generates a collection of bounding boxes to be used as anchors. 55 | """ 56 | return self._generate(**params) 57 | 58 | @abstractmethod 59 | def _generate(self, **params): 60 | """To be overridden by implementations. 61 | 62 | Args: 63 | **params: parameters for anchor generation op 64 | 65 | Returns: 66 | boxes: a BoxList holding a collection of N anchor boxes 67 | """ 68 | pass 69 | -------------------------------------------------------------------------------- /scripts/offline_eval/plot_ap.py: -------------------------------------------------------------------------------- 1 | import matplotlib.pyplot as plt 2 | import numpy as np 3 | 4 | 5 | def main(): 6 | """Plots AP scores from the native eval script and prints top 5 checkpoints 7 | for each metric 8 | """ 9 | 10 | # Output from native eval 11 | results_file = 'results/pyramid_cars_with_aug_example_results_0.1.txt' 12 | 13 | # Top n medium score indices to print 14 | top_n_to_print = 5 15 | 16 | with open(results_file) as f: 17 | lines = f.readlines() 18 | 19 | num_lines = len(lines) 20 | line_idx = 0 21 | 22 | ap_dict = {} 23 | current_step = 0 24 | 25 | while line_idx < num_lines - 1: 26 | line = lines[line_idx].rstrip('\n') 27 | 28 | # Step 29 | if line.isdigit(): 30 | current_step = int(line) 31 | else: 32 | ap_line = line.split(' ') 33 | 34 | if '_detection' in ap_line[0] or '_heading' in ap_line[0]: 35 | detection_type = str(ap_line[0]) 36 | ap_vals = np.hstack([current_step, ap_line[2:]]) 37 | 38 | if ap_dict.get(detection_type): 39 | ap_dict[detection_type].append(ap_vals) 40 | else: 41 | ap_dict.update({detection_type: [ap_vals]}) 42 | else: 43 | # Ignore line (e.g. 'done', 'directory exists', etc.) 
44 | pass 45 | 46 | line_idx += 1 47 | 48 | # Plot results (2D, 3D, BEV, 3D_heading, BEV_heading) 49 | num_ap_plots = len(ap_dict) 50 | plot_cols = 5 51 | plot_rows = int(np.ceil(num_ap_plots / plot_cols)) 52 | 53 | fig, ax_arr = plt.subplots(plot_rows, plot_cols, 54 | figsize=(17, 4 * plot_rows)) 55 | fig.canvas.set_window_title(results_file) 56 | ax_arr = ax_arr.reshape(-1, plot_cols) 57 | 58 | # Create plots 59 | sorted_items = sorted(ap_dict.items()) 60 | for plot_idx in range(num_ap_plots): 61 | 62 | # Get values from dict 63 | values = sorted_items[plot_idx] 64 | detection_type = values[0] 65 | lines = np.asarray(values[1], dtype=np.float32) 66 | steps = lines[:, 0] 67 | ap_values = lines[:, 1:] 68 | 69 | top_n_med_indices = np.argsort(ap_values[:, 1])[-top_n_to_print:][::-1] 70 | 71 | print('{:25s}'.format(detection_type), steps.take(top_n_med_indices)) 72 | 73 | # Plot 74 | plot_row = plot_idx // plot_cols 75 | plot_col = plot_idx % plot_cols 76 | ax_arr[plot_row, plot_col].plot(steps, ap_values) 77 | ax_arr[plot_row, plot_col].set_title(detection_type) 78 | 79 | plt.legend(labels=['easy', 'medium', 'hard']) 80 | plt.show() 81 | 82 | 83 | if __name__ == '__main__': 84 | main() 85 | -------------------------------------------------------------------------------- /avod/core/feature_extractors/img_feature_extractor.py: -------------------------------------------------------------------------------- 1 | from abc import abstractmethod 2 | 3 | import tensorflow as tf 4 | 5 | 6 | class ImgFeatureExtractor: 7 | 8 | # Kitti image mean per channel 9 | _R_MEAN = 92.8403 10 | _G_MEAN = 97.7996 11 | _B_MEAN = 93.5843 12 | 13 | def __init__(self, extractor_config): 14 | self.config = extractor_config 15 | 16 | def preprocess_input(self, tensor_in, output_size): 17 | """Preprocesses the given input. 18 | 19 | Args: 20 | tensor_in: A `Tensor` of shape=(batch_size, height, 21 | width, channels) representing an input image. 22 | output_size: The size (H x W) to resize the input to 23 | 24 | Returns: 25 | Preprocessed tensor input, resized to the output_size 26 | """ 27 | image = tf.image.resize_images(tensor_in, output_size) 28 | image = tf.squeeze(image) 29 | image = tf.to_float(image) 30 | image_normalized = self._mean_image_subtraction(image, 31 | [self._R_MEAN, 32 | self._G_MEAN, 33 | self._B_MEAN]) 34 | tensor_out = tf.expand_dims(image_normalized, axis=0) 35 | return tensor_out 36 | 37 | def _mean_image_subtraction(self, image, means): 38 | """Subtracts the given means from each image channel. 39 | 40 | For example: 41 | means = [123.68, 116.779, 103.939] 42 | image = _mean_image_subtraction(image, means) 43 | 44 | Note that the rank of `image` must be known. 45 | 46 | Args: 47 | image: a tensor of size [height, width, C]. 48 | means: a C-vector of values to subtract from each channel. 49 | 50 | Returns: 51 | the centered image. 52 | 53 | Raises: 54 | ValueError: If the rank of `image` is unknown, if `image` has a rank 55 | other than three or if the number of channels in `image` doesn't 56 | match the number of values in `means`.
57 | """ 58 | if image.get_shape().ndims != 3: 59 | raise ValueError('Input must be of size [height, width, C>0]') 60 | num_channels = image.get_shape().as_list()[-1] 61 | if len(means) != num_channels: 62 | raise ValueError('len(means) must match the number of channels') 63 | 64 | channels = tf.split( 65 | axis=2, 66 | num_or_size_splits=num_channels, 67 | value=image) 68 | for i in range(num_channels): 69 | channels[i] -= means[i] 70 | return tf.concat(axis=2, values=channels) 71 | 72 | @abstractmethod 73 | def build(self, **kwargs): 74 | pass 75 | -------------------------------------------------------------------------------- /avod/core/losses_test.py: -------------------------------------------------------------------------------- 1 | """Tests for object detection losses module.""" 2 | 3 | import numpy as np 4 | import tensorflow as tf 5 | 6 | from avod.core import losses 7 | 8 | 9 | class WeightedL2LocalizationLossTest(tf.test.TestCase): 10 | 11 | def testReturnsCorrectLoss(self): 12 | batch_size = 3 13 | num_anchors = 10 14 | code_size = 4 15 | prediction_tensor = tf.ones([batch_size, num_anchors, code_size]) 16 | target_tensor = tf.zeros([batch_size, num_anchors, code_size]) 17 | weights = tf.constant([[1, 1, 1, 1, 1, 0, 0, 0, 0, 0], 18 | [1, 1, 1, 1, 1, 0, 0, 0, 0, 0], 19 | [1, 1, 1, 1, 1, 0, 0, 0, 0, 0]], tf.float32) 20 | loss_op = losses.WeightedL2LocalizationLoss() 21 | loss = loss_op(prediction_tensor, target_tensor, weights=weights) 22 | 23 | expected_loss = (3 * 5 * 4) / 2.0 24 | with self.test_session() as sess: 25 | loss_output = sess.run(loss) 26 | self.assertAllClose(loss_output, expected_loss) 27 | 28 | def testReturnsCorrectLossSum(self): 29 | batch_size = 3 30 | num_anchors = 16 31 | code_size = 4 32 | prediction_tensor = tf.ones([batch_size, num_anchors, code_size]) 33 | target_tensor = tf.zeros([batch_size, num_anchors, code_size]) 34 | weights = tf.ones([batch_size, num_anchors]) 35 | loss_op = losses.WeightedL2LocalizationLoss() 36 | loss = loss_op(prediction_tensor, target_tensor, weights=weights) 37 | 38 | expected_loss = tf.nn.l2_loss(prediction_tensor - target_tensor) 39 | with self.test_session() as sess: 40 | loss_output = sess.run(loss) 41 | expected_loss_output = sess.run(expected_loss) 42 | self.assertAllClose(loss_output, expected_loss_output) 43 | 44 | def testReturnsCorrectNanLoss(self): 45 | batch_size = 3 46 | num_anchors = 10 47 | code_size = 4 48 | prediction_tensor = tf.ones([batch_size, num_anchors, code_size]) 49 | target_tensor = tf.concat([ 50 | tf.zeros([batch_size, num_anchors, code_size / 2]), 51 | tf.ones([batch_size, num_anchors, code_size / 2]) * np.nan 52 | ], 53 | axis=2) 54 | weights = tf.ones([batch_size, num_anchors]) 55 | loss_op = losses.WeightedL2LocalizationLoss() 56 | loss = loss_op(prediction_tensor, target_tensor, weights=weights, 57 | ignore_nan_targets=True) 58 | 59 | expected_loss = (3 * 5 * 4) / 2.0 60 | with self.test_session() as sess: 61 | loss_output = sess.run(loss) 62 | self.assertAllClose(loss_output, expected_loss) 63 | 64 | 65 | if __name__ == '__main__': 66 | tf.test.main() 67 | -------------------------------------------------------------------------------- /avod/core/label_cluster_utils_test.py: -------------------------------------------------------------------------------- 1 | """LabelClusterUtils unit test module""" 2 | 3 | import unittest 4 | 5 | import array 6 | import numpy as np 7 | import os 8 | 9 | import avod 10 | import avod.tests as tests 11 | 12 | from avod.builders.dataset_builder import 
DatasetBuilder 13 | from avod.core.label_cluster_utils import LabelClusterUtils 14 | 15 | 16 | class LabelClusterUtilsTest(unittest.TestCase): 17 | @classmethod 18 | def setUpClass(cls): 19 | cls.fake_kitti_dir = tests.test_path() + "/datasets/Kitti/object" 20 | cls.dataset = DatasetBuilder.build_kitti_dataset( 21 | DatasetBuilder.KITTI_UNITTEST) 22 | 23 | def test_get_clusters(self): 24 | 25 | # classes = ['Car', 'Pedestrian', 'Cyclist'] 26 | num_clusters = [2, 1, 1] 27 | 28 | label_cluster_utils = LabelClusterUtils(self.dataset) 29 | clusters, std_devs = label_cluster_utils.get_clusters() 30 | 31 | # Check that correct number of clusters are returned 32 | clusters_per_class = [len(cls_clusters) for cls_clusters in clusters] 33 | std_devs_per_class = [len(cls_std_devs) for cls_std_devs in std_devs] 34 | 35 | self.assertEqual(clusters_per_class, num_clusters) 36 | self.assertEqual(std_devs_per_class, num_clusters) 37 | 38 | # Check that text files were saved 39 | txt_folder_exists = os.path.isdir( 40 | avod.root_dir() + "/data/label_clusters/unittest-kitti") 41 | self.assertTrue(txt_folder_exists) 42 | 43 | # Calling get_clusters again should read from files 44 | read_clusters, read_std_devs = label_cluster_utils.get_clusters() 45 | 46 | # Check that read values are the same as generated ones 47 | np.testing.assert_allclose(np.vstack(clusters), 48 | np.vstack(read_clusters)) 49 | np.testing.assert_allclose(np.vstack(std_devs), 50 | np.vstack(read_std_devs)) 51 | 52 | def test_flatten_data(self): 53 | data_to_reshape = list() 54 | 55 | data_to_reshape.append([[1, 2, 3], [4, 5, 6]]) 56 | data_to_reshape.append([[7, 8, 9]]) 57 | data_to_reshape.append([[10, 11, 12], [13, 14, 15]]) 58 | 59 | expected_output = np.array([[1, 2, 3], 60 | [4, 5, 6], 61 | [7, 8, 9], 62 | [10, 11, 12], 63 | [13, 14, 15]]) 64 | 65 | label_cluster_utils = LabelClusterUtils(self.dataset) 66 | 67 | flattened = label_cluster_utils._flatten_data(data_to_reshape) 68 | np.testing.assert_array_equal(flattened, 69 | expected_output, 70 | err_msg='Wrong flattened array') 71 | -------------------------------------------------------------------------------- /avod/datasets/kitti/kitti_utils_test.py: -------------------------------------------------------------------------------- 1 | """KittiUtil unit test module.""" 2 | 3 | import numpy as np 4 | import unittest 5 | 6 | from wavedata.tools.obj_detection import obj_utils as obj_utils 7 | from avod.builders.dataset_builder import DatasetBuilder 8 | 9 | 10 | class KittiUtilsTest(unittest.TestCase): 11 | @classmethod 12 | def setUpClass(cls): 13 | dataset_config = DatasetBuilder.copy_config( 14 | DatasetBuilder.KITTI_UNITTEST) 15 | 16 | cls.dataset = DatasetBuilder.build_kitti_dataset(dataset_config) 17 | cls.label_dir = cls.dataset.label_dir 18 | 19 | def test_create_slice_filter(self): 20 | # Test slice filtering between 0.2 and 2.0m on three points located 21 | # at y=[0.0, 1.0, 3.0] with a flat ground plane along y 22 | 23 | # Create fake point cloud 24 | point_cloud = np.array([[1.0, 1.0, 1.0], 25 | [0.0, 1.0, 3.0], 26 | [1.0, 1.0, 1.0]]) 27 | 28 | area_extents = [[-2, 2], [-5, 5], [-2, 2]] 29 | ground_plane = [0, 1, 0, 0] 30 | 31 | ground_offset_dist = 0.2 32 | offset_dist = 2.0 33 | 34 | expected_slice_filter = [False, True, False] 35 | 36 | slice_filter = self.dataset.kitti_utils.create_slice_filter( 37 | point_cloud, area_extents, ground_plane, 38 | ground_offset_dist, offset_dist) 39 | 40 | np.testing.assert_equal(slice_filter, expected_slice_filter) 41 | 42 | def 
test_rotate_map_90_degrees(self): 43 | # Check that a transpose and flip returns the same ndarray as np.rot90 44 | # This logic is part of create_bev_images 45 | 46 | np.random.seed(123) 47 | fake_bev_map = np.random.rand(800, 700) 48 | 49 | # Rotate with a transpose then flip (faster than np.rot90) 50 | np_transpose_then_flip_out = np.flip(fake_bev_map.transpose(), axis=0) 51 | 52 | # Expected result from np.rot90 53 | np_rot_90_out = np.rot90(fake_bev_map) 54 | 55 | np.testing.assert_allclose(np_transpose_then_flip_out, 56 | np_rot_90_out) 57 | 58 | def test_filter_labels_by_class(self): 59 | 60 | sample_name = '000007' 61 | obj_labels = obj_utils.read_labels(self.label_dir, 62 | int(sample_name)) 63 | # This particular sample has 2 valid classes 64 | exp_num_valid_classes = 2 65 | 66 | filtered_labels = \ 67 | self.dataset.kitti_utils.filter_labels(obj_labels, difficulty=None) 68 | all_types = [] 69 | for label in filtered_labels: 70 | if label.type not in all_types: 71 | all_types.append(label.type) 72 | self.assertEqual(len(all_types), 73 | exp_num_valid_classes, 74 | msg='Wrong number of labels after filtering') 75 | 76 | 77 | if __name__ == '__main__': 78 | unittest.main() 79 | -------------------------------------------------------------------------------- /avod/experiments/run_training.py: -------------------------------------------------------------------------------- 1 | """Detection model trainer. 2 | 3 | This runs the DetectionModel trainer. 4 | """ 5 | 6 | import argparse 7 | import os 8 | 9 | import tensorflow as tf 10 | 11 | import avod 12 | import avod.builders.config_builder_util as config_builder 13 | from avod.builders.dataset_builder import DatasetBuilder 14 | from avod.core.models.avod_model import AvodModel 15 | from avod.core.models.rpn_model import RpnModel 16 | from avod.core import trainer 17 | 18 | tf.logging.set_verbosity(tf.logging.ERROR) 19 | 20 | 21 | def train(model_config, train_config, dataset_config): 22 | 23 | dataset = DatasetBuilder.build_kitti_dataset(dataset_config, 24 | use_defaults=False) 25 | 26 | train_val_test = 'train' 27 | model_name = model_config.model_name 28 | 29 | with tf.Graph().as_default(): 30 | if model_name == 'rpn_model': 31 | model = RpnModel(model_config, 32 | train_val_test=train_val_test, 33 | dataset=dataset) 34 | elif model_name == 'avod_model': 35 | model = AvodModel(model_config, 36 | train_val_test=train_val_test, 37 | dataset=dataset) 38 | else: 39 | raise ValueError('Invalid model_name') 40 | 41 | trainer.train(model, train_config) 42 | 43 | 44 | def main(_): 45 | parser = argparse.ArgumentParser() 46 | 47 | # Defaults 48 | default_pipeline_config_path = avod.root_dir() + \ 49 | '/configs/avod_cars_example.config' 50 | default_data_split = 'train' 51 | default_device = '1' 52 | 53 | parser.add_argument('--pipeline_config', 54 | type=str, 55 | dest='pipeline_config_path', 56 | default=default_pipeline_config_path, 57 | help='Path to the pipeline config') 58 | 59 | parser.add_argument('--data_split', 60 | type=str, 61 | dest='data_split', 62 | default=default_data_split, 63 | help='Data split for training') 64 | 65 | parser.add_argument('--device', 66 | type=str, 67 | dest='device', 68 | default=default_device, 69 | help='CUDA device id') 70 | 71 | args = parser.parse_args() 72 | 73 | # Parse pipeline config 74 | model_config, train_config, _, dataset_config = \ 75 | config_builder.get_configs_from_pipeline_file( 76 | args.pipeline_config_path, is_training=True) 77 | 78 | # Overwrite data split 79 | 
dataset_config.data_split = args.data_split 80 | 81 | # Set CUDA device id 82 | os.environ['CUDA_VISIBLE_DEVICES'] = args.device 83 | 84 | train(model_config, train_config, dataset_config) 85 | 86 | 87 | if __name__ == '__main__': 88 | tf.app.run() 89 | -------------------------------------------------------------------------------- /avod/protos/optimizer.proto: -------------------------------------------------------------------------------- 1 | syntax = "proto2"; 2 | 3 | package avod.protos; 4 | 5 | // Messages for configuring the optimizing strategy for training object 6 | // detection models. 7 | 8 | // Top level optimizer message. 9 | message Optimizer { 10 | oneof optimizer { 11 | RMSPropOptimizer rms_prop_optimizer = 1; 12 | MomentumOptimizer momentum_optimizer = 2; 13 | AdamOptimizer adam_optimizer = 3; 14 | GradientDescentOptimizer gradient_descent = 6; 15 | } 16 | optional bool use_moving_average = 4 [default=true]; 17 | optional float moving_average_decay = 5 [default=0.9999]; 18 | } 19 | 20 | // Configuration message for the RMSPropOptimizer 21 | // See: https://www.tensorflow.org/api_docs/python/tf/train/RMSPropOptimizer 22 | message RMSPropOptimizer { 23 | optional LearningRate learning_rate = 1; 24 | optional float momentum_optimizer_value = 2 [default=0.9]; 25 | optional float decay = 3 [default=0.9]; 26 | optional float epsilon = 4 [default=1.0]; 27 | } 28 | 29 | // Configuration message for the MomentumOptimizer 30 | // See: https://www.tensorflow.org/api_docs/python/tf/train/MomentumOptimizer 31 | message MomentumOptimizer { 32 | optional LearningRate learning_rate = 1; 33 | optional float momentum_optimizer_value = 2 [default=0.9]; 34 | } 35 | 36 | // Configuration message for the AdamOptimizer 37 | // See: https://www.tensorflow.org/api_docs/python/tf/train/AdamOptimizer 38 | message AdamOptimizer { 39 | optional LearningRate learning_rate = 1; 40 | } 41 | 42 | // Configuration message for the GradientDescent 43 | // See: https://www.tensorflow.org/api_docs/python/tf/train/GradientDescentOptimizer 44 | message GradientDescentOptimizer { 45 | optional LearningRate learning_rate = 1; 46 | } 47 | 48 | 49 | // Configuration message for optimizer learning rate. 50 | message LearningRate { 51 | oneof learning_rate { 52 | ConstantLearningRate constant_learning_rate = 1; 53 | ExponentialDecayLearningRate exponential_decay_learning_rate = 2; 54 | ManualStepLearningRate manual_step_learning_rate = 3; 55 | } 56 | } 57 | 58 | // Configuration message for a constant learning rate. 59 | message ConstantLearningRate { 60 | optional float learning_rate = 1 [default=0.002]; 61 | } 62 | 63 | // Configuration message for an exponentially decaying learning rate. 64 | // See https://www.tensorflow.org/versions/master/api_docs/python/train/ \ 65 | // decaying_the_learning_rate#exponential_decay 66 | message ExponentialDecayLearningRate { 67 | optional float initial_learning_rate = 1 [default=0.002]; 68 | optional uint32 decay_steps = 2 [default=4000000]; 69 | optional float decay_factor = 3 [default=0.95]; 70 | optional bool staircase = 4 [default=true]; 71 | } 72 | 73 | // Configuration message for a manually defined learning rate schedule. 
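// Illustrative text-format fragment (the step and rate values below are
// hypothetical examples, not defaults from this repo):
//   manual_step_learning_rate {
//     initial_learning_rate: 0.002
//     schedule { step: 10000 learning_rate: 0.0002 }
//     schedule { step: 20000 learning_rate: 0.00002 }
//   }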
74 | message ManualStepLearningRate { 75 | optional float initial_learning_rate = 1 [default=0.002]; 76 | message LearningRateSchedule { 77 | optional uint32 step = 1; 78 | optional float learning_rate = 2 [default=0.002]; 79 | } 80 | repeated LearningRateSchedule schedule = 2; 81 | } 82 | -------------------------------------------------------------------------------- /avod/core/format_checker_test.py: -------------------------------------------------------------------------------- 1 | import unittest 2 | import numpy as np 3 | import tensorflow as tf 4 | 5 | from avod.core import format_checker as fc 6 | from wavedata.tools.obj_detection import obj_utils 7 | 8 | 9 | class FormatCheckerTest(unittest.TestCase): 10 | 11 | def test_check_box_3d_format(self): 12 | 13 | # Case 1, invalid type 14 | test_var = [0, 0, 0, 0, 0, 0, 0] 15 | np.testing.assert_raises(TypeError, 16 | fc.check_box_3d_format, test_var) 17 | 18 | # Case 2, invalid shape 19 | test_var = np.ones([1, 5]) 20 | np.testing.assert_raises(TypeError, 21 | fc.check_box_3d_format, test_var) 22 | 23 | test_var = np.ones([5, 6]) 24 | np.testing.assert_raises(TypeError, 25 | fc.check_box_3d_format, test_var) 26 | 27 | test_var = np.ones([1, 7]) 28 | fc.check_box_3d_format(test_var) 29 | 30 | test_var = np.ones([10, 7]) 31 | fc.check_box_3d_format(test_var) 32 | 33 | test_var = tf.ones([5, 7]) 34 | fc.check_box_3d_format(test_var) 35 | 36 | test_var = tf.ones([5, 3]) 37 | np.testing.assert_raises(TypeError, 38 | fc.check_box_3d_format, test_var) 39 | 40 | def test_check_object_label_format(self): 41 | test_obj = obj_utils.ObjectLabel() 42 | test_obj.h = 1 43 | test_obj.w = 1 44 | test_obj.l = 1 45 | test_obj.t = [1, 1, 1] 46 | test_obj.ry = 0 47 | 48 | # Case 1, Single instance of object label 49 | test_obj_list = [test_obj] 50 | fc.check_object_label_format(test_obj_list) 51 | 52 | test_obj_list = [test_obj, test_obj, test_obj] 53 | fc.check_object_label_format(test_obj_list) 54 | 55 | test_obj_list = [test_obj, test_obj, '0'] 56 | np.testing.assert_raises(TypeError, 57 | fc.check_object_label_format, test_obj_list) 58 | 59 | # Case 2, Range check 60 | test_obj.t = [1, 1] 61 | test_obj_list = [test_obj] 62 | np.testing.assert_raises(TypeError, 63 | fc.check_object_label_format, test_obj_list) 64 | 65 | def test_check_anchor_format(self): 66 | # Case 1, invalid type 67 | test_var = [0, 0, 0, 0, 0, 0] 68 | np.testing.assert_raises(TypeError, 69 | fc.check_anchor_format, test_var) 70 | 71 | # Case 2, invalid shape 72 | test_var = np.ones([1, 5]) 73 | np.testing.assert_raises(TypeError, 74 | fc.check_anchor_format, test_var) 75 | 76 | test_var = np.ones([1, 6]) 77 | fc.check_anchor_format(test_var) 78 | 79 | test_var = np.ones([5, 6]) 80 | fc.check_anchor_format(test_var) 81 | 82 | test_var = tf.ones([5, 6]) 83 | fc.check_anchor_format(test_var) 84 | 85 | test_var = tf.ones([5, 4]) 86 | np.testing.assert_raises(TypeError, 87 | fc.check_anchor_format, test_var) 88 | -------------------------------------------------------------------------------- /avod/builders/avod_fc_layers_builder.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | 3 | from avod.core.avod_fc_layers import basic_fc_layers 4 | from avod.core.avod_fc_layers import fusion_fc_layers 5 | 6 | 7 | KEY_CLS_LOGITS = 'classification_logits' 8 | KEY_OFFSETS = 'offsets' 9 | KEY_ANGLE_VECTORS = 'angle_vectors' 10 | KEY_ENDPOINTS = 'end_points' 11 | 12 | 13 | def build(layers_config, 14 | input_rois, input_weights, 15 | 
num_final_classes, box_rep, 16 | top_anchors, ground_plane, 17 | is_training): 18 | """Builds second stage fully connected layers 19 | 20 | Args: 21 | layers_config: Configuration object 22 | input_rois: List of input ROI feature maps 23 | input_weights: List of weights for each input e.g. [1.0, 1.0] 24 | num_final_classes: Number of output classes, including 'Background' 25 | box_rep: Box representation (e.g. 'box_3d', 'box_8c', etc.) 26 | top_anchors: Top proposal anchors, to include location information 27 | ground_plane: Ground plane coefficients 28 | is_training (bool): Whether the network is training or evaluating 29 | 30 | Returns: 31 | fc_output_layers: Output layer dictionary 32 | """ 33 | 34 | # Default all output layers to None 35 | cls_logits = offsets = angle_vectors = end_points = None 36 | 37 | with tf.variable_scope('box_predictor') as sc: 38 | end_points_collection = sc.name + '_end_points' 39 | 40 | fc_layers_type = layers_config.WhichOneof('fc_layers') 41 | 42 | if fc_layers_type == 'basic_fc_layers': 43 | fc_layers_config = layers_config.basic_fc_layers 44 | 45 | cls_logits, offsets, angle_vectors, end_points = \ 46 | basic_fc_layers.build( 47 | fc_layers_config=fc_layers_config, 48 | input_rois=input_rois, 49 | input_weights=input_weights, 50 | num_final_classes=num_final_classes, 51 | box_rep=box_rep, 52 | 53 | is_training=is_training, 54 | end_points_collection=end_points_collection) 55 | 56 | elif fc_layers_type == 'fusion_fc_layers': 57 | fc_layers_config = layers_config.fusion_fc_layers 58 | 59 | cls_logits, offsets, angle_vectors, end_points = \ 60 | fusion_fc_layers.build( 61 | fc_layers_config=fc_layers_config, 62 | input_rois=input_rois, 63 | input_weights=input_weights, 64 | num_final_classes=num_final_classes, 65 | box_rep=box_rep, 66 | 67 | is_training=is_training, 68 | end_points_collection=end_points_collection) 69 | 70 | else: 71 | raise ValueError('Invalid fc layers config') 72 | 73 | # # Histogram summaries 74 | # with tf.variable_scope('histograms_avod'): 75 | # for fc_layer in end_points: 76 | # tf.summary.histogram(fc_layer, end_points[fc_layer]) 77 | 78 | fc_output_layers = dict() 79 | fc_output_layers[KEY_CLS_LOGITS] = cls_logits 80 | fc_output_layers[KEY_OFFSETS] = offsets 81 | fc_output_layers[KEY_ANGLE_VECTORS] = angle_vectors 82 | fc_output_layers[KEY_ENDPOINTS] = end_points 83 | 84 | return fc_output_layers 85 | -------------------------------------------------------------------------------- /avod/protos/layers.proto: -------------------------------------------------------------------------------- 1 | package avod.protos; 2 | 3 | // Message for configuring Model Layer params. 
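// Illustrative text-format fragment for one of the feature extractors
// (all repeat/filter values below are hypothetical, not recommendations):
//   bev_feature_extractor {
//     bev_vgg {
//       vgg_conv1: [2, 32]
//       vgg_conv2: [2, 64]
//       vgg_conv3: [3, 128]
//       vgg_conv4: [3, 256]
//       upsampling_multiplier: 4
//     }
//   }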
4 | message LayersConfig { 5 | 6 | required FeatureExtractor bev_feature_extractor = 1; 7 | required FeatureExtractor img_feature_extractor = 2; 8 | 9 | required RPNLayersConfig rpn_config = 3; 10 | required AVODLayersConfig avod_config = 4; 11 | } 12 | 13 | message FeatureExtractor { 14 | oneof feature_extractor { 15 | 16 | VGGLayersConfig bev_vgg = 1; 17 | VGGLayersConfig img_vgg = 2; 18 | 19 | PyramidLayersConfig img_vgg_pyr = 3; 20 | PyramidLayersConfig bev_vgg_pyr = 4; 21 | } 22 | } 23 | 24 | message VGGLayersConfig { 25 | // Conv layer 1 [repeat, num filter] 26 | repeated int32 vgg_conv1 = 1; 27 | 28 | // Conv layer 2 [repeat, num filter] 29 | repeated int32 vgg_conv2 = 2; 30 | 31 | // Conv layer 3 [repeat, num filter] 32 | repeated int32 vgg_conv3 = 3; 33 | 34 | // Conv layer 4 [repeat, num filter] 35 | repeated int32 vgg_conv4 = 4; 36 | 37 | // Upsampling multiplier 38 | required int32 upsampling_multiplier = 5; 39 | 40 | // L2 norm weight decay 41 | optional float l2_weight_decay = 6 [default = 0.0005]; 42 | } 43 | 44 | message PyramidLayersConfig { 45 | // Conv layer 1 [repeat, num filter] 46 | repeated int32 vgg_conv1 = 1; 47 | 48 | // Conv layer 2 [repeat, num filter] 49 | repeated int32 vgg_conv2 = 2; 50 | 51 | // Conv layer 3 [repeat, num filter] 52 | repeated int32 vgg_conv3 = 3; 53 | 54 | // Conv layer 4 [repeat, num filter] 55 | repeated int32 vgg_conv4 = 4; 56 | 57 | // L2 norm weight decay 58 | optional float l2_weight_decay = 6 [default = 0.0005]; 59 | } 60 | 61 | message RPNLayersConfig { 62 | // Anchor predictor layer configs 63 | // classification fc layer size 64 | required int32 cls_fc6 = 1; 65 | required int32 cls_fc7 = 2; 66 | 67 | // Regression fc layer size 68 | required int32 reg_fc6 = 3; 69 | required int32 reg_fc7 = 4; 70 | 71 | // L2 weight decay 72 | required float l2_weight_decay = 6; 73 | 74 | // Dropout keep probability - the probability that a neuron's 75 | // output is kept during dropout 76 | optional float keep_prob = 5 [default = 0.5]; 77 | } 78 | 79 | message AVODLayersConfig { 80 | oneof fc_layers { 81 | BasicFcLayers basic_fc_layers = 1; 82 | FusionFcLayers fusion_fc_layers = 2; 83 | } 84 | } 85 | 86 | message BasicFcLayers { 87 | required int32 num_layers = 1; 88 | repeated int32 layer_sizes = 2; 89 | 90 | // L2 weight decay 91 | required float l2_weight_decay = 3; 92 | 93 | // Dropout keep probability 94 | required float keep_prob = 4; 95 | 96 | // Fusion method ('mean', 'concat') 97 | required string fusion_method = 5; 98 | } 99 | 100 | message FusionFcLayers { 101 | 102 | required int32 num_layers = 1; 103 | repeated int32 layer_sizes = 2; 104 | 105 | // L2 weight decay 106 | required float l2_weight_decay = 3; 107 | 108 | // Dropout keep probability 109 | required float keep_prob = 4; 110 | 111 | // Fusion method ('mean', 'concat') 112 | required string fusion_method = 5; 113 | 114 | // Fusion type (early, late, deep) 115 | required string fusion_type = 6; 116 | } 117 | -------------------------------------------------------------------------------- /avod/core/anchor_generators/grid_anchor_3d_generator_test.py: -------------------------------------------------------------------------------- 1 | """Grid Anchor Generation unit test module.""" 2 | import unittest 3 | import numpy as np 4 | 5 | import avod.tests as tests 6 | 7 | from avod.core.anchor_generators import grid_anchor_3d_generator 8 | from avod.builders.dataset_builder import DatasetBuilder 9 | 10 | 11 | def generate_fake_dataset(): 12 | return
DatasetBuilder.build_kitti_dataset(DatasetBuilder.KITTI_UNITTEST) 13 | 14 | 15 | class GridAnchor3dGeneratorTest(unittest.TestCase): 16 | @classmethod 17 | def setUpClass(cls): 18 | cls.fake_kitti_dir = tests.test_path() + "/datasets/Kitti/object" 19 | cls.dataset = generate_fake_dataset() 20 | 21 | # create generic ground plane (normal vector is straight up) 22 | cls.ground_plane = np.array([0., -1., 0., 0.]) 23 | cls.clusters = np.array([[1., 1., 1.], [2., 1., 1.]]) 24 | 25 | cls.anchor_generator = grid_anchor_3d_generator.GridAnchor3dGenerator() 26 | 27 | def test_generate_anchors(self): 28 | normal_area = [(-1., 1.), (-1., 0.), (0., 1.)] 29 | no_x_area = [(0., 0.), (-1., 0.), (0., 2.)] 30 | no_z_area = [(-1., 1.), (-1., 0.), (0., 0.)] 31 | 32 | expected_anchors = np.array([[-0.5, 0., 0.5, 1., 1., 1., 0.], 33 | [-0.5, 0., 0.5, 1., 1., 1., np.pi / 2], 34 | [-0.5, 0., 0.5, 2., 1., 1., 0.], 35 | [-0.5, 0., 0.5, 2., 1., 1., np.pi / 2], 36 | [0.5, 0., 0.5, 1., 1., 1., 0.], 37 | [0.5, 0., 0.5, 1., 1., 1., np.pi / 2], 38 | [0.5, 0., 0.5, 2., 1., 1., 0.], 39 | [0.5, 0., 0.5, 2., 1., 1., np.pi / 2]]) 40 | gen_anchors = \ 41 | self.anchor_generator.generate(area_3d=normal_area, 42 | anchor_3d_sizes=self.clusters, 43 | anchor_stride=[1, 1], 44 | ground_plane=self.ground_plane) 45 | self.assertEqual(gen_anchors.shape, expected_anchors.shape) 46 | np.testing.assert_almost_equal(gen_anchors, 47 | expected_anchors, 48 | decimal=3) 49 | 50 | expected_anchors = np.ndarray(shape=(0, 7)) 51 | gen_anchors = \ 52 | self.anchor_generator.generate(area_3d=no_x_area, 53 | anchor_3d_sizes=self.clusters, 54 | anchor_stride=[1, 1], 55 | ground_plane=self.ground_plane) 56 | self.assertEqual(gen_anchors.shape, expected_anchors.shape) 57 | np.testing.assert_almost_equal(gen_anchors, 58 | expected_anchors, 59 | decimal=3) 60 | 61 | expected_anchors = np.ndarray(shape=(0, 7)) 62 | gen_anchors = \ 63 | self.anchor_generator.generate(area_3d=no_z_area, 64 | anchor_3d_sizes=self.clusters, 65 | anchor_stride=[1, 1], 66 | ground_plane=self.ground_plane) 67 | self.assertEqual(gen_anchors.shape, expected_anchors.shape) 68 | np.testing.assert_almost_equal(gen_anchors, 69 | expected_anchors, 70 | decimal=3) 71 | 72 | 73 | if __name__ == '__main__': 74 | unittest.main() 75 | -------------------------------------------------------------------------------- /avod/utils/demo_utils.py: -------------------------------------------------------------------------------- 1 | import copy 2 | import numpy as np 3 | import tensorflow as tf 4 | 5 | from wavedata.tools.obj_detection import obj_utils 6 | from wavedata.tools.obj_detection import evaluation 7 | 8 | from avod.core import anchor_projector 9 | from avod.core import box_3d_encoder 10 | 11 | 12 | COLOUR_SCHEME_PREDICTIONS = { 13 | "Easy GT": (255, 255, 0), # Yellow 14 | "Medium GT": (255, 128, 0), # Orange 15 | "Hard GT": (255, 0, 0), # Red 16 | 17 | "Prediction": (50, 255, 50), # Green 18 | } 19 | 20 | 21 | def get_gts_based_on_difficulty(dataset, img_idx): 22 | """Returns lists of ground-truth based on difficulty. 
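
    Difficulty follows the KITTI convention used by filter_labels below:
    0 = Easy, 1 = Medium, 2 = Hard.

    Args:
        dataset: dataset object providing label_dir and kitti_utils
        img_idx: integer index of the sample image

    Returns:
        easy_gt_objs, medium_gt_objs, hard_gt_objs, all_gt_objs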
23 | """ 24 | # Get all ground truth labels 25 | all_gt_objs = obj_utils.read_labels(dataset.label_dir, img_idx) 26 | 27 | # Filter to dataset classes 28 | gt_objs = dataset.kitti_utils.filter_labels(all_gt_objs) 29 | 30 | # Filter objects to desired difficulty 31 | easy_gt_objs = dataset.kitti_utils.filter_labels( 32 | copy.deepcopy(gt_objs), difficulty=0) 33 | medium_gt_objs = dataset.kitti_utils.filter_labels( 34 | copy.deepcopy(gt_objs), difficulty=1) 35 | hard_gt_objs = dataset.kitti_utils.filter_labels( 36 | copy.deepcopy(gt_objs), difficulty=2) 37 | 38 | for gt_obj in easy_gt_objs: 39 | gt_obj.type = 'Easy GT' 40 | for gt_obj in medium_gt_objs: 41 | gt_obj.type = 'Medium GT' 42 | for gt_obj in hard_gt_objs: 43 | gt_obj.type = 'Hard GT' 44 | 45 | return easy_gt_objs, medium_gt_objs, hard_gt_objs, all_gt_objs 46 | 47 | 48 | def get_max_ious_3d(all_gt_boxes_3d, pred_boxes_3d): 49 | """Helper function to calculate 3D IoU for the given predictions. 50 | 51 | Args: 52 | all_gt_boxes_3d: A list of the same ground-truth boxes in box_3d 53 | format. 54 | pred_boxes_3d: A list of predictions in box_3d format. 55 | """ 56 | 57 | # Only calculate ious if there are predictions 58 | if pred_boxes_3d: 59 | # Convert to iou format 60 | gt_objs_iou_fmt = box_3d_encoder.box_3d_to_3d_iou_format( 61 | all_gt_boxes_3d) 62 | pred_objs_iou_fmt = box_3d_encoder.box_3d_to_3d_iou_format( 63 | pred_boxes_3d) 64 | 65 | max_ious_3d = np.zeros(len(all_gt_boxes_3d)) 66 | for gt_obj_idx in range(len(all_gt_boxes_3d)): 67 | 68 | gt_obj_iou_fmt = gt_objs_iou_fmt[gt_obj_idx] 69 | 70 | ious_3d = evaluation.three_d_iou(gt_obj_iou_fmt, 71 | pred_objs_iou_fmt) 72 | 73 | max_ious_3d[gt_obj_idx] = np.amax(ious_3d) 74 | else: 75 | # No detections, all ious = 0 76 | max_ious_3d = np.zeros(len(all_gt_boxes_3d)) 77 | 78 | return max_ious_3d 79 | 80 | 81 | def tf_project_to_image_space(anchors, calib_p2, image_shape, img_idx): 82 | """Helper function to convert data to tensors and project 83 | to image space using the tf projection function. 84 | """ 85 | 86 | anchors_tensor = tf.convert_to_tensor(anchors, tf.float32) 87 | calib_p2_tensor = tf.convert_to_tensor(calib_p2, tf.float32) 88 | image_shape_tensor = tf.convert_to_tensor(image_shape, tf.float32) 89 | 90 | projected_boxes_tensor, _ = \ 91 | anchor_projector.tf_project_to_image_space( 92 | anchors_tensor, 93 | calib_p2_tensor, 94 | image_shape_tensor) 95 | sess = tf.Session() 96 | 97 | with sess.as_default(): 98 | projected_boxes = projected_boxes_tensor.eval() 99 | 100 | return projected_boxes 101 | -------------------------------------------------------------------------------- /avod/core/minibatch_sampler.py: -------------------------------------------------------------------------------- 1 | # Copyright 2017 The TensorFlow Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | # ============================================================================== 15 | 16 | """Base minibatch sampler module. 
17 | 18 | The job of the minibatch_sampler is to subsample a minibatch based on some 19 | criterion. 20 | 21 | The main function call is: 22 | subsample(indicator, batch_size, **params). 23 | Indicator is a 1d boolean tensor where True denotes which examples can be 24 | sampled. It returns a boolean indicator where True denotes an example has been 25 | sampled. 26 | 27 | Subclasses should implement the Subsample function and can make use of the 28 | @staticmethod SubsampleIndicator. 29 | """ 30 | 31 | from abc import ABCMeta 32 | from abc import abstractmethod 33 | 34 | import tensorflow as tf 35 | 36 | from avod.core import ops 37 | 38 | 39 | class MinibatchSampler(object): 40 | """Abstract base class for subsampling minibatches.""" 41 | __metaclass__ = ABCMeta 42 | 43 | def __init__(self): 44 | """Constructs a minibatch sampler.""" 45 | pass 46 | 47 | @abstractmethod 48 | def subsample(self, indicator, batch_size, **params): 49 | """Returns subsample of entries in indicator. 50 | 51 | Args: 52 | indicator: boolean tensor of shape [N] whose 53 | True entries can be sampled. 54 | batch_size: desired batch size. 55 | **params: additional keyword arguments for 56 | specific implementations of the MinibatchSampler. 57 | 58 | Returns: 59 | sample_indicator: boolean tensor of shape [N] whose 60 | True entries have been sampled. 61 | If sum(indicator) >= batch_size, sum(is_sampled) = batch_size 62 | """ 63 | pass 64 | 65 | @staticmethod 66 | def subsample_indicator(indicator, num_samples): 67 | """Subsample indicator vector. 68 | 69 | Given a boolean indicator vector with M elements set to `True`, the function 70 | assigns all but `num_samples` of these previously `True` elements to 71 | `False`. If `num_samples` is greater than M, the original indicator vector 72 | is returned. 73 | 74 | Args: 75 | indicator: a 1-dimensional boolean tensor indicating which elements 76 | are allowed to be sampled and which are not. 77 | num_samples: int32 scalar tensor 78 | 79 | Returns: 80 | a boolean tensor with the same shape as input (indicator) tensor 81 | """ 82 | indices = tf.where(indicator) 83 | indices = tf.random_shuffle(indices) 84 | indices = tf.reshape(indices, [-1]) 85 | 86 | num_samples = tf.minimum(tf.size(indices), num_samples) 87 | selected_indices = tf.slice(indices, [0], tf.reshape(num_samples, [1])) 88 | 89 | selected_indicator = ops.indices_to_dense_vector(selected_indices, 90 | tf.shape(indicator)[ 91 | 0]) 92 | 93 | return tf.equal(selected_indicator, 1) 94 | -------------------------------------------------------------------------------- /avod/protos/model.proto: -------------------------------------------------------------------------------- 1 | package avod.protos; 2 | 3 | import "avod/protos/layers.proto"; 4 | 5 | // Message for configuring the DetectionModel.
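// Note: checkpoint_name must match the pipeline config file name, otherwise
// config_builder_util.get_configs_from_pipeline_file raises a ValueError.
// Partial illustrative fragment (values hypothetical; the required fields
// omitted here must all be set in a real config):
//   model_config {
//     model_name: 'avod_model'
//     checkpoint_name: 'avod_cars_example'
//     path_drop_probabilities: [0.9, 0.9]
//   }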
6 | message ModelConfig { 7 | 8 | // Model name used to run either RPN or AVOD 9 | optional string model_name = 1 [default = 'avod_model']; 10 | 11 | // Checkpoint name 12 | optional string checkpoint_name = 2 [default = 'detection_model']; 13 | 14 | optional PathsConfig paths_config = 3; 15 | required InputConfig input_config = 4; 16 | required RpnConfig rpn_config = 5; 17 | required AvodConfig avod_config = 6; 18 | 19 | // Label smoothing epsilon 20 | required float label_smoothing_epsilon = 7; 21 | 22 | // Expand proposals lengths along x and z for larger context region (in m) 23 | // (0.0 - 1.0 recommended) 24 | required float expand_proposals_xz = 8; 25 | 26 | // Global path drop (p_keep_img, p_keep_bev) 27 | // To disable path drop, set both to 1.0 28 | repeated float path_drop_probabilities = 9; 29 | 30 | // To keep all the samples including the ones without anchor-info 31 | // i.e. labels during training 32 | required bool train_on_all_samples = 10; 33 | 34 | // To keep all the samples including the ones without anchor-info 35 | // i.e. labels during validation 36 | required bool eval_all_samples = 11; 37 | 38 | // Layer configurations 39 | required LayersConfig layers_config = 12; 40 | 41 | // Loss configurations 42 | required LossConfig loss_config = 13; 43 | } 44 | 45 | message PathsConfig { 46 | // Checkpoint dir 47 | optional string checkpoint_dir = 1; 48 | 49 | // Log dir (no underscore to match tensorboard) 50 | optional string logdir = 2; 51 | 52 | // Directory to save predictions 53 | optional string pred_dir = 3; 54 | } 55 | 56 | message InputConfig { 57 | // Bev dimensions 58 | optional int32 bev_dims_h = 1 [default = 700]; 59 | optional int32 bev_dims_w = 2 [default = 800]; 60 | optional int32 bev_depth = 3 [default = 6]; 61 | 62 | // Image dimensions 63 | optional int32 img_dims_h = 4 [default = 480]; 64 | optional int32 img_dims_w = 5 [default = 1590]; 65 | optional int32 img_depth = 6 [default = 3]; 66 | } 67 | 68 | message RpnConfig { 69 | // RPN proposal ROI crop size 70 | required int32 rpn_proposal_roi_crop_size = 1; 71 | 72 | // RPN proposal ROI fusion method, one of ['mean', 'concat'] 73 | required string rpn_fusion_method = 2; 74 | 75 | // RPN Non-max suppression boxes during training 76 | required int32 rpn_train_nms_size = 3; 77 | 78 | // RPN Non-max suppression boxes during testing 79 | required int32 rpn_test_nms_size = 4; 80 | 81 | // RPN NMS IoU threshold 82 | required float rpn_nms_iou_thresh = 5; 83 | } 84 | 85 | message AvodConfig { 86 | // AVOD Proposal ROI crop size 87 | required int32 avod_proposal_roi_crop_size = 1; 88 | 89 | // Positive selection, one of ['corr_cls', 'not_bkg'] 90 | required string avod_positive_selection = 3; 91 | 92 | // AVOD Non-max suppression boxes 93 | required int32 avod_nms_size = 4; 94 | 95 | // AVOD NMS IoU threshold 96 | required float avod_nms_iou_thresh = 5; 97 | 98 | // AVOD bounding box representation, one of ['box_3d', 'box_8c'] 99 | required string avod_box_representation = 6; 100 | } 101 | 102 | message LossConfig { 103 | // RPN/AVOD Regression loss weight 104 | required float reg_loss_weight = 1; 105 | 106 | // AVOD angle vector loss weight 107 | required float ang_loss_weight = 2; 108 | 109 | // RPN/AVOD Classification loss weight 110 | required float cls_loss_weight = 3; 111 | } 112 | 113 | -------------------------------------------------------------------------------- /avod/core/minibatch_sampler_test.py: -------------------------------------------------------------------------------- 1 | # Copyright 2017 
The TensorFlow Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | # ============================================================================== 15 | 16 | """Tests for avod.core.minibatch_sampler.""" 17 | 18 | import numpy as np 19 | import tensorflow as tf 20 | 21 | from avod.core import minibatch_sampler 22 | 23 | 24 | class MinibatchSamplerTest(tf.test.TestCase): 25 | def test_subsample_indicator_when_more_true_elements_than_num_samples( 26 | self): 27 | np_indicator = [True, False, True, False, True, True, False] 28 | indicator = tf.constant(np_indicator) 29 | samples = minibatch_sampler.MinibatchSampler.subsample_indicator( 30 | indicator, 3) 31 | with self.test_session() as sess: 32 | samples_out = sess.run(samples) 33 | self.assertEqual(np.sum(samples_out), 3) 34 | self.assertAllEqual(samples_out, 35 | np.logical_and(samples_out, np_indicator)) 36 | 37 | def test_subsample_when_more_true_elements_than_num_samples_no_shape(self): 38 | np_indicator = [True, False, True, False, True, True, False] 39 | indicator = tf.placeholder(tf.bool) 40 | feed_dict = {indicator: np_indicator} 41 | 42 | samples = minibatch_sampler.MinibatchSampler.subsample_indicator( 43 | indicator, 3) 44 | with self.test_session() as sess: 45 | samples_out = sess.run(samples, feed_dict=feed_dict) 46 | self.assertEqual(np.sum(samples_out), 3) 47 | self.assertAllEqual(samples_out, 48 | np.logical_and(samples_out, np_indicator)) 49 | 50 | def test_subsample_indicator_when_less_true_elements_than_num_samples( 51 | self): 52 | np_indicator = [True, False, True, False, True, True, False] 53 | indicator = tf.constant(np_indicator) 54 | samples = minibatch_sampler.MinibatchSampler.subsample_indicator( 55 | indicator, 5) 56 | with self.test_session() as sess: 57 | samples_out = sess.run(samples) 58 | self.assertEqual(np.sum(samples_out), 4) 59 | self.assertAllEqual(samples_out, 60 | np.logical_and(samples_out, np_indicator)) 61 | 62 | def test_subsample_indicator_when_num_samples_is_zero(self): 63 | np_indicator = [True, False, True, False, True, True, False] 64 | indicator = tf.constant(np_indicator) 65 | samples_none = minibatch_sampler.MinibatchSampler.subsample_indicator( 66 | indicator, 0) 67 | with self.test_session() as sess: 68 | samples_none_out = sess.run(samples_none) 69 | self.assertAllEqual( 70 | np.zeros_like(samples_none_out, dtype=bool), 71 | samples_none_out) 72 | 73 | def test_subsample_indicator_when_indicator_all_false(self): 74 | indicator_empty = tf.zeros([0], dtype=tf.bool) 75 | samples_empty = minibatch_sampler.MinibatchSampler.subsample_indicator( 76 | indicator_empty, 4) 77 | with self.test_session() as sess: 78 | samples_empty_out = sess.run(samples_empty) 79 | self.assertEqual(0, samples_empty_out.size) 80 | 81 | 82 | if __name__ == '__main__': 83 | tf.test.main() 84 | -------------------------------------------------------------------------------- /avod/core/anchor_filter_test.py:
-------------------------------------------------------------------------------- 1 | import unittest 2 | import numpy as np 3 | 4 | from avod.core import anchor_filter 5 | from avod.core import box_3d_encoder 6 | from wavedata.tools.core.voxel_grid import VoxelGrid 7 | 8 | 9 | class AnchorFilterTest(unittest.TestCase): 10 | 11 | def test_get_empty_anchor_filter_in_2d(self): 12 | # Define the 3D area extents that bound the point cloud 13 | area_extent = [(0., 2.), (-1., 0.), (0., 2.)] 14 | 15 | # Creates a voxel grid in the following format at y = bin (-1.5, -0.5] 16 | # [ ][ ][ ][ ] 17 | # [ ][ ][x][ ] 18 | # [ ][ ][ ][ ] 19 | # [ ][ ][x][ ] 20 | pts = np.array([[0.51, -0.5, 1.1], 21 | [1.51, -0.5, 1.1]]) 22 | 23 | voxel_size = 0.5 24 | voxel_grid = VoxelGrid() 25 | voxel_grid.voxelize(pts, voxel_size, extents=area_extent) 26 | 27 | # Define anchors to test 28 | boxes_3d = np.array([ 29 | [0.51, 0, 0.51, 1, 1, 1, 0], 30 | [0.51, 0, 0.51, 1, 1, 1, np.pi / 2.], 31 | [0.51, 0, 1.1, 1, 1, 1, 0], 32 | [0.51, 0, 1.1, 1, 1, 1, np.pi / 2.], 33 | [1.51, 0, 0.51, 1, 1, 1, 0], 34 | [1.51, 0, 0.51, 1, 1, 1, np.pi / 2.], 35 | [1.51, 0, 1.1, 1, 1, 1, 0], 36 | [1.51, 0, 1.1, 1, 1, 1, np.pi / 2.], 37 | ]) 38 | 39 | anchors = box_3d_encoder.box_3d_to_anchor(boxes_3d) 40 | 41 | # test anchor locations, number indicates the anchors indices 42 | # [ ][ ][ ][ ] 43 | # [ ][1][3][ ] 44 | # [ ][ ][ ][ ] 45 | # [ ][5][7][ ] 46 | 47 | gen_filter = anchor_filter.get_empty_anchor_filter(anchors, 48 | voxel_grid, 49 | density_threshold=1) 50 | 51 | expected_filter = np.array( 52 | [False, False, True, True, False, False, True, True]) 53 | 54 | self.assertTrue((gen_filter == expected_filter).all()) 55 | 56 | boxes_3d = np.array([ 57 | [0.5, 0, 0.5, 2, 1, 1, 0], # case 1 58 | [0.5, 0, 0.5, 2, 1, 1, np.pi / 2.], 59 | [0.5, 0, 1.5, 1, 2, 1, 0], # case 2 60 | [0.5, 0, 1.5, 1, 2, 1, np.pi / 2.], 61 | [1.5, 0, 0.5, 2, 1, 1, 0], # case 3 62 | [1.5, 0, 0.5, 2, 1, 1, np.pi / 2.], 63 | [1.5, 0, 1.5, 1, 2, 1, 0], # case 4 64 | [1.5, 0, 1.5, 1, 2, 1, np.pi / 2.] 65 | ]) 66 | 67 | anchors = box_3d_encoder.box_3d_to_anchor(boxes_3d) 68 | 69 | # case 1 70 | # [ ][ ][ ][ ] [ ][ ][ ][ ] 71 | # [ ][o][ ][ ] [ ][o][o][ ] 72 | # [ ][o][ ][ ] [ ][ ][ ][ ] 73 | # [ ][ ][ ][ ] [ ][ ][ ][ ] 74 | 75 | # case 2 76 | # [ ][ ][ ][ ] [ ][ ][ ][ ] 77 | # [ ][ ][o][o] [ ][ ][o][ ] 78 | # [ ][ ][ ][ ] [ ][ ][o][ ] 79 | # [ ][ ][ ][ ] [ ][ ][ ][ ] 80 | 81 | # case 3 82 | # [ ][ ][ ][ ] [ ][ ][ ][ ] 83 | # [ ][ ][ ][ ] [ ][ ][ ][ ] 84 | # [ ][o][ ][ ] [ ][o][o][ ] 85 | # [ ][o][ ][ ] [ ][ ][ ][ ] 86 | 87 | # case 4 88 | # [ ][ ][ ][ ] [ ][ ][ ][ ] 89 | # [ ][ ][ ][ ] [ ][ ][ ][ ] 90 | # [ ][ ][o][o] [ ][ ][o][ ] 91 | # [ ][ ][ ][ ] [ ][ ][o][ ] 92 | 93 | gen_filter = anchor_filter.get_empty_anchor_filter(anchors, 94 | voxel_grid, 95 | density_threshold=1) 96 | expected_filter = np.array( 97 | [False, True, True, True, False, True, True, True]) 98 | 99 | self.assertTrue((gen_filter == expected_filter).all()) 100 | 101 | 102 | if __name__ == '__main__': 103 | unittest.main() 104 | -------------------------------------------------------------------------------- /avod/core/minibatch_samplers/balanced_positive_negative_sampler_test.py: -------------------------------------------------------------------------------- 1 | # Copyright 2017 The TensorFlow Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | # ============================================================================== 15 | 16 | """Tests for object_detection.core.balanced_positive_negative_sampler.""" 17 | 18 | import numpy as np 19 | import tensorflow as tf 20 | 21 | from avod.core.minibatch_samplers import balanced_positive_negative_sampler 22 | 23 | 24 | class BalancedPositiveNegativeSamplerTest(tf.test.TestCase): 25 | def test_subsample_all_examples(self): 26 | numpy_labels = np.random.permutation(300) 27 | indicator = tf.constant(np.ones(300) == 1) 28 | numpy_labels = (numpy_labels - 200) > 0 29 | 30 | labels = tf.constant(numpy_labels) 31 | 32 | sampler = (balanced_positive_negative_sampler. 33 | BalancedPositiveNegativeSampler()) 34 | is_sampled, _ = sampler.subsample(indicator, 64, labels) 35 | with self.test_session() as sess: 36 | is_sampled = sess.run(is_sampled) 37 | self.assertTrue(sum(is_sampled) == 64) 38 | self.assertTrue( 39 | sum(np.logical_and(numpy_labels, is_sampled)) == 32) 40 | self.assertTrue(sum(np.logical_and( 41 | np.logical_not(numpy_labels), is_sampled)) == 32) 42 | 43 | def test_subsample_selection(self): 44 | # Test random sampling when only some examples can be sampled: 45 | # 100 samples, 20 positives, 10 positives cannot be sampled 46 | numpy_labels = np.arange(100) 47 | numpy_indicator = numpy_labels < 90 48 | indicator = tf.constant(numpy_indicator) 49 | numpy_labels = (numpy_labels - 80) >= 0 50 | 51 | labels = tf.constant(numpy_labels) 52 | 53 | sampler = (balanced_positive_negative_sampler. 54 | BalancedPositiveNegativeSampler()) 55 | is_sampled, _ = sampler.subsample(indicator, 64, labels) 56 | with self.test_session() as sess: 57 | is_sampled = sess.run(is_sampled) 58 | self.assertTrue(sum(is_sampled) == 64) 59 | self.assertTrue( 60 | sum(np.logical_and(numpy_labels, is_sampled)) == 10) 61 | self.assertTrue(sum(np.logical_and( 62 | np.logical_not(numpy_labels), is_sampled)) == 54) 63 | self.assertAllEqual(is_sampled, np.logical_and(is_sampled, 64 | numpy_indicator)) 65 | 66 | def test_raises_error_with_incorrect_label_shape(self): 67 | labels = tf.constant([[True, False, False]]) 68 | indicator = tf.constant([True, False, True]) 69 | sampler = (balanced_positive_negative_sampler. 70 | BalancedPositiveNegativeSampler()) 71 | with self.assertRaises(ValueError): 72 | sampler.subsample(indicator, 64, labels) 73 | 74 | def test_raises_error_with_incorrect_indicator_shape(self): 75 | labels = tf.constant([True, False, False]) 76 | indicator = tf.constant([[True, False, True]]) 77 | sampler = (balanced_positive_negative_sampler. 
78 | BalancedPositiveNegativeSampler()) 79 | with self.assertRaises(ValueError): 80 | sampler.subsample(indicator, 64, labels) 81 | 82 | 83 | if __name__ == '__main__': 84 | tf.test.main() 85 | -------------------------------------------------------------------------------- /avod/core/orientation_encoder_test.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import tensorflow as tf 3 | 4 | from avod.core import orientation_encoder 5 | 6 | 7 | class OrientationEncoderTest(tf.test.TestCase): 8 | def test_tf_orientation_to_angle_vector(self): 9 | # Test conversion for angles between [-pi, pi] with 0.5 degree steps 10 | np_orientations = np.arange(-np.pi, np.pi, np.pi / 360.0) 11 | 12 | expected_angle_vectors = np.stack([np.cos(np_orientations), 13 | np.sin(np_orientations)], axis=1) 14 | 15 | # Convert to tensors and convert to angle unit vectors 16 | tf_orientations = tf.convert_to_tensor(np_orientations) 17 | tf_angle_vectors = orientation_encoder.tf_orientation_to_angle_vector( 18 | tf_orientations) 19 | 20 | with self.test_session() as sess: 21 | angle_vectors_out = sess.run(tf_angle_vectors) 22 | 23 | np.testing.assert_allclose(angle_vectors_out, 24 | expected_angle_vectors) 25 | 26 | def test_angle_vectors_to_orientation(self): 27 | # Test conversion for angles between [-pi, pi] with 0.5 degree steps 28 | np_angle_vectors = \ 29 | np.asarray([[np.cos(angle), np.sin(angle)] 30 | for angle in np.arange(-np.pi, np.pi, np.pi / 360.0)]) 31 | 32 | # Check that tf output matches numpy's arctan2 output 33 | expected_orientations = np.arctan2(np_angle_vectors[:, 1], 34 | np_angle_vectors[:, 0]) 35 | 36 | # Convert to tensors and convert to orientation angles 37 | tf_angle_vectors = tf.convert_to_tensor(np_angle_vectors) 38 | tf_orientations = orientation_encoder.tf_angle_vector_to_orientation( 39 | tf_angle_vectors) 40 | 41 | with self.test_session() as sess: 42 | orientations_out = sess.run(tf_orientations) 43 | np.testing.assert_allclose(orientations_out, 44 | expected_orientations) 45 | 46 | def test_zeros_angle_vectors_to_orientation(self): 47 | # Test conversion for angle vectors with zeros in them 48 | np_angle_vectors = np.asarray( 49 | [[0, 0], 50 | [1, 0], [10, 0], 51 | [0, 1], [0, 10], 52 | [-1, 0], [-10, 0], 53 | [0, -1], [0, -10]]) 54 | 55 | half_pi = np.pi / 2 56 | expected_orientations = [0, 57 | 0, 0, 58 | half_pi, half_pi, 59 | np.pi, np.pi, 60 | -half_pi, -half_pi] 61 | 62 | # Convert to tensors and convert to orientation angles 63 | tf_angle_vectors = tf.convert_to_tensor(np_angle_vectors, 64 | dtype=tf.float64) 65 | tf_orientations = orientation_encoder.tf_angle_vector_to_orientation( 66 | tf_angle_vectors) 67 | 68 | with self.test_session() as sess: 69 | orientations_out = sess.run(tf_orientations) 70 | np.testing.assert_allclose(orientations_out, 71 | expected_orientations) 72 | 73 | def test_two_way_conversion(self): 74 | # Test conversion for angles between [-pi, pi] with 0.5 degree steps 75 | np_orientations = np.arange(-np.pi, np.pi, np.pi / 360.0) 76 | 77 | tf_angle_vectors = orientation_encoder.tf_orientation_to_angle_vector( 78 | np_orientations) 79 | tf_orientations = orientation_encoder.tf_angle_vector_to_orientation( 80 | tf_angle_vectors) 81 | 82 | # Check that conversion from orientation -> angle vector -> 83 | # orientation results in the same values 84 | with self.test_session() as sess: 85 | orientations_out = sess.run(tf_orientations) 86 | np.testing.assert_allclose(orientations_out, 87 |
np_orientations) 88 | -------------------------------------------------------------------------------- /demos/dataset/data_histograms.py: -------------------------------------------------------------------------------- 1 | import sys 2 | 3 | import matplotlib.pyplot as plt 4 | import numpy as np 5 | 6 | from wavedata.tools.obj_detection import obj_utils 7 | 8 | from avod.builders.dataset_builder import DatasetBuilder 9 | 10 | 11 | def main(): 12 | """Show histograms of ground truth labels 13 | """ 14 | 15 | dataset = DatasetBuilder.build_kitti_dataset( 16 | # DatasetBuilder.KITTI_TRAIN 17 | # DatasetBuilder.KITTI_VAL 18 | DatasetBuilder.KITTI_TRAINVAL 19 | ) 20 | 21 | difficulty = 2 22 | 23 | centroid_bins = 51 24 | dimension_bins = 21 25 | orientation_bins = 65 26 | 27 | classes = ['Car'] 28 | # classes = ['Pedestrian'] 29 | # classes = ['Cyclist'] 30 | # classes = ['Pedestrian', 'Cyclist'] 31 | 32 | # Dataset values 33 | num_samples = dataset.num_samples 34 | 35 | all_centroids_x = [] 36 | all_centroids_y = [] 37 | all_centroids_z = [] 38 | all_lengths = [] 39 | all_widths = [] 40 | all_heights = [] 41 | all_orientations = [] 42 | 43 | # Counter for total number of valid samples 44 | num_valid_samples = 0 45 | 46 | for sample_idx in range(num_samples): 47 | 48 | sys.stdout.write('\r{} / {}'.format(sample_idx + 1, num_samples)) 49 | 50 | sample_name = dataset.sample_names[sample_idx] 51 | img_idx = int(sample_name) 52 | 53 | obj_labels = obj_utils.read_labels(dataset.label_dir, img_idx) 54 | obj_labels = dataset.kitti_utils.filter_labels(obj_labels, 55 | classes=classes, 56 | difficulty=difficulty) 57 | 58 | centroids = np.asarray([obj.t for obj in obj_labels]) 59 | lengths = np.asarray([obj.l for obj in obj_labels]) 60 | widths = np.asarray([obj.w for obj in obj_labels]) 61 | heights = np.asarray([obj.h for obj in obj_labels]) 62 | orientations = np.asarray([obj.ry for obj in obj_labels]) 63 | 64 | if any(orientations) and np.amax(np.abs(orientations)) > np.pi: 65 | raise ValueError('Invalid orientation') 66 | 67 | if len(centroids) > 0: 68 | all_centroids_x.extend(centroids[:, 0]) 69 | all_centroids_y.extend(centroids[:, 1]) 70 | all_centroids_z.extend(centroids[:, 2]) 71 | all_lengths.extend(lengths) 72 | all_widths.extend(widths) 73 | all_heights.extend(heights) 74 | all_orientations.extend(orientations) 75 | 76 | num_valid_samples += 1 77 | 78 | print('Finished reading labels, num_valid_samples', num_valid_samples) 79 | 80 | # Get means 81 | mean_centroid_x = np.mean(all_centroids_x) 82 | mean_centroid_y = np.mean(all_centroids_y) 83 | mean_centroid_z = np.mean(all_centroids_z) 84 | mean_dims = np.mean([all_lengths, all_widths, all_heights]) 85 | 86 | np.set_printoptions(formatter={'float': lambda x: "{0:0.3f}".format(x)}) 87 | print('mean_centroid_x {0:0.3f}'.format(mean_centroid_x)) 88 | print('mean_centroid_y {0:0.3f}'.format(mean_centroid_y)) 89 | print('mean_centroid_z {0:0.3f}'.format(mean_centroid_z)) 90 | print('mean_dims {0:0.3f}'.format(mean_dims)) 91 | 92 | # Make plots 93 | f, ax_arr = plt.subplots(3, 3) 94 | 95 | # xyz 96 | ax_arr[0, 0].hist(all_centroids_x, centroid_bins, facecolor='green') 97 | ax_arr[0, 1].hist(all_centroids_y, centroid_bins, facecolor='green') 98 | ax_arr[0, 2].hist(all_centroids_z, centroid_bins, facecolor='green') 99 | 100 | # lwh 101 | ax_arr[1, 0].hist(all_lengths, dimension_bins, facecolor='green') 102 | ax_arr[1, 1].hist(all_widths, dimension_bins, facecolor='green') 103 | ax_arr[1, 2].hist(all_heights, dimension_bins, facecolor='green') 104 |
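    # Only ax_arr[2, 0] below is used; the remaining two subplots in the
    # bottom row are intentionally left empty.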
105 | # orientations 106 | ax_arr[2, 0].hist(all_orientations, orientation_bins, facecolor='green') 107 | 108 | plt.show(block=True) 109 | 110 | 111 | if __name__ == '__main__': 112 | main() 113 | -------------------------------------------------------------------------------- /avod/builders/config_builder_util.py: -------------------------------------------------------------------------------- 1 | """Config file reader utils.""" 2 | 3 | import os 4 | import shutil 5 | 6 | from google.protobuf import text_format 7 | 8 | import avod 9 | from avod.protos import model_pb2 10 | from avod.protos import pipeline_pb2 11 | 12 | 13 | class ConfigObj: 14 | pass 15 | 16 | 17 | def proto_to_obj(config): 18 | """Hack to convert a proto config into a plain object so repeated fields 19 | can be overwritten 20 | 21 | Args: 22 | config: proto config 23 | 24 | Returns: 25 | config_obj: object with the same fields as the config 26 | """ 27 | all_fields = list(config.DESCRIPTOR.fields_by_name) 28 | config_obj = ConfigObj() 29 | for field in all_fields: 30 | field_value = getattr(config, field) 31 | setattr(config_obj, field, field_value) 32 | 33 | return config_obj 34 | 35 | 36 | def get_model_config_from_file(config_path): 37 | """Reads model configuration from a configuration file. 38 | This merges the layer config info with model default configs. 39 | Args: 40 | config_path: A path to the config 41 | 42 | Returns: 43 | layers_config: A configured model_pb2 config 44 | """ 45 | 46 | model_config = model_pb2.ModelConfig() 47 | with open(config_path, 'r') as f: 48 | text_format.Merge(f.read(), model_config) 49 | return model_config 50 | 51 | 52 | def get_configs_from_pipeline_file(pipeline_config_path, 53 | is_training): 54 | """Reads model configuration from a pipeline_pb2.NetworkPipelineConfig. 55 | Args: 56 | pipeline_config_path: A path to the network pipeline config file 57 | is_training: A boolean flag to indicate training stage, used for 58 | creating the checkpoint directory which must be created at the 59 | first training iteration.
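
    Note: the checkpoint_name in the model config must match the pipeline
    config file name; a ValueError is raised below otherwise.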
60 | Returns: 61 | model_config: A model_pb2.ModelConfig 62 | train_config: A train_pb2.TrainConfig 63 | eval_config: A eval_pb2.EvalConfig 64 | dataset_config: A kitti_dataset_pb2.KittiDatasetConfig 65 | """ 66 | 67 | pipeline_config = pipeline_pb2.NetworkPipelineConfig() 68 | with open(pipeline_config_path, 'r') as f: 69 | text_format.Merge(f.read(), pipeline_config) 70 | 71 | model_config = pipeline_config.model_config 72 | 73 | # Make sure the checkpoint name matches the config filename 74 | config_file_name = \ 75 | os.path.split(pipeline_config_path)[1].split('.')[0] 76 | checkpoint_name = model_config.checkpoint_name 77 | if config_file_name != checkpoint_name: 78 | raise ValueError('Config and checkpoint names must match.') 79 | 80 | output_root_dir = avod.root_dir() + '/data/outputs/' + checkpoint_name 81 | 82 | # Construct paths 83 | paths_config = model_config.paths_config 84 | if not paths_config.checkpoint_dir: 85 | checkpoint_dir = output_root_dir + '/checkpoints' 86 | 87 | if is_training: 88 | if not os.path.exists(checkpoint_dir): 89 | os.makedirs(checkpoint_dir) 90 | 91 | paths_config.checkpoint_dir = checkpoint_dir 92 | 93 | if not paths_config.logdir: 94 | paths_config.logdir = output_root_dir + '/logs/' 95 | 96 | if not paths_config.pred_dir: 97 | paths_config.pred_dir = output_root_dir + '/predictions' 98 | 99 | train_config = pipeline_config.train_config 100 | eval_config = pipeline_config.eval_config 101 | dataset_config = pipeline_config.dataset_config 102 | 103 | if is_training: 104 | # Copy the config to the experiments folder 105 | experiment_config_path = output_root_dir + '/' +\ 106 | model_config.checkpoint_name 107 | experiment_config_path += '.config' 108 | # Copy this even if the config exists, in case some parameters 109 | # were modified 110 | shutil.copy(pipeline_config_path, experiment_config_path) 111 | 112 | return model_config, train_config, eval_config, dataset_config 113 | -------------------------------------------------------------------------------- /avod/core/anchor_generators/grid_anchor_3d_generator.py: -------------------------------------------------------------------------------- 1 | """ 2 | Generates 3D anchors, placing them on the ground plane 3 | """ 4 | 5 | import numpy as np 6 | 7 | from avod.core import anchor_generator 8 | 9 | 10 | class GridAnchor3dGenerator(anchor_generator.AnchorGenerator): 11 | 12 | def name_scope(self): 13 | return 'GridAnchor3dGenerator' 14 | 15 | def _generate(self, **params): 16 | """ 17 | Generates 3D anchors in a grid in the provided 3d area and places 18 | them on the ground_plane. 19 | 20 | Args: 21 | **params: 22 | area_3d: [[min_x, max_x], [min_y, max_y], [min_z, max_z]] 23 | 24 | Returns: 25 | list of 3D anchors in the form N x [x, y, z, l, w, h, ry] 26 | """ 27 | 28 | area_3d = params.get('area_3d') 29 | anchor_3d_sizes = params.get('anchor_3d_sizes') 30 | anchor_stride = params.get('anchor_stride') 31 | ground_plane = params.get('ground_plane') 32 | 33 | return tile_anchors_3d(area_3d, 34 | anchor_3d_sizes, 35 | anchor_stride, 36 | ground_plane) 37 | 38 | 39 | def tile_anchors_3d(area_extents, 40 | anchor_3d_sizes, 41 | anchor_stride, 42 | ground_plane): 43 | """ 44 | Tiles anchors over the area extents by using meshgrids to 45 | generate combinations of (x, y, z), (l, w, h) and ry. 
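    Every (x, z) grid location is paired with each anchor size and the two
    fixed rotations (0 and pi/2), so the total anchor count is
    len(x_centers) * len(z_centers) * len(anchor_3d_sizes) * 2. Each
    anchor's y coordinate is solved from the ground plane equation:
    y = -(a*x + c*z + d) / b.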
46 | 47 | Args: 48 | area_extents: [[min_x, max_x], [min_y, max_y], [min_z, max_z]] 49 | anchor_3d_sizes: list of 3d anchor sizes N x (l, w, h) 50 | anchor_stride: stride lengths (x_stride, z_stride) 51 | ground_plane: coefficients of the ground plane e.g. [0, -1, 0, 0] 52 | 53 | Returns: 54 | boxes: list of 3D anchors in box_3d format N x [x, y, z, l, w, h, ry] 55 | """ 56 | # Convert sizes to ndarray 57 | anchor_3d_sizes = np.asarray(anchor_3d_sizes) 58 | 59 | anchor_stride_x = anchor_stride[0] 60 | anchor_stride_z = anchor_stride[1] 61 | anchor_rotations = np.asarray([0, np.pi / 2.0]) 62 | 63 | x_start = area_extents[0][0] + anchor_stride[0] / 2.0 64 | x_end = area_extents[0][1] 65 | x_centers = np.array(np.arange(x_start, x_end, step=anchor_stride_x), 66 | dtype=np.float32) 67 | 68 | z_start = area_extents[2][1] - anchor_stride[1] / 2.0 69 | z_end = area_extents[2][0] 70 | z_centers = np.array(np.arange(z_start, z_end, step=-anchor_stride_z), 71 | dtype=np.float32) 72 | 73 | # Use ranges for substitution 74 | size_indices = np.arange(0, len(anchor_3d_sizes)) 75 | rotation_indices = np.arange(0, len(anchor_rotations)) 76 | 77 | # Generate matrix for substitution 78 | # e.g. for two sizes and two rotations 79 | # [[x0, z0, 0, 0], [x0, z0, 0, 1], [x0, z0, 1, 0], [x0, z0, 1, 1], 80 | # [x1, z0, 0, 0], [x1, z0, 0, 1], [x1, z0, 1, 0], [x1, z0, 1, 1], ...] 81 | before_sub = np.stack(np.meshgrid(x_centers, 82 | z_centers, 83 | size_indices, 84 | rotation_indices), 85 | axis=4).reshape(-1, 4) 86 | 87 | # Place anchors on the ground plane 88 | a, b, c, d = ground_plane 89 | all_x = before_sub[:, 0] 90 | all_z = before_sub[:, 1] 91 | all_y = -(a * all_x + c * all_z + d) / b 92 | 93 | # Create empty matrix to return 94 | num_anchors = len(before_sub) 95 | all_anchor_boxes_3d = np.zeros((num_anchors, 7)) 96 | 97 | # Fill in x, y, z 98 | all_anchor_boxes_3d[:, 0:3] = np.stack((all_x, all_y, all_z), axis=1) 99 | 100 | # Fill in shapes 101 | sizes = anchor_3d_sizes[np.asarray(before_sub[:, 2], np.int32)] 102 | all_anchor_boxes_3d[:, 3:6] = sizes 103 | 104 | # Fill in rotations 105 | rotations = anchor_rotations[np.asarray(before_sub[:, 3], np.int32)] 106 | all_anchor_boxes_3d[:, 6] = rotations 107 | 108 | return all_anchor_boxes_3d 109 | -------------------------------------------------------------------------------- /demos/dataset/car_clustering.py: -------------------------------------------------------------------------------- 1 | import sys 2 | 3 | import numpy as np 4 | from sklearn.cluster import KMeans 5 | 6 | from wavedata.tools.obj_detection import obj_utils 7 | 8 | from avod.builders.dataset_builder import DatasetBuilder 9 | from avod.core.label_cluster_utils import LabelClusterUtils 10 | 11 | 12 | def main(): 13 | """ 14 | Clusters car label dimensions for the training split. 15 | 16 | Prints the cluster centres and standard deviations obtained after 17 | re-clustering the labels that lie 2 and 3 standard deviations below 18 | and above the mean length (main itself returns nothing). 19 | """ 20 | 21 | dataset = DatasetBuilder.build_kitti_dataset(DatasetBuilder.KITTI_TRAIN) 22 | 23 | # Cluster the car dimensions over the dataset's cluster split 24 | # Load labels corresponding to the sample list for clustering 25 | sample_list = dataset.load_sample_names(dataset.cluster_split) 26 | all_dims = [] 27 | 28 | num_samples = len(sample_list) 29 | for sample_idx in range(num_samples): 30 | 31 | sys.stdout.write("\rClustering labels {} / {}".format( 32 | sample_idx + 1, num_samples)) 33 | sys.stdout.flush() 34 | 35 | sample_name = sample_list[sample_idx] 36 | img_idx = int(sample_name) 37 | 38 | obj_labels =
obj_utils.read_labels(dataset.label_dir, img_idx) 39 | filtered_lwh = LabelClusterUtils._filter_labels_by_class( 40 | obj_labels, dataset.classes) 41 | 42 | if filtered_lwh[0]: 43 | all_dims.extend(filtered_lwh[0]) 44 | 45 | all_dims = np.array(all_dims) 46 | print("\nFinished reading labels, clustering data...\n") 47 | 48 | # Print 3 decimal places 49 | np.set_printoptions(formatter={'float': lambda x: "{0:0.3f}".format(x)}) 50 | 51 | # Calculate average cluster 52 | k_means = KMeans(n_clusters=1, 53 | random_state=0).fit(all_dims) 54 | 55 | cluster_centre = k_means.cluster_centers_[0] 56 | 57 | # Calculate std. dev 58 | std_dev = np.std(all_dims, axis=0) 59 | 60 | # Calculate 2 and 3 standard deviations below the mean 61 | two_sigma_length_lo = cluster_centre[0] - 2 * std_dev[0] 62 | three_sigma_length_lo = cluster_centre[0] - 3 * std_dev[0] 63 | 64 | # Keep only the labels with length more than two (or three) std devs 65 | # below the mean and re-cluster 66 | small_mask_2 = all_dims[:, 0] < two_sigma_length_lo 67 | small_dims_2 = all_dims[small_mask_2] 68 | 69 | small_mask_3 = all_dims[:, 0] < three_sigma_length_lo 70 | small_dims_3 = all_dims[small_mask_3] 71 | 72 | small_k_means_2 = KMeans(n_clusters=1, random_state=0).fit(small_dims_2) 73 | small_k_means_3 = KMeans(n_clusters=1, random_state=0).fit(small_dims_3) 74 | small_std_dev_2 = np.std(small_dims_2, axis=0) 75 | small_std_dev_3 = np.std(small_dims_3, axis=0) 76 | 77 | print('small_k_means_2:', small_k_means_2.cluster_centers_) 78 | print('small_k_means_3:', small_k_means_3.cluster_centers_) 79 | print('small_std_dev_2:', small_std_dev_2) 80 | print('small_std_dev_3:', small_std_dev_3) 81 | 82 | # Calculate 2 and 3 standard deviations above the mean 83 | two_sigma_length_hi = cluster_centre[0] + 2 * std_dev[0] 84 | three_sigma_length_hi = cluster_centre[0] + 3 * std_dev[0] 85 | 86 | # Keep only the labels with length more than two (or three) std devs 87 | # above the mean and re-cluster 88 | large_mask_2 = all_dims[:, 0] > two_sigma_length_hi 89 | large_dims_2 = all_dims[large_mask_2] 90 | 91 | large_mask_3 = all_dims[:, 0] > three_sigma_length_hi 92 | large_dims_3 = all_dims[large_mask_3] 93 | 94 | large_k_means_2 = KMeans(n_clusters=1, random_state=0).fit(large_dims_2) 95 | large_k_means_3 = KMeans(n_clusters=1, random_state=0).fit(large_dims_3) 96 | 97 | large_std_dev_2 = np.std(large_dims_2, axis=0) 98 | large_std_dev_3 = np.std(large_dims_3, axis=0) 99 | 100 | print('large_k_means_2:', large_k_means_2.cluster_centers_) 101 | print('large_k_means_3:', large_k_means_3.cluster_centers_) 102 | print('large_std_dev_2:', large_std_dev_2) 103 | print('large_std_dev_3:', large_std_dev_3) 104 | 105 | 106 | if __name__ == '__main__': 107 | main() 108 | -------------------------------------------------------------------------------- /avod/core/feature_extractors/bev_vgg_test.py: -------------------------------------------------------------------------------- 1 | """Testing VGG BEV network.
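
Builds a small BEV VGG classification graph on the unittest Kitti dataset,
runs a single training step, and checks that the resulting loss is below 1.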
2 | """ 3 | import numpy as np 4 | import tensorflow as tf 5 | from google.protobuf import text_format 6 | 7 | import avod.tests as tests 8 | from avod.builders import optimizer_builder 9 | from avod.builders.dataset_builder import DatasetBuilder 10 | from avod.core import constants 11 | from avod.core.feature_extractors import bev_vgg as vgg 12 | from avod.datasets.kitti.kitti_dataset import KittiDataset 13 | from avod.protos import train_pb2 14 | 15 | slim = tf.contrib.slim 16 | 17 | 18 | def fill_feed_dict(dataset: KittiDataset, input_pl, batch_size): 19 | sample = dataset.next_batch(batch_size) 20 | 21 | bev_input = sample[0].get(constants.KEY_BEV_INPUT) 22 | bev_input = np.expand_dims(bev_input, axis=0) 23 | 24 | labels = sample[0].get(constants.KEY_LABEL_CLASSES) 25 | labels = np.expand_dims(labels, axis=1) 26 | 27 | label_pl = tf.placeholder(tf.float32, [None, 1]) 28 | 29 | feed_dict = { 30 | input_pl: bev_input, 31 | label_pl: labels 32 | } 33 | 34 | return feed_dict, label_pl 35 | 36 | 37 | class BevVggTest(tf.test.TestCase): 38 | 39 | @classmethod 40 | def setUpClass(cls): 41 | # Initialize the Kitti dataset 42 | test_dir = tests.test_path() 43 | 44 | # Get the unittest-kitti dataset 45 | dataset_builder = DatasetBuilder() 46 | cls.dataset = dataset_builder.build_kitti_dataset( 47 | dataset_builder.KITTI_UNITTEST) 48 | 49 | cls.log_dir = test_dir + '/logs' 50 | cls.bev_vgg_cls = vgg.BevVggClassification() 51 | 52 | def test_vgg_layers_build(self): 53 | train_config_text_proto = """ 54 | optimizer { 55 | gradient_descent { 56 | learning_rate { 57 | constant_learning_rate { 58 | learning_rate: 0.1 59 | } 60 | } 61 | } 62 | } 63 | """ 64 | train_config = train_pb2.TrainConfig() 65 | text_format.Merge(train_config_text_proto, train_config) 66 | global_summaries = set([]) 67 | batch_size = 1 68 | 69 | with tf.Graph().as_default(): 70 | with tf.name_scope('input'): 71 | # BEV image placeholder 72 | image_placeholder = tf.placeholder( 73 | tf.float32, (None, 700, 800, 6)) 74 | image_summary = tf.expand_dims(image_placeholder, axis=0) 75 | tf.summary.image("image", image_summary, max_outputs=5) 76 | 77 | # Check invalid BEV shape 78 | bev_shape = (300, 300) 79 | processed_image = self.bev_vgg_cls.preprocess_input( 80 | image_placeholder, bev_shape) 81 | 82 | predictions, end_points = self.bev_vgg_cls.build( 83 | processed_image, num_classes=1, is_training=True) 84 | 85 | feed_dict, label_pl = fill_feed_dict( 86 | self.dataset, image_placeholder, batch_size) 87 | 88 | ########################### 89 | # Loss Function 90 | ########################### 91 | cross_entropy = tf.nn.weighted_cross_entropy_with_logits( 92 | label_pl, 93 | predictions, 94 | 1.0) 95 | loss = tf.reduce_mean(cross_entropy) 96 | 97 | ########################### 98 | # Optimizer 99 | ########################### 100 | training_optimizer = optimizer_builder.build( 101 | train_config.optimizer, global_summaries) 102 | 103 | ########################### 104 | # Train-op 105 | ########################### 106 | train_op = slim.learning.create_train_op(loss, training_optimizer) 107 | 108 | sess = tf.Session() 109 | init = tf.global_variables_initializer() 110 | sess.run(init) 111 | 112 | loss = sess.run(train_op, feed_dict=feed_dict) 113 | 114 | self.assertLess(loss, 1) 115 | print('Loss ', loss) 116 | 117 | 118 | if __name__ == '__main__': 119 | tf.test.main() 120 | -------------------------------------------------------------------------------- /avod/core/trainer_test.py: 
-------------------------------------------------------------------------------- 1 | """Tests for avod.core.trainer with a dummy Detection Model""" 2 | 3 | import tensorflow as tf 4 | import numpy as np 5 | from tensorflow.contrib.layers.python.layers import layers 6 | from tensorflow.python.framework import random_seed 7 | from tensorflow.python.ops import math_ops 8 | from tensorflow.python.framework import constant_op 9 | from tensorflow.python.framework import dtypes 10 | 11 | from google.protobuf import text_format 12 | 13 | from avod.core import trainer 14 | from avod.core import model 15 | 16 | from avod.protos import train_pb2 17 | from avod.protos import model_pb2 18 | 19 | 20 | class FakeBatchNormClassifier(model.DetectionModel): 21 | 22 | def __init__(self, model_config, num_classes=1): 23 | # Sets model configs (_config and _num_classes) 24 | super(FakeBatchNormClassifier, self).__init__(model_config) 25 | 26 | self.tf_inputs, self.tf_labels = self.get_input() 27 | self._train_op = None 28 | self._loss = None 29 | 30 | def BatchNormClassifier(self, inputs): 31 | inputs = layers.batch_norm(inputs, decay=0.1, fused=None) 32 | return layers.fully_connected(inputs, 1, activation_fn=math_ops.sigmoid) 33 | 34 | def get_input(self): 35 | """Creates an easy training set.""" 36 | np.random.seed(0) 37 | 38 | inputs = np.zeros((16, 4)) 39 | labels = np.random.randint( 40 | 0, 2, size=(16, 1)).astype( 41 | np.float32) 42 | 43 | for i in range(16): 44 | j = int(2 * labels[i] + np.random.randint(0, 2)) 45 | inputs[i, j] = 1 46 | 47 | random_seed.set_random_seed(0) 48 | tf_inputs = constant_op.constant(inputs, dtype=dtypes.float32) 49 | tf_labels = constant_op.constant(labels, dtype=dtypes.float32) 50 | 51 | return tf_inputs, tf_labels 52 | 53 | def build(self): 54 | """Builds prediction tensors from the inputs created in get_input. 55 | 56 | Takes no arguments since this fake model holds its own inputs 57 | in self.tf_inputs. 58 | 59 | Returns: 60 | tf_predictions: a tensor of sigmoid class predictions to be 61 | passed to the loss function. 62 | """ 63 | tf_predictions = self.BatchNormClassifier(self.tf_inputs) 64 | return tf_predictions 65 | 66 | def loss(self, tf_predictions): 67 | """Compute scalar loss tensors with respect to provided groundtruth. 
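Args: tf_predictions: a tensor of sigmoid class predictions from build().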
68 | """ 69 | # trainer expects two losses, pass in a dummy one 70 | dummy_loss_dict = {} 71 | total_loss = tf.losses.log_loss(self.tf_labels, 72 | tf_predictions, 73 | scope='BatchNormLoss') 74 | return dummy_loss_dict, total_loss 75 | 76 | 77 | class ClassifierTrainerTest(tf.test.TestCase): 78 | 79 | def test_batch_norm_class(self): 80 | # This tests the model and trainer set up 81 | train_config_text_proto = """ 82 | optimizer { 83 | gradient_descent { 84 | learning_rate { 85 | constant_learning_rate { 86 | learning_rate: 1.0 87 | } 88 | } 89 | } 90 | } 91 | max_iterations: 5 92 | """ 93 | model_config_text_proto = """ 94 | path_drop_probabilities: [1.0, 1.0] 95 | """ 96 | train_config = train_pb2.TrainConfig() 97 | text_format.Merge(train_config_text_proto, train_config) 98 | 99 | model_config = model_pb2.ModelConfig() 100 | text_format.Merge(model_config_text_proto, model_config) 101 | train_config.overwrite_checkpoints = True 102 | test_root_dir = '/tmp/avod_unit_test/' 103 | 104 | paths_config = model_config.paths_config 105 | paths_config.logdir = test_root_dir + 'logs/' 106 | paths_config.checkpoint_dir = test_root_dir 107 | 108 | classifier = FakeBatchNormClassifier(model_config) 109 | trainer.train(classifier, 110 | train_config) 111 | 112 | 113 | if __name__ == '__main__': 114 | tf.test.main() 115 | -------------------------------------------------------------------------------- /avod/core/summary_utils.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import tensorflow as tf 3 | 4 | 5 | def add_feature_maps_from_dict(end_points, layer_name): 6 | """ Calls add_feature_maps for a specified layer 7 | in a dictionary of end points 8 | 9 | Args: 10 | end_points: dictionary of network end points 11 | layer_name: dict key of the layer to add 12 | """ 13 | feature_maps = end_points.get(layer_name) 14 | add_feature_maps(feature_maps, layer_name) 15 | 16 | 17 | def add_feature_maps(feature_maps, layer_name): 18 | """ Adds an image summary showing tiled feature maps 19 | 20 | Args: 21 | feature_maps: a tensor of feature maps to show, dimensions should be 22 | (1, ?, ?, ?) 
(batch_size, height, width, depth) 23 | layer_name: name of the layer which will show up in tensorboard 24 | """ 25 | with tf.name_scope(layer_name): 26 | batch, maps_height, maps_width, num_maps = np.array( 27 | feature_maps.shape).astype(np.int32) 28 | 29 | # Resize to a visible size 30 | map_width_out = 300 31 | ratio = map_width_out / maps_width 32 | map_height_out = int(maps_height * ratio) 33 | map_size_out = tf.convert_to_tensor([map_height_out, map_width_out], 34 | tf.int32) 35 | 36 | resized_maps = tf.image.resize_bilinear(feature_maps, map_size_out) 37 | 38 | # Take first image only 39 | output = tf.slice(resized_maps, (0, 0, 0, 0), (1, -1, -1, -1)) 40 | output = tf.reshape(output, (map_height_out, map_width_out, num_maps)) 41 | 42 | # Add padding around each map 43 | map_width_out += 5 44 | map_height_out += 5 45 | output = tf.image.resize_image_with_crop_or_pad( 46 | output, map_height_out, map_width_out) 47 | 48 | # Find good image size for display 49 | map_sizes = [1, 32, 64, 128, 256, 512] 50 | # columns, rows 51 | image_sizes = [(1, 1), (4, 8), (8, 8), (8, 16), (8, 32), (16, 32)] 52 | size_idx = map_sizes.index(num_maps) 53 | desired_image_size = image_sizes[size_idx] 54 | image_width = desired_image_size[0] 55 | image_height = desired_image_size[1] 56 | 57 | # Arrange maps into a grid 58 | output = tf.reshape(output, 59 | (map_height_out, map_width_out, image_height, 60 | image_width)) 61 | output = tf.transpose(output, (2, 0, 3, 1)) 62 | output = tf.reshape(output, (1, image_height * map_height_out, 63 | image_width * map_width_out, 1)) 64 | 65 | layer_name = layer_name.split('/')[-1] 66 | tf.summary.image(layer_name, output, max_outputs=16) 67 | 68 | 69 | def add_scalar_summary(summary_name, scalar_value, 70 | summary_writer, global_step): 71 | """ Adds a single scalar summary value to the logs without adding a 72 | summary node to the graph 73 | 74 | Args: 75 | summary_name: name of the summary to add 76 | scalar_value: value of the scalar 77 | summary_writer: a summary writer object 78 | global_step: the current global step 79 | """ 80 | 81 | avg_summary = tf.Summary() 82 | avg_summary.value.add(tag=summary_name, 83 | simple_value=scalar_value) 84 | 85 | summary_writer.add_summary(avg_summary, global_step) 86 | 87 | 88 | def summaries_to_keep(summaries, 89 | global_summaries, 90 | histograms=True, 91 | input_imgs=True, 92 | input_bevs=True): 93 | 94 | if histograms and input_imgs and input_bevs: 95 | # Keep everything 96 | summaries |= global_summaries 97 | 98 | else: 99 | for summary in summaries.copy(): 100 | name = summary.name 101 | if not histograms and name.startswith('histograms'): 102 | summaries.remove(summary) 103 | if not input_imgs and name.startswith('img_'): 104 | summaries.remove(summary) 105 | if not input_bevs and name.startswith('bev_'): 106 | summaries.remove(summary) 107 | 108 | # Merge all summaries together. 109 | summary_op = tf.summary.merge(list(summaries), name='summary_op') 110 | 111 | return summary_op 112 | -------------------------------------------------------------------------------- /avod/configs/unittest_pipeline.config: -------------------------------------------------------------------------------- 1 | # Avod unittest configuration sample. 
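# Uses deliberately small values (e.g. max_iterations: 10, mini_batch_size: 64)
# so the full pipeline builds and trains quickly on the tiny unittest split.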
2 | 3 | model_config { 4 | model_name: 'avod_model' 5 | checkpoint_name: 'unittest_pipeline' 6 | 7 | input_config { 8 | bev_depth: 6 9 | img_depth: 3 10 | } 11 | 12 | rpn_config { 13 | rpn_proposal_roi_crop_size: 3 14 | rpn_fusion_method: 'mean' 15 | rpn_train_nms_size: 1024 16 | rpn_test_nms_size: 300 17 | rpn_nms_iou_thresh: 0.8 18 | } 19 | 20 | avod_config { 21 | avod_proposal_roi_crop_size: 7 22 | avod_nms_size: 100 23 | avod_nms_iou_thresh: 0.01 24 | avod_box_representation: 'box_3d' 25 | } 26 | 27 | label_smoothing_epsilon: 0.001 28 | expand_proposals_xz: 0.0 29 | path_drop_probabilities: [0.5, 0.5] 30 | train_on_all_samples: False 31 | eval_all_samples: False 32 | 33 | layers_config { 34 | bev_feature_extractor { 35 | bev_vgg { 36 | vgg_conv1: [2, 32] 37 | vgg_conv2: [2, 64] 38 | vgg_conv3: [3, 128] 39 | vgg_conv4: [3, 256] 40 | upsampling_multiplier: 2 41 | 42 | l2_weight_decay: 0.0005 43 | } 44 | } 45 | img_feature_extractor { 46 | img_vgg { 47 | vgg_conv1: [2, 32] 48 | vgg_conv2: [2, 64] 49 | vgg_conv3: [3, 128] 50 | vgg_conv4: [3, 256] 51 | upsampling_multiplier: 2 52 | 53 | l2_weight_decay: 0.0005 54 | } 55 | } 56 | rpn_config { 57 | cls_fc6: 32 58 | cls_fc7: 32 59 | 60 | reg_fc6: 32 61 | reg_fc7: 32 62 | 63 | l2_weight_decay: 0.005 64 | keep_prob: 0.5 65 | } 66 | avod_config { 67 | basic_fc_layers { 68 | num_layers: 2 69 | layer_sizes: [64, 64] 70 | l2_weight_decay: 0.005 71 | keep_prob: 0.5 72 | fusion_method: 'mean' # 'mean' or 'concat' 73 | } 74 | } 75 | } 76 | # Loss function weights 77 | loss_config { 78 | cls_loss_weight: 5.0 79 | reg_loss_weight: 10.0 80 | ang_loss_weight: 10.0 81 | } 82 | } 83 | 84 | train_config { 85 | 86 | batch_size: 1 87 | 88 | optimizer { 89 | adam_optimizer { 90 | learning_rate { 91 | constant_learning_rate { 92 | learning_rate: 0.0001 93 | } 94 | } 95 | } 96 | } 97 | 98 | overwrite_checkpoints: False 99 | 100 | max_checkpoints_to_keep: 10000 101 | max_iterations: 10 102 | checkpoint_interval: 1 103 | 104 | summary_interval: 10 105 | summary_histograms: False 106 | summary_img_images: False 107 | summary_bev_images: False 108 | } 109 | 110 | 111 | dataset_config { 112 | bev_source: 'lidar' 113 | 114 | kitti_utils_config { 115 | area_extents: [-40, 40, -5, 3, 0, 70] 116 | voxel_size: 0.1 117 | anchor_strides: [0.5, 0.5, 0.5, 0.5, 0.5, 0.5] 118 | density_threshold: 1 119 | 120 | bev_generator { 121 | slices { 122 | height_lo: -0.2 123 | height_hi: 2.3 124 | num_slices: 5 125 | } 126 | } 127 | 128 | mini_batch_config { 129 | density_threshold: 1 130 | 131 | rpn_config { 132 | iou_2d_thresholds { 133 | neg_iou_lo: 0.0 134 | neg_iou_hi: 0.3 135 | pos_iou_lo: 0.5 136 | pos_iou_hi: 1.0 137 | } 138 | # iou_3d_thresholds { 139 | # neg_iou_lo: 0.001 140 | # neg_iou_hi: 0.005 141 | # pos_iou_lo: 0.3 142 | # pos_iou_hi: 1.0 143 | # } 144 | 145 | mini_batch_size: 64 146 | } 147 | 148 | avod_config { 149 | iou_2d_thresholds { 150 | neg_iou_lo: 0.0 151 | neg_iou_hi: 0.55 152 | pos_iou_lo: 0.65 153 | pos_iou_hi: 1.0 154 | } 155 | 156 | mini_batch_size: 64 157 | } 158 | } 159 | } 160 | } 161 | -------------------------------------------------------------------------------- /avod/core/minibatch_samplers/balanced_positive_negative_sampler.py: -------------------------------------------------------------------------------- 1 | # Copyright 2017 The TensorFlow Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 
5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | # ============================================================================== 15 | 16 | """Class to subsample minibatches by balancing positives and negatives. 17 | 18 | Subsamples minibatches based on a pre-specified positive fraction in range 19 | [0,1]. The class presumes there are many more negatives than positive examples: 20 | if the desired batch_size cannot be achieved with the pre-specified positive 21 | fraction, it fills the rest with negative examples. If this is not sufficient 22 | for obtaining the desired batch_size, it returns fewer examples. 23 | 24 | The main function to call is Subsample(self, indicator, labels). For convenience 25 | one can also call SubsampleWeights(self, weights, labels) which is defined in 26 | the minibatch_sampler base class. 27 | """ 28 | 29 | import tensorflow as tf 30 | 31 | from avod.core import minibatch_sampler 32 | 33 | 34 | class BalancedPositiveNegativeSampler(minibatch_sampler.MinibatchSampler): 35 | """Subsamples minibatches to a desired balance of positives and negatives.""" 36 | 37 | def __init__(self, positive_fraction=0.5): 38 | """Constructs a minibatch sampler. 39 | 40 | Args: 41 | positive_fraction: desired fraction of positive examples (scalar in [0,1]) 42 | 43 | Raises: 44 | ValueError: if positive_fraction < 0, or positive_fraction > 1 45 | """ 46 | if positive_fraction < 0 or positive_fraction > 1: 47 | raise ValueError('positive_fraction should be in range [0,1]. ' 48 | 'Received: %s.' % positive_fraction) 49 | self._positive_fraction = positive_fraction 50 | 51 | def subsample(self, indicator, batch_size, labels): 52 | """Returns subsampled minibatch. 53 | 54 | Args: 55 | indicator: boolean tensor of shape [N] whose 56 | True entries can be sampled. 57 | batch_size: desired batch size. 58 | labels: boolean tensor of shape [N] denoting 59 | positive(=True) and negative(=False) examples. 60 | 61 | Returns: 62 | sampled_idx: boolean tensor of shape [N], True for entries which are 63 | sampled. 64 | sampled_pos_idx: boolean tensor of shape [N], True for entries which are 65 | positive samples. 66 | 67 | Raises: 68 | ValueError: if labels and indicator are not 1D boolean tensors. 69 | """ 70 | if len(indicator.get_shape().as_list()) != 1: 71 | raise ValueError( 72 | 'indicator must be 1 dimensional, got a tensor of ' 73 | 'shape %s' % indicator.get_shape()) 74 | if len(labels.get_shape().as_list()) != 1: 75 | raise ValueError('labels must be 1 dimensional, got a tensor of ' 76 | 'shape %s' % labels.get_shape()) 77 | if labels.dtype != tf.bool: 78 | raise ValueError('labels should be of type bool. Received: %s' % 79 | labels.dtype) 80 | if indicator.dtype != tf.bool: 81 | raise ValueError('indicator should be of type bool. 
Received: %s' % 82 | indicator.dtype) 83 | 84 | # Only sample from indicated samples 85 | negative_idx = tf.logical_not(labels) 86 | positive_idx = tf.logical_and(labels, indicator) 87 | negative_idx = tf.logical_and(negative_idx, indicator) 88 | 89 | # Sample positive and negative samples separately 90 | max_num_pos = int(self._positive_fraction * batch_size) 91 | sampled_pos_idx = self.subsample_indicator(positive_idx, max_num_pos) 92 | max_num_neg = batch_size - tf.reduce_sum( 93 | tf.cast(sampled_pos_idx, tf.int32)) 94 | sampled_neg_idx = self.subsample_indicator(negative_idx, max_num_neg) 95 | 96 | sampled_idx = tf.logical_or(sampled_pos_idx, sampled_neg_idx) 97 | 98 | return sampled_idx, sampled_pos_idx 99 | -------------------------------------------------------------------------------- /avod/experiments/run_evaluation.py: -------------------------------------------------------------------------------- 1 | """Detection model evaluator. 2 | 3 | This runs the DetectionModel evaluator. 4 | """ 5 | 6 | import argparse 7 | import os 8 | 9 | import tensorflow as tf 10 | 11 | import avod 12 | import avod.builders.config_builder_util as config_builder 13 | from avod.builders.dataset_builder import DatasetBuilder 14 | from avod.core.models.avod_model import AvodModel 15 | from avod.core.models.rpn_model import RpnModel 16 | from avod.core.evaluator import Evaluator 17 | 18 | 19 | def evaluate(model_config, eval_config, dataset_config): 20 | 21 | # Parse eval config 22 | eval_mode = eval_config.eval_mode 23 | if eval_mode not in ['val', 'test']: 24 | raise ValueError('Evaluation mode can only be set to `val` or `test`') 25 | evaluate_repeatedly = eval_config.evaluate_repeatedly 26 | 27 | # Parse dataset config 28 | data_split = dataset_config.data_split 29 | if data_split == 'train': 30 | dataset_config.data_split_dir = 'training' 31 | dataset_config.has_labels = True 32 | 33 | elif data_split.startswith('val'): 34 | dataset_config.data_split_dir = 'training' 35 | 36 | # Don't load labels for val split when running in test mode 37 | if eval_mode == 'val': 38 | dataset_config.has_labels = True 39 | elif eval_mode == 'test': 40 | dataset_config.has_labels = False 41 | 42 | elif data_split == 'test': 43 | dataset_config.data_split_dir = 'testing' 44 | dataset_config.has_labels = False 45 | 46 | else: 47 | raise ValueError('Invalid data split', data_split) 48 | 49 | # Convert to object to overwrite repeated fields 50 | dataset_config = config_builder.proto_to_obj(dataset_config) 51 | 52 | # Remove augmentation during evaluation 53 | dataset_config.aug_list = [] 54 | 55 | # Build the dataset object 56 | dataset = DatasetBuilder.build_kitti_dataset(dataset_config, 57 | use_defaults=False) 58 | 59 | # Setup the model 60 | model_name = model_config.model_name 61 | 62 | # Convert to object to overwrite repeated fields 63 | model_config = config_builder.proto_to_obj(model_config) 64 | 65 | # Switch path drop off during evaluation 66 | model_config.path_drop_probabilities = [1.0, 1.0] 67 | 68 | with tf.Graph().as_default(): 69 | if model_name == 'avod_model': 70 | model = AvodModel(model_config, train_val_test=eval_mode, 71 | dataset=dataset) 72 | elif model_name == 'rpn_model': 73 | model = RpnModel(model_config, train_val_test=eval_mode, 74 | dataset=dataset) 75 | else: 76 | raise ValueError('Invalid model name {}'.format(model_name)) 77 | 78 | model_evaluator = Evaluator(model, 79 | dataset_config, 80 | eval_config) 81 | 82 | if evaluate_repeatedly: 83 | model_evaluator.repeated_checkpoint_run() 84 | 
else: 85 | model_evaluator.run_latest_checkpoints() 86 | 87 | 88 | def main(_): 89 | parser = argparse.ArgumentParser() 90 | 91 | default_pipeline_config_path = avod.root_dir() + \ 92 | '/configs/avod_cars_example.config' 93 | 94 | parser.add_argument('--pipeline_config', 95 | type=str, 96 | dest='pipeline_config_path', 97 | default=default_pipeline_config_path, 98 | help='Path to the pipeline config') 99 | 100 | parser.add_argument('--data_split', 101 | type=str, 102 | dest='data_split', 103 | default='val', 104 | help='Data split for evaluation') 105 | 106 | parser.add_argument('--device', 107 | type=str, 108 | dest='device', 109 | default='0', 110 | help='CUDA device id') 111 | 112 | args = parser.parse_args() 113 | 114 | # Parse pipeline config 115 | model_config, _, eval_config, dataset_config = \ 116 | config_builder.get_configs_from_pipeline_file( 117 | args.pipeline_config_path, 118 | is_training=False) 119 | 120 | # Overwrite data split 121 | dataset_config.data_split = args.data_split 122 | 123 | # Set CUDA device id 124 | os.environ['CUDA_VISIBLE_DEVICES'] = args.device 125 | 126 | evaluate(model_config, eval_config, dataset_config) 127 | 128 | 129 | if __name__ == '__main__': 130 | tf.app.run() 131 | -------------------------------------------------------------------------------- /avod/core/ops_test.py: -------------------------------------------------------------------------------- 1 | """Tests for avod.core.ops.""" 2 | import numpy as np 3 | import tensorflow as tf 4 | 5 | from avod.core import ops 6 | 7 | 8 | class OpsTestIndicesToDenseVector(tf.test.TestCase): 9 | 10 | def test_indices_to_dense_vector(self): 11 | size = 10000 12 | num_indices = np.random.randint(size) 13 | rand_indices = np.random.permutation(np.arange(size))[0:num_indices] 14 | 15 | expected_output = np.zeros(size, dtype=np.float32) 16 | expected_output[rand_indices] = 1. 17 | 18 | tf_rand_indices = tf.constant(rand_indices) 19 | indicator = ops.indices_to_dense_vector(tf_rand_indices, size) 20 | 21 | with self.test_session() as sess: 22 | output = sess.run(indicator) 23 | self.assertAllEqual(output, expected_output) 24 | self.assertEqual(output.dtype, expected_output.dtype) 25 | 26 | def test_indices_to_dense_vector_size_at_inference(self): 27 | size = 5000 28 | num_indices = 250 29 | all_indices = np.arange(size) 30 | rand_indices = np.random.permutation(all_indices)[0:num_indices] 31 | 32 | expected_output = np.zeros(size, dtype=np.float32) 33 | expected_output[rand_indices] = 1. 
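# The vector size is passed as tf.shape of a fed placeholder, so it is
# only known at run time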
34 | 35 | tf_all_indices = tf.placeholder(tf.int32) 36 | tf_rand_indices = tf.constant(rand_indices) 37 | indicator = ops.indices_to_dense_vector(tf_rand_indices, 38 | tf.shape(tf_all_indices)[0]) 39 | feed_dict = {tf_all_indices: all_indices} 40 | 41 | with self.test_session() as sess: 42 | output = sess.run(indicator, feed_dict=feed_dict) 43 | self.assertAllEqual(output, expected_output) 44 | self.assertEqual(output.dtype, expected_output.dtype) 45 | 46 | def test_indices_to_dense_vector_int(self): 47 | size = 500 48 | num_indices = 25 49 | rand_indices = np.random.permutation(np.arange(size))[0:num_indices] 50 | 51 | expected_output = np.zeros(size, dtype=np.int64) 52 | expected_output[rand_indices] = 1 53 | 54 | tf_rand_indices = tf.constant(rand_indices) 55 | indicator = ops.indices_to_dense_vector( 56 | tf_rand_indices, size, 1, dtype=tf.int64) 57 | 58 | with self.test_session() as sess: 59 | output = sess.run(indicator) 60 | self.assertAllEqual(output, expected_output) 61 | self.assertEqual(output.dtype, expected_output.dtype) 62 | 63 | def test_indices_to_dense_vector_custom_values(self): 64 | size = 100 65 | num_indices = 10 66 | rand_indices = np.random.permutation(np.arange(size))[0:num_indices] 67 | indices_value = np.random.rand(1) 68 | default_value = np.random.rand(1) 69 | 70 | expected_output = np.float32(np.ones(size) * default_value) 71 | expected_output[rand_indices] = indices_value 72 | 73 | tf_rand_indices = tf.constant(rand_indices) 74 | indicator = ops.indices_to_dense_vector( 75 | tf_rand_indices, 76 | size, 77 | indices_value=indices_value, 78 | default_value=default_value) 79 | 80 | with self.test_session() as sess: 81 | output = sess.run(indicator) 82 | self.assertAllClose(output, expected_output) 83 | self.assertEqual(output.dtype, expected_output.dtype) 84 | 85 | def test_indices_to_dense_vector_all_indices_as_input(self): 86 | size = 500 87 | num_indices = 500 88 | rand_indices = np.random.permutation(np.arange(size))[0:num_indices] 89 | 90 | expected_output = np.ones(size, dtype=np.float32) 91 | 92 | tf_rand_indices = tf.constant(rand_indices) 93 | indicator = ops.indices_to_dense_vector(tf_rand_indices, size) 94 | 95 | with self.test_session() as sess: 96 | output = sess.run(indicator) 97 | self.assertAllEqual(output, expected_output) 98 | self.assertEqual(output.dtype, expected_output.dtype) 99 | 100 | def test_indices_to_dense_vector_empty_indices_as_input(self): 101 | size = 500 102 | rand_indices = [] 103 | 104 | expected_output = np.zeros(size, dtype=np.float32) 105 | 106 | tf_rand_indices = tf.constant(rand_indices) 107 | indicator = ops.indices_to_dense_vector(tf_rand_indices, size) 108 | 109 | with self.test_session() as sess: 110 | output = sess.run(indicator) 111 | self.assertAllEqual(output, expected_output) 112 | self.assertEqual(output.dtype, expected_output.dtype) 113 | 114 | 115 | if __name__ == '__main__': 116 | tf.test.main() 117 | -------------------------------------------------------------------------------- /avod/core/mini_batch_utils_test.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import tensorflow as tf 3 | 4 | from avod.core import box_list 5 | from avod.core import box_list_ops 6 | from avod.builders.dataset_builder import DatasetBuilder 7 | 8 | 9 | class MiniBatchUtilsTest(tf.test.TestCase): 10 | 11 | @classmethod 12 | def setUpClass(cls): 13 | cls.dataset = DatasetBuilder.build_kitti_dataset( 14 | DatasetBuilder.KITTI_UNITTEST) 15 | 16 | cls.mb_utils = 
cls.dataset.kitti_utils.mini_batch_utils 17 | 18 | def test_get_anchors_info(self): 19 | 20 | # Take the first non-empty sample 21 | sample = self.dataset.sample_names[1] 22 | 23 | # Check the anchors info for first class type 24 | anchors_info = self.mb_utils.get_anchors_info( 25 | self.dataset.classes_name, 26 | self.dataset.kitti_utils.anchor_strides, 27 | sample) 28 | 29 | anchor_indices = anchors_info[0] 30 | anchor_ious = anchors_info[1] 31 | anchor_offsets = anchors_info[2] 32 | anchor_classes = anchors_info[3] 33 | 34 | # Lengths should all be the same 35 | self.assertEqual(len(anchor_indices), len(anchor_ious)) 36 | self.assertEqual(len(anchor_indices), len(anchor_offsets)) 37 | self.assertEqual(len(anchor_indices), len(anchor_classes)) 38 | 39 | # Indices, IOUs, and classes values should all be >= 0 40 | self.assertTrue((anchor_indices >= 0).all()) 41 | self.assertTrue((anchor_ious >= 0).all()) 42 | self.assertTrue((anchor_classes >= 0).all()) 43 | 44 | # Offsets should be (N, 6) 45 | self.assertTrue(len(anchor_offsets.shape) == 2) 46 | self.assertTrue(anchor_offsets.shape[1] == 6) 47 | 48 | def test_iou_mask_ops(self): 49 | # corners are in [y1, x1, y2, x2] format 50 | corners_pred = tf.constant( 51 | [[4.0, 3.0, 7.0, 5.0], 52 | [14.0, 14.0, 16.0, 16.0], 53 | [0.0, 0.0, 21.0, 19.0], 54 | [3.0, 4.0, 5.0, 7.0]]) 55 | corners_gt = tf.constant( 56 | [[4.0, 3.0, 7.0, 6.0], 57 | [14.0, 14.0, 15.0, 15.0], 58 | [0.0, 0.0, 20.0, 20.0]]) 59 | # 3 classes 60 | class_indices = tf.constant([1., 2., 3.]) 61 | 62 | exp_ious = [[0.66666669, 0., 0.02255639, 0.15384616], 63 | [0., 0.25, 0.00250627, 0.], 64 | [0.015, 0.01, 0.90692127, 0.015]] 65 | 66 | exp_max_ious = np.array([0.66666669, 0.25, 0.90692127, 0.15384616]) 67 | exp_max_indices = np.array([0, 1, 2, 0]) 68 | 69 | exp_pos_mask = np.array([True, False, True, False]) 70 | 71 | exp_class_and_background_indices = np.array([1, 0, 3, 0]) 72 | 73 | # Convert to box_list format 74 | boxes_pred = box_list.BoxList(corners_pred) 75 | boxes_gt = box_list.BoxList(corners_gt) 76 | # Calculate IoU 77 | iou = box_list_ops.iou(boxes_gt, 78 | boxes_pred) 79 | 80 | # Get max IoU, the dimension should match the anchors we are 81 | # evaluating 82 | max_ious = tf.reduce_max(iou, axis=0) 83 | max_iou_indices = tf.argmax(iou, axis=0) 84 | 85 | # Sample a mini-batch from anchors with highest IoU match 86 | mini_batch_size = 4 87 | 88 | # Custom positive/negative iou ranges 89 | neg_2d_iou_range = [0.0, 0.3] 90 | pos_2d_iou_range = [0.6, 0.7] 91 | 92 | mb_mask, mb_pos_mask = \ 93 | self.mb_utils.sample_mini_batch(max_ious, 94 | mini_batch_size, 95 | neg_2d_iou_range, 96 | pos_2d_iou_range) 97 | 98 | mb_class_indices = self.mb_utils.mask_class_label_indices( 99 | mb_pos_mask, mb_mask, max_iou_indices, class_indices) 100 | 101 | with self.test_session() as sess: 102 | iou_out = sess.run(iou) 103 | max_ious_out, max_iou_indices_out = sess.run([max_ious, 104 | max_iou_indices]) 105 | mb_mask_out, mb_pos_mask_out = sess.run([mb_mask, 106 | mb_pos_mask]) 107 | class_indices_out = sess.run(mb_class_indices) 108 | 109 | self.assertAllClose(iou_out, exp_ious) 110 | self.assertAllClose(max_ious_out, exp_max_ious) 111 | self.assertAllEqual(max_iou_indices_out, exp_max_indices) 112 | self.assertAllEqual(exp_pos_mask, mb_pos_mask_out) 113 | self.assertAllEqual(class_indices_out, 114 | exp_class_and_background_indices) 115 | 116 | 117 | if __name__ == '__main__': 118 | tf.test.main() 119 | --------------------------------------------------------------------------------
/avod/experiments/run_inference.py: -------------------------------------------------------------------------------- 1 | """Detection model inference. 2 | 3 | This runs the DetectionModel evaluator in test mode to output detections. 4 | """ 5 | 6 | import argparse 7 | import os 8 | import sys 9 | 10 | import tensorflow as tf 11 | 12 | import avod 13 | import avod.builders.config_builder_util as config_builder 14 | from avod.builders.dataset_builder import DatasetBuilder 15 | from avod.core.models.avod_model import AvodModel 16 | from avod.core.models.rpn_model import RpnModel 17 | from avod.core.evaluator import Evaluator 18 | 19 | 20 | def inference(model_config, eval_config, 21 | dataset_config, data_split, 22 | ckpt_indices): 23 | 24 | # Overwrite the defaults 25 | dataset_config = config_builder.proto_to_obj(dataset_config) 26 | 27 | dataset_config.data_split = data_split 28 | dataset_config.data_split_dir = 'training' 29 | if data_split == 'test': 30 | dataset_config.data_split_dir = 'testing' 31 | 32 | eval_config.eval_mode = 'test' 33 | eval_config.evaluate_repeatedly = False 34 | 35 | dataset_config.has_labels = False 36 | # Enable this to see the actual memory being used 37 | eval_config.allow_gpu_mem_growth = True 38 | 39 | eval_config = config_builder.proto_to_obj(eval_config) 40 | # Grab the checkpoint indices to evaluate 41 | eval_config.ckpt_indices = ckpt_indices 42 | 43 | # Remove augmentation during evaluation in test mode 44 | dataset_config.aug_list = [] 45 | 46 | # Build the dataset object 47 | dataset = DatasetBuilder.build_kitti_dataset(dataset_config, 48 | use_defaults=False) 49 | 50 | # Setup the model 51 | model_name = model_config.model_name 52 | # Overwrite repeated field 53 | model_config = config_builder.proto_to_obj(model_config) 54 | # Switch path drop off during evaluation 55 | model_config.path_drop_probabilities = [1.0, 1.0] 56 | 57 | with tf.Graph().as_default(): 58 | if model_name == 'avod_model': 59 | model = AvodModel(model_config, 60 | train_val_test=eval_config.eval_mode, 61 | dataset=dataset) 62 | elif model_name == 'rpn_model': 63 | model = RpnModel(model_config, 64 | train_val_test=eval_config.eval_mode, 65 | dataset=dataset) 66 | else: 67 | raise ValueError('Invalid model name {}'.format(model_name)) 68 | 69 | model_evaluator = Evaluator(model, dataset_config, eval_config) 70 | model_evaluator.run_latest_checkpoints() 71 | 72 | 73 | def main(_): 74 | parser = argparse.ArgumentParser() 75 | 76 | # Example usage 77 | # --checkpoint_name='avod_cars_example' 78 | # --data_split='test' 79 | # --ckpt_indices=50 100 112 80 | # Optional arg: 81 | # --device=0 82 | 83 | parser.add_argument('--checkpoint_name', 84 | type=str, 85 | dest='checkpoint_name', 86 | required=True, 87 | help='Checkpoint name must be specified as a str\ 88 | and must match the experiment config file name.') 89 | 90 | parser.add_argument('--data_split', 91 | type=str, 92 | dest='data_split', 93 | required=True, 94 | help='Data split must be specified e.g. 
val or test') 95 | 96 | parser.add_argument( 97 | '--ckpt_indices', 98 | type=int, 99 | nargs='+', 100 | dest='ckpt_indices', 101 | required=True, 102 | help='Checkpoint indices must be a set of \ 103 | integers with space in between -> 0 10 20 etc') 104 | 105 | parser.add_argument('--device', 106 | type=str, 107 | dest='device', 108 | default='0', 109 | help='CUDA device id') 110 | 111 | args = parser.parse_args() 112 | if len(sys.argv) == 1: 113 | parser.print_help() 114 | sys.exit(1) 115 | 116 | experiment_config = args.checkpoint_name + '.config' 117 | 118 | # Read the config from the experiment folder 119 | experiment_config_path = avod.root_dir() + '/data/outputs/' +\ 120 | args.checkpoint_name + '/' + experiment_config 121 | 122 | model_config, _, eval_config, dataset_config = \ 123 | config_builder.get_configs_from_pipeline_file( 124 | experiment_config_path, is_training=False) 125 | 126 | os.environ['CUDA_VISIBLE_DEVICES'] = args.device 127 | inference(model_config, eval_config, 128 | dataset_config, args.data_split, 129 | args.ckpt_indices) 130 | 131 | 132 | if __name__ == '__main__': 133 | tf.app.run() 134 | -------------------------------------------------------------------------------- /avod/builders/optimizer_builder.py: -------------------------------------------------------------------------------- 1 | # Copyright 2017 The TensorFlow Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | # ============================================================================== 15 | 16 | """Functions to build DetectionModel training optimizers.""" 17 | 18 | import tensorflow as tf 19 | 20 | slim = tf.contrib.slim 21 | 22 | 23 | def build(optimizer_config, 24 | global_summaries, 25 | global_step=None): 26 | """Create optimizer based on config. 27 | 28 | Args: 29 | optimizer_config: A Optimizer proto message. 30 | global_summaries: A set to attach learning rate summary to. 31 | global_step: (optional) A tensor that contains the global step. 32 | This is required for applying exponential decay to the learning 33 | rate. 34 | 35 | Returns: 36 | An optimizer. 37 | 38 | Raises: 39 | ValueError: when using an unsupported input data type. 
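Supported optimizer types are rms_prop_optimizer, momentum_optimizer, adam_optimizer, and gradient_descent.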
40 | """ 41 | optimizer_type = optimizer_config.WhichOneof('optimizer') 42 | optimizer = None 43 | 44 | if optimizer_type == 'rms_prop_optimizer': 45 | config = optimizer_config.rms_prop_optimizer 46 | optimizer = tf.train.RMSPropOptimizer( 47 | _create_learning_rate(config.learning_rate, 48 | global_summaries, 49 | global_step), 50 | decay=config.decay, 51 | momentum=config.momentum_optimizer_value, 52 | epsilon=config.epsilon) 53 | 54 | elif optimizer_type == 'momentum_optimizer': 55 | config = optimizer_config.momentum_optimizer 56 | optimizer = tf.train.MomentumOptimizer( 57 | _create_learning_rate(config.learning_rate, 58 | global_summaries, 59 | global_step), 60 | momentum=config.momentum_optimizer_value) 61 | 62 | elif optimizer_type == 'adam_optimizer': 63 | config = optimizer_config.adam_optimizer 64 | optimizer = tf.train.AdamOptimizer( 65 | _create_learning_rate(config.learning_rate, 66 | global_summaries, 67 | global_step)) 68 | 69 | elif optimizer_type == 'gradient_descent': 70 | config = optimizer_config.gradient_descent 71 | optimizer = tf.train.GradientDescentOptimizer( 72 | _create_learning_rate(config.learning_rate, 73 | global_summaries, 74 | global_step)) 75 | 76 | if optimizer is None: 77 | raise ValueError('Optimizer %s not supported.' % optimizer_type) 78 | 79 | if optimizer_config.use_moving_average: 80 | optimizer = tf.contrib.opt.MovingAverageOptimizer( 81 | optimizer, average_decay=optimizer_config.moving_average_decay) 82 | 83 | return optimizer 84 | 85 | 86 | def _create_learning_rate(learning_rate_config, 87 | global_summaries, 88 | global_step): 89 | """Create optimizer learning rate based on config. 90 | 91 | Args: 92 | learning_rate_config: A LearningRate proto message. 93 | global_summaries: A set to attach learning rate summary to. 94 | global_step: A tensor that contains the global step. 95 | 96 | Returns: 97 | A learning rate. 98 | 99 | Raises: 100 | ValueError: when using an unsupported input data type. 101 | """ 102 | learning_rate = None 103 | learning_rate_type = learning_rate_config.WhichOneof('learning_rate') 104 | if learning_rate_type == 'constant_learning_rate': 105 | config = learning_rate_config.constant_learning_rate 106 | learning_rate = config.learning_rate 107 | 108 | elif learning_rate_type == 'exponential_decay_learning_rate': 109 | config = learning_rate_config.exponential_decay_learning_rate 110 | learning_rate = tf.train.exponential_decay( 111 | config.initial_learning_rate, 112 | global_step, 113 | config.decay_steps, 114 | config.decay_factor, 115 | staircase=config.staircase) 116 | 117 | if learning_rate is None: 118 | raise ValueError('Learning_rate %s not supported.' 
% learning_rate_type) 119 | 120 | global_summaries.add(tf.summary.scalar('Learning_Rate', learning_rate)) 121 | return learning_rate 122 | -------------------------------------------------------------------------------- /avod/core/anchor_encoder_test.py: -------------------------------------------------------------------------------- 1 | import unittest 2 | import numpy as np 3 | import tensorflow as tf 4 | 5 | from avod.core import anchor_encoder 6 | 7 | 8 | class AnchorEncoderTest(unittest.TestCase): 9 | 10 | def test_anchor_to_offset(self): 11 | 12 | # anchor format is [x, y, z, dim_x, dim_y, dim_z] 13 | anchors = np.asarray([[1, 2, 3, 4, 6, 5], 14 | [0, 0, 0, 2, 3, 1]], dtype=np.float32) 15 | 16 | # same formatting goes for the labels 17 | # which are also in anchor format 18 | anchors_gt =\ 19 | np.array([2.0, 1.5, 7.0, 1.0, 0.5, 1.8]) 20 | 21 | expected_offsets = np.array( 22 | [[0.25, -0.083, 0.8, -1.386, -2.484, -1.022], 23 | [1., 0.5, 7., -0.693, -1.791, 0.588]], 24 | dtype=np.float32) 25 | 26 | anchor_offsets = anchor_encoder.anchor_to_offset(anchors, 27 | anchors_gt) 28 | np.testing.assert_almost_equal(anchor_offsets, 29 | expected_offsets, 30 | decimal=3) 31 | 32 | def test_anchor_tensor_to_offset(self): 33 | 34 | # anchor format is [x, y, z, dim_x, dim_y, dim_z] 35 | anchors = np.asarray([[1, 2, 3, 4, 6, 5], 36 | [0, 0, 0, 2, 3, 1]], dtype=np.float32) 37 | 38 | anchors_tensor = \ 39 | tf.convert_to_tensor(anchors, dtype=tf.float32) 40 | 41 | # we expect this in matrix format for the tensor version 42 | # of this function. In this case, it's just a repeated 43 | # gt associated with each anchor 44 | anchors_gt =\ 45 | np.array([[2.0, 1.5, 7.0, 1.0, 0.5, 1.8], 46 | [2.0, 1.5, 7.0, 1.0, 0.5, 1.8]]) 47 | 48 | anchors_gt_tensor = \ 49 | tf.convert_to_tensor(anchors_gt, dtype=tf.float32) 50 | 51 | expected_offsets = np.array( 52 | [[0.25, -0.083, 0.8, -1.386, -2.484, -1.022], 53 | [1., 0.5, 7., -0.693, -1.791, 0.588]], 54 | dtype=np.float32) 55 | 56 | # test in tensor space 57 | anchor_offsets = anchor_encoder.tf_anchor_to_offset(anchors_tensor, 58 | anchors_gt_tensor) 59 | 60 | sess = tf.Session() 61 | with sess.as_default(): 62 | anchor_offsets_out = anchor_offsets.eval() 63 | np.testing.assert_almost_equal(anchor_offsets_out, 64 | expected_offsets, 65 | decimal=3) 66 | 67 | def test_offset_to_anchor(self): 68 | 69 | # anchor format is [x, y, z, dim_x, dim_y, dim_z] 70 | anchors = np.asarray([[1, 2, 3, 4, 6, 5], 71 | [0, 0, 0, 2, 3, 1]], dtype=np.float32) 72 | 73 | # anchor offset prediction is [tx, ty, tz, tdim_x, tdim_y, tdim_z] 74 | anchor_offsets = np.array( 75 | [[0.5, 0.02, 0.01, 0.1, 0.4, 0.03], 76 | [0.04, 0.1, 0.03, 0.001, 0.3, 0.03]], 77 | dtype=np.float32) 78 | 79 | expected_anchors = np.array( 80 | [[3.0, 2.12, 3.05, 4.420, 8.9509, 5.152], 81 | [0.08, 0.3, 0.03, 2.002, 4.05, 1.03]], 82 | dtype=np.float32) 83 | 84 | anchors = anchor_encoder.offset_to_anchor(anchors, 85 | anchor_offsets) 86 | np.testing.assert_almost_equal(anchors, 87 | expected_anchors, 88 | decimal=3) 89 | 90 | def test_offset_tensor_to_anchor(self): 91 | 92 | # anchor format is [x, y, z, dim_x, dim_y, dim_z] 93 | anchors = np.asarray([[1, 2, 3, 4, 6, 5], 94 | [0, 0, 0, 2, 3, 1]], dtype=np.float32) 95 | 96 | anchor_tensor = \ 97 | tf.convert_to_tensor(anchors, dtype=tf.float32) 98 | 99 | # anchor offset prediction is [tx, ty, tz, tdim_x, tdim_y, tdim_z] 100 | anchor_offsets = np.array( 101 | [[0.5, 0.02, 0.01, 0.1, 0.4, 0.03], 102 | [0.04, 0.1, 0.03, 0.001, 0.3, 0.03]], 103 | dtype=np.float32) 104 | 
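# Offsets decode as x = x_a + tx * dim_xa (likewise for y, z) and
# dim = dim_a * exp(t_dim), consistent with the expected values below,
# e.g. 3.0 = 1 + 0.5 * 4 and 4.420 ~= 4 * exp(0.1)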
105 | anchor_offset_tensor = \ 106 | tf.convert_to_tensor(anchor_offsets, dtype=tf.float32) 107 | 108 | expected_anchors = np.array( 109 | [[3.0, 2.12, 3.05, 4.420, 8.9509, 5.152], 110 | [0.08, 0.3, 0.03, 2.002, 4.05, 1.03]], 111 | dtype=np.float32) 112 | 113 | anchors_tensor = anchor_encoder.offset_to_anchor( 114 | anchor_tensor, anchor_offset_tensor) 115 | 116 | sess = tf.Session() 117 | with sess.as_default(): 118 | anchors = anchors_tensor.eval() 119 | 120 | np.testing.assert_almost_equal(anchors, 121 | expected_anchors, 122 | decimal=3) 123 | 124 | 125 | if __name__ == '__main__': 126 | unittest.main() 127 | --------------------------------------------------------------------------------