├── avod
│   ├── core
│   │   ├── __init__.py
│   │   ├── models
│   │   │   └── __init__.py
│   │   ├── avod_fc_layers
│   │   │   ├── __init__.py
│   │   │   └── avod_fc_layer_utils.py
│   │   ├── bev_generators
│   │   │   ├── __init__.py
│   │   │   └── bev_generator.py
│   │   ├── anchor_generators
│   │   │   ├── __init__.py
│   │   │   ├── grid_anchor_3d_generator_test.py
│   │   │   └── grid_anchor_3d_generator.py
│   │   ├── feature_extractors
│   │   │   ├── __init__.py
│   │   │   ├── bev_feature_extractor.py
│   │   │   ├── img_feature_extractor.py
│   │   │   └── bev_vgg_test.py
│   │   ├── minibatch_samplers
│   │   │   ├── __init__.py
│   │   │   ├── balanced_positive_negative_sampler_test.py
│   │   │   └── balanced_positive_negative_sampler.py
│   │   ├── constants.py
│   │   ├── orientation_encoder.py
│   │   ├── box_3d_projector_test.py
│   │   ├── ops.py
│   │   ├── trainer_utils.py
│   │   ├── model.py
│   │   ├── anchor_generator.py
│   │   ├── losses_test.py
│   │   ├── label_cluster_utils_test.py
│   │   ├── format_checker_test.py
│   │   ├── minibatch_sampler.py
│   │   ├── minibatch_sampler_test.py
│   │   ├── anchor_filter_test.py
│   │   ├── orientation_encoder_test.py
│   │   ├── trainer_test.py
│   │   ├── summary_utils.py
│   │   ├── ops_test.py
│   │   ├── mini_batch_utils_test.py
│   │   └── anchor_encoder_test.py
│   ├── builders
│   │   ├── __init__.py
│   │   ├── bev_generator_builder.py
│   │   ├── feature_extractor_builder.py
│   │   ├── avod_fc_layers_builder.py
│   │   ├── config_builder_util.py
│   │   └── optimizer_builder.py
│   ├── datasets
│   │   ├── __init__.py
│   │   └── kitti
│   │       ├── __init__.py
│   │       ├── kitti_aug_test.py
│   │       └── kitti_utils_test.py
│   ├── experiments
│   │   ├── __init__.py
│   │   ├── run_training.py
│   │   ├── run_evaluation.py
│   │   └── run_inference.py
│   ├── tests
│   │   ├── datasets
│   │   │   └── Kitti
│   │   │       └── object
│   │   │           ├── val.txt
│   │   │           ├── train.txt
│   │   │           ├── training
│   │   │           │   ├── label_2
│   │   │           │   │   ├── 000000.txt
│   │   │           │   │   ├── 000002.txt
│   │   │           │   │   ├── 000003.txt
│   │   │           │   │   ├── 000005.txt
│   │   │           │   │   ├── 000009.txt
│   │   │           │   │   ├── 000006.txt
│   │   │           │   │   ├── 000007.txt
│   │   │           │   │   ├── 000004.txt
│   │   │           │   │   ├── 000001.txt
│   │   │           │   │   ├── 000217.txt
│   │   │           │   │   ├── 000008.txt
│   │   │           │   │   ├── 000076.txt
│   │   │           │   │   └── 000142.txt
│   │   │           │   ├── planes
│   │   │           │   │   ├── 000000.txt
│   │   │           │   │   ├── 000001.txt
│   │   │           │   │   ├── 000002.txt
│   │   │           │   │   ├── 000003.txt
│   │   │           │   │   ├── 000004.txt
│   │   │           │   │   ├── 000005.txt
│   │   │           │   │   ├── 000006.txt
│   │   │           │   │   ├── 000007.txt
│   │   │           │   │   ├── 000008.txt
│   │   │           │   │   ├── 000009.txt
│   │   │           │   │   ├── 000076.txt
│   │   │           │   │   ├── 000142.txt
│   │   │           │   │   └── 000217.txt
│   │   │           │   ├── image_2
│   │   │           │   │   ├── 000000.png
│   │   │           │   │   ├── 000001.png
│   │   │           │   │   ├── 000002.png
│   │   │           │   │   ├── 000003.png
│   │   │           │   │   ├── 000004.png
│   │   │           │   │   ├── 000005.png
│   │   │           │   │   ├── 000006.png
│   │   │           │   │   ├── 000007.png
│   │   │           │   │   ├── 000008.png
│   │   │           │   │   ├── 000009.png
│   │   │           │   │   ├── 000076.png
│   │   │           │   │   ├── 000142.png
│   │   │           │   │   └── 000217.png
│   │   │           │   ├── velodyne
│   │   │           │   │   ├── 000000.bin
│   │   │           │   │   ├── 000001.bin
│   │   │           │   │   ├── 000002.bin
│   │   │           │   │   ├── 000003.bin
│   │   │           │   │   ├── 000004.bin
│   │   │           │   │   ├── 000005.bin
│   │   │           │   │   ├── 000006.bin
│   │   │           │   │   ├── 000007.bin
│   │   │           │   │   ├── 000008.bin
│   │   │           │   │   ├── 000009.bin
│   │   │           │   │   ├── 000076.bin
│   │   │           │   │   ├── 000142.bin
│   │   │           │   │   └── 000217.bin
│   │   │           │   └── calib
│   │   │           │       ├── 000000.txt
│   │   │           │       ├── 000001.txt
│   │   │           │       ├── 000002.txt
│   │   │           │       ├── 000003.txt
│   │   │           │       ├── 000004.txt
│   │   │           │       ├── 000005.txt
│   │   │           │       ├── 000007.txt
│   │   │           │       ├── 000008.txt
│   │   │           │       ├── 000009.txt
│   │   │           │       ├── 000142.txt
│   │   │           │       ├── 000217.txt
│   │   │           │       ├── 000006.txt
│   │   │           │       └── 000076.txt
│   │   │           ├── test.txt
│   │   │           └── trainval.txt
│   │   ├── __init__.py
│   │   └── run_unit_tests.sh
│   ├── protos
│   │   ├── run_protoc.sh
│   │   ├── clear_protos.sh
│   │   ├── pipeline.proto
│   │   ├── eval.proto
│   │   ├── kitti_utils.proto
│   │   ├── train.proto
│   │   ├── mini_batch.proto
│   │   ├── kitti_dataset.proto
│   │   ├── optimizer.proto
│   │   ├── layers.proto
│   │   └── model.proto
│   ├── __init__.py
│   ├── configs
│   │   ├── mb_preprocessing
│   │   │   ├── rpn_cars.config
│   │   │   ├── rpn_cyclists.config
│   │   │   ├── rpn_pedestrians.config
│   │   │   └── rpn_people.config
│   │   ├── unittest_model.config
│   │   └── unittest_pipeline.config
│   └── utils
│       └── demo_utils.py
├── scripts
│   ├── __init__.py
│   ├── preprocessing
│   │   ├── __init__.py
│   │   ├── travis_test_preprocessing.py
│   │   └── gen_label_clusters.py
│   ├── offline_eval
│   │   ├── .gitignore
│   │   ├── kitti_native_eval
│   │   │   ├── run_make.sh
│   │   │   ├── run_eval.sh
│   │   │   ├── run_eval_05_iou.sh
│   │   │   ├── all_eval.sh
│   │   │   ├── Makefile
│   │   │   ├── mail.h
│   │   │   └── README.md
│   │   └── plot_ap.py
│   └── install
│       ├── build_integral_image_lib.bash
│       └── travis_install.bash
├── .coveragerc
├── .gitmodules
├── requirements.txt
├── .gitignore
├── .travis.yml
├── LICENSE
└── demos
    └── dataset
        ├── data_mean.py
        ├── data_histograms.py
        └── car_clustering.py

/avod/core/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/scripts/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/avod/builders/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/avod/core/models/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/avod/datasets/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/avod/experiments/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/avod/datasets/kitti/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/scripts/preprocessing/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/avod/core/avod_fc_layers/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/avod/core/bev_generators/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/avod/core/anchor_generators/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/avod/core/feature_extractors/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/avod/core/minibatch_samplers/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/scripts/offline_eval/.gitignore:
--------------------------------------------------------------------------------
1 | merged*
2 | results*
3 |
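The empty `__init__.py` files above are plain Python package markers. The only non-trivial ones, `avod/__init__.py` and `avod/tests/__init__.py` (both reproduced later in this dump), expose small path helpers. A minimal usage sketch, assuming the repository root is on `PYTHONPATH`:

```python
import avod

# root_dir() resolves to the avod package directory,
# top_dir() to the repository checkout that contains it.
print(avod.root_dir())  # e.g. /path/to/checkout/avod
print(avod.top_dir())   # e.g. /path/to/checkout
```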
-------------------------------------------------------------------------------- /.coveragerc: -------------------------------------------------------------------------------- 1 | [report] 2 | omit = */__init__.py, */test_*.py, scripts/* 3 | -------------------------------------------------------------------------------- /avod/tests/datasets/Kitti/object/val.txt: -------------------------------------------------------------------------------- 1 | 000001 2 | 000002 3 | 000004 4 | 000005 5 | 000006 6 | 000008 -------------------------------------------------------------------------------- /.gitmodules: -------------------------------------------------------------------------------- 1 | [submodule "wavedata"] 2 | path = wavedata 3 | url = git@github.com:kujason/wavedata.git 4 | -------------------------------------------------------------------------------- /avod/tests/datasets/Kitti/object/train.txt: -------------------------------------------------------------------------------- 1 | 000000 2 | 000003 3 | 000007 4 | 000009 5 | 000076 6 | 000142 7 | 000217 -------------------------------------------------------------------------------- /avod/tests/__init__.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | 4 | def test_path(): 5 | return os.path.dirname(os.path.realpath(__file__)) 6 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | matplotlib 2 | numpy>=1.13.0 3 | opencv-python 4 | pandas 5 | pillow 6 | protobuf==3.2.0 7 | scipy 8 | sklearn 9 | -------------------------------------------------------------------------------- /scripts/offline_eval/kitti_native_eval/run_make.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | set -e 4 | 5 | cd $1 6 | make -f Makefile main eval_05_iou 7 | -------------------------------------------------------------------------------- /avod/tests/datasets/Kitti/object/training/label_2/000000.txt: -------------------------------------------------------------------------------- 1 | Pedestrian 0.00 0 -0.20 712.40 143.00 810.73 307.92 1.89 0.48 1.20 1.84 1.47 8.41 0.01 2 | -------------------------------------------------------------------------------- /avod/tests/datasets/Kitti/object/test.txt: -------------------------------------------------------------------------------- 1 | 000000 2 | 000001 3 | 000002 4 | 000003 5 | 000004 6 | 000005 7 | 000006 8 | 000007 9 | 000008 10 | 000009 -------------------------------------------------------------------------------- /avod/tests/datasets/Kitti/object/training/planes/000000.txt: -------------------------------------------------------------------------------- 1 | # Matrix 2 | WIDTH 4 3 | HEIGHT 1 4 | -7.051729e-03 -9.997791e-01 -1.980151e-02 1.680367e+00 5 | -------------------------------------------------------------------------------- /avod/tests/datasets/Kitti/object/training/planes/000001.txt: -------------------------------------------------------------------------------- 1 | # Matrix 2 | WIDTH 4 3 | HEIGHT 1 4 | -1.851372e-02 -9.998285e-01 -5.362325e-04 1.678761e+00 5 | -------------------------------------------------------------------------------- /avod/tests/datasets/Kitti/object/training/planes/000002.txt: -------------------------------------------------------------------------------- 1 | # Matrix 2 | WIDTH 4 3 | HEIGHT 1 4 | -8.587473e-03 -9.995657e-01 2.818883e-02 
1.519515e+00 5 | -------------------------------------------------------------------------------- /avod/tests/datasets/Kitti/object/training/planes/000003.txt: -------------------------------------------------------------------------------- 1 | # Matrix 2 | WIDTH 4 3 | HEIGHT 1 4 | -4.009626e-02 -9.986394e-01 3.334112e-02 1.473070e+00 5 | -------------------------------------------------------------------------------- /avod/tests/datasets/Kitti/object/training/planes/000004.txt: -------------------------------------------------------------------------------- 1 | # Matrix 2 | WIDTH 4 3 | HEIGHT 1 4 | -1.223635e-02 -9.999069e-01 6.044845e-03 1.632201e+00 5 | -------------------------------------------------------------------------------- /avod/tests/datasets/Kitti/object/training/planes/000005.txt: -------------------------------------------------------------------------------- 1 | # Matrix 2 | WIDTH 4 3 | HEIGHT 1 4 | -2.917402e-02 -9.995687e-01 3.349818e-03 1.637302e+00 5 | -------------------------------------------------------------------------------- /avod/tests/datasets/Kitti/object/training/planes/000006.txt: -------------------------------------------------------------------------------- 1 | # Matrix 2 | WIDTH 4 3 | HEIGHT 1 4 | -1.691065e-02 -9.997467e-01 -1.485037e-02 1.664847e+00 5 | -------------------------------------------------------------------------------- /avod/tests/datasets/Kitti/object/training/planes/000007.txt: -------------------------------------------------------------------------------- 1 | # Matrix 2 | WIDTH 4 3 | HEIGHT 1 4 | -1.949878e-02 -9.998097e-01 -5.575465e-04 1.721678e+00 5 | -------------------------------------------------------------------------------- /avod/tests/datasets/Kitti/object/training/planes/000008.txt: -------------------------------------------------------------------------------- 1 | # Matrix 2 | WIDTH 4 3 | HEIGHT 1 4 | 3.892676e-02 -9.991437e-01 -1.401704e-02 1.760554e+00 5 | -------------------------------------------------------------------------------- /avod/tests/datasets/Kitti/object/training/planes/000009.txt: -------------------------------------------------------------------------------- 1 | # Matrix 2 | WIDTH 4 3 | HEIGHT 1 4 | -1.492800e-02 -9.998797e-01 4.205277e-03 1.641223e+00 5 | -------------------------------------------------------------------------------- /avod/tests/datasets/Kitti/object/training/planes/000076.txt: -------------------------------------------------------------------------------- 1 | # Matrix 2 | WIDTH 4 3 | HEIGHT 1 4 | -1.848409e-02 -9.997599e-01 -1.176761e-02 1.627940e+00 5 | -------------------------------------------------------------------------------- /avod/tests/datasets/Kitti/object/training/planes/000142.txt: -------------------------------------------------------------------------------- 1 | # Matrix 2 | WIDTH 4 3 | HEIGHT 1 4 | -1.937749e-02 -9.997965e-01 -5.601527e-03 1.696127e+00 5 | -------------------------------------------------------------------------------- /avod/tests/datasets/Kitti/object/training/planes/000217.txt: -------------------------------------------------------------------------------- 1 | # Matrix 2 | WIDTH 4 3 | HEIGHT 1 4 | -4.133546e-02 -9.991421e-01 2.540240e-03 1.635158e+00 5 | -------------------------------------------------------------------------------- /avod/tests/datasets/Kitti/object/training/image_2/000000.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/kujason/avod/HEAD/avod/tests/datasets/Kitti/object/training/image_2/000000.png -------------------------------------------------------------------------------- /avod/tests/datasets/Kitti/object/training/image_2/000001.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kujason/avod/HEAD/avod/tests/datasets/Kitti/object/training/image_2/000001.png -------------------------------------------------------------------------------- /avod/tests/datasets/Kitti/object/training/image_2/000002.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kujason/avod/HEAD/avod/tests/datasets/Kitti/object/training/image_2/000002.png -------------------------------------------------------------------------------- /avod/tests/datasets/Kitti/object/training/image_2/000003.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kujason/avod/HEAD/avod/tests/datasets/Kitti/object/training/image_2/000003.png -------------------------------------------------------------------------------- /avod/tests/datasets/Kitti/object/training/image_2/000004.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kujason/avod/HEAD/avod/tests/datasets/Kitti/object/training/image_2/000004.png -------------------------------------------------------------------------------- /avod/tests/datasets/Kitti/object/training/image_2/000005.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kujason/avod/HEAD/avod/tests/datasets/Kitti/object/training/image_2/000005.png -------------------------------------------------------------------------------- /avod/tests/datasets/Kitti/object/training/image_2/000006.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kujason/avod/HEAD/avod/tests/datasets/Kitti/object/training/image_2/000006.png -------------------------------------------------------------------------------- /avod/tests/datasets/Kitti/object/training/image_2/000007.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kujason/avod/HEAD/avod/tests/datasets/Kitti/object/training/image_2/000007.png -------------------------------------------------------------------------------- /avod/tests/datasets/Kitti/object/training/image_2/000008.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kujason/avod/HEAD/avod/tests/datasets/Kitti/object/training/image_2/000008.png -------------------------------------------------------------------------------- /avod/tests/datasets/Kitti/object/training/image_2/000009.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kujason/avod/HEAD/avod/tests/datasets/Kitti/object/training/image_2/000009.png -------------------------------------------------------------------------------- /avod/tests/datasets/Kitti/object/training/image_2/000076.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kujason/avod/HEAD/avod/tests/datasets/Kitti/object/training/image_2/000076.png 
-------------------------------------------------------------------------------- /avod/tests/datasets/Kitti/object/training/image_2/000142.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kujason/avod/HEAD/avod/tests/datasets/Kitti/object/training/image_2/000142.png -------------------------------------------------------------------------------- /avod/tests/datasets/Kitti/object/training/image_2/000217.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kujason/avod/HEAD/avod/tests/datasets/Kitti/object/training/image_2/000217.png -------------------------------------------------------------------------------- /avod/tests/datasets/Kitti/object/training/velodyne/000000.bin: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kujason/avod/HEAD/avod/tests/datasets/Kitti/object/training/velodyne/000000.bin -------------------------------------------------------------------------------- /avod/tests/datasets/Kitti/object/training/velodyne/000001.bin: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kujason/avod/HEAD/avod/tests/datasets/Kitti/object/training/velodyne/000001.bin -------------------------------------------------------------------------------- /avod/tests/datasets/Kitti/object/training/velodyne/000002.bin: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kujason/avod/HEAD/avod/tests/datasets/Kitti/object/training/velodyne/000002.bin -------------------------------------------------------------------------------- /avod/tests/datasets/Kitti/object/training/velodyne/000003.bin: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kujason/avod/HEAD/avod/tests/datasets/Kitti/object/training/velodyne/000003.bin -------------------------------------------------------------------------------- /avod/tests/datasets/Kitti/object/training/velodyne/000004.bin: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kujason/avod/HEAD/avod/tests/datasets/Kitti/object/training/velodyne/000004.bin -------------------------------------------------------------------------------- /avod/tests/datasets/Kitti/object/training/velodyne/000005.bin: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kujason/avod/HEAD/avod/tests/datasets/Kitti/object/training/velodyne/000005.bin -------------------------------------------------------------------------------- /avod/tests/datasets/Kitti/object/training/velodyne/000006.bin: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kujason/avod/HEAD/avod/tests/datasets/Kitti/object/training/velodyne/000006.bin -------------------------------------------------------------------------------- /avod/tests/datasets/Kitti/object/training/velodyne/000007.bin: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kujason/avod/HEAD/avod/tests/datasets/Kitti/object/training/velodyne/000007.bin -------------------------------------------------------------------------------- /avod/tests/datasets/Kitti/object/training/velodyne/000008.bin: 
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kujason/avod/HEAD/avod/tests/datasets/Kitti/object/training/velodyne/000008.bin
--------------------------------------------------------------------------------
/avod/tests/datasets/Kitti/object/training/velodyne/000009.bin:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kujason/avod/HEAD/avod/tests/datasets/Kitti/object/training/velodyne/000009.bin
--------------------------------------------------------------------------------
/avod/tests/datasets/Kitti/object/training/velodyne/000076.bin:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kujason/avod/HEAD/avod/tests/datasets/Kitti/object/training/velodyne/000076.bin
--------------------------------------------------------------------------------
/avod/tests/datasets/Kitti/object/training/velodyne/000142.bin:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kujason/avod/HEAD/avod/tests/datasets/Kitti/object/training/velodyne/000142.bin
--------------------------------------------------------------------------------
/avod/tests/datasets/Kitti/object/training/velodyne/000217.bin:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kujason/avod/HEAD/avod/tests/datasets/Kitti/object/training/velodyne/000217.bin
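The `.bin` entries above are binary KITTI velodyne scans, shown here only as download links. In the standard KITTI format each file is a flat array of float32 values in groups of four (x, y, z, reflectance); a minimal standalone sketch for loading one, assuming only numpy:

```python
import numpy as np


def load_velodyne_scan(bin_path):
    """Reads a KITTI velodyne scan as an (N, 4) float32 array."""
    scan = np.fromfile(bin_path, dtype=np.float32)
    return scan.reshape(-1, 4)  # columns: x, y, z, reflectance


# points = load_velodyne_scan(
#     'avod/tests/datasets/Kitti/object/training/velodyne/000000.bin')
```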
--------------------------------------------------------------------------------
/avod/tests/datasets/Kitti/object/trainval.txt:
--------------------------------------------------------------------------------
1 | 000000
2 | 000001
3 | 000002
4 | 000003
5 | 000004
6 | 000005
7 | 000006
8 | 000007
9 | 000008
10 | 000009
11 | 000076
12 | 000142
13 | 000217
--------------------------------------------------------------------------------
/avod/protos/run_protoc.sh:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env bash
2 |
3 | set -e
4 |
5 | cd "$(dirname "$0")"
6 | echo "Compiling protos in $(pwd)"
7 | cd ../..
8 | protoc avod/protos/*.proto --python_out=.
9 | echo 'Done'
--------------------------------------------------------------------------------
/avod/protos/clear_protos.sh:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env bash
2 |
3 | set -e
4 |
5 | cd "$(dirname "$0")"
6 | echo "Removing old protos from $(dirname "$0")"
7 | find . -name '*_pb2.py'
8 | find . -name '*_pb2.py' -delete
9 |
--------------------------------------------------------------------------------
/avod/tests/datasets/Kitti/object/training/label_2/000002.txt:
--------------------------------------------------------------------------------
1 | Misc 0.00 0 -1.82 804.79 167.34 995.43 327.94 1.63 1.48 2.37 3.23 1.59 8.55 -1.47
2 | Car 0.00 0 -1.67 657.39 190.13 700.07 223.39 1.41 1.58 4.36 3.18 2.27 34.38 -1.58
3 |
--------------------------------------------------------------------------------
/avod/__init__.py:
--------------------------------------------------------------------------------
1 | import os
2 |
3 |
4 | def root_dir():
5 |     return os.path.dirname(os.path.realpath(__file__))
6 |
7 |
8 | def top_dir():
9 |     avod_root_dir = root_dir()
10 |     return os.path.split(avod_root_dir)[0]
11 |
--------------------------------------------------------------------------------
/scripts/install/build_integral_image_lib.bash:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | set -e  # exit on first error
3 |
4 | build_integral_image_lib()
5 | {
6 |     cd wavedata/wavedata/tools/core/lib
7 |     cmake src
8 |     make
9 | }
10 |
11 | build_integral_image_lib
12 |
--------------------------------------------------------------------------------
/scripts/offline_eval/kitti_native_eval/run_eval.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 |
3 | set -e
4 |
5 | cd $1
6 | echo "$3" | tee -a ./$4_results_$2.txt
7 | ./evaluate_object_3d_offline ~/Kitti/object/training/label_2/ $2/$3 | tee -a ./$4_results_$2.txt
8 |
9 | cp $4_results_$2.txt $5
10 |
--------------------------------------------------------------------------------
/avod/tests/datasets/Kitti/object/training/label_2/000003.txt:
--------------------------------------------------------------------------------
1 | Car 0.00 0 1.55 614.24 181.78 727.31 284.77 1.57 1.73 4.15 1.00 1.75 13.22 1.62
2 | DontCare -1 -1 -10 5.00 229.89 214.12 367.61 -1 -1 -1 -1000 -1000 -1000 -10
3 | DontCare -1 -1 -10 522.25 202.35 547.77 219.71 -1 -1 -1 -1000 -1000 -1000 -10
4 |
--------------------------------------------------------------------------------
/scripts/offline_eval/kitti_native_eval/run_eval_05_iou.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 |
3 | set -e
4 |
5 | cd $1
6 | echo "$3" | tee -a ./$4_results_05_iou_$2.txt
7 | ./evaluate_object_3d_offline_05_iou ~/Kitti/object/training/label_2/ $2/$3 | tee -a ./$4_results_05_iou_$2.txt
8 |
9 | cp $4_results_05_iou_$2.txt $5
10 |
--------------------------------------------------------------------------------
/avod/tests/run_unit_tests.sh:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env bash
2 |
3 | cd "$(dirname "$0")"
4 | cd ../..
5 | 6 | export PYTHONPATH=$PYTHONPATH:$(pwd)/wavedata 7 | echo $PYTHONPATH 8 | 9 | echo "Running unit tests in $(pwd)/avod" 10 | coverage run --source avod -m unittest discover -b --pattern "*_test.py" 11 | 12 | #coverage report -m 13 | -------------------------------------------------------------------------------- /scripts/offline_eval/kitti_native_eval/all_eval.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # set -e 4 | # set -x 5 | 6 | # Sort by step 7 | folders=$(ls ./$1/ | sort -V) 8 | 9 | for folder in $folders 10 | do 11 | echo "$folder" | tee -a ./results_$1.txt 12 | ./evaluate_object_3d_offline ~/Kitti/object/training/label_2/ $1/$folder | tee -a ./results_$1.txt 13 | done 14 | -------------------------------------------------------------------------------- /avod/builders/bev_generator_builder.py: -------------------------------------------------------------------------------- 1 | from avod.core.bev_generators import bev_slices 2 | 3 | 4 | def build(bev_maps_type_config, kitti_utils): 5 | 6 | bev_maps_type = bev_maps_type_config.WhichOneof('bev_maps_type') 7 | 8 | if bev_maps_type == 'slices': 9 | return bev_slices.BevSlices( 10 | bev_maps_type_config.slices, kitti_utils) 11 | 12 | raise ValueError('Invalid bev_maps_type', bev_maps_type) 13 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | avod/data 2 | avod/checkpoints 3 | avod/logs 4 | 5 | # Python 6 | *.pyc 7 | __pycache__ 8 | 9 | # Coverage 10 | *.coverage 11 | 12 | # Misc 13 | run.sh 14 | tags 15 | 16 | # PyCharm 17 | .idea 18 | 19 | # Rope 20 | .ropeproject 21 | 22 | # Distribution / packaging 23 | build/ 24 | dist/ 25 | avod.egg-info/ 26 | 27 | # Protobuf files 28 | *_pb2.py 29 | 30 | # line_profiler 31 | *.lprof 32 | 33 | # tf_profiler 34 | *.json 35 | -------------------------------------------------------------------------------- /avod/tests/datasets/Kitti/object/training/label_2/000005.txt: -------------------------------------------------------------------------------- 1 | Pedestrian 0.00 0 1.94 330.06 178.74 360.77 238.64 1.87 0.96 0.65 -8.50 2.07 23.02 1.59 2 | DontCare -1 -1 -10 606.64 170.67 621.06 184.34 -1 -1 -1 -1000 -1000 -1000 -10 3 | DontCare -1 -1 -10 606.00 170.91 621.35 184.28 -1 -1 -1 -1000 -1000 -1000 -10 4 | DontCare -1 -1 -10 605.68 171.21 620.77 184.34 -1 -1 -1 -1000 -1000 -1000 -10 5 | DontCare -1 -1 -10 566.39 168.89 585.07 184.56 -1 -1 -1 -1000 -1000 -1000 -10 6 | -------------------------------------------------------------------------------- /avod/tests/datasets/Kitti/object/training/label_2/000009.txt: -------------------------------------------------------------------------------- 1 | Car 0.00 0 -1.50 601.96 177.01 659.15 229.51 1.61 1.66 3.20 0.70 1.76 23.88 -1.48 2 | Car 0.00 2 1.75 600.14 177.09 624.65 193.31 1.44 1.61 3.66 0.24 1.84 66.37 1.76 3 | Car 0.00 0 1.78 574.98 178.64 598.45 194.01 1.41 1.53 3.37 -2.19 1.96 68.25 1.75 4 | DontCare -1 -1 -10 710.60 167.73 736.68 182.35 -1 -1 -1 -1000 -1000 -1000 -10 5 | DontCare -1 -1 -10 758.52 156.27 782.52 179.23 -1 -1 -1 -1000 -1000 -1000 -10 6 | -------------------------------------------------------------------------------- /scripts/offline_eval/kitti_native_eval/Makefile: -------------------------------------------------------------------------------- 1 | main: evaluate_object_3d_offline.cpp 2 | g++ -o evaluate_object_3d_offline 
evaluate_object_3d_offline.cpp -lboost_system -lboost_filesystem 3 | 4 | eval_05_iou: evaluate_object_3d_offline_05_iou.cpp 5 | g++ -o evaluate_object_3d_offline_05_iou evaluate_object_3d_offline_05_iou.cpp -lboost_system -lboost_filesystem 6 | 7 | .PHONY: clean 8 | 9 | clean: 10 | rm -f evaluate_object_3d_offline 11 | rm -f evaluate_object_3d_offline_05_iou 12 | -------------------------------------------------------------------------------- /avod/core/constants.py: -------------------------------------------------------------------------------- 1 | KEY_LABEL_BOXES_3D = 'label_boxes_3d' 2 | KEY_LABEL_ANCHORS = 'label_anchors' 3 | KEY_LABEL_CLASSES = 'label_classes' 4 | 5 | KEY_IMAGE_INPUT = 'image_input' 6 | KEY_BEV_INPUT = 'bev_input' 7 | 8 | KEY_SAMPLE_IDX = 'sample_idx' 9 | KEY_SAMPLE_NAME = 'sample_name' 10 | KEY_SAMPLE_AUGS = 'sample_augs' 11 | 12 | KEY_ANCHORS_INFO = 'anchors_info' 13 | 14 | KEY_POINT_CLOUD = 'point_cloud' 15 | KEY_GROUND_PLANE = 'ground_plane' 16 | KEY_STEREO_CALIB_P2 = 'stereo_calib_p2' 17 | -------------------------------------------------------------------------------- /avod/tests/datasets/Kitti/object/training/label_2/000006.txt: -------------------------------------------------------------------------------- 1 | Car 0.00 2 -1.55 548.00 171.33 572.40 194.42 1.48 1.56 3.62 -2.72 0.82 48.22 -1.62 2 | Car 0.00 0 -1.21 505.25 168.37 575.44 209.18 1.67 1.64 4.32 -2.61 1.13 31.73 -1.30 3 | Car 0.00 0 0.15 49.70 185.65 227.42 246.96 1.50 1.62 3.88 -12.54 1.64 19.72 -0.42 4 | Car 0.00 1 2.05 328.67 170.65 397.24 204.16 1.68 1.67 4.29 -12.66 1.13 38.44 1.73 5 | DontCare -1 -1 -10 603.36 169.62 631.06 186.56 -1 -1 -1 -1000 -1000 -1000 -10 6 | DontCare -1 -1 -10 578.97 168.88 603.78 187.56 -1 -1 -1 -1000 -1000 -1000 -10 7 | -------------------------------------------------------------------------------- /avod/tests/datasets/Kitti/object/training/label_2/000007.txt: -------------------------------------------------------------------------------- 1 | Car 0.00 0 -1.56 564.62 174.59 616.43 224.74 1.61 1.66 3.20 -0.69 1.69 25.01 -1.59 2 | Car 0.00 0 1.71 481.59 180.09 512.55 202.42 1.40 1.51 3.70 -7.43 1.88 47.55 1.55 3 | Car 0.00 0 1.64 542.05 175.55 565.27 193.79 1.46 1.66 4.05 -4.71 1.71 60.52 1.56 4 | Cyclist 0.00 0 1.89 330.60 176.09 355.61 213.60 1.72 0.50 1.95 -12.63 1.88 34.09 1.54 5 | DontCare -1 -1 -10 753.33 164.32 798.00 186.74 -1 -1 -1 -1000 -1000 -1000 -10 6 | DontCare -1 -1 -10 738.50 171.32 753.27 184.42 -1 -1 -1 -1000 -1000 -1000 -10 7 | -------------------------------------------------------------------------------- /avod/tests/datasets/Kitti/object/training/label_2/000004.txt: -------------------------------------------------------------------------------- 1 | Car 0.00 0 1.96 280.38 185.10 344.90 215.59 1.49 1.76 4.01 -15.71 2.16 38.26 1.57 2 | Car 0.00 0 1.88 365.14 184.54 406.11 205.20 1.38 1.80 3.41 -15.89 2.23 51.17 1.58 3 | DontCare -1 -1 -10 402.27 166.69 477.31 197.98 -1 -1 -1 -1000 -1000 -1000 -10 4 | DontCare -1 -1 -10 518.53 177.31 531.51 187.17 -1 -1 -1 -1000 -1000 -1000 -10 5 | DontCare -1 -1 -10 1207.50 233.35 1240.00 333.39 -1 -1 -1 -1000 -1000 -1000 -10 6 | DontCare -1 -1 -10 535.06 177.65 545.26 185.82 -1 -1 -1 -1000 -1000 -1000 -10 7 | DontCare -1 -1 -10 558.03 177.88 567.50 184.65 -1 -1 -1 -1000 -1000 -1000 -10 8 | -------------------------------------------------------------------------------- /scripts/preprocessing/travis_test_preprocessing.py: -------------------------------------------------------------------------------- 1 | from 
avod.builders.dataset_builder import DatasetBuilder 2 | 3 | from scripts.preprocessing import gen_mini_batches 4 | from scripts.preprocessing import gen_label_clusters 5 | 6 | 7 | def main(): 8 | 9 | dataset_config = DatasetBuilder.copy_config(DatasetBuilder.KITTI_UNITTEST) 10 | dataset_config.data_split = "trainval" 11 | unittest_dataset = DatasetBuilder.build_kitti_dataset(dataset_config) 12 | 13 | gen_label_clusters.main(unittest_dataset) 14 | gen_mini_batches.main(unittest_dataset) 15 | 16 | 17 | if __name__ == '__main__': 18 | main() 19 | -------------------------------------------------------------------------------- /avod/tests/datasets/Kitti/object/training/label_2/000001.txt: -------------------------------------------------------------------------------- 1 | Truck 0.00 0 -1.57 599.41 156.40 629.75 189.25 2.85 2.63 12.34 0.47 1.49 69.44 -1.56 2 | Car 0.00 0 1.85 387.63 181.54 423.81 203.12 1.67 1.87 3.69 -16.53 2.39 58.49 1.57 3 | Cyclist 0.00 3 -1.65 676.60 163.95 688.98 193.93 1.86 0.60 2.02 4.59 1.32 45.84 -1.55 4 | DontCare -1 -1 -10 503.89 169.71 590.61 190.13 -1 -1 -1 -1000 -1000 -1000 -10 5 | DontCare -1 -1 -10 511.35 174.96 527.81 187.45 -1 -1 -1 -1000 -1000 -1000 -10 6 | DontCare -1 -1 -10 532.37 176.35 542.68 185.27 -1 -1 -1 -1000 -1000 -1000 -10 7 | DontCare -1 -1 -10 559.62 175.83 575.40 183.15 -1 -1 -1 -1000 -1000 -1000 -10 8 | -------------------------------------------------------------------------------- /avod/datasets/kitti/kitti_aug_test.py: -------------------------------------------------------------------------------- 1 | import unittest 2 | 3 | import numpy as np 4 | 5 | from avod.datasets.kitti import kitti_aug 6 | 7 | 8 | class KittiAugTest(unittest.TestCase): 9 | 10 | def test_flip_boxes_3d(self): 11 | 12 | boxes_3d = np.array([ 13 | [1, 2, 3, 4, 5, 6, np.pi / 4], 14 | [1, 2, 3, 4, 5, 6, -np.pi / 4] 15 | ]) 16 | 17 | exp_flipped_boxes_3d = np.array([ 18 | [-1, 2, 3, 4, 5, 6, 3 * np.pi / 4], 19 | [-1, 2, 3, 4, 5, 6, -3 * np.pi / 4] 20 | ]) 21 | 22 | flipped_boxes_3d = kitti_aug.flip_boxes_3d(boxes_3d) 23 | 24 | np.testing.assert_almost_equal(flipped_boxes_3d, exp_flipped_boxes_3d) 25 | -------------------------------------------------------------------------------- /avod/tests/datasets/Kitti/object/training/label_2/000217.txt: -------------------------------------------------------------------------------- 1 | Car 0.65 3 2.10 0.00 196.92 358.49 374.00 1.48 1.80 4.51 -3.88 1.75 6.20 1.56 2 | Car 0.00 1 1.80 316.11 191.89 461.97 282.37 1.37 1.64 4.40 -4.07 1.78 13.88 1.52 3 | Car 0.00 0 1.72 475.46 183.89 525.42 222.67 1.40 1.45 3.30 -4.27 1.85 28.24 1.57 4 | Van 0.00 2 1.64 499.86 160.21 547.31 212.16 2.39 1.96 5.36 -4.26 1.84 36.10 1.52 5 | Car 0.00 0 1.58 562.18 178.55 594.26 206.23 1.40 1.63 3.89 -1.73 1.72 38.85 1.53 6 | Cyclist 0.00 0 0.99 979.93 140.54 1173.61 373.94 1.76 0.59 1.56 3.91 1.53 6.26 1.53 7 | Cyclist 0.08 2 -2.22 1051.55 132.89 1241.00 332.88 1.77 0.63 1.82 5.36 1.43 7.33 -1.61 8 | Car 0.00 1 1.64 537.61 179.82 561.21 200.59 1.38 1.33 4.32 -4.24 1.88 50.62 1.56 9 | -------------------------------------------------------------------------------- /avod/protos/pipeline.proto: -------------------------------------------------------------------------------- 1 | package avod.protos; 2 | 3 | import "avod/protos/model.proto"; 4 | import "avod/protos/train.proto"; 5 | import "avod/protos/eval.proto"; 6 | import "avod/protos/kitti_dataset.proto"; 7 | 8 | // Convenience message for configuring a training and eval pipeline. 
9 | // Allows all of the pipeline parameters to be configured from one file. 10 | message NetworkPipelineConfig { 11 | 12 | // Detection Model config 13 | optional ModelConfig model_config = 1; 14 | 15 | // Training config 16 | optional TrainConfig train_config = 2; 17 | 18 | // Evaluation config 19 | optional EvalConfig eval_config = 3; 20 | 21 | // KittiDataset configuration 22 | optional KittiDatasetConfig dataset_config = 4; 23 | } 24 | -------------------------------------------------------------------------------- /scripts/install/travis_install.bash: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | set -e # exit on first error 3 | 4 | install_wavedata() 5 | { 6 | echo "Cloning wavedata ..." 7 | git clone git@github.com:kujason/wavedata.git 8 | cd wavedata 9 | sudo /home/travis/virtualenv/python3.5.2/bin/python setup.py install 10 | cd ../ 11 | } 12 | 13 | install_protoc() 14 | { 15 | # Make sure you grab the latest version 16 | curl -OL https://github.com/google/protobuf/releases/download/v3.2.0/protoc-3.2.0-linux-x86_64.zip 17 | # Unzip 18 | unzip protoc-3.2.0-linux-x86_64.zip -d protoc3 19 | # Move only protoc* to /usr/bin/ 20 | sudo mv protoc3/bin/protoc /usr/bin/protoc 21 | } 22 | 23 | #install_wavedata 24 | install_protoc 25 | # install cmake 26 | sudo apt-get install cmake 27 | -------------------------------------------------------------------------------- /avod/protos/eval.proto: -------------------------------------------------------------------------------- 1 | syntax = "proto2"; 2 | 3 | package avod.protos; 4 | 5 | // Message for configuring DetectionModel evaluator. 6 | message EvalConfig { 7 | 8 | // Evaluation intervals during training 9 | optional uint32 eval_interval = 1 [default = 500]; 10 | 11 | // Evaluation mode, 'val' or 'test' 12 | optional string eval_mode = 2 [default = 'val']; 13 | 14 | // Checkpoint indices to evaluate 15 | repeated int32 ckpt_indices = 3; 16 | 17 | // Evaluate repeatedly while waiting for new checkpoints 18 | optional bool evaluate_repeatedly = 4 [default = true]; 19 | 20 | // GPU options 21 | optional bool allow_gpu_mem_growth = 5 [default = false]; 22 | 23 | // Kitti native evaluation 24 | optional float kitti_score_threshold = 6 [default = 0.1]; 25 | } 26 | -------------------------------------------------------------------------------- /scripts/preprocessing/gen_label_clusters.py: -------------------------------------------------------------------------------- 1 | from avod.builders.dataset_builder import DatasetBuilder 2 | 3 | 4 | def main(dataset=None): 5 | if not dataset: 6 | dataset = DatasetBuilder.build_kitti_dataset( 7 | DatasetBuilder.KITTI_TRAIN) 8 | 9 | label_cluster_utils = dataset.kitti_utils.label_cluster_utils 10 | 11 | print("Generating clusters in {}/{}".format( 12 | label_cluster_utils.data_dir, dataset.data_split)) 13 | clusters, std_devs = dataset.get_cluster_info() 14 | 15 | print("Clusters generated") 16 | print("classes: {}".format(dataset.classes)) 17 | print("num_clusters: {}".format(dataset.num_clusters)) 18 | print("all_clusters:\n {}".format(clusters)) 19 | print("all_std_devs:\n {}".format(std_devs)) 20 | 21 | 22 | if __name__ == '__main__': 23 | main() 24 | -------------------------------------------------------------------------------- /avod/tests/datasets/Kitti/object/training/label_2/000008.txt: -------------------------------------------------------------------------------- 1 | Car 0.88 3 -0.69 0.00 192.37 402.31 374.00 1.60 1.57 3.23 -2.70 1.74 3.68 
-1.29 2 | Car 0.00 1 2.04 334.85 178.94 624.50 372.04 1.57 1.50 3.68 -1.17 1.65 7.86 1.90 3 | Car 0.34 3 -1.84 937.29 197.39 1241.00 374.00 1.39 1.44 3.08 3.81 1.64 6.15 -1.31 4 | Car 0.00 1 -1.33 597.59 176.18 720.90 261.14 1.47 1.60 3.66 1.07 1.55 14.44 -1.25 5 | Car 0.00 0 1.74 741.18 168.83 792.25 208.43 1.70 1.63 4.08 7.24 1.55 33.20 1.95 6 | Car 0.00 0 -1.65 884.52 178.31 956.41 240.18 1.59 1.59 2.47 8.48 1.75 19.96 -1.25 7 | DontCare -1 -1 -10 800.38 163.67 825.45 184.07 -1 -1 -1 -1000 -1000 -1000 -10 8 | DontCare -1 -1 -10 859.58 172.34 886.26 194.51 -1 -1 -1 -1000 -1000 -1000 -10 9 | DontCare -1 -1 -10 801.81 163.96 825.20 183.59 -1 -1 -1 -1000 -1000 -1000 -10 10 | DontCare -1 -1 -10 826.87 162.28 845.84 178.86 -1 -1 -1 -1000 -1000 -1000 -10 11 | -------------------------------------------------------------------------------- /avod/tests/datasets/Kitti/object/training/label_2/000076.txt: -------------------------------------------------------------------------------- 1 | Pedestrian 0.28 0 2.39 90.64 186.57 329.47 373.00 1.51 1.01 0.97 -2.63 1.56 4.83 1.90 2 | Pedestrian 0.00 0 -1.35 964.98 157.64 1023.92 289.42 1.59 0.66 0.65 4.82 1.30 9.12 -0.88 3 | Pedestrian 0.00 2 0.66 236.84 169.82 288.56 339.57 1.66 0.65 0.72 -3.58 1.55 7.41 0.22 4 | Cyclist 0.00 0 1.68 413.06 169.47 489.86 291.05 1.67 0.93 1.69 -2.24 1.51 10.79 1.47 5 | Pedestrian 0.00 1 -1.90 811.24 152.52 841.74 249.83 1.75 0.82 1.14 4.05 1.23 13.53 -1.61 6 | Pedestrian 0.00 0 -1.53 583.85 164.01 608.02 237.87 1.77 0.94 0.93 -0.26 1.35 17.75 -1.55 7 | Pedestrian 0.00 0 2.63 524.52 165.79 549.51 230.67 1.73 0.67 0.83 -1.94 1.31 19.66 2.53 8 | Pedestrian 0.00 0 1.66 560.09 165.43 577.09 224.39 1.82 0.80 0.86 -1.22 1.32 22.65 1.60 9 | Pedestrian 0.00 2 1.43 708.67 157.73 729.27 222.05 1.88 0.78 1.03 3.36 1.19 21.58 1.58 10 | DontCare -1 -1 -10 614.86 162.09 693.85 210.96 -1 -1 -1 -1000 -1000 -1000 -10 11 | -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | dist: trusty 2 | sudo: required 3 | group: edge 4 | 5 | language: python 6 | 7 | python: 8 | - "3.5" 9 | 10 | cache: pip 11 | 12 | install: 13 | # install protobuf 14 | - sudo bash scripts/install/travis_install.bash 15 | 16 | # install python dependencies 17 | - pip install -r requirements.txt 18 | - pip install https://storage.googleapis.com/tensorflow/linux/cpu/tensorflow-1.3.0-cp35-cp35m-linux_x86_64.whl 19 | 20 | # install c++ lib for wavedata 21 | - sudo bash scripts/install/build_integral_image_lib.bash 22 | 23 | before_script: 24 | - git submodule init 25 | - git submodule update 26 | - export PYTHONPATH=$PYTHONPATH:`pwd`:`pwd`/wavedata 27 | - sudo protoc avod/protos/*.proto --python_out=. 
28 |
29 | script:
30 |   - python scripts/preprocessing/travis_test_preprocessing.py
31 |   - python -m unittest discover -b --pattern "*_test.py"
32 |
33 | notifications:
34 |   email: false
--------------------------------------------------------------------------------
/avod/builders/feature_extractor_builder.py:
--------------------------------------------------------------------------------
1 | from avod.core.feature_extractors.bev_vgg import BevVgg
2 | from avod.core.feature_extractors.bev_vgg_pyramid import BevVggPyr
3 |
4 | from avod.core.feature_extractors.img_vgg import ImgVgg
5 | from avod.core.feature_extractors.img_vgg_pyramid import ImgVggPyr
6 |
7 |
8 | def get_extractor(extractor_config):
9 |
10 |     extractor_type = extractor_config.WhichOneof('feature_extractor')
11 |
12 |     # BEV feature extractors
13 |     if extractor_type == 'bev_vgg':
14 |         return BevVgg(extractor_config.bev_vgg)
15 |     elif extractor_type == 'bev_vgg_pyr':
16 |         return BevVggPyr(extractor_config.bev_vgg_pyr)
17 |
18 |     # Image feature extractors
19 |     elif extractor_type == 'img_vgg':
20 |         return ImgVgg(extractor_config.img_vgg)
21 |     elif extractor_type == 'img_vgg_pyr':
22 |         return ImgVggPyr(extractor_config.img_vgg_pyr)
23 |
24 |     raise ValueError('Invalid feature extractor type', extractor_type)
25 |
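All of the builders follow the same pattern: a protobuf config selects an implementation, usually via `WhichOneof`. The top-level `NetworkPipelineConfig` from `avod/protos/pipeline.proto` (shown earlier) is what ties the individual configs together. A minimal loading sketch, assuming the protos have been compiled with `avod/protos/run_protoc.sh`; the helper name `load_pipeline_config` is illustrative only (the repo's actual config handling presumably lives in `builders/config_builder_util.py`):

```python
from google.protobuf import text_format

from avod.protos import pipeline_pb2  # generated by run_protoc.sh


def load_pipeline_config(config_path):
    """Parses a text-format .config file into a NetworkPipelineConfig."""
    pipeline_config = pipeline_pb2.NetworkPipelineConfig()
    with open(config_path, 'r') as f:
        text_format.Merge(f.read(), pipeline_config)
    return pipeline_config


# config = load_pipeline_config('avod/configs/unittest_pipeline.config')
# config.train_config.max_iterations, config.dataset_config.classes, ...
```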
--------------------------------------------------------------------------------
/avod/core/feature_extractors/bev_feature_extractor.py:
--------------------------------------------------------------------------------
1 | from abc import abstractmethod
2 |
3 | import tensorflow as tf
4 |
5 | class BevFeatureExtractor:
6 |
7 |     def __init__(self, extractor_config):
8 |         self.config = extractor_config
9 |
10 |     def preprocess_input(self, tensor_in, output_shape):
11 |         """Preprocesses the given input.
12 |
13 |         Args:
14 |             tensor_in: A `Tensor` of shape=(batch_size, height,
15 |                 width, channel) representing an input image.
16 |             output_shape: The shape of the output (H x W)
17 |
18 |         Returns:
19 |             Preprocessed tensor input, resized to the output_shape
20 |         """
21 |
22 |         # Only reshape if input shape does not match
23 |         if not tensor_in.shape[1:3] == output_shape:
24 |             return tf.image.resize_images(tensor_in, output_shape)
25 |
26 |         return tensor_in
27 |
28 |     @abstractmethod
29 |     def build(self, **kwargs):
30 |         pass
--------------------------------------------------------------------------------
/avod/protos/kitti_utils.proto:
--------------------------------------------------------------------------------
1 | package avod.protos;
2 |
3 | import "avod/protos/mini_batch.proto";
4 |
5 | message KittiUtilsConfig {
6 |     // 3D area extents [min_x, max_x, min_y, max_y, min_z, max_z]
7 |     repeated float area_extents = 1;
8 |
9 |     // Voxel grid size (for 2D and 3D)
10 |     required float voxel_size = 2;
11 |
12 |     // Anchor strides
13 |     repeated float anchor_strides = 3;
14 |
15 |     // Anchor filtering density threshold
16 |     optional int32 density_threshold = 4 [default = 1];
17 |
18 |     required BevGenerator bev_generator = 20;
19 |
20 |     required MiniBatchConfig mini_batch_config = 21;
21 | }
22 |
23 | message BevGenerator {
24 |     oneof bev_maps_type {
25 |         Slices slices = 1;
26 |     }
27 |
28 |     message Slices {
29 |         // Min and max height
30 |         required float height_lo = 1;
31 |         required float height_hi = 2;
32 |
33 |         // Number of slices to create
34 |         required int32 num_slices = 3;
35 |     }
36 | }
37 |
--------------------------------------------------------------------------------
/scripts/offline_eval/kitti_native_eval/mail.h:
--------------------------------------------------------------------------------
1 | #ifndef MAIL_H
2 | #define MAIL_H
3 |
4 | #include <stdio.h>
5 | #include <string>
6 | #include <stdarg.h>
7 |
8 | class Mail {
9 |
10 | public:
11 |
12 |   Mail (std::string email = "") {
13 |     if (email.compare("")) {
14 |       mail = popen("/usr/lib/sendmail -t -f noreply@cvlibs.net","w");
15 |       fprintf(mail,"To: %s\n", email.c_str());
16 |       fprintf(mail,"From: noreply@cvlibs.net\n");
17 |       fprintf(mail,"Subject: KITTI Evaluation Benchmark\n");
18 |       fprintf(mail,"\n\n");
19 |     } else {
20 |       mail = 0;
21 |     }
22 |   }
23 |
24 |   ~Mail() {
25 |     if (mail) {
26 |       pclose(mail);
27 |     }
28 |   }
29 |
30 |   void msg (const char *format, ...) {
31 |     va_list args;
32 |     va_start(args,format);
33 |     if (mail) {
34 |       vfprintf(mail,format,args);
35 |       fprintf(mail,"\n");
36 |     }
37 |     vprintf(format,args);
38 |     printf("\n");
39 |     va_end(args);
40 |   }
41 |
42 | private:
43 |
44 |   FILE *mail;
45 |
46 | };
47 |
48 | #endif
49 |
--------------------------------------------------------------------------------
/avod/protos/train.proto:
--------------------------------------------------------------------------------
1 | syntax = "proto2";
2 |
3 | package avod.protos;
4 | import "avod/protos/optimizer.proto";
5 |
6 | // Message for configuring DetectionModel training jobs (train.py).
7 | message TrainConfig {
8 |
9 |     // Input queue batch size.
10 |     optional uint32 batch_size = 1 [default = 1];
11 |
12 |     // Max training iteration
13 |     required uint32 max_iterations = 2 [default = 500];
14 |
15 |     // Optimizer used to train the DetectionModel.
16 | optional Optimizer optimizer = 3; 17 | 18 | // Checkpoint options 19 | optional uint32 checkpoint_interval = 4 [default = 50]; 20 | optional uint32 max_checkpoints_to_keep = 5 [default = 10]; 21 | optional bool overwrite_checkpoints = 6 [default = false]; 22 | 23 | // Summary options 24 | required uint32 summary_interval = 7 [default = 10]; 25 | required bool summary_histograms = 8; 26 | required bool summary_img_images = 9; 27 | required bool summary_bev_images = 10; 28 | 29 | // GPU options 30 | optional bool allow_gpu_mem_growth = 11 [default = false]; 31 | } 32 | -------------------------------------------------------------------------------- /avod/protos/mini_batch.proto: -------------------------------------------------------------------------------- 1 | package avod.protos; 2 | 3 | message MiniBatchConfig { 4 | 5 | // Density threshold for removing empty anchors 6 | required int32 density_threshold = 1; 7 | 8 | required MiniBatchRpnConfig rpn_config = 2; 9 | required MiniBatchAvodConfig avod_config = 3; 10 | } 11 | 12 | message MiniBatchIouThresholds { 13 | // RPN negative/positive iou ranges 14 | required float neg_iou_lo = 3; 15 | required float neg_iou_hi = 4; 16 | required float pos_iou_lo = 5; 17 | required float pos_iou_hi = 6; 18 | } 19 | 20 | message MiniBatchRpnConfig { 21 | oneof iou_type { 22 | MiniBatchIouThresholds iou_2d_thresholds = 1; 23 | MiniBatchIouThresholds iou_3d_thresholds = 2; 24 | } 25 | 26 | // Number of anchors in an RPN mini batch 27 | required int32 mini_batch_size = 3; 28 | } 29 | 30 | message MiniBatchAvodConfig { 31 | // AVOD positive/negative 2D iou ranges 32 | required MiniBatchIouThresholds iou_2d_thresholds = 1; 33 | 34 | // Number of anchors in an AVOD mini batch 35 | required int32 mini_batch_size = 2; 36 | } 37 | 38 | 39 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Copyright (c) 2018 Jason Ku, Melissa Mozifian 2 | 3 | Permission is hereby granted, free of charge, to any person obtaining a copy 4 | of this software and associated documentation files (the "Software"), to deal 5 | in the Software without restriction, including without limitation the rights 6 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 7 | copies of the Software, and to permit persons to whom the Software is 8 | furnished to do so, subject to the following conditions: 9 | 10 | The above copyright notice and this permission notice shall be included in all 11 | copies or substantial portions of the Software. 12 | 13 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 14 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 15 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 16 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 17 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 18 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 19 | SOFTWARE. 20 | -------------------------------------------------------------------------------- /avod/core/orientation_encoder.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | 3 | 4 | def tf_orientation_to_angle_vector(orientations_tensor): 5 | """ Converts orientation angles into angle unit vector representation. 6 | e.g. 
45 -> [0.707, 0.707], 90 -> [0, 1]
7 |
8 |     Args:
9 |         orientations_tensor: A tensor of shape (N,) of orientation angles
10 |
11 |     Returns:
12 |         A tensor of shape (N, 2) of angle unit vectors in the format [x, y]
13 |     """
14 |     x = tf.cos(orientations_tensor)
15 |     y = tf.sin(orientations_tensor)
16 |
17 |     return tf.stack([x, y], axis=1)
18 |
19 |
20 | def tf_angle_vector_to_orientation(angle_vectors_tensor):
21 |     """ Converts angle unit vectors into orientation angle representation.
22 |     e.g. [0.707, 0.707] -> 45, [0, 1] -> 90
23 |
24 |     Args:
25 |         angle_vectors_tensor: a tensor of shape (N, 2) of angle unit vectors
26 |             in the format [x, y]
27 |
28 |     Returns:
29 |         A tensor of shape (N,) of orientation angles
30 |     """
31 |     x = angle_vectors_tensor[:, 0]
32 |     y = angle_vectors_tensor[:, 1]
33 |
34 |     return tf.atan2(y, x)
35 |
--------------------------------------------------------------------------------
/avod/core/box_3d_projector_test.py:
--------------------------------------------------------------------------------
1 | import unittest
2 |
3 | import numpy as np
4 |
5 | from avod.core import box_3d_projector
6 |
7 |
8 | class Box3dProjectorTest(unittest.TestCase):
9 |     def test_project_to_bev(self):
10 |         boxes_3d = np.array([[0, 0, 0, 1, 0.5, 1, 0],
11 |                              [0, 0, 0, 1, 0.5, 1, np.pi / 2],
12 |                              [1, 0, 1, 1, 0.5, 1, np.pi / 2]])
13 |
14 |         box_points, box_points_norm = \
15 |             box_3d_projector.project_to_bev(boxes_3d, [[-1, 1], [-1, 1]])
16 |
17 |         expected_boxes = np.array(
18 |             [[[0.5, 0.25],
19 |               [-0.5, 0.25],
20 |               [-0.5, -0.25],
21 |               [0.5, -0.25]],
22 |              [[0.25, -0.5],
23 |               [0.25, 0.5],
24 |               [-0.25, 0.5],
25 |               [-0.25, -0.5]],
26 |              [[1.25, 0.5],
27 |               [1.25, 1.5],
28 |               [0.75, 1.5],
29 |               [0.75, 0.5]]],
30 |             dtype=np.float32)
31 |
32 |         for box, exp_box in zip(box_points, expected_boxes):
33 |             np.testing.assert_allclose(box, exp_box, rtol=1E-5)
34 |
--------------------------------------------------------------------------------
/avod/tests/datasets/Kitti/object/training/label_2/000142.txt:
--------------------------------------------------------------------------------
1 | Pedestrian 0.75 1 -2.28 1199.30 108.54 1241.00 348.03 1.67 0.70 0.44 4.69 1.22 5.12 -1.57
2 | Cyclist 0.26 0 2.11 36.57 165.55 248.54 374.00 1.81 0.51 1.71 -3.49 1.77 5.50 1.56
3 | Car 0.52 0 -2.07 835.06 166.73 1241.00 374.00 1.57 1.54 3.22 2.86 1.56 5.12 -1.59
4 | Car 0.00 1 -1.81 726.07 154.71 916.86 306.71 1.74 1.60 3.86 2.70 1.56 10.24 -1.56
5 | Car 0.00 2 -1.74 688.52 153.34 806.10 252.85 1.83 1.69 4.44 2.74 1.50 15.58 -1.57
6 | Car 0.00 2 -1.67 678.28 170.36 747.97 231.25 1.61 1.62 3.78 2.90 1.57 21.17 -1.53
7 | Pedestrian 0.00 0 0.63 656.08 160.85 672.41 199.98 1.90 0.42 1.04 2.54 1.33 35.54 0.70
8 | Pedestrian 0.00 1 0.63 661.72 162.97 676.05 200.69 1.87 0.50 0.90 2.99 1.39 36.22 0.71
9 | Car 0.00 0 1.67 530.82 167.68 560.49 197.37 1.86 1.57 3.83 -4.22 1.56 47.51 1.58
10 | Pedestrian 0.00 0 -0.04 605.72 167.06 616.06 191.92 1.78 0.53 0.95 0.06 1.37 52.31 -0.04
11 | Car 0.00 1 1.80 428.14 178.08 495.67 225.49 1.50 1.46 3.70 -5.05 1.69 24.84 1.60
12 | Car 0.53 1 2.21 0.00 197.78 273.61 374.00 1.54 1.54 3.72 -5.07 1.84 7.13 1.61
13 | Car 0.00 1 1.95 197.05 186.01 398.12 301.00 1.57 1.60 4.24 -5.08 1.82 12.30 1.57
14 | Car 0.00 1 1.88 337.15 176.21 452.69 252.15 1.65 1.61 3.73 -5.17 1.75 17.71 1.60
15 |
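The `label_2` files above follow the standard KITTI object label format: one object per line, 15 whitespace-separated fields. The repo itself presumably reads these through its `wavedata` submodule, so the parser below is only an illustrative sketch of the format:

```python
def read_kitti_labels(label_path):
    """Parses a KITTI label_2 file into a list of dicts (illustrative only)."""
    labels = []
    with open(label_path, 'r') as f:
        for line in f:
            fields = line.split()
            labels.append({
                'type': fields[0],               # e.g. 'Car', 'DontCare'
                'truncation': float(fields[1]),  # 0.0 to 1.0
                'occlusion': int(fields[2]),     # 0-3, or -1 for DontCare
                'alpha': float(fields[3]),       # observation angle
                'bbox': [float(x) for x in fields[4:8]],         # left, top, right, bottom
                'dimensions': [float(x) for x in fields[8:11]],  # h, w, l in metres
                'location': [float(x) for x in fields[11:14]],   # x, y, z in camera frame
                'rotation_y': float(fields[14]),  # yaw around the camera y-axis
            })
    return labels
```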
--------------------------------------------------------------------------------
/scripts/offline_eval/kitti_native_eval/README.md:
--------------------------------------------------------------------------------
1 | # kitti_native_eval
2 |
3 | `evaluate_object_3d_offline.cpp` evaluates your KITTI detections locally, on your own computer, against a validation set selected from the KITTI training data, with the following metrics:
4 |
5 | - Average Precision in the 2D image frame (AP)
6 | - Average Orientation Similarity (AOS)
7 | - Average Precision in BEV (AP)
8 | - Average Precision in 3D (AP)
9 |
10 | 1. Install:
11 | ```
12 | sudo apt-get install gnuplot gnuplot5
13 |
14 | cd /kitti_native_eval
15 |
16 | make
17 | ```
18 |
19 | 2. Copy the results folder into this folder. Each step should contain a 'data' folder.
20 |
21 | 3. Run the evaluation on all steps in the folder, for example:
22 | ```
23 | ./all_eval.sh 0.5
24 | ```
25 | ---
26 | Alternatively, you can run the evaluation on a single step with the following command:
27 | ```
28 | ./evaluate_object_3d_offline groundtruth_dir result_dir
29 | ```
30 |
31 | - Place the results folder in the data folder and use /kitti_native_eval as results_dir
32 | - Use ~/Kitti/object/training/label_2 as your groundtruth_dir
33 |
34 | ---
35 |
36 | Note that you do not have to run detection over all of the KITTI training data; the evaluator only evaluates samples whose result files exist.
37 |
38 | - Results will appear per class in the terminal for easy, medium and difficult data.
39 | - Precision-recall curves will be generated and saved to the 'plot' directory.
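The four `mb_preprocessing` configs that follow plug IoU ranges into the `MiniBatchConfig` message from `mini_batch.proto` above. As a rough, hedged sketch of how such lo/hi ranges are typically applied (the project's real logic presumably lives in the core mini-batch utilities, whose test `mini_batch_utils_test.py` appears in the tree): anchors whose best IoU falls in the negative range become background, those in the positive range become foreground, and everything in between is ignored.

```python
import numpy as np


def classify_anchors(max_ious, neg_iou_lo, neg_iou_hi, pos_iou_lo, pos_iou_hi):
    """Splits anchors into positives/negatives by IoU range (illustrative)."""
    # max_ious: (N,) best IoU of each anchor against any ground-truth box
    negatives = (max_ious >= neg_iou_lo) & (max_ious < neg_iou_hi)
    positives = (max_ious >= pos_iou_lo) & (max_ious <= pos_iou_hi)
    return positives, negatives  # anchors in neither mask are ignored


# With the RPN car thresholds below (0.0-0.3 negative, 0.5-1.0 positive):
# classify_anchors(np.array([0.1, 0.4, 0.7]), 0.0, 0.3, 0.5, 1.0)
# -> positives [F, F, T], negatives [T, F, F]; the 0.4 anchor is ignored.
```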
39 | # } 40 | 41 | mini_batch_size: 512 42 | } 43 | 44 | avod_config { 45 | iou_2d_thresholds { 46 | neg_iou_lo: 0.0 47 | neg_iou_hi: 0.45 48 | pos_iou_lo: 0.55 49 | pos_iou_hi: 1.0 50 | } 51 | 52 | mini_batch_size: 1024 53 | } 54 | } 55 | } 56 | -------------------------------------------------------------------------------- /avod/configs/mb_preprocessing/rpn_pedestrians.config: -------------------------------------------------------------------------------- 1 | name: 'kitti' 2 | data_split: 'trainval' 3 | data_split_dir: 'training' 4 | has_labels: True 5 | 6 | cluster_split: 'train', 7 | classes: ['Pedestrian'] 8 | num_clusters: [1] 9 | bev_source: 'lidar' 10 | 11 | kitti_utils_config { 12 | area_extents: [-40, 40, -5, 3, 0, 70] 13 | voxel_size: 0.1 14 | anchor_strides: [0.5, 0.5] 15 | 16 | bev_generator { 17 | slices { 18 | height_lo: -0.2 19 | height_hi: 2.3 20 | num_slices: 5 21 | } 22 | } 23 | 24 | mini_batch_config { 25 | density_threshold: 1 26 | 27 | rpn_config { 28 | iou_2d_thresholds { 29 | neg_iou_lo: 0.0 30 | neg_iou_hi: 0.3 31 | pos_iou_lo: 0.45 32 | pos_iou_hi: 1.0 33 | } 34 | # iou_3d_thresholds { 35 | # neg_iou_lo: 0.0 36 | # neg_iou_hi: 0.1 37 | # pos_iou_lo: 0.3 38 | # pos_iou_hi: 1.0 39 | # } 40 | 41 | mini_batch_size: 512 42 | } 43 | 44 | avod_config { 45 | iou_2d_thresholds { 46 | neg_iou_lo: 0.0 47 | neg_iou_hi: 0.45 48 | pos_iou_lo: 0.55 49 | pos_iou_hi: 1.0 50 | } 51 | 52 | mini_batch_size: 1024 53 | } 54 | } 55 | } 56 | -------------------------------------------------------------------------------- /avod/core/bev_generators/bev_generator.py: -------------------------------------------------------------------------------- 1 | import abc 2 | 3 | import numpy as np 4 | 5 | 6 | class BevGenerator: 7 | 8 | @abc.abstractmethod 9 | def generate_bev(self, **params): 10 | """Generates BEV maps 11 | 12 | Args: 13 | **params: additional keyword arguments for 14 | specific implementations of BevGenerator. 
15 | 16 | Returns: 17 | Dictionary with entries for height maps and one density map 18 | height_maps: list of height maps 19 | density_map: density map 20 | """ 21 | pass 22 | 23 | def _create_density_map(self, 24 | num_divisions, 25 | voxel_indices_2d, 26 | num_pts_per_voxel, 27 | norm_value): 28 | 29 | # Create empty density map 30 | density_map = np.zeros((num_divisions[0], 31 | num_divisions[2])) 32 | 33 | # Density is min(1.0, log(N+1)/norm_value); only update pixels at occupied voxels 34 | density_map[voxel_indices_2d[:, 0], voxel_indices_2d[:, 1]] = \ 35 | np.minimum(1.0, np.log(num_pts_per_voxel + 1) / norm_value) 36 | 37 | # norm_value = log(x): x=64 for stereo, x=16 for lidar, x=64 for depth 38 | # Rotate 90 degrees counter-clockwise (transpose then flip, same as np.rot90) 39 | density_map = np.flip(density_map.transpose(), axis=0) 40 | 41 | return density_map 42 | -------------------------------------------------------------------------------- /avod/configs/mb_preprocessing/rpn_people.config: -------------------------------------------------------------------------------- 1 | name: 'kitti' 2 | data_split: 'trainval' 3 | data_split_dir: 'training' 4 | has_labels: True 5 | 6 | cluster_split: 'train', 7 | classes: ['Pedestrian', 'Cyclist'] 8 | num_clusters: [1, 1] 9 | bev_source: 'lidar' 10 | 11 | kitti_utils_config { 12 | area_extents: [-40, 40, -5, 3, 0, 70] 13 | voxel_size: 0.1 14 | anchor_strides: [0.5, 0.5, 0.5, 0.5] 15 | 16 | bev_generator { 17 | slices { 18 | height_lo: -0.2 19 | height_hi: 2.3 20 | num_slices: 5 21 | } 22 | } 23 | 24 | mini_batch_config { 25 | density_threshold: 1 26 | 27 | rpn_config { 28 | iou_2d_thresholds { 29 | neg_iou_lo: 0.0 30 | neg_iou_hi: 0.3 31 | pos_iou_lo: 0.45 32 | pos_iou_hi: 1.0 33 | } 34 | # iou_3d_thresholds { 35 | # neg_iou_lo: 0.0 36 | # neg_iou_hi: 0.1 37 | # pos_iou_lo: 0.3 38 | # pos_iou_hi: 1.0 39 | # } 40 | 41 | mini_batch_size: 512 42 | } 43 | 44 | avod_config { 45 | iou_2d_thresholds { 46 | neg_iou_lo: 0.0 47 | neg_iou_hi: 0.45 48 | pos_iou_lo: 0.55 49 | pos_iou_hi: 1.0 50 | } 51 | 52 | mini_batch_size: 1024 53 | } 54 | } 55 | } 56 | -------------------------------------------------------------------------------- /avod/protos/kitti_dataset.proto: -------------------------------------------------------------------------------- 1 | package avod.protos; 2 | 3 | import "avod/protos/kitti_utils.proto"; 4 | 5 | message KittiDatasetConfig { 6 | 7 | // Unique name for dataset 8 | optional string name = 1 [default = "kitti"]; 9 | 10 | // Top level directory of the dataset 11 | optional string dataset_dir = 2 [default = "~/Kitti/object"]; 12 | 13 | // Split for the data (e.g. 'train', 'val') 14 | optional string data_split = 3 [default = "train"]; 15 | 16 | // Folder that holds the data for the chosen data split 17 | optional string data_split_dir = 4 [default = "training"]; 18 | 19 | // Whether the samples have labels 20 | optional bool has_labels = 5 [default = true]; 21 | 22 | // The data split to be used for calculating clusters (e.g. val split 23 | // should use the train split for clustering) 24 | optional string cluster_split = 6 [default = "train"]; 25 | 26 | // Classes to be classified (e.g. ['Car', 'Pedestrian', 'Cyclist']) 27 | repeated string classes = 7; 28 | 29 | // Number of clusters corresponding to each class (e.g. [2, 1, 2]) 30 | repeated int32 num_clusters = 8; 31 | 32 | // BEV source, e.g. 'lidar' 33 | required string bev_source = 9; 34 | 35 | // Augmentations (e.g.
[''], ['flipping'], ['flipping', 'pca_jitter']) 36 | repeated string aug_list = 10; 37 | 38 | // KittiUtils configuration 39 | optional KittiUtilsConfig kitti_utils_config = 20; 40 | } -------------------------------------------------------------------------------- /avod/core/ops.py: -------------------------------------------------------------------------------- 1 | """A module for helper tensorflow ops.""" 2 | 3 | import tensorflow as tf 4 | 5 | 6 | def indices_to_dense_vector(indices, 7 | size, 8 | indices_value=1., 9 | default_value=0, 10 | dtype=tf.float32): 11 | """Creates dense vector with indices set to specific value 12 | and rest to zeros. 13 | 14 | This function exists because it is unclear if it is safe to use 15 | tf.sparse_to_dense(indices, [size], 1, validate_indices=False) 16 | with indices which are not ordered. This function accepts a 17 | dynamic size (e.g. tf.shape(tensor)[0]) 18 | 19 | Args: 20 | indices: 1d Tensor with integer indices which are to be set to 21 | indices_values. 22 | size: scalar with size (integer) of output Tensor. 23 | indices_value: values of elements specified by indices in the output 24 | vector 25 | default_value: values of other elements in the output vector. 26 | dtype: data type. 27 | 28 | Returns: 29 | dense 1D Tensor of shape [size] with indices set to indices_values and the 30 | rest set to default_value. 31 | """ 32 | size = tf.to_int32(size) 33 | zeros = tf.ones([size], dtype=dtype) * default_value 34 | values = tf.ones_like(indices, dtype=dtype) * indices_value 35 | 36 | return tf.dynamic_stitch([tf.range(size), tf.to_int32(indices)], 37 | [zeros, values]) 38 | -------------------------------------------------------------------------------- /avod/tests/datasets/Kitti/object/training/calib/000000.txt: -------------------------------------------------------------------------------- 1 | P0: 7.070493000000e+02 0.000000000000e+00 6.040814000000e+02 0.000000000000e+00 0.000000000000e+00 7.070493000000e+02 1.805066000000e+02 0.000000000000e+00 0.000000000000e+00 0.000000000000e+00 1.000000000000e+00 0.000000000000e+00 2 | P1: 7.070493000000e+02 0.000000000000e+00 6.040814000000e+02 -3.797842000000e+02 0.000000000000e+00 7.070493000000e+02 1.805066000000e+02 0.000000000000e+00 0.000000000000e+00 0.000000000000e+00 1.000000000000e+00 0.000000000000e+00 3 | P2: 7.070493000000e+02 0.000000000000e+00 6.040814000000e+02 4.575831000000e+01 0.000000000000e+00 7.070493000000e+02 1.805066000000e+02 -3.454157000000e-01 0.000000000000e+00 0.000000000000e+00 1.000000000000e+00 4.981016000000e-03 4 | P3: 7.070493000000e+02 0.000000000000e+00 6.040814000000e+02 -3.341081000000e+02 0.000000000000e+00 7.070493000000e+02 1.805066000000e+02 2.330660000000e+00 0.000000000000e+00 0.000000000000e+00 1.000000000000e+00 3.201153000000e-03 5 | R0_rect: 9.999128000000e-01 1.009263000000e-02 -8.511932000000e-03 -1.012729000000e-02 9.999406000000e-01 -4.037671000000e-03 8.470675000000e-03 4.123522000000e-03 9.999556000000e-01 6 | Tr_velo_to_cam: 6.927964000000e-03 -9.999722000000e-01 -2.757829000000e-03 -2.457729000000e-02 -1.162982000000e-03 2.749836000000e-03 -9.999955000000e-01 -6.127237000000e-02 9.999753000000e-01 6.931141000000e-03 -1.143899000000e-03 -3.321029000000e-01 7 | Tr_imu_to_velo: 9.999976000000e-01 7.553071000000e-04 -2.035826000000e-03 -8.086759000000e-01 -7.854027000000e-04 9.998898000000e-01 -1.482298000000e-02 3.195559000000e-01 2.024406000000e-03 1.482454000000e-02 9.998881000000e-01 -7.997231000000e-01 8 | 
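The calibration files above follow the standard KITTI object-detection format: `P0`-`P3` are flattened 3x4 camera projection matrices, `R0_rect` is a 3x3 rectification matrix, and `Tr_velo_to_cam` / `Tr_imu_to_velo` are 3x4 rigid transforms, so a velodyne point projects into the left color image as x_img = P2 * R0_rect * Tr_velo_to_cam * x_velo. As a minimal, self-contained sketch of how such a file can be parsed and used (the function names here are the editor's own and are not part of this repo, which performs its calibration handling elsewhere):
```
import numpy as np


def read_kitti_calib(calib_path):
    """Parses a KITTI calib file (like 000000.txt above) into matrices."""
    mats = {}
    with open(calib_path) as f:
        for line in f:
            if ':' not in line:
                continue  # skip blank trailing lines
            key, values = line.split(':', 1)
            mats[key] = np.array([float(v) for v in values.split()])
    p2 = mats['P2'].reshape(3, 4)            # camera 2 projection matrix
    r0_rect = mats['R0_rect'].reshape(3, 3)  # rectification matrix
    tr_velo_to_cam = mats['Tr_velo_to_cam'].reshape(3, 4)
    return p2, r0_rect, tr_velo_to_cam


def project_velo_to_image(points, p2, r0_rect, tr_velo_to_cam):
    """Projects Nx3 velodyne points to Nx2 pixel coords via P2 * R0 * Tr * x."""
    num_pts = points.shape[0]
    pts_h = np.hstack([points, np.ones((num_pts, 1))])   # Nx4 homogeneous
    pts_cam = r0_rect @ (tr_velo_to_cam @ pts_h.T)       # 3xN rectified cam frame
    pts_img = p2 @ np.vstack([pts_cam, np.ones((1, num_pts))])  # 3xN
    return (pts_img[:2] / pts_img[2]).T                  # divide by depth
```
Points with non-positive depth (behind the camera) should be filtered out before the final division.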
-------------------------------------------------------------------------------- /avod/tests/datasets/Kitti/object/training/calib/000001.txt: -------------------------------------------------------------------------------- 1 | P0: 7.215377000000e+02 0.000000000000e+00 6.095593000000e+02 0.000000000000e+00 0.000000000000e+00 7.215377000000e+02 1.728540000000e+02 0.000000000000e+00 0.000000000000e+00 0.000000000000e+00 1.000000000000e+00 0.000000000000e+00 2 | P1: 7.215377000000e+02 0.000000000000e+00 6.095593000000e+02 -3.875744000000e+02 0.000000000000e+00 7.215377000000e+02 1.728540000000e+02 0.000000000000e+00 0.000000000000e+00 0.000000000000e+00 1.000000000000e+00 0.000000000000e+00 3 | P2: 7.215377000000e+02 0.000000000000e+00 6.095593000000e+02 4.485728000000e+01 0.000000000000e+00 7.215377000000e+02 1.728540000000e+02 2.163791000000e-01 0.000000000000e+00 0.000000000000e+00 1.000000000000e+00 2.745884000000e-03 4 | P3: 7.215377000000e+02 0.000000000000e+00 6.095593000000e+02 -3.395242000000e+02 0.000000000000e+00 7.215377000000e+02 1.728540000000e+02 2.199936000000e+00 0.000000000000e+00 0.000000000000e+00 1.000000000000e+00 2.729905000000e-03 5 | R0_rect: 9.999239000000e-01 9.837760000000e-03 -7.445048000000e-03 -9.869795000000e-03 9.999421000000e-01 -4.278459000000e-03 7.402527000000e-03 4.351614000000e-03 9.999631000000e-01 6 | Tr_velo_to_cam: 7.533745000000e-03 -9.999714000000e-01 -6.166020000000e-04 -4.069766000000e-03 1.480249000000e-02 7.280733000000e-04 -9.998902000000e-01 -7.631618000000e-02 9.998621000000e-01 7.523790000000e-03 1.480755000000e-02 -2.717806000000e-01 7 | Tr_imu_to_velo: 9.999976000000e-01 7.553071000000e-04 -2.035826000000e-03 -8.086759000000e-01 -7.854027000000e-04 9.998898000000e-01 -1.482298000000e-02 3.195559000000e-01 2.024406000000e-03 1.482454000000e-02 9.998881000000e-01 -7.997231000000e-01 8 | 9 | -------------------------------------------------------------------------------- /avod/tests/datasets/Kitti/object/training/calib/000002.txt: -------------------------------------------------------------------------------- 1 | P0: 7.215377000000e+02 0.000000000000e+00 6.095593000000e+02 0.000000000000e+00 0.000000000000e+00 7.215377000000e+02 1.728540000000e+02 0.000000000000e+00 0.000000000000e+00 0.000000000000e+00 1.000000000000e+00 0.000000000000e+00 2 | P1: 7.215377000000e+02 0.000000000000e+00 6.095593000000e+02 -3.875744000000e+02 0.000000000000e+00 7.215377000000e+02 1.728540000000e+02 0.000000000000e+00 0.000000000000e+00 0.000000000000e+00 1.000000000000e+00 0.000000000000e+00 3 | P2: 7.215377000000e+02 0.000000000000e+00 6.095593000000e+02 4.485728000000e+01 0.000000000000e+00 7.215377000000e+02 1.728540000000e+02 2.163791000000e-01 0.000000000000e+00 0.000000000000e+00 1.000000000000e+00 2.745884000000e-03 4 | P3: 7.215377000000e+02 0.000000000000e+00 6.095593000000e+02 -3.395242000000e+02 0.000000000000e+00 7.215377000000e+02 1.728540000000e+02 2.199936000000e+00 0.000000000000e+00 0.000000000000e+00 1.000000000000e+00 2.729905000000e-03 5 | R0_rect: 9.999239000000e-01 9.837760000000e-03 -7.445048000000e-03 -9.869795000000e-03 9.999421000000e-01 -4.278459000000e-03 7.402527000000e-03 4.351614000000e-03 9.999631000000e-01 6 | Tr_velo_to_cam: 7.533745000000e-03 -9.999714000000e-01 -6.166020000000e-04 -4.069766000000e-03 1.480249000000e-02 7.280733000000e-04 -9.998902000000e-01 -7.631618000000e-02 9.998621000000e-01 7.523790000000e-03 1.480755000000e-02 -2.717806000000e-01 7 | Tr_imu_to_velo: 9.999976000000e-01 7.553071000000e-04 
-2.035826000000e-03 -8.086759000000e-01 -7.854027000000e-04 9.998898000000e-01 -1.482298000000e-02 3.195559000000e-01 2.024406000000e-03 1.482454000000e-02 9.998881000000e-01 -7.997231000000e-01 8 | 9 | -------------------------------------------------------------------------------- /avod/tests/datasets/Kitti/object/training/calib/000003.txt: -------------------------------------------------------------------------------- 1 | P0: 7.215377000000e+02 0.000000000000e+00 6.095593000000e+02 0.000000000000e+00 0.000000000000e+00 7.215377000000e+02 1.728540000000e+02 0.000000000000e+00 0.000000000000e+00 0.000000000000e+00 1.000000000000e+00 0.000000000000e+00 2 | P1: 7.215377000000e+02 0.000000000000e+00 6.095593000000e+02 -3.875744000000e+02 0.000000000000e+00 7.215377000000e+02 1.728540000000e+02 0.000000000000e+00 0.000000000000e+00 0.000000000000e+00 1.000000000000e+00 0.000000000000e+00 3 | P2: 7.215377000000e+02 0.000000000000e+00 6.095593000000e+02 4.485728000000e+01 0.000000000000e+00 7.215377000000e+02 1.728540000000e+02 2.163791000000e-01 0.000000000000e+00 0.000000000000e+00 1.000000000000e+00 2.745884000000e-03 4 | P3: 7.215377000000e+02 0.000000000000e+00 6.095593000000e+02 -3.395242000000e+02 0.000000000000e+00 7.215377000000e+02 1.728540000000e+02 2.199936000000e+00 0.000000000000e+00 0.000000000000e+00 1.000000000000e+00 2.729905000000e-03 5 | R0_rect: 9.999239000000e-01 9.837760000000e-03 -7.445048000000e-03 -9.869795000000e-03 9.999421000000e-01 -4.278459000000e-03 7.402527000000e-03 4.351614000000e-03 9.999631000000e-01 6 | Tr_velo_to_cam: 7.533745000000e-03 -9.999714000000e-01 -6.166020000000e-04 -4.069766000000e-03 1.480249000000e-02 7.280733000000e-04 -9.998902000000e-01 -7.631618000000e-02 9.998621000000e-01 7.523790000000e-03 1.480755000000e-02 -2.717806000000e-01 7 | Tr_imu_to_velo: 9.999976000000e-01 7.553071000000e-04 -2.035826000000e-03 -8.086759000000e-01 -7.854027000000e-04 9.998898000000e-01 -1.482298000000e-02 3.195559000000e-01 2.024406000000e-03 1.482454000000e-02 9.998881000000e-01 -7.997231000000e-01 8 | 9 | -------------------------------------------------------------------------------- /avod/tests/datasets/Kitti/object/training/calib/000004.txt: -------------------------------------------------------------------------------- 1 | P0: 7.215377000000e+02 0.000000000000e+00 6.095593000000e+02 0.000000000000e+00 0.000000000000e+00 7.215377000000e+02 1.728540000000e+02 0.000000000000e+00 0.000000000000e+00 0.000000000000e+00 1.000000000000e+00 0.000000000000e+00 2 | P1: 7.215377000000e+02 0.000000000000e+00 6.095593000000e+02 -3.875744000000e+02 0.000000000000e+00 7.215377000000e+02 1.728540000000e+02 0.000000000000e+00 0.000000000000e+00 0.000000000000e+00 1.000000000000e+00 0.000000000000e+00 3 | P2: 7.215377000000e+02 0.000000000000e+00 6.095593000000e+02 4.485728000000e+01 0.000000000000e+00 7.215377000000e+02 1.728540000000e+02 2.163791000000e-01 0.000000000000e+00 0.000000000000e+00 1.000000000000e+00 2.745884000000e-03 4 | P3: 7.215377000000e+02 0.000000000000e+00 6.095593000000e+02 -3.395242000000e+02 0.000000000000e+00 7.215377000000e+02 1.728540000000e+02 2.199936000000e+00 0.000000000000e+00 0.000000000000e+00 1.000000000000e+00 2.729905000000e-03 5 | R0_rect: 9.999239000000e-01 9.837760000000e-03 -7.445048000000e-03 -9.869795000000e-03 9.999421000000e-01 -4.278459000000e-03 7.402527000000e-03 4.351614000000e-03 9.999631000000e-01 6 | Tr_velo_to_cam: 7.533745000000e-03 -9.999714000000e-01 -6.166020000000e-04 -4.069766000000e-03 1.480249000000e-02 
7.280733000000e-04 -9.998902000000e-01 -7.631618000000e-02 9.998621000000e-01 7.523790000000e-03 1.480755000000e-02 -2.717806000000e-01 7 | Tr_imu_to_velo: 9.999976000000e-01 7.553071000000e-04 -2.035826000000e-03 -8.086759000000e-01 -7.854027000000e-04 9.998898000000e-01 -1.482298000000e-02 3.195559000000e-01 2.024406000000e-03 1.482454000000e-02 9.998881000000e-01 -7.997231000000e-01 8 | 9 | -------------------------------------------------------------------------------- /avod/tests/datasets/Kitti/object/training/calib/000005.txt: -------------------------------------------------------------------------------- 1 | P0: 7.215377000000e+02 0.000000000000e+00 6.095593000000e+02 0.000000000000e+00 0.000000000000e+00 7.215377000000e+02 1.728540000000e+02 0.000000000000e+00 0.000000000000e+00 0.000000000000e+00 1.000000000000e+00 0.000000000000e+00 2 | P1: 7.215377000000e+02 0.000000000000e+00 6.095593000000e+02 -3.875744000000e+02 0.000000000000e+00 7.215377000000e+02 1.728540000000e+02 0.000000000000e+00 0.000000000000e+00 0.000000000000e+00 1.000000000000e+00 0.000000000000e+00 3 | P2: 7.215377000000e+02 0.000000000000e+00 6.095593000000e+02 4.485728000000e+01 0.000000000000e+00 7.215377000000e+02 1.728540000000e+02 2.163791000000e-01 0.000000000000e+00 0.000000000000e+00 1.000000000000e+00 2.745884000000e-03 4 | P3: 7.215377000000e+02 0.000000000000e+00 6.095593000000e+02 -3.395242000000e+02 0.000000000000e+00 7.215377000000e+02 1.728540000000e+02 2.199936000000e+00 0.000000000000e+00 0.000000000000e+00 1.000000000000e+00 2.729905000000e-03 5 | R0_rect: 9.999239000000e-01 9.837760000000e-03 -7.445048000000e-03 -9.869795000000e-03 9.999421000000e-01 -4.278459000000e-03 7.402527000000e-03 4.351614000000e-03 9.999631000000e-01 6 | Tr_velo_to_cam: 7.533745000000e-03 -9.999714000000e-01 -6.166020000000e-04 -4.069766000000e-03 1.480249000000e-02 7.280733000000e-04 -9.998902000000e-01 -7.631618000000e-02 9.998621000000e-01 7.523790000000e-03 1.480755000000e-02 -2.717806000000e-01 7 | Tr_imu_to_velo: 9.999976000000e-01 7.553071000000e-04 -2.035826000000e-03 -8.086759000000e-01 -7.854027000000e-04 9.998898000000e-01 -1.482298000000e-02 3.195559000000e-01 2.024406000000e-03 1.482454000000e-02 9.998881000000e-01 -7.997231000000e-01 8 | 9 | -------------------------------------------------------------------------------- /avod/tests/datasets/Kitti/object/training/calib/000007.txt: -------------------------------------------------------------------------------- 1 | P0: 7.215377000000e+02 0.000000000000e+00 6.095593000000e+02 0.000000000000e+00 0.000000000000e+00 7.215377000000e+02 1.728540000000e+02 0.000000000000e+00 0.000000000000e+00 0.000000000000e+00 1.000000000000e+00 0.000000000000e+00 2 | P1: 7.215377000000e+02 0.000000000000e+00 6.095593000000e+02 -3.875744000000e+02 0.000000000000e+00 7.215377000000e+02 1.728540000000e+02 0.000000000000e+00 0.000000000000e+00 0.000000000000e+00 1.000000000000e+00 0.000000000000e+00 3 | P2: 7.215377000000e+02 0.000000000000e+00 6.095593000000e+02 4.485728000000e+01 0.000000000000e+00 7.215377000000e+02 1.728540000000e+02 2.163791000000e-01 0.000000000000e+00 0.000000000000e+00 1.000000000000e+00 2.745884000000e-03 4 | P3: 7.215377000000e+02 0.000000000000e+00 6.095593000000e+02 -3.395242000000e+02 0.000000000000e+00 7.215377000000e+02 1.728540000000e+02 2.199936000000e+00 0.000000000000e+00 0.000000000000e+00 1.000000000000e+00 2.729905000000e-03 5 | R0_rect: 9.999239000000e-01 9.837760000000e-03 -7.445048000000e-03 -9.869795000000e-03 9.999421000000e-01 
-4.278459000000e-03 7.402527000000e-03 4.351614000000e-03 9.999631000000e-01 6 | Tr_velo_to_cam: 7.533745000000e-03 -9.999714000000e-01 -6.166020000000e-04 -4.069766000000e-03 1.480249000000e-02 7.280733000000e-04 -9.998902000000e-01 -7.631618000000e-02 9.998621000000e-01 7.523790000000e-03 1.480755000000e-02 -2.717806000000e-01 7 | Tr_imu_to_velo: 9.999976000000e-01 7.553071000000e-04 -2.035826000000e-03 -8.086759000000e-01 -7.854027000000e-04 9.998898000000e-01 -1.482298000000e-02 3.195559000000e-01 2.024406000000e-03 1.482454000000e-02 9.998881000000e-01 -7.997231000000e-01 8 | 9 | -------------------------------------------------------------------------------- /avod/tests/datasets/Kitti/object/training/calib/000008.txt: -------------------------------------------------------------------------------- 1 | P0: 7.215377000000e+02 0.000000000000e+00 6.095593000000e+02 0.000000000000e+00 0.000000000000e+00 7.215377000000e+02 1.728540000000e+02 0.000000000000e+00 0.000000000000e+00 0.000000000000e+00 1.000000000000e+00 0.000000000000e+00 2 | P1: 7.215377000000e+02 0.000000000000e+00 6.095593000000e+02 -3.875744000000e+02 0.000000000000e+00 7.215377000000e+02 1.728540000000e+02 0.000000000000e+00 0.000000000000e+00 0.000000000000e+00 1.000000000000e+00 0.000000000000e+00 3 | P2: 7.215377000000e+02 0.000000000000e+00 6.095593000000e+02 4.485728000000e+01 0.000000000000e+00 7.215377000000e+02 1.728540000000e+02 2.163791000000e-01 0.000000000000e+00 0.000000000000e+00 1.000000000000e+00 2.745884000000e-03 4 | P3: 7.215377000000e+02 0.000000000000e+00 6.095593000000e+02 -3.395242000000e+02 0.000000000000e+00 7.215377000000e+02 1.728540000000e+02 2.199936000000e+00 0.000000000000e+00 0.000000000000e+00 1.000000000000e+00 2.729905000000e-03 5 | R0_rect: 9.999239000000e-01 9.837760000000e-03 -7.445048000000e-03 -9.869795000000e-03 9.999421000000e-01 -4.278459000000e-03 7.402527000000e-03 4.351614000000e-03 9.999631000000e-01 6 | Tr_velo_to_cam: 7.533745000000e-03 -9.999714000000e-01 -6.166020000000e-04 -4.069766000000e-03 1.480249000000e-02 7.280733000000e-04 -9.998902000000e-01 -7.631618000000e-02 9.998621000000e-01 7.523790000000e-03 1.480755000000e-02 -2.717806000000e-01 7 | Tr_imu_to_velo: 9.999976000000e-01 7.553071000000e-04 -2.035826000000e-03 -8.086759000000e-01 -7.854027000000e-04 9.998898000000e-01 -1.482298000000e-02 3.195559000000e-01 2.024406000000e-03 1.482454000000e-02 9.998881000000e-01 -7.997231000000e-01 8 | 9 | -------------------------------------------------------------------------------- /avod/tests/datasets/Kitti/object/training/calib/000009.txt: -------------------------------------------------------------------------------- 1 | P0: 7.215377000000e+02 0.000000000000e+00 6.095593000000e+02 0.000000000000e+00 0.000000000000e+00 7.215377000000e+02 1.728540000000e+02 0.000000000000e+00 0.000000000000e+00 0.000000000000e+00 1.000000000000e+00 0.000000000000e+00 2 | P1: 7.215377000000e+02 0.000000000000e+00 6.095593000000e+02 -3.875744000000e+02 0.000000000000e+00 7.215377000000e+02 1.728540000000e+02 0.000000000000e+00 0.000000000000e+00 0.000000000000e+00 1.000000000000e+00 0.000000000000e+00 3 | P2: 7.215377000000e+02 0.000000000000e+00 6.095593000000e+02 4.485728000000e+01 0.000000000000e+00 7.215377000000e+02 1.728540000000e+02 2.163791000000e-01 0.000000000000e+00 0.000000000000e+00 1.000000000000e+00 2.745884000000e-03 4 | P3: 7.215377000000e+02 0.000000000000e+00 6.095593000000e+02 -3.395242000000e+02 0.000000000000e+00 7.215377000000e+02 1.728540000000e+02 
2.199936000000e+00 0.000000000000e+00 0.000000000000e+00 1.000000000000e+00 2.729905000000e-03 5 | R0_rect: 9.999239000000e-01 9.837760000000e-03 -7.445048000000e-03 -9.869795000000e-03 9.999421000000e-01 -4.278459000000e-03 7.402527000000e-03 4.351614000000e-03 9.999631000000e-01 6 | Tr_velo_to_cam: 7.533745000000e-03 -9.999714000000e-01 -6.166020000000e-04 -4.069766000000e-03 1.480249000000e-02 7.280733000000e-04 -9.998902000000e-01 -7.631618000000e-02 9.998621000000e-01 7.523790000000e-03 1.480755000000e-02 -2.717806000000e-01 7 | Tr_imu_to_velo: 9.999976000000e-01 7.553071000000e-04 -2.035826000000e-03 -8.086759000000e-01 -7.854027000000e-04 9.998898000000e-01 -1.482298000000e-02 3.195559000000e-01 2.024406000000e-03 1.482454000000e-02 9.998881000000e-01 -7.997231000000e-01 8 | 9 | -------------------------------------------------------------------------------- /avod/tests/datasets/Kitti/object/training/calib/000142.txt: -------------------------------------------------------------------------------- 1 | P0: 7.215377000000e+02 0.000000000000e+00 6.095593000000e+02 0.000000000000e+00 0.000000000000e+00 7.215377000000e+02 1.728540000000e+02 0.000000000000e+00 0.000000000000e+00 0.000000000000e+00 1.000000000000e+00 0.000000000000e+00 2 | P1: 7.215377000000e+02 0.000000000000e+00 6.095593000000e+02 -3.875744000000e+02 0.000000000000e+00 7.215377000000e+02 1.728540000000e+02 0.000000000000e+00 0.000000000000e+00 0.000000000000e+00 1.000000000000e+00 0.000000000000e+00 3 | P2: 7.215377000000e+02 0.000000000000e+00 6.095593000000e+02 4.485728000000e+01 0.000000000000e+00 7.215377000000e+02 1.728540000000e+02 2.163791000000e-01 0.000000000000e+00 0.000000000000e+00 1.000000000000e+00 2.745884000000e-03 4 | P3: 7.215377000000e+02 0.000000000000e+00 6.095593000000e+02 -3.395242000000e+02 0.000000000000e+00 7.215377000000e+02 1.728540000000e+02 2.199936000000e+00 0.000000000000e+00 0.000000000000e+00 1.000000000000e+00 2.729905000000e-03 5 | R0_rect: 9.999239000000e-01 9.837760000000e-03 -7.445048000000e-03 -9.869795000000e-03 9.999421000000e-01 -4.278459000000e-03 7.402527000000e-03 4.351614000000e-03 9.999631000000e-01 6 | Tr_velo_to_cam: 7.533745000000e-03 -9.999714000000e-01 -6.166020000000e-04 -4.069766000000e-03 1.480249000000e-02 7.280733000000e-04 -9.998902000000e-01 -7.631618000000e-02 9.998621000000e-01 7.523790000000e-03 1.480755000000e-02 -2.717806000000e-01 7 | Tr_imu_to_velo: 9.999976000000e-01 7.553071000000e-04 -2.035826000000e-03 -8.086759000000e-01 -7.854027000000e-04 9.998898000000e-01 -1.482298000000e-02 3.195559000000e-01 2.024406000000e-03 1.482454000000e-02 9.998881000000e-01 -7.997231000000e-01 8 | 9 | -------------------------------------------------------------------------------- /avod/tests/datasets/Kitti/object/training/calib/000217.txt: -------------------------------------------------------------------------------- 1 | P0: 7.215377000000e+02 0.000000000000e+00 6.095593000000e+02 0.000000000000e+00 0.000000000000e+00 7.215377000000e+02 1.728540000000e+02 0.000000000000e+00 0.000000000000e+00 0.000000000000e+00 1.000000000000e+00 0.000000000000e+00 2 | P1: 7.215377000000e+02 0.000000000000e+00 6.095593000000e+02 -3.875744000000e+02 0.000000000000e+00 7.215377000000e+02 1.728540000000e+02 0.000000000000e+00 0.000000000000e+00 0.000000000000e+00 1.000000000000e+00 0.000000000000e+00 3 | P2: 7.215377000000e+02 0.000000000000e+00 6.095593000000e+02 4.485728000000e+01 0.000000000000e+00 7.215377000000e+02 1.728540000000e+02 2.163791000000e-01 0.000000000000e+00 
0.000000000000e+00 1.000000000000e+00 2.745884000000e-03 4 | P3: 7.215377000000e+02 0.000000000000e+00 6.095593000000e+02 -3.395242000000e+02 0.000000000000e+00 7.215377000000e+02 1.728540000000e+02 2.199936000000e+00 0.000000000000e+00 0.000000000000e+00 1.000000000000e+00 2.729905000000e-03 5 | R0_rect: 9.999239000000e-01 9.837760000000e-03 -7.445048000000e-03 -9.869795000000e-03 9.999421000000e-01 -4.278459000000e-03 7.402527000000e-03 4.351614000000e-03 9.999631000000e-01 6 | Tr_velo_to_cam: 7.533745000000e-03 -9.999714000000e-01 -6.166020000000e-04 -4.069766000000e-03 1.480249000000e-02 7.280733000000e-04 -9.998902000000e-01 -7.631618000000e-02 9.998621000000e-01 7.523790000000e-03 1.480755000000e-02 -2.717806000000e-01 7 | Tr_imu_to_velo: 9.999976000000e-01 7.553071000000e-04 -2.035826000000e-03 -8.086759000000e-01 -7.854027000000e-04 9.998898000000e-01 -1.482298000000e-02 3.195559000000e-01 2.024406000000e-03 1.482454000000e-02 9.998881000000e-01 -7.997231000000e-01 8 | 9 | -------------------------------------------------------------------------------- /avod/tests/datasets/Kitti/object/training/calib/000006.txt: -------------------------------------------------------------------------------- 1 | P0: 7.183351000000e+02 0.000000000000e+00 6.003891000000e+02 0.000000000000e+00 0.000000000000e+00 7.183351000000e+02 1.815122000000e+02 0.000000000000e+00 0.000000000000e+00 0.000000000000e+00 1.000000000000e+00 0.000000000000e+00 2 | P1: 7.183351000000e+02 0.000000000000e+00 6.003891000000e+02 -3.858846000000e+02 0.000000000000e+00 7.183351000000e+02 1.815122000000e+02 0.000000000000e+00 0.000000000000e+00 0.000000000000e+00 1.000000000000e+00 0.000000000000e+00 3 | P2: 7.183351000000e+02 0.000000000000e+00 6.003891000000e+02 4.450382000000e+01 0.000000000000e+00 7.183351000000e+02 1.815122000000e+02 -5.951107000000e-01 0.000000000000e+00 0.000000000000e+00 1.000000000000e+00 2.616315000000e-03 4 | P3: 7.183351000000e+02 0.000000000000e+00 6.003891000000e+02 -3.363147000000e+02 0.000000000000e+00 7.183351000000e+02 1.815122000000e+02 3.159867000000e+00 0.000000000000e+00 0.000000000000e+00 1.000000000000e+00 5.323834000000e-03 5 | R0_rect: 9.999478000000e-01 9.791707000000e-03 -2.925305000000e-03 -9.806939000000e-03 9.999382000000e-01 -5.238719000000e-03 2.873828000000e-03 5.267134000000e-03 9.999820000000e-01 6 | Tr_velo_to_cam: 7.755449000000e-03 -9.999694000000e-01 -1.014303000000e-03 -7.275538000000e-03 2.294056000000e-03 1.032122000000e-03 -9.999968000000e-01 -6.324057000000e-02 9.999673000000e-01 7.753097000000e-03 2.301990000000e-03 -2.670414000000e-01 7 | Tr_imu_to_velo: 9.999976000000e-01 7.553071000000e-04 -2.035826000000e-03 -8.086759000000e-01 -7.854027000000e-04 9.998898000000e-01 -1.482298000000e-02 3.195559000000e-01 2.024406000000e-03 1.482454000000e-02 9.998881000000e-01 -7.997231000000e-01 8 | 9 | -------------------------------------------------------------------------------- /avod/tests/datasets/Kitti/object/training/calib/000076.txt: -------------------------------------------------------------------------------- 1 | P0: 7.183351000000e+02 0.000000000000e+00 6.003891000000e+02 0.000000000000e+00 0.000000000000e+00 7.183351000000e+02 1.815122000000e+02 0.000000000000e+00 0.000000000000e+00 0.000000000000e+00 1.000000000000e+00 0.000000000000e+00 2 | P1: 7.183351000000e+02 0.000000000000e+00 6.003891000000e+02 -3.858846000000e+02 0.000000000000e+00 7.183351000000e+02 1.815122000000e+02 0.000000000000e+00 0.000000000000e+00 0.000000000000e+00 1.000000000000e+00 
0.000000000000e+00 3 | P2: 7.183351000000e+02 0.000000000000e+00 6.003891000000e+02 4.450382000000e+01 0.000000000000e+00 7.183351000000e+02 1.815122000000e+02 -5.951107000000e-01 0.000000000000e+00 0.000000000000e+00 1.000000000000e+00 2.616315000000e-03 4 | P3: 7.183351000000e+02 0.000000000000e+00 6.003891000000e+02 -3.363147000000e+02 0.000000000000e+00 7.183351000000e+02 1.815122000000e+02 3.159867000000e+00 0.000000000000e+00 0.000000000000e+00 1.000000000000e+00 5.323834000000e-03 5 | R0_rect: 9.999478000000e-01 9.791707000000e-03 -2.925305000000e-03 -9.806939000000e-03 9.999382000000e-01 -5.238719000000e-03 2.873828000000e-03 5.267134000000e-03 9.999820000000e-01 6 | Tr_velo_to_cam: 7.755449000000e-03 -9.999694000000e-01 -1.014303000000e-03 -7.275538000000e-03 2.294056000000e-03 1.032122000000e-03 -9.999968000000e-01 -6.324057000000e-02 9.999673000000e-01 7.753097000000e-03 2.301990000000e-03 -2.670414000000e-01 7 | Tr_imu_to_velo: 9.999976000000e-01 7.553071000000e-04 -2.035826000000e-03 -8.086759000000e-01 -7.854027000000e-04 9.998898000000e-01 -1.482298000000e-02 3.195559000000e-01 2.024406000000e-03 1.482454000000e-02 9.998881000000e-01 -7.997231000000e-01 8 | 9 | -------------------------------------------------------------------------------- /avod/core/trainer_utils.py: -------------------------------------------------------------------------------- 1 | import os 2 | import tensorflow as tf 3 | 4 | slim = tf.contrib.slim 5 | 6 | 7 | def load_checkpoints(checkpoint_dir, saver): 8 | 9 | # Load latest checkpoint if available 10 | all_checkpoint_states = tf.train.get_checkpoint_state( 11 | checkpoint_dir) 12 | if all_checkpoint_states is not None: 13 | all_checkpoint_paths = \ 14 | all_checkpoint_states.all_model_checkpoint_paths 15 | # Save the checkpoint list into saver.last_checkpoints 16 | saver.recover_last_checkpoints(all_checkpoint_paths) 17 | else: 18 | print('No checkpoints found') 19 | 20 | 21 | def get_global_step(sess, global_step_tensor): 22 | # Read the global step if restored 23 | global_step = tf.train.global_step(sess, 24 | global_step_tensor) 25 | return global_step 26 | 27 | 28 | def create_dir(dir): 29 | """ 30 | Checks if a directory exists, and creates it if it does not 31 | 32 | Args: 33 | dir: directory to create 34 | """ 35 | if not os.path.exists(dir): 36 | os.makedirs(dir) 37 | 38 | 39 | def load_model_weights(sess, checkpoint_dir): 40 | """Restores the model weights. 41 | 42 | Loads the weights from the checkpoint dir onto the 43 | model. It ignores missing weights, since this is used 44 | to load the RPN weights onto AVOD.
45 | 46 | Args: 47 | sess: A TensorFlow session 48 | checkpoint_dir: Path to the weights to be loaded 49 | """ 50 | 51 | init_fn = slim.assign_from_checkpoint_fn( 52 | checkpoint_dir, slim.get_model_variables(), ignore_missing_vars=True) 53 | init_fn(sess) 54 | -------------------------------------------------------------------------------- /avod/core/avod_fc_layers/avod_fc_layer_utils.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | 3 | OFFSETS_OUTPUT_SIZE = { 4 | 'box_3d': 6, 5 | 'box_8c': 24, 6 | 'box_8co': 24, 7 | 'box_4c': 10, 8 | 'box_4ca': 10, 9 | } 10 | 11 | ANG_VECS_OUTPUT_SIZE = { 12 | 'box_3d': 2, 13 | 'box_8c': 0, 14 | 'box_8co': 0, 15 | 'box_4c': 0, 16 | 'box_4ca': 2, 17 | } 18 | 19 | 20 | def feature_fusion(fusion_method, inputs, input_weights): 21 | """Applies feature fusion to multiple inputs 22 | 23 | Args: 24 | fusion_method: 'mean', 'concat', or 'max' 25 | inputs: Input tensors of shape (batch_size, width, height, depth) 26 | If fusion_method is 'mean', inputs must have same dimensions. 27 | If fusion_method is 'concat', width and height must be the same; 'max' fuses the first two inputs elementwise. 28 | input_weights: Weight of each input if using 'mean' fusion method 29 | 30 | Returns: 31 | fused_features: Features after fusion 32 | """ 33 | 34 | # Feature map fusion 35 | with tf.variable_scope('fusion'): 36 | fused_features = None 37 | 38 | if fusion_method == 'mean': 39 | rois_sum = tf.reduce_sum(inputs, axis=0) 40 | rois_mean = tf.divide(rois_sum, tf.reduce_sum(input_weights)) 41 | fused_features = rois_mean 42 | 43 | elif fusion_method == 'concat': 44 | # Concatenate along last axis 45 | last_axis = len(inputs[0].get_shape()) - 1 46 | fused_features = tf.concat(inputs, axis=last_axis) 47 | 48 | elif fusion_method == 'max': 49 | fused_features = tf.maximum(inputs[0], inputs[1]) 50 | 51 | else: 52 | raise ValueError('Invalid fusion method', fusion_method) 53 | 54 | return fused_features 55 | -------------------------------------------------------------------------------- /avod/core/model.py: -------------------------------------------------------------------------------- 1 | """Abstract detection model. 2 | 3 | This file defines a generic base class for detection models. Programs that are 4 | designed to work with arbitrary detection models should only depend on this 5 | class. We intend for the functions in this class to follow tensor-in/tensor-out 6 | design, thus all functions have tensors or lists/dictionaries holding tensors as 7 | inputs and outputs. 8 | 9 | Abstractly, detection models predict output tensors given input images 10 | which can be passed to a loss function at training time or passed to a 11 | postprocessing function at eval time. The postprocessing happens outside the 12 | model. 13 | 14 | """ 15 | from abc import ABCMeta 16 | from abc import abstractmethod 17 | 18 | 19 | class DetectionModel(object): 20 | """Abstract base class for detection models.""" 21 | __metaclass__ = ABCMeta 22 | 23 | def __init__(self, model_config): 24 | """Constructor.
25 | 26 | Args: 27 | model_config: configuration for the model 28 | """ 29 | self._config = model_config 30 | 31 | @property 32 | def model_config(self): 33 | return self._config 34 | 35 | @abstractmethod 36 | def create_feed_dict(self): 37 | """ To be overridden 38 | Creates a feed_dict that can be passed into a tensorflow session 39 | 40 | Returns: a dictionary with tensors as keys and numpy arrays as values 41 | """ 42 | return dict() 43 | 44 | @abstractmethod 45 | def loss(self, prediction_dict): 46 | """Compute scalar loss tensors with respect to provided groundtruth. 47 | 48 | Calling this function requires that groundtruth tensors have been 49 | provided via the provide_groundtruth function. 50 | 51 | Args: 52 | prediction_dict: a dictionary holding predicted tensors 53 | 54 | Returns: 55 | a dictionary mapping strings (loss names) to scalar tensors 56 | representing loss values. 57 | """ 58 | pass 59 | -------------------------------------------------------------------------------- /avod/configs/unittest_model.config: -------------------------------------------------------------------------------- 1 | # This config is used for model unit tests 2 | 3 | model_name: 'avod_model' 4 | checkpoint_name: 'unittest_model' 5 | 6 | rpn_config { 7 | rpn_proposal_roi_crop_size: 3 8 | rpn_fusion_method: 'mean' 9 | rpn_train_nms_size: 128 10 | rpn_test_nms_size: 128 11 | rpn_nms_iou_thresh: 0.8 12 | } 13 | 14 | avod_config { 15 | avod_proposal_roi_crop_size: 3 16 | avod_positive_selection: 'corr_cls' 17 | avod_nms_size: 128 18 | avod_nms_iou_thresh: 0.1 19 | avod_box_representation: 'box_3d' 20 | } 21 | 22 | label_smoothing_epsilon: 0.001 23 | expand_proposals_xz: 0.0 24 | path_drop_probabilities: [0.5, 0.5] 25 | train_on_all_samples: False 26 | eval_all_samples: False 27 | 28 | layers_config { 29 | bev_feature_extractor { 30 | bev_vgg { 31 | vgg_conv1: [2, 8] 32 | vgg_conv2: [2, 16] 33 | vgg_conv3: [3, 32] 34 | vgg_conv4: [3, 64] 35 | upsampling_multiplier: 1 36 | 37 | l2_weight_decay: 0.0005 38 | } 39 | } 40 | img_feature_extractor { 41 | img_vgg { 42 | vgg_conv1: [2, 8] 43 | vgg_conv2: [2, 16] 44 | vgg_conv3: [3, 32] 45 | vgg_conv4: [3, 64] 46 | upsampling_multiplier: 1 47 | 48 | l2_weight_decay: 0.0005 49 | } 50 | } 51 | rpn_config { 52 | cls_fc6 : 16 53 | cls_fc7 : 16 54 | 55 | reg_fc6 : 16 56 | reg_fc7 : 16 57 | 58 | l2_weight_decay: 0.001 59 | keep_prob: 0.5 60 | } 61 | avod_config { 62 | basic_fc_layers { 63 | num_layers: 2 64 | layer_sizes: [32, 32] 65 | l2_weight_decay: 0.005 66 | keep_prob: 0.5 67 | fusion_method: 'mean' # 'mean' or 'concat' 68 | } 69 | } 70 | } 71 | 72 | # Loss function weights 73 | loss_config { 74 | reg_loss_weight: 10.0 75 | ang_loss_weight: 10.0 76 | cls_loss_weight: 5.0 77 | } 78 | -------------------------------------------------------------------------------- /demos/dataset/data_mean.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from PIL import Image 3 | 4 | from avod.builders.dataset_builder import DatasetBuilder 5 | 6 | 7 | def main(): 8 | """ 9 | Calculates and prints the mean values for the RGB channels in a dataset 10 | """ 11 | 12 | dataset_builder = DatasetBuilder() 13 | dataset = dataset_builder.build_kitti_dataset( 14 | dataset_builder.KITTI_TRAIN 15 | # dataset_builder.KITTI_TRAIN_MINI 16 | ) 17 | 18 | # Options 19 | debug_print = True 20 | get_bev_mean = False 21 | 22 | # Dataset values 23 | dataset_utils = dataset.kitti_utils 24 | num_samples = dataset.num_samples 25 | clusters, _ = 
dataset.get_cluster_info() 26 | num_bev_maps = len(clusters) + 1 # Height Maps + Density Map 27 | 28 | pixels_sum = np.zeros(3) # RGB 29 | bev_sum = np.zeros(num_bev_maps) 30 | 31 | for sample_idx in range(num_samples): 32 | sample_name = dataset.sample_names[sample_idx] 33 | 34 | image_path = dataset.get_rgb_image_path(sample_name) 35 | image = np.asarray(Image.open(image_path)) 36 | 37 | pixels_r = np.mean(image[:, :, 0]) 38 | pixels_g = np.mean(image[:, :, 1]) 39 | pixels_b = np.mean(image[:, :, 2]) 40 | 41 | pixel_means = np.stack((pixels_r, pixels_g, pixels_b)) 42 | pixels_sum += pixel_means 43 | 44 | if get_bev_mean: 45 | bev_images = dataset_utils.create_bev_maps(sample_name, 46 | source='lidar') 47 | height_maps = np.asarray(bev_images['height_maps']) 48 | density_map = np.asarray(bev_images['density_map']) 49 | 50 | height_means = [np.mean(height_map) for height_map in height_maps] 51 | density_mean = np.mean(density_map) 52 | 53 | bev_means = np.stack((*height_means, density_mean)) 54 | bev_sum += bev_means 55 | 56 | if debug_print: 57 | debug_string = '{} / {}, Sample {}, pixel_means {}'.format( 58 | sample_idx + 1, num_samples, sample_name, pixel_means) 59 | if get_bev_mean: 60 | debug_string += ' bev_means {}'.format(bev_means) 61 | 62 | print(debug_string) 63 | 64 | print("Dataset: {}, split: {}".format(dataset.name, dataset.data_split)) 65 | print("Image mean: {}".format(pixels_sum / num_samples)) 66 | 67 | if get_bev_mean: 68 | print("BEV mean: {}".format(bev_sum / num_samples)) 69 | 70 | 71 | if __name__ == '__main__': 72 | main() 73 | -------------------------------------------------------------------------------- /avod/core/anchor_generator.py: -------------------------------------------------------------------------------- 1 | # Copyright 2017 The TensorFlow Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | # ============================================================================== 15 | 16 | """Base anchor generator. 17 | 18 | The job of the anchor generator is to create (or load) a collection 19 | of bounding boxes to be used as anchors. 20 | 21 | Generated anchors are assumed to match some convolutional grid or list of grid 22 | shapes. For example, we might want to generate anchors matching an 8x8 23 | feature map and a 4x4 feature map. If we place 3 anchors per grid location 24 | on the first feature map and 6 anchors per grid location on the second feature 25 | map, then 3*8*8 + 6*4*4 = 288 anchors are generated in total. 26 | 27 | To support fully convolutional settings, feature map shapes are passed 28 | dynamically at generation time. The number of anchors to place at each location 29 | is static --- implementations of AnchorGenerator must always be able to return 30 | the number of anchors that they use per location for each feature map.
31 | """ 32 | from abc import ABCMeta 33 | from abc import abstractmethod 34 | 35 | import tensorflow as tf 36 | 37 | 38 | class AnchorGenerator(object): 39 | """Abstract base class for anchor generators.""" 40 | __metaclass__ = ABCMeta 41 | 42 | @abstractmethod 43 | def name_scope(self): 44 | """Name scope. 45 | 46 | Must be defined by implementations. 47 | 48 | Returns: 49 | a string representing the name scope of the anchor generation operation. 50 | """ 51 | pass 52 | 53 | def generate(self, **params): 54 | """Generates a collection of bounding boxes to be used as anchors. 55 | """ 56 | return self._generate(**params) 57 | 58 | @abstractmethod 59 | def _generate(self, **params): 60 | """To be overridden by implementations. 61 | 62 | Args: 63 | **params: parameters for anchor generation op 64 | 65 | Returns: 66 | boxes: a BoxList holding a collection of N anchor boxes 67 | """ 68 | pass 69 | -------------------------------------------------------------------------------- /scripts/offline_eval/plot_ap.py: -------------------------------------------------------------------------------- 1 | import matplotlib.pyplot as plt 2 | import numpy as np 3 | 4 | 5 | def main(): 6 | """Plots AP scores from the native eval script and prints top 5 checkpoints 7 | for each metric 8 | """ 9 | 10 | # Output from native eval 11 | results_file = 'results/pyramid_cars_with_aug_example_results_0.1.txt' 12 | 13 | # Top n medium score indices to print 14 | top_n_to_print = 5 15 | 16 | with open(results_file) as f: 17 | lines = f.readlines() 18 | 19 | num_lines = len(lines) 20 | line_idx = 0 21 | 22 | ap_dict = {} 23 | current_step = 0 24 | 25 | while line_idx < num_lines - 1: 26 | line = lines[line_idx].rstrip('\n') 27 | 28 | # Step 29 | if line.isdigit(): 30 | current_step = int(line) 31 | else: 32 | ap_line = line.split(' ') 33 | 34 | if '_detection' in ap_line[0] or '_heading' in ap_line[0]: 35 | detection_type = str(ap_line[0]) 36 | ap_vals = np.hstack([current_step, ap_line[2:]]) 37 | 38 | if ap_dict.get(detection_type): 39 | ap_dict[detection_type].append(ap_vals) 40 | else: 41 | ap_dict.update({detection_type: [ap_vals]}) 42 | else: 43 | # Ignore line (e.g. 'done', 'directory exists', etc.) 
44 | pass 45 | 46 | line_idx += 1 47 | 48 | # Plot results (2D, 3D, BEV, 3D_heading, BEV_heading) 49 | num_ap_plots = len(ap_dict) 50 | plot_cols = 5 51 | plot_rows = int(np.ceil(num_ap_plots / plot_cols)) 52 | 53 | fig, ax_arr = plt.subplots(plot_rows, plot_cols, 54 | figsize=(17, 4 * plot_rows)) 55 | fig.canvas.set_window_title(results_file) 56 | ax_arr = ax_arr.reshape(-1, plot_cols) 57 | 58 | # Create plots 59 | sorted_items = sorted(ap_dict.items()) 60 | for plot_idx in range(num_ap_plots): 61 | 62 | # Get values from dict 63 | values = sorted_items[plot_idx] 64 | detection_type = values[0] 65 | lines = np.asarray(values[1], dtype=np.float32) 66 | steps = lines[:, 0] 67 | ap_values = lines[:, 1:] 68 | 69 | top_n_med_indices = np.argsort(ap_values[:, 1])[-top_n_to_print:][::-1] 70 | 71 | print('{:25s}'.format(detection_type), steps.take(top_n_med_indices)) 72 | 73 | # Plot 74 | plot_row = plot_idx // plot_cols 75 | plot_col = plot_idx % plot_cols 76 | ax_arr[plot_row, plot_col].plot(steps, ap_values) 77 | ax_arr[plot_row, plot_col].set_title(detection_type) 78 | 79 | plt.legend(labels=['easy', 'medium', 'hard']) 80 | plt.show() 81 | 82 | 83 | if __name__ == '__main__': 84 | main() 85 | -------------------------------------------------------------------------------- /avod/core/feature_extractors/img_feature_extractor.py: -------------------------------------------------------------------------------- 1 | from abc import abstractmethod 2 | 3 | import tensorflow as tf 4 | 5 | 6 | class ImgFeatureExtractor: 7 | 8 | # Kitti image mean per channel 9 | _R_MEAN = 92.8403 10 | _G_MEAN = 97.7996 11 | _B_MEAN = 93.5843 12 | 13 | def __init__(self, extractor_config): 14 | self.config = extractor_config 15 | 16 | def preprocess_input(self, tensor_in, output_size): 17 | """Preprocesses the given input. 18 | 19 | Args: 20 | tensor_in: A `Tensor` of shape=(batch_size, height, 21 | width, channels) representing an input image. 22 | output_size: The size (H x W) to resize the input to 23 | 24 | Returns: 25 | Preprocessed tensor input, resized to the output_size 26 | """ 27 | image = tf.image.resize_images(tensor_in, output_size) 28 | image = tf.squeeze(image) 29 | image = tf.to_float(image) 30 | image_normalized = self._mean_image_subtraction(image, 31 | [self._R_MEAN, 32 | self._G_MEAN, 33 | self._B_MEAN]) 34 | tensor_out = tf.expand_dims(image_normalized, axis=0) 35 | return tensor_out 36 | 37 | def _mean_image_subtraction(self, image, means): 38 | """Subtracts the given means from each image channel. 39 | 40 | For example: 41 | means = [123.68, 116.779, 103.939] 42 | image = _mean_image_subtraction(image, means) 43 | 44 | Note that the rank of `image` must be known. 45 | 46 | Args: 47 | image: a tensor of size [height, width, C]. 48 | means: a C-vector of values to subtract from each channel. 49 | 50 | Returns: 51 | the centered image. 52 | 53 | Raises: 54 | ValueError: If the rank of `image` is unknown, if `image` has a rank 55 | other than three or if the number of channels in `image` doesn't 56 | match the number of values in `means`.
57 | """ 58 | if image.get_shape().ndims != 3: 59 | raise ValueError('Input must be of size [height, width, C>0]') 60 | num_channels = image.get_shape().as_list()[-1] 61 | if len(means) != num_channels: 62 | raise ValueError('len(means) must match the number of channels') 63 | 64 | channels = tf.split( 65 | axis=2, 66 | num_or_size_splits=num_channels, 67 | value=image) 68 | for i in range(num_channels): 69 | channels[i] -= means[i] 70 | return tf.concat(axis=2, values=channels) 71 | 72 | @abstractmethod 73 | def build(self, **kwargs): 74 | pass 75 | -------------------------------------------------------------------------------- /avod/core/losses_test.py: -------------------------------------------------------------------------------- 1 | """Tests for object detection losses module.""" 2 | 3 | import numpy as np 4 | import tensorflow as tf 5 | 6 | from avod.core import losses 7 | 8 | 9 | class WeightedL2LocalizationLossTest(tf.test.TestCase): 10 | 11 | def testReturnsCorrectLoss(self): 12 | batch_size = 3 13 | num_anchors = 10 14 | code_size = 4 15 | prediction_tensor = tf.ones([batch_size, num_anchors, code_size]) 16 | target_tensor = tf.zeros([batch_size, num_anchors, code_size]) 17 | weights = tf.constant([[1, 1, 1, 1, 1, 0, 0, 0, 0, 0], 18 | [1, 1, 1, 1, 1, 0, 0, 0, 0, 0], 19 | [1, 1, 1, 1, 1, 0, 0, 0, 0, 0]], tf.float32) 20 | loss_op = losses.WeightedL2LocalizationLoss() 21 | loss = loss_op(prediction_tensor, target_tensor, weights=weights) 22 | 23 | expected_loss = (3 * 5 * 4) / 2.0 24 | with self.test_session() as sess: 25 | loss_output = sess.run(loss) 26 | self.assertAllClose(loss_output, expected_loss) 27 | 28 | def testReturnsCorrectLossSum(self): 29 | batch_size = 3 30 | num_anchors = 16 31 | code_size = 4 32 | prediction_tensor = tf.ones([batch_size, num_anchors, code_size]) 33 | target_tensor = tf.zeros([batch_size, num_anchors, code_size]) 34 | weights = tf.ones([batch_size, num_anchors]) 35 | loss_op = losses.WeightedL2LocalizationLoss() 36 | loss = loss_op(prediction_tensor, target_tensor, weights=weights) 37 | 38 | expected_loss = tf.nn.l2_loss(prediction_tensor - target_tensor) 39 | with self.test_session() as sess: 40 | loss_output = sess.run(loss) 41 | expected_loss_output = sess.run(expected_loss) 42 | self.assertAllClose(loss_output, expected_loss_output) 43 | 44 | def testReturnsCorrectNanLoss(self): 45 | batch_size = 3 46 | num_anchors = 10 47 | code_size = 4 48 | prediction_tensor = tf.ones([batch_size, num_anchors, code_size]) 49 | target_tensor = tf.concat([ 50 | tf.zeros([batch_size, num_anchors, code_size / 2]), 51 | tf.ones([batch_size, num_anchors, code_size / 2]) * np.nan 52 | ], 53 | axis=2) 54 | weights = tf.ones([batch_size, num_anchors]) 55 | loss_op = losses.WeightedL2LocalizationLoss() 56 | loss = loss_op(prediction_tensor, target_tensor, weights=weights, 57 | ignore_nan_targets=True) 58 | 59 | expected_loss = (3 * 5 * 4) / 2.0 60 | with self.test_session() as sess: 61 | loss_output = sess.run(loss) 62 | self.assertAllClose(loss_output, expected_loss) 63 | 64 | 65 | if __name__ == '__main__': 66 | tf.test.main() 67 | -------------------------------------------------------------------------------- /avod/core/label_cluster_utils_test.py: -------------------------------------------------------------------------------- 1 | """LabelClusterUtils unit test module""" 2 | 3 | import unittest 4 | 5 | import array 6 | import numpy as np 7 | import os 8 | 9 | import avod 10 | import avod.tests as tests 11 | 12 | from avod.builders.dataset_builder import 
DatasetBuilder 13 | from avod.core.label_cluster_utils import LabelClusterUtils 14 | 15 | 16 | class LabelClusterUtilsTest(unittest.TestCase): 17 | @classmethod 18 | def setUpClass(cls): 19 | cls.fake_kitti_dir = tests.test_path() + "/datasets/Kitti/object" 20 | cls.dataset = DatasetBuilder.build_kitti_dataset( 21 | DatasetBuilder.KITTI_UNITTEST) 22 | 23 | def test_get_clusters(self): 24 | 25 | # classes = ['Car', 'Pedestrian', 'Cyclist'] 26 | num_clusters = [2, 1, 1] 27 | 28 | label_cluster_utils = LabelClusterUtils(self.dataset) 29 | clusters, std_devs = label_cluster_utils.get_clusters() 30 | 31 | # Check that correct number of clusters are returned 32 | clusters_per_class = [len(cls_clusters) for cls_clusters in clusters] 33 | std_devs_per_class = [len(cls_std_devs) for cls_std_devs in std_devs] 34 | 35 | self.assertEqual(clusters_per_class, num_clusters) 36 | self.assertEqual(std_devs_per_class, num_clusters) 37 | 38 | # Check that text files were saved 39 | txt_folder_exists = os.path.isdir( 40 | avod.root_dir() + "/data/label_clusters/unittest-kitti") 41 | self.assertTrue(txt_folder_exists) 42 | 43 | # Calling get_clusters again should read from files 44 | read_clusters, read_std_devs = label_cluster_utils.get_clusters() 45 | 46 | # Check that read values are the same as generated ones 47 | np.testing.assert_allclose(np.vstack(clusters), 48 | np.vstack(read_clusters)) 49 | np.testing.assert_allclose(np.vstack(std_devs), 50 | np.vstack(read_std_devs)) 51 | 52 | def test_flatten_data(self): 53 | data_to_reshape = list() 54 | 55 | data_to_reshape.append([[1, 2, 3], [4, 5, 6]]) 56 | data_to_reshape.append([[7, 8, 9]]) 57 | data_to_reshape.append([[10, 11, 12], [13, 14, 15]]) 58 | 59 | expected_output = np.array([[1, 2, 3], 60 | [4, 5, 6], 61 | [7, 8, 9], 62 | [10, 11, 12], 63 | [13, 14, 15]]) 64 | 65 | label_cluster_utils = LabelClusterUtils(self.dataset) 66 | 67 | flattened = label_cluster_utils._flatten_data(data_to_reshape) 68 | np.testing.assert_array_equal(flattened, 69 | expected_output, 70 | err_msg='Wrong flattened array') 71 | -------------------------------------------------------------------------------- /avod/datasets/kitti/kitti_utils_test.py: -------------------------------------------------------------------------------- 1 | """KittiUtil unit test module.""" 2 | 3 | import numpy as np 4 | import unittest 5 | 6 | from wavedata.tools.obj_detection import obj_utils as obj_utils 7 | from avod.builders.dataset_builder import DatasetBuilder 8 | 9 | 10 | class KittiUtilsTest(unittest.TestCase): 11 | @classmethod 12 | def setUpClass(cls): 13 | dataset_config = DatasetBuilder.copy_config( 14 | DatasetBuilder.KITTI_UNITTEST) 15 | 16 | cls.dataset = DatasetBuilder.build_kitti_dataset(dataset_config) 17 | cls.label_dir = cls.dataset.label_dir 18 | 19 | def test_create_slice_filter(self): 20 | # Test slice filtering between 0.2 and 2.0m on three points located 21 | # at y=[0.0, 1.0, 3.0] with a flat ground plane along y 22 | 23 | # Create fake point cloud 24 | point_cloud = np.array([[1.0, 1.0, 1.0], 25 | [0.0, 1.0, 3.0], 26 | [1.0, 1.0, 1.0]]) 27 | 28 | area_extents = [[-2, 2], [-5, 5], [-2, 2]] 29 | ground_plane = [0, 1, 0, 0] 30 | 31 | ground_offset_dist = 0.2 32 | offset_dist = 2.0 33 | 34 | expected_slice_filter = [False, True, False] 35 | 36 | slice_filter = self.dataset.kitti_utils.create_slice_filter( 37 | point_cloud, area_extents, ground_plane, 38 | ground_offset_dist, offset_dist) 39 | 40 | np.testing.assert_equal(slice_filter, expected_slice_filter) 41 | 42 | def 
test_rotate_map_90_degrees(self): 43 | # Check that a transpose and flip returns the same ndarray as np.rot90 44 | # This logic is part of create_bev_images 45 | 46 | np.random.seed(123) 47 | fake_bev_map = np.random.rand(800, 700) 48 | 49 | # Rotate with a transpose then flip (faster than np.rot90) 50 | np_transpose_then_flip_out = np.flip(fake_bev_map.transpose(), axis=0) 51 | 52 | # Expected result from np.rot90 53 | np_rot_90_out = np.rot90(fake_bev_map) 54 | 55 | np.testing.assert_allclose(np_transpose_then_flip_out, 56 | np_rot_90_out) 57 | 58 | def test_filter_labels_by_class(self): 59 | 60 | sample_name = '000007' 61 | obj_labels = obj_utils.read_labels(self.label_dir, 62 | int(sample_name)) 63 | # This particular sample has 2 valid classes 64 | exp_num_valid_classes = 2 65 | 66 | filtered_labels = \ 67 | self.dataset.kitti_utils.filter_labels(obj_labels, difficulty=None) 68 | all_types = [] 69 | for label in filtered_labels: 70 | if label.type not in all_types: 71 | all_types.append(label.type) 72 | self.assertEqual(len(all_types), 73 | exp_num_valid_classes, 74 | msg='Wrong number of labels after filtering') 75 | 76 | 77 | if __name__ == '__main__': 78 | unittest.main() 79 | -------------------------------------------------------------------------------- /avod/experiments/run_training.py: -------------------------------------------------------------------------------- 1 | """Detection model trainer. 2 | 3 | This runs the DetectionModel trainer. 4 | """ 5 | 6 | import argparse 7 | import os 8 | 9 | import tensorflow as tf 10 | 11 | import avod 12 | import avod.builders.config_builder_util as config_builder 13 | from avod.builders.dataset_builder import DatasetBuilder 14 | from avod.core.models.avod_model import AvodModel 15 | from avod.core.models.rpn_model import RpnModel 16 | from avod.core import trainer 17 | 18 | tf.logging.set_verbosity(tf.logging.ERROR) 19 | 20 | 21 | def train(model_config, train_config, dataset_config): 22 | 23 | dataset = DatasetBuilder.build_kitti_dataset(dataset_config, 24 | use_defaults=False) 25 | 26 | train_val_test = 'train' 27 | model_name = model_config.model_name 28 | 29 | with tf.Graph().as_default(): 30 | if model_name == 'rpn_model': 31 | model = RpnModel(model_config, 32 | train_val_test=train_val_test, 33 | dataset=dataset) 34 | elif model_name == 'avod_model': 35 | model = AvodModel(model_config, 36 | train_val_test=train_val_test, 37 | dataset=dataset) 38 | else: 39 | raise ValueError('Invalid model_name') 40 | 41 | trainer.train(model, train_config) 42 | 43 | 44 | def main(_): 45 | parser = argparse.ArgumentParser() 46 | 47 | # Defaults 48 | default_pipeline_config_path = avod.root_dir() + \ 49 | '/configs/avod_cars_example.config' 50 | default_data_split = 'train' 51 | default_device = '1' 52 | 53 | parser.add_argument('--pipeline_config', 54 | type=str, 55 | dest='pipeline_config_path', 56 | default=default_pipeline_config_path, 57 | help='Path to the pipeline config') 58 | 59 | parser.add_argument('--data_split', 60 | type=str, 61 | dest='data_split', 62 | default=default_data_split, 63 | help='Data split for training') 64 | 65 | parser.add_argument('--device', 66 | type=str, 67 | dest='device', 68 | default=default_device, 69 | help='CUDA device id') 70 | 71 | args = parser.parse_args() 72 | 73 | # Parse pipeline config 74 | model_config, train_config, _, dataset_config = \ 75 | config_builder.get_configs_from_pipeline_file( 76 | args.pipeline_config_path, is_training=True) 77 | 78 | # Overwrite data split 79 | 
dataset_config.data_split = args.data_split 80 | 81 | # Set CUDA device id 82 | os.environ['CUDA_VISIBLE_DEVICES'] = args.device 83 | 84 | train(model_config, train_config, dataset_config) 85 | 86 | 87 | if __name__ == '__main__': 88 | tf.app.run() 89 | -------------------------------------------------------------------------------- /avod/protos/optimizer.proto: -------------------------------------------------------------------------------- 1 | syntax = "proto2"; 2 | 3 | package avod.protos; 4 | 5 | // Messages for configuring the optimizing strategy for training object 6 | // detection models. 7 | 8 | // Top level optimizer message. 9 | message Optimizer { 10 | oneof optimizer { 11 | RMSPropOptimizer rms_prop_optimizer = 1; 12 | MomentumOptimizer momentum_optimizer = 2; 13 | AdamOptimizer adam_optimizer = 3; 14 | GradientDescentOptimizer gradient_descent = 6; 15 | } 16 | optional bool use_moving_average = 4 [default=true]; 17 | optional float moving_average_decay = 5 [default=0.9999]; 18 | } 19 | 20 | // Configuration message for the RMSPropOptimizer 21 | // See: https://www.tensorflow.org/api_docs/python/tf/train/RMSPropOptimizer 22 | message RMSPropOptimizer { 23 | optional LearningRate learning_rate = 1; 24 | optional float momentum_optimizer_value = 2 [default=0.9]; 25 | optional float decay = 3 [default=0.9]; 26 | optional float epsilon = 4 [default=1.0]; 27 | } 28 | 29 | // Configuration message for the MomentumOptimizer 30 | // See: https://www.tensorflow.org/api_docs/python/tf/train/MomentumOptimizer 31 | message MomentumOptimizer { 32 | optional LearningRate learning_rate = 1; 33 | optional float momentum_optimizer_value = 2 [default=0.9]; 34 | } 35 | 36 | // Configuration message for the AdamOptimizer 37 | // See: https://www.tensorflow.org/api_docs/python/tf/train/AdamOptimizer 38 | message AdamOptimizer { 39 | optional LearningRate learning_rate = 1; 40 | } 41 | 42 | // Configuration message for the GradientDescent 43 | // See: https://www.tensorflow.org/api_docs/python/tf/train/GradientDescentOptimizer 44 | message GradientDescentOptimizer { 45 | optional LearningRate learning_rate = 1; 46 | } 47 | 48 | 49 | // Configuration message for optimizer learning rate. 50 | message LearningRate { 51 | oneof learning_rate { 52 | ConstantLearningRate constant_learning_rate = 1; 53 | ExponentialDecayLearningRate exponential_decay_learning_rate = 2; 54 | ManualStepLearningRate manual_step_learning_rate = 3; 55 | } 56 | } 57 | 58 | // Configuration message for a constant learning rate. 59 | message ConstantLearningRate { 60 | optional float learning_rate = 1 [default=0.002]; 61 | } 62 | 63 | // Configuration message for an exponentially decaying learning rate. 64 | // See https://www.tensorflow.org/versions/master/api_docs/python/train/ \ 65 | // decaying_the_learning_rate#exponential_decay 66 | message ExponentialDecayLearningRate { 67 | optional float initial_learning_rate = 1 [default=0.002]; 68 | optional uint32 decay_steps = 2 [default=4000000]; 69 | optional float decay_factor = 3 [default=0.95]; 70 | optional bool staircase = 4 [default=true]; 71 | } 72 | 73 | // Configuration message for a manually defined learning rate schedule. 
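// Illustrative text-format fragment (the step and rate values below are
// hypothetical examples, not defaults from this repo):
//   manual_step_learning_rate {
//     initial_learning_rate: 0.002
//     schedule { step: 10000 learning_rate: 0.0002 }
//     schedule { step: 20000 learning_rate: 0.00002 }
//   }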
74 | message ManualStepLearningRate { 75 | optional float initial_learning_rate = 1 [default=0.002]; 76 | message LearningRateSchedule { 77 | optional uint32 step = 1; 78 | optional float learning_rate = 2 [default=0.002]; 79 | } 80 | repeated LearningRateSchedule schedule = 2; 81 | } 82 | -------------------------------------------------------------------------------- /avod/core/format_checker_test.py: -------------------------------------------------------------------------------- 1 | import unittest 2 | import numpy as np 3 | import tensorflow as tf 4 | 5 | from avod.core import format_checker as fc 6 | from wavedata.tools.obj_detection import obj_utils 7 | 8 | 9 | class FormatCheckerTest(unittest.TestCase): 10 | 11 | def test_check_box_3d_format(self): 12 | 13 | # Case 1, invalid type 14 | test_var = [0, 0, 0, 0, 0, 0, 0] 15 | np.testing.assert_raises(TypeError, 16 | fc.check_box_3d_format, test_var) 17 | 18 | # Case 2, invalid shape 19 | test_var = np.ones([1, 5]) 20 | np.testing.assert_raises(TypeError, 21 | fc.check_box_3d_format, test_var) 22 | 23 | test_var = np.ones([5, 6]) 24 | np.testing.assert_raises(TypeError, 25 | fc.check_box_3d_format, test_var) 26 | 27 | test_var = np.ones([1, 7]) 28 | fc.check_box_3d_format(test_var) 29 | 30 | test_var = np.ones([10, 7]) 31 | fc.check_box_3d_format(test_var) 32 | 33 | test_var = tf.ones([5, 7]) 34 | fc.check_box_3d_format(test_var) 35 | 36 | test_var = tf.ones([5, 3]) 37 | np.testing.assert_raises(TypeError, 38 | fc.check_box_3d_format, test_var) 39 | 40 | def test_check_object_label_format(self): 41 | test_obj = obj_utils.ObjectLabel() 42 | test_obj.h = 1 43 | test_obj.w = 1 44 | test_obj.l = 1 45 | test_obj.t = [1, 1, 1] 46 | test_obj.ry = 0 47 | 48 | # Case 1, Single instance of object label 49 | test_obj_list = [test_obj] 50 | fc.check_object_label_format(test_obj_list) 51 | 52 | test_obj_list = [test_obj, test_obj, test_obj] 53 | fc.check_object_label_format(test_obj_list) 54 | 55 | test_obj_list = [test_obj, test_obj, '0'] 56 | np.testing.assert_raises(TypeError, 57 | fc.check_object_label_format, test_obj_list) 58 | 59 | # Case 2, Range check 60 | test_obj.t = [1, 1] 61 | test_obj_list = [test_obj] 62 | np.testing.assert_raises(TypeError, 63 | fc.check_object_label_format, test_obj_list) 64 | 65 | def test_check_anchor_format(self): 66 | # Case 1, invalid type 67 | test_var = [0, 0, 0, 0, 0, 0] 68 | np.testing.assert_raises(TypeError, 69 | fc.check_anchor_format, test_var) 70 | 71 | # Case 2, invalid shape 72 | test_var = np.ones([1, 5]) 73 | np.testing.assert_raises(TypeError, 74 | fc.check_anchor_format, test_var) 75 | 76 | test_var = np.ones([1, 6]) 77 | fc.check_anchor_format(test_var) 78 | 79 | test_var = np.ones([5, 6]) 80 | fc.check_anchor_format(test_var) 81 | 82 | test_var = tf.ones([5, 6]) 83 | fc.check_anchor_format(test_var) 84 | 85 | test_var = tf.ones([5, 4]) 86 | np.testing.assert_raises(TypeError, 87 | fc.check_anchor_format, test_var) 88 | -------------------------------------------------------------------------------- /avod/builders/avod_fc_layers_builder.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | 3 | from avod.core.avod_fc_layers import basic_fc_layers 4 | from avod.core.avod_fc_layers import fusion_fc_layers 5 | 6 | 7 | KEY_CLS_LOGITS = 'classification_logits' 8 | KEY_OFFSETS = 'offsets' 9 | KEY_ANGLE_VECTORS = 'angle_vectors' 10 | KEY_ENDPOINTS = 'end_points' 11 | 12 | 13 | def build(layers_config, 14 | input_rois, input_weights, 15 | 
num_final_classes, box_rep, 16 | top_anchors, ground_plane, 17 | is_training): 18 | """Builds second stage fully connected layers 19 | 20 | Args: 21 | layers_config: Configuration object 22 | input_rois: List of input ROI feature maps 23 | input_weights: List of weights for each input e.g. [1.0, 1.0] 24 | num_final_classes: Number of output classes, including 'Background' 25 | box_rep: Box representation (e.g. 'box_3d', 'box_8c', etc.) 26 | top_anchors: Top proposal anchors, to include location information 27 | ground_plane: Ground plane coefficients 28 | is_training (bool): Whether the network is training or evaluating 29 | 30 | Returns: 31 | fc_output_layers: Output layer dictionary 32 | """ 33 | 34 | # Default all output layers to None 35 | cls_logits = offsets = angle_vectors = end_points = None 36 | 37 | with tf.variable_scope('box_predictor') as sc: 38 | end_points_collection = sc.name + '_end_points' 39 | 40 | fc_layers_type = layers_config.WhichOneof('fc_layers') 41 | 42 | if fc_layers_type == 'basic_fc_layers': 43 | fc_layers_config = layers_config.basic_fc_layers 44 | 45 | cls_logits, offsets, angle_vectors, end_points = \ 46 | basic_fc_layers.build( 47 | fc_layers_config=fc_layers_config, 48 | input_rois=input_rois, 49 | input_weights=input_weights, 50 | num_final_classes=num_final_classes, 51 | box_rep=box_rep, 52 | 53 | is_training=is_training, 54 | end_points_collection=end_points_collection) 55 | 56 | elif fc_layers_type == 'fusion_fc_layers': 57 | fc_layers_config = layers_config.fusion_fc_layers 58 | 59 | cls_logits, offsets, angle_vectors, end_points = \ 60 | fusion_fc_layers.build( 61 | fc_layers_config=fc_layers_config, 62 | input_rois=input_rois, 63 | input_weights=input_weights, 64 | num_final_classes=num_final_classes, 65 | box_rep=box_rep, 66 | 67 | is_training=is_training, 68 | end_points_collection=end_points_collection) 69 | 70 | else: 71 | raise ValueError('Invalid fc layers config') 72 | 73 | # # Histogram summaries 74 | # with tf.variable_scope('histograms_avod'): 75 | # for fc_layer in end_points: 76 | # tf.summary.histogram(fc_layer, end_points[fc_layer]) 77 | 78 | fc_output_layers = dict() 79 | fc_output_layers[KEY_CLS_LOGITS] = cls_logits 80 | fc_output_layers[KEY_OFFSETS] = offsets 81 | fc_output_layers[KEY_ANGLE_VECTORS] = angle_vectors 82 | fc_output_layers[KEY_ENDPOINTS] = end_points 83 | 84 | return fc_output_layers 85 | -------------------------------------------------------------------------------- /avod/protos/layers.proto: -------------------------------------------------------------------------------- 1 | package avod.protos; 2 | 3 | // Message for configuring Model Layer params. 
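// Illustrative text-format fragment for one of the feature extractors
// (all repeat/filter values below are hypothetical, not recommendations):
//   bev_feature_extractor {
//     bev_vgg {
//       vgg_conv1: [2, 32]
//       vgg_conv2: [2, 64]
//       vgg_conv3: [3, 128]
//       vgg_conv4: [3, 256]
//       upsampling_multiplier: 4
//     }
//   }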
4 | message LayersConfig { 5 | 6 | required FeatureExtractor bev_feature_extractor = 1; 7 | required FeatureExtractor img_feature_extractor = 2; 8 | 9 | required RPNLayersConfig rpn_config = 3; 10 | required AVODLayersConfig avod_config = 4; 11 | } 12 | 13 | message FeatureExtractor { 14 | oneof feature_extractor { 15 | 16 | VGGLayersConfig bev_vgg = 1; 17 | VGGLayersConfig img_vgg = 2; 18 | 19 | PyramidLayersConfig img_vgg_pyr = 3; 20 | PyramidLayersConfig bev_vgg_pyr = 4; 21 | } 22 | } 23 | 24 | message VGGLayersConfig { 25 | // Conv layer 1 [repeat, num filter] 26 | repeated int32 vgg_conv1 = 1; 27 | 28 | // Conv layer 2 [repeat, num filter] 29 | repeated int32 vgg_conv2 = 2; 30 | 31 | // Conv layer 3 [repeat, num filter] 32 | repeated int32 vgg_conv3 = 3; 33 | 34 | // Conv layer 4 [repeat, num filter] 35 | repeated int32 vgg_conv4 = 4; 36 | 37 | // Upsampling multiplier 38 | required int32 upsampling_multiplier = 5; 39 | 40 | // L2 norm weight decay 41 | optional float l2_weight_decay = 6 [default = 0.0005]; 42 | } 43 | 44 | message PyramidLayersConfig { 45 | // Conv layer 1 [repeat, num filter] 46 | repeated int32 vgg_conv1 = 1; 47 | 48 | // Conv layer 2 [repeat, num filter] 49 | repeated int32 vgg_conv2 = 2; 50 | 51 | // Conv layer 3 [repeat, num filter] 52 | repeated int32 vgg_conv3 = 3; 53 | 54 | // Conv layer 4 [repeat, num filter] 55 | repeated int32 vgg_conv4 = 4; 56 | 57 | // L2 norm weight decay 58 | optional float l2_weight_decay = 6 [default = 0.0005]; 59 | } 60 | 61 | message RPNLayersConfig { 62 | // Anchor predictor layer configs 63 | // classification fc layer size 64 | required int32 cls_fc6 = 1; 65 | required int32 cls_fc7 = 2; 66 | 67 | // Regression fc layer size 68 | required int32 reg_fc6 = 3; 69 | required int32 reg_fc7 = 4; 70 | 71 | // L2 weight decay 72 | required float l2_weight_decay = 6; 73 | 74 | // Dropout keep probability - the probability that a neuron's 75 | // output is kept during dropout 76 | optional float keep_prob = 5 [default = 0.5]; 77 | } 78 | 79 | message AVODLayersConfig { 80 | oneof fc_layers { 81 | BasicFcLayers basic_fc_layers = 1; 82 | FusionFcLayers fusion_fc_layers = 2; 83 | } 84 | } 85 | 86 | message BasicFcLayers { 87 | required int32 num_layers = 1; 88 | repeated int32 layer_sizes = 2; 89 | 90 | // L2 weight decay 91 | required float l2_weight_decay = 3; 92 | 93 | // Dropout keep probability 94 | required float keep_prob = 4; 95 | 96 | // Fusion method ('mean', 'concat') 97 | required string fusion_method = 5; 98 | } 99 | 100 | message FusionFcLayers { 101 | 102 | required int32 num_layers = 1; 103 | repeated int32 layer_sizes = 2; 104 | 105 | // L2 weight decay 106 | required float l2_weight_decay = 3; 107 | 108 | // Dropout keep probability 109 | required float keep_prob = 4; 110 | 111 | // Fusion method ('mean', 'concat') 112 | required string fusion_method = 5; 113 | 114 | // Fusion type (early, late, deep) 115 | required string fusion_type = 6; 116 | } 117 | -------------------------------------------------------------------------------- /avod/core/anchor_generators/grid_anchor_3d_generator_test.py: -------------------------------------------------------------------------------- 1 | """Grid Anchor Generation unit test module.""" 2 | import unittest 3 | import numpy as np 4 | 5 | import avod.tests as tests 6 | 7 | from avod.core.anchor_generators import grid_anchor_3d_generator 8 | from avod.builders.dataset_builder import DatasetBuilder 9 | 10 | 11 | def generate_fake_dataset(): 12 | return
DatasetBuilder.build_kitti_dataset(DatasetBuilder.KITTI_UNITTEST) 13 | 14 | 15 | class GridAnchor3dGeneratorTest(unittest.TestCase): 16 | @classmethod 17 | def setUpClass(cls): 18 | cls.fake_kitti_dir = tests.test_path() + "/datasets/Kitti/object" 19 | cls.dataset = generate_fake_dataset() 20 | 21 | # create generic ground plane (normal vector is straight up) 22 | cls.ground_plane = np.array([0., -1., 0., 0.]) 23 | cls.clusters = np.array([[1., 1., 1.], [2., 1., 1.]]) 24 | 25 | cls.anchor_generator = grid_anchor_3d_generator.GridAnchor3dGenerator() 26 | 27 | def test_generate_anchors(self): 28 | normal_area = [(-1., 1.), (-1., 0.), (0., 1.)] 29 | no_x_area = [(0., 0.), (-1., 0.), (0., 2.)] 30 | no_z_area = [(-1., 1.), (-1., 0.), (0., 0.)] 31 | 32 | expected_anchors = np.array([[-0.5, 0., 0.5, 1., 1., 1., 0.], 33 | [-0.5, 0., 0.5, 1., 1., 1., np.pi / 2], 34 | [-0.5, 0., 0.5, 2., 1., 1., 0.], 35 | [-0.5, 0., 0.5, 2., 1., 1., np.pi / 2], 36 | [0.5, 0., 0.5, 1., 1., 1., 0.], 37 | [0.5, 0., 0.5, 1., 1., 1., np.pi / 2], 38 | [0.5, 0., 0.5, 2., 1., 1., 0.], 39 | [0.5, 0., 0.5, 2., 1., 1., np.pi / 2]]) 40 | gen_anchors = \ 41 | self.anchor_generator.generate(area_3d=normal_area, 42 | anchor_3d_sizes=self.clusters, 43 | anchor_stride=[1, 1], 44 | ground_plane=self.ground_plane) 45 | self.assertEqual(gen_anchors.shape, expected_anchors.shape) 46 | np.testing.assert_almost_equal(gen_anchors, 47 | expected_anchors, 48 | decimal=3) 49 | 50 | expected_anchors = np.ndarray(shape=(0, 7)) 51 | gen_anchors = \ 52 | self.anchor_generator.generate(area_3d=no_x_area, 53 | anchor_3d_sizes=self.clusters, 54 | anchor_stride=[1, 1], 55 | ground_plane=self.ground_plane) 56 | self.assertEqual(gen_anchors.shape, expected_anchors.shape) 57 | np.testing.assert_almost_equal(gen_anchors, 58 | expected_anchors, 59 | decimal=3) 60 | 61 | expected_anchors = np.ndarray(shape=(0, 7)) 62 | gen_anchors = \ 63 | self.anchor_generator.generate(area_3d=no_z_area, 64 | anchor_3d_sizes=self.clusters, 65 | anchor_stride=[1, 1], 66 | ground_plane=self.ground_plane) 67 | self.assertEqual(gen_anchors.shape, expected_anchors.shape) 68 | np.testing.assert_almost_equal(gen_anchors, 69 | expected_anchors, 70 | decimal=3) 71 | 72 | 73 | if __name__ == '__main__': 74 | unittest.main() 75 | -------------------------------------------------------------------------------- /avod/utils/demo_utils.py: -------------------------------------------------------------------------------- 1 | import copy 2 | import numpy as np 3 | import tensorflow as tf 4 | 5 | from wavedata.tools.obj_detection import obj_utils 6 | from wavedata.tools.obj_detection import evaluation 7 | 8 | from avod.core import anchor_projector 9 | from avod.core import box_3d_encoder 10 | 11 | 12 | COLOUR_SCHEME_PREDICTIONS = { 13 | "Easy GT": (255, 255, 0), # Yellow 14 | "Medium GT": (255, 128, 0), # Orange 15 | "Hard GT": (255, 0, 0), # Red 16 | 17 | "Prediction": (50, 255, 50), # Green 18 | } 19 | 20 | 21 | def get_gts_based_on_difficulty(dataset, img_idx): 22 | """Returns lists of ground-truth based on difficulty. 
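
    Difficulty follows the KITTI convention used by filter_labels below:
    0 = Easy, 1 = Medium, 2 = Hard.

    Args:
        dataset: dataset object providing label_dir and kitti_utils
        img_idx: integer index of the sample image

    Returns:
        easy_gt_objs, medium_gt_objs, hard_gt_objs, all_gt_objs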
23 | """ 24 | # Get all ground truth labels 25 | all_gt_objs = obj_utils.read_labels(dataset.label_dir, img_idx) 26 | 27 | # Filter to dataset classes 28 | gt_objs = dataset.kitti_utils.filter_labels(all_gt_objs) 29 | 30 | # Filter objects to desired difficulty 31 | easy_gt_objs = dataset.kitti_utils.filter_labels( 32 | copy.deepcopy(gt_objs), difficulty=0) 33 | medium_gt_objs = dataset.kitti_utils.filter_labels( 34 | copy.deepcopy(gt_objs), difficulty=1) 35 | hard_gt_objs = dataset.kitti_utils.filter_labels( 36 | copy.deepcopy(gt_objs), difficulty=2) 37 | 38 | for gt_obj in easy_gt_objs: 39 | gt_obj.type = 'Easy GT' 40 | for gt_obj in medium_gt_objs: 41 | gt_obj.type = 'Medium GT' 42 | for gt_obj in hard_gt_objs: 43 | gt_obj.type = 'Hard GT' 44 | 45 | return easy_gt_objs, medium_gt_objs, hard_gt_objs, all_gt_objs 46 | 47 | 48 | def get_max_ious_3d(all_gt_boxes_3d, pred_boxes_3d): 49 | """Helper function to calculate 3D IoU for the given predictions. 50 | 51 | Args: 52 | all_gt_boxes_3d: A list of the same ground-truth boxes in box_3d 53 | format. 54 | pred_boxes_3d: A list of predictions in box_3d format. 55 | """ 56 | 57 | # Only calculate ious if there are predictions 58 | if pred_boxes_3d: 59 | # Convert to iou format 60 | gt_objs_iou_fmt = box_3d_encoder.box_3d_to_3d_iou_format( 61 | all_gt_boxes_3d) 62 | pred_objs_iou_fmt = box_3d_encoder.box_3d_to_3d_iou_format( 63 | pred_boxes_3d) 64 | 65 | max_ious_3d = np.zeros(len(all_gt_boxes_3d)) 66 | for gt_obj_idx in range(len(all_gt_boxes_3d)): 67 | 68 | gt_obj_iou_fmt = gt_objs_iou_fmt[gt_obj_idx] 69 | 70 | ious_3d = evaluation.three_d_iou(gt_obj_iou_fmt, 71 | pred_objs_iou_fmt) 72 | 73 | max_ious_3d[gt_obj_idx] = np.amax(ious_3d) 74 | else: 75 | # No detections, all ious = 0 76 | max_ious_3d = np.zeros(len(all_gt_boxes_3d)) 77 | 78 | return max_ious_3d 79 | 80 | 81 | def tf_project_to_image_space(anchors, calib_p2, image_shape, img_idx): 82 | """Helper function to convert data to tensors and project 83 | to image space using the tf projection function. 84 | """ 85 | 86 | anchors_tensor = tf.convert_to_tensor(anchors, tf.float32) 87 | calib_p2_tensor = tf.convert_to_tensor(calib_p2, tf.float32) 88 | image_shape_tensor = tf.convert_to_tensor(image_shape, tf.float32) 89 | 90 | projected_boxes_tensor, _ = \ 91 | anchor_projector.tf_project_to_image_space( 92 | anchors_tensor, 93 | calib_p2_tensor, 94 | image_shape_tensor) 95 | sess = tf.Session() 96 | 97 | with sess.as_default(): 98 | projected_boxes = projected_boxes_tensor.eval() 99 | 100 | return projected_boxes 101 | -------------------------------------------------------------------------------- /avod/core/minibatch_sampler.py: -------------------------------------------------------------------------------- 1 | # Copyright 2017 The TensorFlow Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | # ============================================================================== 15 | 16 | """Base minibatch sampler module. 
17 | 18 | The job of the minibatch_sampler is to subsample a minibatch based on some 19 | criterion. 20 | 21 | The main function call is: 22 | subsample(indicator, batch_size, **params). 23 | Indicator is a 1d boolean tensor where True denotes which examples can be 24 | sampled. It returns a boolean indicator where True denotes an example has been 25 | sampled. 26 | 27 | Subclasses should implement the Subsample function and can make use of the 28 | @staticmethod SubsampleIndicator. 29 | """ 30 | 31 | from abc import ABCMeta 32 | from abc import abstractmethod 33 | 34 | import tensorflow as tf 35 | 36 | from avod.core import ops 37 | 38 | 39 | class MinibatchSampler(object): 40 | """Abstract base class for subsampling minibatches.""" 41 | __metaclass__ = ABCMeta 42 | 43 | def __init__(self): 44 | """Constructs a minibatch sampler.""" 45 | pass 46 | 47 | @abstractmethod 48 | def subsample(self, indicator, batch_size, **params): 49 | """Returns subsample of entries in indicator. 50 | 51 | Args: 52 | indicator: boolean tensor of shape [N] whose 53 | True entries can be sampled. 54 | batch_size: desired batch size. 55 | **params: additional keyword arguments for 56 | specific implementations of the MinibatchSampler. 57 | 58 | Returns: 59 | sample_indicator: boolean tensor of shape [N] whose 60 | True entries have been sampled. 61 | If sum(indicator) >= batch_size, sum(is_sampled) = batch_size 62 | """ 63 | pass 64 | 65 | @staticmethod 66 | def subsample_indicator(indicator, num_samples): 67 | """Subsample indicator vector. 68 | 69 | Given a boolean indicator vector with M elements set to `True`, the function 70 | assigns all but `num_samples` of these previously `True` elements to 71 | `False`. If `num_samples` is greater than M, the original indicator vector 72 | is returned. 73 | 74 | Args: 75 | indicator: a 1-dimensional boolean tensor indicating which elements 76 | are allowed to be sampled and which are not. 77 | num_samples: int32 scalar tensor 78 | 79 | Returns: 80 | a boolean tensor with the same shape as input (indicator) tensor 81 | """ 82 | indices = tf.where(indicator) 83 | indices = tf.random_shuffle(indices) 84 | indices = tf.reshape(indices, [-1]) 85 | 86 | num_samples = tf.minimum(tf.size(indices), num_samples) 87 | selected_indices = tf.slice(indices, [0], tf.reshape(num_samples, [1])) 88 | 89 | selected_indicator = ops.indices_to_dense_vector(selected_indices, 90 | tf.shape(indicator)[ 91 | 0]) 92 | 93 | return tf.equal(selected_indicator, 1) 94 | -------------------------------------------------------------------------------- /avod/protos/model.proto: -------------------------------------------------------------------------------- 1 | package avod.protos; 2 | 3 | import "avod/protos/layers.proto"; 4 | 5 | // Message for configuring the DetectionModel.
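// Note: checkpoint_name must match the pipeline config file name, otherwise
// config_builder_util.get_configs_from_pipeline_file raises a ValueError.
// Partial illustrative fragment (values hypothetical; the required fields
// omitted here must all be set in a real config):
//   model_config {
//     model_name: 'avod_model'
//     checkpoint_name: 'avod_cars_example'
//     path_drop_probabilities: [0.9, 0.9]
//   }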
6 | message ModelConfig { 7 | 8 | // Model name used to run either RPN or AVOD 9 | optional string model_name = 1 [default = 'avod_model']; 10 | 11 | // Checkpoint name 12 | optional string checkpoint_name = 2 [default = 'detection_model']; 13 | 14 | optional PathsConfig paths_config = 3; 15 | required InputConfig input_config = 4; 16 | required RpnConfig rpn_config = 5; 17 | required AvodConfig avod_config = 6; 18 | 19 | // Label smoothing epsilon 20 | required float label_smoothing_epsilon = 7; 21 | 22 | // Expand proposals lengths along x and z for larger context region (in m) 23 | // (0.0 - 1.0 recommended) 24 | required float expand_proposals_xz = 8; 25 | 26 | // Global path drop (p_keep_img, p_keep_bev) 27 | // To disable path drop, set both to 1.0 28 | repeated float path_drop_probabilities = 9; 29 | 30 | // To keep all the samples including the ones without anchor-info 31 | // i.e. labels during training 32 | required bool train_on_all_samples = 10; 33 | 34 | // To keep all the samples including the ones without anchor-info 35 | // i.e. labels during validation 36 | required bool eval_all_samples = 11; 37 | 38 | // Layer configurations 39 | required LayersConfig layers_config = 12; 40 | 41 | // Loss configurations 42 | required LossConfig loss_config = 13; 43 | } 44 | 45 | message PathsConfig { 46 | // Checkpoint dir 47 | optional string checkpoint_dir = 1; 48 | 49 | // Log dir (no underscore to match tensorboard) 50 | optional string logdir = 2; 51 | 52 | // Directory to save predictions 53 | optional string pred_dir = 3; 54 | } 55 | 56 | message InputConfig { 57 | // Bev dimensions 58 | optional int32 bev_dims_h = 1 [default = 700]; 59 | optional int32 bev_dims_w = 2 [default = 800]; 60 | optional int32 bev_depth = 3 [default = 6]; 61 | 62 | // Image dimensions 63 | optional int32 img_dims_h = 4 [default = 480]; 64 | optional int32 img_dims_w = 5 [default = 1590]; 65 | optional int32 img_depth = 6 [default = 3]; 66 | } 67 | 68 | message RpnConfig { 69 | // RPN proposal ROI crop size 70 | required int32 rpn_proposal_roi_crop_size = 1; 71 | 72 | // RPN proposal ROI fusion method, one of ['mean', 'concat'] 73 | required string rpn_fusion_method = 2; 74 | 75 | // RPN Non-max suppression boxes during training 76 | required int32 rpn_train_nms_size = 3; 77 | 78 | // RPN Non-max suppression boxes during testing 79 | required int32 rpn_test_nms_size = 4; 80 | 81 | // RPN NMS IoU threshold 82 | required float rpn_nms_iou_thresh = 5; 83 | } 84 | 85 | message AvodConfig { 86 | // AVOD Proposal ROI crop size 87 | required int32 avod_proposal_roi_crop_size = 1; 88 | 89 | // Positive selection, one of ['corr_cls', 'not_bkg'] 90 | required string avod_positive_selection = 3; 91 | 92 | // AVOD Non-max suppression boxes 93 | required int32 avod_nms_size = 4; 94 | 95 | // AVOD NMS IoU threshold 96 | required float avod_nms_iou_thresh = 5; 97 | 98 | // AVOD bounding box representation, one of ['box_3d', 'box_8c'] 99 | required string avod_box_representation = 6; 100 | } 101 | 102 | message LossConfig { 103 | // RPN/AVOD Regression loss weight 104 | required float reg_loss_weight = 1; 105 | 106 | // AVOD angle vector loss weight 107 | required float ang_loss_weight = 2; 108 | 109 | // RPN/AVOD Classification loss weight 110 | required float cls_loss_weight = 3; 111 | } 112 | 113 | -------------------------------------------------------------------------------- /avod/core/minibatch_sampler_test.py: -------------------------------------------------------------------------------- 1 | # Copyright 2017 
The TensorFlow Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | # ============================================================================== 15 | 16 | """Tests for avod.core.minibatch_sampler.""" 17 | 18 | import numpy as np 19 | import tensorflow as tf 20 | 21 | from avod.core import minibatch_sampler 22 | 23 | 24 | class MinibatchSamplerTest(tf.test.TestCase): 25 | def test_subsample_indicator_when_more_true_elements_than_num_samples( 26 | self): 27 | np_indicator = [True, False, True, False, True, True, False] 28 | indicator = tf.constant(np_indicator) 29 | samples = minibatch_sampler.MinibatchSampler.subsample_indicator( 30 | indicator, 3) 31 | with self.test_session() as sess: 32 | samples_out = sess.run(samples) 33 | self.assertEqual(np.sum(samples_out), 3) 34 | self.assertAllEqual(samples_out, 35 | np.logical_and(samples_out, np_indicator)) 36 | 37 | def test_subsample_when_more_true_elements_than_num_samples_no_shape(self): 38 | np_indicator = [True, False, True, False, True, True, False] 39 | indicator = tf.placeholder(tf.bool) 40 | feed_dict = {indicator: np_indicator} 41 | 42 | samples = minibatch_sampler.MinibatchSampler.subsample_indicator( 43 | indicator, 3) 44 | with self.test_session() as sess: 45 | samples_out = sess.run(samples, feed_dict=feed_dict) 46 | self.assertEqual(np.sum(samples_out), 3) 47 | self.assertAllEqual(samples_out, 48 | np.logical_and(samples_out, np_indicator)) 49 | 50 | def test_subsample_indicator_when_less_true_elements_than_num_samples( 51 | self): 52 | np_indicator = [True, False, True, False, True, True, False] 53 | indicator = tf.constant(np_indicator) 54 | samples = minibatch_sampler.MinibatchSampler.subsample_indicator( 55 | indicator, 5) 56 | with self.test_session() as sess: 57 | samples_out = sess.run(samples) 58 | self.assertEqual(np.sum(samples_out), 4) 59 | self.assertAllEqual(samples_out, 60 | np.logical_and(samples_out, np_indicator)) 61 | 62 | def test_subsample_indicator_when_num_samples_is_zero(self): 63 | np_indicator = [True, False, True, False, True, True, False] 64 | indicator = tf.constant(np_indicator) 65 | samples_none = minibatch_sampler.MinibatchSampler.subsample_indicator( 66 | indicator, 0) 67 | with self.test_session() as sess: 68 | samples_none_out = sess.run(samples_none) 69 | self.assertAllEqual( 70 | np.zeros_like(samples_none_out, dtype=bool), 71 | samples_none_out) 72 | 73 | def test_subsample_indicator_when_indicator_all_false(self): 74 | indicator_empty = tf.zeros([0], dtype=tf.bool) 75 | samples_empty = minibatch_sampler.MinibatchSampler.subsample_indicator( 76 | indicator_empty, 4) 77 | with self.test_session() as sess: 78 | samples_empty_out = sess.run(samples_empty) 79 | self.assertEqual(0, samples_empty_out.size) 80 | 81 | 82 | if __name__ == '__main__': 83 | tf.test.main() 84 | -------------------------------------------------------------------------------- /avod/core/anchor_filter_test.py:
-------------------------------------------------------------------------------- 1 | import unittest 2 | import numpy as np 3 | 4 | from avod.core import anchor_filter 5 | from avod.core import box_3d_encoder 6 | from wavedata.tools.core.voxel_grid import VoxelGrid 7 | 8 | 9 | class AnchorFilterTest(unittest.TestCase): 10 | 11 | def test_get_empty_anchor_filter_in_2d(self): 12 | # Define the 3D area extents that bound the point cloud 13 | area_extent = [(0., 2.), (-1., 0.), (0., 2.)] 14 | 15 | # Creates a voxel grid in the following format at y = bin (-1.5, -0.5] 16 | # [ ][ ][ ][ ] 17 | # [ ][ ][x][ ] 18 | # [ ][ ][ ][ ] 19 | # [ ][ ][x][ ] 20 | pts = np.array([[0.51, -0.5, 1.1], 21 | [1.51, -0.5, 1.1]]) 22 | 23 | voxel_size = 0.5 24 | voxel_grid = VoxelGrid() 25 | voxel_grid.voxelize(pts, voxel_size, extents=area_extent) 26 | 27 | # Define anchors to test 28 | boxes_3d = np.array([ 29 | [0.51, 0, 0.51, 1, 1, 1, 0], 30 | [0.51, 0, 0.51, 1, 1, 1, np.pi / 2.], 31 | [0.51, 0, 1.1, 1, 1, 1, 0], 32 | [0.51, 0, 1.1, 1, 1, 1, np.pi / 2.], 33 | [1.51, 0, 0.51, 1, 1, 1, 0], 34 | [1.51, 0, 0.51, 1, 1, 1, np.pi / 2.], 35 | [1.51, 0, 1.1, 1, 1, 1, 0], 36 | [1.51, 0, 1.1, 1, 1, 1, np.pi / 2.], 37 | ]) 38 | 39 | anchors = box_3d_encoder.box_3d_to_anchor(boxes_3d) 40 | 41 | # test anchor locations, number indicates the anchors indices 42 | # [ ][ ][ ][ ] 43 | # [ ][1][3][ ] 44 | # [ ][ ][ ][ ] 45 | # [ ][5][7][ ] 46 | 47 | gen_filter = anchor_filter.get_empty_anchor_filter(anchors, 48 | voxel_grid, 49 | density_threshold=1) 50 | 51 | expected_filter = np.array( 52 | [False, False, True, True, False, False, True, True]) 53 | 54 | self.assertTrue((gen_filter == expected_filter).all()) 55 | 56 | boxes_3d = np.array([ 57 | [0.5, 0, 0.5, 2, 1, 1, 0], # case 1 58 | [0.5, 0, 0.5, 2, 1, 1, np.pi / 2.], 59 | [0.5, 0, 1.5, 1, 2, 1, 0], # case 2 60 | [0.5, 0, 1.5, 1, 2, 1, np.pi / 2.], 61 | [1.5, 0, 0.5, 2, 1, 1, 0], # case 3 62 | [1.5, 0, 0.5, 2, 1, 1, np.pi / 2.], 63 | [1.5, 0, 1.5, 1, 2, 1, 0], # case 4 64 | [1.5, 0, 1.5, 1, 2, 1, np.pi / 2.] 65 | ]) 66 | 67 | anchors = box_3d_encoder.box_3d_to_anchor(boxes_3d) 68 | 69 | # case 1 70 | # [ ][ ][ ][ ] [ ][ ][ ][ ] 71 | # [ ][o][ ][ ] [ ][o][o][ ] 72 | # [ ][o][ ][ ] [ ][ ][ ][ ] 73 | # [ ][ ][ ][ ] [ ][ ][ ][ ] 74 | 75 | # case 2 76 | # [ ][ ][ ][ ] [ ][ ][ ][ ] 77 | # [ ][ ][o][o] [ ][ ][o][ ] 78 | # [ ][ ][ ][ ] [ ][ ][o][ ] 79 | # [ ][ ][ ][ ] [ ][ ][ ][ ] 80 | 81 | # case 3 82 | # [ ][ ][ ][ ] [ ][ ][ ][ ] 83 | # [ ][ ][ ][ ] [ ][ ][ ][ ] 84 | # [ ][o][ ][ ] [ ][o][o][ ] 85 | # [ ][o][ ][ ] [ ][ ][ ][ ] 86 | 87 | # case 4 88 | # [ ][ ][ ][ ] [ ][ ][ ][ ] 89 | # [ ][ ][ ][ ] [ ][ ][ ][ ] 90 | # [ ][ ][o][o] [ ][ ][o][ ] 91 | # [ ][ ][ ][ ] [ ][ ][o][ ] 92 | 93 | gen_filter = anchor_filter.get_empty_anchor_filter(anchors, 94 | voxel_grid, 95 | density_threshold=1) 96 | expected_filter = np.array( 97 | [False, True, True, True, False, True, True, True]) 98 | 99 | self.assertTrue((gen_filter == expected_filter).all()) 100 | 101 | 102 | if __name__ == '__main__': 103 | unittest.main() 104 | -------------------------------------------------------------------------------- /avod/core/minibatch_samplers/balanced_positive_negative_sampler_test.py: -------------------------------------------------------------------------------- 1 | # Copyright 2017 The TensorFlow Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | # ============================================================================== 15 | 16 | """Tests for object_detection.core.balanced_positive_negative_sampler.""" 17 | 18 | import numpy as np 19 | import tensorflow as tf 20 | 21 | from avod.core.minibatch_samplers import balanced_positive_negative_sampler 22 | 23 | 24 | class BalancedPositiveNegativeSamplerTest(tf.test.TestCase): 25 | def test_subsample_all_examples(self): 26 | numpy_labels = np.random.permutation(300) 27 | indicator = tf.constant(np.ones(300) == 1) 28 | numpy_labels = (numpy_labels - 200) > 0 29 | 30 | labels = tf.constant(numpy_labels) 31 | 32 | sampler = (balanced_positive_negative_sampler. 33 | BalancedPositiveNegativeSampler()) 34 | is_sampled, _ = sampler.subsample(indicator, 64, labels) 35 | with self.test_session() as sess: 36 | is_sampled = sess.run(is_sampled) 37 | self.assertTrue(sum(is_sampled) == 64) 38 | self.assertTrue( 39 | sum(np.logical_and(numpy_labels, is_sampled)) == 32) 40 | self.assertTrue(sum(np.logical_and( 41 | np.logical_not(numpy_labels), is_sampled)) == 32) 42 | 43 | def test_subsample_selection(self): 44 | # Test random sampling when only some examples can be sampled: 45 | # 100 samples, 20 positives, 10 positives cannot be sampled 46 | numpy_labels = np.arange(100) 47 | numpy_indicator = numpy_labels < 90 48 | indicator = tf.constant(numpy_indicator) 49 | numpy_labels = (numpy_labels - 80) >= 0 50 | 51 | labels = tf.constant(numpy_labels) 52 | 53 | sampler = (balanced_positive_negative_sampler. 54 | BalancedPositiveNegativeSampler()) 55 | is_sampled, _ = sampler.subsample(indicator, 64, labels) 56 | with self.test_session() as sess: 57 | is_sampled = sess.run(is_sampled) 58 | self.assertTrue(sum(is_sampled) == 64) 59 | self.assertTrue( 60 | sum(np.logical_and(numpy_labels, is_sampled)) == 10) 61 | self.assertTrue(sum(np.logical_and( 62 | np.logical_not(numpy_labels), is_sampled)) == 54) 63 | self.assertAllEqual(is_sampled, np.logical_and(is_sampled, 64 | numpy_indicator)) 65 | 66 | def test_raises_error_with_incorrect_label_shape(self): 67 | labels = tf.constant([[True, False, False]]) 68 | indicator = tf.constant([True, False, True]) 69 | sampler = (balanced_positive_negative_sampler. 70 | BalancedPositiveNegativeSampler()) 71 | with self.assertRaises(ValueError): 72 | sampler.subsample(indicator, 64, labels) 73 | 74 | def test_raises_error_with_incorrect_indicator_shape(self): 75 | labels = tf.constant([True, False, False]) 76 | indicator = tf.constant([[True, False, True]]) 77 | sampler = (balanced_positive_negative_sampler. 
78 | BalancedPositiveNegativeSampler()) 79 | with self.assertRaises(ValueError): 80 | sampler.subsample(indicator, 64, labels) 81 | 82 | 83 | if __name__ == '__main__': 84 | tf.test.main() 85 | -------------------------------------------------------------------------------- /avod/core/orientation_encoder_test.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import tensorflow as tf 3 | 4 | from avod.core import orientation_encoder 5 | 6 | 7 | class OrientationEncoderTest(tf.test.TestCase): 8 | def test_tf_orientation_to_angle_vector(self): 9 | # Test conversion for angles between [-pi, pi] with 0.5 degree steps 10 | np_orientations = np.arange(-np.pi, np.pi, np.pi / 360.0) 11 | 12 | expected_angle_vectors = np.stack([np.cos(np_orientations), 13 | np.sin(np_orientations)], axis=1) 14 | 15 | # Convert to tensors and convert to angle unit vectors 16 | tf_orientations = tf.convert_to_tensor(np_orientations) 17 | tf_angle_vectors = orientation_encoder.tf_orientation_to_angle_vector( 18 | tf_orientations) 19 | 20 | with self.test_session() as sess: 21 | angle_vectors_out = sess.run(tf_angle_vectors) 22 | 23 | np.testing.assert_allclose(angle_vectors_out, 24 | expected_angle_vectors) 25 | 26 | def test_angle_vectors_to_orientation(self): 27 | # Test conversion for angles between [-pi, pi] with 0.5 degree steps 28 | np_angle_vectors = \ 29 | np.asarray([[np.cos(angle), np.sin(angle)] 30 | for angle in np.arange(-np.pi, np.pi, np.pi / 360.0)]) 31 | 32 | # Check that tf output matches numpy's arctan2 output 33 | expected_orientations = np.arctan2(np_angle_vectors[:, 1], 34 | np_angle_vectors[:, 0]) 35 | 36 | # Convert to tensors and convert to orientation angles 37 | tf_angle_vectors = tf.convert_to_tensor(np_angle_vectors) 38 | tf_orientations = orientation_encoder.tf_angle_vector_to_orientation( 39 | tf_angle_vectors) 40 | 41 | with self.test_session() as sess: 42 | orientations_out = sess.run(tf_orientations) 43 | np.testing.assert_allclose(orientations_out, 44 | expected_orientations) 45 | 46 | def test_zeros_angle_vectors_to_orientation(self): 47 | # Test conversion for angle vectors with zeros in them 48 | np_angle_vectors = np.asarray( 49 | [[0, 0], 50 | [1, 0], [10, 0], 51 | [0, 1], [0, 10], 52 | [-1, 0], [-10, 0], 53 | [0, -1], [0, -10]]) 54 | 55 | half_pi = np.pi / 2 56 | expected_orientations = [0, 57 | 0, 0, 58 | half_pi, half_pi, 59 | np.pi, np.pi, 60 | -half_pi, -half_pi] 61 | 62 | # Convert to tensors and convert to orientation angles 63 | tf_angle_vectors = tf.convert_to_tensor(np_angle_vectors, 64 | dtype=tf.float64) 65 | tf_orientations = orientation_encoder.tf_angle_vector_to_orientation( 66 | tf_angle_vectors) 67 | 68 | with self.test_session() as sess: 69 | orientations_out = sess.run(tf_orientations) 70 | np.testing.assert_allclose(orientations_out, 71 | expected_orientations) 72 | 73 | def test_two_way_conversion(self): 74 | # Test conversion for angles between [-pi, pi] with 0.5 degree steps 75 | np_orientations = np.arange(-np.pi, np.pi, np.pi / 360.0) 76 | 77 | tf_angle_vectors = orientation_encoder.tf_orientation_to_angle_vector( 78 | np_orientations) 79 | tf_orientations = orientation_encoder.tf_angle_vector_to_orientation( 80 | tf_angle_vectors) 81 | 82 | # Check that conversion from orientation -> angle vector -> 83 | # orientation results in the same values 84 | with self.test_session() as sess: 85 | orientations_out = sess.run(tf_orientations) 86 | np.testing.assert_allclose(orientations_out, 87 |
np_orientations) 88 | -------------------------------------------------------------------------------- /demos/dataset/data_histograms.py: -------------------------------------------------------------------------------- 1 | import sys 2 | 3 | import matplotlib.pyplot as plt 4 | import numpy as np 5 | 6 | from wavedata.tools.obj_detection import obj_utils 7 | 8 | from avod.builders.dataset_builder import DatasetBuilder 9 | 10 | 11 | def main(): 12 | """Show histograms of ground truth labels 13 | """ 14 | 15 | dataset = DatasetBuilder.build_kitti_dataset( 16 | # DatasetBuilder.KITTI_TRAIN 17 | # DatasetBuilder.KITTI_VAL 18 | DatasetBuilder.KITTI_TRAINVAL 19 | ) 20 | 21 | difficulty = 2 22 | 23 | centroid_bins = 51 24 | dimension_bins = 21 25 | orientation_bins = 65 26 | 27 | classes = ['Car'] 28 | # classes = ['Pedestrian'] 29 | # classes = ['Cyclist'] 30 | # classes = ['Pedestrian', 'Cyclist'] 31 | 32 | # Dataset values 33 | num_samples = dataset.num_samples 34 | 35 | all_centroids_x = [] 36 | all_centroids_y = [] 37 | all_centroids_z = [] 38 | all_lengths = [] 39 | all_widths = [] 40 | all_heights = [] 41 | all_orientations = [] 42 | 43 | # Counter for total number of valid samples 44 | num_valid_samples = 0 45 | 46 | for sample_idx in range(num_samples): 47 | 48 | sys.stdout.write('\r{} / {}'.format(sample_idx + 1, num_samples)) 49 | 50 | sample_name = dataset.sample_names[sample_idx] 51 | img_idx = int(sample_name) 52 | 53 | obj_labels = obj_utils.read_labels(dataset.label_dir, img_idx) 54 | obj_labels = dataset.kitti_utils.filter_labels(obj_labels, 55 | classes=classes, 56 | difficulty=difficulty) 57 | 58 | centroids = np.asarray([obj.t for obj in obj_labels]) 59 | lengths = np.asarray([obj.l for obj in obj_labels]) 60 | widths = np.asarray([obj.w for obj in obj_labels]) 61 | heights = np.asarray([obj.h for obj in obj_labels]) 62 | orientations = np.asarray([obj.ry for obj in obj_labels]) 63 | 64 | if any(orientations) and np.amax(np.abs(orientations)) > np.pi: 65 | raise ValueError('Invalid orientation') 66 | 67 | if len(centroids) > 0: 68 | all_centroids_x.extend(centroids[:, 0]) 69 | all_centroids_y.extend(centroids[:, 1]) 70 | all_centroids_z.extend(centroids[:, 2]) 71 | all_lengths.extend(lengths) 72 | all_widths.extend(widths) 73 | all_heights.extend(heights) 74 | all_orientations.extend(orientations) 75 | 76 | num_valid_samples += 1 77 | 78 | print('Finished reading labels, num_valid_samples', num_valid_samples) 79 | 80 | # Get means 81 | mean_centroid_x = np.mean(all_centroids_x) 82 | mean_centroid_y = np.mean(all_centroids_y) 83 | mean_centroid_z = np.mean(all_centroids_z) 84 | mean_dims = np.mean([all_lengths, all_widths, all_heights]) 85 | 86 | np.set_printoptions(formatter={'float': lambda x: "{0:0.3f}".format(x)}) 87 | print('mean_centroid_x {0:0.3f}'.format(mean_centroid_x)) 88 | print('mean_centroid_y {0:0.3f}'.format(mean_centroid_y)) 89 | print('mean_centroid_z {0:0.3f}'.format(mean_centroid_z)) 90 | print('mean_dims {0:0.3f}'.format(mean_dims)) 91 | 92 | # Make plots 93 | f, ax_arr = plt.subplots(3, 3) 94 | 95 | # xyz 96 | ax_arr[0, 0].hist(all_centroids_x, centroid_bins, facecolor='green') 97 | ax_arr[0, 1].hist(all_centroids_y, centroid_bins, facecolor='green') 98 | ax_arr[0, 2].hist(all_centroids_z, centroid_bins, facecolor='green') 99 | 100 | # lwh 101 | ax_arr[1, 0].hist(all_lengths, dimension_bins, facecolor='green') 102 | ax_arr[1, 1].hist(all_widths, dimension_bins, facecolor='green') 103 | ax_arr[1, 2].hist(all_heights, dimension_bins, facecolor='green') 104 |
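    # Only ax_arr[2, 0] below is used; the remaining two subplots in the
    # bottom row are intentionally left empty.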
105 | # orientations 106 | ax_arr[2, 0].hist(all_orientations, orientation_bins, facecolor='green') 107 | 108 | plt.show(block=True) 109 | 110 | 111 | if __name__ == '__main__': 112 | main() 113 | -------------------------------------------------------------------------------- /avod/builders/config_builder_util.py: -------------------------------------------------------------------------------- 1 | """Config file reader utils.""" 2 | 3 | import os 4 | import shutil 5 | 6 | from google.protobuf import text_format 7 | 8 | import avod 9 | from avod.protos import model_pb2 10 | from avod.protos import pipeline_pb2 11 | 12 | 13 | class ConfigObj: 14 | pass 15 | 16 | 17 | def proto_to_obj(config): 18 | """Hack to convert a proto config into a plain object so repeated fields 19 | can be overwritten 20 | 21 | Args: 22 | config: proto config 23 | 24 | Returns: 25 | config_obj: object with the same fields as the config 26 | """ 27 | all_fields = list(config.DESCRIPTOR.fields_by_name) 28 | config_obj = ConfigObj() 29 | for field in all_fields: 30 | field_value = getattr(config, field) 31 | setattr(config_obj, field, field_value) 32 | 33 | return config_obj 34 | 35 | 36 | def get_model_config_from_file(config_path): 37 | """Reads model configuration from a configuration file. 38 | This merges the layer config info with model default configs. 39 | Args: 40 | config_path: A path to the config 41 | 42 | Returns: 43 | layers_config: A configured model_pb2 config 44 | """ 45 | 46 | model_config = model_pb2.ModelConfig() 47 | with open(config_path, 'r') as f: 48 | text_format.Merge(f.read(), model_config) 49 | return model_config 50 | 51 | 52 | def get_configs_from_pipeline_file(pipeline_config_path, 53 | is_training): 54 | """Reads model configuration from a pipeline_pb2.NetworkPipelineConfig. 55 | Args: 56 | pipeline_config_path: A path to the network pipeline config file 57 | is_training: A boolean flag to indicate training stage, used for 58 | creating the checkpoint directory which must be created at the 59 | first training iteration.
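
    Note: the checkpoint_name in the model config must match the pipeline
    config file name; a ValueError is raised below otherwise.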
60 | Returns: 61 | model_config: A model_pb2.ModelConfig 62 | train_config: A train_pb2.TrainConfig 63 | eval_config: A eval_pb2.EvalConfig 64 | dataset_config: A kitti_dataset_pb2.KittiDatasetConfig 65 | """ 66 | 67 | pipeline_config = pipeline_pb2.NetworkPipelineConfig() 68 | with open(pipeline_config_path, 'r') as f: 69 | text_format.Merge(f.read(), pipeline_config) 70 | 71 | model_config = pipeline_config.model_config 72 | 73 | # Make sure the checkpoint name matches the config filename 74 | config_file_name = \ 75 | os.path.split(pipeline_config_path)[1].split('.')[0] 76 | checkpoint_name = model_config.checkpoint_name 77 | if config_file_name != checkpoint_name: 78 | raise ValueError('Config and checkpoint names must match.') 79 | 80 | output_root_dir = avod.root_dir() + '/data/outputs/' + checkpoint_name 81 | 82 | # Construct paths 83 | paths_config = model_config.paths_config 84 | if not paths_config.checkpoint_dir: 85 | checkpoint_dir = output_root_dir + '/checkpoints' 86 | 87 | if is_training: 88 | if not os.path.exists(checkpoint_dir): 89 | os.makedirs(checkpoint_dir) 90 | 91 | paths_config.checkpoint_dir = checkpoint_dir 92 | 93 | if not paths_config.logdir: 94 | paths_config.logdir = output_root_dir + '/logs/' 95 | 96 | if not paths_config.pred_dir: 97 | paths_config.pred_dir = output_root_dir + '/predictions' 98 | 99 | train_config = pipeline_config.train_config 100 | eval_config = pipeline_config.eval_config 101 | dataset_config = pipeline_config.dataset_config 102 | 103 | if is_training: 104 | # Copy the config to the experiments folder 105 | experiment_config_path = output_root_dir + '/' +\ 106 | model_config.checkpoint_name 107 | experiment_config_path += '.config' 108 | # Copy this even if the config exists, in case some parameters 109 | # were modified 110 | shutil.copy(pipeline_config_path, experiment_config_path) 111 | 112 | return model_config, train_config, eval_config, dataset_config 113 | -------------------------------------------------------------------------------- /avod/core/anchor_generators/grid_anchor_3d_generator.py: -------------------------------------------------------------------------------- 1 | """ 2 | Generates 3D anchors, placing them on the ground plane 3 | """ 4 | 5 | import numpy as np 6 | 7 | from avod.core import anchor_generator 8 | 9 | 10 | class GridAnchor3dGenerator(anchor_generator.AnchorGenerator): 11 | 12 | def name_scope(self): 13 | return 'GridAnchor3dGenerator' 14 | 15 | def _generate(self, **params): 16 | """ 17 | Generates 3D anchors in a grid in the provided 3d area and places 18 | them on the ground_plane. 19 | 20 | Args: 21 | **params: 22 | area_3d: [[min_x, max_x], [min_y, max_y], [min_z, max_z]] 23 | 24 | Returns: 25 | list of 3D anchors in the form N x [x, y, z, l, w, h, ry] 26 | """ 27 | 28 | area_3d = params.get('area_3d') 29 | anchor_3d_sizes = params.get('anchor_3d_sizes') 30 | anchor_stride = params.get('anchor_stride') 31 | ground_plane = params.get('ground_plane') 32 | 33 | return tile_anchors_3d(area_3d, 34 | anchor_3d_sizes, 35 | anchor_stride, 36 | ground_plane) 37 | 38 | 39 | def tile_anchors_3d(area_extents, 40 | anchor_3d_sizes, 41 | anchor_stride, 42 | ground_plane): 43 | """ 44 | Tiles anchors over the area extents by using meshgrids to 45 | generate combinations of (x, y, z), (l, w, h) and ry. 
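    Every (x, z) grid location is paired with each anchor size and the two
    fixed rotations (0 and pi/2), so the total anchor count is
    len(x_centers) * len(z_centers) * len(anchor_3d_sizes) * 2. Each
    anchor's y coordinate is solved from the ground plane equation:
    y = -(a*x + c*z + d) / b.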
46 | 47 | Args: 48 | area_extents: [[min_x, max_x], [min_y, max_y], [min_z, max_z]] 49 | anchor_3d_sizes: list of 3d anchor sizes N x (l, w, h) 50 | anchor_stride: stride lengths (x_stride, z_stride) 51 | ground_plane: coefficients of the ground plane e.g. [0, -1, 0, 0] 52 | 53 | Returns: 54 | boxes: list of 3D anchors in box_3d format N x [x, y, z, l, w, h, ry] 55 | """ 56 | # Convert sizes to ndarray 57 | anchor_3d_sizes = np.asarray(anchor_3d_sizes) 58 | 59 | anchor_stride_x = anchor_stride[0] 60 | anchor_stride_z = anchor_stride[1] 61 | anchor_rotations = np.asarray([0, np.pi / 2.0]) 62 | 63 | x_start = area_extents[0][0] + anchor_stride[0] / 2.0 64 | x_end = area_extents[0][1] 65 | x_centers = np.array(np.arange(x_start, x_end, step=anchor_stride_x), 66 | dtype=np.float32) 67 | 68 | z_start = area_extents[2][1] - anchor_stride[1] / 2.0 69 | z_end = area_extents[2][0] 70 | z_centers = np.array(np.arange(z_start, z_end, step=-anchor_stride_z), 71 | dtype=np.float32) 72 | 73 | # Use ranges for substitution 74 | size_indices = np.arange(0, len(anchor_3d_sizes)) 75 | rotation_indices = np.arange(0, len(anchor_rotations)) 76 | 77 | # Generate matrix for substitution 78 | # e.g. for two sizes and two rotations 79 | # [[x0, z0, 0, 0], [x0, z0, 0, 1], [x0, z0, 1, 0], [x0, z0, 1, 1], 80 | # [x1, z0, 0, 0], [x1, z0, 0, 1], [x1, z0, 1, 0], [x1, z0, 1, 1], ...] 81 | before_sub = np.stack(np.meshgrid(x_centers, 82 | z_centers, 83 | size_indices, 84 | rotation_indices), 85 | axis=4).reshape(-1, 4) 86 | 87 | # Place anchors on the ground plane 88 | a, b, c, d = ground_plane 89 | all_x = before_sub[:, 0] 90 | all_z = before_sub[:, 1] 91 | all_y = -(a * all_x + c * all_z + d) / b 92 | 93 | # Create empty matrix to return 94 | num_anchors = len(before_sub) 95 | all_anchor_boxes_3d = np.zeros((num_anchors, 7)) 96 | 97 | # Fill in x, y, z 98 | all_anchor_boxes_3d[:, 0:3] = np.stack((all_x, all_y, all_z), axis=1) 99 | 100 | # Fill in shapes 101 | sizes = anchor_3d_sizes[np.asarray(before_sub[:, 2], np.int32)] 102 | all_anchor_boxes_3d[:, 3:6] = sizes 103 | 104 | # Fill in rotations 105 | rotations = anchor_rotations[np.asarray(before_sub[:, 3], np.int32)] 106 | all_anchor_boxes_3d[:, 6] = rotations 107 | 108 | return all_anchor_boxes_3d 109 | -------------------------------------------------------------------------------- /demos/dataset/car_clustering.py: -------------------------------------------------------------------------------- 1 | import sys 2 | 3 | import numpy as np 4 | from sklearn.cluster import KMeans 5 | 6 | from wavedata.tools.obj_detection import obj_utils 7 | 8 | from avod.builders.dataset_builder import DatasetBuilder 9 | from avod.core.label_cluster_utils import LabelClusterUtils 10 | 11 | 12 | def main(): 13 | """ 14 | Clusters car label dimensions for the training split. 15 | 16 | Prints the cluster centres and standard deviations obtained after 17 | re-clustering the labels that lie 2 and 3 standard deviations below 18 | and above the mean length (main itself returns nothing). 19 | """ 20 | 21 | dataset = DatasetBuilder.build_kitti_dataset(DatasetBuilder.KITTI_TRAIN) 22 | 23 | # Cluster the car dimensions over the dataset's cluster split 24 | # Load labels corresponding to the sample list for clustering 25 | sample_list = dataset.load_sample_names(dataset.cluster_split) 26 | all_dims = [] 27 | 28 | num_samples = len(sample_list) 29 | for sample_idx in range(num_samples): 30 | 31 | sys.stdout.write("\rClustering labels {} / {}".format( 32 | sample_idx + 1, num_samples)) 33 | sys.stdout.flush() 34 | 35 | sample_name = sample_list[sample_idx] 36 | img_idx = int(sample_name) 37 | 38 | obj_labels =
obj_utils.read_labels(dataset.label_dir, img_idx) 39 | filtered_lwh = LabelClusterUtils._filter_labels_by_class( 40 | obj_labels, dataset.classes) 41 | 42 | if filtered_lwh[0]: 43 | all_dims.extend(filtered_lwh[0]) 44 | 45 | all_dims = np.array(all_dims) 46 | print("\nFinished reading labels, clustering data...\n") 47 | 48 | # Print 3 decimal places 49 | np.set_printoptions(formatter={'float': lambda x: "{0:0.3f}".format(x)}) 50 | 51 | # Calculate average cluster 52 | k_means = KMeans(n_clusters=1, 53 | random_state=0).fit(all_dims) 54 | 55 | cluster_centre = k_means.cluster_centers_[0] 56 | 57 | # Calculate std. dev 58 | std_dev = np.std(all_dims, axis=0) 59 | 60 | # Calculate 2 and 3 standard deviations below the mean 61 | two_sigma_length_lo = cluster_centre[0] - 2 * std_dev[0] 62 | three_sigma_length_lo = cluster_centre[0] - 3 * std_dev[0] 63 | 64 | # Keep only the labels with length more than two (or three) std devs 65 | # below the mean and re-cluster 66 | small_mask_2 = all_dims[:, 0] < two_sigma_length_lo 67 | small_dims_2 = all_dims[small_mask_2] 68 | 69 | small_mask_3 = all_dims[:, 0] < three_sigma_length_lo 70 | small_dims_3 = all_dims[small_mask_3] 71 | 72 | small_k_means_2 = KMeans(n_clusters=1, random_state=0).fit(small_dims_2) 73 | small_k_means_3 = KMeans(n_clusters=1, random_state=0).fit(small_dims_3) 74 | small_std_dev_2 = np.std(small_dims_2, axis=0) 75 | small_std_dev_3 = np.std(small_dims_3, axis=0) 76 | 77 | print('small_k_means_2:', small_k_means_2.cluster_centers_) 78 | print('small_k_means_3:', small_k_means_3.cluster_centers_) 79 | print('small_std_dev_2:', small_std_dev_2) 80 | print('small_std_dev_3:', small_std_dev_3) 81 | 82 | # Calculate 2 and 3 standard deviations above the mean 83 | two_sigma_length_hi = cluster_centre[0] + 2 * std_dev[0] 84 | three_sigma_length_hi = cluster_centre[0] + 3 * std_dev[0] 85 | 86 | # Keep only the labels with length more than two (or three) std devs 87 | # above the mean and re-cluster 88 | large_mask_2 = all_dims[:, 0] > two_sigma_length_hi 89 | large_dims_2 = all_dims[large_mask_2] 90 | 91 | large_mask_3 = all_dims[:, 0] > three_sigma_length_hi 92 | large_dims_3 = all_dims[large_mask_3] 93 | 94 | large_k_means_2 = KMeans(n_clusters=1, random_state=0).fit(large_dims_2) 95 | large_k_means_3 = KMeans(n_clusters=1, random_state=0).fit(large_dims_3) 96 | 97 | large_std_dev_2 = np.std(large_dims_2, axis=0) 98 | large_std_dev_3 = np.std(large_dims_3, axis=0) 99 | 100 | print('large_k_means_2:', large_k_means_2.cluster_centers_) 101 | print('large_k_means_3:', large_k_means_3.cluster_centers_) 102 | print('large_std_dev_2:', large_std_dev_2) 103 | print('large_std_dev_3:', large_std_dev_3) 104 | 105 | 106 | if __name__ == '__main__': 107 | main() 108 | -------------------------------------------------------------------------------- /avod/core/feature_extractors/bev_vgg_test.py: -------------------------------------------------------------------------------- 1 | """Testing VGG BEV network.
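
Builds a small BEV VGG classification graph on the unittest Kitti dataset,
runs a single training step, and checks that the resulting loss is below 1.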
2 | """ 3 | import numpy as np 4 | import tensorflow as tf 5 | from google.protobuf import text_format 6 | 7 | import avod.tests as tests 8 | from avod.builders import optimizer_builder 9 | from avod.builders.dataset_builder import DatasetBuilder 10 | from avod.core import constants 11 | from avod.core.feature_extractors import bev_vgg as vgg 12 | from avod.datasets.kitti.kitti_dataset import KittiDataset 13 | from avod.protos import train_pb2 14 | 15 | slim = tf.contrib.slim 16 | 17 | 18 | def fill_feed_dict(dataset: KittiDataset, input_pl, batch_size): 19 | sample = dataset.next_batch(batch_size) 20 | 21 | bev_input = sample[0].get(constants.KEY_BEV_INPUT) 22 | bev_input = np.expand_dims(bev_input, axis=0) 23 | 24 | labels = sample[0].get(constants.KEY_LABEL_CLASSES) 25 | labels = np.expand_dims(labels, axis=1) 26 | 27 | label_pl = tf.placeholder(tf.float32, [None, 1]) 28 | 29 | feed_dict = { 30 | input_pl: bev_input, 31 | label_pl: labels 32 | } 33 | 34 | return feed_dict, label_pl 35 | 36 | 37 | class BevVggTest(tf.test.TestCase): 38 | 39 | @classmethod 40 | def setUpClass(cls): 41 | # Initialize the Kitti dataset 42 | test_dir = tests.test_path() 43 | 44 | # Get the unittest-kitti dataset 45 | dataset_builder = DatasetBuilder() 46 | cls.dataset = dataset_builder.build_kitti_dataset( 47 | dataset_builder.KITTI_UNITTEST) 48 | 49 | cls.log_dir = test_dir + '/logs' 50 | cls.bev_vgg_cls = vgg.BevVggClassification() 51 | 52 | def test_vgg_layers_build(self): 53 | train_config_text_proto = """ 54 | optimizer { 55 | gradient_descent { 56 | learning_rate { 57 | constant_learning_rate { 58 | learning_rate: 0.1 59 | } 60 | } 61 | } 62 | } 63 | """ 64 | train_config = train_pb2.TrainConfig() 65 | text_format.Merge(train_config_text_proto, train_config) 66 | global_summaries = set([]) 67 | batch_size = 1 68 | 69 | with tf.Graph().as_default(): 70 | with tf.name_scope('input'): 71 | # BEV image placeholder 72 | image_placeholder = tf.placeholder( 73 | tf.float32, (None, 700, 800, 6)) 74 | image_summary = tf.expand_dims(image_placeholder, axis=0) 75 | tf.summary.image("image", image_summary, max_outputs=5) 76 | 77 | # Check invalid BEV shape 78 | bev_shape = (300, 300) 79 | processed_image = self.bev_vgg_cls.preprocess_input( 80 | image_placeholder, bev_shape) 81 | 82 | predictions, end_points = self.bev_vgg_cls.build( 83 | processed_image, num_classes=1, is_training=True) 84 | 85 | feed_dict, label_pl = fill_feed_dict( 86 | self.dataset, image_placeholder, batch_size) 87 | 88 | ########################### 89 | # Loss Function 90 | ########################### 91 | cross_entropy = tf.nn.weighted_cross_entropy_with_logits( 92 | label_pl, 93 | predictions, 94 | 1.0) 95 | loss = tf.reduce_mean(cross_entropy) 96 | 97 | ########################### 98 | # Optimizer 99 | ########################### 100 | training_optimizer = optimizer_builder.build( 101 | train_config.optimizer, global_summaries) 102 | 103 | ########################### 104 | # Train-op 105 | ########################### 106 | train_op = slim.learning.create_train_op(loss, training_optimizer) 107 | 108 | sess = tf.Session() 109 | init = tf.global_variables_initializer() 110 | sess.run(init) 111 | 112 | loss = sess.run(train_op, feed_dict=feed_dict) 113 | 114 | self.assertLess(loss, 1) 115 | print('Loss ', loss) 116 | 117 | 118 | if __name__ == '__main__': 119 | tf.test.main() 120 | -------------------------------------------------------------------------------- /avod/core/trainer_test.py: 
-------------------------------------------------------------------------------- 1 | """Tests for avod.core.trainer with a dummy Detection Model""" 2 | 3 | import tensorflow as tf 4 | import numpy as np 5 | from tensorflow.contrib.layers.python.layers import layers 6 | from tensorflow.python.framework import random_seed 7 | from tensorflow.python.ops import math_ops 8 | from tensorflow.python.framework import constant_op 9 | from tensorflow.python.framework import dtypes 10 | 11 | from google.protobuf import text_format 12 | 13 | from avod.core import trainer 14 | from avod.core import model 15 | 16 | from avod.protos import train_pb2 17 | from avod.protos import model_pb2 18 | 19 | 20 | class FakeBatchNormClassifier(model.DetectionModel): 21 | 22 | def __init__(self, model_config, num_classes=1): 23 | # Sets model configs (_config and _num_classes) 24 | super(FakeBatchNormClassifier, self).__init__(model_config) 25 | 26 | self.tf_inputs, self.tf_labels = self.get_input() 27 | self._train_op = None 28 | self._loss = None 29 | 30 | def BatchNormClassifier(self, inputs): 31 | inputs = layers.batch_norm(inputs, decay=0.1, fused=None) 32 | return layers.fully_connected(inputs, 1, activation_fn=math_ops.sigmoid) 33 | 34 | def get_input(self): 35 | """Creates an easy training set.""" 36 | np.random.seed(0) 37 | 38 | inputs = np.zeros((16, 4)) 39 | labels = np.random.randint( 40 | 0, 2, size=(16, 1)).astype( 41 | np.float32) 42 | 43 | for i in range(16): 44 | j = int(2 * labels[i] + np.random.randint(0, 2)) 45 | inputs[i, j] = 1 46 | 47 | random_seed.set_random_seed(0) 48 | tf_inputs = constant_op.constant(inputs, dtype=dtypes.float32) 49 | tf_labels = constant_op.constant(labels, dtype=dtypes.float32) 50 | 51 | return tf_inputs, tf_labels 52 | 53 | def build(self): 54 | """Builds prediction tensors from the inputs created in get_input. 55 | 56 | Takes no arguments since this fake model holds its own inputs 57 | in self.tf_inputs. 58 | 59 | Returns: 60 | tf_predictions: a tensor of sigmoid class predictions to be 61 | passed to the loss function. 62 | """ 63 | tf_predictions = self.BatchNormClassifier(self.tf_inputs) 64 | return tf_predictions 65 | 66 | def loss(self, tf_predictions): 67 | """Compute scalar loss tensors with respect to provided groundtruth. 
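Args: tf_predictions: a tensor of sigmoid class predictions from build().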
68 | """ 69 | # trainer expects two losses, pass in a dummy one 70 | dummy_loss_dict = {} 71 | total_loss = tf.losses.log_loss(self.tf_labels, 72 | tf_predictions, 73 | scope='BatchNormLoss') 74 | return dummy_loss_dict, total_loss 75 | 76 | 77 | class ClassifierTrainerTest(tf.test.TestCase): 78 | 79 | def test_batch_norm_class(self): 80 | # This tests the model and trainer set up 81 | train_config_text_proto = """ 82 | optimizer { 83 | gradient_descent { 84 | learning_rate { 85 | constant_learning_rate { 86 | learning_rate: 1.0 87 | } 88 | } 89 | } 90 | } 91 | max_iterations: 5 92 | """ 93 | model_config_text_proto = """ 94 | path_drop_probabilities: [1.0, 1.0] 95 | """ 96 | train_config = train_pb2.TrainConfig() 97 | text_format.Merge(train_config_text_proto, train_config) 98 | 99 | model_config = model_pb2.ModelConfig() 100 | text_format.Merge(model_config_text_proto, model_config) 101 | train_config.overwrite_checkpoints = True 102 | test_root_dir = '/tmp/avod_unit_test/' 103 | 104 | paths_config = model_config.paths_config 105 | paths_config.logdir = test_root_dir + 'logs/' 106 | paths_config.checkpoint_dir = test_root_dir 107 | 108 | classifier = FakeBatchNormClassifier(model_config) 109 | trainer.train(classifier, 110 | train_config) 111 | 112 | 113 | if __name__ == '__main__': 114 | tf.test.main() 115 | -------------------------------------------------------------------------------- /avod/core/summary_utils.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import tensorflow as tf 3 | 4 | 5 | def add_feature_maps_from_dict(end_points, layer_name): 6 | """ Calls add_feature_maps for a specified layer 7 | in a dictionary of end points 8 | 9 | Args: 10 | end_points: dictionary of network end points 11 | layer_name: dict key of the layer to add 12 | """ 13 | feature_maps = end_points.get(layer_name) 14 | add_feature_maps(feature_maps, layer_name) 15 | 16 | 17 | def add_feature_maps(feature_maps, layer_name): 18 | """ Adds an image summary showing tiled feature maps 19 | 20 | Args: 21 | feature_maps: a tensor of feature maps to show, dimensions should be 22 | (1, ?, ?, ?) 
(batch_size, height, width, depth) 23 | layer_name: name of the layer which will show up in tensorboard 24 | """ 25 | with tf.name_scope(layer_name): 26 | batch, maps_height, maps_width, num_maps = np.array( 27 | feature_maps.shape).astype(np.int32) 28 | 29 | # Resize to a visible size 30 | map_width_out = 300 31 | ratio = map_width_out / maps_width 32 | map_height_out = int(maps_height * ratio) 33 | map_size_out = tf.convert_to_tensor([map_height_out, map_width_out], 34 | tf.int32) 35 | 36 | resized_maps = tf.image.resize_bilinear(feature_maps, map_size_out) 37 | 38 | # Take first image only 39 | output = tf.slice(resized_maps, (0, 0, 0, 0), (1, -1, -1, -1)) 40 | output = tf.reshape(output, (map_height_out, map_width_out, num_maps)) 41 | 42 | # Add padding around each map 43 | map_width_out += 5 44 | map_height_out += 5 45 | output = tf.image.resize_image_with_crop_or_pad( 46 | output, map_height_out, map_width_out) 47 | 48 | # Find good image size for display 49 | map_sizes = [1, 32, 64, 128, 256, 512] 50 | # columns, rows 51 | image_sizes = [(1, 1), (4, 8), (8, 8), (8, 16), (8, 32), (16, 32)] 52 | size_idx = map_sizes.index(num_maps) 53 | desired_image_size = image_sizes[size_idx] 54 | image_width = desired_image_size[0] 55 | image_height = desired_image_size[1] 56 | 57 | # Arrange maps into a grid 58 | output = tf.reshape(output, 59 | (map_height_out, map_width_out, image_height, 60 | image_width)) 61 | output = tf.transpose(output, (2, 0, 3, 1)) 62 | output = tf.reshape(output, (1, image_height * map_height_out, 63 | image_width * map_width_out, 1)) 64 | 65 | layer_name = layer_name.split('/')[-1] 66 | tf.summary.image(layer_name, output, max_outputs=16) 67 | 68 | 69 | def add_scalar_summary(summary_name, scalar_value, 70 | summary_writer, global_step): 71 | """ Adds a single scalar summary value to the logs without adding a 72 | summary node to the graph 73 | 74 | Args: 75 | summary_name: name of the summary to add 76 | scalar_value: value of the scalar 77 | summary_writer: a summary writer object 78 | global_step: the current global step 79 | """ 80 | 81 | avg_summary = tf.Summary() 82 | avg_summary.value.add(tag=summary_name, 83 | simple_value=scalar_value) 84 | 85 | summary_writer.add_summary(avg_summary, global_step) 86 | 87 | 88 | def summaries_to_keep(summaries, 89 | global_summaries, 90 | histograms=True, 91 | input_imgs=True, 92 | input_bevs=True): 93 | 94 | if histograms and input_imgs and input_bevs: 95 | # Keep everything 96 | summaries |= global_summaries 97 | 98 | else: 99 | for summary in summaries.copy(): 100 | name = summary.name 101 | if not histograms and name.startswith('histograms'): 102 | summaries.remove(summary) 103 | if not input_imgs and name.startswith('img_'): 104 | summaries.remove(summary) 105 | if not input_bevs and name.startswith('bev_'): 106 | summaries.remove(summary) 107 | 108 | # Merge all summaries together. 109 | summary_op = tf.summary.merge(list(summaries), name='summary_op') 110 | 111 | return summary_op 112 | -------------------------------------------------------------------------------- /avod/configs/unittest_pipeline.config: -------------------------------------------------------------------------------- 1 | # Avod unittest configuration sample. 
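# Uses deliberately small values (e.g. max_iterations: 10, mini_batch_size: 64)
# so the full pipeline builds and trains quickly on the tiny unittest split.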
2 | 3 | model_config { 4 | model_name: 'avod_model' 5 | checkpoint_name: 'unittest_pipeline' 6 | 7 | input_config { 8 | bev_depth: 6 9 | img_depth: 3 10 | } 11 | 12 | rpn_config { 13 | rpn_proposal_roi_crop_size: 3 14 | rpn_fusion_method: 'mean' 15 | rpn_train_nms_size: 1024 16 | rpn_test_nms_size: 300 17 | rpn_nms_iou_thresh: 0.8 18 | } 19 | 20 | avod_config { 21 | avod_proposal_roi_crop_size: 7 22 | avod_nms_size: 100 23 | avod_nms_iou_thresh: 0.01 24 | avod_box_representation: 'box_3d' 25 | } 26 | 27 | label_smoothing_epsilon: 0.001 28 | expand_proposals_xz: 0.0 29 | path_drop_probabilities: [0.5, 0.5] 30 | train_on_all_samples: False 31 | eval_all_samples: False 32 | 33 | layers_config { 34 | bev_feature_extractor { 35 | bev_vgg { 36 | vgg_conv1: [2, 32] 37 | vgg_conv2: [2, 64] 38 | vgg_conv3: [3, 128] 39 | vgg_conv4: [3, 256] 40 | upsampling_multiplier: 2 41 | 42 | l2_weight_decay: 0.0005 43 | } 44 | } 45 | img_feature_extractor { 46 | img_vgg { 47 | vgg_conv1: [2, 32] 48 | vgg_conv2: [2, 64] 49 | vgg_conv3: [3, 128] 50 | vgg_conv4: [3, 256] 51 | upsampling_multiplier: 2 52 | 53 | l2_weight_decay: 0.0005 54 | } 55 | } 56 | rpn_config { 57 | cls_fc6: 32 58 | cls_fc7: 32 59 | 60 | reg_fc6: 32 61 | reg_fc7: 32 62 | 63 | l2_weight_decay: 0.005 64 | keep_prob: 0.5 65 | } 66 | avod_config { 67 | basic_fc_layers { 68 | num_layers: 2 69 | layer_sizes: [64, 64] 70 | l2_weight_decay: 0.005 71 | keep_prob: 0.5 72 | fusion_method: 'mean' # 'mean' or 'concat' 73 | } 74 | } 75 | } 76 | # Loss function weights 77 | loss_config { 78 | cls_loss_weight: 5.0 79 | reg_loss_weight: 10.0 80 | ang_loss_weight: 10.0 81 | } 82 | } 83 | 84 | train_config { 85 | 86 | batch_size: 1 87 | 88 | optimizer { 89 | adam_optimizer { 90 | learning_rate { 91 | constant_learning_rate { 92 | learning_rate: 0.0001 93 | } 94 | } 95 | } 96 | } 97 | 98 | overwrite_checkpoints: False 99 | 100 | max_checkpoints_to_keep: 10000 101 | max_iterations: 10 102 | checkpoint_interval: 1 103 | 104 | summary_interval: 10 105 | summary_histograms: False 106 | summary_img_images: False 107 | summary_bev_images: False 108 | } 109 | 110 | 111 | dataset_config { 112 | bev_source: 'lidar' 113 | 114 | kitti_utils_config { 115 | area_extents: [-40, 40, -5, 3, 0, 70] 116 | voxel_size: 0.1 117 | anchor_strides: [0.5, 0.5, 0.5, 0.5, 0.5, 0.5] 118 | density_threshold: 1 119 | 120 | bev_generator { 121 | slices { 122 | height_lo: -0.2 123 | height_hi: 2.3 124 | num_slices: 5 125 | } 126 | } 127 | 128 | mini_batch_config { 129 | density_threshold: 1 130 | 131 | rpn_config { 132 | iou_2d_thresholds { 133 | neg_iou_lo: 0.0 134 | neg_iou_hi: 0.3 135 | pos_iou_lo: 0.5 136 | pos_iou_hi: 1.0 137 | } 138 | # iou_3d_thresholds { 139 | # neg_iou_lo: 0.001 140 | # neg_iou_hi: 0.005 141 | # pos_iou_lo: 0.3 142 | # pos_iou_hi: 1.0 143 | # } 144 | 145 | mini_batch_size: 64 146 | } 147 | 148 | avod_config { 149 | iou_2d_thresholds { 150 | neg_iou_lo: 0.0 151 | neg_iou_hi: 0.55 152 | pos_iou_lo: 0.65 153 | pos_iou_hi: 1.0 154 | } 155 | 156 | mini_batch_size: 64 157 | } 158 | } 159 | } 160 | } 161 | -------------------------------------------------------------------------------- /avod/core/minibatch_samplers/balanced_positive_negative_sampler.py: -------------------------------------------------------------------------------- 1 | # Copyright 2017 The TensorFlow Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 
5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | # ============================================================================== 15 | 16 | """Class to subsample minibatches by balancing positives and negatives. 17 | 18 | Subsamples minibatches based on a pre-specified positive fraction in range 19 | [0,1]. The class presumes there are many more negatives than positive examples: 20 | if the desired batch_size cannot be achieved with the pre-specified positive 21 | fraction, it fills the rest with negative examples. If this is not sufficient 22 | for obtaining the desired batch_size, it returns fewer examples. 23 | 24 | The main function to call is Subsample(self, indicator, labels). For convenience 25 | one can also call SubsampleWeights(self, weights, labels) which is defined in 26 | the minibatch_sampler base class. 27 | """ 28 | 29 | import tensorflow as tf 30 | 31 | from avod.core import minibatch_sampler 32 | 33 | 34 | class BalancedPositiveNegativeSampler(minibatch_sampler.MinibatchSampler): 35 | """Subsamples minibatches to a desired balance of positives and negatives.""" 36 | 37 | def __init__(self, positive_fraction=0.5): 38 | """Constructs a minibatch sampler. 39 | 40 | Args: 41 | positive_fraction: desired fraction of positive examples (scalar in [0,1]) 42 | 43 | Raises: 44 | ValueError: if positive_fraction < 0, or positive_fraction > 1 45 | """ 46 | if positive_fraction < 0 or positive_fraction > 1: 47 | raise ValueError('positive_fraction should be in range [0,1]. ' 48 | 'Received: %s.' % positive_fraction) 49 | self._positive_fraction = positive_fraction 50 | 51 | def subsample(self, indicator, batch_size, labels): 52 | """Returns subsampled minibatch. 53 | 54 | Args: 55 | indicator: boolean tensor of shape [N] whose 56 | True entries can be sampled. 57 | batch_size: desired batch size. 58 | labels: boolean tensor of shape [N] denoting 59 | positive(=True) and negative(=False) examples. 60 | 61 | Returns: 62 | sampled_idx: boolean tensor of shape [N], True for entries which are 63 | sampled. 64 | sampled_pos_idx: boolean tensor of shape [N], True for entries which are 65 | positive samples. 66 | 67 | Raises: 68 | ValueError: if labels and indicator are not 1D boolean tensors. 69 | """ 70 | if len(indicator.get_shape().as_list()) != 1: 71 | raise ValueError( 72 | 'indicator must be 1 dimensional, got a tensor of ' 73 | 'shape %s' % indicator.get_shape()) 74 | if len(labels.get_shape().as_list()) != 1: 75 | raise ValueError('labels must be 1 dimensional, got a tensor of ' 76 | 'shape %s' % labels.get_shape()) 77 | if labels.dtype != tf.bool: 78 | raise ValueError('labels should be of type bool. Received: %s' % 79 | labels.dtype) 80 | if indicator.dtype != tf.bool: 81 | raise ValueError('indicator should be of type bool. 
Received: %s' % 82 | indicator.dtype) 83 | 84 | # Only sample from indicated samples 85 | negative_idx = tf.logical_not(labels) 86 | positive_idx = tf.logical_and(labels, indicator) 87 | negative_idx = tf.logical_and(negative_idx, indicator) 88 | 89 | # Sample positive and negative samples separately 90 | max_num_pos = int(self._positive_fraction * batch_size) 91 | sampled_pos_idx = self.subsample_indicator(positive_idx, max_num_pos) 92 | max_num_neg = batch_size - tf.reduce_sum( 93 | tf.cast(sampled_pos_idx, tf.int32)) 94 | sampled_neg_idx = self.subsample_indicator(negative_idx, max_num_neg) 95 | 96 | sampled_idx = tf.logical_or(sampled_pos_idx, sampled_neg_idx) 97 | 98 | return sampled_idx, sampled_pos_idx 99 | -------------------------------------------------------------------------------- /avod/experiments/run_evaluation.py: -------------------------------------------------------------------------------- 1 | """Detection model evaluator. 2 | 3 | This runs the DetectionModel evaluator. 4 | """ 5 | 6 | import argparse 7 | import os 8 | 9 | import tensorflow as tf 10 | 11 | import avod 12 | import avod.builders.config_builder_util as config_builder 13 | from avod.builders.dataset_builder import DatasetBuilder 14 | from avod.core.models.avod_model import AvodModel 15 | from avod.core.models.rpn_model import RpnModel 16 | from avod.core.evaluator import Evaluator 17 | 18 | 19 | def evaluate(model_config, eval_config, dataset_config): 20 | 21 | # Parse eval config 22 | eval_mode = eval_config.eval_mode 23 | if eval_mode not in ['val', 'test']: 24 | raise ValueError('Evaluation mode can only be set to `val` or `test`') 25 | evaluate_repeatedly = eval_config.evaluate_repeatedly 26 | 27 | # Parse dataset config 28 | data_split = dataset_config.data_split 29 | if data_split == 'train': 30 | dataset_config.data_split_dir = 'training' 31 | dataset_config.has_labels = True 32 | 33 | elif data_split.startswith('val'): 34 | dataset_config.data_split_dir = 'training' 35 | 36 | # Don't load labels for val split when running in test mode 37 | if eval_mode == 'val': 38 | dataset_config.has_labels = True 39 | elif eval_mode == 'test': 40 | dataset_config.has_labels = False 41 | 42 | elif data_split == 'test': 43 | dataset_config.data_split_dir = 'testing' 44 | dataset_config.has_labels = False 45 | 46 | else: 47 | raise ValueError('Invalid data split', data_split) 48 | 49 | # Convert to object to overwrite repeated fields 50 | dataset_config = config_builder.proto_to_obj(dataset_config) 51 | 52 | # Remove augmentation during evaluation 53 | dataset_config.aug_list = [] 54 | 55 | # Build the dataset object 56 | dataset = DatasetBuilder.build_kitti_dataset(dataset_config, 57 | use_defaults=False) 58 | 59 | # Setup the model 60 | model_name = model_config.model_name 61 | 62 | # Convert to object to overwrite repeated fields 63 | model_config = config_builder.proto_to_obj(model_config) 64 | 65 | # Switch path drop off during evaluation 66 | model_config.path_drop_probabilities = [1.0, 1.0] 67 | 68 | with tf.Graph().as_default(): 69 | if model_name == 'avod_model': 70 | model = AvodModel(model_config, train_val_test=eval_mode, 71 | dataset=dataset) 72 | elif model_name == 'rpn_model': 73 | model = RpnModel(model_config, train_val_test=eval_mode, 74 | dataset=dataset) 75 | else: 76 | raise ValueError('Invalid model name {}'.format(model_name)) 77 | 78 | model_evaluator = Evaluator(model, 79 | dataset_config, 80 | eval_config) 81 | 82 | if evaluate_repeatedly: 83 | model_evaluator.repeated_checkpoint_run() 84 | 
else: 85 | model_evaluator.run_latest_checkpoints() 86 | 87 | 88 | def main(_): 89 | parser = argparse.ArgumentParser() 90 | 91 | default_pipeline_config_path = avod.root_dir() + \ 92 | '/configs/avod_cars_example.config' 93 | 94 | parser.add_argument('--pipeline_config', 95 | type=str, 96 | dest='pipeline_config_path', 97 | default=default_pipeline_config_path, 98 | help='Path to the pipeline config') 99 | 100 | parser.add_argument('--data_split', 101 | type=str, 102 | dest='data_split', 103 | default='val', 104 | help='Data split for evaluation') 105 | 106 | parser.add_argument('--device', 107 | type=str, 108 | dest='device', 109 | default='0', 110 | help='CUDA device id') 111 | 112 | args = parser.parse_args() 113 | 114 | # Parse pipeline config 115 | model_config, _, eval_config, dataset_config = \ 116 | config_builder.get_configs_from_pipeline_file( 117 | args.pipeline_config_path, 118 | is_training=False) 119 | 120 | # Overwrite data split 121 | dataset_config.data_split = args.data_split 122 | 123 | # Set CUDA device id 124 | os.environ['CUDA_VISIBLE_DEVICES'] = args.device 125 | 126 | evaluate(model_config, eval_config, dataset_config) 127 | 128 | 129 | if __name__ == '__main__': 130 | tf.app.run() 131 | -------------------------------------------------------------------------------- /avod/core/ops_test.py: -------------------------------------------------------------------------------- 1 | """Tests for avod.core.ops.""" 2 | import numpy as np 3 | import tensorflow as tf 4 | 5 | from avod.core import ops 6 | 7 | 8 | class OpsTestIndicesToDenseVector(tf.test.TestCase): 9 | 10 | def test_indices_to_dense_vector(self): 11 | size = 10000 12 | num_indices = np.random.randint(size) 13 | rand_indices = np.random.permutation(np.arange(size))[0:num_indices] 14 | 15 | expected_output = np.zeros(size, dtype=np.float32) 16 | expected_output[rand_indices] = 1. 17 | 18 | tf_rand_indices = tf.constant(rand_indices) 19 | indicator = ops.indices_to_dense_vector(tf_rand_indices, size) 20 | 21 | with self.test_session() as sess: 22 | output = sess.run(indicator) 23 | self.assertAllEqual(output, expected_output) 24 | self.assertEqual(output.dtype, expected_output.dtype) 25 | 26 | def test_indices_to_dense_vector_size_at_inference(self): 27 | size = 5000 28 | num_indices = 250 29 | all_indices = np.arange(size) 30 | rand_indices = np.random.permutation(all_indices)[0:num_indices] 31 | 32 | expected_output = np.zeros(size, dtype=np.float32) 33 | expected_output[rand_indices] = 1. 
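# The vector size is passed as tf.shape of a fed placeholder, so it is
# only known at run time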
34 | 35 | tf_all_indices = tf.placeholder(tf.int32) 36 | tf_rand_indices = tf.constant(rand_indices) 37 | indicator = ops.indices_to_dense_vector(tf_rand_indices, 38 | tf.shape(tf_all_indices)[0]) 39 | feed_dict = {tf_all_indices: all_indices} 40 | 41 | with self.test_session() as sess: 42 | output = sess.run(indicator, feed_dict=feed_dict) 43 | self.assertAllEqual(output, expected_output) 44 | self.assertEqual(output.dtype, expected_output.dtype) 45 | 46 | def test_indices_to_dense_vector_int(self): 47 | size = 500 48 | num_indices = 25 49 | rand_indices = np.random.permutation(np.arange(size))[0:num_indices] 50 | 51 | expected_output = np.zeros(size, dtype=np.int64) 52 | expected_output[rand_indices] = 1 53 | 54 | tf_rand_indices = tf.constant(rand_indices) 55 | indicator = ops.indices_to_dense_vector( 56 | tf_rand_indices, size, 1, dtype=tf.int64) 57 | 58 | with self.test_session() as sess: 59 | output = sess.run(indicator) 60 | self.assertAllEqual(output, expected_output) 61 | self.assertEqual(output.dtype, expected_output.dtype) 62 | 63 | def test_indices_to_dense_vector_custom_values(self): 64 | size = 100 65 | num_indices = 10 66 | rand_indices = np.random.permutation(np.arange(size))[0:num_indices] 67 | indices_value = np.random.rand(1) 68 | default_value = np.random.rand(1) 69 | 70 | expected_output = np.float32(np.ones(size) * default_value) 71 | expected_output[rand_indices] = indices_value 72 | 73 | tf_rand_indices = tf.constant(rand_indices) 74 | indicator = ops.indices_to_dense_vector( 75 | tf_rand_indices, 76 | size, 77 | indices_value=indices_value, 78 | default_value=default_value) 79 | 80 | with self.test_session() as sess: 81 | output = sess.run(indicator) 82 | self.assertAllClose(output, expected_output) 83 | self.assertEqual(output.dtype, expected_output.dtype) 84 | 85 | def test_indices_to_dense_vector_all_indices_as_input(self): 86 | size = 500 87 | num_indices = 500 88 | rand_indices = np.random.permutation(np.arange(size))[0:num_indices] 89 | 90 | expected_output = np.ones(size, dtype=np.float32) 91 | 92 | tf_rand_indices = tf.constant(rand_indices) 93 | indicator = ops.indices_to_dense_vector(tf_rand_indices, size) 94 | 95 | with self.test_session() as sess: 96 | output = sess.run(indicator) 97 | self.assertAllEqual(output, expected_output) 98 | self.assertEqual(output.dtype, expected_output.dtype) 99 | 100 | def test_indices_to_dense_vector_empty_indices_as_input(self): 101 | size = 500 102 | rand_indices = [] 103 | 104 | expected_output = np.zeros(size, dtype=np.float32) 105 | 106 | tf_rand_indices = tf.constant(rand_indices) 107 | indicator = ops.indices_to_dense_vector(tf_rand_indices, size) 108 | 109 | with self.test_session() as sess: 110 | output = sess.run(indicator) 111 | self.assertAllEqual(output, expected_output) 112 | self.assertEqual(output.dtype, expected_output.dtype) 113 | 114 | 115 | if __name__ == '__main__': 116 | tf.test.main() 117 | -------------------------------------------------------------------------------- /avod/core/mini_batch_utils_test.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import tensorflow as tf 3 | 4 | from avod.core import box_list 5 | from avod.core import box_list_ops 6 | from avod.builders.dataset_builder import DatasetBuilder 7 | 8 | 9 | class MiniBatchUtilsTest(tf.test.TestCase): 10 | 11 | @classmethod 12 | def setUpClass(cls): 13 | cls.dataset = DatasetBuilder.build_kitti_dataset( 14 | DatasetBuilder.KITTI_UNITTEST) 15 | 16 | cls.mb_utils = 
cls.dataset.kitti_utils.mini_batch_utils 17 | 18 | def test_get_anchors_info(self): 19 | 20 | # Take the first non-empty sample 21 | sample = self.dataset.sample_names[1] 22 | 23 | # Check the anchors info for first class type 24 | anchors_info = self.mb_utils.get_anchors_info( 25 | self.dataset.classes_name, 26 | self.dataset.kitti_utils.anchor_strides, 27 | sample) 28 | 29 | anchor_indices = anchors_info[0] 30 | anchor_ious = anchors_info[1] 31 | anchor_offsets = anchors_info[2] 32 | anchor_classes = anchors_info[3] 33 | 34 | # Lengths should all be the same 35 | self.assertEqual(len(anchor_indices), len(anchor_ious)) 36 | self.assertEqual(len(anchor_indices), len(anchor_offsets)) 37 | self.assertEqual(len(anchor_indices), len(anchor_classes)) 38 | 39 | # Indices, IOUs, and classes values should all be >= 0 40 | self.assertTrue((anchor_indices >= 0).all()) 41 | self.assertTrue((anchor_ious >= 0).all()) 42 | self.assertTrue((anchor_classes >= 0).all()) 43 | 44 | # Offsets should be (N, 6) 45 | self.assertTrue(len(anchor_offsets.shape) == 2) 46 | self.assertTrue(anchor_offsets.shape[1] == 6) 47 | 48 | def test_iou_mask_ops(self): 49 | # corners are in [y1, x1, y2, x2] format 50 | corners_pred = tf.constant( 51 | [[4.0, 3.0, 7.0, 5.0], 52 | [14.0, 14.0, 16.0, 16.0], 53 | [0.0, 0.0, 21.0, 19.0], 54 | [3.0, 4.0, 5.0, 7.0]]) 55 | corners_gt = tf.constant( 56 | [[4.0, 3.0, 7.0, 6.0], 57 | [14.0, 14.0, 15.0, 15.0], 58 | [0.0, 0.0, 20.0, 20.0]]) 59 | # 3 classes 60 | class_indices = tf.constant([1., 2., 3.]) 61 | 62 | exp_ious = [[0.66666669, 0., 0.02255639, 0.15384616], 63 | [0., 0.25, 0.00250627, 0.], 64 | [0.015, 0.01, 0.90692127, 0.015]] 65 | 66 | exp_max_ious = np.array([0.66666669, 0.25, 0.90692127, 0.15384616]) 67 | exp_max_indices = np.array([0, 1, 2, 0]) 68 | 69 | exp_pos_mask = np.array([True, False, True, False]) 70 | 71 | exp_class_and_background_indices = np.array([1, 0, 3, 0]) 72 | 73 | # Convert to box_list format 74 | boxes_pred = box_list.BoxList(corners_pred) 75 | boxes_gt = box_list.BoxList(corners_gt) 76 | # Calculate IoU 77 | iou = box_list_ops.iou(boxes_gt, 78 | boxes_pred) 79 | 80 | # Get max IoU, the dimension should match the anchors we are 81 | # evaluating 82 | max_ious = tf.reduce_max(iou, axis=0) 83 | max_iou_indices = tf.argmax(iou, axis=0) 84 | 85 | # Sample a mini-batch from anchors with highest IoU match 86 | mini_batch_size = 4 87 | 88 | # Custom positive/negative iou ranges 89 | neg_2d_iou_range = [0.0, 0.3] 90 | pos_2d_iou_range = [0.6, 0.7] 91 | 92 | mb_mask, mb_pos_mask = \ 93 | self.mb_utils.sample_mini_batch(max_ious, 94 | mini_batch_size, 95 | neg_2d_iou_range, 96 | pos_2d_iou_range) 97 | 98 | mb_class_indices = self.mb_utils.mask_class_label_indices( 99 | mb_pos_mask, mb_mask, max_iou_indices, class_indices) 100 | 101 | with self.test_session() as sess: 102 | iou_out = sess.run(iou) 103 | max_ious_out, max_iou_indices_out = sess.run([max_ious, 104 | max_iou_indices]) 105 | mb_mask_out, mb_pos_mask_out = sess.run([mb_mask, 106 | mb_pos_mask]) 107 | class_indices_out = sess.run(mb_class_indices) 108 | 109 | self.assertAllClose(iou_out, exp_ious) 110 | self.assertAllClose(max_ious_out, exp_max_ious) 111 | self.assertAllEqual(max_iou_indices_out, exp_max_indices) 112 | self.assertAllEqual(exp_pos_mask, mb_pos_mask_out) 113 | self.assertAllEqual(class_indices_out, 114 | exp_class_and_background_indices) 115 | 116 | 117 | if __name__ == '__main__': 118 | tf.test.main() 119 | --------------------------------------------------------------------------------
/avod/experiments/run_inference.py: -------------------------------------------------------------------------------- 1 | """Detection model inference. 2 | 3 | This runs the DetectionModel evaluator in test mode to output detections. 4 | """ 5 | 6 | import argparse 7 | import os 8 | import sys 9 | 10 | import tensorflow as tf 11 | 12 | import avod 13 | import avod.builders.config_builder_util as config_builder 14 | from avod.builders.dataset_builder import DatasetBuilder 15 | from avod.core.models.avod_model import AvodModel 16 | from avod.core.models.rpn_model import RpnModel 17 | from avod.core.evaluator import Evaluator 18 | 19 | 20 | def inference(model_config, eval_config, 21 | dataset_config, data_split, 22 | ckpt_indices): 23 | 24 | # Overwrite the defaults 25 | dataset_config = config_builder.proto_to_obj(dataset_config) 26 | 27 | dataset_config.data_split = data_split 28 | dataset_config.data_split_dir = 'training' 29 | if data_split == 'test': 30 | dataset_config.data_split_dir = 'testing' 31 | 32 | eval_config.eval_mode = 'test' 33 | eval_config.evaluate_repeatedly = False 34 | 35 | dataset_config.has_labels = False 36 | # Enable this to see the actual memory being used 37 | eval_config.allow_gpu_mem_growth = True 38 | 39 | eval_config = config_builder.proto_to_obj(eval_config) 40 | # Grab the checkpoint indices to evaluate 41 | eval_config.ckpt_indices = ckpt_indices 42 | 43 | # Remove augmentation during evaluation in test mode 44 | dataset_config.aug_list = [] 45 | 46 | # Build the dataset object 47 | dataset = DatasetBuilder.build_kitti_dataset(dataset_config, 48 | use_defaults=False) 49 | 50 | # Setup the model 51 | model_name = model_config.model_name 52 | # Overwrite repeated field 53 | model_config = config_builder.proto_to_obj(model_config) 54 | # Switch path drop off during evaluation 55 | model_config.path_drop_probabilities = [1.0, 1.0] 56 | 57 | with tf.Graph().as_default(): 58 | if model_name == 'avod_model': 59 | model = AvodModel(model_config, 60 | train_val_test=eval_config.eval_mode, 61 | dataset=dataset) 62 | elif model_name == 'rpn_model': 63 | model = RpnModel(model_config, 64 | train_val_test=eval_config.eval_mode, 65 | dataset=dataset) 66 | else: 67 | raise ValueError('Invalid model name {}'.format(model_name)) 68 | 69 | model_evaluator = Evaluator(model, dataset_config, eval_config) 70 | model_evaluator.run_latest_checkpoints() 71 | 72 | 73 | def main(_): 74 | parser = argparse.ArgumentParser() 75 | 76 | # Example usage 77 | # --checkpoint_name='avod_cars_example' 78 | # --data_split='test' 79 | # --ckpt_indices=50 100 112 80 | # Optional arg: 81 | # --device=0 82 | 83 | parser.add_argument('--checkpoint_name', 84 | type=str, 85 | dest='checkpoint_name', 86 | required=True, 87 | help='Checkpoint name must be specified as a str\ 88 | and must match the experiment config file name.') 89 | 90 | parser.add_argument('--data_split', 91 | type=str, 92 | dest='data_split', 93 | required=True, 94 | help='Data split must be specified e.g. 
val or test') 95 | 96 | parser.add_argument( 97 | '--ckpt_indices', 98 | type=int, 99 | nargs='+', 100 | dest='ckpt_indices', 101 | required=True, 102 | help='Checkpoint indices must be a set of \ 103 | integers with space in between -> 0 10 20 etc') 104 | 105 | parser.add_argument('--device', 106 | type=str, 107 | dest='device', 108 | default='0', 109 | help='CUDA device id') 110 | 111 | args = parser.parse_args() 112 | if len(sys.argv) == 1: 113 | parser.print_help() 114 | sys.exit(1) 115 | 116 | experiment_config = args.checkpoint_name + '.config' 117 | 118 | # Read the config from the experiment folder 119 | experiment_config_path = avod.root_dir() + '/data/outputs/' +\ 120 | args.checkpoint_name + '/' + experiment_config 121 | 122 | model_config, _, eval_config, dataset_config = \ 123 | config_builder.get_configs_from_pipeline_file( 124 | experiment_config_path, is_training=False) 125 | 126 | os.environ['CUDA_VISIBLE_DEVICES'] = args.device 127 | inference(model_config, eval_config, 128 | dataset_config, args.data_split, 129 | args.ckpt_indices) 130 | 131 | 132 | if __name__ == '__main__': 133 | tf.app.run() 134 | -------------------------------------------------------------------------------- /avod/builders/optimizer_builder.py: -------------------------------------------------------------------------------- 1 | # Copyright 2017 The TensorFlow Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | # ============================================================================== 15 | 16 | """Functions to build DetectionModel training optimizers.""" 17 | 18 | import tensorflow as tf 19 | 20 | slim = tf.contrib.slim 21 | 22 | 23 | def build(optimizer_config, 24 | global_summaries, 25 | global_step=None): 26 | """Create optimizer based on config. 27 | 28 | Args: 29 | optimizer_config: A Optimizer proto message. 30 | global_summaries: A set to attach learning rate summary to. 31 | global_step: (optional) A tensor that contains the global step. 32 | This is required for applying exponential decay to the learning 33 | rate. 34 | 35 | Returns: 36 | An optimizer. 37 | 38 | Raises: 39 | ValueError: when using an unsupported input data type. 
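Supported optimizer types are rms_prop_optimizer, momentum_optimizer, adam_optimizer, and gradient_descent.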
40 | """ 41 | optimizer_type = optimizer_config.WhichOneof('optimizer') 42 | optimizer = None 43 | 44 | if optimizer_type == 'rms_prop_optimizer': 45 | config = optimizer_config.rms_prop_optimizer 46 | optimizer = tf.train.RMSPropOptimizer( 47 | _create_learning_rate(config.learning_rate, 48 | global_summaries, 49 | global_step), 50 | decay=config.decay, 51 | momentum=config.momentum_optimizer_value, 52 | epsilon=config.epsilon) 53 | 54 | elif optimizer_type == 'momentum_optimizer': 55 | config = optimizer_config.momentum_optimizer 56 | optimizer = tf.train.MomentumOptimizer( 57 | _create_learning_rate(config.learning_rate, 58 | global_summaries, 59 | global_step), 60 | momentum=config.momentum_optimizer_value) 61 | 62 | elif optimizer_type == 'adam_optimizer': 63 | config = optimizer_config.adam_optimizer 64 | optimizer = tf.train.AdamOptimizer( 65 | _create_learning_rate(config.learning_rate, 66 | global_summaries, 67 | global_step)) 68 | 69 | elif optimizer_type == 'gradient_descent': 70 | config = optimizer_config.gradient_descent 71 | optimizer = tf.train.GradientDescentOptimizer( 72 | _create_learning_rate(config.learning_rate, 73 | global_summaries, 74 | global_step)) 75 | 76 | if optimizer is None: 77 | raise ValueError('Optimizer %s not supported.' % optimizer_type) 78 | 79 | if optimizer_config.use_moving_average: 80 | optimizer = tf.contrib.opt.MovingAverageOptimizer( 81 | optimizer, average_decay=optimizer_config.moving_average_decay) 82 | 83 | return optimizer 84 | 85 | 86 | def _create_learning_rate(learning_rate_config, 87 | global_summaries, 88 | global_step): 89 | """Create optimizer learning rate based on config. 90 | 91 | Args: 92 | learning_rate_config: A LearningRate proto message. 93 | global_summaries: A set to attach learning rate summary to. 94 | global_step: A tensor that contains the global step. 95 | 96 | Returns: 97 | A learning rate. 98 | 99 | Raises: 100 | ValueError: when using an unsupported input data type. 101 | """ 102 | learning_rate = None 103 | learning_rate_type = learning_rate_config.WhichOneof('learning_rate') 104 | if learning_rate_type == 'constant_learning_rate': 105 | config = learning_rate_config.constant_learning_rate 106 | learning_rate = config.learning_rate 107 | 108 | elif learning_rate_type == 'exponential_decay_learning_rate': 109 | config = learning_rate_config.exponential_decay_learning_rate 110 | learning_rate = tf.train.exponential_decay( 111 | config.initial_learning_rate, 112 | global_step, 113 | config.decay_steps, 114 | config.decay_factor, 115 | staircase=config.staircase) 116 | 117 | if learning_rate is None: 118 | raise ValueError('Learning_rate %s not supported.' 
% learning_rate_type) 119 | 120 | global_summaries.add(tf.summary.scalar('Learning_Rate', learning_rate)) 121 | return learning_rate 122 | -------------------------------------------------------------------------------- /avod/core/anchor_encoder_test.py: -------------------------------------------------------------------------------- 1 | import unittest 2 | import numpy as np 3 | import tensorflow as tf 4 | 5 | from avod.core import anchor_encoder 6 | 7 | 8 | class AnchorEncoderTest(unittest.TestCase): 9 | 10 | def test_anchor_to_offset(self): 11 | 12 | # anchor format is [x, y, z, dim_x, dim_y, dim_z] 13 | anchors = np.asarray([[1, 2, 3, 4, 6, 5], 14 | [0, 0, 0, 2, 3, 1]], dtype=np.float32) 15 | 16 | # same formatting goes for the labels 17 | # which are also in anchor format 18 | anchors_gt =\ 19 | np.array([2.0, 1.5, 7.0, 1.0, 0.5, 1.8]) 20 | 21 | expected_offsets = np.array( 22 | [[0.25, -0.083, 0.8, -1.386, -2.484, -1.022], 23 | [1., 0.5, 7., -0.693, -1.791, 0.588]], 24 | dtype=np.float32) 25 | 26 | anchor_offsets = anchor_encoder.anchor_to_offset(anchors, 27 | anchors_gt) 28 | np.testing.assert_almost_equal(anchor_offsets, 29 | expected_offsets, 30 | decimal=3) 31 | 32 | def test_anchor_tensor_to_offset(self): 33 | 34 | # anchor format is [x, y, z, dim_x, dim_y, dim_z] 35 | anchors = np.asarray([[1, 2, 3, 4, 6, 5], 36 | [0, 0, 0, 2, 3, 1]], dtype=np.float32) 37 | 38 | anchors_tensor = \ 39 | tf.convert_to_tensor(anchors, dtype=tf.float32) 40 | 41 | # we expect this in matrix format for the tensor version 42 | # of this function. In this case, it's just a repeated 43 | # gt associated with each anchor 44 | anchors_gt =\ 45 | np.array([[2.0, 1.5, 7.0, 1.0, 0.5, 1.8], 46 | [2.0, 1.5, 7.0, 1.0, 0.5, 1.8]]) 47 | 48 | anchors_gt_tensor = \ 49 | tf.convert_to_tensor(anchors_gt, dtype=tf.float32) 50 | 51 | expected_offsets = np.array( 52 | [[0.25, -0.083, 0.8, -1.386, -2.484, -1.022], 53 | [1., 0.5, 7., -0.693, -1.791, 0.588]], 54 | dtype=np.float32) 55 | 56 | # test in tensor space 57 | anchor_offsets = anchor_encoder.tf_anchor_to_offset(anchors_tensor, 58 | anchors_gt_tensor) 59 | 60 | sess = tf.Session() 61 | with sess.as_default(): 62 | anchor_offsets_out = anchor_offsets.eval() 63 | np.testing.assert_almost_equal(anchor_offsets_out, 64 | expected_offsets, 65 | decimal=3) 66 | 67 | def test_offset_to_anchor(self): 68 | 69 | # anchor format is [x, y, z, dim_x, dim_y, dim_z] 70 | anchors = np.asarray([[1, 2, 3, 4, 6, 5], 71 | [0, 0, 0, 2, 3, 1]], dtype=np.float32) 72 | 73 | # anchor offset prediction is [tx, ty, tz, tdim_x, tdim_y, tdim_z] 74 | anchor_offsets = np.array( 75 | [[0.5, 0.02, 0.01, 0.1, 0.4, 0.03], 76 | [0.04, 0.1, 0.03, 0.001, 0.3, 0.03]], 77 | dtype=np.float32) 78 | 79 | expected_anchors = np.array( 80 | [[3.0, 2.12, 3.05, 4.420, 8.9509, 5.152], 81 | [0.08, 0.3, 0.03, 2.002, 4.05, 1.03]], 82 | dtype=np.float32) 83 | 84 | anchors = anchor_encoder.offset_to_anchor(anchors, 85 | anchor_offsets) 86 | np.testing.assert_almost_equal(anchors, 87 | expected_anchors, 88 | decimal=3) 89 | 90 | def test_offset_tensor_to_anchor(self): 91 | 92 | # anchor format is [x, y, z, dim_x, dim_y, dim_z] 93 | anchors = np.asarray([[1, 2, 3, 4, 6, 5], 94 | [0, 0, 0, 2, 3, 1]], dtype=np.float32) 95 | 96 | anchor_tensor = \ 97 | tf.convert_to_tensor(anchors, dtype=tf.float32) 98 | 99 | # anchor offset prediction is [tx, ty, tz, tdim_x, tdim_y, tdim_z] 100 | anchor_offsets = np.array( 101 | [[0.5, 0.02, 0.01, 0.1, 0.4, 0.03], 102 | [0.04, 0.1, 0.03, 0.001, 0.3, 0.03]], 103 | dtype=np.float32) 104 | 
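# Offsets decode as x = x_a + tx * dim_xa (likewise for y, z) and
# dim = dim_a * exp(t_dim), consistent with the expected values below,
# e.g. 3.0 = 1 + 0.5 * 4 and 4.420 ~= 4 * exp(0.1)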
105 | anchor_offset_tensor = \ 106 | tf.convert_to_tensor(anchor_offsets, dtype=tf.float32) 107 | 108 | expected_anchors = np.array( 109 | [[3.0, 2.12, 3.05, 4.420, 8.9509, 5.152], 110 | [0.08, 0.3, 0.03, 2.002, 4.05, 1.03]], 111 | dtype=np.float32) 112 | 113 | anchors_tensor = anchor_encoder.offset_to_anchor( 114 | anchor_tensor, anchor_offset_tensor) 115 | 116 | sess = tf.Session() 117 | with sess.as_default(): 118 | anchors = anchors_tensor.eval() 119 | 120 | np.testing.assert_almost_equal(anchors, 121 | expected_anchors, 122 | decimal=3) 123 | 124 | 125 | if __name__ == '__main__': 126 | unittest.main() 127 | --------------------------------------------------------------------------------