├── .gitignore ├── LICENSE.txt ├── README.md ├── demos ├── demo_odometry.py ├── demo_raw.py └── demo_raw_cv2.py ├── pykitti.png ├── pykitti ├── __init__.py ├── downloader │ └── tracking.py ├── odometry.py ├── raw.py ├── tracking.py └── utils.py ├── setup.cfg └── setup.py /.gitignore: -------------------------------------------------------------------------------- 1 | .DS_Store 2 | __pycache__/ 3 | build/ 4 | dist/ 5 | .cache 6 | .vscode 7 | *.egg-info 8 | *.pyc -------------------------------------------------------------------------------- /LICENSE.txt: -------------------------------------------------------------------------------- 1 | The MIT License (MIT) 2 | 3 | Copyright (c) 2016 STARS Laboratory 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: 6 | 7 | The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. 8 | 9 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 10 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # pykitti 2 | ![KITTI](pykitti.png) 3 | 4 | This package provides a minimal set of tools for working with the KITTI dataset [[1]](#references) in Python. So far only the raw datasets and odometry benchmark datasets are supported, but we're working on adding support for the others. We welcome contributions from the community. 5 | 6 | ## Installation 7 | 8 | ### Using pip 9 | You can install pykitti via pip using 10 | ``` 11 | pip install pykitti 12 | ``` 13 | 14 | ### From source 15 | To install the package from source, simply clone or download the repository to your machine 16 | ``` 17 | git clone https://github.com/utiasSTARS/pykitti.git 18 | ``` 19 | and run the provided setup tool 20 | ``` 21 | cd pykitti 22 | python setup.py install 23 | ``` 24 | 25 | ## Assumptions 26 | This package assumes that you have also downloaded the calibration data associated with the sequences you want to work on (these are separate files from the sequences themselves), and that the directory structure is unchanged from the original structure laid out in the KITTI zip files. 27 | 28 | ## Notation 29 | Homogeneous coordinate transformations are provided as 4x4 `numpy.array` objects and are denoted as `T_destinationFrame_originFrame`. 30 | 31 | Pinhole camera intrinsics for camera `N` are provided as 3x3 `numpy.array` objects and are denoted as `K_camN`. Stereo pair baselines are given in meters as `b_gray` for the monochrome stereo pair (`cam0` and `cam1`), and `b_rgb` for the color stereo pair (`cam2` and `cam3`). 32 | 33 | ## Example 34 | More detailed examples can be found in the `demos` directory, but the general idea is to specify what dataset you want to load, then access the parts you need and do something with them. 35 | 36 | Camera and velodyne data are available via generators for easy sequential access (e.g., for visual odometry), and by indexed getter methods for random access (e.g., for deep learning). Images are loaded as `PIL.Image` objects using Pillow. 37 | 38 | ```python 39 | import pykitti 40 | 41 | basedir = '/your/dataset/dir' 42 | date = '2011_09_26' 43 | drive = '0019' 44 | 45 | # The 'frames' argument is optional - default: None, which loads the whole dataset. 46 | # Calibration, timestamps, and IMU data are read automatically. 47 | # Camera and velodyne data are available via properties that create generators 48 | # when accessed, or through getter methods that provide random access. 49 | data = pykitti.raw(basedir, date, drive, frames=range(0, 50, 5)) 50 | 51 | # dataset.calib: Calibration data are accessible as a named tuple 52 | # dataset.timestamps: Timestamps are parsed into a list of datetime objects 53 | # dataset.oxts: List of OXTS packets and 6-dof poses as named tuples 54 | # dataset.camN: Returns a generator that loads individual images from camera N 55 | # dataset.get_camN(idx): Returns the image from camera N at idx 56 | # dataset.gray: Returns a generator that loads monochrome stereo pairs (cam0, cam1) 57 | # dataset.get_gray(idx): Returns the monochrome stereo pair at idx 58 | # dataset.rgb: Returns a generator that loads RGB stereo pairs (cam2, cam3) 59 | # dataset.get_rgb(idx): Returns the RGB stereo pair at idx 60 | # dataset.velo: Returns a generator that loads velodyne scans as [x,y,z,reflectance] 61 | # dataset.get_velo(idx): Returns the velodyne scan at idx 62 | 63 | point_velo = np.array([0,0,0,1]) 64 | point_cam0 = data.calib.T_cam0_velo.dot(point_velo) 65 | 66 | point_imu = np.array([0,0,0,1]) 67 | point_w = [o.T_w_imu.dot(point_imu) for o in data.oxts] 68 | 69 | for cam0_image in data.cam0: 70 | # do something 71 | pass 72 | 73 | cam2_image, cam3_image = data.get_rgb(3) 74 | ``` 75 | ### OpenCV 76 | PIL Image data can be converted to an OpenCV-friendly format using numpy and `cv2.cvtColor`: 77 | 78 | ```python 79 | img_np = np.array(img) 80 | img_cv2 = cv2.cvtColor(img_np, cv2.COLOR_RGB2BGR) 81 | ``` 82 | 83 | Note: This package does not actually require that OpenCV be installed on your system, except to run `demo_raw_cv2.py`. 84 | 85 | ## References 86 | [1] A. Geiger, P. Lenz, C. Stiller, and R. Urtasun, "Vision meets robotics: The KITTI dataset," Int. J. Robot. Research (IJRR), vol. 32, no. 11, pp. 1231–1237, Sep. 2013. [http://www.cvlibs.net/datasets/kitti/](http://www.cvlibs.net/datasets/kitti/) 87 | ` -------------------------------------------------------------------------------- /demos/demo_odometry.py: -------------------------------------------------------------------------------- 1 | """Example of pykitti.odometry usage.""" 2 | import itertools 3 | import matplotlib.pyplot as plt 4 | import numpy as np 5 | from mpl_toolkits.mplot3d import Axes3D 6 | 7 | import pykitti 8 | 9 | __author__ = "Lee Clement" 10 | __email__ = "lee.clement@robotics.utias.utoronto.ca" 11 | 12 | # Change this to the directory where you store KITTI data 13 | basedir = '/Users/leeclement/Desktop/KITTI/odometry/dataset' 14 | 15 | # Specify the dataset to load 16 | sequence = '04' 17 | 18 | # Load the data. Optionally, specify the frame range to load. 19 | # dataset = pykitti.odometry(basedir, sequence) 20 | dataset = pykitti.odometry(basedir, sequence, frames=range(0, 20, 5)) 21 | 22 | # dataset.calib: Calibration data are accessible as a named tuple 23 | # dataset.timestamps: Timestamps are parsed into a list of timedelta objects 24 | # dataset.poses: List of ground truth poses T_w_cam0 25 | # dataset.camN: Generator to load individual images from camera N 26 | # dataset.gray: Generator to load monochrome stereo pairs (cam0, cam1) 27 | # dataset.rgb: Generator to load RGB stereo pairs (cam2, cam3) 28 | # dataset.velo: Generator to load velodyne scans as [x,y,z,reflectance] 29 | 30 | # Grab some data 31 | second_pose = dataset.poses[1] 32 | first_gray = next(iter(dataset.gray)) 33 | first_cam1 = next(iter(dataset.cam1)) 34 | first_rgb = dataset.get_rgb(0) 35 | first_cam2 = dataset.get_cam2(0) 36 | third_velo = dataset.get_velo(2) 37 | 38 | # Display some of the data 39 | np.set_printoptions(precision=4, suppress=True) 40 | print('\nSequence: ' + str(dataset.sequence)) 41 | print('\nFrame range: ' + str(dataset.frames)) 42 | 43 | print('\nGray stereo pair baseline [m]: ' + str(dataset.calib.b_gray)) 44 | print('\nRGB stereo pair baseline [m]: ' + str(dataset.calib.b_rgb)) 45 | 46 | print('\nFirst timestamp: ' + str(dataset.timestamps[0])) 47 | print('\nSecond ground truth pose:\n' + str(second_pose)) 48 | 49 | f, ax = plt.subplots(2, 2, figsize=(15, 5)) 50 | ax[0, 0].imshow(first_gray[0], cmap='gray') 51 | ax[0, 0].set_title('Left Gray Image (cam0)') 52 | 53 | ax[0, 1].imshow(first_cam1, cmap='gray') 54 | ax[0, 1].set_title('Right Gray Image (cam1)') 55 | 56 | ax[1, 0].imshow(first_cam2) 57 | ax[1, 0].set_title('Left RGB Image (cam2)') 58 | 59 | ax[1, 1].imshow(first_rgb[1]) 60 | ax[1, 1].set_title('Right RGB Image (cam3)') 61 | 62 | f2 = plt.figure() 63 | ax2 = f2.add_subplot(111, projection='3d') 64 | # Plot every 100th point so things don't get too bogged down 65 | velo_range = range(0, third_velo.shape[0], 100) 66 | ax2.scatter(third_velo[velo_range, 0], 67 | third_velo[velo_range, 1], 68 | third_velo[velo_range, 2], 69 | c=third_velo[velo_range, 3], 70 | cmap='gray') 71 | ax2.set_title('Third Velodyne scan (subsampled)') 72 | 73 | plt.show() 74 | -------------------------------------------------------------------------------- /demos/demo_raw.py: -------------------------------------------------------------------------------- 1 | """Example of pykitti.raw usage.""" 2 | import itertools 3 | import matplotlib.pyplot as plt 4 | import numpy as np 5 | from mpl_toolkits.mplot3d import Axes3D 6 | 7 | import pykitti 8 | 9 | __author__ = "Lee Clement" 10 | __email__ = "lee.clement@robotics.utias.utoronto.ca" 11 | 12 | # Change this to the directory where you store KITTI data 13 | basedir = '/Users/leeclement/Desktop/KITTI/raw' 14 | 15 | # Specify the dataset to load 16 | date = '2011_09_30' 17 | drive = '0034' 18 | 19 | # Load the data. Optionally, specify the frame range to load. 20 | # dataset = pykitti.raw(basedir, date, drive) 21 | dataset = pykitti.raw(basedir, date, drive, frames=range(0, 20, 5)) 22 | 23 | # dataset.calib: Calibration data are accessible as a named tuple 24 | # dataset.timestamps: Timestamps are parsed into a list of datetime objects 25 | # dataset.oxts: List of OXTS packets and 6-dof poses as named tuples 26 | # dataset.camN: Returns a generator that loads individual images from camera N 27 | # dataset.get_camN(idx): Returns the image from camera N at idx 28 | # dataset.gray: Returns a generator that loads monochrome stereo pairs (cam0, cam1) 29 | # dataset.get_gray(idx): Returns the monochrome stereo pair at idx 30 | # dataset.rgb: Returns a generator that loads RGB stereo pairs (cam2, cam3) 31 | # dataset.get_rgb(idx): Returns the RGB stereo pair at idx 32 | # dataset.velo: Returns a generator that loads velodyne scans as [x,y,z,reflectance] 33 | # dataset.get_velo(idx): Returns the velodyne scan at idx 34 | 35 | # Grab some data 36 | second_pose = dataset.oxts[1].T_w_imu 37 | first_gray = next(iter(dataset.gray)) 38 | first_cam1 = next(iter(dataset.cam1)) 39 | first_rgb = dataset.get_rgb(0) 40 | first_cam2 = dataset.get_cam2(0) 41 | third_velo = dataset.get_velo(2) 42 | 43 | # Display some of the data 44 | np.set_printoptions(precision=4, suppress=True) 45 | print('\nDrive: ' + str(dataset.drive)) 46 | print('\nFrame range: ' + str(dataset.frames)) 47 | 48 | print('\nIMU-to-Velodyne transformation:\n' + str(dataset.calib.T_velo_imu)) 49 | print('\nGray stereo pair baseline [m]: ' + str(dataset.calib.b_gray)) 50 | print('\nRGB stereo pair baseline [m]: ' + str(dataset.calib.b_rgb)) 51 | 52 | print('\nFirst timestamp: ' + str(dataset.timestamps[0])) 53 | print('\nSecond IMU pose:\n' + str(second_pose)) 54 | 55 | f, ax = plt.subplots(2, 2, figsize=(15, 5)) 56 | ax[0, 0].imshow(first_gray[0], cmap='gray') 57 | ax[0, 0].set_title('Left Gray Image (cam0)') 58 | 59 | ax[0, 1].imshow(first_cam1, cmap='gray') 60 | ax[0, 1].set_title('Right Gray Image (cam1)') 61 | 62 | ax[1, 0].imshow(first_cam2) 63 | ax[1, 0].set_title('Left RGB Image (cam2)') 64 | 65 | ax[1, 1].imshow(first_rgb[1]) 66 | ax[1, 1].set_title('Right RGB Image (cam3)') 67 | 68 | 69 | f2 = plt.figure() 70 | ax2 = f2.add_subplot(111, projection='3d') 71 | # Plot every 100th point so things don't get too bogged down 72 | velo_range = range(0, third_velo.shape[0], 100) 73 | ax2.scatter(third_velo[velo_range, 0], 74 | third_velo[velo_range, 1], 75 | third_velo[velo_range, 2], 76 | c=third_velo[velo_range, 3], 77 | cmap='gray') 78 | ax2.set_title('Third Velodyne scan (subsampled)') 79 | 80 | plt.show() 81 | -------------------------------------------------------------------------------- /demos/demo_raw_cv2.py: -------------------------------------------------------------------------------- 1 | """Example of pykitti.raw usage with OpenCV.""" 2 | import cv2 3 | import numpy as np 4 | import matplotlib.pyplot as plt 5 | 6 | import pykitti 7 | 8 | __author__ = "Lee Clement" 9 | __email__ = "lee.clement@robotics.utias.utoronto.ca" 10 | 11 | # Change this to the directory where you store KITTI data 12 | basedir = '/Users/leeclement/Desktop/KITTI/raw' 13 | 14 | # Specify the dataset to load 15 | date = '2011_09_30' 16 | drive = '0034' 17 | 18 | # Load the data. Optionally, specify the frame range to load. 19 | dataset = pykitti.raw(basedir, date, drive, 20 | frames=range(0, 20, 5)) 21 | 22 | # dataset.calib: Calibration data are accessible as a named tuple 23 | # dataset.timestamps: Timestamps are parsed into a list of datetime objects 24 | # dataset.oxts: List of OXTS packets and 6-dof poses as named tuples 25 | # dataset.camN: Returns a generator that loads individual images from camera N 26 | # dataset.get_camN(idx): Returns the image from camera N at idx 27 | # dataset.gray: Returns a generator that loads monochrome stereo pairs (cam0, cam1) 28 | # dataset.get_gray(idx): Returns the monochrome stereo pair at idx 29 | # dataset.rgb: Returns a generator that loads RGB stereo pairs (cam2, cam3) 30 | # dataset.get_rgb(idx): Returns the RGB stereo pair at idx 31 | # dataset.velo: Returns a generator that loads velodyne scans as [x,y,z,reflectance] 32 | # dataset.get_velo(idx): Returns the velodyne scan at idx 33 | 34 | # Grab some data 35 | first_gray = dataset.get_gray(0) 36 | first_rgb = dataset.get_rgb(0) 37 | 38 | # Do some stereo processing 39 | stereo = cv2.StereoBM_create() 40 | disp_gray = stereo.compute(np.array(first_gray[0]), np.array(first_gray[1])) 41 | disp_rgb = stereo.compute( 42 | cv2.cvtColor(np.array(first_rgb[0]), cv2.COLOR_RGB2GRAY), 43 | cv2.cvtColor(np.array(first_rgb[1]), cv2.COLOR_RGB2GRAY)) 44 | 45 | # Display some data 46 | f, ax = plt.subplots(2, 2, figsize=(15, 5)) 47 | ax[0, 0].imshow(first_gray[0], cmap='gray') 48 | ax[0, 0].set_title('Left Gray Image (cam0)') 49 | 50 | ax[0, 1].imshow(disp_gray, cmap='viridis') 51 | ax[0, 1].set_title('Gray Stereo Disparity') 52 | 53 | ax[1, 0].imshow(first_rgb[0]) 54 | ax[1, 0].set_title('Left RGB Image (cam2)') 55 | 56 | ax[1, 1].imshow(disp_rgb, cmap='viridis') 57 | ax[1, 1].set_title('RGB Stereo Disparity') 58 | 59 | plt.show() 60 | -------------------------------------------------------------------------------- /pykitti.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/utiasSTARS/pykitti/de337234413f4b9d192da7e3d3fc28de5281c748/pykitti.png -------------------------------------------------------------------------------- /pykitti/__init__.py: -------------------------------------------------------------------------------- 1 | """Tools for working with KITTI data.""" 2 | 3 | from pykitti.odometry import odometry 4 | from pykitti.raw import raw 5 | from pykitti.tracking import tracking 6 | 7 | __author__ = "Lee Clement" 8 | __email__ = "lee.clement@robotics.utias.utoronto.ca" 9 | -------------------------------------------------------------------------------- /pykitti/downloader/tracking.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | """"Downloads and unzips the KITTI tracking data. 3 | Warning: This can take a while, and use up >100Gb of disk space.""" 4 | 5 | from __future__ import print_function 6 | 7 | import argparse 8 | import os 9 | import sys 10 | 11 | from subprocess import call 12 | import glob 13 | 14 | URL_BASE="https://s3.eu-central-1.amazonaws.com/avg-kitti/" 15 | tracking_dir_names = ['image_02', 'image_03', 'velodyne', 'calib', 'oxts', 'label_02', 'det_02'] 16 | tracking_dir_zip_tags = ['image_2', 'image_3', 'velodyne', 'calib', 'oxts', 'label_2', 'det_2_lsvm'] 17 | 18 | def parse_args(): 19 | parser = argparse.ArgumentParser() 20 | parser.add_argument('--kitti_root', type=str, default=os.path.join('data', 'kitti')) 21 | # parser.add_argument('--root', type=str, default=None, help='data folder') 22 | 23 | return parser.parse_args(sys.argv[1:]) 24 | 25 | ## Need to clean up lsvm as their files have trailing whitespaces 26 | def clean_file(filename): 27 | f = open(filename, 'r') 28 | new_lines = [] 29 | for line in f.readlines(): 30 | new_lines.append(line.rstrip()) 31 | f.close() 32 | 33 | f = open(filename, 'w') 34 | for line in new_lines: 35 | f.write(line + '\n') 36 | 37 | 38 | f.close() 39 | def clean_lsvm(lsvm_dir): 40 | for filename in glob.glob(lsvm_dir + '/*.txt'): 41 | print('Cleaning ', filename) 42 | clean_file(filename) 43 | 44 | 45 | def main(): 46 | args = parse_args() 47 | kitti_dir = args.kitti_root 48 | 49 | # Perform a few sanity checks to make sure we're operating in the right dir 50 | # when left with the default args. 51 | if not os.path.isabs(kitti_dir): 52 | if not os.path.isdir('src'): 53 | os.chdir('..') 54 | 55 | if not os.path.isdir('src'): 56 | print("Please make sure to run this tool from the DynSLAM " 57 | "project root when using relative paths.") 58 | return 1 59 | 60 | tracking_dir = os.path.join(kitti_dir, 'tracking') 61 | os.makedirs(tracking_dir, exist_ok=True) 62 | os.chdir(tracking_dir) 63 | 64 | tracking_zip_names = ["data_tracking_" + name + ".zip" for name in tracking_dir_zip_tags] 65 | 66 | for dir_name, zip_name in zip(tracking_dir_names, tracking_zip_names): 67 | canary_dir = os.path.join('training', dir_name) 68 | if os.path.isdir(canary_dir): 69 | print("Directory {} canary dir seems to exist, so I will assume the data is there.".format(canary_dir)) 70 | else: 71 | if os.path.exists(zip_name): 72 | print("File {} exists. Not re-downloading.".format(zip_name)) 73 | else: 74 | url = URL_BASE + zip_name 75 | print("Downloading file {} to folder {}.".format(zip_name, kitti_dir)) 76 | call(['wget', url]) 77 | 78 | call(['unzip', '-o', zip_name]) 79 | 80 | if str(canary_dir) == 'training/det_02': 81 | print("Need to trim whitespaces for lsvm label files") 82 | clean_lsvm('training/det_02') 83 | 84 | return 0 85 | 86 | 87 | if __name__ == '__main__': 88 | sys.exit(main()) 89 | -------------------------------------------------------------------------------- /pykitti/odometry.py: -------------------------------------------------------------------------------- 1 | """Provides 'odometry', which loads and parses odometry benchmark data.""" 2 | 3 | import datetime as dt 4 | import glob 5 | import os 6 | from collections import namedtuple 7 | 8 | import numpy as np 9 | 10 | import pykitti.utils as utils 11 | 12 | __author__ = "Lee Clement" 13 | __email__ = "lee.clement@robotics.utias.utoronto.ca" 14 | 15 | ##Since Python2.x has no 'FileNotFoundError' exception, define it 16 | ##Python3.x should do fine 17 | try: 18 | FileNotFoundError 19 | except NameError: 20 | FileNotFoundError = IOError 21 | 22 | class odometry: 23 | """Load and parse odometry benchmark data into a usable format.""" 24 | 25 | def __init__(self, base_path, sequence, **kwargs): 26 | """Set the path.""" 27 | self.sequence = sequence 28 | self.sequence_path = os.path.join(base_path, 'sequences', sequence) 29 | self.pose_path = os.path.join(base_path, 'poses') 30 | self.frames = kwargs.get('frames', None) 31 | 32 | # Default image file extension is 'png' 33 | self.imtype = kwargs.get('imtype', 'png') 34 | 35 | # Find all the data files 36 | self._get_file_lists() 37 | 38 | # Pre-load data that isn't returned as a generator 39 | self._load_calib() 40 | self._load_timestamps() 41 | self._load_poses() 42 | 43 | def __len__(self): 44 | """Return the number of frames loaded.""" 45 | return len(self.timestamps) 46 | 47 | @property 48 | def cam0(self): 49 | """Generator to read image files for cam0 (monochrome left).""" 50 | return utils.yield_images(self.cam0_files, mode='L') 51 | 52 | def get_cam0(self, idx): 53 | """Read image file for cam0 (monochrome left) at the specified index.""" 54 | return utils.load_image(self.cam0_files[idx], mode='L') 55 | 56 | @property 57 | def cam1(self): 58 | """Generator to read image files for cam1 (monochrome right).""" 59 | return utils.yield_images(self.cam1_files, mode='L') 60 | 61 | def get_cam1(self, idx): 62 | """Read image file for cam1 (monochrome right) at the specified index.""" 63 | return utils.load_image(self.cam1_files[idx], mode='L') 64 | 65 | @property 66 | def cam2(self): 67 | """Generator to read image files for cam2 (RGB left).""" 68 | return utils.yield_images(self.cam2_files, mode='RGB') 69 | 70 | def get_cam2(self, idx): 71 | """Read image file for cam2 (RGB left) at the specified index.""" 72 | return utils.load_image(self.cam2_files[idx], mode='RGB') 73 | 74 | @property 75 | def cam3(self): 76 | """Generator to read image files for cam0 (RGB right).""" 77 | return utils.yield_images(self.cam3_files, mode='RGB') 78 | 79 | def get_cam3(self, idx): 80 | """Read image file for cam3 (RGB right) at the specified index.""" 81 | return utils.load_image(self.cam3_files[idx], mode='RGB') 82 | 83 | @property 84 | def gray(self): 85 | """Generator to read monochrome stereo pairs from file. 86 | """ 87 | return zip(self.cam0, self.cam1) 88 | 89 | def get_gray(self, idx): 90 | """Read monochrome stereo pair at the specified index.""" 91 | return (self.get_cam0(idx), self.get_cam1(idx)) 92 | 93 | @property 94 | def rgb(self): 95 | """Generator to read RGB stereo pairs from file. 96 | """ 97 | return zip(self.cam2, self.cam3) 98 | 99 | def get_rgb(self, idx): 100 | """Read RGB stereo pair at the specified index.""" 101 | return (self.get_cam2(idx), self.get_cam3(idx)) 102 | 103 | @property 104 | def velo(self): 105 | """Generator to read velodyne [x,y,z,reflectance] scan data from binary files.""" 106 | # Return a generator yielding Velodyne scans. 107 | # Each scan is a Nx4 array of [x,y,z,reflectance] 108 | return utils.yield_velo_scans(self.velo_files) 109 | 110 | def get_velo(self, idx): 111 | """Read velodyne [x,y,z,reflectance] scan at the specified index.""" 112 | return utils.load_velo_scan(self.velo_files[idx]) 113 | 114 | def _get_file_lists(self): 115 | """Find and list data files for each sensor.""" 116 | self.cam0_files = sorted(glob.glob( 117 | os.path.join(self.sequence_path, 'image_0', 118 | '*.{}'.format(self.imtype)))) 119 | self.cam1_files = sorted(glob.glob( 120 | os.path.join(self.sequence_path, 'image_1', 121 | '*.{}'.format(self.imtype)))) 122 | self.cam2_files = sorted(glob.glob( 123 | os.path.join(self.sequence_path, 'image_2', 124 | '*.{}'.format(self.imtype)))) 125 | self.cam3_files = sorted(glob.glob( 126 | os.path.join(self.sequence_path, 'image_3', 127 | '*.{}'.format(self.imtype)))) 128 | self.velo_files = sorted(glob.glob( 129 | os.path.join(self.sequence_path, 'velodyne', 130 | '*.bin'))) 131 | 132 | # Subselect the chosen range of frames, if any 133 | if self.frames is not None: 134 | self.cam0_files = utils.subselect_files( 135 | self.cam0_files, self.frames) 136 | self.cam1_files = utils.subselect_files( 137 | self.cam1_files, self.frames) 138 | self.cam2_files = utils.subselect_files( 139 | self.cam2_files, self.frames) 140 | self.cam3_files = utils.subselect_files( 141 | self.cam3_files, self.frames) 142 | self.velo_files = utils.subselect_files( 143 | self.velo_files, self.frames) 144 | 145 | def _load_calib(self): 146 | """Load and compute intrinsic and extrinsic calibration parameters.""" 147 | # We'll build the calibration parameters as a dictionary, then 148 | # convert it to a namedtuple to prevent it from being modified later 149 | data = {} 150 | 151 | # Load the calibration file 152 | calib_filepath = os.path.join(self.sequence_path, 'calib.txt') 153 | filedata = utils.read_calib_file(calib_filepath) 154 | 155 | # Create 3x4 projection matrices 156 | P_rect_00 = np.reshape(filedata['P0'], (3, 4)) 157 | P_rect_10 = np.reshape(filedata['P1'], (3, 4)) 158 | P_rect_20 = np.reshape(filedata['P2'], (3, 4)) 159 | P_rect_30 = np.reshape(filedata['P3'], (3, 4)) 160 | 161 | data['P_rect_00'] = P_rect_00 162 | data['P_rect_10'] = P_rect_10 163 | data['P_rect_20'] = P_rect_20 164 | data['P_rect_30'] = P_rect_30 165 | 166 | # Compute the rectified extrinsics from cam0 to camN 167 | T1 = np.eye(4) 168 | T1[0, 3] = P_rect_10[0, 3] / P_rect_10[0, 0] 169 | T2 = np.eye(4) 170 | T2[0, 3] = P_rect_20[0, 3] / P_rect_20[0, 0] 171 | T3 = np.eye(4) 172 | T3[0, 3] = P_rect_30[0, 3] / P_rect_30[0, 0] 173 | 174 | # Compute the velodyne to rectified camera coordinate transforms 175 | data['T_cam0_velo'] = np.reshape(filedata['Tr'], (3, 4)) 176 | data['T_cam0_velo'] = np.vstack([data['T_cam0_velo'], [0, 0, 0, 1]]) 177 | data['T_cam1_velo'] = T1.dot(data['T_cam0_velo']) 178 | data['T_cam2_velo'] = T2.dot(data['T_cam0_velo']) 179 | data['T_cam3_velo'] = T3.dot(data['T_cam0_velo']) 180 | 181 | # Compute the camera intrinsics 182 | data['K_cam0'] = P_rect_00[0:3, 0:3] 183 | data['K_cam1'] = P_rect_10[0:3, 0:3] 184 | data['K_cam2'] = P_rect_20[0:3, 0:3] 185 | data['K_cam3'] = P_rect_30[0:3, 0:3] 186 | 187 | # Compute the stereo baselines in meters by projecting the origin of 188 | # each camera frame into the velodyne frame and computing the distances 189 | # between them 190 | p_cam = np.array([0, 0, 0, 1]) 191 | p_velo0 = np.linalg.inv(data['T_cam0_velo']).dot(p_cam) 192 | p_velo1 = np.linalg.inv(data['T_cam1_velo']).dot(p_cam) 193 | p_velo2 = np.linalg.inv(data['T_cam2_velo']).dot(p_cam) 194 | p_velo3 = np.linalg.inv(data['T_cam3_velo']).dot(p_cam) 195 | 196 | data['b_gray'] = np.linalg.norm(p_velo1 - p_velo0) # gray baseline 197 | data['b_rgb'] = np.linalg.norm(p_velo3 - p_velo2) # rgb baseline 198 | 199 | self.calib = namedtuple('CalibData', data.keys())(*data.values()) 200 | 201 | def _load_timestamps(self): 202 | """Load timestamps from file.""" 203 | timestamp_file = os.path.join(self.sequence_path, 'times.txt') 204 | 205 | # Read and parse the timestamps 206 | self.timestamps = [] 207 | with open(timestamp_file, 'r') as f: 208 | for line in f.readlines(): 209 | t = dt.timedelta(seconds=float(line)) 210 | self.timestamps.append(t) 211 | 212 | # Subselect the chosen range of frames, if any 213 | if self.frames is not None: 214 | self.timestamps = [self.timestamps[i] for i in self.frames] 215 | 216 | def _load_poses(self): 217 | """Load ground truth poses (T_w_cam0) from file.""" 218 | pose_file = os.path.join(self.pose_path, self.sequence + '.txt') 219 | 220 | # Read and parse the poses 221 | poses = [] 222 | try: 223 | with open(pose_file, 'r') as f: 224 | lines = f.readlines() 225 | if self.frames is not None: 226 | lines = [lines[i] for i in self.frames] 227 | 228 | for line in lines: 229 | T_w_cam0 = np.fromstring(line, dtype=float, sep=' ') 230 | T_w_cam0 = T_w_cam0.reshape(3, 4) 231 | T_w_cam0 = np.vstack((T_w_cam0, [0, 0, 0, 1])) 232 | poses.append(T_w_cam0) 233 | 234 | except FileNotFoundError: 235 | print('Ground truth poses are not available for sequence ' + 236 | self.sequence + '.') 237 | 238 | self.poses = poses 239 | -------------------------------------------------------------------------------- /pykitti/raw.py: -------------------------------------------------------------------------------- 1 | """Provides 'raw', which loads and parses raw KITTI data.""" 2 | 3 | import datetime as dt 4 | import glob 5 | import os 6 | from collections import namedtuple 7 | 8 | import numpy as np 9 | 10 | import pykitti.utils as utils 11 | 12 | __author__ = "Lee Clement" 13 | __email__ = "lee.clement@robotics.utias.utoronto.ca" 14 | 15 | 16 | class raw: 17 | """Load and parse raw data into a usable format.""" 18 | 19 | def __init__(self, base_path, date, drive, **kwargs): 20 | """Set the path and pre-load calibration data and timestamps.""" 21 | self.dataset = kwargs.get('dataset', 'sync') 22 | self.drive = date + '_drive_' + drive + '_' + self.dataset 23 | self.calib_path = os.path.join(base_path, date) 24 | self.data_path = os.path.join(base_path, date, self.drive) 25 | self.frames = kwargs.get('frames', None) 26 | 27 | # Default image file extension is '.png' 28 | self.imtype = kwargs.get('imtype', 'png') 29 | 30 | # Find all the data files 31 | self._get_file_lists() 32 | 33 | # Pre-load data that isn't returned as a generator 34 | self._load_calib() 35 | self._load_timestamps() 36 | self._load_oxts() 37 | 38 | def __len__(self): 39 | """Return the number of frames loaded.""" 40 | return len(self.timestamps) 41 | 42 | @property 43 | def cam0(self): 44 | """Generator to read image files for cam0 (monochrome left).""" 45 | return utils.yield_images(self.cam0_files, mode='L') 46 | 47 | def get_cam0(self, idx): 48 | """Read image file for cam0 (monochrome left) at the specified index.""" 49 | return utils.load_image(self.cam0_files[idx], mode='L') 50 | 51 | @property 52 | def cam1(self): 53 | """Generator to read image files for cam1 (monochrome right).""" 54 | return utils.yield_images(self.cam1_files, mode='L') 55 | 56 | def get_cam1(self, idx): 57 | """Read image file for cam1 (monochrome right) at the specified index.""" 58 | return utils.load_image(self.cam1_files[idx], mode='L') 59 | 60 | @property 61 | def cam2(self): 62 | """Generator to read image files for cam2 (RGB left).""" 63 | return utils.yield_images(self.cam2_files, mode='RGB') 64 | 65 | def get_cam2(self, idx): 66 | """Read image file for cam2 (RGB left) at the specified index.""" 67 | return utils.load_image(self.cam2_files[idx], mode='RGB') 68 | 69 | @property 70 | def cam3(self): 71 | """Generator to read image files for cam0 (RGB right).""" 72 | return utils.yield_images(self.cam3_files, mode='RGB') 73 | 74 | def get_cam3(self, idx): 75 | """Read image file for cam3 (RGB right) at the specified index.""" 76 | return utils.load_image(self.cam3_files[idx], mode='RGB') 77 | 78 | @property 79 | def gray(self): 80 | """Generator to read monochrome stereo pairs from file. 81 | """ 82 | return zip(self.cam0, self.cam1) 83 | 84 | def get_gray(self, idx): 85 | """Read monochrome stereo pair at the specified index.""" 86 | return (self.get_cam0(idx), self.get_cam1(idx)) 87 | 88 | @property 89 | def rgb(self): 90 | """Generator to read RGB stereo pairs from file. 91 | """ 92 | return zip(self.cam2, self.cam3) 93 | 94 | def get_rgb(self, idx): 95 | """Read RGB stereo pair at the specified index.""" 96 | return (self.get_cam2(idx), self.get_cam3(idx)) 97 | 98 | @property 99 | def velo(self): 100 | """Generator to read velodyne [x,y,z,reflectance] scan data from binary files.""" 101 | # Return a generator yielding Velodyne scans. 102 | # Each scan is a Nx4 array of [x,y,z,reflectance] 103 | return utils.yield_velo_scans(self.velo_files) 104 | 105 | def get_velo(self, idx): 106 | """Read velodyne [x,y,z,reflectance] scan at the specified index.""" 107 | return utils.load_velo_scan(self.velo_files[idx]) 108 | 109 | def _get_file_lists(self): 110 | """Find and list data files for each sensor.""" 111 | self.oxts_files = sorted(glob.glob( 112 | os.path.join(self.data_path, 'oxts', 'data', '*.txt'))) 113 | self.cam0_files = sorted(glob.glob( 114 | os.path.join(self.data_path, 'image_00', 115 | 'data', '*.{}'.format(self.imtype)))) 116 | self.cam1_files = sorted(glob.glob( 117 | os.path.join(self.data_path, 'image_01', 118 | 'data', '*.{}'.format(self.imtype)))) 119 | self.cam2_files = sorted(glob.glob( 120 | os.path.join(self.data_path, 'image_02', 121 | 'data', '*.{}'.format(self.imtype)))) 122 | self.cam3_files = sorted(glob.glob( 123 | os.path.join(self.data_path, 'image_03', 124 | 'data', '*.{}'.format(self.imtype)))) 125 | self.velo_files = sorted(glob.glob( 126 | os.path.join(self.data_path, 'velodyne_points', 127 | 'data', '*.bin'))) 128 | 129 | # Subselect the chosen range of frames, if any 130 | if self.frames is not None: 131 | self.oxts_files = utils.subselect_files( 132 | self.oxts_files, self.frames) 133 | self.cam0_files = utils.subselect_files( 134 | self.cam0_files, self.frames) 135 | self.cam1_files = utils.subselect_files( 136 | self.cam1_files, self.frames) 137 | self.cam2_files = utils.subselect_files( 138 | self.cam2_files, self.frames) 139 | self.cam3_files = utils.subselect_files( 140 | self.cam3_files, self.frames) 141 | self.velo_files = utils.subselect_files( 142 | self.velo_files, self.frames) 143 | 144 | def _load_calib_rigid(self, filename): 145 | """Read a rigid transform calibration file as a numpy.array.""" 146 | filepath = os.path.join(self.calib_path, filename) 147 | data = utils.read_calib_file(filepath) 148 | return utils.transform_from_rot_trans(data['R'], data['T']) 149 | 150 | def _load_calib_cam_to_cam(self, velo_to_cam_file, cam_to_cam_file): 151 | # We'll return the camera calibration as a dictionary 152 | data = {} 153 | 154 | # Load the rigid transformation from velodyne coordinates 155 | # to unrectified cam0 coordinates 156 | T_cam0unrect_velo = self._load_calib_rigid(velo_to_cam_file) 157 | data['T_cam0_velo_unrect'] = T_cam0unrect_velo 158 | 159 | # Load and parse the cam-to-cam calibration data 160 | cam_to_cam_filepath = os.path.join(self.calib_path, cam_to_cam_file) 161 | filedata = utils.read_calib_file(cam_to_cam_filepath) 162 | 163 | # Create 3x4 projection matrices 164 | P_rect_00 = np.reshape(filedata['P_rect_00'], (3, 4)) 165 | P_rect_10 = np.reshape(filedata['P_rect_01'], (3, 4)) 166 | P_rect_20 = np.reshape(filedata['P_rect_02'], (3, 4)) 167 | P_rect_30 = np.reshape(filedata['P_rect_03'], (3, 4)) 168 | 169 | data['P_rect_00'] = P_rect_00 170 | data['P_rect_10'] = P_rect_10 171 | data['P_rect_20'] = P_rect_20 172 | data['P_rect_30'] = P_rect_30 173 | 174 | # Create 4x4 matrices from the rectifying rotation matrices 175 | R_rect_00 = np.eye(4) 176 | R_rect_00[0:3, 0:3] = np.reshape(filedata['R_rect_00'], (3, 3)) 177 | R_rect_10 = np.eye(4) 178 | R_rect_10[0:3, 0:3] = np.reshape(filedata['R_rect_01'], (3, 3)) 179 | R_rect_20 = np.eye(4) 180 | R_rect_20[0:3, 0:3] = np.reshape(filedata['R_rect_02'], (3, 3)) 181 | R_rect_30 = np.eye(4) 182 | R_rect_30[0:3, 0:3] = np.reshape(filedata['R_rect_03'], (3, 3)) 183 | 184 | data['R_rect_00'] = R_rect_00 185 | data['R_rect_10'] = R_rect_10 186 | data['R_rect_20'] = R_rect_20 187 | data['R_rect_30'] = R_rect_30 188 | 189 | # Compute the rectified extrinsics from cam0 to camN 190 | T0 = np.eye(4) 191 | T0[0, 3] = P_rect_00[0, 3] / P_rect_00[0, 0] 192 | T1 = np.eye(4) 193 | T1[0, 3] = P_rect_10[0, 3] / P_rect_10[0, 0] 194 | T2 = np.eye(4) 195 | T2[0, 3] = P_rect_20[0, 3] / P_rect_20[0, 0] 196 | T3 = np.eye(4) 197 | T3[0, 3] = P_rect_30[0, 3] / P_rect_30[0, 0] 198 | 199 | # Compute the velodyne to rectified camera coordinate transforms 200 | data['T_cam0_velo'] = T0.dot(R_rect_00.dot(T_cam0unrect_velo)) 201 | data['T_cam1_velo'] = T1.dot(R_rect_00.dot(T_cam0unrect_velo)) 202 | data['T_cam2_velo'] = T2.dot(R_rect_00.dot(T_cam0unrect_velo)) 203 | data['T_cam3_velo'] = T3.dot(R_rect_00.dot(T_cam0unrect_velo)) 204 | 205 | # Compute the camera intrinsics 206 | data['K_cam0'] = P_rect_00[0:3, 0:3] 207 | data['K_cam1'] = P_rect_10[0:3, 0:3] 208 | data['K_cam2'] = P_rect_20[0:3, 0:3] 209 | data['K_cam3'] = P_rect_30[0:3, 0:3] 210 | 211 | # Compute the stereo baselines in meters by projecting the origin of 212 | # each camera frame into the velodyne frame and computing the distances 213 | # between them 214 | p_cam = np.array([0, 0, 0, 1]) 215 | p_velo0 = np.linalg.inv(data['T_cam0_velo']).dot(p_cam) 216 | p_velo1 = np.linalg.inv(data['T_cam1_velo']).dot(p_cam) 217 | p_velo2 = np.linalg.inv(data['T_cam2_velo']).dot(p_cam) 218 | p_velo3 = np.linalg.inv(data['T_cam3_velo']).dot(p_cam) 219 | 220 | data['b_gray'] = np.linalg.norm(p_velo1 - p_velo0) # gray baseline 221 | data['b_rgb'] = np.linalg.norm(p_velo3 - p_velo2) # rgb baseline 222 | 223 | return data 224 | 225 | def _load_calib(self): 226 | """Load and compute intrinsic and extrinsic calibration parameters.""" 227 | # We'll build the calibration parameters as a dictionary, then 228 | # convert it to a namedtuple to prevent it from being modified later 229 | data = {} 230 | 231 | # Load the rigid transformation from IMU to velodyne 232 | data['T_velo_imu'] = self._load_calib_rigid('calib_imu_to_velo.txt') 233 | 234 | # Load the camera intrinsics and extrinsics 235 | data.update(self._load_calib_cam_to_cam( 236 | 'calib_velo_to_cam.txt', 'calib_cam_to_cam.txt')) 237 | 238 | # Pre-compute the IMU to rectified camera coordinate transforms 239 | data['T_cam0_imu'] = data['T_cam0_velo'].dot(data['T_velo_imu']) 240 | data['T_cam1_imu'] = data['T_cam1_velo'].dot(data['T_velo_imu']) 241 | data['T_cam2_imu'] = data['T_cam2_velo'].dot(data['T_velo_imu']) 242 | data['T_cam3_imu'] = data['T_cam3_velo'].dot(data['T_velo_imu']) 243 | 244 | self.calib = namedtuple('CalibData', data.keys())(*data.values()) 245 | 246 | def _load_timestamps(self): 247 | """Load timestamps from file.""" 248 | timestamp_file = os.path.join( 249 | self.data_path, 'oxts', 'timestamps.txt') 250 | 251 | # Read and parse the timestamps 252 | self.timestamps = [] 253 | with open(timestamp_file, 'r') as f: 254 | for line in f.readlines(): 255 | # NB: datetime only supports microseconds, but KITTI timestamps 256 | # give nanoseconds, so need to truncate last 4 characters to 257 | # get rid of \n (counts as 1) and extra 3 digits 258 | t = dt.datetime.strptime(line[:-4], '%Y-%m-%d %H:%M:%S.%f') 259 | self.timestamps.append(t) 260 | 261 | # Subselect the chosen range of frames, if any 262 | if self.frames is not None: 263 | self.timestamps = [self.timestamps[i] for i in self.frames] 264 | 265 | def _load_oxts(self): 266 | """Load OXTS data from file.""" 267 | self.oxts = utils.load_oxts_packets_and_poses(self.oxts_files) 268 | -------------------------------------------------------------------------------- /pykitti/tracking.py: -------------------------------------------------------------------------------- 1 | """Provides 'tracking', which loads and parses tracking benchmark data.""" 2 | 3 | import datetime as dt 4 | import glob 5 | import os 6 | from collections import namedtuple 7 | import pandas as pd 8 | 9 | import numpy as np 10 | 11 | import pykitti.utils as utils 12 | import cv2 13 | 14 | try: 15 | xrange 16 | except NameError: 17 | xrange = range 18 | 19 | __author__ = "Sidney zhang" 20 | __email__ = "sidney@sidazhang.com" 21 | 22 | 23 | class tracking: 24 | """Load and parse tracking benchmark data into a usable format.""" 25 | 26 | def __init__(self, base_path, sequence, **kwargs): 27 | """Set the path.""" 28 | self.base_path = base_path 29 | self.sequence = sequence 30 | self.frames = kwargs.get('frames', None) 31 | 32 | # Default image file extension is 'png' 33 | self.imtype = kwargs.get('imtype', 'png') 34 | 35 | # Find all the data files 36 | self._get_file_lists() 37 | print('files', len(self.cam2_files)) 38 | # Pre-load data that isn't returned as a generator 39 | # self._load_calib() 40 | 41 | def __len__(self): 42 | """Return the number of frames loaded.""" 43 | return len(self.timestamps) 44 | 45 | @property 46 | def cam2(self): 47 | """Generator to read image files for cam2 (RGB left).""" 48 | return utils.yield_images(self.cam2_files, mode='RGB') 49 | 50 | def get_cam2(self, idx): 51 | """Read image file for cam2 (RGB left) at the specified index.""" 52 | return utils.load_image(self.cam2_files[idx], mode='RGB') 53 | 54 | @property 55 | def cam3(self): 56 | """Generator to read image files for cam0 (RGB right).""" 57 | return utils.yield_images(self.cam3_files, mode='RGB') 58 | 59 | def get_cam3(self, idx): 60 | """Read image file for cam3 (RGB right) at the specified index.""" 61 | return utils.load_image(self.cam3_files[idx], mode='RGB') 62 | 63 | @property 64 | def gray(self): 65 | """Generator to read monochrome stereo pairs from file. 66 | """ 67 | return zip(self.cam0, self.cam1) 68 | 69 | def get_gray(self, idx): 70 | """Read monochrome stereo pair at the specified index.""" 71 | return (self.get_cam0(idx), self.get_cam1(idx)) 72 | 73 | @property 74 | def rgb(self): 75 | """Generator to read RGB stereo pairs from file. 76 | """ 77 | return zip(self.cam2, self.cam3) 78 | 79 | def get_rgb(self, idx): 80 | """Read RGB stereo pair at the specified index.""" 81 | return (self.get_cam2(idx), self.get_cam3(idx)) 82 | 83 | @property 84 | def velo(self): 85 | """Generator to read velodyne [x,y,z,reflectance] scan data from binary files.""" 86 | # Return a generator yielding Velodyne scans. 87 | # Each scan is a Nx4 array of [x,y,z,reflectance] 88 | return utils.yield_velo_scans(self.velo_files) 89 | 90 | def get_velo(self, idx): 91 | """Read velodyne [x,y,z,reflectance] scan at the specified index.""" 92 | return utils.load_velo_scan(self.velo_files[idx]) 93 | 94 | def _get_file_lists(self): 95 | """Find and list data files for each sensor.""" 96 | self.cam2_files = sorted(glob.glob( 97 | os.path.join(self.base_path, 98 | 'image_02', 99 | self.sequence, 100 | '*.{}'.format(self.imtype)))) 101 | self.cam3_files = sorted(glob.glob( 102 | os.path.join(self.base_path, 103 | 'image_03', 104 | self.sequence, 105 | '*.{}'.format(self.imtype)))) 106 | self.velo_files = sorted(glob.glob( 107 | os.path.join(self.base_path, 108 | 'velodyne', 109 | self.sequence, 110 | '*.bin'))) 111 | 112 | # Subselect the chosen range of frames, if any 113 | if self.frames is not None: 114 | self.cam0_files = utils.subselect_files( 115 | self.cam0_files, self.frames) 116 | self.cam1_files = utils.subselect_files( 117 | self.cam1_files, self.frames) 118 | self.cam2_files = utils.subselect_files( 119 | self.cam2_files, self.frames) 120 | self.cam3_files = utils.subselect_files( 121 | self.cam3_files, self.frames) 122 | self.velo_files = utils.subselect_files( 123 | self.velo_files, self.frames) 124 | 125 | def _load_calib(self): 126 | """Load and compute intrinsic and extrinsic calibration parameters.""" 127 | # We'll build the calibration parameters as a dictionary, then 128 | # convert it to a namedtuple to prevent it from being modified later 129 | data = {} 130 | 131 | # Load the calibration file 132 | # calib_filepath = os.path.join(self.sequence_path + '.txt', 'calib.txt') 133 | calib_filepath = os.path.join(self.base_path, 'calib', self.sequence + '.txt') 134 | filedata = utils.read_calib_file(calib_filepath) 135 | 136 | # Create 3x4 projection matrices 137 | P_rect_00 = np.reshape(filedata['P0'], (3, 4)) 138 | P_rect_10 = np.reshape(filedata['P1'], (3, 4)) 139 | P_rect_20 = np.reshape(filedata['P2'], (3, 4)) 140 | P_rect_30 = np.reshape(filedata['P3'], (3, 4)) 141 | 142 | data['P_rect_00'] = P_rect_00 143 | data['P_rect_10'] = P_rect_10 144 | data['P_rect_20'] = P_rect_20 145 | data['P_rect_30'] = P_rect_30 146 | 147 | # Compute the rectified extrinsics from cam0 to camN 148 | T1 = np.eye(4) 149 | T1[0, 3] = P_rect_10[0, 3] / P_rect_10[0, 0] 150 | T2 = np.eye(4) 151 | T2[0, 3] = P_rect_20[0, 3] / P_rect_20[0, 0] 152 | T3 = np.eye(4) 153 | T3[0, 3] = P_rect_30[0, 3] / P_rect_30[0, 0] 154 | 155 | # Compute the velodyne to rectified camera coordinate transforms 156 | data['T_cam0_velo'] = np.reshape(filedata['Tr_velo_cam'], (3, 4)) 157 | data['T_cam0_velo'] = np.vstack([data['T_cam0_velo'], [0, 0, 0, 1]]) 158 | data['T_cam1_velo'] = T1.dot(data['T_cam0_velo']) 159 | data['T_cam2_velo'] = T2.dot(data['T_cam0_velo']) 160 | data['T_cam3_velo'] = T3.dot(data['T_cam0_velo']) 161 | 162 | # Compute the camera intrinsics 163 | data['K_cam0'] = P_rect_00[0:3, 0:3] 164 | data['K_cam1'] = P_rect_10[0:3, 0:3] 165 | data['K_cam2'] = P_rect_20[0:3, 0:3] 166 | data['K_cam3'] = P_rect_30[0:3, 0:3] 167 | 168 | # Compute the stereo baselines in meters by projecting the origin of 169 | # each camera frame into the velodyne frame and computing the distances 170 | # between them 171 | p_cam = np.array([0, 0, 0, 1]) 172 | p_velo0 = np.linalg.inv(data['T_cam0_velo']).dot(p_cam) 173 | p_velo1 = np.linalg.inv(data['T_cam1_velo']).dot(p_cam) 174 | p_velo2 = np.linalg.inv(data['T_cam2_velo']).dot(p_cam) 175 | p_velo3 = np.linalg.inv(data['T_cam3_velo']).dot(p_cam) 176 | 177 | data['b_gray'] = np.linalg.norm(p_velo1 - p_velo0) # gray baseline 178 | data['b_rgb'] = np.linalg.norm(p_velo3 - p_velo2) # rgb baseline 179 | 180 | self.calib = namedtuple('CalibData', data.keys())(*data.values()) 181 | 182 | def to_array_list(df, length=None, by_id=True): 183 | """Converts a dataframe to a list of arrays, with one array for every unique index entry. 184 | Index is assumed to be 0-based contiguous. If there is a missing index entry, an empty 185 | numpy array is returned for it. 186 | Elements in the arrays are sorted by their id. 187 | :param df: 188 | :param length: 189 | :return: 190 | """ 191 | 192 | if by_id: 193 | assert 'id' in df.columns 194 | 195 | # if `id` is the only column, don't sort it (and don't remove it) 196 | if len(df.columns) == 1: 197 | by_id = False 198 | 199 | idx = df.index.unique() 200 | if length is None: 201 | length = max(idx) + 1 202 | 203 | l = [np.empty(0) for _ in xrange(length)] 204 | for i in idx: 205 | a = df.loc[i] 206 | if by_id: 207 | if isinstance(a, pd.Series): 208 | a = a[1:] 209 | else: 210 | a = a.copy().set_index('id').sort_index() 211 | 212 | l[i] = a.values.reshape((-1, a.shape[-1])) 213 | return np.asarray(l) 214 | 215 | 216 | # TODO: Acknowledge this is from HART 217 | class KittiTrackingLabels(object): 218 | """Kitt Tracking Label parser. It can limit the maximum number of objects per track, 219 | filter out objects with class "DontCare", or retain only those objects present 220 | in a given frame. 221 | """ 222 | 223 | columns = 'id class truncated occluded alpha x1 y1 x2 y2 xd yd zd x y z roty score'.split() 224 | classes = 'Car Van Truck Pedestrian Person_sitting Cyclist Tram Misc DontCare'.split() 225 | 226 | 227 | def __init__(self, path_or_df, bbox_with_size=True, remove_dontcare=True, split_on_reappear=True): 228 | 229 | if isinstance(path_or_df, pd.DataFrame): 230 | self._df = path_or_df 231 | else: 232 | if not os.path.exists(path_or_df): 233 | raise ValueError('File {} doesn\'t exist'.format(path_or_df)) 234 | 235 | self._df = pd.read_csv(path_or_df, sep=' ', header=None, 236 | index_col=0, skip_blank_lines=True) 237 | 238 | # Detection files have 1 more column than label files 239 | # label file has 16 columns 240 | # detection file has 17 columns (the last column is score) 241 | # Here it checks the number of columns the df has and sets the 242 | # column names on the df accordingly 243 | self._df.columns = self.columns[:len(self._df.columns)] 244 | 245 | self.bbox_with_size = bbox_with_size 246 | 247 | if remove_dontcare: 248 | self._df = self._df[self._df['class'] != 'DontCare'] 249 | 250 | for c in self._df.columns: 251 | self._convert_type(c, np.float32, np.float64) 252 | self._convert_type(c, np.int32, np.int64) 253 | 254 | 255 | # TODO: Add occlusion filtering back in 256 | truncated_threshold=(0, 2.) 257 | occluded_threshold=(0, 3.) 258 | # if not nest.is_sequence(occluded_threshold): 259 | # occluded_threshold = (0, occluded_threshold) 260 | # 261 | # if not nest.is_sequence(truncated_threshold): 262 | # truncated_threshold = (0, truncated_threshold) 263 | # TODO: Add occlusion filteringinback in 264 | # self._df = self._df[self._df['occluded'] >= occluded_threshold[0]] 265 | # self._df = self._df[self._df['occluded'] <= occluded_threshold[1]] 266 | # 267 | # self._df = self._df[self._df['truncated'] >= truncated_threshold[0]] 268 | # self._df = self._df[self._df['truncated'] <= truncated_threshold[1]] 269 | 270 | # make 0-based contiguous ids 271 | ids = self._df.id.unique() 272 | offset = max(ids) + 1 273 | id_map = {id: new_id for id, new_id in zip(ids, np.arange(offset, len(ids) + offset))} 274 | self._df.replace({'id': id_map}, inplace=True) 275 | self._df.id -= offset 276 | 277 | self.ids = list(self._df.id.unique()) 278 | self.max_objects = len(self.ids) 279 | self.index = self._df.index.unique() 280 | 281 | if split_on_reappear: 282 | added_ids = self._split_on_reappear(self._df, self.presence, self.ids[-1]) 283 | self.ids.extend(added_ids) 284 | self.max_objects += len(added_ids) 285 | 286 | def _convert_type(self, column, dest_type, only_from_type=None): 287 | cond = only_from_type is None or self._df[column].dtype == only_from_type 288 | if cond: 289 | self._df[column] = self._df[column].astype(dest_type) 290 | 291 | @property 292 | def bbox(self): 293 | bbox = self._df[['id', 'x1', 'y1', 'x2', 'y2']].copy() 294 | # TODO: Fix this to become x, y, w, h 295 | if self.bbox_with_size: 296 | bbox['y2'] -= bbox['y1'] 297 | bbox['x2'] -= bbox['x1'] 298 | 299 | """Converts a dataframe to a list of arrays 300 | :param df: 301 | :param length: 302 | :return: 303 | """ 304 | 305 | return to_array_list(bbox) 306 | 307 | @property 308 | def presence(self): 309 | return self._presence(self._df, self.index, self.max_objects) 310 | 311 | @property 312 | def num_objects(self): 313 | ns = self._df.id.groupby(self._df.index).count() 314 | absent = list(set(range(len(self))) - set(self.index)) 315 | other = pd.DataFrame([0] * len(absent), absent) 316 | ns = ns.append(other) 317 | ns.sort_index(inplace=True) 318 | return ns.as_matrix().squeeze() 319 | 320 | @property 321 | def cls(self): 322 | return to_array_list(self._df[['id', 'class']]) 323 | 324 | @property 325 | def occlusion(self): 326 | return to_array_list(self._df[['id', 'occluded']]) 327 | 328 | @property 329 | def id(self): 330 | return to_array_list(self._df['id']) 331 | 332 | def __len__(self): 333 | return self.index[-1] - self.index[0] + 1 334 | 335 | @classmethod 336 | def _presence(cls, df, index, n_objects): 337 | p = np.zeros((index[-1] + 1, n_objects), dtype=bool) 338 | for i, row in df.iterrows(): 339 | p[i, row.id] = True 340 | return p 341 | 342 | @classmethod 343 | def _split_on_reappear(cls, df, p, id_offset): 344 | """Assign a new identity to an objects that appears after disappearing previously. 345 | Works on `df` in-place. 346 | :param df: data frame 347 | :param p: presence 348 | :param id_offset: offset added to new ids 349 | :return: 350 | """ 351 | 352 | next_id = id_offset + 1 353 | added_ids = [] 354 | nt = p.sum(0) 355 | start = np.argmax(p, 0) 356 | end = np.argmax(np.cumsum(p, 0), 0) 357 | diff = end - start + 1 358 | is_contiguous = np.equal(nt, diff) 359 | for id, contiguous in enumerate(is_contiguous): 360 | if not contiguous: 361 | 362 | to_change = df[df.id == id] 363 | index = to_change.index 364 | diff = index[1:] - index[:-1] 365 | where = np.where(np.greater(diff, 1))[0] 366 | for w in where: 367 | to_change.loc[w + 1:, 'id'] = next_id 368 | added_ids.append(next_id) 369 | next_id += 1 370 | 371 | df[df.id == id] = to_change 372 | 373 | return added_ids 374 | -------------------------------------------------------------------------------- /pykitti/utils.py: -------------------------------------------------------------------------------- 1 | """Provides helper methods for loading and parsing KITTI data.""" 2 | 3 | from collections import namedtuple 4 | 5 | import numpy as np 6 | from PIL import Image 7 | 8 | __author__ = "Lee Clement" 9 | __email__ = "lee.clement@robotics.utias.utoronto.ca" 10 | 11 | # Per dataformat.txt 12 | OxtsPacket = namedtuple('OxtsPacket', 13 | 'lat, lon, alt, ' + 14 | 'roll, pitch, yaw, ' + 15 | 'vn, ve, vf, vl, vu, ' + 16 | 'ax, ay, az, af, al, au, ' + 17 | 'wx, wy, wz, wf, wl, wu, ' + 18 | 'pos_accuracy, vel_accuracy, ' + 19 | 'navstat, numsats, ' + 20 | 'posmode, velmode, orimode') 21 | 22 | # Bundle into an easy-to-access structure 23 | OxtsData = namedtuple('OxtsData', 'packet, T_w_imu') 24 | 25 | 26 | def subselect_files(files, indices): 27 | try: 28 | files = [files[i] for i in indices] 29 | except: 30 | pass 31 | return files 32 | 33 | 34 | def rotx(t): 35 | """Rotation about the x-axis.""" 36 | c = np.cos(t) 37 | s = np.sin(t) 38 | return np.array([[1, 0, 0], 39 | [0, c, -s], 40 | [0, s, c]]) 41 | 42 | 43 | def roty(t): 44 | """Rotation about the y-axis.""" 45 | c = np.cos(t) 46 | s = np.sin(t) 47 | return np.array([[c, 0, s], 48 | [0, 1, 0], 49 | [-s, 0, c]]) 50 | 51 | 52 | def rotz(t): 53 | """Rotation about the z-axis.""" 54 | c = np.cos(t) 55 | s = np.sin(t) 56 | return np.array([[c, -s, 0], 57 | [s, c, 0], 58 | [0, 0, 1]]) 59 | 60 | 61 | def transform_from_rot_trans(R, t): 62 | """Transforation matrix from rotation matrix and translation vector.""" 63 | R = R.reshape(3, 3) 64 | t = t.reshape(3, 1) 65 | return np.vstack((np.hstack([R, t]), [0, 0, 0, 1])) 66 | 67 | 68 | def read_calib_file(filepath): 69 | """Read in a calibration file and parse into a dictionary.""" 70 | data = {} 71 | 72 | with open(filepath, 'r') as f: 73 | for line in f.readlines(): 74 | try: 75 | key, value = line.split(':', 1) 76 | except ValueError: 77 | key, value = line.split(' ', 1) 78 | # The only non-float values in these files are dates, which 79 | # we don't care about anyway 80 | try: 81 | data[key] = np.array([float(x) for x in value.split()]) 82 | except ValueError: 83 | pass 84 | 85 | return data 86 | 87 | 88 | def pose_from_oxts_packet(packet, scale): 89 | """Helper method to compute a SE(3) pose matrix from an OXTS packet. 90 | """ 91 | er = 6378137. # earth radius (approx.) in meters 92 | 93 | # Use a Mercator projection to get the translation vector 94 | tx = scale * packet.lon * np.pi * er / 180. 95 | ty = scale * er * \ 96 | np.log(np.tan((90. + packet.lat) * np.pi / 360.)) 97 | tz = packet.alt 98 | t = np.array([tx, ty, tz]) 99 | 100 | # Use the Euler angles to get the rotation matrix 101 | Rx = rotx(packet.roll) 102 | Ry = roty(packet.pitch) 103 | Rz = rotz(packet.yaw) 104 | R = Rz.dot(Ry.dot(Rx)) 105 | 106 | # Combine the translation and rotation into a homogeneous transform 107 | return R, t 108 | 109 | 110 | def load_oxts_packets_and_poses(oxts_files): 111 | """Generator to read OXTS ground truth data. 112 | 113 | Poses are given in an East-North-Up coordinate system 114 | whose origin is the first GPS position. 115 | """ 116 | # Scale for Mercator projection (from first lat value) 117 | scale = None 118 | # Origin of the global coordinate system (first GPS position) 119 | origin = None 120 | 121 | oxts = [] 122 | 123 | for filename in oxts_files: 124 | with open(filename, 'r') as f: 125 | for line in f.readlines(): 126 | line = line.split() 127 | # Last five entries are flags and counts 128 | line[:-5] = [float(x) for x in line[:-5]] 129 | line[-5:] = [int(float(x)) for x in line[-5:]] 130 | 131 | packet = OxtsPacket(*line) 132 | 133 | if scale is None: 134 | scale = np.cos(packet.lat * np.pi / 180.) 135 | 136 | R, t = pose_from_oxts_packet(packet, scale) 137 | 138 | if origin is None: 139 | origin = t 140 | 141 | T_w_imu = transform_from_rot_trans(R, t - origin) 142 | 143 | oxts.append(OxtsData(packet, T_w_imu)) 144 | 145 | return oxts 146 | 147 | 148 | def load_image(file, mode): 149 | """Load an image from file.""" 150 | return Image.open(file).convert(mode) 151 | 152 | 153 | def yield_images(imfiles, mode): 154 | """Generator to read image files.""" 155 | for file in imfiles: 156 | yield load_image(file, mode) 157 | 158 | 159 | def load_velo_scan(file): 160 | """Load and parse a velodyne binary file.""" 161 | scan = np.fromfile(file, dtype=np.float32) 162 | return scan.reshape((-1, 4)) 163 | 164 | 165 | def yield_velo_scans(velo_files): 166 | """Generator to parse velodyne binary files into arrays.""" 167 | for file in velo_files: 168 | yield load_velo_scan(file) 169 | -------------------------------------------------------------------------------- /setup.cfg: -------------------------------------------------------------------------------- 1 | [metadata] 2 | description-file = README.md 3 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | from setuptools import setup 2 | 3 | setup( 4 | name='pykitti', 5 | version='0.3.1', 6 | description='A minimal set of tools for working with the KITTI dataset in Python', 7 | author='Lee Clement', 8 | author_email='lee.clement@robotics.utias.utoronto.ca', 9 | url='https://github.com/utiasSTARS/pykitti', 10 | download_url='https://github.com/utiasSTARS/pykitti/tarball/0.3.1', 11 | license='MIT', 12 | packages=['pykitti'], 13 | install_requires=['numpy', 'matplotlib', 'Pillow', 'pandas'] 14 | ) 15 | --------------------------------------------------------------------------------