├── .gitignore
├── CMakeLists.txt
├── LICENSE
├── README.md
└── src
    ├── visodo.cpp
    └── vo_features.h


/.gitignore:
--------------------------------------------------------------------------------
1 | /build


--------------------------------------------------------------------------------
/CMakeLists.txt:
--------------------------------------------------------------------------------
 1 | cmake_minimum_required(VERSION 2.8)
 2 | project( mono-vo )
 3 | find_package( OpenCV 3.0 REQUIRED )
 4 | 
 5 | include_directories( ${OpenCV_INCLUDE_DIRS} )
 6 | 
 7 | file(GLOB viso
 8 |     "src/*.h"
 9 |     "src/*.cpp"
10 | )
11 | add_executable( vo ${viso} )
12 | target_link_libraries( vo ${OpenCV_LIBS} )
13 | 


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
 1 | The MIT License
 2 | 
 3 | Copyright (c) 2015 Avi Singh
 4 | 
 5 | Permission is hereby granted, free of charge, to any person obtaining a copy
 6 | of this software and associated documentation files (the "Software"), to deal
 7 | in the Software without restriction, including without limitation the rights
 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 | 
12 | The above copyright notice and this permission notice shall be included in
13 | all copies or substantial portions of the Software.
14 | 
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
21 | THE SOFTWARE.


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | This is an OpenCV 3.0 based implementation of a monocular visual odometry algorithm.
 2 | 
 3 | ## Algorithm
 4 | Uses Nister's Five Point Algorithm for Essential Matrix estimation, and FAST features, with a KLT tracker.
 5 | More details are available [here as a report](http://avisingh599.github.io/assets/ugp2-report.pdf), and
 6 | [here as a blog post](http://avisingh599.github.io/vision/monocular-vo/). 
 7 | 
 8 | Note that this project is not yet capable of doing reliable relative scale estimation, 
 9 | so the scale informaion is extracted from the KITTI dataset ground truth files.
10 | 
11 | ## Demo Video
12 | 
13 | [![Demo video](http://share.gifyoutube.com/Ke1ope.gif)](http://www.youtube.com/watch?v=homos4vd_Zs)
14 | 
15 | 
16 | ## Requirements
17 | OpenCV 3.0
18 | 
19 | ## How to compile?
20 | Provided with this repo is a CMakeLists.txt file, which you can use to directly compile the code as follows:
21 | ```bash
22 | mkdir build
23 | cd build
24 | cmake ..
25 | make
26 | ```
27 | 
28 | ## How to run? 
29 | After compilation, in the build directly, type the following:
30 | ```bash
31 | ./vo
32 | ```
33 | ## Before you run
34 | In order to run this algorithm, you need to have either your own data, 
35 | or else the sequences from [KITTI's Visual Odometry Dataset](http://www.cvlibs.net/datasets/kitti/eval_odometry.php).
36 | In order to run this algorithm on your own data, you must modify the intrinsic calibration parameters in the code.
37 | 
38 | ## Performance
39 | ![Results on the KITTI VO Benchmark](http://avisingh599.github.io/images/visodo/2K.png)
40 | 
41 | ## Contact
42 | For any queries, contact: avisingh599@gmail.com
43 | 
44 | ## License
45 | MIT


--------------------------------------------------------------------------------
/src/visodo.cpp:
--------------------------------------------------------------------------------
  1 | /*
  2 | 
  3 | The MIT License
  4 | 
  5 | Copyright (c) 2015 Avi Singh
  6 | 
  7 | Permission is hereby granted, free of charge, to any person obtaining a copy
  8 | of this software and associated documentation files (the "Software"), to deal
  9 | in the Software without restriction, including without limitation the rights
 10 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 11 | copies of the Software, and to permit persons to whom the Software is
 12 | furnished to do so, subject to the following conditions:
 13 | 
 14 | The above copyright notice and this permission notice shall be included in
 15 | all copies or substantial portions of the Software.
 16 | 
 17 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 18 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 19 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 20 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 21 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 22 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 23 | THE SOFTWARE.
 24 | 
 25 | */
 26 | 
 27 | #include "vo_features.h"
 28 | 
 29 | using namespace cv;
 30 | using namespace std;
 31 | 
 32 | #define MAX_FRAME 1000
 33 | #define MIN_NUM_FEAT 2000
 34 | 
 35 | // IMP: Change the file directories (4 places) according to where your dataset is saved before running!
 36 | 
 37 | double getAbsoluteScale(int frame_id, int sequence_id, double z_cal)	{
 38 |   
 39 |   string line;
 40 |   int i = 0;
 41 |   ifstream myfile ("/home/avisingh/Datasets/KITTI_VO/00.txt");
 42 |   double x =0, y=0, z = 0;
 43 |   double x_prev, y_prev, z_prev;
 44 |   if (myfile.is_open())
 45 |   {
 46 |     while (( getline (myfile,line) ) && (i<=frame_id))
 47 |     {
 48 |       z_prev = z;
 49 |       x_prev = x;
 50 |       y_prev = y;
 51 |       std::istringstream in(line);
 52 |       //cout << line << '\n';
 53 |       for (int j=0; j<12; j++)  {
 54 |         in >> z ;
 55 |         if (j==7) y=z;
 56 |         if (j==3)  x=z;
 57 |       }
 58 |       
 59 |       i++;
 60 |     }
 61 |     myfile.close();
 62 |   }
 63 | 
 64 |   else {
 65 |     cout << "Unable to open file";
 66 |     return 0;
 67 |   }
 68 | 
 69 |   return sqrt((x-x_prev)*(x-x_prev) + (y-y_prev)*(y-y_prev) + (z-z_prev)*(z-z_prev)) ;
 70 | 
 71 | }
 72 | 
 73 | 
 74 | int main( int argc, char** argv )	{
 75 | 
 76 |   Mat img_1, img_2;
 77 |   Mat R_f, t_f; //the final rotation and tranlation vectors containing the 
 78 | 
 79 |   ofstream myfile;
 80 |   myfile.open ("results1_1.txt");
 81 | 
 82 |   double scale = 1.00;
 83 |   char filename1[200];
 84 |   char filename2[200];
 85 |   sprintf(filename1, "/home/avisingh/Datasets/KITTI_VO/00/image_2/%06d.png", 0);
 86 |   sprintf(filename2, "/home/avisingh/Datasets/KITTI_VO/00/image_2/%06d.png", 1);
 87 | 
 88 |   char text[100];
 89 |   int fontFace = FONT_HERSHEY_PLAIN;
 90 |   double fontScale = 1;
 91 |   int thickness = 1;  
 92 |   cv::Point textOrg(10, 50);
 93 | 
 94 |   //read the first two frames from the dataset
 95 |   Mat img_1_c = imread(filename1);
 96 |   Mat img_2_c = imread(filename2);
 97 | 
 98 |   if ( !img_1_c.data || !img_2_c.data ) { 
 99 |     std::cout<< " --(!) Error reading images " << std::endl; return -1;
100 |   }
101 | 
102 |   // we work with grayscale images
103 |   cvtColor(img_1_c, img_1, COLOR_BGR2GRAY);
104 |   cvtColor(img_2_c, img_2, COLOR_BGR2GRAY);
105 | 
106 |   // feature detection, tracking
107 |   vector<Point2f> points1, points2;        //vectors to store the coordinates of the feature points
108 |   featureDetection(img_1, points1);        //detect features in img_1
109 |   vector<uchar> status;
110 |   featureTracking(img_1,img_2,points1,points2, status); //track those features to img_2
111 | 
112 |   //TODO: add a fucntion to load these values directly from KITTI's calib files
113 |   // WARNING: different sequences in the KITTI VO dataset have different intrinsic/extrinsic parameters
114 |   double focal = 718.8560;
115 |   cv::Point2d pp(607.1928, 185.2157);
116 |   //recovering the pose and the essential matrix
117 |   Mat E, R, t, mask;
118 |   E = findEssentialMat(points2, points1, focal, pp, RANSAC, 0.999, 1.0, mask);
119 |   recoverPose(E, points2, points1, R, t, focal, pp, mask);
120 | 
121 |   Mat prevImage = img_2;
122 |   Mat currImage;
123 |   vector<Point2f> prevFeatures = points2;
124 |   vector<Point2f> currFeatures;
125 | 
126 |   char filename[100];
127 | 
128 |   R_f = R.clone();
129 |   t_f = t.clone();
130 | 
131 |   clock_t begin = clock();
132 | 
133 |   namedWindow( "Road facing camera", WINDOW_AUTOSIZE );// Create a window for display.
134 |   namedWindow( "Trajectory", WINDOW_AUTOSIZE );// Create a window for display.
135 | 
136 |   Mat traj = Mat::zeros(600, 600, CV_8UC3);
137 | 
138 |   for(int numFrame=2; numFrame < MAX_FRAME; numFrame++)	{
139 |   	sprintf(filename, "/home/avisingh/Datasets/KITTI_VO/00/image_2/%06d.png", numFrame);
140 |     //cout << numFrame << endl;
141 |   	Mat currImage_c = imread(filename);
142 |   	cvtColor(currImage_c, currImage, COLOR_BGR2GRAY);
143 |   	vector<uchar> status;
144 |   	featureTracking(prevImage, currImage, prevFeatures, currFeatures, status);
145 | 
146 |   	E = findEssentialMat(currFeatures, prevFeatures, focal, pp, RANSAC, 0.999, 1.0, mask);
147 |   	recoverPose(E, currFeatures, prevFeatures, R, t, focal, pp, mask);
148 | 
149 |     Mat prevPts(2,prevFeatures.size(), CV_64F), currPts(2,currFeatures.size(), CV_64F);
150 | 
151 | 
152 |    for(int i=0;i<prevFeatures.size();i++)	{   //this (x,y) combination makes sense as observed from the source code of triangulatePoints on GitHub
153 |   		prevPts.at<double>(0,i) = prevFeatures.at(i).x;
154 |   		prevPts.at<double>(1,i) = prevFeatures.at(i).y;
155 | 
156 |   		currPts.at<double>(0,i) = currFeatures.at(i).x;
157 |   		currPts.at<double>(1,i) = currFeatures.at(i).y;
158 |     }
159 | 
160 |   	scale = getAbsoluteScale(numFrame, 0, t.at<double>(2));
161 | 
162 |     //cout << "Scale is " << scale << endl;
163 | 
164 |     if ((scale>0.1)&&(t.at<double>(2) > t.at<double>(0)) && (t.at<double>(2) > t.at<double>(1))) {
165 | 
166 |       t_f = t_f + scale*(R_f*t);
167 |       R_f = R*R_f;
168 | 
169 |     }
170 |   	
171 |     else {
172 |      //cout << "scale below 0.1, or incorrect translation" << endl;
173 |     }
174 |     
175 |    // lines for printing results
176 |    // myfile << t_f.at<double>(0) << " " << t_f.at<double>(1) << " " << t_f.at<double>(2) << endl;
177 | 
178 |   // a redetection is triggered in case the number of feautres being trakced go below a particular threshold
179 |  	  if (prevFeatures.size() < MIN_NUM_FEAT)	{
180 |       //cout << "Number of tracked features reduced to " << prevFeatures.size() << endl;
181 |       //cout << "trigerring redection" << endl;
182 |  		  featureDetection(prevImage, prevFeatures);
183 |       featureTracking(prevImage,currImage,prevFeatures,currFeatures, status);
184 | 
185 |  	  }
186 | 
187 |     prevImage = currImage.clone();
188 |     prevFeatures = currFeatures;
189 | 
190 |     int x = int(t_f.at<double>(0)) + 300;
191 |     int y = int(t_f.at<double>(2)) + 100;
192 |     circle(traj, Point(x, y) ,1, CV_RGB(255,0,0), 2);
193 | 
194 |     rectangle( traj, Point(10, 30), Point(550, 50), CV_RGB(0,0,0), CV_FILLED);
195 |     sprintf(text, "Coordinates: x = %02fm y = %02fm z = %02fm", t_f.at<double>(0), t_f.at<double>(1), t_f.at<double>(2));
196 |     putText(traj, text, textOrg, fontFace, fontScale, Scalar::all(255), thickness, 8);
197 | 
198 |     imshow( "Road facing camera", currImage_c );
199 |     imshow( "Trajectory", traj );
200 | 
201 |     waitKey(1);
202 | 
203 |   }
204 | 
205 |   clock_t end = clock();
206 |   double elapsed_secs = double(end - begin) / CLOCKS_PER_SEC;
207 |   cout << "Total time taken: " << elapsed_secs << "s" << endl;
208 | 
209 |   //cout << R_f << endl;
210 |   //cout << t_f << endl;
211 | 
212 |   return 0;
213 | }


--------------------------------------------------------------------------------
/src/vo_features.h:
--------------------------------------------------------------------------------
 1 | /*
 2 | 
 3 | The MIT License
 4 | 
 5 | Copyright (c) 2015 Avi Singh
 6 | 
 7 | Permission is hereby granted, free of charge, to any person obtaining a copy
 8 | of this software and associated documentation files (the "Software"), to deal
 9 | in the Software without restriction, including without limitation the rights
10 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
11 | copies of the Software, and to permit persons to whom the Software is
12 | furnished to do so, subject to the following conditions:
13 | 
14 | The above copyright notice and this permission notice shall be included in
15 | all copies or substantial portions of the Software.
16 | 
17 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
18 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
20 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
21 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
22 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
23 | THE SOFTWARE.
24 | 
25 | */
26 | 
27 | #include "opencv2/video/tracking.hpp"
28 | #include "opencv2/imgproc/imgproc.hpp"
29 | #include "opencv2/highgui/highgui.hpp"
30 | #include "opencv2/features2d/features2d.hpp"
31 | #include "opencv2/calib3d/calib3d.hpp"
32 | 
33 | 
34 | #include <iostream>
35 | #include <ctype.h>
36 | #include <algorithm> // for copy
37 | #include <iterator> // for ostream_iterator
38 | #include <vector>
39 | #include <ctime>
40 | #include <sstream>
41 | #include <fstream>
42 | #include <string>
43 | 
44 | using namespace cv;
45 | using namespace std;
46 | 
47 | void featureTracking(Mat img_1, Mat img_2, vector<Point2f>& points1, vector<Point2f>& points2, vector<uchar>& status)	{ 
48 | 
49 | //this function automatically gets rid of points for which tracking fails
50 | 
51 |   vector<float> err;					
52 |   Size winSize=Size(21,21);																								
53 |   TermCriteria termcrit=TermCriteria(TermCriteria::COUNT+TermCriteria::EPS, 30, 0.01);
54 | 
55 |   calcOpticalFlowPyrLK(img_1, img_2, points1, points2, status, err, winSize, 3, termcrit, 0, 0.001);
56 | 
57 |   //getting rid of points for which the KLT tracking failed or those who have gone outside the frame
58 |   int indexCorrection = 0;
59 |   for( int i=0; i<status.size(); i++)
60 |      {  Point2f pt = points2.at(i- indexCorrection);
61 |      	if ((status.at(i) == 0)||(pt.x<0)||(pt.y<0))	{
62 |      		  if((pt.x<0)||(pt.y<0))	{
63 |      		  	status.at(i) = 0;
64 |      		  }
65 |      		  points1.erase (points1.begin() + (i - indexCorrection));
66 |      		  points2.erase (points2.begin() + (i - indexCorrection));
67 |      		  indexCorrection++;
68 |      	}
69 | 
70 |      }
71 | 
72 | }
73 | 
74 | 
75 | void featureDetection(Mat img_1, vector<Point2f>& points1)	{   //uses FAST as of now, modify parameters as necessary
76 |   vector<KeyPoint> keypoints_1;
77 |   int fast_threshold = 20;
78 |   bool nonmaxSuppression = true;
79 |   FAST(img_1, keypoints_1, fast_threshold, nonmaxSuppression);
80 |   KeyPoint::convert(keypoints_1, points1, vector<int>());
81 | }
82 | 


--------------------------------------------------------------------------------