├── .gitignore
├── LICENSE
├── README.md
├── docs
    ├── KnowledgeTree4SLAM.png
    ├── KnowledgeTree4SLAM.xmind
    ├── LOGO.png
    ├── _configurations
    │   ├── _js
    │   │   ├── extra.js
    │   │   └── uml.js
    │   ├── _scss
    │   │   ├── _admonition.scss
    │   │   ├── _code.scss
    │   │   ├── _details.scss
    │   │   ├── _footer.scss
    │   │   ├── _headeranchors.scss
    │   │   ├── _keys.scss
    │   │   ├── _links.scss
    │   │   ├── _progressbar.scss
    │   │   ├── _uml.scss
    │   │   └── extra.scss
    │   ├── _snippets
    │   │   ├── abbr.txt
    │   │   ├── critic-accept-example.txt
    │   │   ├── critic-preview-example.txt
    │   │   ├── links.txt
    │   │   ├── mathjax.txt
    │   │   ├── refs.txt
    │   │   └── uml.txt
    │   └── _theme_extra
    │   │   ├── theme_extra.css
    │   │   └── theme_extra.js
    ├── aboutSLAM.md
    ├── computer_vision
    │   ├── FAST.md
    │   ├── Haar.md
    │   ├── Harris.md
    │   ├── ORB.md
    │   ├── SIFT.md
    │   ├── SUFT.md
    │   ├── computer_vision.md
    │   ├── image
    │   │   ├── AC_elliptic.png
    │   │   ├── BRIEF_point_choice.png
    │   │   ├── DoG.png
    │   │   ├── DoH_model_scale_change.png
    │   │   ├── FAST_corner.png
    │   │   ├── Gaussian_Diff_model.png
    │   │   ├── Haar_like.png
    │   │   ├── LoG_vs_DoG.png
    │   │   ├── aperture_problem.png
    │   │   ├── calculate_Haar.png
    │   │   ├── cascade.png
    │   │   ├── classification_via_eigenvalues.png
    │   │   ├── descriptor_HOG.png
    │   │   ├── descriptor_SUFT.png
    │   │   ├── diff_result_in_descriptor_of_SUFT .png
    │   │   ├── differential_Gaussian.png
    │   │   ├── extremum_DoG.png
    │   │   ├── extremum_in_continuous_vs_discrete.png
    │   │   ├── harris_weighting_function.png
    │   │   ├── hog.png
    │   │   ├── integral_rectangle.png.png
    │   │   ├── integral_rotation_rectangle.png
    │   │   ├── invariant2intensity.png
    │   │   ├── not_invariant2scale.png
    │   │   ├── orientation_of_SUFT.png
    │   │   ├── principal_component_ellipse.png
    │   │   ├── pyramid_LoG_VS_DoH.png
    │   │   ├── pyramid_box_scale.png
    │   │   ├── rectangle.png
    │   │   ├── rotate_coordinate.png
    │   │   ├── rotated_rectangle.png
    │   │   ├── rotation_region.png
    │   │   └── variable_scale_Gaussian.png
    │   └── image_filtering.md
    ├── index.md
    ├── licensebutton.png
    ├── multiple_view_geometry
    │   ├── RepreOf3DMoveScene.md
    │   ├── image
    │   │   ├── Rigid-body_Motion.png
    │   │   ├── Rotation4Rigid-body.png
    │   │   ├── geometric_interpretation2svd.png
    │   │   ├── rigid-body motion.png
    │   │   ├── rigid-body_motions.png
    │   │   └── rodrigues.png
    │   ├── linear_algebra.md
    │   └── mvg.md
    ├── others
    │   └── md_template.md
    ├── projects
    │   ├── orb_slam
    │   │   ├── image
    │   │   │   └── circumference.png
    │   │   ├── local_mapping.md
    │   │   ├── loop_closing.md
    │   │   └── tracking.md
    │   ├── projects.md
    │   └── rpg_svo
    │   │   ├── algorithm_framework.md
    │   │   ├── code_analysis.md
    │   │   └── image
    │   │       ├── SVO_Structure.png
    │   │       ├── depth_estimation.png
    │   │       ├── depth_uncertainty.png
    │   │       ├── feature_alignment.png
    │   │       ├── feature_reproject.png
    │   │       └── image_alignment.png
    └── state_estimation_robotics
    │   └── state_estimation_robotics.md
├── mkdocs.yml
└── requirements.txt


/.gitignore:
--------------------------------------------------------------------------------
1 | site/
2 | 


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
 1 | MIT License
 2 | 
 3 | Copyright (c) 2018 Jacob.lsx
 4 | 
 5 | Permission is hereby granted, free of charge, to any person obtaining a copy
 6 | of this software and associated documentation files (the "Software"), to deal
 7 | in the Software without restriction, including without limitation the rights
 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 | 
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 | 
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | # Journey to SLAM
 2 | 
 3 | This repository builds a [**site**](https://lsxiang.github.io/Journey2SLAM "https://lsxiang.github.io/Journey2SLAM") for recording knowledge points picked up while learning [**SLAM**](https://en.wikipedia.org/wiki/Simultaneous_localization_and_mapping "Simultaneous Localization and Mapping").
 4 | 
 5 | The following content will be included in the [**site**](https://lsxiang.github.io/Journey2SLAM "https://lsxiang.github.io/Journey2SLAM") :  
 6 | - Introduction to Computer Vision
 7 | - Multiple View Geometry
 8 | - State Estimation for Robotics
 9 | - Popular project introduction
10 | - Document recommendation
11 | - ......
12 | 
13 | ## Participate in editing
14 | 
15 | The Markdown source for all articles in the  [**site**](https://lsxiang.github.io/Journey2SLAM "https://lsxiang.github.io/Journey2SLAM") is open sourced in the [**Repo/docs**](https://github.com/LSXiang/Journey2SLAM/tree/master/docs) folder, and all pages of the  [**site**](https://lsxiang.github.io/Journey2SLAM "https://lsxiang.github.io/Journey2SLAM") are rendered based on these Markdown files via [**MkDocs**](https://www.mkdocs.org/ "https://www.mkdocs.org/") to generate HTML files for direct access.
16 | 
17 | My own abilities are limited, so like-minded friends are welcome to contribute. You can write up the blog posts, notes, etc. that you produce while learning [**SLAM**](https://en.wikipedia.org/wiki/Simultaneous_localization_and_mapping "Simultaneous Localization and Mapping") in Markdown and push them to the [docs](https://github.com/LSXiang/Journey2SLAM/tree/master/docs) folder in this repository. You can also modify the relevant file and submit a `Pull Request`, or simply open an [***Issue***](https://github.com/LSXiang/Journey2SLAM/issues) describing your specific situation.
18 | 
19 | The following steps show how to run this website locally on **Ubuntu**; on other systems, install the corresponding packages according to the requirements below and run it the same way.
20 | 
21 | #### 1. Prerequisites
22 | 
23 | You need to install `python3`, `git`, [`Python-Markdown`](https://python-markdown.github.io/), [`MkDocs`](https://www.mkdocs.org/), [`PyMdown Extensions`](https://facelessuser.github.io/pymdown-extensions), [`Pygments`](http://pygments.org/) and [`Material for MkDocs`](https://squidfunk.github.io/mkdocs-material). Open a terminal and switch to the directory where you want to download the project:
24 | 
25 | ```
26 | sudo apt-get install python3 git python3-pip
27 | git clone https://github.com/LSXiang/Journey2SLAM.git
28 | cd Journey2SLAM/
29 | python3 -m pip install -r requirements.txt
30 | ```
31 | 
32 | #### 2. Build
33 | 
34 | ```
35 | python3 -m mkdocs serve
36 | ```
37 | 
38 | Then visit http://127.0.0.1:8000 in your local browser to preview the website.
39 | 
40 | **NOTE: Please be sure to sign your article. If you are authorizing us to reprint an article from your personal website, please also cite the original source in the article. By submitting, you agree to the "[CC BY-NC-ND 4.0](https://creativecommons.org/licenses/by-nc-nd/4.0/deed.en)" Creative Commons license used by this site; please read its terms before submitting to decide whether you accept it. You may also ask us to revoke the authorization to publish your article at any time, simply by opening a Pull Request that deletes the corresponding file from the repo.**  
41 | [![license](https://i.creativecommons.org/l/by-nc-nd/4.0/88x31.png)](https://creativecommons.org/licenses/by-nc-nd/4.0/deed.en)
42 | 
43 | 
44 | 
45 | ---
46 | 
47 | # SLAM之旅
48 | 
49 | 这个 [**Repo**](https://github.com/LSXiang/Journey2SLAM "https://github.com/LSXiang/Journey2SLAM") 创建一个[**网站**](https://lsxiang.github.io/Journey2SLAM "https://lsxiang.github.io/Journey2SLAM")用于记录学习 [**SLAM**](https://en.wikipedia.org/wiki/Simultaneous_localization_and_mapping "Simultaneous Localization and Mapping") 过程中的知识点。
50 | 
51 | 该[**网站**](https://lsxiang.github.io/Journey2SLAM "https://lsxiang.github.io/Journey2SLAM")将包括以下内容：  
52 | 
53 | - 计算机视觉基础
54 | - 多视几何
55 | - 状态估计
56 | - 开源项目梳理
57 | - 文档推荐
58 | - ……
59 | 
60 | ## 共同编辑
61 | 
62 | 该[**网站**](https://lsxiang.github.io/Journey2SLAM "https://lsxiang.github.io/Journey2SLAM")中的全部文章的 Markdown 源码开源于 [**Repo/docs**](https://github.com/LSXiang/Journey2SLAM/tree/master/docs) 文件夹中，而[**网站**](https://lsxiang.github.io/Journey2SLAM "https://lsxiang.github.io/Journey2SLAM")的所有页面均基于这些 Markdown 文件通过 [**MkDocs**](https://www.mkdocs.org/ "https://www.mkdocs.org/") 进行渲染生成 HTML 文件可直接访问。
63 | 
64 | 个人能力有限，欢迎志同道合的朋友一起写作。 您可以将学习 SLAM 期间编写的博客、笔记等通过 Markdown 编辑并推送到此项目的 [**docs**](https://github.com/LSXiang/Journey2SLAM/tree/master/docs) 文件夹中。您也可以修改相应的文件然后提交 `Pull Request`，或者仅针对具体情况提出 [***Issues***](https://github.com/LSXiang/Journey2SLAM/issues) 。
65 | 
66 | 下面给出 **Ubuntu** 系统下本地模拟运行本网站，其他系统可以根据下文环境需求安装对应插件并运行。
67 | 
68 | #### 1. 需求
69 | 
70 | 你需要安装 `python3` , `git`, [`Python-Markdown`](https://python-markdown.github.io/),  [`MkDocs`](https://www.mkdocs.org/),  [`PyMdown Extensions`](https://facelessuser.github.io/pymdown-extensions), [`Pygments`](http://pygments.org/) 和 [`Material for MkDocs`](https://squidfunk.github.io/mkdocs-material) 。打开一个终端，并切换到您希望下载该项目的路径下：
71 | 
72 | ```
73 | sudo apt-get install python3 git python3-pip
74 | git clone https://github.com/LSXiang/Journey2SLAM.git
75 | cd Journey2SLAM/
76 | python3 -m pip install -r requirements.txt
77 | ```
78 | 
79 | #### 2. 运行
80 | 
81 | ```
82 | python3 -m mkdocs serve
83 | ```
84 | 
85 | 然后在本地浏览器访问 http://127.0.0.1:8000 ，查看网站效果。
86 | 
87 | 
88 | 
89 | **注意：请务必在文章中署名，若您是授权将您个人网站中的文章转载在本站，也请一并在文中附上原文出处。当您发起投稿后，将意味着同意本站所使用的 "[CC BY-NC-ND 4.0](https://creativecommons.org/licenses/by-nc-nd/4.0/deed.zh)" 知识共享协议，投稿前请先阅读协议条款，确定您是否接受这一协议。同时，您随时可以向我们申请撤销刊登文章的授权，只需要在 Repo 中申请删除对应文件的 Pull Request 即可。**  
90 | [![license](https://i.creativecommons.org/l/by-nc-nd/4.0/88x31.png)](https://creativecommons.org/licenses/by-nc-nd/4.0/deed.zh)
91 | 
92 | 


--------------------------------------------------------------------------------
/docs/KnowledgeTree4SLAM.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/LSXiang/Journey2SLAM/901bf5fd97c93dc68248f74cd5bae1a52bef85a2/docs/KnowledgeTree4SLAM.png


--------------------------------------------------------------------------------
/docs/KnowledgeTree4SLAM.xmind:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/LSXiang/Journey2SLAM/901bf5fd97c93dc68248f74cd5bae1a52bef85a2/docs/KnowledgeTree4SLAM.xmind


--------------------------------------------------------------------------------
/docs/LOGO.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/LSXiang/Journey2SLAM/901bf5fd97c93dc68248f74cd5bae1a52bef85a2/docs/LOGO.png


--------------------------------------------------------------------------------
/docs/_configurations/_js/extra.js:
--------------------------------------------------------------------------------
 1 | import uml from "./uml"
 2 | 
 3 | (() => {
 4 |   const onReady = function(fn) {
 5 |     if (document.addEventListener) {
 6 |       document.addEventListener("DOMContentLoaded", fn)
 7 |     } else {
 8 |       document.attachEvent("onreadystatechange", () => {
 9 |         if (document.readyState === "interactive") {
10 |           fn()
11 |         }
12 |       })
13 |     }
14 |   }
15 | 
16 |   onReady(() => {
17 | 
18 |     if (typeof flowchart !== "undefined") {
19 |       uml(flowchart, "uml-flowchart")
20 |     }
21 | 
22 |     if (typeof Diagram !== "undefined") {
23 |       uml(Diagram, "uml-sequence-diagram", {theme: "simple"})
24 |     }
25 |   })
26 | })()
27 | 
// Self-contained ES5 script: defines a UML renderer and runs it for the
// `flowchart` and `Diagram` globals (when present) once the DOM is ready.
// The leading `!` keeps this expression from being parsed as a call on
// whatever semicolon-less expression precedes it.
!function () {
  "use strict";

  // Returns the first non-blank text node found inside the <code> children
  // of a <pre>; a later <code> child with text overrides an earlier one.
  var readFromPre = function (pre) {
    var source = "";
    for (var p = 0; p < pre.childNodes.length; p++) {
      var node = pre.childNodes[p];
      if ("code" === node.tagName.toLowerCase()) {
        for (var q = 0; q < node.childNodes.length; q++) {
          var piece = node.childNodes[q];
          if ("#text" === piece.nodeName && !/^\s*$/.test(piece.nodeValue)) {
            source = piece.nodeValue;
            break;
          }
        }
      }
    }
    return source;
  };

  // Returns the raw text of a <div> block.
  var readFromDiv = function (div) {
    return div.textContent || div.innerText;
  };

  // Replaces every pre/div carrying `className` with a div holding the SVG
  // that `converter` draws from the block's text.
  var render = function (converter, className, settings) {
    var articles = document.querySelectorAll("article");
    var blocks = document.querySelectorAll("pre.".concat(className, ",div.").concat(className));
    var config = void 0 === settings ? {} : settings;

    for (var idx = 0; idx < blocks.length; idx++) {
      var block = blocks[idx];
      var holder = document.createElement("div");
      holder.className = className;
      holder.style.visibility = "hidden";
      holder.style.position = "absolute";

      var text = "pre" === block.tagName.toLowerCase() ? readFromPre(block) : readFromDiv(block);

      // Draw inside the first <article> while hidden, then move the result
      // to where the source block was and drop the source block.
      articles[0].appendChild(holder);
      converter.parse(text).drawSVG(holder, config);
      holder.style.visibility = "visible";
      holder.style.position = "static";
      block.parentNode.insertBefore(holder, block);
      block.parentNode.removeChild(block);
    }
  };

  // Runs `callback` on DOMContentLoaded, or via attachEvent when
  // addEventListener is unavailable.
  var whenReady = function (callback) {
    if (document.addEventListener) {
      document.addEventListener("DOMContentLoaded", callback);
    } else {
      document.attachEvent("onreadystatechange", function () {
        if ("interactive" === document.readyState) {
          callback();
        }
      });
    }
  };

  whenReady(function () {
    if ("undefined" != typeof flowchart) {
      render(flowchart, "uml-flowchart");
    }
    if ("undefined" != typeof Diagram) {
      render(Diagram, "uml-sequence-diagram", {theme: "simple"});
    }
  });
}();
29 | 
30 | 
31 | 


--------------------------------------------------------------------------------
/docs/_configurations/_js/uml.js:
--------------------------------------------------------------------------------
 1 | /**
 2 |  * Targets special code or div blocks and converts them to UML.
 3 |  * @param {object} converter is the object that transforms the text to UML.
 4 |  * @param {string} className is the name of the class to target.
 5 |  * @param {object} settings is the settings for converter.
 6 |  * @return {void}
 7 |  */
 8 | export default (converter, className, settings) => {
 9 | 
10 |   const getFromCode = function(parent) {
11 |     // Handles <pre><code>
12 |     let text = ""
13 |     for (let j = 0; j < parent.childNodes.length; j++) {
14 |       const subEl = parent.childNodes[j]
15 |       if (subEl.tagName.toLowerCase() === "code") {
16 |         for (let k = 0; k < subEl.childNodes.length; k++) {
17 |           const child = subEl.childNodes[k]
18 |           const whitespace = /^\s*$/
19 |           if (child.nodeName === "#text" && !(whitespace.test(child.nodeValue))) {
20 |             text = child.nodeValue
21 |             break
22 |           }
23 |         }
24 |       }
25 |     }
26 |     return text
27 |   }
28 | 
29 |   const getFromDiv = function(parent) {
30 |     // Handles <div>
31 |     return parent.textContent || parent.innerText
32 |   }
33 | 
34 |   // Change article to whatever element your main Markdown content lives.
35 |   const article = document.querySelectorAll("article")
36 |   const blocks = document.querySelectorAll(`pre.${className},div.${className}`)
37 | 
38 |   // Is there a settings object?
39 |   const config = (settings === void 0) ? {} : settings
40 | 
41 |   // Find the UML source element and get the text
42 |   for (let i = 0; i < blocks.length; i++) {
43 |     const parentEl = blocks[i]
44 |     const el = document.createElement("div")
45 |     el.className = className
46 |     el.style.visibility = "hidden"
47 |     el.style.position = "absolute"
48 | 
49 |     const text = (parentEl.tagName.toLowerCase() === "pre") ? getFromCode(parentEl) : getFromDiv(parentEl)
50 | 
51 |     // Insert our new div at the end of our content to get general
52 |     // typeset and page sizes as our parent might be `display:none`
53 |     // keeping us from getting the right sizes for our SVG.
54 |     // Our new div will be hidden via "visibility" and take no space
55 |     // via `position: absolute`. When we are all done, use the
56 |     // original node as a reference to insert our SVG back
57 |     // into the proper place, and then make our SVG visible again.
58 |     // Lastly, clean up the old node.
59 |     article[0].appendChild(el)
60 |     const diagram = converter.parse(text)
61 |     diagram.drawSVG(el, config)
62 |     el.style.visibility = "visible"
63 |     el.style.position = "static"
64 |     parentEl.parentNode.insertBefore(el, parentEl)
65 |     parentEl.parentNode.removeChild(parentEl)
66 |   }
67 | }
68 | 


--------------------------------------------------------------------------------
/docs/_configurations/_scss/_admonition.scss:
--------------------------------------------------------------------------------
 1 | /* Material colors and icons */
 2 | .md-typeset {
 3 |   .admonition {
 4 |     @each $names, $appearance in (
 5 |       settings config: $clr-purple-a700 "settings",
 6 |       new: $clr-yellow-a700 "new_releases"
 7 |     ) {
 8 |       $tint: nth($appearance, 1);
 9 |       $icon: nth($appearance, 2);
10 | 
11 |       // Define base class
12 |       &%#{nth($names, 1)},
13 |       &.#{nth($names, 1)} {
14 |         border-left: 0.4rem solid $tint;
15 | 
16 |         // Title
17 |         > .admonition-title {
18 |           border-bottom: 0.1rem solid transparentize($tint, 0.9);
19 |           background-color: transparentize($tint, 0.9);
20 | 
21 |           // Icon
22 |           &::before {
23 |             color: $tint;
24 |             content: $icon;
25 |           }
26 |         }
27 |       }
28 |     }
29 |   }
30 | }
31 | 


--------------------------------------------------------------------------------
/docs/_configurations/_scss/_code.scss:
--------------------------------------------------------------------------------
  1 | $md-code-background: hsla(0, 0%, 92.5%, 0.5);
  2 | $md-code-special: hsl(0, 0%, 100%);
  3 | $md-linenos-background-special: hsl(0, 0%, 90%);
  4 | $md-linenos-background-special-table: hsla(0, 0%, 60%, 0.2);
  5 | $md-linenos-background: hsl(0, 0%, 97%);
  6 | $md-linenos: #999999;
  7 | $md-linenos-border: #DDDDDD;
  8 | $tab-label: #333333;
  9 | $tab-current-label: #FF5252;
 10 | 
 11 | .md-typeset {
 12 | 
 13 |   /* Code */
 14 |   a {
 15 |     > code {
 16 |       $correct: 1 / 0.85;
 17 | 
 18 |       margin: 0 0.25em * $correct;
 19 |       padding: 0.0625em * $correct 0;
 20 |       border-radius: 0.2rem;
 21 |       background-color: $md-code-background;
 22 |       box-shadow:
 23 |         +0.25em * $correct 0 0 $md-code-background,
 24 |         -0.25em * $correct 0 0 $md-code-background;
 25 |       box-decoration-break: clone;
 26 |     }
 27 |   }
 28 | 
 29 |   .codehilitetable {
 30 |     .linenos {
 31 |       border-right: 0.0625rem solid $md-linenos-border;
 32 |       border-radius: 0;
 33 |       background-color: $md-code-background;
 34 |     }
 35 | 
 36 |     .linenodiv {
 37 |       .special {
 38 |         margin-right: -1.2rem;
 39 |         margin-left: -1.2rem;
 40 |         padding-right: 1.2rem;
 41 |         padding-left: 1.2rem;
 42 |         background-color: $md-linenos-background-special-table;
 43 |       }
 44 |     }
 45 |   }
 46 | 
 47 |   td code {
 48 |     word-break: normal;
 49 |   }
 50 | 
 51 |   .codehilite {
 52 |     tab-size: 8;
 53 | 
 54 |     .hll {
 55 |       display: inline;
 56 |     }
 57 | 
 58 |     [data-linenos] {
 59 |       &::before {
 60 |         display: inline-block;
 61 |         // position: sticky;
 62 |         // left: -1.2rem;
 63 |         margin-right: 0.5rem;
 64 |         margin-left: -1.2rem;
 65 |         padding-left: 1.2rem;
 66 |         border-right: 0.0625rem solid $md-linenos-border;
 67 |         background-color: $md-linenos-background;
 68 |         color: $md-linenos;
 69 |         content: attr(data-linenos);
 70 |         user-select: none;
 71 |       }
 72 | 
 73 |       &.special::before {
 74 |         background-color: $md-linenos-background-special;
 75 |       }
 76 | 
 77 |       + .hll {
 78 |         margin: 0 -0.5rem;
 79 |         padding: 0 0.5rem;
 80 |       }
 81 |     }
 82 |   }
 83 | 
 84 |   > {
 85 |     .codehilitetable {
 86 |       .linenodiv {
 87 |         @media only screen and (max-width: 44.9375em) {
 88 |           .special {
 89 |             margin-left: -1.6rem;
 90 |             padding-left: 1.6rem;
 91 |           }
 92 |         }
 93 |       }
 94 |     }
 95 | 
 96 |     .codehilite {
 97 |       [data-linenos]::before {
 98 |         @media only screen and (max-width: 44.9375em) {
 99 |           // left: -1.6rem;
100 |           margin-left: -1.6rem;
101 |           padding-left: 1.6rem;
102 |         }
103 |       }
104 |     }
105 |   }
106 | 
107 |   .highlight {
108 |     @extend .codehilite;
109 |   }
110 | 
111 |   .highlighttable {
112 |     @extend .codehilitetable;
113 |   }
114 | }
115 | 


--------------------------------------------------------------------------------
/docs/_configurations/_scss/_details.scss:
--------------------------------------------------------------------------------
 1 | /* Material colors and icons */
 2 | .md-typeset {
 3 |   details {
 4 |     @extend .admonition;
 5 | 
 6 |     // Title
 7 |     > summary {
 8 |       @extend .admonition-title;
 9 |     }
10 |   }
11 | }
12 | 


--------------------------------------------------------------------------------
/docs/_configurations/_scss/_footer.scss:
--------------------------------------------------------------------------------
1 | $md-footer-text: hsla(0, 0%, 100%, 0.3);
2 | 
3 | .md-footer .md-footer-custom-text {
4 |   color: $md-footer-text;
5 | }
6 | 


--------------------------------------------------------------------------------
/docs/_configurations/_scss/_headeranchors.scss:
--------------------------------------------------------------------------------
 1 | .md-typeset {
 2 |   .headerlink {
 3 |     font: normal 400 1rem "Material Icons";
 4 |     vertical-align: middle;
 5 | 
 6 |     @media only screen and (min-width: 76.1876em) {
 7 |       margin-left: -1.2rem;
 8 |       float: left;
 9 |     }
10 |   }
11 | 
12 |   h1 {
13 |     .headerlink {
14 |       margin-top: -0.3rem;
15 | 
16 |       @media only screen and (min-width: 76.1876em) {
17 |         margin-top: 0.4rem;
18 |       }
19 |     }
20 |   }
21 | 
22 |   h2 {
23 |     .headerlink {
24 |       margin-top: -0.2rem;
25 | 
26 |       @media only screen and (min-width: 76.1876em) {
27 |         margin-top: 0.3rem;
28 |       }
29 |     }
30 |   }
31 | 
32 |   h3 {
33 |     .headerlink {
34 |       margin-top: -0.15rem;
35 | 
36 |       @media only screen and (min-width: 76.1876em) {
37 |         margin-top: 0.2rem;
38 |       }
39 |     }
40 |   }
41 | 
42 |   h4 {
43 |     .headerlink {
44 |       margin-top: -0.1rem;
45 | 
46 |       @media only screen and (min-width: 76.1876em) {
47 |         margin-top: 0.1rem;
48 |       }
49 |     }
50 |   }
51 | 
52 |   h5,
53 |   h6 {
54 |     .headerlink {
55 |       margin-top: -0.1rem;
56 | 
57 |       @media only screen and (min-width: 76.1876em) {
58 |         margin-top: 0;
59 |       }
60 |     }
61 |   }
62 | }
63 | 


--------------------------------------------------------------------------------
/docs/_configurations/_scss/_keys.scss:
--------------------------------------------------------------------------------
 1 | .md-typeset {
 2 | 
 3 |   /* Keys */
 4 |   .keys {
 5 |     kbd {
 6 |       &::before,
 7 |       &::after {
 8 |         position: relative;
 9 |         margin: 0;
10 |         color: $clr-grey-400;
11 |         font-family: sans-serif;
12 |         -moz-osx-font-smoothing: initial;
13 |         -webkit-font-smoothing: initial;
14 |         font-weight: 400;
15 |       }
16 |     }
17 | 
18 |     span {
19 |       padding: 0 0.2rem;
20 |       color: $clr-grey-400;
21 |     }
22 | 
23 |     // Keys whose glyph is rendered before the label (::before)
24 |     @each $name, $code in (
25 |       "backspace": "\2190",
26 |       "command":   "\2318",
27 |       "windows":   "\229E",
28 |       "caps-lock": "\21EA",
29 |       "control":   "\2303",
30 |       "meta":      "\25C6",
31 |       "shift":     "\21E7",
32 |       "option":    "\2325",
33 |     ) {
34 |       .key-#{$name} {
35 |         &::before {
36 |           padding-left: 0.2rem;
37 |           content: $code;
38 |         }
39 |       }
40 |     }
41 | 
42 |     // Keys whose glyph is rendered after the label (::after)
43 |     @each $name, $code in (
44 |       "tab":       "\21B9",
45 |       "num-enter": "\21B5",
46 |       "enter":     "\21A9"
47 |     ) {
48 |       .key-#{$name} {
49 |         &::after {
50 |           padding-left: 0.2rem;
51 |           content: $code;
52 |         }
53 |       }
54 |     }
55 |   }
56 | }
57 | 


--------------------------------------------------------------------------------
/docs/_configurations/_scss/_links.scss:
--------------------------------------------------------------------------------
 1 | // General styling for repository link icons
 2 | .md-typeset {
 3 |   .magiclink {
 4 |     &::before {
 5 |       position: relative;
 6 |       padding-right: 0.25rem;
 7 |       font-family: FontAwesome;
 8 |       -moz-osx-font-smoothing: initial;
 9 |       -webkit-font-smoothing: initial;
10 |       font-weight: 400;
11 |     }
12 |   }
13 | }
14 | 
15 | // Assign icons to repository links
16 | @each $name, $icon in (
17 |   "github": "\f09b",
18 |   "gitlab": "\f296",
19 |   "bitbucket": "\f171"
20 | ) {
21 |   .md-typeset {
22 |     .magiclink-repository.magiclink-#{$name} {
23 |       &::before {
24 |         content: "#{$icon}";
25 |       }
26 |     }
27 |   }
28 | }
29 | 


--------------------------------------------------------------------------------
/docs/_configurations/_scss/_progressbar.scss:
--------------------------------------------------------------------------------
  1 | /* Stripes */
  2 | $pb-stripe: transparentize($clr-white, 0.2);
  3 | 
  4 | .md-typeset {
  5 | 
  6 |   /* Progress Bars */
  7 |   .progress-label {
  8 |     position: absolute;
  9 |     width: 100%;
 10 |     margin: 0;
 11 |     color: transparentize($clr-black, 0.5);
 12 |     font-weight: 700;
 13 |     line-height: 1.4rem;
 14 |     text-align: center;
 15 |     white-space: nowrap;
 16 |   }
 17 | 
 18 |   .progress-bar {
 19 |     height: 1.2rem;
 20 |     float: left;
 21 |     background-color: $clr-blue-a400;
 22 |   }
 23 | 
 24 |   /* Stripe animation */
 25 |   .candystripe-animate {
 26 |     .progress-bar{
 27 |       animation: animate-stripes 3s linear infinite;
 28 |     }
 29 |   }
 30 | 
 31 |   .progress {
 32 |     display: block;
 33 |     position: relative;
 34 |     width: 100%;
 35 |     height: 1.2rem;
 36 |     margin: 0.5rem 0;
 37 |     background-color: $clr-grey-200;
 38 | 
 39 |     &.thin {
 40 |       height: 0.4rem;
 41 |       margin-top: 0.9rem;
 42 | 
 43 |       .progress-label {
 44 |         margin-top: -0.4rem;
 45 |       }
 46 | 
 47 |       .progress-bar {
 48 |         height: 0.4rem;
 49 |       }
 50 |     }
 51 | 
 52 |     &.candystripe .progress-bar {
 53 |       background-image:
 54 |         linear-gradient(
 55 |           135deg,
 56 |           $pb-stripe 27%,
 57 |           transparent 27%,
 58 |           transparent 52%,
 59 |           $pb-stripe 52%,
 60 |           $pb-stripe 77%,
 61 |           transparent 77%,
 62 |           transparent
 63 |         );
 64 |       background-size: 2rem 2rem;
 65 |     }
 66 |   }
 67 | 
 68 |   @each $percent, $color in (
 69 |     "100": $clr-green-a400,
 70 |     "80":  $clr-green-a400,
 71 |     "60":  $clr-yellow-700,
 72 |     "40":  $clr-orange-a400,
 73 |     "20":  $clr-red-a200,
 74 |     "0":   $clr-red-a400
 75 |   ) {
 76 |     .progress-#{$percent}plus {
 77 |       .progress-bar {
 78 |         background-color: $color;
 79 |       }
 80 |     }
 81 |   }
 82 | 
 83 |   @each $names, $color in (
 84 |     note seealso: $clr-blue-a400,
 85 |     summary tldr: $clr-light-blue-a400,
 86 |     tip hint important : $clr-teal-a700,
 87 |     success check done: $clr-green-a400,
 88 |     warning caution attention: $clr-orange-a400,
 89 |     failure fail missing: $clr-red-a200,
 90 |     danger error: $clr-red-a400,
 91 |     bug: $clr-pink-a400,
 92 |     quote cite: $clr-grey
 93 |   ) {
 94 |     .progress.#{nth($names, 1)} {
 95 |       .progress-bar {
 96 |         background-color: $color;
 97 |       }
 98 |     }
 99 |   }
100 | 
101 |   @keyframes animate-stripes {
102 |     0% {
103 |       background-position: 0 0;
104 |     }
105 | 
106 |     100% {
107 |       background-position: 6rem 0;
108 |     }
109 |   }
110 | }
111 | 


--------------------------------------------------------------------------------
/docs/_configurations/_scss/_uml.scss:
--------------------------------------------------------------------------------
 1 | .md-typeset {
 2 | 
 3 |   /* UML */
 4 |   .uml-sequence-diagram,
 5 |   .uml-flowchart {
 6 |     width: 100%;
 7 |     padding: 1rem 0;
 8 |     overflow: auto;
 9 | 
10 |     svg {
11 |       max-width: initial;
12 |     }
13 |   }
14 | }
15 | 


--------------------------------------------------------------------------------
/docs/_configurations/_scss/extra.scss:
--------------------------------------------------------------------------------
 1 | @import "links";
 2 | @import "keys";
 3 | @import "admonition";
 4 | @import "details";
 5 | @import "uml";
 6 | @import "code";
 7 | @import "headeranchors";
 8 | @import "progressbar";
 9 | @import "footer";
10 | 


--------------------------------------------------------------------------------
/docs/_configurations/_snippets/abbr.txt:
--------------------------------------------------------------------------------
1 | *[SLAM]: Simultaneous Localization and Mapping
2 | 


--------------------------------------------------------------------------------
/docs/_configurations/_snippets/critic-accept-example.txt:
--------------------------------------------------------------------------------
1 | <p>Here is some  Markdown.  I am adding this here.  Here is some more text.  And here is even more text that I am adding.  Paragraph was deleted and replaced with some spaces.</p><p>Spaces were removed and a paragraph was added.</p><p>And here is a comment on some text. Substitutions are great!</p><p>General block handling.</p><ul><li>test add</li><li>test add</li><li>test add<ul><li>test add</li></ul></li><li>test add</li></ul>


--------------------------------------------------------------------------------
/docs/_configurations/_snippets/critic-preview-example.txt:
--------------------------------------------------------------------------------
1 | <p>Here is some <del class="critic"><em>incorrect</em></del> Markdown.  I am adding this<ins class="critic"> here</ins>.  Here is some more <del class="critic">text that I am removing</del>text.  And here is even more <ins class="critic">text that I am </ins>adding.<del class="critic break">&nbsp;</del><ins class="critic">  </ins>Paragraph was deleted and replaced with some spaces.<del class="critic">  </del></p><ins class="critic break">&nbsp;</ins><p>Spaces were removed and a paragraph was added.</p><p>And here is a comment on <mark class="critic">some text</mark><span class="critic comment">This works quite well. I just wanted to comment on it.</span>. Substitutions <del class="critic">is</del><ins class="critic">are</ins> great!</p><p>General block handling.</p><del class="critic block"><ul><li>test remove</li><li>test remove</li><li>test remove<ul><li>test remove</li></ul></li><li>test remove</li></ul></del><ins class="critic block"><ul><li>test add</li><li>test add</li><li>test add<ul><li>test add</li></ul></li><li>test add</li></ul></ins>


--------------------------------------------------------------------------------
/docs/_configurations/_snippets/links.txt:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/LSXiang/Journey2SLAM/901bf5fd97c93dc68248f74cd5bae1a52bef85a2/docs/_configurations/_snippets/links.txt


--------------------------------------------------------------------------------
/docs/_configurations/_snippets/mathjax.txt:
--------------------------------------------------------------------------------
 1 | <script type="text/x-mathjax-config">
 2 | MathJax.Hub.Config({
 3 |     config: ["MMLorHTML.js"],
 4 |     jax: ["input/TeX", "output/HTML-CSS", "output/NativeMML"],
 5 |     extensions: ["MathMenu.js", "MathZoom.js"],
 6 |     TeX: {
 7 |         TagSide: "right",
 8 |         TagIndent: ".8em",
 9 |         MultLineWidth: "85%",
10 |         equationNumbers: {
11 |             autoNumber: "AMS",
12 |         }
13 |     },
14 |     showProcessingMessages: false,
15 |     messageStyle: 'none'
16 | });
17 | </script>
18 | <script src='https://cdnjs.cloudflare.com/ajax/libs/mathjax/2.7.5/MathJax.js' async></script>


--------------------------------------------------------------------------------
/docs/_configurations/_snippets/refs.txt:
--------------------------------------------------------------------------------
1 | --8<--
2 | links.txt
3 | abbr.txt
4 | --8<--
5 | 


--------------------------------------------------------------------------------
/docs/_configurations/_snippets/uml.txt:
--------------------------------------------------------------------------------
1 | <script src="https://cdnjs.cloudflare.com/ajax/libs/raphael/2.2.7/raphael.min.js"></script>
2 | <script src="https://cdnjs.cloudflare.com/ajax/libs/underscore.js/1.8.3/underscore-min.js"></script>
3 | <script src="https://cdnjs.cloudflare.com/ajax/libs/js-sequence-diagrams/1.0.6/sequence-diagram-min.js"></script>
4 | <script src="https://cdnjs.cloudflare.com/ajax/libs/flowchart/1.6.5/flowchart.min.js"></script>
5 | 


--------------------------------------------------------------------------------
/docs/_configurations/_theme_extra/theme_extra.css:
--------------------------------------------------------------------------------
1 | @charset "UTF-8";.md-typeset .magiclink:before{position:relative;padding-right:.25rem;font-family:FontAwesome;-moz-osx-font-smoothing:initial;-webkit-font-smoothing:initial;font-weight:400}.md-typeset .magiclink-repository.magiclink-github:before{content:""}.md-typeset .magiclink-repository.magiclink-gitlab:before{content:""}.md-typeset .magiclink-repository.magiclink-bitbucket:before{content:""}.md-typeset .keys kbd:after,.md-typeset .keys kbd:before{position:relative;margin:0;color:#bdbdbd;font-family:sans-serif;-moz-osx-font-smoothing:initial;-webkit-font-smoothing:initial;font-weight:400}.md-typeset .keys span{padding:0 .2rem;color:#bdbdbd}.md-typeset .keys .key-backspace:before{padding-left:.2rem;content:"←"}.md-typeset .keys .key-command:before{padding-left:.2rem;content:"⌘"}.md-typeset .keys .key-windows:before{padding-left:.2rem;content:"⊞"}.md-typeset .keys .key-caps-lock:before{padding-left:.2rem;content:"⇪"}.md-typeset .keys .key-control:before{padding-left:.2rem;content:"⌃"}.md-typeset .keys .key-meta:before{padding-left:.2rem;content:"◆"}.md-typeset .keys .key-shift:before{padding-left:.2rem;content:"⇧"}.md-typeset .keys .key-option:before{padding-left:.2rem;content:"⌥"}.md-typeset .keys .key-tab:after{padding-left:.2rem;content:"↹"}.md-typeset .keys .key-num-enter:after{padding-left:.2rem;content:"↵"}.md-typeset .keys .key-enter:after{padding-left:.2rem;content:"↩"}.md-typeset .admonition.settings,.md-typeset details.settings{border-left:.4rem solid #a0f}.md-typeset .admonition.settings>.admonition-title,.md-typeset details.settings>.admonition-title,.md-typeset details.settings>summary{border-bottom:.1rem solid rgba(170,0,255,.1);background-color:rgba(170,0,255,.1)}.md-typeset .admonition.settings>.admonition-title:before,.md-typeset details.settings>.admonition-title:before,.md-typeset details.settings>summary:before{color:#a0f;content:"settings"}.md-typeset .admonition.new,.md-typeset details.new{border-left:.4rem solid #ffd600}.md-typeset 
.admonition.new>.admonition-title,.md-typeset details.new>.admonition-title,.md-typeset details.new>summary{border-bottom:.1rem solid rgba(255,214,0,.1);background-color:rgba(255,214,0,.1)}.md-typeset .admonition.new>.admonition-title:before,.md-typeset details.new>.admonition-title:before,.md-typeset details.new>summary:before{color:#ffd600;content:"new_releases"}.md-typeset .uml-flowchart,.md-typeset .uml-sequence-diagram{width:100%;padding:1rem 0;overflow:auto}.md-typeset .uml-flowchart svg,.md-typeset .uml-sequence-diagram svg{max-width:none}.md-typeset a>code{margin:0 .29412em;padding:.07353em 0;border-radius:.2rem;background-color:hsla(0,0%,93%,.5);box-shadow:.29412em 0 0 hsla(0,0%,93%,.5),-.29412em 0 0 hsla(0,0%,93%,.5);-webkit-box-decoration-break:clone;box-decoration-break:clone}.md-typeset .codehilitetable .linenos,.md-typeset .highlighttable .linenos{border-right:.0625rem solid #ddd;border-radius:0;background-color:hsla(0,0%,93%,.5)}.md-typeset .codehilitetable .linenodiv .special,.md-typeset .highlighttable .linenodiv .special{margin-right:-1.2rem;margin-left:-1.2rem;padding-right:1.2rem;padding-left:1.2rem;background-color:hsla(0,0%,60%,.2)}.md-typeset td code{word-break:normal}.md-typeset .codehilite,.md-typeset .highlight{-moz-tab-size:8;-o-tab-size:8;tab-size:8}.md-typeset .codehilite .hll,.md-typeset .highlight .hll{display:inline}.md-typeset .codehilite [data-linenos]:before,.md-typeset .highlight [data-linenos]:before{display:inline-block;margin-right:.5rem;margin-left:-1.2rem;padding-left:1.2rem;border-right:.0625rem solid #ddd;background-color:#f7f7f7;color:#999;content:attr(data-linenos);-webkit-user-select:none;-moz-user-select:none;-ms-user-select:none;user-select:none}.md-typeset .codehilite [data-linenos].special:before,.md-typeset .highlight [data-linenos].special:before{background-color:#e6e6e6}.md-typeset .codehilite [data-linenos]+.hll,.md-typeset .highlight [data-linenos]+.hll{margin:0 -.5rem;padding:0 .5rem}.md-typeset 
.headerlink{font:normal 400 1rem Material Icons;vertical-align:middle}.md-typeset h1 .headerlink{margin-top:-.3rem}.md-typeset h2 .headerlink{margin-top:-.2rem}.md-typeset h3 .headerlink{margin-top:-.15rem}.md-typeset h4 .headerlink,.md-typeset h5 .headerlink,.md-typeset h6 .headerlink{margin-top:-.1rem}.md-typeset .progress-label{position:absolute;width:100%;margin:0;color:rgba(0,0,0,.5);font-weight:700;line-height:1.4rem;text-align:center;white-space:nowrap}.md-typeset .progress-bar{height:1.2rem;float:left;background-color:#2979ff}.md-typeset .candystripe-animate .progress-bar{-webkit-animation:a 3s linear infinite;animation:a 3s linear infinite}.md-typeset .progress{display:block;position:relative;width:100%;height:1.2rem;margin:.5rem 0;background-color:#eee}.md-typeset .progress.thin{height:.4rem;margin-top:.9rem}.md-typeset .progress.thin .progress-label{margin-top:-.4rem}.md-typeset .progress.thin .progress-bar{height:.4rem}.md-typeset .progress.candystripe .progress-bar{background-image:linear-gradient(135deg,hsla(0,0%,100%,.8) 27%,transparent 0,transparent 52%,hsla(0,0%,100%,.8) 0,hsla(0,0%,100%,.8) 77%,transparent 0,transparent);background-size:2rem 2rem}.md-typeset .progress-80plus .progress-bar,.md-typeset .progress-100plus .progress-bar{background-color:#00e676}.md-typeset .progress-60plus .progress-bar{background-color:#fbc02d}.md-typeset .progress-40plus .progress-bar{background-color:#ff9100}.md-typeset .progress-20plus .progress-bar{background-color:#ff5252}.md-typeset .progress-0plus .progress-bar{background-color:#ff1744}.md-typeset .progress.note .progress-bar{background-color:#2979ff}.md-typeset .progress.summary .progress-bar{background-color:#00b0ff}.md-typeset .progress.tip .progress-bar{background-color:#00bfa5}.md-typeset .progress.success .progress-bar{background-color:#00e676}.md-typeset .progress.warning .progress-bar{background-color:#ff9100}.md-typeset .progress.failure .progress-bar{background-color:#ff5252}.md-typeset 
.progress.danger .progress-bar{background-color:#ff1744}.md-typeset .progress.bug .progress-bar{background-color:#f50057}.md-typeset .progress.quote .progress-bar{background-color:#9e9e9e}@-webkit-keyframes a{0%{background-position:0 0}to{background-position:6rem 0}}@keyframes a{0%{background-position:0 0}to{background-position:6rem 0}}.md-footer .md-footer-custom-text{color:hsla(0,0%,100%,.3)}@media only screen and (max-width:44.9375em){.md-typeset>.codehilite [data-linenos]:before,.md-typeset>.codehilitetable .linenodiv .special,.md-typeset>.highlight [data-linenos]:before,.md-typeset>.highlighttable .linenodiv .special{margin-left:-1.6rem;padding-left:1.6rem}}@media only screen and (min-width:76.1876em){.md-typeset .headerlink{margin-left:-1.2rem;float:left}.md-typeset h1 .headerlink{margin-top:.4rem}.md-typeset h2 .headerlink{margin-top:.3rem}.md-typeset h3 .headerlink{margin-top:.2rem}.md-typeset h4 .headerlink{margin-top:.1rem}.md-typeset h5 .headerlink,.md-typeset h6 .headerlink{margin-top:0}}


--------------------------------------------------------------------------------
/docs/_configurations/_theme_extra/theme_extra.js:
--------------------------------------------------------------------------------
1 | !function(){"use strict";var e=function(e,t,n){for(var o=function(e){for(var t="",n=0;n<e.childNodes.length;n++){var o=e.childNodes[n];if("code"===o.tagName.toLowerCase())for(var a=0;a<o.childNodes.length;a++){var i=o.childNodes[a];if("#text"===i.nodeName&&!/^\s*$/.test(i.nodeValue)){t=i.nodeValue;break}}}return t},a=function(e){return e.textContent||e.innerText},i=document.querySelectorAll("article"),r=document.querySelectorAll("pre.".concat(t,",div.").concat(t)),d=void 0===n?{}:n,c=0;c<r.length;c++){var l=r[c],s=document.createElement("div");s.className=t,s.style.visibility="hidden",s.style.position="absolute";var u="pre"===l.tagName.toLowerCase()?o(l):a(l);i[0].appendChild(s);e.parse(u).drawSVG(s,d),s.style.visibility="visible",s.style.position="static",l.parentNode.insertBefore(s,l),l.parentNode.removeChild(l)}};!function(e){document.addEventListener?document.addEventListener("DOMContentLoaded",e):document.attachEvent("onreadystatechange",function(){"interactive"===document.readyState&&e()})}(function(){"undefined"!=typeof flowchart&&e(flowchart,"uml-flowchart"),"undefined"!=typeof Diagram&&e(Diagram,"uml-sequence-diagram",{theme:"simple"})})}();
2 | 
3 | 


--------------------------------------------------------------------------------
/docs/aboutSLAM.md:
--------------------------------------------------------------------------------
 1 | [**Simultaneous Localization and Mapping (SLAM)**](https://en.wikipedia.org/wiki/Simultaneous_localization_and_mapping "Simultaneous Localization and Mapping") **即时定位与地图构建** 是在未知环境中构建或更新地图，并同时利用地图信息跟踪定位的计算问题。虽然在最初看来像极了先有鸡还是先有蛋的问题，但对于某些环境下，至少在可控制的时间内，有几种算法可以近似地解决这个问题。目前为止的近似求解方法有 [**粒子滤波**](https://en.wikipedia.org/wiki/Particle_filter)、[**扩展卡尔曼滤波**](https://en.wikipedia.org/wiki/Extended_Kalman_filter) 和 [**GraphSLAM**](https://en.wikipedia.org/wiki/GraphSLAM) 。随着人工智能的发展，也出现了 **语义 SLAM (Semantic slam)** 等新的求解方式，但就目前最为流行和广泛使用的还是 GraphSLAM 。
 2 | 
 3 | 近年来，「SLAM 技术」成为了越来越多学者研究的课题方向，随着该技术的成熟，它也被广泛的应用于市场上的诸多产品上，例如：自动驾驶汽车、无人驾驶飞行器、自动水下航行器、行星漫游者、较新的家用机器人，等等。因而市场对「SLAM 算法工程师」的需求也越来越多。然而 「SLAM 技术」涉及知识体系之广、而且大部分知识也相对较难，所以 「SLAM 技术」并非那么容易能够掌握精通。下面给出大体的「SLAM 知识树 」([PNG图片下载](https://raw.githubusercontent.com/LSXiang/Journey2SLAM/master/docs/KnowledgeTree4SLAM.png)、[xmind文件下载](https://github.com/LSXiang/Journey2SLAM/blob/master/docs/KnowledgeTree4SLAM.xmind?raw=true) )
 4 | 
 5 | ![SLAM 知识树](KnowledgeTree4SLAM.png)
 6 | 
 7 | 
 8 | 
 9 | 如果你喜欢该技术领域，想要在这个方向上有所发展，那么你就需要付出许多时间在这些知识中深挖。建议您可以考取相关领域的硕士及以上学历，这对你在市场竞争中能获取相当大的优势。当然这并非硬性规定，只要您对该技术感兴趣，并沉浸于研究之中，相信自己也可以成为该领域中优秀的人才之一。
10 | 
11 | 「双非渣本的我」在创建这个站点的最初目的就是用于记录和分享自己学习「SLAM 技术」的经过。正所谓好记性不如烂笔头，之前书面记录的学习笔记随着时间的推移，很不方便查阅并且易于丢失，CSDN、博客园等博客不太简美或整体化，并且可能不支持一些数学公式、强调格式等，抑或是存在各种碍眼广告。因而选择了这种形式来记录和分享这个学习过程。
12 | 
13 | 如果您对这个项目感兴趣，欢迎您  <iframe src="https://ghbtns.com/github-btn.html?user=LSXiang&repo=Journey2SLAM&type=star&count=true&size=large" frameborder="0" scrolling="0" width="125px" height="30px"></iframe> 或者 <iframe src="https://ghbtns.com/github-btn.html?user=LSXiang&repo=Journey2SLAM&type=fork&count=true&size=large" frameborder="0" scrolling="0" width="125px" height="30px"></iframe> 该项目，或者持续关注该网址。如果您愿意分享您的学习过程到该网址，您可以阅读该项目的 [README.md](https://github.com/LSXiang/Journey2SLAM/blob/master/README.md) 来共同编写该站点。
14 | 
15 | 下面分享一些个人认为值得关注的公众号及网址：
16 | 
17 | - 「泡泡机器人 SLAM 」公众号
18 | - [SlamCN](http://www.slamcn.org/index.php/%E9%A6%96%E9%A1%B5)
19 | - [OpenSLAM](https://openslam-org.github.io/)
20 | - [学习SLAM需要哪些预备知识？（知乎）](https://www.zhihu.com/question/35186064)
21 | - [SLAM 学习视频合集（知乎）](https://www.zhihu.com/question/35186064)
22 | 
23 | 
24 | 
25 | 


--------------------------------------------------------------------------------
/docs/computer_vision/FAST.md:
--------------------------------------------------------------------------------
  1 | ## FAST 角点原理
  2 | 
  3 | FAST的全称为 Features From Accelerated Segment Test 。是由 Edward Rosten 和 Tom Drummond 在 2006 年发表的  Machine learning for high-speed corner detection [^1]  文章中提出。FAST 角点定义为：若某像素点与周围邻域足够多的像素点处于不同区域，则该像素可能为角点。考虑灰度图像，即若某像素点的灰度值比周围邻域足够多的像素点的灰度值大或小，则该点可能为角点。与其他特征点相比较而言，FAST 在进行角点检测时，计算速度更快，实时性更好。
  4 | 
  5 | 
  6 | 
  7 | ## 算法步骤
  8 | 
  9 | - 从图像中选取一个像素 $p$ ，其灰度值为 $I_p$ 
 10 | 
 11 | - 设定一个合适的阈值 $t$ 
 12 | 
 13 | - 以该像素点为中心考虑一个半径为 3 的离散化的 [Bresenham](https://en.wikipedia.org/wiki/Bresenham%27s_line_algorithm) 圆，圆边界上有 16 个像素（如下图所示）
 14 | 
 15 |     ![FAST Corner](image/FAST_corner.png)
 16 | 
 17 | - 如果圆上有 $n$ 个连续像素点的灰度值小于 $I_p-t$ 或者大于 $I_p+t$ ，那么这个点即可判断为角点（ $n$ 的值可取12 或 9）
 18 | 
 19 | 一种快速排除大部分非角点像素的高效的测试方法是先仅仅检查周围 1、5、9、13 四个位置的像素，如果位置 1 和 9 与中心像素 $p$ 点的灰度差均小于给定阈值，则 $p$ 点不可能是角点，直接排除；否则进一步判断位置 5 和 13 与中心像素的灰度差。如果这四个像素中至少有 3 个像素与 $p$ 点的灰度差超过阈值，则再考察邻域圆上 16 个像素点与中心点的灰度差，如果有连续至少 12 个超过给定阈值的像素则认为 $p$ 是角点（这一快速测试只在 $n=12$ 时成立）。
 20 | 
 21 | 
 22 | 
 23 | ## 角点分类器
 24 | 
 25 | - 选取需要检测的场景的多张图像进行 FAST 角点检测，选取合适的阈值 $n(n<12)$ ，提取多个特征点作为训练数据
 26 | 
 27 | - 对于图像上的点 $p$ ,它周围邻域圆上位置为 $x, \; x \in \{1, \dotsc, 16\}$ 的点表示为 $p \to x$ ，可以用下面的判断公式将该点 $p \to x$ 分为 3 类
 28 |   
 29 |     $$
 30 |   S_{p \to x} = 
 31 |   	\begin{cases}
 32 |   		d, & I_{p \to x} < I_p - t & (darker) \\
 33 |   		s, & I_p -t \leq I_{p \to x} \leq I_p + t & (similar) \\
 34 |   		b, & I_p + t < I_{p \to x} &(brighter)
 35 |   	\end{cases}
 36 |   $$
 37 |   
 38 | - 设 $P$ 为训练图像集中所有像素点的集合，我们任意选取 16 个位置中的一个位置 $x$ ，可以把集合 $P$ 分为三个部分 $P_d$ 、 $P_s$ 和 $P_b$ ，其中 $P_b$ 的定义如下， $P_d$ 和 $P_s$ 的定义与其类似
 39 |   
 40 |     $$
 41 |     P_b = \{ p \in P : S_{p \to x} = b \}
 42 |     $$
 43 |     
 44 |     换句话说，对于任意给定的位置 $x$ ，它都可以把所有图像中的点分为三类，第一类 $P_d$ 包括了所有位置 $x$ 处的像素在阈值 $t$ 下暗于中心像素，第二类 $P_s$ 包括了所有位置 $x$ 处的像素在阈值 $t$ 下近似于中心像素， $P_b$ 包括了所有位置 $x$ 处的像素在阈值 $t$ 下亮于中心像素
 45 | 
 46 | - 对每个特征点定义一个 bool 变量 $K_p$ ，如果 $p$ 是一个角点，则 $K_p$ 为真，否则为假
 47 | 
 48 | - 对提取的特征点集进行训练，使用 ID3 算法建立一棵决策树，通过第 $x$ 个像素点进行决策树的划分，对集合 $P$ ，得到熵值为
 49 |   
 50 |     $$
 51 |     H(P) = (c+\hat{c}) \log_2(c+\hat{c}) - c \log_2 c - \hat{c} \log_2 \hat{c}
 52 |     $$
 53 |     
 54 |     其中 $c$ 为角点的数目， $\hat{c}$ 为非角点的数目。由此得到的信息增益为
 55 |     
 56 |     $$
 57 |     \Delta H = H(P)-H(P_d)-H(P_s)-H(P_b)
 58 |     $$
 59 |     
 60 |     选择信息增益最大位置进行划分，得到决策树
 61 | 
 62 | - 使用决策树对类似场景进行特征点的检测与分类
 63 | 
 64 | 
 65 | 
 66 | ## 非极大值抑制
 67 | 
 68 | 对于邻近位置存在多个特征点的情况，需要进一步做非极大值抑制 (Non-Maximal Suppression) 。给每个已经检测到的角点一个量化的值 $V$ ，然后比较相邻角点的 $V$ 值，保留局部邻域内 $V$ 值最大的点。 $V$ 值可定义为特征点与邻域 16 个像素点灰度绝对差值的和，即
 69 | 
 70 | $$
 71 | V = \max \left( \sum_{x \in S_{bright}} \left| I_{p\to x} - I_p \right| - t \;,\; \sum_{x \in S_{dark}} \left| I_{p\to x} - I_p \right| - t \right)
 72 | $$
 73 | 
 74 | 上式中， $S_{bright}$ 是 16 个邻域像素点中灰度值大于 $I_p + t$ 的像素点的集合，而 $S_{dark}$ 表示的是那些灰度值小于$I_p - t$ 的像素点。
 75 | 
 76 | 
 77 | 
 78 | ## 算法特点
 79 | 
 80 | - FAST 算法比其他大多数角点检测算法要快
 81 | - 受图像噪声以及设定阈值影响大
 82 | - 当设置 $n<12$ 时，不能用快速方法过滤非角点
 83 | - 检测出来的角点不是最优的，因为它的效率取决于问题的排序与角点的分布
 84 | - 多个特征点容易挤在一起
 85 | - FAST 不产生多尺度特征，而且没有方向信息不具备旋转不变性
 86 | 
 87 | 
 88 | 
 89 | 
 90 | 
 91 | 
 92 | 
 93 | 
 94 | 
 95 | ## 参考
 96 | 
 97 | [^1]: [FAST Corner Detection -- Edward Rosten](http://www.edwardrosten.com/work/fast.html) 
 98 | 
 99 | [^2]: [思维之际博客：FAST特征点检测](https://www.cnblogs.com/ronny/p/4078710.html) 
100 | 
101 | [^3]:  Senit_Co 博客：[图像特征之FAST角点检测](https://senitco.github.io/2017/06/30/image-feature-fast/) 
102 | 
103 | 
104 | 
105 | --8<--
106 | mathjax.txt
107 | --8<--


--------------------------------------------------------------------------------
/docs/computer_vision/Haar.md:
--------------------------------------------------------------------------------
  1 | ## 简介
  2 | 
  3 | Haar 特征[^1]是一种用于目标检测或识别的图像特征描述子，Haar 特征通常和 AdaBoost 分类器组合使用，而且由于 Haar 特征提取的实时性以及 AdaBoost 分类的准确率，使其成为人脸检测以及识别领域较为经典的算法。
  4 | 
  5 | 
  6 | 
  7 | ## 多种Haar-like特征
  8 | 
  9 | 在 Haar-like 特征[^2][^3]提出之前，传统的人脸检测算法一般是基于图像像素值进行的，计算量较大且实时性较差。 Papageorgiou 等人最早将 Haar 小波用于人脸特征表示，Viola 和 Jones 则在此基础上，提出了多种形式的 Haar 特征。Lienhart 等人对 Haar 矩形特征做了进一步的扩展，加入了旋转 $45^{\circ}$ 的矩形特征，因此现有的 Haar 特征模板主要如下图所示：
 10 | 
 11 | ![Haar-like 矩形特征](image/Haar_like.png)
 12 | 
 13 | 在计算 Haar 特征值时，用白色区域像素值的和减去黑色区域像素值的和，也就是说白色区域的权值为正值，黑色区域的权值为负值，而且权值与矩形区域的面积成反比，抵消两种矩形区域面积不等造成的影响，保证 Haar 特征值在灰度分布均匀的区域特征值趋近于 0 。Haar 特征在一定程度上反映了图像灰度的局部变化，在人脸检测中，脸部的一些特征可由矩形特征简单刻画，例如，眼睛比周围区域的颜色要深，鼻梁比两侧颜色要浅等。
 14 | 
 15 | Haar-like 矩形特征分为多类，特征模板可用于图像中的任一位置，而且大小也可任意变化，因此 Haar 特征的取值受到特征模板的类别、位置以及大小这三种因素的影响，使得在一固定大小的图像窗口内，可以提取出大量的 Haar 特征。例如，在一个 $24\times 24$ 的检测窗口内，矩形特征的数量可以达到 16 万个。这样就需要解决两个重要问题，快速计算 Haar 矩形特征值——积分图；筛选有效的矩形特征用于分类识别—— AdaBoost 分类器。
 16 | 
 17 | 
 18 | 
 19 | ## 快速计算——积分图
 20 | 
 21 | ### 积分图构建
 22 | 
 23 | 在一个图像窗口中，可以提取出大量的 Haar 矩形特征区域，如果在计算 Haar 特征值时，每次都遍历矩形特征区域，将会造成大量重复计算，严重浪费时间。而积分图正是一种快速计算矩形特征的方法，其主要思想是将图像起始像素点到每一个像素点之间所形成的矩形区域的像素值的和，作为一个元素保存下来，也就是将原始图像转换为积分图 (或者求和图) ，这样在求某一矩形区域的像素和时，只需索引矩形区域 4 个角点在积分图中的取值，进行普通的加减运算，即可求得 Haar 特征值，整个过程只需遍历一次图像，计算特征的时间复杂度为常数 (O(1)) 。因此可以大大提升计算效率。
 24 | 
 25 | 积分图中元素的公式定义如下：
 26 | 
 27 | $$
 28 | I_i(x,y) = \sum_{x'\leq x,y'\leq y} I(x',y')
 29 | $$
 30 | 
 31 | 上式含义是在 $(x,y)$ (第 $x$ 行第 $y$ 列) 位置处，积分图中元素为原图像中对应像素左上角所有像素值之和。在具体实现时，可用下式进行迭代运算。
 32 | 
 33 | $$
 34 | s(x,y)=s(x,y-1)+I(x,y) \\
 35 | I_i(x,y)=I_i(x-1,y)+s(x,y)
 36 | $$
 37 | 
 38 | $s(x,y)$ 为行元素累加值，初始值 $s(x,-1)=0,I_i(-1,y)=0$ 。
 39 | 
 40 | 
 41 | 
 42 | #### 矩形特征计算
 43 | 构建好积分图后，图像中任何矩形区域的像素值累加和都可以通过简单的加减运算快速得到，如下图所示，矩形区域 D 的像素和值计算公式如下： 
 44 | 
 45 | $$
 46 | Sum(D)=I_i(x_4, y_4)-I_i(x_2,y_2)-I_i(x_3,y_3)+I_i(x_1,y_1)
 47 | $$
 48 | 
 49 | ![矩形区域求和示意图](image/rectangle.png)
 50 | 
 51 | 在下图中，以水平向右为 x 轴正方向，垂直向下为 y 轴正方向，可定义积分图公式 Summed Area Table ($SAT(x,y)$ )
 52 | 
 53 | $$
 54 | SAT(x,y)=\sum_{x'\leq x,y'\leq y} I(x',y')
 55 | $$
 56 | 
 57 | 以及迭代求解式 $SAT(x,y)=SAT(x,y-1)+SAT(x-1,y)+I(x,y)-SAT(x-1,y-1)$ （其中 $SAT(-1,y)=SAT(x,-1)=0$ ）。对于左上角坐标为 $(x,y)$ ，宽高为 $(w,h)$ 的矩形区域 $r(x,y,w,h,0)$ ，可利用积分图 $SAT(x,y)$ 求取像素和值
 58 | 
 59 | $$
 60 | \scriptsize{
 61 | \begin{align*}
 62 | RecSum(r) &= SAT(x+w-1, y+h-1) \\ &+ SAT(x-1, y-1) \\ &- SAT(x+w-1, y-1) \\ &-SAT(x-1,y+h-1)
 63 | \end{align*}
 64 | }
 65 | $$
 66 | 
 67 | ![积分图求矩形区域和值](image/integral_rectangle.png.png)
 68 | 
 69 | 
 70 | 
 71 | #### 旋转矩形特征的计算
 72 | 
 73 | 对于旋转矩形特征，相应的有 $45^{\circ}$ 倾斜积分图用于快速计算 Haar 特征值，如下图所示，倾斜积分图的定义为像素点左上角 $45^{\circ}$ 区域和左下角 $45^{\circ}$ 区域的像素和，公式表示如下：
 74 | 
 75 | $$
 76 | RSAT(x,y)=\sum_{x'\leq x,x'\leq x-\left|y-y'\right|} I(x',y')
 77 | $$
 78 | 
 79 | 其递推公式计算如下：
 80 | 
 81 | $$
 82 | \scriptsize{
 83 | RSAT(x,y)=RSAT(x-1,y-1)+RSAT(x-1,y)-RSAT(x-2,y-1)+I(x,y) \\
 84 | RSAT(x,y)=RSAT(x,y)+RSAT(x-1,y+1)-RSAT(x-2,y)
 85 | }
 86 | $$
 87 | 
 88 | 其中 $RSAT(-1,y)=RSAT(-2,y)=RSAT(x,-1)=0$ 。此外，也可直接通过下式递归计算：
 89 | 
 90 | $$
 91 | \scriptsize{
 92 | RSAT(x,y)=RSAT(x-1,y-1)+RSAT(x+1,y-1)-RSAT(x,y-2)+I(x-1,y)+I(x,y)
 93 | }
 94 | $$
 95 | 
 96 | 以上 3 个积分图计算公式是等价的。
 97 | 
 98 | ![倾斜积分图求倾斜矩形区域和值](image/integral_rotation_rectangle.png)
 99 | 
100 | 如下图所示，构建好倾斜积分图后，可快速计算倾斜矩形区域$r=(x,y,w,h,45^{\circ})$的像素和值
101 | 
102 | $$
103 | \scriptsize{
104 | \begin{align*}
105 | RecSum(r) &=RSAT(x+w-1,y+w-1) \\
106 | &+RSAT(x-h-1,y+h-1) \\
107 | &-RSAT(x-1,y-1) \\
108 | &-RSAT(x+w-1-h,y+w-1+h)
109 | \end{align*}
110 | }
111 | $$
112 | 
113 | ![倾斜矩形区域求和示意图](image/rotated_rectangle.png)
114 | 
115 | 
116 | 
117 | ## AdaBoost分类器
118 | 
119 | 由输入图像得到积分图，通过取不同种类、大小的 Haar 特征模板，并在不同位置处，利用积分图提取 Haar 矩形特征，可快速得到大量 Haar 特征值，AdaBoost 分类器可用于对提取的 Haar 特征 (通常需要做归一化处理) 进行训练分类，并应用于人脸检测中。AdaBoost 是一种集成分类器，由若干个强分类器级联而成，而每个强分类器又由若干个弱分类器 (例如：决策树) 组合训练得到。
120 | 
121 | 弱分类器的定义如下：
122 | 
123 | $$
124 | h_j(x)=\begin{cases} 1,&p_j f_j(x) < p_j \theta_j \\ 0, & otherwise \end{cases}
125 | $$
126 | 
127 | 上式中 $p_j$ 是为了控制不等式的方向而设置的参数。 $x$ 表示一个图像窗口，$f_j(x)$ 表示提取的 Haar 特征，阈值 $\theta_j$ 用于判断该窗口是否为目标区域 (人脸) 。
128 | 
129 | 
130 | 
131 | ### 算法流程
132 | 
133 | - 假设训练样本为 $(x_i,y_i),i=1,2,...,n$，$y_i$ 取值为 0 (负样本)、1 (正样本)
134 | 
135 | - 初始化权重：当 $y_i=0$ 时 $w_{1,i}=\dfrac{1}{2m}$ ，当 $y_i=1$ 时 $w_{1,i}=\dfrac{1}{2l}$ ，其中 $m$ 表示负样本的个数， $l$ 表示正样本的个数
136 | 
137 | - For $t =1,2,...,T$ 
138 | 
139 |   1. 归一化权值：$w_{t,i} = \dfrac{w_{t,i}}{\Sigma_{j=1}^n w_{t,j}}$ 
140 |   2. 对于每个(种)特征，训练一个分类器 ($h_j$) ，每个分类器只使用一种 Haar 特征进行训练。分类误差为 $\varepsilon_j = \sum_i w_i \left | h_j (x_i) - y_i \right|$ ， $h_j$ 为特征分类器，$x_i$ 为训练图像样本。
141 |   3. 选择最低误差的分类器 $h_t$ 
142 |   4. 更新训练样本的权值 $w_{t+1,i} = w_{t,i} \beta_t^{1-e_i}$，分类正确 $e_i=0$ ，分类错误 $e_i=1$ ， $\beta_t=\dfrac{\varepsilon_t}{1-\varepsilon_t}$ 
143 | 
144 | - 最后的强分类器为
145 |   
146 |     $$
147 |     h(x)=\begin{cases} 1, & \sum_{t=1}^T \alpha_t h_t \geq \dfrac{1}{2} \Sigma_{t=1}^T \alpha_t \\ 0,& otherwise \end{cases}
148 |     $$
149 |     
150 |     其中 $\alpha_t=log(\dfrac{1}{\beta_t})$ 。
151 | 
152 | 在训练多个弱分类器得到强分类器的过程中，采用了两次加权的处理方法，一是对样本进行加权，在迭代过程中，提高错分样本的权重；二是对筛选出的弱分类器 $h_t$ 进行加权，弱分类器准确率越高，权重越大。此外，还需进一步对强分类器进行级联，以提高检测正确率并降低误识率。级联分类器如下所示：
153 | 
154 | ![级联分类器](image/cascade.png)
155 | 
156 | 首先将所有待检测的子窗口输入到第一个分类器中，如果某个子窗口判决通过，则进入下一个分类器继续检测识别，否则该子窗口直接退出检测流程，也就是说后续分类器不需要处理该子窗口。通过这样一种级联的方式可以去除一些误识为目标的子窗口，降低误识率。例如，单个强分类器，99.9% 的目标窗口可以通过，同时 50% 的非目标窗口也能通过，假设有 20 个强分类器级联，那么最终的正确检测率为 $0.999^{20} \approx 98\%$ ，而错误识别率为 $0.50^{20} \approx 0.0001\%$ ，在不影响检测准确率的同时，大大降低了误识率。当然前提是单个强分类器的准确率非常高，这样级联多个分类器才能不影响最终的准确率或者影响很小。
157 | 
158 | 在一幅图像中，为了能够检测到不同位置的目标区域，需要以一定步长遍历整幅图像；而对于不同大小的目标，则需要改变检测窗口的尺寸，或者固定窗口而缩放图像。这样，最后检测到的子窗口必然存在相互重叠的情况，因此需要进一步对这些重叠的子窗口进行合并，也就是非极大值抑制 (NMS, non-maximum suppression) ，同时剔除零散分布的错误检测窗口。
159 | 
160 | 
161 | 
162 | ## 参考
163 | 
164 | [^1]: [Rapid Object Detection using a Boosted Cascade of Simple Features](http://wearables.cc.gatech.edu/paper_of_week/viola01rapid.pdf) 
165 | [^2]: [Empirical Analysis of Detection Cascades of Boosted Classifiers for Rapid Object Detection](https://link.springer.com/content/pdf/10.1007%2F978-3-540-45243-0_39.pdf) 
166 | [^3]: [Paper: An Extended Set of Haar-like Features for Rapid Object Detection](https://pdfs.semanticscholar.org/72e0/8cf12730135c5ccd7234036e04536218b6c1.pdf) 
167 | 
168 | [^4]: Senit_Co 博客: [图像特征提取之Haar特征](https://senitco.github.io/2017/06/15/image-feature-haar/) 
169 | 
170 | 
171 | 
172 | --8<--
173 | mathjax.txt
174 | --8<--


--------------------------------------------------------------------------------
/docs/computer_vision/Harris.md:
--------------------------------------------------------------------------------
  1 | ## 特征检测
  2 | 
  3 | **特征检测 (Feature detection)** 也称为**角点检测 (Corner detection)**，特征检测和匹配是许多计算机视觉应用中的一个重要组成部分，广泛应用于运动检测、图像匹配、视频跟踪、三维建模以及目标识别领域中。在 SLAM 前端，往往需要通过**特征跟踪 (Feature tracking)** 来先初步恢复相机的位姿。那么一张图像中的哪些特征是适合用来做匹配跟踪呢？通常拥有较大对比度（梯度）的图像块是比较容易定位的，又由于单一方向的直线段存在 “孔径问题 (aperture problem)” ，因而拥有至少两个（明显）不同方向梯度的图像块最容易定位。如下图所示：
  4 | 
  5 | ![aperture problem](image/aperture_problem.png)
  6 | 
  7 | 从上图可以看出，不同图像块的孔径问题： (a) “角点” -- 稳定的，各方面都发生了重大变化； (b) “边 (edge)” -- 经典的孔径问题，沿边缘方向没有变化； (c) “平坦/无纹理的区域 (flat region)” -- 各方向都没有变化。从上面的描述我们认为通过移动一个小窗口会导致窗口中图像灰度变化剧烈，那么这个窗口中易于识别的特征是我们认为的角点。
  8 | 
  9 | 
 10 | 
 11 | ## Harris 角点
 12 | 
 13 | ### 基本原理
 14 | 
 15 | 特征点在图像中一般有具体的坐标，并具有某些数学特征，如局部最大或最小灰度、以及某些梯度特征等。这些直觉可以这样来形式化：用最简单的图像块匹配策略来比较两个图像块，通过它们的（加权）差的平方和：
 16 | 
 17 | $$
 18 | E_{\mathrm{wssd}}(\mathbf{u}) = \sum_i \omega(\mathbf{x}_i)[I_1(\mathbf{x}_i+\mathbf{u}) - I_0(\mathbf{x}_i)]^2
 19 | $$
 20 | 
 21 | 其中 $I_0$ 和 $I_1$ 是两幅需要比较的图像块，$\mathbf{u} = (u, v)$ 是平移向量， $\omega(\mathbf{x})$ 是在空间上变化的权重（或窗口）函数，求和变量 $i$ 作用于块中的全体图像像素。由于在进行特征检测时，并不知道该特征被匹配时会终止于哪些相对的其他图像位置的匹配。因此，只能在一个小的位置变化区域 $\Delta \mathbf{u}$ 内，通过与原图像块进行比较来计算这个匹配结果的稳定度，这就是通常所说的**自相关函数 (autocorrelation function)** 。
 22 | 
 23 | 根据上述，对于给定图像 $I(x, y)$ 和固定尺寸的邻域窗口，计算窗口平移前后各个像素差值的平方和，即在点 $(x, y)$ 处平移 $(\Delta x, \Delta y)$ 后的自相关性：
 24 | 
 25 | $$
 26 | E_{AC}(x,y;\Delta x,\Delta y) = \sum_{(u,v)\in W(x,y)} \omega(u,v)[I(u+\Delta x, v+\Delta y) - I(u,v)]^2
 27 | $$
 28 | 
 29 | 其中，$W(x,y)$ 是以点 $(x, y)$ 为中心的窗口，$\omega(x,y)$ 是窗口加权函数，它可取均值函数或者高斯函数，如下图所示：
 30 | 
 31 | ![Weighting Function](image/harris_weighting_function.png)
 32 | 
 33 | 根据泰勒展开，可得到窗口平移后图像的一阶近似：
 34 | 
 35 | $$
 36 | \begin{align*}
 37 | I(u+\Delta x, v+\Delta y) &= I(u,v) + I_x(u,v)\Delta x + I_y(u,v)\Delta y + O(\Delta x^2,\Delta y^2) \\
 38 | 						& \approx I(u,v) + I_x(u,v)\Delta x + I_y(u,v)\Delta y
 39 | \end{align*}
 40 | $$
 41 | 
 42 | 其中，$I_x , I_y$ 是图像 $I(x,y)$ 的偏导数，那么自相关函数可以简化为：
 43 | 
 44 | $$
 45 | \begin{align*}
 46 | E_{AC}(x,y;\Delta x,\Delta y) &\approx \sum_{(u,v)\in W(x,y)} \omega(u,v)[I_x(u,v)\Delta x + I_y(u,v)\Delta y]^2 \\
 47 | & = \begin{bmatrix} \Delta x & \Delta y \end{bmatrix} M(x,y) \begin{bmatrix} \Delta x \\ \Delta y \end{bmatrix}
 48 | \end{align*}
 49 | $$
 50 | 
 51 | 其中，
 52 | 
 53 | $$
 54 | \begin{align*}
 55 | M(x,y) &= \sum_W \begin{bmatrix} I_x(x,y)^2 & I_x(x,y) I_y(x,y) \\ I_x(x,y)I_y(x,y) & I_y(x,y)^2 \end{bmatrix} \\
 56 | &= \begin{bmatrix} \sum_W I_x(x,y)^2 & \sum_W I_x(x,y) I_y(x,y) \\ \sum_W I_x(x,y)I_y(x,y) & \sum_W I_y(x,y)^2 \end{bmatrix} \\
 57 | &= \begin{bmatrix} A & C \\ C & B \end{bmatrix}
 58 | \end{align*}
 59 | $$
 60 | 
 61 | 也就是说图像 $I(x,y)$ 在点 $(x, y)$ 处平移 $(\Delta x, \Delta y)$ 后的自相关函数可以近似为二次项函数：
 62 | 
 63 | $$
 64 | E_{AC}(x,y;\Delta x,\Delta y) \approx A\Delta x^2 + 2C\Delta x \Delta y + B\Delta y^2
 65 | $$
 66 | 
 67 | 其中有 $A = \sum_W I_x^2 \; , \; B=\sum_W I_y^2 \; , \; C=\sum_W I_x I_y$ 。
 68 | 
 69 | 将梯度向量视为一组 $(dx，dy)$点，其质心定义为 $(0,0)$ 。通过散射矩阵 $M(x,y)$ 对该组点进行椭圆分析，根据不同情况分析椭圆参数。而 $x$ 和 $y$ 的导数分布可以用椭圆形状和大小的主成分来特征化，如下图所示：
 70 | 
 71 | ![principal component ellipse](image/principal_component_ellipse.png)
 72 | 
 73 | 二次项函数本质上是一个椭圆函数，椭圆的曲率和尺寸可由 $M(x,y)$ 的特征值 $\lambda_1, \lambda_2$ 决定，椭圆方向由 $M(x,y)$ 的特征向量决定，椭圆方程和其图形分别如下所示：
 74 | 
 75 | $$
 76 | \begin{bmatrix} \Delta x & \Delta y \end{bmatrix} M(x,y) \begin{bmatrix} \Delta x \\ \Delta y \end{bmatrix} = 1
 77 | $$
 78 | 
 79 | ![Auto-correlation Elliptic](image/AC_elliptic.png)
 80 | 
 81 | 因此，对于矩阵 $M(x,y)$ ，可以将其和协方差矩阵类比，协方差表示多维随机变量之间的相关性，协方差矩阵对角线的元素表示的是各个维度的方差，而非对角线上的元素表示的是各个维度之间的相关性，在 [**PCA (主成分分析)**](https://en.wikipedia.org/w/index.php?title=Principal_component_analysis) 中，将协方差矩阵对角化，使不同维度的相关性尽可能的小，并取特征值较大的维度，来达到降维的目的。类似的，可以将矩阵 $M(x,y)$ 看成是一个二维随机分布的协方差矩阵，通过将其对角化，求取矩阵的两个特征值，并根据这两个特征值来判断角点。
 82 | 
 83 | 椭圆函数特征值与图像中的角点、直线（边缘）和平面之间的关系共可分为三种情况（如下图所示）：
 84 | 
 85 | - 两个特征值都较大时为角点
 86 | - 一个特征值较大而另一个较小时则为图像边缘
 87 | - 两个特征值都较小时为均匀区域
 88 | 
 89 | ![Classification via Eigenvalues](image/classification_via_eigenvalues.png)
 90 | 
 91 | 根据二次项函数特征值的计算公式，我们可以求矩阵 $M(x,y)$ 的特征值。但是 Harris 给出的角点判别方法并不需要计算具体的特征值，而是计算一个角点的响应值 $R$ 来判断角点。$R$ 的计算公式：
 92 | 
 93 | $$
 94 | R = \det M - \alpha (trace M)^2
 95 | $$
 96 | 
 97 | 上式中，$\det M$ 是矩阵 $M = \begin{bmatrix} A & C \\ C & B \end{bmatrix}$ 的行列式，$trace M$ 是矩阵 $M$ 的迹，$\alpha$ 为经验常数，通常取值为 $0.04 \sim 0.06$ 。事实上，特征值是隐含在 $\det M$ 和 $trace M$ 中的，因为：
 98 | 
 99 | $$
100 | \det M = \lambda_1 \lambda_2 = AB-C^2 \\
101 | traceM = \lambda_1 + \lambda_2 = A+B
102 | $$
103 | 
104 | 
105 | ### 算法实现
106 | 
107 | 根据上述，可以将 Harris 角点检测算法归纳如下：
108 | 
109 | - 计算图像 $I(x,y)$ 在 $X$ 和 $Y$ 两个方向上的梯度
110 |   
111 |     $$
112 |   I_x = \frac{\partial I}{\partial x} = I \otimes \begin{bmatrix} -1 & 0 & 1 \end{bmatrix} ,
113 |   I_y = \frac{\partial I}{\partial y} = I \otimes \begin{bmatrix} -1 & 0 & 1 \end{bmatrix}^\top
114 |   $$
115 | 
116 | - 计算两个方向梯度的乘积
117 |   
118 |     $$
119 |     I_x^2 = I_x \cdot I_x \;,\; I_y^2 = I_y \cdot I_y \;,\; I_{xy} = I_x \cdot I_y
120 |     $$
121 | 
122 | - 使用高斯函数对 $I_x^2$ 、$I_y^2$ 和 $I_{xy}$ 进行高斯加权，生成矩阵 $M$ 的元素 $A$ 、 $B$ 和 $C$  
123 |   
124 |     $$
125 |     A = g(I_x^2) = I_x^2 \otimes w \;,\; B = g(I_y^2) = I_y^2 \otimes w \;,\; C = g(I_{xy}) = I_{xy} \otimes w
126 |     $$
127 |     
128 | - 计算每一个像素的 Harris 响应值 $R$ ，并对小于阈值 $t$ 的 $R$ 置零
129 |   
130 |     $$
131 |     R = \left\{ R:\det M - \alpha (trace M)^2  > t \right\}
132 |     $$
133 |     
134 | - 在 $3\times3$ 或者 $5\times5$ 大小的窗口邻域内进行非最大值抑制，局部最大值点即为图像中的角点。
135 | 
136 | 
137 | 
138 | ### 性质
139 | 
140 | 1. **参数α对角点检测的影响**：增大 $\alpha$ 的值，将减小角点响应值 $R$ ，减少被检测角点的数量；减小 $\alpha$ 的值，将增大角点响应值 $R$ ，增加被检测角点的数量。
141 | 
142 | 2. **Harris 角点检测算子对亮度和对比度的变化不敏感** ：因为在进行 Harris 角点检测时，使用了微分算子对图像进行微分运算，而微分运算对图像密度的拉升或收缩和对亮度的抬高或下降不敏感。换言之，对亮度和对比度的仿射变换并不改变 Harris 响应的极值点出现的位置，但是，由于阈值的选择，可能会影响角点检测的数量。如下图所示：
143 | 
144 |    ![Invariant to intensity changes](image/invariant2intensity.png)
145 | 
146 | 3. **Harris 角点检测算子具有旋转不变性**：Harris 角点检测算子使用的是角点附近的区域灰度二阶矩矩阵。而二阶矩矩阵可以表示成一个椭圆，椭圆的长短轴正是二阶矩矩阵特征值平方根的倒数。当特征椭圆转动时，特征值并不发生变化，所以判断角点响应值 $R$ 也不发生变化，由此说明 Harris 角点检测算子具有旋转不变性。
147 | 
148 | 4. **Harris 角点检测算子不具有尺度不变性**：如下图所示，当右图被缩小时，在检测窗口尺寸不变的前提下，在窗口内所包含图像的内容是完全不同的。左侧的图像可能被检测为边缘或曲线，而右侧的图像则可能被检测为一个角点。
149 | 
150 |    ![Not invariant to scale](image/not_invariant2scale.png)
151 | 
152 | ### 多尺度 Harris 角点
153 | 
154 | Harris 角点具有灰度不变性和旋转不变性，但不具备尺度不变性，而尺度不变性对于图像的局部特征来说至关重要。将 Harris 角点检测算子和高斯尺度空间表示相结合，可有效解决这个问题。与 Harris 角点检测中的二阶矩表示类似，定义一个尺度自适应的二阶矩：
155 | 
156 | $$
157 | M=\mu(x,y,\sigma_I,\sigma_D) = \sigma_D^2g(\sigma_I)\otimes
158 | \begin{bmatrix}
159 | 	L_x^2(x,y,\sigma_D) & L_xL_y(x,y,\sigma_D) \\
160 | 	L_xL_y(x,y,\sigma_D) & L_y^2(x,y,\sigma_D)
161 | \end{bmatrix}
162 | $$
163 | 
164 | 式中，$g(\sigma_I)$ 表示尺度为 $\sigma_I$ 的高斯卷积核，$L_x(x,y,\sigma_D)$ 和 $L_y(x,y,\sigma_D)$ 表示对图像使用高斯函数 $g(\sigma_D)$ 进行平滑后取微分的结果。$\sigma_I$ 通常称为积分尺度，是决定 Harris 角点当前尺度的变量，$\sigma_D$ 为微分尺度，是决定角点附近微分变化的变量，通常 $\sigma_I$ 应大于 $\sigma_D$ 。
165 | 
166 | 具体的算法流程：
167 | 
168 | - 确定尺度空间的一组取值 $\sigma_I = (\sigma_0, \sigma_1 \dotsc, \sigma_n) = (\sigma, k\sigma, \dotsc, k^n \sigma) \;,\; \sigma_D = s\sigma_I$ ，一般情况下 $k=1.4 \;,\; s= 0.7$ 
169 | 
170 | - 对于给定的尺度空间值 $\sigma_D$ ，进行如下角点响应值的计算和判断，并做非极大值抑制处理
171 |   
172 |     $$
173 |   cornerness = \det \left( \mu(x,y, \sigma_n) \right) - \alpha \,trace^2 \left(\mu(x,y, \sigma_n)\right) > threshold_H
174 |   $$
175 |   
176 | - 在位置空间搜索候选角点后，还需在尺度空间上进行搜索，计算候选点的拉普拉斯响应值，并与给定阈值作比较：
177 |   
178 |     $$
179 |     F(x,y,\sigma_n) = \sigma_n^2 \left| L_{xx}(x,y,\sigma_n)+L_{yy}(x,y,\sigma_n) \right| > threshold_L
180 |     $$
181 | 
182 | - 将响应值 $F$ 与邻近的两个尺度空间的拉普拉斯响应值进行比较，使其满足：
183 |   
184 |     $$
185 |     F(x,y,\sigma_n) > F(x,y,\sigma_l) \;,\; l = n-1, n+1
186 |     $$
187 | 
188 | 这样即可确定在位置空间和尺度空间均满足条件的 Harris 角点。
189 | 
190 | 
191 | 
192 | ## Shi-Tomasi 算法
193 | 
194 | Shi-Tomasi 算法是 Harris 算法的改进。Harris 算法最原始的定义是将矩阵 $M$ 的行列式值与 $M$ 的迹的平方的 $\alpha$ 倍相减，再将差值同预先给定的阈值进行比较。后来 Jianbo Shi 和 Carlo Tomasi 提出改进的方法，若两个特征值中较小的一个大于最小阈值，则会得到强角点。
195 | 
196 | Harris 角点的响应值 $R$ 为：
197 | 
198 | $$
199 | R = \lambda_1\lambda_2-\alpha(\lambda_1+\lambda_2)^2
200 | $$
201 | 
202 | 而 Shi-Tomasi 角点的响应值 $R$ 则为：
203 | 
204 | $$
205 | R = \min(\lambda_1,\lambda_2)
206 | $$
207 | 
208 | 
209 | 
210 | 
211 | 
212 | 
213 | 
214 | 
215 | 
216 | 
217 | 
218 | 
219 | 
220 | 
221 | ## 参考
222 | 
223 | [^1]: Computer Vision: Algorithms and Applications, by Richard Szeliski.  Sections 4.1
224 | [^2]: [CMU : Feature and corner detection & Feature descriptors and matching](http://www.cs.cmu.edu/~16385/)
225 | 
226 | [^3]: [Penn State University : Corner Detection](http://www.cse.psu.edu/~rtc12/CSE486/) 
227 | [^4]: [A COMBINED CORNER AND EDGE DETECTOR](http://www.bmva.org/bmvc/1988/avc-88-023.pdf)
228 | [^5]: [Scale & Affine Invariant Interest Point Detectors](https://www.robots.ox.ac.uk/~vgg/research/affine/det_eval_files/mikolajczyk_ijcv2004.pdf)
229 | [^6]: [Code: Harris Detector](https://github.com/ronnyyoung/ImageFeatures)
230 | [^7]: [思维之际博客：Harris 角点](https://www.cnblogs.com/ronny/p/4009425.html)
231 | 
232 | [^8]: Senit_Co 博客：[图像特征之 Harris 角点检测](https://senitco.github.io/2017/06/18/image-feature-harris/)
233 | 
234 | 
235 | 
236 | 
237 | 
238 | 
239 | 
240 | 
241 | 
242 | --8<--
243 | mathjax.txt
244 | --8<--
245 | 
246 | 


--------------------------------------------------------------------------------
/docs/computer_vision/ORB.md:
--------------------------------------------------------------------------------
  1 | ## ORB 算法原理
  2 | 
  3 | ORB (Oriented FAST and Rotated BRIEF)[^1] 算法是对 FAST 特征点检测和 BRIEF (Binary robust independent elementary features)[^2] 特征描述子的一种结合，在原有的基础上做了改进与优化，使得 ORB 特征具备多种局部不变性，并为实时计算提供了可能。
  4 | 
  5 | 
  6 | 
  7 | ### 角点检测
  8 | 
  9 | ORB 首先利用 FAST 算法检测特征点，然后利用 Harris 角点的度量方式，从提取的 FAST 角点中筛选出 N 个 Harris 响应值最大的特征点，其中 Harris 角点的响应函数如下：
 10 | 
 11 | $$
 12 | R = \det M - \alpha (trace \, M)^2
 13 | $$
 14 | 
 15 | 相关内容在前两篇文章「[Harris 角点检测](Harris.md)」和「[FAST 角点检测](FAST.md)」分别做了详细的介绍。
 16 | 
 17 | 
 18 | 
 19 | #### 旋转不变性
 20 | 
 21 | FAST 检测特征点不具备尺度不变性，可以借助尺度空间理论构建图像高斯金字塔，然后在每一层金字塔图像上检测角点，以实现尺度不变性。此外，FAST 特征点不具有方向，即不具备旋转不变性。原论文中提出了一种利用灰度质心法来解决这个问题，而带有方向的 FAST 角点被称为 oFAST 。灰度质心法假设角点的灰度与质心之间存在一个偏移，这个向量可以用于表示一个方向。换言之，在半径为 $r$ 的邻域内求取灰度质心，从特征点到灰度质心的向量，定义为该特征点的主方向。对于任意一个特征点 $p$ 来说，我们定义 $p$ 的邻域像素的矩为：
 22 | 
 23 | $$
 24 | m_{pq} = \sum_{x,y\in \bigcirc_r} x^p y^q I(x,y)
 25 | $$
 26 | 
 27 | 其中 $I(x,y)$ 表示像素灰度值，$\bigcirc_r$ 是半径为 $r$ 离散化的 [Bresenham](https://en.wikipedia.org/wiki/Bresenham%27s_line_algorithm) 圆形窗口，零阶矩 $m_{00}$ 即图像邻域窗口内所有像素的灰度和， $m_{10}$ 和 $m_{01}$ 分别是相对于 $x$ 与 $y$ 的一阶矩，因此图像局部邻域的中心矩或者质心可定义为：
 28 | 
 29 | $$
 30 | C = \left( \frac{m_{10}}{m_{00}} \,,\, \frac{m_{01}}{m_{00}} \right)
 31 | $$
 32 | 
 33 | 则 oFAST 特征点的方向矢量定义为特征点的中心 $O$ 与质心 $C$ 形成的向量 $\overrightarrow{OC}$ 与 $X$ 轴的夹角，即：
 34 | 
 35 | $$
 36 | \theta = \operatorname{atan2}(m_{01}, m_{10})
 37 | $$
 38 | 
 39 | 
 40 | 
 41 | ### 特征点描述
 42 | 
 43 | 在介绍 ORB 对 BRIEF 特征描述子增添转向之前，我们先介绍下 BRIEF 特征描述子。它是一种对已检测到的特征点进行表示和描述的特征描述方法，和传统的利用图像局部邻域的灰度直方图或梯度直方图提取特征的方式不同，BRIEF 是一种二进制编码的特征描述子，既降低了存储空间的需求，提升了特征描述子生成的速度，也减少了特征匹配时所需的时间。
 44 | 
 45 | 
 46 | 
 47 | #### BRIEF 原理
 48 | 
 49 | 经典的图像特征描述子 SIFT 和 SURF 采用 128 维 (SIFT) 或者 64 维 (SURF) 特征向量，每维数据一般占用 4 个字节 (Byte) ，一个特征点的特征描述向量需要占用 512 或者 256 个字节。如果一幅图像中包含有大量的特征点，那么特征描述子将占用大量的存储，而且生成描述子的过程也会相当耗时。在 SIFT 特征的实际应用中，可以采用 PCA 、LDA 等特征降维的方法来减少特征描述子的维度，例如 PCA-SIFT；此外还可以采用一些局部敏感哈希 (Locality-Sensitive Hashing, LSH) 的方法将特征描述子编码为二进制串，然后使用汉明距离 (Hamming Distance) 进行特征点的匹配，汉明距离计算的是两个二进制比特串中同一位置不同值的个数，可通过异或操作快速实现，大大提升了特征匹配的效率。
 50 | 
 51 | BRIEF 正是这样一种基于二进制编码生成特征描述子，以及利用汉明距离进行特征匹配的算法。由于 BRIEF 只是一种特征描述子，因此事先得检测并定位特征点，可采用 Harris、FAST 或者是 SIFT 算法检测特征点，在此基础上利用 BRIEF 算法建立特征描述符。该描述符是在特征点邻域 Patch 内随机选择若干点对 $(\mathbf{x},\mathbf{y})$ ，并比较这些点对的灰度值，灰度测试 $\tau$ 定义如下：
 52 | 
 53 | $$
 54 | \tau(p;\mathbf{x},\mathbf{y}) \doteq \begin{cases} 1 & p(\mathbf{x})<p(\mathbf{y}) \\ 0 & p(\mathbf{x}) \geq p(\mathbf{y}) \end{cases}
 55 | $$
 56 | 
 57 | 式中 $p(\mathbf{x})$ 是图像块上点 $\mathbf{x} = (u,v)^\top$ 处的灰度值。所有的 $n$ 个点对进行比较，生成了一个 $n$ 长的二进制串：
 58 | 
 59 | $$
 60 | f_n(p) \doteq \sum_{1 \leq i \leq n} 2^{i-1} \tau(p; \mathbf{x}_i, \mathbf{y}_i)
 61 | $$
 62 | 
 63 | 对于 $n$ 的取值可以设为 128 、 256 或者 512 。
 64 | 
 65 | 
 66 | 
 67 | #### BRIEF 算法实现
 68 | 
 69 | - 利用 FAST 或者 Harris 等方法进行角点检测
 70 | - 确定特征点邻域窗口 Patch ，并对该邻域内的像素进行高斯平滑滤除噪声
 71 | - 在邻域窗口内随机选取 $n = 128/256/512$ 对像素点，根据灰度值大小编码成二进制串，生成 $n$ bit 的特征描述子
 72 | 
 73 | 
 74 | 
 75 | #### BRIEF 点对选取
 76 | 
 77 | 原论文作者 Calonder 提供了 5 种在 $S \times S$ 的邻域 Patch 内随机选取点对 $(\mathbf{x},\mathbf{y})$ 的方法。如下图所示，一条线段的两个端点表示一个随机点对 $(\mathbf{x}_i,\mathbf{y}_i)$ 。
 78 | 
 79 | ![BRIEF Point Choice](image/BRIEF_point_choice.png)
 80 | 
 81 | 1.  $\mathbf{x},\mathbf{y}$ 为均匀分布 $(-S/2, S/2)$ 
 82 | 2.  $\mathbf{x},\mathbf{y}$ 为高斯分布 $\mathcal{N}(0,\frac{1}{25}S^2)$ 
 83 | 3.  $\mathbf{x}$ 为高斯分布 $\mathcal{N}(0,\frac{1}{25}S^2)$ ， $\mathbf{y}$ 为高斯分布 $\mathcal{N}(0,\frac{1}{100}S^2)$ 
 84 | 4. 在空间量化极坐标下的离散位置随机采样
 85 | 5.  $\mathbf{x}_i$ 固定为 $(0,0)$ ， $\mathbf{y}_i$ 在周围平均采样
 86 | 
 87 | 
 88 | 
 89 | #### BRIEF 算法特点
 90 | 
 91 | BRIEF 算法通过检测随机响应，并采用二进制编码方式建立特征描述子，减少了特征的存储空间需求，并提升了特征生成的速度。Hamming 距离的度量方式便于进行特征点的快速匹配，而且大量实验数据表明，针对特征维数为 256 的情况，不匹配特征点的 Hamming 距离为 128 左右，而匹配点的 Hamming 距离则远小于 128 。
 92 | 
 93 | BRIEF 算法的缺点是不具备尺度不变性和旋转不变性，在图像的旋转角度超过 $30^\circ$ 时，特征点匹配的准确率快速下降。
 94 | 
 95 | 
 96 | 
 97 | #### BRIEF 特征描述子改进
 98 | 
 99 | 根据上述，可知 BRIEF 虽然速度优势明显，但也存在一些缺陷，例如不具备尺度不变性和旋转不变性，对噪声敏感。尺度不变性的问题在利用 FAST 检测特征点时，通过构建高斯金字塔得以解决。BRIEF 中采用 9×9 的高斯卷积核进行滤波降噪，可以在一定程度上缓解噪声敏感问题。ORB 中利用积分图像，在 31×31 的 Patch 中选取随机点对，并以选取的随机点为中心，在 5×5 的窗口内计算灰度平均值（灰度和），比较随机点对的邻域灰度均值，进行二进制编码，而不是仅仅由两个随机点对的像素值决定编码结果，可以有效地解决噪声问题。
100 | 
101 | 
102 | 
103 | ##### Steered BRIEF
104 | 
105 | 至于旋转不变性问题，可利用 FAST 特征点检测时求取的主方向，旋转特征点邻域，这种方法称为「Steered BRIEF (sBRIEF) 」。但旋转整个 Patch 再提取 BRIEF 特征描述子的计算代价较大，因此，ORB 采用了一种更高效的方式，在每个特征点邻域 Patch 内，先选取 $n$ 对随机点，将其进行旋转，然后做判决编码为二进制串。 $n$ 个点对构成矩阵 $S$ 
106 | 
107 | $$
108 | S = \begin{bmatrix} x_1 & x_2 & \dotsc & x_n \\ y_1 & y_2 & \dotsc & y_n \end{bmatrix}
109 | $$
110 | 
111 | 旋转矩阵 $R_\theta$ 为
112 | 
113 | $$
114 | R_\theta =  \begin{bmatrix} \cos\theta & -\sin\theta \\ \sin\theta & \cos\theta \end{bmatrix}
115 | $$
116 | 
117 | 旋转后的坐标矩阵为
118 | 
119 | $$
120 | S_\theta = R_\theta S
121 | $$
122 | 
123 | 
124 | 
125 | ##### rBRIEF
126 | 
127 | 通过上述方法得到的特征描述子具有旋转不变性，但匹配效果却不如原始 BRIEF 算法，因为可区分性减弱了。特征描述子的一个要求就是要尽可能地表达特征点的独特性，便于区分不同的特征点。BRIEF 令人惊喜的特性之一是描述子所有比特位的均值接近于 0.5 ，且方差很大。方差越大表明可区分性越好，不同特征点的描述子表现出较大的差异性，不易造成误匹配。但 steered BRIEF 进行了坐标旋转，损失了这个特性，导致可区分性减弱，相关性变强，不利于匹配。
128 | 
129 | 为了解决 steered BRIEF 可区分性降低的问题，ORB 使用了一种基于学习的方法来选择一定数量的随机点对。首先建立一个大约 $300k$ 特征点的数据集（特征点来源于 PASCAL2006 中的图像），对每个特征点，考虑其 31×31 的邻域 Patch ，为了去除噪声的干扰，选择 5×5 的子窗口的灰度均值代替单个像素的灰度，这样每个 Patch 内就有 $N = (31-5+1)\times(31-5+1) = 27 \times 27 = 729$ 个子窗口，从中随机选取 2 个非重复的子窗口，一共有 $M = C_N^2$ 种方法。这样，每个特征点便可提取出一个长度为 $M$ 的二进制串，所有特征点可构成一个 $300k \times M$ 的二进制矩阵 $Q$ ：
130 | 
131 | $$
132 | Q = \begin{bmatrix} 
133 | 	p_{1,1} & p_{1,2} & \cdots & p_{1,M} \\
134 | 	p_{2,1} & p_{2,2} & \cdots & p_{2,M} \\
135 | 	\vdots & \vdots & \ddots & \vdots \\
136 | 	p_{300k,1} & p_{300k,2} & \cdots & p_{300k,M}
137 | \end{bmatrix} \quad p_{i,j} \in \{0,1\}
138 | $$
139 | 
140 | 现在需要从 $M$ 个点对中选取 256 个相关性最小、可区分性最大的点对，作为最终的二进制编码。筛选方法如下：
141 | 
142 | 1. 对矩阵 $Q$ 的每一列求取均值，并根据均值与 0.5 之间的距离从小到大的顺序，依次对所有列向量进行重新排序，得到矩阵 $T$ 
143 | 2. 将 $T$ 中的第一列向量放到结果矩阵 $R$ 中
144 | 3. 取出 $T$ 中的下一列向量，计算其与矩阵 $R$ 中所有列向量的相关性，如果相关系数小于给定阈值，则将 $T$ 中的该列向量移至矩阵 $R$ 中，否则丢弃
145 | 4. 循环执行上一步，直到 $R$ 中有 256 个列向量；如果遍历 $T$ 中所有列， $R$ 中向量列数还不满 256，则增大阈值，重复以上步骤
146 | 
147 | 这样，最后得到的就是相关性最小的 256 对随机点，该方法称为 rBRIEF 。
148 | 
149 | 
150 | 
151 | 
152 | 
153 | 
154 | 
155 | 
156 | ## 参考
157 | 
158 | [^1]: Ethan Rublee, Vincent Rabaud, Kurt Konolige, Gary R. Bradski: [ORB: An efficient alternative to SIFT or SURF](http://www.willowgarage.com/sites/default/files/orb_final.pdf)
159 | [^2]: M. Calonder, V. Lepetit, C. Strecha, and P. Fua. [Brief: Binary robust independent elementary features](http://vision.stanford.edu/teaching/cs231b_spring1415/papers/BRIEF.pdf) 
160 | 
161 | [^3]: 思维之际博客：[BRIEF 特征描述子](https://www.cnblogs.com/ronny/p/4081362.html) 、[ORB特征点检测](https://www.cnblogs.com/ronny/p/4083537.html) 
162 | 
163 | [^4]: Senit_Co 博客：[图像特征描述子之ORB](https://senitco.github.io/2017/07/09/image-feature-orb/)  、[图像特征描述子之BRIEF](https://senitco.github.io/2017/07/05/image-feature-brief/)
164 | 
165 | 
166 | 
167 | --8<--
168 | mathjax.txt
169 | --8<--


--------------------------------------------------------------------------------
/docs/computer_vision/SIFT.md:
--------------------------------------------------------------------------------
  1 | ## 简介
  2 | 
  3 | 尺度不变特征变换 (Scale-invariant feature transform, SIFT) [^1][^2] 是计算机视觉中一种检测、描述和匹配图像局部特征点的方法，通过在不同的尺度空间中检测极值点或特征点 (Corner Point, Interest Point) ，提取出其位置、尺度和旋转不变量，并生成特征描述子，最后用于图像的特征点匹配。SIFT 特征凭借其良好的性能广泛应用于运动跟踪 (Motion tracking) 、图像拼接 (Automatic mosaicing) 、3D 重建 (3D reconstruction) 、移动机器人导航 (Mobile robot navigation) 以及目标识别 (Object Recognition) 等领域。
  4 | 
  5 | 
  6 | 
  7 | ## 尺度空间极值检测
  8 | 
  9 | 为了使检测到的特征点具备尺度不变性，使能够在不同尺度检测到尽可能完整的特征点或关键点，则需要借助尺度空间理论来描述图像的多尺度特征。相关研究证明高斯卷积核是实现尺度变换的唯一线性核。因此可用图像的高斯金字塔表示尺度空间，而且尺度规范化的 LoG 算子具有尺度不变性，在具体实现中，可用高斯差分 (DoG) 算子近似 LoG 算子，在构建的尺度空间中检测稳定的特征点。
 10 | 
 11 | 
 12 | 
 13 | ### 构建尺度空间
 14 | 
 15 | 尺度空间理论的基本思想是：在图像处理模型中引入一个被视为尺度的参数，通过连续变化尺度参数获取多尺度下的空间表示序列，对这些空间序列提取某些特征描述子，抽象成特征向量，实现图像在不同尺度或不同分辨率的特征提取。尺度空间中各尺度图像的模糊程度逐渐变大，模拟人在由近到远时目标在人眼视网膜上的成像过程。而且尺度空间需满足一定的不变性，包括图像灰度不变性、对比度不变性、平移不变性、尺度不变性以及旋转不变性等。在某些情况下甚至要求尺度空间算子具备仿射不变性。
 16 | 
 17 | 
 18 | 
 19 | #### 尺度空间的表示
 20 | 
 21 | 图像的尺度空间 $L(x,y,\sigma)$ 可以定义为输入图像 $I(x,y)$ 与可变尺度的高斯函数 $G(x,y,\sigma)$ 进行卷积：
 22 | 
 23 | $$
 24 | L(x,y,\sigma) = G(x,y,\sigma) * I(x,y) \\
 25 | G(x,y,\sigma) = \frac{1}{2\pi\sigma^2}e^{-\frac{x^2+y^2}{2\sigma^2}}
 26 | $$
 27 | 
 28 | 式中， $(x,y)$ 是图像的空间坐标，高斯函数 $G(x,y,\sigma)$ 中的 $\sigma$ 是尺度坐标 (尺度变化因子) ， $\sigma$ 大小决定图像的平滑程度，值越大图像模糊得越严重。大尺度对应图像的概貌特征，小尺度对应图像的细节特征。一般根据 $3\sigma$ 原则，高斯核矩阵的大小设为 $(6\sigma+1)\times(6\sigma+1)$ 。
 29 | 
 30 | 在使用高斯金字塔构建尺度空间时，主要分成两部分，对图像做降采样，以及对图像做不同尺度的高斯模糊。对图像做降采样得到不同尺度的图像，也就是不同的组 (Octave) ，后面的 Octave (高一层的金字塔) 为上一个 Octave (低一层的金字塔) 降采样得到，图像宽高分别为上一个 Octave 的 $1/2$ 。每组 (Octave) 又分为若干层 (Interval) ，通过对图像做不同尺度的高斯模糊得到。
 31 | 
 32 | 
 33 | 
 34 | #### 高斯差分金字塔
 35 | 
 36 | 在 2002 年 [Mikolajczyk](http://lear.inrialpes.fr/people/mikolajczyk/) 在详细的实验比较中发现尺度归一化的高斯拉普拉斯函数 $\sigma^2 \nabla^2G$ 的极大值和极小值同其它特征提取函数（例如：梯度、Hessian 或者 Harris 角点）比较，能够产生最为稳定的图像特征。而早在 1994 年 Lindeberg 发现高斯差分函数（Difference-of-Gaussian ，简称 DoG 算子）与尺度归一化的高斯拉普拉斯函数 $\sigma^2 \nabla^2G$ 非常近似。其中两者间的关系可以从如下推导得到：
 37 | 
 38 | $$
 39 | \frac{\partial G}{\partial \sigma} = \sigma \nabla^2 G
 40 | $$
 41 | 
 42 | 利用差分近似替代微分，则有：
 43 | 
 44 | $$
 45 | \sigma \nabla^2 G = \frac{\partial G}{\partial \sigma} \approx \frac{G(x,y,k\sigma) - G(x,y,\sigma)}{k\sigma -\sigma}
 46 | $$
 47 | 
 48 | 因此有：
 49 | 
 50 | $$
 51 | G(x,y,k\sigma) - G(x,y,\sigma) \approx (k-1)\sigma^2 \nabla^2 G
 52 | $$
 53 | 
 54 | 其中， $k-1$ 是个常数，不影响极值点的检测，如下图所示，红色曲线表示的是高斯差分算子，而蓝色曲线表示的是高斯拉普拉斯算子。
 55 | 
 56 | ![高斯拉普拉斯和高斯差分对比](image/LoG_vs_DoG.png)
 57 | 
 58 | 为了能更为高效地在尺度空间检测稳定的关键点，SIFT 作者选用了高斯差分算子替代高斯拉普拉斯算子。因此提出了高斯差分尺度空间 (Difference-of-Gaussian Scale-Space) $D(x,y,\sigma)$ ， $D(x,y,\sigma)$ 可以通过由变化尺度因子 $k$ 分隔的相邻不同尺度的高斯差分核与图像卷积生成：
 59 | 
 60 | $$
 61 | \begin{align*}
 62 | D(x,y,\sigma) &= \left( G(x,y,k\sigma) - G(x,y,\sigma) \right) * I(x,y) \\
 63 | &= L(x,y,k\sigma) - L(x,y,\sigma)
 64 | \end{align*}
 65 | $$
 66 | 
 67 | 图像的高斯金字塔和高斯差分金字塔如下图所示，高斯差分图像由高斯金字塔中同一组 (Octave) 内相邻层 (Interval) 的图像作差得到。
 68 | 
 69 | ![图像高斯金字塔](image/DoG.png)
 70 | 
 71 | 
 72 | 
 73 | ### 尺度空间的参数确定
 74 | 
 75 | 在由图像金字塔表示的尺度空间中，图像的 Octave 由原始图像的大小和塔顶图像的大小决定。
 76 | 
 77 | $$
 78 | Octave = \log_2(\min(width_0,height_0)) - \log_2(\min(width,height))
 79 | $$
 80 | 
 81 | 其中， $width_0, \, height_0$ 分别为原始图像的宽和高， $width, \, height$ 分别为金字塔塔顶图像的宽和高。例如：对于一幅大小为 512×512 的图像，当塔顶图像大小为 4×4 时，图像的组数为 $Octave=7$ 。
 82 | 
 83 | 尺度参数 $\sigma$ 的取值与金字塔的组数和层数相关，设第一组第一层的尺度参数取值为 $\sigma(1,1) = \sigma_0$ ，一般 $\sigma_0$ 取 $1.6$ 。则第 $m$ 组第 $n$ 层的 $\sigma(m,n)$ 取值为
 84 | 
 85 | $$
 86 | \sigma(m,n) = \sigma_0 \cdot 2^{m-1} \cdot k^{n-1}, \quad k=2^\frac{1}{S}
 87 | $$
 88 | 
 89 | 式中， $S$ 是金字塔中每组的有效层数，$k=2^\frac{1}{S}$ 是变化尺度因子。在检测极值点前对原始图像的高斯平滑会导致图像高频信息的丢失，所以在建立尺度空间之前，先利用双线性插值将图像扩大为原来的两倍，以保留原始图像信息，增加特征点数量。
 90 | 
 91 | 
 92 | 
 93 | #### 第一组第一层图像的生成
 94 | 
 95 | 根据上述说明，为了得到更多的特征点，需要将图像扩大为原来的两倍。根据图像反走样的需要，通常假设输入图像 $I(x,y)$ 是经过高斯平滑处理的，其值为 $\sigma' = 0.5$ ，即半个像元。意思就是说我们采集到的图像 $I(x,y)$ ，已经被 $\sigma' = 0.5$ 的高斯滤波器平滑过了。那么，经过双线性插值扩大为原来的两倍后的图像 $I'(x,y)$ 可以看为是被 $2\sigma' = 1$ 高斯平滑处理过的。所以我们不能对扩大两倍后的图像 $I'(x,y)$ 直接用 $\sigma_0 = 1.6$ 的高斯滤波器平滑，而应该用 $\sqrt{\sigma_0^2 - (2\sigma')^2}$ 的高斯滤波器去平滑图像 $I'(x,y)$ ，即：
 96 | 
 97 | $$
 98 | I_{pyr(1,1)}(x,y) = I'(x,y) * G(x,y,\sqrt{\sigma_0^2 - (2\sigma')^2})
 99 | $$
100 | 
101 | 
102 | 
103 | ### 局部极值检测
104 | 
105 | 为了寻找 DoG 尺度空间的极值点，每一个采样点要和其所有邻域像素相比较，如下图所示，中间检测点与其同尺度的 8 个邻域像素点以及上下相邻两层对应的 9×2 个像素点一共 26 个点作比较，以确保在图像空间和尺度空间都能检测到极值点。一个像素点如果在 DoG 尺度空间本层及上下两层的 26 邻域中取得最大或最小值时，就可以认为该点是图像在该尺度下的一个特征点。
106 | 
107 | ![Extremum of the DoG](image/extremum_DoG.png)
108 | 
109 | 
110 | 
111 | #### 每组尺度空间需要的层数
112 | 
113 | 如上所述，在极值比较的过程中，每一组差分图像的首末两层是无法比较的，为了在每组中检测 $S$ 个尺度的极值点，则 DoG 金字塔每组须有 $S+2$ 层图像，高斯金字塔每组须有 $S+3$ 层图像。另外，在降采样时，高斯金字塔中后一组 (Octave) 的底层图像是由前一组图像的倒数第 $3$ 张图像 ( $S+1$ 层) 隔点采样得到。这样也保证了尺度变化的连续性，如下图所示：
114 | 
115 | ![variable scale Gaussian](image/variable_scale_Gaussian.png)
116 | 
117 | 根据上图，假设每组层数 $S=3$ ，则 $k=2^\frac{1}{S} = 2^\frac{1}{3}$ ，在高斯金字塔中，第一个 Octave 第 $S+1$ 层图像的尺度为 $k^3\sigma = 2\sigma$ ，经降采样后得到第二个 Octave 的第 1 层图像，尺度仍为 $2\sigma$ 。在 DoG 尺度空间中，第一组 (1st-Octave) 图像中间三项的尺度分别为 $(k\sigma, k^2\sigma, k^3\sigma)$ ，下一组中间三项为 $(2k\sigma, 2k^2\sigma, 2k^3\sigma)$ ，其 “首项” $2k\sigma = 2^\frac{4}{3}\sigma$ ，与上一组 “末项” $k^3\sigma = 2^\frac{3}{3}\sigma$ 尺度变化连续，变化尺度为 $k=2^\frac{1}{S} = 2^\frac{1}{3}$ 。
118 | 
119 | 
120 | 
121 | ## 关键点定位
122 | 
123 | 在 DoG 尺度空间检测到的极值点是离散的，通过拟合三元二次函数可以精确定位关键点的位置和尺度，达到亚像素精度。同时去除低对比度的检测点和不稳定的边缘点 (因为 DoG 算子会产生较强的边缘响应) ，以增强匹配稳定性，提高抗噪声能力。
124 | 
125 | 
126 | 
127 | ### 关键点精确定位
128 | 
129 | 离散空间的极值点并不是真正的极值点，如下图所示一维函数离散空间得到的极值点与连续空间极值点的差别。利用已知的离散空间点插值得到的连续空间极值点的方法叫做子像素插值 (Sub-pixel Interpolation) 
130 | 
131 | ![离散空间与连续空间极值点的区别](image/extremum_in_continuous_vs_discrete.png)
132 | 
133 | 下面我们用子像素插值法来考虑连续区间上的情况，假设我们在尺度为 $\sigma$ 的尺度图像 $D(x,y,\sigma)$ 检测到一个局部极值点，空间位置为 $(x,y,\sigma)$ 。根据上图直观可知，它只是离散情况下的极值点，而连续情况下，极值点可能坐落在 $(x,y,\sigma)$ 的附近，设连续情况的真正极值点偏离 $(x,y,\sigma)$ 的坐标为 $(\Delta x, \Delta y, \Delta \sigma)$ 。则对于真正极值点 $D(x+\Delta x, y+\Delta y, \sigma+\Delta \sigma)$ 可以在尺度空间 $D(x,y,\sigma)$ 处进行泰勒展开（依照 $f(x+\Delta x) \approx f(x) + f'(x)\Delta x + \frac{1}{2} f''(x)\Delta x^2$ 保留二阶形式）：
134 | 
135 | $$
136 | \scriptsize {
137 | D(x+\Delta x, y+\Delta y, \sigma+\Delta \sigma) \approx \\ \quad
138 | D(x,y,\sigma) 
139 | + \begin{bmatrix} \frac{\partial D}{\partial x} & \frac{\partial D}{\partial y} & \frac{\partial D}{\partial \sigma} \end{bmatrix}
140 | \begin{bmatrix} \Delta x \\ \Delta y \\ \Delta \sigma \end{bmatrix} 
141 | + \frac{1}{2} \begin{bmatrix} \Delta x & \Delta y & \Delta \sigma \end{bmatrix} 
142 | \begin{bmatrix} \frac{\partial^2 D}{\partial x^2} & \frac{\partial^2 D}{\partial x \partial y} & \frac{\partial^2 D}{\partial x \partial \sigma} \\ \frac{\partial^2 D}{\partial y \partial x} & \frac{\partial^2 D}{\partial y^2} & \frac{\partial^2 D}{\partial y \partial \sigma} \\ \frac{\partial^2 D}{\partial \sigma \partial x} & \frac{\partial^2 D}{\partial \sigma \partial y} & \frac{\partial^2 D}{\partial \sigma^2} \end{bmatrix}
143 | \begin{bmatrix} \Delta x \\ \Delta y \\ \Delta \sigma \end{bmatrix}
144 | }
145 | $$
146 | 
147 | 可以将上式写成矢量形式，得：
148 | 
149 | $$
150 | D(X+\Delta X) = D(X) + \frac{\partial D^\top(X)}{\partial X} \Delta X + \frac{1}{2} \Delta X^\top \frac{\partial^2D(X)}{\partial X^2} \Delta X
151 | $$
152 | 
153 | 上式对 $\Delta X$ 求导，并令其等于零，可以得到极值点的偏移量：
154 | 
155 | $$
156 | \Delta X = - \left( \frac{\partial^2D(X)}{\partial X^2} \right)^{-1} \frac{\partial D(X)}{\partial X}
157 | $$
158 | 
159 | 当它在任一维度上 (即 $x$ 或 $y$ 或 $\sigma$ ）的偏移量大于 0.5 时，意味着插值中心已经偏移到它的邻近点上，所以必须改变当前关键点的位置。同时在新的位置上反复插值直到收敛，也有可能超出所设定的迭代次数或者超出图像边界的范围，此时这样的点应该删除。精确关键点处的函数值为：
160 | 
161 | $$
162 | D(\hat{X}) = D(X) + \frac{1}{2} \frac{\partial D^\top(X)}{\partial X} \Delta X
163 | $$
164 | 
165 | 如果 $|D(\hat{X})|$ 过小，则该点易受噪声干扰而变得不稳定；若其小于某个阈值 (例如 0.03 或者 0.04/S ) ，则该极值点也应该删除。
166 | 
167 | 
168 | 
169 | ### 消除边缘响应
170 | 
171 | 高斯差分函数有较强的边缘响应，对于比较像边缘的点应该去除掉。这样的点的特征为在某个方向有较大主曲率，而在垂直的方向主曲率很小。主曲率可通过一个 2×2 的 Hessian 矩阵求出
172 | 
173 | $$
174 | H = \begin{bmatrix} D_{xx} & D_{xy} \\ D_{xy} & D_{yy} \end{bmatrix}
175 | $$
176 | 
177 | $D$ 的主曲率和 $H$ 的特征值成正比，令 $\alpha$ 为较大特征值， $\beta$ 为较小特征值，且令 $\alpha / \beta = r$ ，则
178 | 
179 | $$
180 | tr(H) = D_{xx}+D_{yy} = \alpha + \beta \;, \quad \det(H) = D_{xx}D_{yy}-D_{xy}^2 = \alpha \beta \\
181 | \frac{tr(H)^2}{\det(H)} = \frac{(\alpha + \beta)^2}{\alpha \beta} = \frac{(r+1)^2}{r}
182 | $$
183 | 
184 | $(r+1)^2 /  r$ 在两个特征值相等时最小，随着 $r$ 的增大而增大， $r$ 值越大，说明两个特征值的比值越大，正好对应边缘的情况。因此，设定一个阈值 $r_t$ ，若满足
185 | 
186 | $$
187 | \frac{tr(H)^2}{\det(H)} < \frac{(r_t + 1)^2}{r_t}
188 | $$
189 | 
190 | 则认为该关键点不是边缘，否则予以剔除。
191 | 
192 | 
193 | 
194 | ## 关键点方向指定
195 | 
196 | 为了使特征描述子具有旋转不变性，需要利用关键点邻域像素的梯度方向分布特性为每个关键点指定方向参数。对于在 DoG 金字塔中检测出的关键点，在其邻近高斯金字塔图像的 $3\sigma$ 邻域窗口内计算其梯度幅值和方向，公式如下：
197 | 
198 | $$
199 | m(x,y) = \sqrt{\left(L(x+1,y) - L(x-1,y)\right)^2 + \left(L(x,y+1) - L(x,y-1)\right)^2} \\
200 | \theta(x,y) = \arctan \left( (L(x,y+1) - L(x,y-1)) / (L(x+1,y) - L(x-1,y)) \right)
201 | $$
202 | 
203 | 式中，$L$ 为关键点所在尺度空间的灰度值，$m(x,y)$ 为梯度幅值，$\theta(x,y)$ 为梯度方向。对于模值 $m(x,y)$ 按照 $\sigma = 1.5 \sigma_{oct}$ 邻域窗口为 $3\sigma = 3 \times 1.5 \sigma_{oct}$ 的高斯分布加权。在完成关键点的梯度计算后，使用直方图统计邻域内像素的梯度和方向，梯度直方图将梯度方向 $(0,360^\circ)$ 分为 36 柱 (bins) ，如下图所示 (为简化，图中只画了八个方向的直方图) ，直方图的峰值所在的方向代表了该关键点的主方向。
204 | 
205 | ![关键点方向直方图](image/hog.png)
206 | 
207 | 梯度方向直方图的峰值代表了该特征点处邻域梯度的主方向，为了增强鲁棒性，保留峰值大于主方向峰值 80% 的方向作为该关键点的辅方向，因此，在相同位置和尺度，将会有多个关键点被创建但方向不同，可以提高特征点匹配的稳定性。
208 | 
209 | 至此，将检测出的含有位置、尺度和方向的关键点即是该图像的SIFT特征点。
210 | 
211 | 
212 | 
213 | ## 关键点特征描述子
214 | 
215 | 在经过上述流程后，检测到的每个关键点有三个信息：位置、尺度以及方向，接下来就是为每个关键点建立一个描述符，用一组向量将这个关键点描述出来。这个特征描述子不但包括关键点，还包括其邻域像素的贡献，而且需具备较高的独特性和稳定性，以提高特征点匹配的准确率。SIFT 特征描述子是关键点邻域梯度经过高斯加权后统计结果的一种表示。通过对关键点周围图像区域分块，计算块内的梯度直方图，得到表示局部特征点信息的特征向量。例如在尺度空间 4×4 的窗口内统计 8 个方向的梯度直方图，生成一个 4×4×8=128 维的表示向量。
216 | 
217 | 
218 | 
219 | ### 确定计算描述子所需的图像区域
220 | 
221 | 特征描述子与特征点所在的尺度有关，因此，对梯度的求取应在特征点对应的高斯图像上进行。将关键点附近的邻域划分为 $d\times d(d=4)$ 个子区域，每个子区域做为一个种子点，每个种子点有8个方向。每个子区域的大小与关键点方向分配时相同，即每个区域边长为 $3 \sigma_{oct}$ 子像素。考虑到实际计算时需要进行三线性插值，采样窗口区域半边长设为 $\frac{3 \sigma_{oct}(d+1)}{2}$ ，又考虑到旋转因素 (坐标轴旋转至关键点主方向) ，这个值需要乘以 $\sqrt{2}$ （如下图所示），最后所需的图像区域半径为：
222 | 
223 | $$
224 | radius = \frac{3 \sigma_{oct} \times \sqrt{2} \times (d+1)}{2}
225 | $$
226 | 
227 | ![旋转引起的领域半径变化](image/rotation_region.png)
228 | 
229 | ### 旋转坐标轴至关键点主方向
230 | 
231 | 将坐标轴旋转至关键点主方向，以确保旋转不变性。如下图所示
232 | 
233 | ![坐标轴旋转](image/rotate_coordinate.png)
234 | 
235 | 旋转后采样点的新坐标为：
236 | 
237 | $$
238 | \begin{bmatrix} x' \\ y' \end{bmatrix} = 
239 | \begin{bmatrix} \cos\theta & -\sin\theta \\ \sin\theta & \cos\theta \end{bmatrix}
240 | \begin{bmatrix} x \\ y \end{bmatrix}
241 | \quad (x, y \in [-radius, radius])
242 | $$
243 | 
244 | 
245 | ### 三线性插值计算权值
246 | 
247 | 在图像半径区域内对每个像素点求其梯度幅值和方向，并对每个梯度幅值乘以高斯权重参数
248 | 
249 | $$
250 | w = m(u+x,v+y) \times e ^{-\frac{x'^2 + y'^2}{2\sigma_w^2}}
251 | $$
252 | 
253 | 注意，上式中，$u,v$ 表示关键点在高斯金字塔图像中的位置坐标，而 $x,y$ 为旋转坐标轴至关键点主方向**之前**相对关键点的偏移量 (即，还在图像坐标系下)， $x',y'$ 为旋转坐标轴至关键点主方向**之后**相对关键点的偏移量 (即，在关键点主方向坐标系下) 。
254 | 
255 | 将旋转后的采样点坐标分配到对应的子区域，计算影响子区域的采样点的梯度和方向，分配到8个方向上。旋转后的采样点 $(x',y')$ 落在子区域的下标为
256 | 
257 | $$
258 | \begin{bmatrix} x_d \\ y_d \end{bmatrix} = \frac{1}{3\sigma_{oct}} \begin{bmatrix} x' \\ y' \end{bmatrix} + \frac{d}{2} \;,\quad x_d, y_d \in [0, d]
259 | $$
260 | 
261 | 将采样点在子区域的下标进行三线性插值，根据三维坐标计算与周围子区域的距离，按距离远近计算权重，最终累加在相应子区域的相关方向上的权值为
262 | 
263 | $$
264 | weight = w \cdot [d_r^i \cdot (1-d_r)^{1-i}] \cdot [d_c^j \cdot (1-d_c)^{1-j}] \cdot [d_o^k \cdot (1-d_o)^{1-k}]
265 | $$
266 | 
267 | 式中 $i,j,k$ 取值 $0$ 或 $1$ ，其中 $d_r, 1-d_r$ 是对相邻两行的贡献因子， $d_c, 1-d_c$ 是对相邻两列的贡献因子， $d_o, 1-d_o$ 是对相邻两个方向的贡献因子。插值计算每个种子点八个方向的梯度，最终结果如下图所示。
268 | 
269 | ![描述子方向梯度直方图](image/descriptor_HOG.png)
270 | 
271 | 
272 | 
273 | ### 向量归一化生成描述子
274 | 
275 | 得到 128 维特征向量后，为了去除光照变化的影响，需要对向量进行归一化处理。非线性光照变化仍可能导致梯度幅值的较大变化，但对梯度方向影响较小。因此对于超过阈值 0.2 的梯度幅值设为 0.2 ，然后再进行一次归一化。最后将特征向量按照对应高斯金字塔的尺度大小排序。至此，SIFT 特征描述子形成。
276 | 
277 | 
278 | 
279 | ## SIFT 特征匹配
280 | 
281 | 对两幅图像中检测到的特征点，可采用特征向量的欧式距离作为特征点相似性的度量，取图像 1 中某个关键点，并在图像 2 中找到与其距离最近的两个关键点，若最近距离与次近距离的比值小于某个阈值，则认为距离最近的这一对关键点为匹配点。降低比例阈值，SIFT 匹配点数量会减少，但相对而言会更加稳定。阈值 ratio 的取值范围一般为 0.4~0.6 。
282 | 
283 | 
284 | 
285 | ## SIFT特征的特点
286 | 
287 | SIFT是一种检测、描述、匹配图像局部特征点的算法，通过在尺度空间中检测极值点，提取位置、尺度、旋转不变量，并抽象成特征向量加以描述，最后用于图像特征点的匹配。SIFT特征对灰度、对比度变换、旋转、尺度缩放等保持不变性，对视角变化、仿射变化、噪声也具有一定的鲁棒性。但其实时性不高，对边缘光滑的目标无法准确提取特征点。
288 | 
289 | 
290 | 
291 | 
292 | 
293 | ## 参考
294 | 
295 | [^1]: [Distinctive Image Features from Scale-Invariant Keypoints](https://www.cs.ubc.ca/~lowe/papers/ijcv04.pdf) 
296 | [^2]: [Object Recognition from Local Scale-Invariant Features](http://cgit.nutn.edu.tw:8080/cgit/PaperDL/iccv99.pdf) 
297 | [^3]: Slides : [Object Recognition from Local Scale-Invariant Feature](https://people.cs.umass.edu/~elm/Teaching/ppt/SIFT.pdf) 
298 | [^4]: zddhub 博客：[SIFT 算法详解](https://blog.csdn.net/zddblog/article/details/7521424) (论文中包含**有限差分求导推导** 、**三阶矩阵求逆**)
299 | [^5]: Senit_Co 博客：[图像特征之SIFT特征匹配](https://senitco.github.io/2017/06/24/image-feature-sift/) 
300 | [^6]: 思维之际博客：[SIFT定位算法关键步骤的说明](https://www.cnblogs.com/ronny/p/4028776.html)
301 | 
302 | 
303 | 
304 | 
305 | 
306 | --8<--
307 | mathjax.txt
308 | --8<--


--------------------------------------------------------------------------------
/docs/computer_vision/SUFT.md:
--------------------------------------------------------------------------------
  1 | ## 简介
  2 | 
  3 | 加速鲁棒特征 (Speeded Up Robust Features, SURF) [^1][^2] 和 SIFT 特征类似，同样是一个用于检测、描述、匹配图像局部特征点的特征描述子。SIFT 是被广泛应用的特征点提取算法，但其实时性较差，如果不借助于硬件的加速和专用图形处理器 (GPUs) 的配合，很难达到实时的要求。对于一些实时应用场景，如基于特征点匹配的实时目标跟踪系统，每秒要处理数十帧的图像，需要在毫秒级完成特征点的搜索定位、特征向量的生成、特征向量的匹配以及目标锁定等工作，SIFT 特征很难满足这种需求。SURF 借鉴了 SIFT 中近似简化 (DoG 近似替代 LoG) 的思想，将 Hessian 矩阵的高斯二阶微分模板简化，使得模板对图像的滤波只需要进行几次简单的加减法运算，并且这种运算与滤波模板的尺寸无关。SURF 相当于 SIFT 的加速改进版本，在特征点检测取得相似性能的条件下，提高了运算速度。整体来说，SURF 比 SIFT 在运算速度上要快数倍，综合性能更优。
  4 | 
  5 | 
  6 | 
  7 | ## SURF 特征提取
  8 | 
  9 | ### 积分图像
 10 | 
 11 | SURF 算法中用到了积分图的概念，积分图 (Integral Image) 由 Viola 和 Jones 提出，在博文「 [Haar 特征提取](Haar.md)」中做了详细的介绍，这里就不重复。借助积分图，图像与高斯二阶微分模板的滤波转化为对积分图像的加减运算，从而在特征点的检测时大大缩短了搜索时间。
 12 | 
 13 | 
 14 | 
 15 | ### Hessian 矩阵近似
 16 | 
 17 | SIFT算法建立一幅图像的金字塔，在每一层进行高斯滤波并求取图像差 (DoG) 进行特征点的提取，而 SURF 则用的是 Hessian 矩阵近似进行特征点的提取。图像点的二阶微分 Hessian 矩阵的行列式 (Determinant of Hessian, DoH) 极大值，可用于图像的斑点检测 (Blob Detection) 。由于特征点需要尺度无关性，所以在进行 Hessian 矩阵构造前，需要对其进行高斯滤波。Hessian-Laplace detector 矩阵定义如下：
 18 | 
 19 | $$
 20 | H(x,y,\sigma) = \begin{bmatrix} L_{xx} & L_{xy} \\ L_{xy} & L_{yy} \end{bmatrix}
 21 | $$
 22 | 
 23 | 式中， $L_{xx}$ 、 $L_{yy}$ 和 $L_{xy}$ 分别是高斯二阶偏导算子 $\frac{\partial^2 g}{\partial x^2}$ 、 $\frac{\partial^2 g}{\partial y^2}$ 和 $\frac{\partial^2 g}{\partial x \partial y}$ 与原图像的卷积，该矩阵的行列式 DoH 为：
 24 | 
 25 | $$
 26 | \det(H) = L_{xx} L_{yy} - L_{xy}^2
 27 | $$
 28 | 
 29 | 与 LoG 算子一样，DoH 同样反映了图像局部的纹理或结构信息，与 LoG 相比，DoH 对图像中细长结构的斑点有较好的抑制作用。LoG 和 DoH 在利用二阶微分算子对图像进行斑点检测时，都需要利用高斯滤波平滑图像、抑制噪声，检测过程主要分为以下两步：
 30 | 
 31 | - 使用不同的 $\sigma$ 生成 $\left( \frac{\partial^2 g}{\partial x^2} + \frac{\partial^2 g}{\partial y^2} \right)$ 或者 $\frac{\partial^2 g}{\partial x^2}$ 、 $\frac{\partial^2 g}{\partial y^2}$ 和 $\frac{\partial^2 g}{\partial x \partial y}$ 高斯卷积模板，并对图像进行卷积运算
 32 | - 在图像的位置空间和尺度空间搜索 LoG 或 DoH 的峰值，并进行非极大值抑制，精确定位到图像极值点
 33 | 
 34 | 以上三个高斯微分算子的响应图像如下图所示。
 35 | 
 36 | ![二阶高斯偏微分算子响应图](image/differential_Gaussian.png)
 37 | 
 38 | 由于二阶高斯微分模板被离散化和裁剪的原因，导致了图像在旋转奇数倍的 $\pi /4$ ，即模板对角线方向时，特征点检测的重复性 (Repeatability) 降低，即原来是特征点的地方在旋转后可能检测不到了；而旋转 $\pi /2$ 的整数倍时，特征点检测的重复性最高。不过这一不足并不影响 Hessian 矩阵检测特征点。
 39 | 
 40 | 为了将模板与图像的卷积转化为盒子滤波器 (Box Filter) 运算，并能够使用积分图，需要对高斯二阶微分模板进行简化，使得简化后的模板只是由几个矩形区域组成，矩形区域内填充同一值，如下图所示，在简化模板中白色区域的值为 1 ，黑色区域的值为 -1 或 -2 (由相对面积决定) ，灰色区域的值为 0 。
 41 | 
 42 | ![二阶高斯偏微分模板及相应简化模板](image/Gaussian_Diff_model.png)
 43 | 
 44 | 使用 $D_{xx}$ 、 $D_{yy}$ 和  $D_{xy}$ 表示简化模板与图像进行卷积的结果，Hessian 矩阵的行列式可进一步简化为：
 45 | 
 46 | $$
 47 | \begin{align*}
 48 | \det(H) &= L_{xx} L_{yy} - L_{xy}^2 \\
 49 | &= D_{xx} \frac{L_{xx}}{D_{xx}} D_{yy} \frac{L_{yy}}{D_{yy}} - D_{xy} \frac{L_{xy}}{D_{xy}}D_{xy} \frac{L_{xy}}{D_{xy}} \\
 50 | &= D_{xx}D_{yy}\left(\frac{L_{xx}}{D_{xx}}\frac{L_{yy}}{D_{yy}}\right) - D_{xy}^2 \left(\frac{L_{xy}^2}{D_{xy}^2}\right) \\
 51 | &= \left(D_{xx}D_{yy} - D_{xy}^2 Y \right) C
 52 | \end{align*} \\ 其中， \quad
 53 | Y = \left(\frac{L_{xy}^2}{D_{xy}^2} \right) \left( \frac{D_{xx}}{L_{xx}}\frac{D_{yy}}{L_{yy}} \right)  = \left(\frac{\| L_{xy}(\sigma) \|_F \| D_{xx}(l) \|_F}{\| L_{xx}(\sigma) \|_F \| D_{xy}(l) \|_F} \right)^2
 54 | \\ 与 \quad
 55 | C = \frac{L_{xx}}{D_{xx}}\frac{L_{yy}}{D_{yy}}
 56 | $$
 57 | 
 58 | 式中，$\|\cdot\|_F$ 为 Frobenius 范数，$\sigma$ 为 LoG 的尺度，$l$ 为设定 box filter 模板的尺寸。那么对于 $\sigma = 1.2$ 的二阶高斯微分滤波，设定 box filter 模板的尺寸为 $l \times l = 9 \times 9$ 的大小，此时的 $Y = (0.912)^2 \approx (0.9)^2$ 。理论上说，对于不同的 $\sigma$ 值和对应的模板尺寸，$Y$ 值应该是不同的，但为了简化起见，可将其视为一个常数，同样 $C$ 也为一常数，且不影响极值求取，进而它作为最小尺度空间值对图像进行滤波和斑点检测。因此，DoH 可近似如下：
 59 | 
 60 | $$
 61 | \det(H) \approx \det(H_{approx}) = D_{xx} D_{yy} - (0.9 D_{xy})^2
 62 | $$
 63 | 
 64 | 在实际计算滤波响应值时，需要使用模板中盒子 (矩形) 区域的面积进行归一化处理，以保证一个统一的 Frobenius 范数能适应所有的滤波尺寸。
 65 | 
 66 | 使用近似的 Hessian 矩阵行列式来表示一个图像中某一点处的斑点响应值，遍历图像中的所有像素，便形成了在某一尺度下斑点检测的响应图像。使用不同的模糊尺度和模板尺寸，便形成了多尺度斑点响应的金字塔图像，利用这一金字塔图像，可以进行斑点响应极值点的搜索定位，其过程与 SIFT 算法类似。
 67 | 
 68 | 
 69 | 
 70 | ### 尺度空间的表示
 71 | 
 72 | 要想检测不同尺度的极值点，必须建立图像的尺度空间金字塔。一般的方法是通过采用不同 $\sigma$ 的高斯函数，对图像进行平滑滤波，然后降采样获得更高一组 (Octave) 的金字塔图像。SIFT 算法中就是通过相邻两层 (Interval) 高斯金字塔图像相减得到 DoG 图像，然后在 DoG 金字塔图像上进行特征点检测。与 SIFT 特征不同的是，SURF 算法不需要通过降采样的方式得到不同尺寸大小的图像建立金字塔，而是借助于盒子滤波和积分图像，不断增大盒子滤波模板，通过积分图快速计算盒子滤波的响应图像。SURF 采用这种方法节省了降采样过程，然后在响应图像上采用非极大值抑制，检测不同尺度的特征点。SIFT 算法的 LoG 金字塔和 SURF 算法的近似 DoH 金字塔如下图所示。
 73 | 
 74 | ![LoG 与 DoH 金字塔对比](image/pyramid_LoG_VS_DoH.png)
 75 | 
 76 | 如前所述，使用 9×9 的模板对图像滤波，其结果作为最初始的尺度空间层，后续层将通过逐步增大滤波模板尺寸，以及放大后的模板与图像卷积得到。由于采用了 box filter 和积分图，滤波过程并不随着滤波模板尺寸的增大而增加运算量。
 77 | 
 78 | 在建立盒状滤波金字塔时，与 SIFT 算法类似，需要将尺度空间划分为若干组 (Octaves) 。每组又由若干固定的层组成，包括不同尺寸的滤波模板对同一输入图像进行滤波得到的一系列响应图。由于积分图像的离散特性，两个相邻层之间的最小尺度变化量，是由高斯二阶微分滤波模板在微分方向上对正负斑点响应长度 (波瓣长度) $l_0$ 决定的，它是盒子滤波模板尺寸的 $1/3$ 。对于 9×9 的滤波模板，$l_0$ 为 3 。下一层的响应长度至少应该在 $l_0$ 的基础上增加 2 个像素，以保证一边一个像素，即 $l_0 = 5$，这样模板的尺寸为 15×15 ，如下图所示。依次类推，可以得到一个尺寸逐渐增大的模板序列，尺寸分别为 9×9 、15×15 、21×21 、27×27 。显然，第一个模板和最后一个模板产生的 Hessian 响应图像只作为比较用，而不会产生最后的响应极值。
 79 | 
 80 | ![滤波模板尺寸变化](image/pyramid_box_scale.png)
 81 | 
 82 | 采用类似的方法处理其他组的模板序列，其方法是将滤波器尺寸增加量按 Octave 的组数 $m$ 翻倍，即 $6 \times 2^{m-1}$ ，序列依次为 $(6,12,24,48, \dotsc)$ ，这样，在盒状滤波金字塔中，每组滤波器的尺寸如下图所示，滤波器的组数可由原始图像的尺寸决定。对数水平轴代表尺度，组之间有相互重叠，其目的是为了覆盖所有可能的尺度。在通常尺度分析情况下，随着尺度的增大，被检测的特征点数迅速衰减。
 83 | 
 84 | ![不同组滤波器尺寸](image/DoH_model_scale_change.png)
 85 | 
 86 | 滤波器的尺寸 $L$ 、滤波响应长度 $l$ 、组索引 $o$ 、层索引 $s$ 、尺度 $\sigma$ 之间的相互关系如下：
 87 | 
 88 | $$
 89 | L = 3 \times(2^{o+1}(s+1)+1) \\
 90 | l = \frac{L}{3} = 2^{o+1}(s+1)+1 \\
 91 | \sigma = 1.2 \times \frac{L}{9} = 1.2 \times \frac{l}{3}
 92 | $$
 93 | 
 94 | ### 关键点定位
 95 | 
 96 | 和 LoG、DoG 类似，建立尺度空间后，需要搜索定位关键点。将经过 box filter 处理过的响应图像中每个像素点
 97 | 与其 3 维邻域中的 26 个像素点进行比较，若是极大值点，则认为是该区域的局部特征点。然后，采用 3 维线性插值法得到亚像素级的特征点，同时去掉一些小于给定阈值的点，使得极值检测出来的特征点更稳健。和 DoG 不同的是，不需要剔除边缘导致的极值点，因为 Hessian 矩阵的行列式已经考虑了边缘的问题。
 98 | 
 99 | 
100 | 
101 | ## 特征点方向分配
102 | 
103 | 为了保证特征描述子具有旋转不变性，与 SIFT 一样，需要对每个特征点分配一个主方向。为此，在以特征点为中心，以 $6s$ ( $s = 1.2 \times L / 9$ 为特征点的尺度) 为半径的区域内，计算图像的 Haar 小波响应，实际上就是对图像进行梯度运算，只不过需要利用积分图，提高梯度计算效率。求 Haar 小波响应的图像区域和 Haar 小波模板如下图所示，用于计算梯度的 Haar 小波的尺度是 $4s$ ，扫描步长为$s$ 。
104 | 
105 | ![Haar 小波响应计算](image/calculate_Haar.png)
106 | 
107 | 使用 $\sigma=2s$ 的高斯函数对 Haar 小波的响应值进行加权。为了求取主方向，设计一个以特征点为中心，张角为 $\pi/3$ 的扇形窗口，如下图所示，以一定旋转角度 $\theta$ 旋转窗口，并对窗口内的 Haar 小波响应值 $dx$、$dy$ 进行累加，得到一个矢量 $(m_w,\theta_w)$ 
108 | 
109 | $$
110 | m_w = \sqrt{\left(\sum_w dx\right)^2 + \left(\sum_w dy\right)^2} \\
111 | \theta_w = \arctan \left( \frac{\sum_w dy}{\sum_w dx} \right)
112 | $$
113 | 
114 | 主方向为最大 Haar 响应累加值所对应的方向，即 $\theta = \theta_w | \max(m_w)$ 如下图所示。
115 | 
116 | ![统计旋转窗口内 Haar 小波响应值和](image/orientation_of_SUFT.png)
117 | 
118 | 仿照 SIFT 求主方向时策略，当存在大于主峰值 80% 以上的峰值时，则将对应方向认为是该特征点的辅方向。一个特征点可能会被指定多个方向，可以增强匹配的鲁棒性。
119 | 
120 | 
121 | 
122 | ## 特征描述子
123 | 
124 | 生成特征点描述子时，同样需要计算图像的 Haar 小波响应。与确定主方向不同的是，这里不再使用圆形区域，而是在一个矩形区域计算 Haar 小波响应。以特征点为中心，沿主方向将 $20s\times20s$ 的邻域划分为 4×4 个子块，每个子块利用尺寸为 $2s$ 的 Haar 模板计算响应值，然后对响应值统计 $\sum dx$ 、$\sum|dx|$ 、$\sum dy$、$\sum|dy|$ 形成特征向量，如下图所示：
125 | 
126 | ![特征描述子生成](image/descriptor_SUFT.png)
127 | 
128 | 算法步骤：
129 | 
130 | - 将 $20s$ 的窗口划分为 4×4 个子窗口，每个子窗口大小为 $5s \times 5s$，使用尺寸为 $2s$ 的 Haar 小波计算子窗口的响应值
131 | - 以特征点为中心，用 $\sigma = 10s/3 = 3.3s$ 的高斯核函数对 $dx$ 、$dy$ 进行加权计算
132 | - 分别对每个子块的加权响应值进行统计，得到每个子块的向量：
133 | 
134 | $$
135 | \mathbf{v}_i = \left[ \sum dx \quad \sum|dx| \quad \sum dy \quad \sum|dy| \right]
136 | $$
137 | 
138 | 由于共有 4×4 个子块，因此，特征描述子共由 4×4×4=64 维特征矢量组成。SURF 描述子不仅具有尺度和旋转不变性，还具有光照不变性，这由小波响应本身决定，而对比度不变性则是通过将特征向量归一化来实现。下图为 3 种简单模式图像及其对应的特征描述子，可以看出，引入 Haar 小波响应绝对值的统计和是必要的，否则只计算 $\sum dx$ 、$\sum dy$ 的话，第一幅图和第二幅图的特征表现形式是一样的，因此，采用 4 个统计量描述子区域使特征更具有区分度。
139 | 
140 | ![不同的图像密度模式得到的不同的描述子结果](image/diff_result_in_descriptor_of_SUFT .png)
141 | 
142 | 为了充分利用积分图像计算 Haar 小波的响应值，在具体实现中，并不是直接通过旋转 Haar 小波模板求其响应值，而是在积分图像上先使用水平和垂直的 Haar 模板求得响应值 $dx$ 、$dy$ ，对 $dx$ 、$dy$ 进行高斯加权处理，并根据主方向的角度，对 $dx$ 、$dy$ 进行旋转变换，从而得到旋转后的 $dx'$ 、$dy'$  。
143 | 
144 | SURF 在求取描述子特征向量时，是对一个子块的梯度信息进行求和，而 SIFT 是依靠单个像素计算梯度的方向。在有噪声的干扰下，SURF 描述子具有更好的鲁棒性。一般而言，特征向量的长度越长，所承载的信息量就越大，特征描述子的独特性就越好，但匹配时所付出的时间代价也越大。对于 SURF 描述子，可以将其扩展到 128 维。具体方法就是在求 Haar 小波响应值的统计和时，区分 $dx \geq 0$ 和 $dx < 0$ 的情况，以及 $dy \geq 0$ 和 $dy < 0$ 的情况。为了实现快速匹配，SURF 在特征向量中增加了一个新的元素，即特征点的拉普拉斯响应正负号。在特征点检测时，将 Hessian 矩阵的迹 (Trace) 的正负号记录下来，作为特征向量中的一个变量。在特征匹配时可以节省运算时间，因为只有具有相同正负号的特征点才可能匹配，对于不同正负号的特征点不再进行相似性计算。
145 | 
146 | 
147 | 
148 | ## SURF 与 SIFT 的对比
149 | 
150 | - 尺度空间：SIFT 使用 DoG 金字塔与图像进行卷积操作，而且对图像有做降采样处理；SURF 是用近似 DoH 金字塔 (即不同尺度的 box filters) 与图像做卷积，借助积分图，实际操作只涉及到数次简单的加减运算，而且不改变图像大小。
151 | 
152 | - 特征点检测：SIFT 是先进行非极大值抑制，去除对比度低的点，再通过 Hessian 矩阵剔除边缘点。而 SURF 是计算 Hessian 矩阵的行列式值 (DoH) ，再进行非极大值抑制。
153 | - 特征点主方向：SIFT 在方形邻域窗口内统计梯度方向直方图，并对梯度幅值加权，取最大峰对应的方向；SURF 是在圆形区域内，计算各个扇形范围内 $x$ 、$y$ 方向的 Haar 小波响应值，确定响应累加和值最大的扇形方向。
154 | - 特征描述子：SIFT 将关键点附近的邻域划分为 4×4 的区域，统计每个子区域的梯度方向直方图，连接成一个 4×4×8=128 维的特征向量；SURF 将 20s×20s 的邻域划分为 4×4 个子块，计算每个子块的 Haar 小波响应，并统计 4 个特征量，得到 4×4×4=64 维的特征向量
155 | 
156 | 总体来说，SURF 和 SIFT 算法在特征点的检测取得了相似的性能，SURF 借助积分图，将模板卷积操作近似转换为加减运算，在计算速度方面要优于 SIFT 特征。   
157 | 
158 | 
159 | 
160 | 
161 | ## 参考
162 | 
163 | [^1]: [SURF: Speeded Up Robust Features](http://www.vision.ee.ethz.ch/~surf/eccv06.pdf) 
164 | [^2]: [The Website of SURF: Speeded Up Robust Features](http://www.vision.ee.ethz.ch/~surf/index.html) 
165 | 
166 | [^3]: Senit_Co 博客：[图像特征之SURF特征匹配](https://senitco.github.io/2017/06/27/image-feature-surf/) 
167 | 
168 | [^4]: 松子茶博客：[SURF特征提取分析](https://blog.csdn.net/songzitea/article/details/16986423) 
169 | [^5]: 思维之际博客：[SURF算法与源码分析、上](http://www.cnblogs.com/ronny/p/4045979.html) 、[SURF算法与源码分析、下](https://www.cnblogs.com/ronny/p/4048213.html) 
170 | 
171 | 
172 | 
173 | --8<--
174 | mathjax.txt
175 | --8<--


--------------------------------------------------------------------------------
/docs/computer_vision/computer_vision.md:
--------------------------------------------------------------------------------
 1 | ## 前言
 2 | 
 3 | !!! tips "什么是视觉、什么是计算机视觉"  
 4 |     **视觉:**  
 5 |     :    " Vision is the act of knowing what is where by looking ."    --Aristotle  
 6 | 	      『视觉是通过观察来知道什么在哪里的行为』　——亚里士多德  
 7 | 
 8 | 	  **计算机视觉:**  
 9 | 	:   维基百科的解释是『[计算机视觉](https://en.wikipedia.org/wiki/Computer_vision)是一个跨学科的科学领域，研究如何使计算机从数字图像或视频中获得高层次的理解』。换句话说，计算机视觉的目标是使计算机拥有人类或超越人类水平的视觉感知能力
10 | 
11 | 
12 | 
13 | 在进入计算机视觉基础每一小节内容之前，先给出一些知名高校的课程 Slides 和公开课链接网址：  
14 | 
15 | - [卡内基梅隆大学](http://www.cs.cmu.edu/~16385/) 
16 | - [宾夕法尼亚州立大学](http://www.cse.psu.edu/~rtc12/CSE486/)
17 | - [佐治亚理工学院](https://cn.udacity.com/course/introduction-to-computer-vision--ud810)
18 | 
19 | 其实还有很多，这里暂时不一一列举，下文相关链接中会有很多引申链接。在接下来与计算机视觉相关的大部分章节基本都源自于以上课程加以拓展。如果读者的英语水平可以的话，强烈建议去阅读和观看这些更加原汁原味的课程。亦或者，结合该网站知识章节与这些课程一同学习。
20 | 
21 | 关于计算机视觉的入门书籍，上述的三个课程都给出了参考书籍，当然有些书籍并不适合初学者。读者可以移步到[知乎关于该问题的讨论回答](https://www.zhihu.com/question/28813777)中找到切合自身情况的相应书籍阅读，为了方便大家，下面给出一些书籍网址链接：  
22 | 
23 | - [Computer Vision -- A Modern Approach](http://cmuems.com/excap/readings/forsyth-ponce-computer-vision-a-modern-approach.pdf)
24 | - [Computer Vision:  Models, Learning, and Inference](http://www.computervisionmodels.com)
25 | - [Computer Vision: Algorithms and Applications](http://szeliski.org/Book/)
26 | - [Annotated Computer Vision Bibliography: Table of Contents](http://www.visionbib.com/bibliography/contents.html)
27 | 
28 | 
29 | 
30 | ## 目录列表
31 | 
32 | 


--------------------------------------------------------------------------------
/docs/computer_vision/image/AC_elliptic.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/LSXiang/Journey2SLAM/901bf5fd97c93dc68248f74cd5bae1a52bef85a2/docs/computer_vision/image/AC_elliptic.png


--------------------------------------------------------------------------------
/docs/computer_vision/image/BRIEF_point_choice.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/LSXiang/Journey2SLAM/901bf5fd97c93dc68248f74cd5bae1a52bef85a2/docs/computer_vision/image/BRIEF_point_choice.png


--------------------------------------------------------------------------------
/docs/computer_vision/image/DoG.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/LSXiang/Journey2SLAM/901bf5fd97c93dc68248f74cd5bae1a52bef85a2/docs/computer_vision/image/DoG.png


--------------------------------------------------------------------------------
/docs/computer_vision/image/DoH_model_scale_change.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/LSXiang/Journey2SLAM/901bf5fd97c93dc68248f74cd5bae1a52bef85a2/docs/computer_vision/image/DoH_model_scale_change.png


--------------------------------------------------------------------------------
/docs/computer_vision/image/FAST_corner.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/LSXiang/Journey2SLAM/901bf5fd97c93dc68248f74cd5bae1a52bef85a2/docs/computer_vision/image/FAST_corner.png


--------------------------------------------------------------------------------
/docs/computer_vision/image/Gaussian_Diff_model.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/LSXiang/Journey2SLAM/901bf5fd97c93dc68248f74cd5bae1a52bef85a2/docs/computer_vision/image/Gaussian_Diff_model.png


--------------------------------------------------------------------------------
/docs/computer_vision/image/Haar_like.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/LSXiang/Journey2SLAM/901bf5fd97c93dc68248f74cd5bae1a52bef85a2/docs/computer_vision/image/Haar_like.png


--------------------------------------------------------------------------------
/docs/computer_vision/image/LoG_vs_DoG.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/LSXiang/Journey2SLAM/901bf5fd97c93dc68248f74cd5bae1a52bef85a2/docs/computer_vision/image/LoG_vs_DoG.png


--------------------------------------------------------------------------------
/docs/computer_vision/image/aperture_problem.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/LSXiang/Journey2SLAM/901bf5fd97c93dc68248f74cd5bae1a52bef85a2/docs/computer_vision/image/aperture_problem.png


--------------------------------------------------------------------------------
/docs/computer_vision/image/calculate_Haar.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/LSXiang/Journey2SLAM/901bf5fd97c93dc68248f74cd5bae1a52bef85a2/docs/computer_vision/image/calculate_Haar.png


--------------------------------------------------------------------------------
/docs/computer_vision/image/cascade.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/LSXiang/Journey2SLAM/901bf5fd97c93dc68248f74cd5bae1a52bef85a2/docs/computer_vision/image/cascade.png


--------------------------------------------------------------------------------
/docs/computer_vision/image/classification_via_eigenvalues.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/LSXiang/Journey2SLAM/901bf5fd97c93dc68248f74cd5bae1a52bef85a2/docs/computer_vision/image/classification_via_eigenvalues.png


--------------------------------------------------------------------------------
/docs/computer_vision/image/descriptor_HOG.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/LSXiang/Journey2SLAM/901bf5fd97c93dc68248f74cd5bae1a52bef85a2/docs/computer_vision/image/descriptor_HOG.png


--------------------------------------------------------------------------------
/docs/computer_vision/image/descriptor_SUFT.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/LSXiang/Journey2SLAM/901bf5fd97c93dc68248f74cd5bae1a52bef85a2/docs/computer_vision/image/descriptor_SUFT.png


--------------------------------------------------------------------------------
/docs/computer_vision/image/diff_result_in_descriptor_of_SUFT .png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/LSXiang/Journey2SLAM/901bf5fd97c93dc68248f74cd5bae1a52bef85a2/docs/computer_vision/image/diff_result_in_descriptor_of_SUFT .png


--------------------------------------------------------------------------------
/docs/computer_vision/image/differential_Gaussian.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/LSXiang/Journey2SLAM/901bf5fd97c93dc68248f74cd5bae1a52bef85a2/docs/computer_vision/image/differential_Gaussian.png


--------------------------------------------------------------------------------
/docs/computer_vision/image/extremum_DoG.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/LSXiang/Journey2SLAM/901bf5fd97c93dc68248f74cd5bae1a52bef85a2/docs/computer_vision/image/extremum_DoG.png


--------------------------------------------------------------------------------
/docs/computer_vision/image/extremum_in_continuous_vs_discrete.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/LSXiang/Journey2SLAM/901bf5fd97c93dc68248f74cd5bae1a52bef85a2/docs/computer_vision/image/extremum_in_continuous_vs_discrete.png


--------------------------------------------------------------------------------
/docs/computer_vision/image/harris_weighting_function.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/LSXiang/Journey2SLAM/901bf5fd97c93dc68248f74cd5bae1a52bef85a2/docs/computer_vision/image/harris_weighting_function.png


--------------------------------------------------------------------------------
/docs/computer_vision/image/hog.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/LSXiang/Journey2SLAM/901bf5fd97c93dc68248f74cd5bae1a52bef85a2/docs/computer_vision/image/hog.png


--------------------------------------------------------------------------------
/docs/computer_vision/image/integral_rectangle.png.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/LSXiang/Journey2SLAM/901bf5fd97c93dc68248f74cd5bae1a52bef85a2/docs/computer_vision/image/integral_rectangle.png.png


--------------------------------------------------------------------------------
/docs/computer_vision/image/integral_rotation_rectangle.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/LSXiang/Journey2SLAM/901bf5fd97c93dc68248f74cd5bae1a52bef85a2/docs/computer_vision/image/integral_rotation_rectangle.png


--------------------------------------------------------------------------------
/docs/computer_vision/image/invariant2intensity.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/LSXiang/Journey2SLAM/901bf5fd97c93dc68248f74cd5bae1a52bef85a2/docs/computer_vision/image/invariant2intensity.png


--------------------------------------------------------------------------------
/docs/computer_vision/image/not_invariant2scale.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/LSXiang/Journey2SLAM/901bf5fd97c93dc68248f74cd5bae1a52bef85a2/docs/computer_vision/image/not_invariant2scale.png


--------------------------------------------------------------------------------
/docs/computer_vision/image/orientation_of_SUFT.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/LSXiang/Journey2SLAM/901bf5fd97c93dc68248f74cd5bae1a52bef85a2/docs/computer_vision/image/orientation_of_SUFT.png


--------------------------------------------------------------------------------
/docs/computer_vision/image/principal_component_ellipse.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/LSXiang/Journey2SLAM/901bf5fd97c93dc68248f74cd5bae1a52bef85a2/docs/computer_vision/image/principal_component_ellipse.png


--------------------------------------------------------------------------------
/docs/computer_vision/image/pyramid_LoG_VS_DoH.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/LSXiang/Journey2SLAM/901bf5fd97c93dc68248f74cd5bae1a52bef85a2/docs/computer_vision/image/pyramid_LoG_VS_DoH.png


--------------------------------------------------------------------------------
/docs/computer_vision/image/pyramid_box_scale.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/LSXiang/Journey2SLAM/901bf5fd97c93dc68248f74cd5bae1a52bef85a2/docs/computer_vision/image/pyramid_box_scale.png


--------------------------------------------------------------------------------
/docs/computer_vision/image/rectangle.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/LSXiang/Journey2SLAM/901bf5fd97c93dc68248f74cd5bae1a52bef85a2/docs/computer_vision/image/rectangle.png


--------------------------------------------------------------------------------
/docs/computer_vision/image/rotate_coordinate.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/LSXiang/Journey2SLAM/901bf5fd97c93dc68248f74cd5bae1a52bef85a2/docs/computer_vision/image/rotate_coordinate.png


--------------------------------------------------------------------------------
/docs/computer_vision/image/rotated_rectangle.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/LSXiang/Journey2SLAM/901bf5fd97c93dc68248f74cd5bae1a52bef85a2/docs/computer_vision/image/rotated_rectangle.png


--------------------------------------------------------------------------------
/docs/computer_vision/image/rotation_region.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/LSXiang/Journey2SLAM/901bf5fd97c93dc68248f74cd5bae1a52bef85a2/docs/computer_vision/image/rotation_region.png


--------------------------------------------------------------------------------
/docs/computer_vision/image/variable_scale_Gaussian.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/LSXiang/Journey2SLAM/901bf5fd97c93dc68248f74cd5bae1a52bef85a2/docs/computer_vision/image/variable_scale_Gaussian.png


--------------------------------------------------------------------------------
/docs/computer_vision/image_filtering.md:
--------------------------------------------------------------------------------
1 | ## 数字图像
2 | 
3 | 


--------------------------------------------------------------------------------
/docs/index.md:
--------------------------------------------------------------------------------
 1 | # 欢迎来到 **SLAM** 之旅
 2 | 
 3 | <center>![Logo](LOGO.png)</center>
 4 | 
 5 | **SLAM 之旅（Journey to SLAM）**是用于记录分享学习 [Simultaneous Localization and Mapping (SLAM)](https://en.wikipedia.org/wiki/Simultaneous_localization_and_mapping "Simultaneous Localization and Mapping") 知识的平台。
 6 | 
 7 | **SLAM 之旅** 旨在构建一个免费开放的知识整合站点，提供全面的 SLAM 的知识分享。网站的文档内容开源于 [**Github**](https://github.com/LSXiang/Journey2SLAM) 。欢迎各位志同道合的人士和大佬们在此平台进行知识分享。您可以将学习 SLAM 期间编写的博客、笔记等通过 Markdown 编辑并推送到此项目的 [**docs**](https://github.com/LSXiang/Journey2SLAM/tree/master/docs) 文件中，也可以修改相应的文件然后提交 `Pull Request`，或者仅针对具体情况提出 [***Issues***](https://github.com/LSXiang/Journey2SLAM/issues) 的方式来贡献和分享你的知识。
 8 | 
 9 | 
10 | 
11 | !!! note ""  
12 | 	[<div align="center">![知识共享许可协议](licensebutton.png "license")](https://creativecommons.org/licenses/by-nc-nd/4.0/deed.zh)  
13 | 	本网站采用[知识共享署名-非商业性使用-禁止演绎 4.0 国际许可协议](https://creativecommons.org/licenses/by-nc-nd/4.0/deed.zh "license")进行许可。
14 | 
15 | 
16 | 


--------------------------------------------------------------------------------
/docs/licensebutton.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/LSXiang/Journey2SLAM/901bf5fd97c93dc68248f74cd5bae1a52bef85a2/docs/licensebutton.png


--------------------------------------------------------------------------------
/docs/multiple_view_geometry/RepreOf3DMoveScene.md:
--------------------------------------------------------------------------------
  1 | ## 三维重建的起源
  2 | 
  3 | 从一组二维视图重建世界的三维结构在计算机视觉领域有着悠久的历史。这是一个经典的*不适定问题 (ill-posed problem)*，因为与给定的一组观察或图像相一致的重建结果通常并不是唯一的。因此，我们需要附加一些假设。在数学上，研究三维场景与观测到的二维投影之间的几何关系是基于两种类型的变换，即，
  4 | 
  5 | - 用**欧几里得运动 (Euclidean motion)** 或**刚体运动 (rigid-body motion)** 来表示相机从当前帧到下一帧图像的运动
  6 | - 用**透视投影 (Perspective projection)** 来表示图像的形成过程 (如：**针孔相机 (pinhole camera)** 等)。
  7 | 
  8 | 透视投影的概念起源于古希腊 (Euclid of Alexandria,  400 B.C.) 和文艺复兴时期 (Brunelleschi & Alberti, 1435)。透视投影的研究引出了**投影几何 (projective geometry)** 领域。
  9 | 
 10 | 关于多视几何的第一个研究工作成果来自于 Erwin Kruppa (1913) ，他指出五个点的两个视图足以确定两个视图之间的相对变换（运动） 和点的三维位置（结构）。Longuet-Higgins 在 1981 年提出了一种基于两视图**对极约束 (epipolar constraint)** 恢复运动结构重建的线性算法。在几本教科书中总结了一系列关于这方面的著作 (Faugeras 1993, Kanatani 1993, Maybank 1993, Weng et al. 1993) 。对三个视图的扩展由 Spetsakis 和 Aloimonos (1987, 1990)、Shashua (1994) 和 Hartley (1995) 研究发布。多视图和正交投影的因子分解技术是由 Tomasi 和 Kanade 于 1992 年研究发布的。
 11 | 
 12 | 相机运动与三维位置的联合估计称为**运动结构重建 (structure and motion)** 或**视觉 SLAM (visual SLAM)** 。
 13 | 
 14 | 
 15 | 
 16 | ## 三维欧式空间
 17 | 
 18 | 一般来说，欧几里得空间是一个集合，它的元素满足欧几里得的五个公理。三维的欧几里得空间 $\mathbb{E}^3$ 是由以下式为坐标的所有点 $P \in \mathbb{E}^3$ 组成的。
 19 | 
 20 | $$
 21 | \mathbf{X} \doteq [X_1, X_2, X_3]^\top = 
 22 | \begin{bmatrix} X_1 \\ X_2 \\ X_3 \end{bmatrix} \in \mathbb{R}^3
 23 | $$
 24 | 
 25 | 通过这样一个**笛卡尔 (Cartesian) 坐标系**的赋值，可以在 $\mathbb{E}^3$ 和 $\mathbb{R}^3$ 之间建立一一对应关系。这里的 $\mathbb{E}^3$ 可以被视为等同于 $\mathbb{R}^3$ 。这意味着我们在讨论时可以把一个点 ( $\mathbb{E}^3$ ) 和它的坐标 ( $\mathbb{R}^3$ ) 当作一回事。笛卡尔坐标是使测量距离和角度成为可能的第一步。为此，必须为 $\mathbb{E}^3$ 赋予**度量标准 (metric)** 。 度量的精确定义依赖于向量的概念。
 26 | 
 27 | ### 向量
 28 | 
 29 | 在欧式空间中，一个向量 $\mathbf{v}$ 由一对点 $q, p \in \mathbb{E}^3$ 确定，被定义为连接 $p$ 到 $q$ 的有向箭头，记作 $\mathbf{v} = \overrightarrow{pq}$ 。这里的点 $p$ 通常被称为是向量 $\mathbf{v}$ 的基点。假使点 $p$ 的坐标为 $\mathbf{X}$ ，点 $q$ 的坐标为 $\mathbf{Y}$ ，那么向量 $\mathbf{v}$ 的坐标为：
 30 | 
 31 | $$
 32 | \mathbf{v} = [v_1, v_2, v_3]^\top \doteq \mathbf{Y} - \mathbf{X} \in \mathbb{R}^3
 33 | $$
 34 | 
 35 | 以上定义的向量被称为**有界向量 (bound vector)** 。如果不考虑基点 $p$ ，即认为该向量独立于它的基点，那么它就是一个**自由向量 (free vector)** 。
 36 | 
 37 | !!! note ""  
 38 |     需要注意：点和向量是不同的几何对象。这一点很重要，我们很快就会看到，因为刚体运动对点和向量的作用是不同的。
 39 | 
 40 | 所有自由向量 $\mathbf{v} \in \mathbb{R}^3$ 的集合构成一个线性向量空间。通过确定 $\mathbb{E}^3$ 和 $\mathbb{R}^3$ 之间的联系， $\mathbb{E}^3$ 的欧几里德度量标准仅由向量空间 $\mathbb{R}^3$ 上的一个**内积 (inner product，或称为点积，dot product)** 定义。有了这样一个度量标准 (metric) ，我们不仅可以测量点之间的距离（向量的模）或向量之间的角度，还可以计算曲线的长度或区域的体积。
 41 | 
 42 | 运动粒子 $p$ 在 $\mathbb{E}^3$ 中的运动轨迹可用曲线 $\gamma(\cdot) : t \mapsto \mathbf{X} \in \mathbb{R}^3, t \in [0, 1]$ 来描述，则曲线的总长度为：
 43 | 
 44 | $$
 45 | l(\gamma(\cdot)) = \int_0^1 \| \dot{\mathbf{X}} (t) \| \, dt
 46 | $$
 47 | 
 48 | 这里的 $\dot{\mathbf{X}} (t) = \frac{\mathrm d}{\mathrm d t} \big( \mathbf{X} (t) \big) \in \mathbb{R}^3$ 被称为曲线的切向量。 
 49 | 
 50 | ### 叉积
 51 | 
 52 | 在向量空间 $\mathbb{R}^3$ 上可以定义一个叉积运算，运算形式如下：
 53 | 
 54 | $$
 55 | \times : \mathbb{R}^3 \times \mathbb{R}^3 \to \mathbb{R}^3: \quad \mathbf{u} \times \mathbf{v} = \begin{bmatrix} u_2 v_3 - u_3 v_2  \\ u_3 v_1 - u_1 v_3 \\ u_1 v_2 - u_2 v_1 \end{bmatrix} \in \mathbb{R}^3
 56 | $$
 57 | 
 58 | 其中，向量 $\mathbf{u, v}$ 的叉积结果是一个垂直于它们的向量。由于 $\mathbf{u} \times \mathbf{v} = - \mathbf{v} \times \mathbf{u}$ ，两个向量的外积正交于它的每个因子，因子的顺序决定了叉积结果的*方向 (orientation)*。这个方向满足**右手法则 (Right-hand rule)** 。
 59 | 
 60 | ### 反对称矩阵
 61 | 
 62 | 固定 $\mathbf{u}$ 可以引出一个通过反对称矩阵定义的线性映射： $\mathbf{v} \to \mathbf{u} \times \mathbf{v}$ ，该反对称矩阵表示成 $\hat{\mathbf{u}} \in \mathbb{R}^{3\times3}$ ，称为 “$\mathbf{u}$ **hat**” 。在一些文献中，反对称矩阵 $\hat{\mathbf{u}}$ 也可以表示成 $\mathbf{u}_\times \, \text{或} \, [\mathbf{u}]_\times$ ：
 63 | 
 64 | $$
 65 | \hat{\mathbf{u}} \doteq
 66 | \begin{bmatrix}
 67 |   0 & -u_3 & u_2 \\ u_3 & 0 & -u_1 \\ -u_2 & u_1 & 0
 68 | \end{bmatrix}
 69 | \in \mathbb{R}^{3\times3}
 70 | $$
 71 | 
 72 | 因此，我们可将叉积写成 $\mathbf{u} \times \mathbf{v} = \hat{\mathbf{u}} \mathbf{v}$ 。反对称矩阵有 $\hat{\mathbf{u}}^\top = -\hat{\mathbf{u}}$ 。
 73 | 
 74 | 反之，每一个反对称矩阵 $M = -M^\top \in \mathbb{R}^{3\times3}$ 可以通过向量 $\mathbf{u} \in \mathbb{R}^3$ 来关联识别。**帽 (hat)** 运算符 $\wedge : \mathbb{R}^3 \to so(3); \; \mathbf{u} \to \hat{\mathbf{u}}$ 在空间 $\mathbb{R}^3$ 和由所有 $3\times3$ 反对称矩阵构成的空间 $so(3)$ 之间定义了一个*同构 (isomorphism)* 。它的逆操作称为 **vee** 操作 $\vee : so(3) \to \mathbb{R}^3; \; \hat{\mathbf{u}} \to \hat{\mathbf{u}}^\vee = \mathbf{u}$ 。
 75 | 
 76 | 
 77 | 
 78 | ## 刚体运动
 79 | 
 80 | 刚体的运动保持刚体上任意一对点 $(p,q)$ 之间的距离恒为 $d$ 。如下图所示：
 81 | 
 82 | ![Rigid-Body Motion](image/rigid-body motion.png)
 83 | 
 84 | 因此，如果 $\mathbf{X}(t)$ 和 $\mathbf{Y}(t)$ 是物体上任意两点 $p,q$ 的坐标，那么它们的距离是恒定的：
 85 | 
 86 | $$
 87 | \| \mathbf{X}(t) - \mathbf{Y}(t) \| \equiv 常数, \quad \forall t \in \mathbb{R} \tag{1}
 88 | $$
 89 | 
 90 | **刚体运动 (rigid-body motion)** (或 **刚体变换 (rigid-body transformation)** ) 是一组映射，描述刚体上每个点的坐标如何在满足上式条件下随时间变化。将该映射表示成：
 91 | 
 92 | $$
 93 | g_t : \mathbb{R}^3 \to \mathbb{R}^3; \quad \mathbf{X} \to g_t({\mathbf{X}}) \quad t \in [0, T]
 94 | $$
 95 | 
 96 | 且需要维持向量的**模 (norm)** 与**叉积 (cross product)** 满足以下两个式子：
 97 | 
 98 | - $\|g_t (\mathrm{v})\| = \|\mathrm{v}\|, \; \forall \mathrm{v} \in \mathbb{R}^3$ 
 99 | - $g_t(\mathrm{u}) \times g_t(\mathrm{v}) = g_t(\mathrm{u} \times \mathrm{v}), \forall \mathrm{u}, \mathrm{v} \in \mathbb{R}^3$ 
100 | 
101 | 所有这些运动或变换的集合用 $SE(3)$ 表示。刚体运动引起的映射或变换称为**特殊欧几里德变换 (special Euclidean transformation)** 。“特殊” 一词表示转换是保持方向的。
102 | 
103 | 根据上面刚体运动的定义，内积可以依赖范数通过*极化恒等式 (polarization identity)* 来定义：
104 | 
105 | $$
106 | \langle u,v \rangle = \frac{1}{4} (\|u+v\|^2 - \|u-v\|^2)
107 | $$
108 | 
109 | 因此我们也可以说刚体运动是一个保持内积和外积的映射，而且根据定义可知道刚体运动也保留了**三重积 (triple product)** ：
110 | 
111 | $$
112 | \langle g_t(u), g_t(v)\times g_t(w) \rangle = \langle u,v\times w \rangle, \forall u,v,w \in \mathbb{R}^3
113 | $$
114 | 
115 | 由于三重积对应于由这三个向量张成的平行六面体的体积，刚体运动也保留了**体积 (volume)** 。
116 | 
117 | 
118 | 
119 | ### 刚体旋转的表达
120 | 
121 | 如下图所示，刚体的旋转可以看作绕固定点 $o$ 沿着轴 $\omega$ 旋转。设参考系坐标 $W$ (实线) 是固定的，而旋转刚体的坐标系 $C$ (虚线) 与刚体绑定在一起。
122 | 
123 | ![Rotation of a rigid body](image/Rotation4Rigid-body.png)
124 | 
125 | 坐标系 $C$ 相对于坐标系 $W$ 的构型 (或 “方位”) 由三个标准正交向量 $\mathbf{r}_1 = g_*(\mathbf{e}_1), \mathbf{r}_2 = g_*(\mathbf{e}_2), \mathbf{r}_3 = g_*(\mathbf{e}_3) \in \mathbb{R}^3$ 相对于世界坐标系 $W$ 的坐标变换决定。这三个向量 $\mathbf{r}_1, \mathbf{r}_2, \mathbf{r}_3$ 分别是沿着坐标系 $C$ 的三个主轴 $x, y, z$ 的单位向量。因此旋转矩阵的构型可以通过一个 $3 \times 3$ 的矩阵进行定义：
126 | 
127 | $$
128 | R_{wc} \doteq [\mathbf{r}_1, \mathbf{r}_2, \mathbf{r}_3] \quad \in \mathbb{R}^{3 \times 3}
129 | $$
130 | 
131 | 其中三个向量 $\mathbf{r}_1, \mathbf{r}_2, \mathbf{r}_3$ 按照顺序堆叠成矩阵的三列。由于向量 $\mathbf{r}_1, \mathbf{r}_2, \mathbf{r}_3$ 来自正交坐标系，因此有
132 | 
133 | $$
134 | \mathbf{r}_i^\top \mathbf{r}_j = \delta_{ij} \doteq \begin{cases} 1, & i = j \\ 0, & i \neq j \end{cases} \quad \forall i,j \in \{1,2,3\}
135 | $$
136 | 
137 | 将此用矩阵形式表示为
138 | 
139 | $$
140 | R_{wc}^\top R_{wc} = R_{wc}R_{wc}^\top = I
141 | $$
142 | 
143 | 任何满足上述恒等式的矩阵称为**正交矩阵 (orthogonal matrix)** ，根据上面的定义，正交矩阵的逆就是它的转置： $R_{wc}^{-1} = R_{wc}^\top$ 。由于 $\mathbf{r}_1, \mathbf{r}_2, \mathbf{r}_3$ 构成了一个右手坐标系，因而进一步得到了 $R_{wc}$ 的行列式必须为 $+1$ 的条件。因此矩阵 $R_{wc}$ 是**特殊正交阵 (special orthogonal matrix)** ，这里的 “特殊” 表示该矩阵的行列式为 $+1$ ，即它是保持方向的。所有 $\mathbb{R}^{3 \times 3}$ 的特殊正交阵构成的空间定义如下：
144 | 
145 | $$
146 | SO(3) \doteq \{ R \in \mathbb{R}^{3 \times 3} | R^\top R = I, \det(R) = +1 \}
147 | $$
148 | 
149 | 
150 | 
151 | #### 旋转的指数表示
152 | 
153 | 根据上述，我们可知在 $\mathbb{E}^3$ 空间中刚体运动的旋转可以通过一个 $3 \times 3$ 的旋转矩阵 $R \in SO(3)$ 表示。每一个旋转矩阵 $R$ 通过 $3 \times 3 = 9$ 个元素定义。然而，这 $9$ 个元素并不是自由参数，因为它们必须满足 $R^\top R= I$ 的约束，这实际上对 $9$ 个元素施加了 $6$ 个独立的约束。因此，由旋转矩阵构成的空间 $SO(3)$ 的维度应该只有 $3$ 维，而 $9$ 个参数中有 $6$ 个实际上是多余的。
154 | 
155 | 给定一个轨迹 $R(t) : \mathbb{R} \to SO(3)$ 描述一个连续的旋转运动，那么旋转必须满足下式：
156 | 
157 | $$
158 | R(t) R^\top(t) = I
159 | $$
160 | 
161 | 计算上面方程对时间 $t$ 的导数，注意到右边是一个常数矩阵，可以得到
162 | 
163 | $$
164 | \dot{R}(t) R^\top(t) + R(t) \dot{R}^\top(t) = 0 \quad \Rightarrow \quad \dot{R}(t) R^\top(t) = - (\dot{R}(t) R^\top(t))^\top
165 | $$
166 | 
167 | 根据性质，我们可知矩阵 $\dot{R}(t) R^\top(t) \in \mathbb{R}^{3 \times 3}$ 是一个反对称矩阵。 根据[反对称矩阵的性质](#反对称矩阵)，一定存在一个向量，设为 $\omega(t) \in \mathbb{R}^3$ ，使得
168 | 
169 | $$
170 | \dot{R}(t) R^\top(t) = \hat{\omega}(t) \quad \Rightarrow \quad \dot{R}(t) = \hat{\omega}(t) R(t)
171 | $$
172 | 
173 | 根据上式，如果假设 $R(t_0) = I$ ，那么当 $t = t_0$ 时有 $\dot{R}(t_0) = \hat{\omega}(t_0)$ 。因此，在单位矩阵 $I$ 附近，反对称矩阵给出了旋转矩阵的一阶近似
174 | 
175 | $$
176 | R(t_0 + dt) \approx I + \hat{\omega}(t_0)dt
177 | $$
178 | 
179 | 综上可得，所有的反对称矩阵张成的空间如下表示
180 | 
181 | $$
182 | so(3) \doteq \{ \hat{\omega} \in \mathbb{R}^{3 \times 3} | \omega \in \mathbb{R}^3 \}
183 | $$
184 | 
185 | 它亦被称为旋转群 $SO(3)$ 在恒等点处的**切空间 (tangent space)** 。旋转群 $SO(3)$ 被称为**李群 (Lie group)** ，而它的切空间 $so(3)$ 被称为**李代数 (Lie algebra)** 。
186 | 
187 | 
188 | 
189 | ##### 李群和李代数
190 | 
191 | 李群 (或无穷小群) 是一个光滑的流形，同时也是一个群，并且其群运算中的乘法和求逆都是光滑的映射。在单位元处与李群相切的空间称为相关的李代数。从李代数到李群的映射称为指数映射，它的逆过程叫做对数映射。
192 | 
193 | 李群与李代数涉及到庞大的知识量，这里对这部分就不进行拓展描述。后续或许会有专门的篇幅来记录这些知识点，再进行添加链接跳转（#TODO）。这部分知识我当时是阅读了[《机器人学中的状态估计》](http://asrl.utias.utoronto.ca/~tdb/) 中相关的篇幅来学习的。当然读者也可以阅读更加数学性的书籍来学习，这里就不再引申。
194 | 
195 | 
196 | 
197 | !!! tip "Sophus Lie (1841 - 1899)"
198 |     马里乌斯·索菲斯·李 (Marius Sophus Lie) 是一位出生于挪威的数学家。他创立了连续对称理论，并将其应用于几何和微分方程的研究。他最大的成就之一是发现连续变换群在线性化的版本中更容易理解 (Theorie der Transformationsgruppen, 1893)。这些无穷小生成元构成了一个今天被称为李代数的结构。群律的线性化版本对应于李代数上的一个运算，称为交换子括号 (commutator bracket) 或李括号 (Lie bracket)。
199 | 
200 | 
201 | 
202 | ##### 指数映射
203 | 
204 | 我们先假设矩阵 $\hat \omega$ 是一个常量，那么
205 | 
206 | $$
207 | \dot R(t) = \hat \omega R(t)
208 | $$
209 | 
210 | 上式中矩阵 $R(t)$ 可以解释为以下**线性常微分方程 (linear ordinary differential equation (ODE))**的**状态转移矩阵 (state transition matrix)** 
211 | 
212 | $$
213 | \dot x(t) = \hat \omega x(t) , \quad x(t) \in \mathbb{R}^3
214 | $$
215 | 
216 | 上式线性常微分方程的解为
217 | 
218 | $$
219 | x(t) = e^{\hat \omega t} x(0)
220 | $$
221 | 
222 | 其中 $e^{\hat \omega t}$ 是矩阵指数
223 | 
224 | $$
225 | e^{\hat \omega t} = I + \hat\omega t + \frac{(\hat\omega t)^2}{2!} + \dotsb + \frac{(\hat\omega t)^n}{n!} + \dotsb
226 | $$
227 | 
228 | 其中，指数 $e^{\hat \omega t}$ 通常记为 $\exp ( \hat \omega t)$ 。由于上式线性常微分方程具有唯一解，那么假设 $R(0) = I$ 为初始条件，那么有：
229 | 
230 | $$
231 | R(t) = e^{\hat \omega t}
232 | $$
233 | 
234 | 为了验证矩阵 $e^{\hat \omega t}$ 确实是一个旋转矩阵，可以直接从矩阵指数的定义中看出
235 | 
236 | $$
237 | (e^{\hat \omega t})^{-1} = e^{- \hat \omega t} = e^{\hat \omega^\top t} = (e^{\hat \omega t})^\top
238 | $$
239 | 
240 | 因此 $(e^{\hat \omega t})^\top e^{\hat \omega t} = I$ 。 $R(t) = e^{\hat \omega t}$ 的物理解释是如果 $\| \omega \| = 1$，那么 $R(t)$ 是一个以 $\omega$ 为轴，弧度大小为 $t$ 的旋转。通常， $t$ 可以乘到 $\omega$ 中去，因此有 $R = e^{\hat\omega}$ ，其中 $\omega$ 的模可以为任意非负实数。所以，根据上文所述，矩阵指数确实定义了空间 $so(3)$ 到 $SO(3)$ 之间的空间映射，即所谓的**指数映射 (exponential map)** :
241 | 
242 | $$
243 | \exp : so(3) \to SO(3); \quad \hat \omega \mapsto e^{\hat \omega}
244 | $$
245 | 
246 | 以上的所有推导都是在假设矩阵 $\hat \omega$ 是一个常量的前提下，那么对于任意的 $R \in SO(3)$ 是否都可以通过 $R(t) = e^{\hat \omega t}$ 来表示呢。答案是肯定的，接下来会对这个疑问进行阐述。
247 | 
248 | 
249 | 
250 | ##### 对数映射
251 | 
252 | 对于任意的 $R \in SO(3)$ ，都存在 (不一定唯一的) $\omega \in \mathbb{R}^3$ 使得 $R = \exp( \hat \omega)$ 。我们定义它的逆过程为**对数映射 (logarithmic map)** ：$\hat \omega = \log (R)$ 。
253 | 
254 | 如果旋转矩阵 $R \neq I$ 且定义为：
255 | 
256 | $$
257 | R = \begin{bmatrix} r_{11} & r_{12} & r_{13} \\ r_{21} & r_{22} & r_{23} \\ r_{31} & r_{32} & r_{33} \end{bmatrix}
258 | $$
259 | 
260 | 那么对应的 $\omega$ 通过下式计算：
261 | 
262 | $$
263 | \| \omega \| = \cos^{-1} \left( \frac{trace(R)-1}{2} \right) , \quad \frac{\omega}{\| \omega \|} = \frac{1}{2\sin(\| \omega \|)} \begin{bmatrix} r_{32} - r_{23} \\ r_{13} - r_{31} \\ r_{21} - r_{12} \end{bmatrix}
264 | $$
265 | 
266 | 如果 $R = I$ ，那么 $\| \omega \| = 0$ ，而 $\frac{\omega}{\| \omega \|}$ 没有定义 (因此可以任意选择) 。
267 | 
268 | 从上面的描述可以得知：任意旋转矩阵可以通过绕某一固定旋转轴 $\omega$ 旋转一个特定的角度 $\| \omega \|$ 得到。然而从 $so(3)$ 到 $SO(3)$ 的指数映射并不是一一对应的，这是因为对于任意 $k \in \mathbb{Z}$ ，旋转向量 $(\| \omega \| + 2k\pi) \frac{\omega}{\| \omega \|}$ 都对应相同的旋转矩阵 $R$ 。
269 | 
270 | 
271 | 
272 | ##### 罗德里格斯旋转
273 | 
274 | 通过对数映射，我们知道了如何通过一个旋转矩阵求解它的指数坐标 $\omega$ 。那么如何通过一个指数坐标 $\omega$ 求解一个旋转矩阵 $R = e^{\hat\omega}$ 呢？当然可以通过它本身的定义：
275 | 
276 | $$
277 | e^{\hat \omega} = I + \hat\omega + \frac{(\hat\omega)^2}{2!} + \dotsb + \frac{(\hat\omega)^n}{n!} + \dotsb
278 | $$
279 | 
280 | 然而一种更加有效的求解方式是通过**罗德里格斯 (Rodrigues)** 公式进行求解。令 $\omega = \phi \mathbf{x}$ 其中 $\phi = \| \omega \|$ 为旋转角度，$\mathbf{x} = \omega / \| \omega \|$ 是单位长度旋转轴。根据反对称矩阵的性质有：
281 | 
282 | $$
283 | \hat{\mathbf{x}}^2 = \mathbf{x}\mathbf{x}^\top - I, \quad \hat{\mathbf{x}}^3 = -\hat{\mathbf{x}}
284 | $$
285 | 
286 | 那么
287 | 
288 | $$
289 | \begin{align*}
290 | e^{\hat \omega} &= e^{\phi \hat{\mathbf{x}}} \\
291 | &= I + \phi \hat{\mathbf{x}} + \frac{(\phi \hat{\mathbf{x}})^2}{2!} + \dotsb + \frac{(\phi \hat{\mathbf{x}})^n}{n!} + \dotsb \\
292 | &= I + \phi \hat{\mathbf{x}} + \frac{1}{2!}\phi^2 \hat{\mathbf{x}}\hat{\mathbf{x}} + \frac{1}{3!}\phi^3 \underbrace{\hat{\mathbf{x}}\hat{\mathbf{x}}\hat{\mathbf{x}}}_{-\hat{\mathbf{x}}} + \frac{1}{4!}\phi^4 \underbrace{\hat{\mathbf{x}}\hat{\mathbf{x}}\hat{\mathbf{x}}\hat{\mathbf{x}}}_{-\hat{\mathbf{x}}\hat{\mathbf{x}}} + \dotsb \\
293 | &= I + \underbrace{\left(\phi - \frac{1}{3!}\phi^3 + \frac{1}{5!}\phi^5 - \dotsb \right)}_{\sin \phi} \hat{\mathbf{x}} + \left[1- \underbrace{\left(1 - \frac{1}{2!}\phi^2 + \frac{1}{4!}\phi^4 - \dotsb \right)}_{\cos \phi} \right]\hat{\mathbf{x}}\hat{\mathbf{x}} \\
294 | &= I + \sin(\|\omega\|)\frac{\hat\omega}{\|\omega\|} + (1-\cos(\|\omega\|))\frac{\hat\omega^2}{\|\omega\|^2} \\
295 | &= \cos(\|\omega\|)I + (1-\cos(\|\omega\|))\frac{\omega\omega^\top}{\|\omega\|^2} + \sin(\|\omega\|)\frac{\hat\omega}{\|\omega\|}
296 | \end{align*}
297 | $$
298 | 
299 | 
300 | 
301 | 根据上面的推导，我们可以得到已知一个旋转向量 $\omega$ 求解出旋转矩阵的方式。下面给出一个更加直观的物理表现。如下图所示，我们知道任意旋转都可以表示成绕一单位旋转轴 $\mathbf{n}$ 旋转 $\theta$ 角，等价于通过一个旋转向量 $\omega = \theta\mathbf{n}$ 来表示。
302 | 
303 | ![Rodrigues Rotation](image/rodrigues.png)
304 | 
305 | 根据上图有 $\mathbf{v}$ 绕单位旋转轴 $\mathbf{n}$ 旋转 $\theta$ 角后得到 $\mathbf{u}$ ，那么这一旋转过程的表示可以通过以下推导所得。
306 | 
307 | 首先，目标向量 $\mathbf{v}$ 平行于 $\mathbf{n}$ 的分量表示为
308 | 
309 | $$
310 | \mathbf{v}_{\|} = \mathbf{n}(\mathbf{n} \cdot \mathbf{v}) = (\mathbf{n}\mathbf{n}^\top)\mathbf{v}
311 | $$
312 | 
313 | $\mathbf{v}$ 平行于 $\mathbf{n}$ 的分量不受旋转影响。接着，计算目标向量 $\mathbf{v}$ 垂直于 $\mathbf{n}$ 的分量
314 | 
315 | $$
316 | \mathbf{v}_\bot = \mathbf{v} - \mathbf{v}_{\|} = (I - \mathbf{nn^\top})\mathbf{v}
317 | $$
318 | 
319 | 可以通过叉乘将这部分垂直分量旋转 $90^\circ$ 
320 | 
321 | $$
322 | \mathbf{v}_{\times} = \mathbf{n}\times\mathbf{v} = \hat{\mathbf{n}}\mathbf{v}
323 | $$
324 | 
325 | 再一次旋转 $90^\circ$ 得
326 | 
327 | $$
328 | \mathbf{v}_{\times\times} = \mathbf{n}\times\mathbf{v}_{\times} = \hat{\mathbf{n}}^2 \mathbf{v} = -\mathbf{v}_{\bot}
329 | $$
330 | 
331 | 因此
332 | 
333 | $$
334 | \mathbf{v}_{\|} = \mathbf{v} - \mathbf{v}_{\bot} = \mathbf{v} + \mathbf{v}_{\times\times} = (I + \hat{\mathbf{n}}^2)\mathbf{v}
335 | $$
336 | 
337 | 进而，我们可以得到旋转后向量 $\mathbf{u}$ 位于以 $\mathbf{n}$ 为法向量的平面分量
338 | 
339 | $$
340 | \mathbf{u}_{\bot} = \cos\theta\mathbf{v}_{\bot} + \sin\theta\mathbf{v}_{\times} = (\sin\theta\hat{\mathbf{n}} - \cos\theta\hat{\mathbf{n}}^2)\mathbf{v}
341 | $$
342 | 
343 | 最后我们得到旋转之后的向量 $\mathbf{u}$ 
344 | 
345 | $$
346 | \mathbf{u} = \mathbf{u}_{\bot} + \mathbf{v}_{\|} = (I + \sin\theta\hat{\mathbf{n}} + (1-\cos\theta)\hat{\mathbf{n}}^2) \mathbf{v}
347 | $$
348 | 
349 | 由此，我们可以将旋转矩阵表示为
350 | 
351 | $$
352 | R(\mathbf{n}, \theta) = I + \sin\theta\hat{\mathbf{n}} + (1-\cos\theta)\hat{\mathbf{n}}^2
353 | $$
354 | 
355 | 
356 | 
357 | ### 刚体运动变换的表达
358 | 
359 | 在上文的描述中，我们已经知道了刚体纯旋转运动的表示与对应的旋转矩阵的计算。在接下来的篇幅中，我们将介绍刚体更加一般性的运动变换，包含了旋转与平移。
360 | 
361 | ![Rigid-body Motion](image/Rigid-body_Motion.png)
362 | 
363 | 如上图所示，一个坐标系为 $C$ 的移动刚体上存在一点 $p$ ，点 $p$ 关于参考世界坐标系 $W$ 的表示可以简单地归结为计算向量 $X_w$ 。而 $X_w$ 的计算可以归结为一个世界坐标系 $W$ 下由原点指向刚体坐标系 $C$ 原点的平移向量 $T_{wc} \in \mathbb{R}^3$ 和转换到世界坐标系 $W$ 下的向量 $X_c$ 的向量和
364 | 
365 | $$
366 | X_w = R_{wc}X_c + T_{wc}
367 | $$
368 | 
369 | 因此，刚体运动的空间由一组**特殊的欧几里德变换 (special Euclidean transformations)** 给出
370 | 
371 | $$
372 | SE(3) \doteq \{ g=(R,T) | R \in SO(3), T \in \mathbb{R}^3 \}
373 | $$
374 | 
375 | 上述的刚体运动变换是仿射变换而非线性变换。我们利用**齐次坐标系 (homogeneous coordinates)** ，将它转换成线性变换
376 | 
377 | $$
378 | SE(3) \doteq \left\{ g=\begin{bmatrix}R & T \\ 0 & 1 \end{bmatrix} \: \Bigg| \: R \in SO(3), T \in \mathbb{R}^3 \right\} \subset \mathbb{R}^{4\times4}
379 | $$
380 | 
381 | 
382 | 
383 | #### 刚体运动的正则指数坐标
384 | 
385 | 我们已经了解了旋转矩阵 $R \in SO(3)$ 的指数坐标形式，下面将类似的坐标化推广到用齐次坐标表示的刚体运动 $g \in SE(3)$ 。考虑 $SE(3)$ 上的一条连续曲线 (即刚体的连续运动轨迹) $g : \mathbb{R} \to SE(3); \; g(t) = (R(t), T(t))$ ，用齐次坐标表示为：
386 | 
387 | $$
388 | g(t) = \begin{bmatrix} R(t) & T(t) \\ 0 & 1 \end{bmatrix} \in \mathbb{R}^{4\times4}
389 | $$
390 | 
391 | 按照刚体纯旋转形式类推，首先考虑以下形式矩阵
392 | 
393 | $$
394 | \dot g(t) g^{-1}(t) = \begin{bmatrix} \dot R(t)R^\top(t) & \dot T(t) - \dot R(t) R^\top(t)T(t) \\ 0 & 0 \end{bmatrix} \in \mathbb{R}^{4\times4}
395 | $$
396 | 
397 | 已知 $\dot R(t)R^\top(t)$ 是一个反对称矩阵，存在 $\hat\omega(t) \in so(3)$ 使得 $\hat\omega(t) = \dot R(t)R^\top(t)$ ，定义一个向量 $v(t) = \dot T(t) - \dot R(t) R^\top(t)T(t)  \in \mathbb{R}^3$ 。那么上式可以转变成
398 | 
399 | $$
400 | \dot g(t) g^{-1}(t) = \begin{bmatrix} \hat\omega(t)  & v(t) \\ 0 & 0 \end{bmatrix} \in \mathbb{R}^{4\times4}
401 | $$
402 | 
403 | 进一步定义矩阵 $\hat \xi \in \mathbb{R}^{4\times4}$ 为
404 | 
405 | $$
406 | \hat \xi(t) = \begin{bmatrix} \hat\omega(t)  & v(t) \\ 0 & 0 \end{bmatrix}
407 | $$
408 | 
409 | 那么有
410 | 
411 | $$
412 | \dot g(t) = (\dot g(t) g^{-1}(t)) g(t) = \hat \xi(t)g(t)
413 | $$
414 | 
415 | 这里的 $\hat \xi$ 可以被认为是沿着曲线 $g(t)$ 的 “切向量” ，且可以用于 $g(t)$ 的局部近似：
416 | 
417 | $$
418 | g(t+dt) \approx g(t) + \hat\xi(t) g(t)dt = (I + \hat\xi(t)dt)g(t)
419 | $$
420 | 
421 | 形如 $\hat \xi$ 的 $4\times4$ 的矩阵被称为**扭曲 (twist)** 。所有的扭曲构成一个空间，被定义成
422 | 
423 | $$
424 | se(3) \doteq \left\{ \hat\xi = \begin{bmatrix} \hat\omega & v \\ 0 & 0 \end{bmatrix} \; \bigg | \; \hat\omega \in so(3), v \in \mathbb{R}^3 \right\} \subset \mathbb{R}^{4\times4}
425 | $$
426 | 
427 | 这里的集合 $se(3)$ 被称为矩阵群 $SE(3)$ 的切空间或者李代数。定义两个操作 “$\vee$” 和 “$\wedge$” 关联扭曲 $\hat \xi \in se(3)$ 和它的扭曲坐标 $\xi \in \mathbb{R}^6$ 如下所示：
428 | 
429 | $$
430 | \begin{bmatrix} \hat\omega & v \\ 0 & 0 \end{bmatrix}^\vee \doteq \begin{bmatrix} v \\ \omega \end{bmatrix} \in \mathbb{R}^6 , \quad \begin{bmatrix} v \\ \omega \end{bmatrix}^\wedge \doteq \begin{bmatrix} \hat\omega & v \\ 0 & 0 \end{bmatrix} \in \mathbb{R}^{4\times4}
431 | $$
432 | 
433 | 在扭曲坐标 $\xi$ 下，将 $v$ 表示为线速度，$\omega$ 表示为角速度，这表明它们分别与刚体运动的位移和旋转相关。考虑特殊情况下（匀速变换运动）时， $\hat\xi$ 为常数矩阵
434 | 
435 | $$
436 | \dot g(t) = \hat \xi g(t)
437 | $$
438 | 
439 | 我们又得到了一个时不变线性常微分方程，它的解可以通过下式计算给出
440 | 
441 | $$
442 | g(t) = e^{\hat\xi t} g(0)
443 | $$
444 | 
445 | 假设初始条件 $g(0) = I$ ，则上式结果为
446 | 
447 | $$
448 | g(t) = e^{\hat\xi t}
449 | $$
450 | 
451 | 其中扭曲的指数为
452 | 
453 | $$
454 | e^{\hat\xi t} = I + \hat\xi t + \frac{(\hat\xi t)^2}{2!} + \cdots + \frac{(\hat\xi t)^n}{n!} + \cdots
455 | $$
456 | 
457 | 通过前面小节介绍的 Rodrigues 法则和矩阵指数的附加性质，可以建立如下关系
458 | 
459 | $$
460 | e^{\hat\xi} = \begin{bmatrix} e^{\hat\omega} & \frac{(I-e^{\hat\omega})\hat\omega v + \omega \omega^\top v}{\|\omega\|^2} \\ 0 & 1 \end{bmatrix}, \text{if} \quad \omega \neq 0
461 | $$
462 | 
463 | 如果 $\omega = 0$ ，那么指数化为简单的 $e^{\hat\xi} = \begin{bmatrix} I & v  \\ 0 & 1 \end{bmatrix}$ 。由上式可知， $\hat\xi$ 的指数确实是 $SE(3)$ 中的刚体变换矩阵。因此，指数映射定义了从 $se(3)$ 空间到 $SE(3)$ 空间的变换形式
464 | 
465 | $$
466 | \exp: se(3) \to SE(3); \; \hat\xi \mapsto e^{\hat\xi}
467 | $$
468 | 
469 | 类似于 $\hat\omega \in so(3)$ 与 $SO(3)$ 的关系，我们称 ${\hat\xi} \in se(3)$ 是 $SE(3)$ 的指数坐标。对于任意的 $g \in SE(3)$ 必定存在一个扭曲坐标 $\xi = (v, \omega)$ 使得 $g = \exp(\hat\xi)$ 。我们称这个指数映射的逆过程为对数映射 ${\hat\xi} = \log(g)$ 。
470 | 
471 | 
472 | 
473 | ### 坐标和速度变换
474 | 
475 | 在 SLAM 的研究过程中，经常需要知道一个点的坐标和它的速度如何随着摄像机的移动而变化。这是因为通常选择相机帧作为参考帧，并描述相机运动和与之相关的三维点更为方便。由于相机可能在移动，我们需要知道如何将坐标和速度等量从一个相机帧转换到另一个相机帧。特别是，我们想知道如何正确地表达一个点相对于一个运动的相机的位置和速度。
476 | 
477 | 
478 | 
479 | #### 坐标变换规则
480 | 
481 | 我们将应用一个刚体变换
482 | 
483 | $$
484 | g(t) = \begin{bmatrix} R(t) & T(t) \\ 0 & 1 \end{bmatrix} \in SE(3)
485 | $$
486 | 
487 | 去表示在 $t$ 时刻刚体相对于一个固定参考系下的运动变换。我们假设一个特殊情况，在 $t = 0$ 的时候，刚体坐标系与固定参考坐标系重合，即 $g(0) = I$ 。固定参考系下的任意点 $\mathbf{X}_0$ ，在 $t$ 时刻它对应到刚体坐标系下的坐标为
488 | 
489 | $$
490 | \mathbf{X}(t) = R(t)\mathbf{X}_0 + T(t)
491 | $$
492 | 
493 | 或者应用齐次坐标表示为
494 | 
495 | $$
496 | \mathbf{X}(t) = g(t)\mathbf{X}_0
497 | $$
498 | 
499 | ![Composition of rigid-body motions](image/rigid-body_motions.png)
500 | 
501 | 如上图所示，给出两个不同的时刻 $t_1$ 和 $t_2$ ，那么 $t_1$ 时刻刚体参考系下的点到 $t_2$ 时刻刚体参考系下的点变换式为
502 | 
503 | $$
504 | \mathbf{X}(t_2) = g(t_2, t_1)\mathbf{X}(t_1)
505 | $$
506 | 
507 | 易得
508 | 
509 | $$
510 | \mathbf{X}(t_3) = g(t_3, t_2)\mathbf{X}(t_2) = g(t_3, t_2)g(t_2, t_1)\mathbf{X}(t_1) = g(t_3, t_1)\mathbf{X}(t_1)
511 | $$
512 | 
513 | 因此有
514 | 
515 | $$
516 | g(t_3, t_2)g(t_2, t_1) = g(t_3, t_1)
517 | $$
518 | 
519 | 通过把 $t_1$ 时刻坐标系的坐标转换到 $t_2$ 时刻坐标系的坐标，然后再转换回来，我们可以看到
520 | 
521 | $$
522 | \mathbf{X}(t_1) = g(t_1, t_2)\mathbf{X}(t_2) = g(t_1, t_2)g(t_2, t_1)\mathbf{X}(t_1)
523 | $$
524 | 
525 | 因此可得
526 | 
527 | $$
528 | g(t_1, t_2)g(t_2, t_1) = I \iff g^{-1}(t_1, t_2) = g(t_2, t_1)
529 | $$
530 | 
531 | 综上所述，我们可以得到刚体运动的坐标变换规则概括为
532 | 
533 | $$
534 | \mathbf{X}_i = g_{ij}\mathbf{X}_j \;,\; g_{ik} = g_{ij}g_{jk} \;,\; g^{-1}_{ij} = g_{ji}
535 | $$
536 | 
537 | 
538 | 
539 | #### 速度变换规则
540 | 
541 | 根据坐标变换规则，我们知道固定参考系下一点 $p$ 的坐标 $\mathbf{X}_0$ 转换到在 $t$ 时刻刚体坐标系下为 $\mathbf{X}(t) = g_{cw}(t) \mathbf{X}_0$ ，那么在 $t$ 时刻点 $p$ 相对于刚体的瞬时速度为
542 | 
543 | $$
544 | \dot{\mathbf{X}}(t) = \dot g_{cw}(t) \mathbf{X}_0 = \dot g_{cw}(t) g^{-1}_{cw}(t)\mathbf{X}(t)
545 | $$
546 | 
547 | 通过引入扭曲坐标
548 | 
549 | $$
550 | \hat V^c_{cw}(t) \doteq \dot g_{cw}(t) g^{-1}_{cw}(t) = \begin{bmatrix} \hat\omega(t) & v(t) \\ 0 & 0 \end{bmatrix} \in se(3)
551 | $$
552 | 
553 | 由此，可以获得
554 | 
555 | $$
556 | \dot{\mathbf{X}}(t) = \hat V^c_{cw}(t)\mathbf{X}(t)
557 | $$
558 | 
559 | 在 3D 非齐次坐标系下表示为
560 | 
561 | $$
562 | \dot{\mathbf{X}}(t) = \hat\omega(t)\mathbf{X}(t) + v(t)
563 | $$
564 | 
565 | 因此，符号 $\hat V^c_{cw}(t)$ 表示从摄像机帧中看到的世界帧的相对速度。假设一个观测者 $A$ 从不同帧的坐标系观察当前帧，则该变换为：$g_{yx} : \mathbf{Y}(t) = g_{yx} \mathbf{X}(t)$ ，此时，观察者帧坐标系下的速度为
566 | 
567 | $$
568 | \dot{\mathbf{Y}}(t) = g_{yx} \dot{\mathbf{X}}(t) = g_{yx} \hat{V}(t) \mathbf{X}(t) = g_{yx} \hat{V}(t) g_{yx}^{-1} \mathbf{Y}(t)
569 | $$
570 | 
571 | 这表明，从观测者 $A$ 帧中观测到的点的相对速度用扭曲表示
572 | 
573 | $$
574 | \hat V_y = g_{yx} \hat{V} g_{yx}^{-1} \doteq \text{ad}_{g_{yx}}(\hat V)
575 | $$
576 | 
577 | 由此，引出了在 $se(3)$ 域上的**伴随映射 (adjoint map)**：
578 | 
579 | $$
580 | \text{ad}_g : se(3) \to se(3); \quad \hat\xi \mapsto g \hat\xi g^{-1}
581 | $$
582 | 
583 | 
584 | 
585 | ### 总结
586 | 
587 | |              |                         旋转 SO(3)                         |                        刚体运动 SE(3)                        |
588 | | :----------: | :--------------------------------------------------------: | :----------------------------------------------------------: |
589 | | **矩阵表示** | $R: \begin{cases} R^\top R = I \\ \det(R) = 1 \end{cases}$ |      $g = \begin{bmatrix} R & T \\ 0 & 1 \end{bmatrix}$      |
590 | | **坐标变换** |               $\mathbf{X} = R \mathbf{X}_0$                |              $\mathbf{X} = R \mathbf{X}_0 + T$               |
591 | |    **逆**    |                     $R^{-1} = R^\top$                      | $g^{-1} = \begin{bmatrix} R^\top & -R^\top T \\ 0 & 1 \end{bmatrix}$ |
592 | | **运算规则** |                  $R_{ik} = R_{ij}R_{jk}$                   |                   $g_{ik} = g_{ij}g_{jk}$                    |
593 | | **指数映射** |                   $R = \exp(\hat\omega)$                   |                     $g = \exp(\hat\xi)$                      |
594 | |   **速度**   |         $\dot{\mathbf{X}} = \hat\omega \mathbf{X}$         |        $\dot{\mathbf{X}} = \hat\omega \mathbf{X} + v$        |
595 | | **伴随映射** |          $\hat\omega \mapsto R \hat\omega R^\top$          |              $\hat\xi \mapsto g \hat\xi g^{-1}$              |
596 | 
597 | 另，刚体的旋转还可以用四元数或者欧拉角来表示，这部分这里不再引申，读者可以阅读相关的文档，如果后续有对这部分内容进行书写，再在这里链接补充（#TODO）
598 | 
599 | 
600 | 
601 | 
602 | 
603 | 
604 | 
605 | 
606 | 
607 | --8<--
608 | mathjax.txt
609 | --8<--


--------------------------------------------------------------------------------
/docs/multiple_view_geometry/image/Rigid-body_Motion.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/LSXiang/Journey2SLAM/901bf5fd97c93dc68248f74cd5bae1a52bef85a2/docs/multiple_view_geometry/image/Rigid-body_Motion.png


--------------------------------------------------------------------------------
/docs/multiple_view_geometry/image/Rotation4Rigid-body.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/LSXiang/Journey2SLAM/901bf5fd97c93dc68248f74cd5bae1a52bef85a2/docs/multiple_view_geometry/image/Rotation4Rigid-body.png


--------------------------------------------------------------------------------
/docs/multiple_view_geometry/image/geometric_interpretation2svd.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/LSXiang/Journey2SLAM/901bf5fd97c93dc68248f74cd5bae1a52bef85a2/docs/multiple_view_geometry/image/geometric_interpretation2svd.png


--------------------------------------------------------------------------------
/docs/multiple_view_geometry/image/rigid-body motion.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/LSXiang/Journey2SLAM/901bf5fd97c93dc68248f74cd5bae1a52bef85a2/docs/multiple_view_geometry/image/rigid-body motion.png


--------------------------------------------------------------------------------
/docs/multiple_view_geometry/image/rigid-body_motions.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/LSXiang/Journey2SLAM/901bf5fd97c93dc68248f74cd5bae1a52bef85a2/docs/multiple_view_geometry/image/rigid-body_motions.png


--------------------------------------------------------------------------------
/docs/multiple_view_geometry/image/rodrigues.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/LSXiang/Journey2SLAM/901bf5fd97c93dc68248f74cd5bae1a52bef85a2/docs/multiple_view_geometry/image/rodrigues.png


--------------------------------------------------------------------------------
/docs/multiple_view_geometry/linear_algebra.md:
--------------------------------------------------------------------------------
  1 | ## 线性空间的基本概念
  2 | 
  3 | ### 向量空间
  4 | 
  5 | 如果集合 $V$ 在*矢量求和 (vector summation)*  
  6 | 
  7 | $$
  8 | + : V \times V \to V
  9 | $$
 10 | 
 11 | 以及*标量乘法 (scalar multiplication)*
 12 | 
 13 | $$
 14 | \cdot : \mathbb{R} \times V \to V
 15 | $$
 16 | 
 17 | 运算下是闭合的，那么集合 $V$ 就称为在 $\mathbb{R}$ 域上的*线性空间 (linear space)* 或 *向量空间 (vector space)* 。
 18 | 
 19 | 
 20 | 
 21 | 换言之，如果 $V$ 是一个向量空间，那么对于任意的两个向量 $\mathbf{v_1}, \mathbf{v_2} \in V$ 和两个标量 $\alpha, \beta \in \mathbb{R}$ ，它们的线性组合 $\alpha \mathbf{v_1} + \beta \mathbf{v_2} \in V$ 。此外，加法运算($+$) 满足**交换律**和**结合律**，且存在**幺元 ($0$)** 以及每个元素存在**逆 ($- \mathbf{v}$)** 。标量乘法 ($\cdot$) 在 $\mathbb{R}$ 域上有：$\alpha (\beta \mathbf{v}) = (\alpha \beta)\mathbf{v}$，　$1\mathbf{v} = \mathbf{v}$ 和 $0\mathbf{v} = \mathbf{0}$ 。加法和标量乘法满足**分配律**：$(\alpha + \beta) \mathbf{v} = \alpha \mathbf{v} + \beta \mathbf{v}$，　$\alpha(\mathbf{v} + \mathbf{u}) = \alpha \mathbf{v} + \alpha \mathbf{u}$ 。  
 22 | 例如： $\mathbb{R}^n$ 就是实数域 $\mathbb{R}$ 上的线性空间。根据上述此时  $V = \mathbb{R}^n$ ， $\mathbf{v} = [x_1, \dotsc , x_n]^\top$ 。
 23 | 
 24 | 
 25 | 
 26 | 一个集合 $W \subset V$ ，$V$ 是一个向量空间的话，如果 $0 \in W$ 且集合 $W$ 对于任意的 $\alpha \in \mathbb{R}$ 在 $+$ 和 $\cdot$ 上是闭合的，那么 $W$ 称为 $V$ 的**子空间 (subspace)**。
 27 | 
 28 | 
 29 | 
 30 | ### 线性独立与基
 31 | 
 32 | 一组向量 $S = \{\mathbf{v_1}, \dotsc , \mathbf{v_k}\} \subset V$ 张成的子空间，是由这些向量的所有线性组合构成的子空间：
 33 | 
 34 | $$
 35 | span(S) = \{ \mathbf{v} \in V | \mathbf{v} = \sum_{i=1}^k \alpha_i \mathbf{v_i}\}
 36 | $$
 37 | 
 38 | 如果，
 39 | 
 40 | $$
 41 | \sum_{i=1}^k \alpha_i \mathbf{v}_i = \mathbf{0} \Rightarrow \alpha_i =  0   \forall i
 42 | $$
 43 | 
 44 | 那么集合 $S$ 就被称为**线性独立 (linearly independent)** 。
 45 | 
 46 | 换句话说，如果集合 $S$ 中的任意一个向量无法用其余向量的线性组合表示的话，那么称为线性独立，反之称为**线性相关 (linearly dependent)** 。
 47 | 
 48 | 一个向量集合 $B = \{\mathbf{v_1}, \dotsc, \mathbf{v}_n\}$ 如果它是线性独立的且可以张成向量空间 $V$ ，那么称 $B$ 是 $V$ 的**基 (basis)**。基是线性无关向量的最大集合。
 49 | 
 50 | 
 51 | 
 52 | #### 基的性质
 53 | 
 54 | 如果 $B$ 与 $B^{\prime}$ 是线性空间 $V$ 的两个基，那么：  
 55 | 
 56 | - $B$ 与 $B^{\prime}$ 将包含相同数量的线性独立向量，这个数量 $n$ 被称为向量空间 $V$ 的**维度  (dimension)** 。
 57 | 
 58 | - 让 $B = \{ b_i \}_{i = 1}^n$ 和 $B' = \{ b'_i \}_{i = 1}^n$ ，那么 $B$ 中的任意一个基向量都能够利用 $B^{\prime}$ 的线性组合形式表示： 
 59 | 
 60 |     $$
 61 |     b_j = a_{1j} b'_1 + a_{2j} b'_2 + \dotsb + a_{nj} b'_n　　a_{ij} \in \mathbb{R}, i, j = 1, 2, \dotsc, n
 62 |     $$
 63 | 
 64 |     这里用于**基底变换 (basis transform)** 的系数 $a_{ij}$ 能够被组合成矩阵 $\mathbf{A}$ ，那么我们可以用矩阵的形式来表示 $B \doteq [ b_1, b_2, \dotsc, b_n ]$ 和 $B' \doteq [ b'_1, b'_2, \dotsc, b'_n ]$ 之间的变换关系了：
 65 | 
 66 |     $$
 67 |     [ b_1, b_2, \dotsc, b_n ] = [ b'_1, b'_2, \dotsc, b'_n ] 
 68 |     	\begin{bmatrix}
 69 |         a_{11} & a_{12} & \cdots & a_{1n} \\
 70 |         a_{21} & a_{22} & \cdots & a_{2n} \\
 71 |         \vdots  & \vdots  & \ddots & \vdots  \\
 72 |         a_{n1} & a_{n2} & \cdots & a_{nn} 
 73 |       \end{bmatrix}
 74 |     $$
 75 | 
 76 |     即 $B = B' A$ ，当矩阵 $\mathbf{A}$ 可逆时，有 $B' = B A^{-1}$ 。
 77 | 
 78 | - 任意一个向量 $\mathbf{v} \in V$ 都能够写成基向量的线性组合：
 79 | 
 80 |     $$
 81 |     \mathbf{v} = x_1 b_1 + x_2 b_2 + \dotsb + x_n b_n = x'_1 b'_1 + x'_2 b'_2 + \dotsb + x'_n b'_n
 82 |     $$
 83 | 
 84 |     其中系数 $\{ x_i \in \mathbb{R} \}_{i=1}^n$ 和 $\{ x'_i \in \mathbb{R} \}_{i=1}^n$ 是唯一且确定的，称为 $\mathbf{v}$ 在每一基底下的坐标。结合上一条性质，有：
 85 | 
 86 |     $$
 87 |     \mathbf{v} 
 88 |     = 
 89 |     [ b_1, b_2, \dotsc, b_n ]
 90 |     \begin{bmatrix}
 91 |     x_1 \\ x_2 \\ \vdots \\ x_n
 92 |     \end{bmatrix}
 93 |     = 
 94 |     [ b'_1, b'_2, \dotsc, b'_n ] 
 95 |     \begin{bmatrix}
 96 |       a_{11} & a_{12} & \cdots & a_{1n} \\
 97 |       a_{21} & a_{22} & \cdots & a_{2n} \\
 98 |       \vdots & \vdots & \ddots & \vdots \\
 99 |       a_{n1} & a_{n2} & \cdots & a_{nn} 
100 |     \end{bmatrix}
101 |     \begin{bmatrix}
102 |     x_1 \\ x_2 \\ \vdots \\ x_n
103 |     \end{bmatrix}
104 |     $$
105 | 
106 |     由于 $\mathbf{v}$ 关于 $B^{\prime}$ 的坐标是唯一的，因此可以得到一个向量从一个基底到另一个基底的坐标变换为：
107 | 
108 |     $$
109 |     \begin{bmatrix}
110 |     x'_1 \\ x'_2 \\ \vdots \\ x'_n
111 |     \end{bmatrix}
112 |     = 
113 |     \begin{bmatrix}
114 |       a_{11} & a_{12} & \cdots & a_{1n} \\
115 |       a_{21} & a_{22} & \cdots & a_{2n} \\
116 |       \vdots & \vdots & \ddots & \vdots \\
117 |       a_{n1} & a_{n2} & \cdots & a_{nn} 
118 |     \end{bmatrix}
119 |     \begin{bmatrix}
120 |     x_1 \\ x_2 \\ \vdots \\ x_n
121 |     \end{bmatrix}
122 |     $$
123 | 
124 | !!! note ""
125 |     **注意变换基底和变换坐标的区别：**  
126 |     
127 |     $$
128 |     B' = B A^{-1},　　\mathbf{x}' = A \mathbf{x}
129 |     $$
130 | 
131 | 
132 | 
133 | ### 内积与正交性
134 | 
135 | #### 内积
136 | 
137 | 当满足：  
138 | 
139 | 1. $\langle u, \alpha v + \beta w \rangle =  \alpha \langle u, v \rangle + \beta \langle u, w \rangle，　\forall \alpha, \beta \in \mathbb{R}$ （线性的）
140 | 2. $\langle u, v \rangle = \langle v, u \rangle$  （对称的）
141 | 3. $\langle v, v \rangle \geq 0$ 且 $\langle v, v \rangle = 0 \Leftrightarrow v = 0$ 　（正定的）
142 | 
143 | 那么，可以在向量空间上定义**内积 (inner product) [或点积 (dot product)]** 运算:
144 | 
145 | $$
146 | \langle \cdot , \cdot \rangle : V \times V \to \mathbb{R}
147 | $$
148 | 
149 | 进而引申出**模 (norm)** :
150 | 
151 | $$
152 | |\cdot | : V \to \mathbb{R}, 　| \mathbf{v} | = \sqrt{\langle \mathbf{v} , \mathbf{v} \rangle}
153 | $$
154 | 
155 | 以及**[量度 (metric)](https://en.wikipedia.org/wiki/Metric_(mathematics))** :
156 | 
157 | $$
158 | d : V \times V \to \mathbb{R}, 　d( \mathbf{v}, \mathbf{w}) = | \mathbf{v} - \mathbf{w} | = \sqrt{\langle \mathbf{v} - \mathbf{w} , \mathbf{v} - \mathbf{w} \rangle}
159 | $$
160 | 
161 | 用于测量长度与距离，使 $V$ 成为一个**量度空间 (metric space)** 。自内积引申出量度以后，向量空间 $V$ 亦称为[**希尔伯特空间 (Hilbert space)**](https://en.wikipedia.org/wiki/Hilbert_space) 。
162 | 
163 | 
164 | 
165 | #### 实数域中的标准内积
166 | 
167 | 当 $V = \mathbb{R}^n$ 时，可以为标准基 $B = I_n$ 定义一个标准内积形式:
168 | 
169 | $$
170 | \langle \mathbf{x},\mathbf{y} \rangle \doteq \mathbf{x}^\top \mathbf{y} = \sum_{i=1}^n{x_i y_i}
171 | $$
172 | 
173 | 引申出**标准模 (L~2~-norm)** 或 **欧几里得范数 (Euclidean norm)** ：
174 | 
175 | $$
176 | \| \mathbf{x} \|_2 \doteq \sqrt{\mathbf{x}^\top \mathbf{x}} = \sqrt{x_1^2 + x_2^2 + \dotsb + x_n^2}
177 | $$
178 | 
179 | 在此基础上，利用基底变换矩阵 $\mathbf{A}$ 将 $B = I_n$ 转换到一个新的基底 $B'$ ， 即 $I_n = B' A^{-1}$ ，那么内积形式可以写成：
180 | 
181 | $$
182 | \langle \mathbf{x},\mathbf{y} \rangle = \mathbf{x}^\top \mathbf{y} = (A \mathbf{x}')^\top (A \mathbf{y}') = \mathbf{x}'^\top A^\top A \mathbf{y}' \doteq \langle \mathbf{x}',\mathbf{y}' \rangle_{A^\top A}
183 | $$
184 | 
185 | 
186 | 
187 | #### 正交性 (Orthogonality)
188 | 
189 | 如果两个向量 $\mathbf{x}, \mathbf{y}$ 正交，那么他们的内积为零，即 $\langle \mathbf{x}, \mathbf{y} \rangle = 0$ ，通常表示为 $\mathbf{x} \bot \mathbf{y}$ 。
190 | 
191 | 
192 | 
193 | #### 矩阵的克罗内克乘积 (Kronecker product) 和 堆形式(stack)
194 | 
195 | 矩阵 $A \in \mathbb{R}^{m \times n}$ 和 $B \in \mathbb{R}^{k \times l}$ 的克罗内克乘积定义为 $A \otimes B$ ，得到一个新的矩阵为：
196 | 
197 | $$
198 | A \otimes B =
199 | \begin{bmatrix}
200 |   a_{11} B & a_{12} B & \cdots & a_{1n} B \\
201 |   a_{21} B & a_{22} B & \cdots & a_{2n} B \\
202 |   \vdots   & \vdots   & \ddots & \vdots \\
203 |   a_{m1} B & a_{m2} B & \cdots & a_{mn} B 
204 | \end{bmatrix}
205 | \in \mathbb{R}^{mk \times nl}
206 | $$
207 | 
208 | 矩阵 $A \in \mathbb{R}^{m \times n}$ 的堆形式被定义为 $A^s$ ，它是通过矩阵 ${A}$ 的 $n$ 个列向量 $a_1, \dotsc, a_n \in \mathbb{R}^m$ 堆积形成的。表示成：
209 | 
210 | $$
211 | A^s \doteq 
212 | \begin{bmatrix}
213 | a_1 \\ a_2 \\ \vdots \\ a_n
214 | \end{bmatrix}
215 | \in \mathbb{R}^{mn}
216 | $$
217 | 
218 | 克罗内克乘积和矩阵堆栈形式允许我们用许多不同但等价的方式重写涉及多个向量和矩阵的代数方程。比如方程：
219 | 
220 | $$
221 | \mathbf{u}^\top A \mathbf{v} = (\mathbf{v} \otimes \mathbf{u})^\top A^s
222 | $$
223 | 
224 | 当矩阵 ${A}$ 是上式中唯一的未知量的时候，等号右边的形式是特别有用的。
225 | 
226 | 
227 | 
228 | ## 线性变换与矩阵群
229 | 
230 | ### 线性变换
231 | 
232 | 线性代数研究线性空间之间线性变换的性质。 由于这些可以用矩阵表示，所以也可以说线性代数研究的是矩阵的性质。
233 | 
234 | 一个从线性（向量）空间 $V$ 到线性空间 $W$ 的线性变换 $L$ 是满足以下条件的**映射 (map)** $L : V \to W$ ：
235 | 
236 | - $L ( \mathbf{x} +\mathbf{y} ) = L (\mathbf{x} ) + L ( \mathbf{y})	 \qquad \forall \mathbf{x} , \mathbf{y} \in V$
237 | - $L ( \alpha \mathbf{x} ) = \alpha L (\mathbf{x} ) 	 \qquad \forall \mathbf{x} \in V, \alpha \in \mathbb{R}$
238 | 
239 | 由于线性关系，$L$ 对空间 $V$ 的映射操作是唯一的，可以通过对 $V$ 中的基向量映射来定义。因而在标准基向量 $\{ e_1, \dotsc , e_n \}$ 下，映射 $L$ 可以被表示成一个矩阵 $A \in \mathbb{R}^{m \times n}$ ，有：
240 | 
241 | $$
242 | L ( \mathbf{x} ) = A \mathbf{x} \qquad \forall \mathbf{x} \in V
243 | $$
244 | 
245 | 这里的矩阵 $A$ 中的第 $i$ 列就是标准基向量 $e_i \in \mathbb{R}^n$ 在 $L$ 映射下的像：
246 | 
247 | $$
248 | A = [ L(e_1), \, L(e_2), \, \dotsc \, , L(e_n)] \quad \in \mathbb{R}^{m \times n}
249 | $$
250 | 
251 | 这里所有的 $m \times n$ 维的矩阵集表示成 $\mathcal{M} (m, n)$ 。当 $m = n$ 的时候，矩阵集 $\mathcal{M} (m, n)  \doteq \mathcal{M} (n)$ 在 $\mathbb{R}$ 域中被称为**环 (ring)** ，即，它在矩阵乘法和矩阵加法上是封闭的。
252 | 
253 | 
254 | 
255 | ### 群
256 | 
257 | 存在某些线性变换集，它们构成一个**群 (Group)** 。在计算机视觉中遇到的线性映射或矩阵通常具有群的特殊代数结构。
258 | 
259 | 群是带有操作 $\circ : G \times G \to G$ 的集合：
260 | 
261 | - 封闭性 (closed) ： $g_1 \circ g_2 \in G \quad \forall g_1, g_2 \in G$ ;
262 | - 结合律 (associative) ： $( g_1 \circ g_2 ) \circ g_3 = g_1 \circ ( g_2 \circ g_3) \quad \forall g_1, g_2, g_3 \in G$ ;
263 | - 幺元 (unit element) ： $\exists e \in G : e \circ g = g \circ e = g \quad \forall g \in G$ ;
264 | - 逆 (inverse) ： $\exists g^{-1} \in G : g \circ g^{-1} = g^{-1} \circ g = e \quad \forall g \in G$ 
265 | 
266 | 
267 | 
268 | #### 线性群 GL(n) 和 SL(n)
269 | 
270 | 所有的 $n \times n$ 维*非奇异 (non-singular)* 矩阵与矩阵乘法运算构成一个群，这样的群通常被称为**一般线性群 (general linear group)** ，定义为 $G \! L(n)$ ，即它包含所有的 $A \in \mathcal{M} (n) \; \text{且} \; det(A) \neq 0$ 。
271 | 
272 | 所有的矩阵 $A \in G \! L(n)$ 且 $det(A) = +1$ 的**子群 (subgroup)** 被称为**特殊线性群 (special linear group)** ，记为 $S \! L(n)$ 。矩阵 $A$ 的逆也属于特殊线性群，因为 $det(A^{-1}) = det(A)^{-1}$ 。
273 | 
274 | 
275 | 
276 | #### 群的矩阵表达
277 | 
278 | 如果一个群 $G$ 存在**单射映射 (injective map)**[^单射映射 (injective map)] ，那么这个群具有矩阵表达式，也被称为**矩阵群 (matrix group)** :
279 | 
280 | $$
281 | \mathcal{R} : G \to G \! L(n) \quad g \to \mathcal{R}(g)
282 | $$
283 | 
284 | 这种映射 $\mathcal{R}$ 维持了 $G$ 的**群结构 (group structure)**[^群结构 (group structure)] 。也就是说， $G$ 的复合运算 (composition) 与逆将通过以下形式的映射维持下来：
285 | 
286 | $$
287 | \mathcal{R}(e) = I_{n \times n}, \quad \mathcal{R}(g \circ h) = \mathcal{R}(g) \mathcal{R}(h), \quad \forall g, h \in G
288 | $$
289 | 
290 | 群的矩阵表示的概念是，它们可以通过查看各自矩阵群的属性来分析更抽象的群。例如：物体的旋转形成一个群，因为存在中性元素（无旋转）和反向（反向旋转），并且旋转的任何级联也是旋转（围绕不同的轴）。 如果旋转由各自的矩阵表示，则研究旋转群的属性更容易。
291 | 
292 | 
293 | 
294 | #### 仿射群 A(n)
295 | 
296 | 一个**仿射变换 (Affine transformation)** $L : \mathbb{R}^n \to \mathbb{R}^n$ 可以被一个矩阵 $A \in G \! L(n)$ 和向量 $b \in \mathbb{R}^n$ 定义成：
297 | 
298 | $$
299 | L(\mathbf{x}) = A\mathbf{x} + b
300 | $$
301 | 
302 | 所有这些仿射变换的集合称为 $n$ 维的仿射群，用 $A(n)$ 表示。
303 | 
304 | 上式中的 $L : \mathbb{R}^n \to \mathbb{R}^n$ 定义不是线性的，除非 $b = 0$ 。通过引入**齐次坐标系 (homogeneous coordinates)** 将 
305 | 
306 | $\mathbf{x} \in \mathbb{R}^n$ 升维成 $\binom{\mathbf{x}}{1}  \in \mathbb{R}^{n+1}$ ，那么 $L : \mathbb{R}^n \to \mathbb{R}^n$ 将变为：
307 | 
308 | $$
309 | L : \mathbb{R}^{n+1} \to \mathbb{R}^{n+1}, \quad 
310 | \begin{bmatrix} \mathbf{x} \\ 1 \end{bmatrix}
311 | \to
312 | \begin{bmatrix} A & b \\ 0 & 1 \end{bmatrix}
313 | \begin{bmatrix} \mathbf{x} \\ 1 \end{bmatrix}
314 | $$
315 | 
316 | 这里的矩阵 $\begin{bmatrix} A & b \\ 0 & 1 \end{bmatrix}  \in \mathbb{R}^{(n+1) \times (n+1)}, \: A \in G \! L(n) \; ， b \in \mathbb{R}^n$ 被称为**仿射矩阵 (Affine matrix)** ，它是 $G \! L(n+1)$ 的元素。仿射矩阵构成 $G \! L(n+1)$ 的一个子群。
317 | 
318 | 
319 | 
320 | #### 正交群 (Orthogonal Group) O(n)
321 | 
322 | 一个 $n \times n$ 维矩阵 $A$ （表示一个从 $\mathbb{R}^n$ 到自身的线性映射）是正交的，那么它的内积形式有：
323 | 
324 | $$
325 | \langle A \mathbf{x}, A \mathbf{y} \rangle = \langle \mathbf{x}, \mathbf{y} \rangle, \quad \forall \mathbf{x}, \mathbf{y} \in \mathbb{R}^n.
326 | $$
327 | 
328 | 所有 $n \times n$ **正交矩阵 (Orthogonal matrix)** 的集合构成 $n$ 维的正交群，用 $O(n)$ 表示。显然 $O(n)$ 是 $G \! L(n)$ 的子群。如果矩阵 $R$ 是一个正交矩阵，那么：
329 | 
330 | $$
331 | \langle R \mathbf{x}, R \mathbf{y} \rangle = \mathbf{x}^{\top} R^\top R \mathbf{y} = \mathbf{x}^\top \mathbf{y}, \quad \forall \mathbf{x}, \mathbf{y} \in \mathbb{R}^n.
332 | $$
333 | 
334 | 因此，必须有 $R^\top R = R R^\top = I$ ，换句话说：
335 | 
336 | $$
337 | O(n) = \{ R \in G\!L(n) | R^\top R = I \}
338 | $$
339 | 
340 | 上述恒等式表明，对任意的正交矩阵 $R$ ，我们有 $det(R^\top R) = (det(R))^2 = det(I) = 1$ ，也就是说正交矩阵 $R$ 的**行列式 (determinant)**  $det(R)  \in  \{ \pm1\}$ 。
341 | 
342 | 行列式值为 $+1$ 的正交群被称为**特殊正交群 (Special orthogonal group)** 被定义为 $SO(n)$ 。即，可以将 $SO(n)$ 定义成正交群与特殊线性群的交集：$SO(n) = O(n) \cap S\!L(n)$ 。当 $n = 3$ 时， $SO(3)$ 正好是 $3$ 维的**旋转矩阵 (rotation matrix)** 。
343 | 
344 | 
345 | 
346 | #### 欧几里德群 (Euclidean Group) E(n)
347 | 
348 | 正交群的仿射形式给出了欧几里德(变换)群。也就是，一个从 $\mathbb{R}^n$ 到  $\mathbb{R}^n$ 的**欧氏变换 (Euclidean transformation)** $L$ 是由矩阵 $R \in O(n)$ 和向量 $T \in \mathbb{R}^n$ 共同定义的：
349 | 
350 | $$
351 | L : \mathbb{R}^n \to \mathbb{R}^n; \quad \mathbf{x} \to R \mathbf{x} + T
352 | $$
353 | 
354 | 所有这些变换的集合称为 $n$ 维的欧氏群，用 $E(n)$ 表示。显然，欧氏群 $E(n)$ 是仿射群 $A(n)$ 的子群。因此欧氏群也可以利用齐次坐标升高一维，矩阵形式如下：
355 | 
356 | $$
357 | E(n) = 
358 | \left \{
359 | \begin{bmatrix} R & T \\ 0 & 1 \end{bmatrix} \in \mathbb{R}^{(n+1) \times (n+1)} \;
360 | \middle| \; R \in O(n),\, T \in \mathbb{R}^n 
361 | \right \}
362 | $$
363 | 
364 | 如果矩阵 $R \in SO(n)$ ，那么欧氏群转变成**特殊欧氏群 (Special Euclidean Group)** ，被定义为 $S\!E(n)$ 。当 $n = 3$ 时， $SE(3)$ 表示为**刚体运动 (Rigid-body Motion)** ，这里的 $R$ 表示旋转， $T$ 表示位移。
365 | 
366 | 
367 | 
368 | !!! note ""
369 |     **由于目前介绍的所有变换群都具有自然矩阵表达式，所以它们都是矩阵群[^李群 (Lie group)]。总结一下他们之间的关系：**
370 |     
371 |     $$
372 |     SO(n) \subset O(n) \subset G\!L(n) \quad S\!E(n) \subset E(n) \subset A(n) \subset G\!L(n+1)
373 |     $$
374 | 
375 | 
376 | 
377 | ## 矩阵的值域、零空间(核)、秩和特征向量
378 | 
379 | ### 矩阵的值域
380 | 
381 | 设矩阵 $A \in \mathbb{R}^{m \times n}$ 是一个将向量空间 $\mathbb{R}^n$ 线性映射到  $\mathbb{R}^m$ 的普通 $m \times n$ 维矩阵。矩阵 $A \in \mathbb{R}^{m \times n}$ 的[**值域 (Range)**](https://en.wikipedia.org/wiki/Row_and_column_spaces)或[**列向量张成 (Span) 的空间**](https://en.wikipedia.org/wiki/Linear_span)，记为 $range(A)$ 或 $span(A)$ ，被定义成 $\mathbb{R}^n$ 经矩阵 $A$ 映射后在 $\mathbb{R}^m$ 中得到的子空间：
382 | 
383 | $$
384 | range(A) = \{ \mathbf{y} \in \mathbb{R}^m \, | \, \exists \mathbf{x} \in \mathbb{R}^n : A \mathbf{x} = \mathbf{y} \}
385 | $$
386 | 
387 | 矩阵 $A$ 的值域由它的列向量张成的空间给出。
388 | 
389 | 
390 | 
391 | ### 矩阵的零空间
392 | 
393 | 矩阵 $A \in \mathbb{R}^{m \times n}$ 的[**零空间 (Null Space)** 或称为**核 (Kernel)**](https://en.wikipedia.org/wiki/Kernel_(linear_algebra)) ，记为 $null(A)$ 或 $ker(A)$，被定义为由矩阵 $A$ 映射 $\mathbb{R}^n$ 下的向量到 $0$ 的那部分子向量 $\mathbf{x} \in \mathbb{R}^n$ ：
394 | 
395 | $$
396 | null(A) \equiv ker(A) = \{ \mathbf{x} \in \mathbb{R}^n \, | \, A \mathbf{x} = 0 \}
397 | $$
398 | 
399 | 矩阵 $A$ 的零空间由正交于其行向量的向量给出。
400 | 
401 | !!! note ""
402 |     当考虑 $A \mathbf{x} = \mathbf{b}$ 线性方程的解时，值域和零空间的概念是很有用的。只有当 $\mathbf{b} \in range(A)$ 的时候，该方程才有解。而只有当 $null(A) = \{ \mathbf{0} \}$ (仅包含零向量) 的时候解才是唯一的。易证：假设 $\mathbf{x}_s$ 是方程 $A \mathbf{x} = \mathbf{b}$ 的一个解，且有 $\mathbf{x}_o \in ker(A)$ ，那么 $\mathbf{x}_s + \mathbf{x}_o$ 也是方程的解： $A ( \mathbf{x}_s + \mathbf{x}_o )  = A \mathbf{x}_s + A \mathbf{x}_o = \mathbf{b}$ 
403 | 
404 |  
405 | 
406 | ### 矩阵的秩
407 | 
408 | 矩阵的[**秩 (Rank)**](https://en.wikipedia.org/wiki/Rank_(linear_algebra)) 是矩阵本身值域的维度：
409 | 
410 | $$
411 | rank(A) = dim(range(A))
412 | $$
413 | 
414 | 矩阵 $A \in \mathbb{R}^{m \times n}$ 的秩有以下性质：
415 | 
416 | 1. $rank(A) = n - dim( ker(A))$
417 | 2. $0 \leq rank(A) \leq min\{ m, n \}$
418 | 3. $rank(A)$ 等于 $A$ 的线性无关行 (或列) 向量的最大个数
419 | 4. $rank(A)$ 是 $A$ 的非零**余子式 (Minor)**[^余子式 (Minor)] 的最高阶
420 | 5. 西尔维斯特的秩不等式 (Sylvester's inequality) ：让 $B \in \mathbb{R}^{n \times k}$ ，那么 $A B \in \mathbb{R}^{m \times k}$ 且 $rank(A) + rank(B) - n \leq rank(AB) \leq min \{ rank(A), \, rank(B) \}$ 
421 | 6. 对于任何非奇异矩阵 $C \in \mathbb{R}^{m \times m}$ 和 $D \in \mathbb{R}^{n \times n}$ ，有 $rank(A) = rank(C\!A\!D)$ 
422 | 
423 | 
424 | 
425 | ### 特征值与特征向量
426 | 
427 | 让 $A \in \mathbb{C}^{n \times n}$ 是一个 $n \times n$ 的**复矩阵 (complex matrix)** 。一个非零向量 $\mathbf{v} \in \mathbb{C}^n$ 被称为复矩阵 $A$ 的 **(右) 特征向量 (Eigenvector)** ，如果：
428 | 
429 | $$
430 | A \mathbf{v} = \lambda \mathbf{v}, \quad \lambda \in \mathbb{C}
431 | $$
432 | 
433 | 而这里的 $\lambda$ 称为 $A$ 的**特征值 (Eigenvalue)** 。同理，如果 $\mathbf{\eta}^\top A  = \lambda \mathbf{\eta}^\top , \; \lambda \in \mathbb{C}$ 的话，非零向量 $\mathbf{\eta} \in \mathbb{C}^n$ 被称为 **(左) 特征向量 (left Eigenvector)** 。
434 | 
435 | 矩阵 $A$ 的谱 $\sigma(A)$ 是其所有特征值的集合。
436 | 
437 | 
438 | 
439 | #### 特征值与特征向量的性质
440 | 
441 | 设 $A \in \mathbb{R}^{n \times n}$ 是一个方阵，那么有：
442 | 
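443 | 1. 如果 $A \mathbf{v} = \lambda \mathbf{v}$ ，那么对于同样的特征值 $\lambda$ ，也存在一个左特征向量 $\mathbf{\eta}$ 使得 $\mathbf{\eta}^\top A = \lambda \mathbf{\eta}^\top$ ，因此有 $\sigma(A) = \sigma(A^\top)$ 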
444 | 
445 | 
446 | 
447 | ## 对称矩阵与反对称矩阵
448 | 
449 | ### 对称矩阵
450 | 
451 | 一个矩阵 $S \in \mathbb{R}^{n \times n}$ ，如果 $S^\top = S$ 的话 ，那么矩阵 $S$ 被称为**对称矩阵 (Symmetric Matrix)** 。一个对称矩阵 $S$ 如果对任意 $\mathbf{x} \in \mathbb{R}^n$ 都有 $\mathbf{x^\top} S \mathbf{x} \geq 0$ ，那么称之为**半正定 (Positive Semi-definite)** ，记为 $S \geq 0 \;\text{或} \; S \succeq 0$ ；又如果对任意 $\mathbf{x} \neq 0$ 都有 $\mathbf{x^\top} S \mathbf{x} > 0$ ，那么称之为**正定 (Positive Definite)** ，记为$S > 0 \;\text{或} \; S \succ 0$  。
452 | 
453 | 
454 | 
455 | #### 对称矩阵的性质
456 | 
457 | 让 $S \in \mathbb{R}^{n \times n}$ 是实对称矩阵，那么：
458 | 
459 | 1. 矩阵 $S$ 的所有特征值是实数，即 $\sigma(S) \subset \mathbb{R}$ 
460 | 2. 矩阵 $S$ 对应于不同特征值 $\lambda_i \neq \lambda_j$ 的特征向量 $V_i$ 和 $V_j$ 是正交的
461 | 3. 矩阵 $S$ 存在 $n$ 个标准正交特征向量，它们构成了 $\mathbb{R}^n$ 的一组基。让 $V = (\mathbf{v_1}, \dotsc, \mathbf{v_n}) \in O(n)$ 是矩阵 $S$ 特征向量构成的正交矩阵，且 $\Lambda = diag\{ \lambda_1, \dotsc, \lambda_n \}$ 是与之特征向量关联的特征值构成的对角矩阵，那么有 $S = V \Lambda V^\top$ 
462 | 4. 如果所有的特征值是 (非负的) 正数，那么矩阵 $S$ 是 (半)正定的
463 | 5. 如果矩阵 $S \geq 0$ ，让其特征值从大到小排序 $\lambda_1 \geq \lambda_2 \geq \dotsb \geq \lambda_n$ ，那么有 $max_{\|\mathbf{x}\|_2 = 1} \langle \mathbf{x},\, S \mathbf{x} \rangle = \lambda_1$ 和 $min_{\|\mathbf{x}\|_2 = 1} \langle \mathbf{x},\, S \mathbf{x} \rangle = \lambda_n$ 
464 | 
465 | 
466 | 
467 | ### 矩阵的范数
468 | 
469 | 在空间上，矩阵 $A \in \mathbb{R}^{m \times n}$ 的**范数 (Norms)** 有很多种定义方式。它们可以基于矩阵 $A$ 的定义域或者[**上域空间 (codomain spaces)**](https://en.wikipedia.org/wiki/Codomain) 上的范数来定义。其中，矩阵 $A$ 的[**诱导二范数 (Induced 2-norm)**](https://en.wikipedia.org/wiki/Norm_(mathematics))被定义为：
470 | 
471 | $$
472 | \| A \|_2 \doteq \mathop{\text{max}}\limits_{\| \mathbf{x} \|_2 = 1} \| A \mathbf{x} \|_2 = \mathop{\text{max}}\limits_{\| \mathbf{x} \|_2 = 1} \sqrt{\langle \mathbf{x}, A^\top A \mathbf{x} \rangle}
473 | $$
474 | 
475 | 或者，可以将矩阵 $A$ 的[**弗洛贝尼乌斯范数 (Frobenius norm)**](https://en.wikipedia.org/wiki/Matrix_norm#Frobenius_norm)定义为:
476 | 
477 | $$
478 | \| A \|_f = \sqrt{ \sum_{i=1}^m \sum_{j=1}^n |a_{ij}|^2} = \sqrt{\text{trace}(A^\top A)} = \sqrt{ \sum_{i=1}^{\text{min}\{m,n\}} \sigma_i^2(A)}
479 | $$
480 | 
481 | 请注意，这些范数通常并不相同。由于矩阵 $A^\top A$ 是对称半正定的，我们可以对角化它为：
482 | 
483 | $$
484 | A^\top A = V diag\{\sigma_1^2 , \dotsc, \sigma_n^2 \} V^\top , \; \text{其中 } \sigma_1^2 \geq \sigma_i^2 \geq 0
485 | $$
486 | 
487 | 由此可得：
488 | 
489 | $$
490 | \| A \|_2 = \sigma_1 \, , \quad \| A \|_f = \sqrt{\text{trace}(A^\top A)} = \sqrt{\sigma_1^2 + \dotsb + \sigma_n^2}
491 | $$
492 | 
493 | 
494 | ### 反对称矩阵
495 | 
496 | 一个矩阵 $A \in \mathbb{R}^{n \times n}$ ，如果 $A^\top =-A$ 的话 ，那么矩阵 $A$ 被称为**反对称矩阵 (skew-symmetric or anti-symmetric)** 。
497 | 
498 | 
499 | 
500 | #### 反对称矩阵的性质
501 | 
502 | 设 $A \in \mathbb{R}^{n \times n}$ 是反对称矩阵，那么：
503 | 
504 | 1. 矩阵 $A$ 的所有特征值不是 $0$ 就是纯虚数，也就是所有特征值形如 $i \omega \, ,i^2 = -1 \, ,\omega \in \mathbb{R}$ 
505 | 2. 存在这样一个正交矩阵 $V$ 使得 $A = V \Lambda V^\top$ ，这里的 $\Lambda$ 是一个**区块对角化 (block-diagonal)** 矩阵  $\Lambda = diag \{ A_1, \dotsc , A_m, 0, \dotsc , 0 \}$ ，其中 $A_i$ 是实反对称矩阵，形式为：$\begin{bmatrix} 0 & a_i \\ -a_i & 0 \end{bmatrix} \in \mathbb{R}^{2 \times 2} , \; i = 1, \dotsc , m$ 。因而，任何反对称矩阵的秩都是偶数。
506 | 
507 | 
508 | 
509 | #### 反对称矩阵的例子
510 | 
511 | 在计算机视觉中，定义一个向量 $\mathbf{u} \in \mathbb{R}^3$ 通过 “帽运算 (hat operator)” 后得到一个常见的反对称矩阵，形如：
512 | 
513 | $$
514 | \widehat{\mathbf{u}} = 
515 | \begin{bmatrix}
516 | 0 & -u_3 & u_2 \\
517 | u_3 & 0 & -u_1 \\
518 | -u_2 & u_1 & 0
519 | \end{bmatrix}
520 | \in \mathbb{R}^{3 \times 3}
521 | $$
522 | 
523 | 可见， “帽运算 (^)” 是一个从向量空间 $\mathbb{R}^3$ 到 $3 \times 3$ 反对称矩阵空间 $so(3) \subset \mathbb{R}^{3 \times 3}$ 的线性映射。
524 | 
525 | 而且，矩阵 $\widehat{\mathbf{u}}$ 满足 $\widehat{\mathbf{u}} \mathbf{v} = \mathbf{u} \times \mathbf{v}$ ，这里的 $\times$ 定义为在向量空间 $\mathbb{R}^3$ 上的标准向量**叉积 (cross product)**。当 $\mathbf{u}  \neq 0$ 时，有  $\text{rank} ( \widehat{\mathbf{u}} )  = 2$ ，且向量 $\mathbf{u} \in \mathbb{R}^3$ 所张成的空间正好是矩阵 $\widehat{\mathbf{u}}$ 的零空间，因为有 $\widehat{\mathbf{u}} \mathbf{u} = \mathbf{u^\top} \widehat{\mathbf{u}} = 0$ 。
526 | 
527 | 如果矩阵 $A \in \mathbb{R}^{3 \times 3}, \; \text{det}(A) = 1$ ，那么有 $A^\top \widehat{\mathbf{u}} A = \widehat{A^{-1} \mathbf{u}}$ 。
528 | 
529 | 
530 | 
531 | ## 奇异值分解
532 | 
533 |  以上内容中，我们研究了矩阵的许多性质，如秩、值域、零空间和矩阵的诱导范数等。这些属性中有许多属性可以通过**奇异值分解 (Singular Value Decomposition, SVD)** 来获得。
534 | 
535 | 奇异值分解可以看作特征值和特征向量对非方阵的推广。奇异值分解的计算具有良好的数值条件。它对求解矩阵的逆、秩计算、线性最小二乘估计 (linear least-squares estimation) 、投影和定秩近似等线性代数问题非常有用。
536 | 
537 | 在实际应用中，奇异值分解得到了广泛的应用。
538 | 
539 | 
540 | 
541 | ### 奇异值的代数推导
542 | 
543 | 设矩阵 $A \in \mathbb{R}^{m \times n}$ 的秩 $rank(A) = p$ ，且不失一般性有 $m \geq n$ ， 那么存在：
544 | 
545 | $$
546 | A = U \Sigma V^\top
547 | $$
548 | 
549 | 其中：
550 | 
551 | - $\exists U \in \mathbb{R}^{m \times p}$ 的列向量是正交的
552 | - $\exists V \in \mathbb{R}^{n \times p}$ 的列向量也是正交的
553 | - $\exists \Sigma \in \mathbb{R}^{p \times p} , \, \Sigma = diag\{\sigma_1, \sigma_2, \dotsb, \sigma_p \} \text{且} \sigma_1 \geq \sigma_2 \geq \dotsb \geq \sigma_p$ 
554 | 
555 | 以上是一般情况下的奇异值分解。当矩阵 $A$ 是一个 $n \times n$ 的对称方阵时有：
556 | 
557 | $$
558 | A = V \Lambda V^\top, \quad \text{其中}\, V \in O(n), \, \Lambda = diag\{ \lambda_1, \dotsc, \lambda_n \}
559 | $$
560 | 
561 | 综上，奇异值分解能够将任意秩为 $p$ (非方阵) 矩阵分解成如上所示的两个列向量正交的矩阵 $U$ 和  $V$ 。不过我们也要重视基于对称方阵特征值分解的奇异值分解。
562 | 
563 | 
564 | 
565 | ### 奇异值分解推导
566 | 
567 | 设矩阵 $A \in \mathbb{R}^{m \times n}$ 的秩 $rank(A) = p$ ，且不失一般性有 $m \geq n$ ， 那么矩阵 $A^\top A \in \mathbb{R}^{n \times n}$ 是对称且半正定的。因此可以将它分解得到非负的特征值 $\sigma_1^2 \geq \dotsb \geq\sigma_p^2 \geq \dotsb \geq \sigma_n^2 \geq 0$ 和关联的特征向量 $\mathbf{v_1}, \dotsc, \mathbf{v_n}$ 。这里的 $\sigma_i$ 被称为奇异值。 
568 | 
569 | 由于 $ker(A^\top A) = ker(A), \,range(A^\top A) =  range(A^\top)$ (证明看参考文献2中的 Theorem A.25) ，因此有 $span \{ \mathbf{v_1}, \dotsc, \mathbf{v_p} \} = range(A^\top) , \, span \{ \mathbf{v_{p+1}}, \dotsc, \mathbf{v_n} \} = ker(A)$ 。
570 | 
571 | 让 $\mathbf{u_i} \doteq \frac{1}{\sigma_i} A \mathbf{v_i} \, \Leftrightarrow A \mathbf{v_i} = \sigma_i \mathbf{u_i}, \, i = 1, \dotsc, p$ ，那么 $\{ \mathbf{u_i} \}_{i=1}^p$ 是正交的，因为 $\langle \mathbf{u_i}, \mathbf{u_j} \rangle = \frac{1}{\sigma_i \sigma_j} \langle A \mathbf{v_i}, A \mathbf{v_j} \rangle = \frac{1}{\sigma_i \sigma_j} \langle \mathbf{v_i}, A^\top A \mathbf{v_j} \rangle = \delta_{ij}$ 
572 | 
573 | 扩充 $\{ \mathbf{u_i} \}_{i=1}^p$ 成 $\mathbb{R}^m$ 中的基 $\{ \mathbf{u_i} \}_{i=1}^m$ 。因为有 $A \mathbf{v_i} = \sigma_i \mathbf{u_i}$ ，可得：
574 | 
575 | $$
576 | A \begin{bmatrix} \mathbf{v_1}, \dotsc, \mathbf{v_n} \end{bmatrix} = \begin{bmatrix} \mathbf{u_1}, \dotsc, \mathbf{u_m} \end{bmatrix} 
577 | \begin{bmatrix}
578 | \sigma_1 & 0 & \cdots & \cdots & \cdots & 0 \\
579 | 0 & \sigma_2 & \cdots & \cdots & \cdots & 0 \\
580 | \vdots & \vdots & \ddots & \vdots & \vdots & \vdots \\
581 | \vdots & \vdots & \vdots & \sigma_p & \vdots & \vdots \\
582 | \vdots & \vdots & \vdots & \vdots & \ddots & \vdots \\
583 | 0 & 0 & 0 & \vdots & \vdots & 0_n\\
584 | \vdots & \vdots & \vdots & \vdots & \vdots & \vdots \\
585 | 0 & 0 & 0 & \cdots & \cdots & 0_m\\
586 | \end{bmatrix}
587 | $$
588 | 
589 | 定义上式为 $A \tilde{V} = \tilde{U} \tilde{\Sigma}$ ，因此有 $A = \tilde{U} \tilde{\Sigma}  \tilde{V}^\top$ 。
590 | 
591 | 现在我们简单的去除矩阵 $\tilde{U}$ 列中和 $\tilde{V}^\top$ 行中所有与奇异值 $0$ 相乘的那些行列，从而得到：
592 | 
593 | $$
594 | A = U \Sigma V^\top, \quad \text{其中} \; U \in \mathbb{R}^{m \times p}, \, V \in \mathbb{R}^{n \times p}
595 | $$
596 | 
597 | 
598 | ### 奇异值分解的几何意义
599 | 
600 | 针对矩阵 $A \in \mathbb{R}^{n \times n}$ ，奇异值分解 $A = U \Sigma V^\top$ 后得到由 $\mathbb{R}^n$ 中正交基构成列向量的矩阵 $V = \begin{bmatrix} \mathbf{v_1}, \dotsc, \mathbf{v_n} \end{bmatrix}$ 和 $U = \begin{bmatrix} \mathbf{u_1}, \dotsc, \mathbf{u_n} \end{bmatrix}$ 。如果一个点 $\mathbf{x} \in \mathbb{R}^n$ 通过矩阵 $A$ 转换成点 $\mathbf{y} \in \mathbb{R}^n$ ，那么以 $U$ 为基底的点 $\mathbf{y}$ 的坐标是通过以 $V$ 为基底的点 $\mathbf{x}$ 的坐标乘以对角矩阵 $\Sigma$ 得到的，即，每个坐标只是由对应的奇异值缩放：
601 | 
602 | $$
603 | \mathbf{y} = A \mathbf {x} = U \Sigma V^\top \mathbf{x} \iff U^\top \mathbf{y} = \Sigma V^\top \mathbf{x}
604 | $$
605 | 
606 | 设矩阵 $A = U \Sigma V^\top \in \mathbb{R}^{n \times n}$ 是一个方阵。那么矩阵 $A$ 将*单位球体 (unit sphere)* $\mathbb{S}^{n-1} \doteq \{ {\mathbf{x} \in \mathbb{R}^n :  \| \mathbf{x} \|_2 = 1} \}$ 映射到具有 $\sigma_i \mathbf{u}_i$ 为半轴的*椭圆体 (ellipsoid)* ，这里的 $\mathbf{u}_i$ 是矩阵 $U$ 的第 $i$ 列。
607 | 
608 | 针对这一点证明：让 $A \mathbf{x} = \mathbf{y}$ ，设集合  $\{ \mathbf{v_i} \}_{i=1}^n$ 是 $\mathbb{R}^n$ 中一组正交基，那么关联这个基底的点 $\mathbf{x}$ 的坐标有：
609 | 
610 | $$
611 | \begin{bmatrix} \alpha_1, \alpha_2 , \dotsc, \alpha_n \end{bmatrix}^\top = 
612 | \begin{bmatrix}
613 | \langle \mathbf{v}_1 , \mathbf{x} \rangle, \langle \mathbf{v}_2 , \mathbf{x} \rangle, \dotsc \langle \mathbf{v}_n , \mathbf{x} \rangle
614 | \end{bmatrix}^\top
615 | \, \iff \, \mathbf{\alpha} = V^\top \mathbf{x}
616 | $$
617 | 
618 | 因此有 $\mathbf{x} =  \sum_{i=1}^n \alpha_i \mathbf{v}_i$ 。那么关联  $\{ \mathbf{u_i} \}_{i=1}^n$ 基底的点 $\mathbf{y}$ 的坐标有：
619 | 
620 | $$
621 | \begin{bmatrix} \beta_1, \beta_2 , \dotsc, \beta_n \end{bmatrix}^\top = 
622 | \begin{bmatrix}
623 | \langle \mathbf{u}_1 , \mathbf{y} \rangle, \langle \mathbf{u}_2 , \mathbf{y} \rangle, \dotsc \langle \mathbf{u}_n , \mathbf{y} \rangle
624 | \end{bmatrix}^\top
625 | \, \iff \, \mathbf{\beta} = U^\top \mathbf{y}
626 | $$
627 | 
628 | 因此有 $\mathbf{y} =  \sum_{i=1}^n \beta_i \mathbf{u}_i = A\mathbf{x} = \sum_{i=1}^{n} \sigma_i \mathbf{u}_i \mathbf{v}_i^\top \mathbf{x} = \sum_{i = 1}^n \sigma_i \langle \mathbf{v}_i, \mathbf{x} \rangle \mathbf{u}_i$ ，因此有 $\beta_i = \sigma_i \alpha_i$ 。现在有 $\| \mathbf{x} \|_2^2 = \sum_{i=1}^n \alpha_i^2 = 1 , \, \forall \mathbf{x} \in \mathbb{S}^{n-1}$ ，将 $\alpha_i = \beta_i / \sigma_i$ 代入，那么有 $\sum_{i=1}^n  \beta_i^2 / \sigma_i^2 = 1$ 。这意味着点 $\mathbf{x}$ 变换后得到的点 $\mathbf{y}$ 位于一个沿以 $U$ 为基底轴方向的椭球面上。如下插图表示当 $n =2$ 时，一个单位圆通过非奇异 $A \in \mathbb{R}^{2 \times 2}$ 映射到一个椭圆的情况。
629 | 
630 | ![二维下奇异值分解的几何意义](image/geometric_interpretation2svd.png)
631 | 
632 | 
633 | 
634 | ### 广义逆的性质
635 | 
636 | 对于方阵 $A \in \mathbb{R}^{n \times n}$ ，如果它的行列式不为零，即 $det(A) \neq 0$ ，那么可以定义它的逆矩阵 $A^{-1}$ 。所有可逆矩阵的集合构成群 $G\!L(n)$ 。对于任意 (非方阵) 矩阵 $A \in \mathbb{R}^{m \times n}$ 也能够定义一个**广义逆 (generalized inverse)** ，也称为**伪逆 (pseudo inverse)** 。如果矩阵 $A$ 的奇异值分解为 $A = U \Sigma V^\top$ ，那么它的伪逆被定义成：
637 | 
638 | $$
639 | A^\dagger = V \Sigma^\dagger U^\top, \quad
640 | \Sigma^\dagger = \begin{bmatrix} \Sigma_1^{-1} & 0 \\ 0 & 0 \end{bmatrix}_{n \times m}
641 | $$
642 | 
643 | 这里的 $\Sigma_1$ 是由矩阵 $A$ 的非零奇异值构成的对角矩阵。同时，伪逆可以用类似于可逆方阵的逆的方式来使用：
644 | 
645 | $$
646 | A A^\dagger A = A, \quad A^\dagger A A^\dagger = A^\dagger
647 | $$
648 | 
649 | 考虑一个线性系统 $A \mathbf{x} = \mathbf{b}$ ，其中矩阵 $A \in \mathbb{R}^{m \times n}$ 的秩 $r \leq \min(m, n)$ 。那么最小二乘问题 $\min \| A \mathbf{x} - \mathbf{b} \|_2$ 的所有解中范数 $\| \mathbf{x} \|_2$ 最小的解 $\mathbf{x}^*$ 可以通过 $\mathbf{x}^* = A^\dagger \mathbf{b}$ 获得。
650 | 
651 | 
652 | 
653 | 
654 | 
655 | 
656 | 
657 | 
658 | 
659 | 
660 | 
661 | ## 参考
662 | 
663 | 1.  Multiple View Geometry (IN2228) SS 2016, TU München : Chapter 1
664 | 2.  An Invitation to 3D Vision: From Images to Geometric Models : Appendix A
665 | 3.  推荐观看「[麻省理工的线性代数公开课](http://open.163.com/special/opencourse/daishu.html)」
666 | 
667 | 
668 | 
669 | 
670 | 
671 | [^单射映射 (injective map)]: 一个映射 $f(\cdot)$ 满足 $f(x) \neq f(y) \; \forall x \neq y$ 那么这个映射被称为单射(injective map)。
672 | 
673 | [^群结构 (group structure)]:  	这种映射在代数中称为群同态 (group homomorphism) 。
674 | [^李群 (Lie group)]:  由于这些群体本身具有不同的结构，它们属于[**李群 (Lie Group)**](https://en.wikipedia.org/wiki/Lie_group) 。
675 | 
676 | [^余子式 (Minor)]: 矩阵的 $k$ 阶[余子式](https://en.wikipedia.org/wiki/Minor_(linear_algebra))是矩阵$A$ 的$k \times k$ 子矩阵的行列式
677 | 
678 | 
679 | 
680 | ---8<---
681 | mathjax.txt
682 | ---8<---


--------------------------------------------------------------------------------
/docs/multiple_view_geometry/mvg.md:
--------------------------------------------------------------------------------
 1 | ## 前言
 2 | 
 3 |  **多视几何 (Multiple View Geometry)**　是 SLAM 的核心部分之一。因此多视几何是每一个「SLAM 算法工程师」必备的知识。在进入每一小节内容之前，先给出一些知名高校的课程 Slides 和公开课链接网址：  
 4 | 
 5 | - [慕尼黑工业大学](https://vision.in.tum.de/teaching/ss2016/mvg2016/material "password: mvg-ss16") 
 6 | - [加州大学圣地亚哥分校](https://cseweb.ucsd.edu/classes/sp04/cse252b)
 7 | 
 8 | 在接下来与计算机视觉相关的大部分章节基本都源自于以上课程以及下面书籍加以翻译简述。如果读者的英语水平可以的话，强烈建议去观看和阅读这些更加原汁原味的课程、书籍。亦或者，结合该网站知识章节与这些课程一同学习。
 9 | 
10 | 关于多视几何的相关书籍，个人觉得只要阅读以下两本就足够了：  
11 | 
12 | - **Multiple View Geometry in Computer Vision** -- by Hartley & Zisserman (被誉为‘圣经’级的书籍)
13 | - **An Invitation to 3D Vision: From Images to Geometric Models** -- by Ma, Soatto, Kosecka and Sastry (马毅老师的著作，大多数评论说好理解一点)
14 | 
15 | 
16 | 
17 | ## 目录列表
18 | 
19 | ### #TODO
20 | 
21 | 


--------------------------------------------------------------------------------
/docs/others/md_template.md:
--------------------------------------------------------------------------------
  1 | 
  2 | # test
  3 | 
  4 | ## #TODO
  5 | 
  6 | Lorem ipsum dolor sit amet: $p(x|y) = \frac{p(y|x)p(x)}{p(y)}$
  7 | 
  8 | 
  9 | $$
 10 | \mathbf{V}_1 \times \mathbf{V}_2 = \begin{vmatrix}
 11 | \mathbf{i} & \mathbf{j} & \mathbf{k} \\
 12 | \frac{\partial X}{\partial u} & \frac{\partial Y}{\partial u} & 0 \\
 13 | \frac{\partial X}{\partial v} & \frac{\partial Y}{\partial v} & 0 \\
 14 | \end{vmatrix}
 15 | $$
 16 | 
 17 | 
 18 | 
 19 | !!! note "前面需要空4个空格"
 20 |     这样才是对的
 21 | 
 22 |     这是才是对的
 23 | 
 24 | 
 25 | !!! warning ""
 26 |     这是hi什么
 27 | 
 28 | > 这是什么鬼啊
 29 | >
 30 | > 你好
 31 | >
 32 | > > 你好啊
 33 | 
 34 | ## A setext style header
 35 | 
 36 | ### A hash style header
 37 | 
 38 | 
 39 | 
 40 | <!-- [link](http://example.com){: class="foo bar" title="Some title!" } -->
 41 | 
 42 | [link](https://www.baidu.com 'baidu') 
 43 | 
 44 | ```C++ hl_lines="1 3"
 45 | int main(int argc, char** argv) {
 46 |   if (argc != 2) {
 47 |     return -1;
 48 |   }
 49 | 
 50 |   return 0;
 51 | }
 52 | ```
 53 | *Markdown 源文件*
 54 | 
 55 | - <strong>Markdown Preview Enhanced: Toggle</strong>  
 56 |   <kbd>ctrl-shift-m</kbd>  
 57 |   开／关 Markdown 文件预览。      
 58 | - <strong> Markdown Preview Enhanced: Open Mathjax Config </strong>  
 59 |   打开 `MathJax` 设置文件。  
 60 | - **粗体**
 61 |   开关  
 62 | 
 63 | ### 按键
 64 | 
 65 | ++ctrl+alt+delete++
 66 | 
 67 | - [x] tasklist  
 68 | 
 69 | 
 70 | The HTML specification
 71 | is maintained by the W3C.
 72 | 
 73 | *[HTML]: Hyper Text Markup Language
 74 | *[W3C]:  World Wide Web Consortium
 75 | 
 76 | !!! note "Phasellus posuere in sem ut cursus"
 77 |     Lorem ipsum dolor sit amet, consectetur adipiscing elit. Nulla et euismod
 78 |     nulla. Curabitur feugiat, tortor non consequat finibus, justo purus auctor
 79 |     massa, nec semper lorem quam in massa.
 80 | 
 81 | ==Apple :==
 82 | :   Pomaceous fruit of plants of the genus Malus in
 83 |     the family Rosaceae.
 84 | 
 85 | Orange
 86 | :   The fruit of an evergreen tree of the genus Citrus.
 87 | 
 88 | ???+ note ""
 89 |     ```Bash tab="Bash"
 90 |     #!/bin/bash
 91 |     STR="Hello World!"
 92 |     echo $STR
 93 |     ```
 94 | 
 95 |     ```C tab="C"
 96 |     #include <stdio.h>
 97 | 
 98 |     int main(void) {
 99 |       printf("hello, world\n");
100 |     }
101 |     ```
102 | 
103 |     ```C++ tab="C++"
104 |     #include <iostream>
105 | 
106 |     int main() {
107 |       std::cout << "Hello, world!\n";
108 |       return 0;
109 |     }
110 |     ```
111 | 
112 |     ```C# tab="C#"
113 |     using System;
114 | 
115 |     class Program {
116 |       static void Main(string[] args) {
117 |         Console.WriteLine("Hello, world!");
118 |       }
119 |     }
120 |     ```
121 | 
122 | ???+ note "Open styled details"
123 | 
124 |     ??? danger "Nested details!"
125 |         And more content again.
126 | 
127 | 
128 | ```hl_lines="1 3" linenums="2"
129 | """Some file."""
130 | import foo.bar
131 | import boo.baz
132 | import foo.bar.baz
133 | ```
134 | 
135 | 
136 | 
137 | [![996.icu](https://img.shields.io/badge/link-996.icu-red.svg)](https://996.icu)
138 | 
139 | First Header  | Second Header
140 | ------------- | -------------
141 | Content Cell  | Content Cell
142 | Content Cell  | Content Cell
143 | 
144 | :smile:这是干嘛
145 | 
146 | !!! example "Flow Chart Example"
147 | 
148 |     ````tab="Source"
149 |     ```flow
150 |     st=>start: Start:>http://www.google.com[blank]
151 |     e=>end:>http://www.google.com
152 |     op1=>operation: My Operation
153 |     sub1=>subroutine: My Subroutine
154 |     cond=>condition: Yes
155 |     or No?:>http://www.google.com
156 |     io=>inputoutput: catch something...
157 | 
158 |     st->op1->cond
159 |     cond(yes)->io->e
160 |     cond(no)->sub1(right)->op1
161 |     ```
162 |     ````
163 | 
164 |     ```flow tab="Output"
165 |     st=>start: Start:>http://www.google.com[blank]
166 |     e=>end:>http://www.google.com
167 |     op1=>operation: My Operation
168 |     sub1=>subroutine: My Subroutine
169 |     cond=>condition: Yes
170 |     or No?:>http://www.google.com
171 |     io=>inputoutput: catch something...
172 | 
173 |     st->op1->cond
174 |     cond(yes)->io->e
175 |     cond(no)->sub1(right)->op1
176 |     ```
177 | 
178 | !!! example "Sequence Diagram Example"
179 | 
180 |     ````tab="Source"
181 |     ```sequence
182 |     Title: Here is a title
183 |     A->B: Normal line
184 |     B-->C: Dashed line
185 |     C->>D: Open arrow
186 |     D-->>A: Dashed open arrow
187 |     ```
188 |     ````
189 | 
190 |     ```sequence tab="Output"
191 |     Title: Here is a title
192 |     A->B: Normal line
193 |     B-->C: Dashed line
194 |     C->>D: Open arrow
195 |     D-->>A: Dashed open arrow
196 |     ```
197 | 
198 | [=0% "0%"]
199 | [=5% "5%"]
200 | [=25% "25%"]
201 | [=45% "45%"]
202 | [=65% "65%"]
203 | [=85% "85%"]
204 | [=100% "100%"]
205 | 
206 | --8<--
207 | mathjax.txt
208 | uml.txt
209 | --8<--


--------------------------------------------------------------------------------
/docs/projects/orb_slam/image/circumference.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/LSXiang/Journey2SLAM/901bf5fd97c93dc68248f74cd5bae1a52bef85a2/docs/projects/orb_slam/image/circumference.png


--------------------------------------------------------------------------------
/docs/projects/orb_slam/local_mapping.md:
--------------------------------------------------------------------------------
1 | #TODO


--------------------------------------------------------------------------------
/docs/projects/orb_slam/loop_closing.md:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/LSXiang/Journey2SLAM/901bf5fd97c93dc68248f74cd5bae1a52bef85a2/docs/projects/orb_slam/loop_closing.md


--------------------------------------------------------------------------------
/docs/projects/orb_slam/tracking.md:
--------------------------------------------------------------------------------
1 | #TODO


--------------------------------------------------------------------------------
/docs/projects/projects.md:
--------------------------------------------------------------------------------
 1 | ### 前言
 2 | 
 3 | 随着 SLAM 技术在市场中的应用越来越广泛，越来越多的学者投身于 SLAM 的研究中。国内外的诸多大学相关实验室发布了许多研究成果，其中不少成果进行开源，并发布了许多相关的数据集用于算法的验证。在此，借用网络上大神 [^1] [^2] [^3] 对各大学实验室的介绍引出研究成果进行归类。后续的篇幅主要是针对目前主要流行的开源项目进行梳理。个人能力有限，如果哪些讲解和推导有问题烦请大神指出。
 4 | 
 5 | - [香港科技大学的 Aerial Robotics Group](http://uav.ust.hk/ "http://uav.ust.hk/")
 6 |     - [VINS-Mono](https://github.com/HKUST-Aerial-Robotics/VINS-Mono) ：一种鲁棒且通用的实时单目视觉惯性状态估计框架
 7 |     - [VINS-Fusion](https://github.com/HKUST-Aerial-Robotics/VINS-Fusion)：一种基于优化的多传感器状态估计框架，可实现自主应用（无人机，汽车和  AR / VR）的精确自定位。VINS-Fusion 是 VINS-Mono 的扩展，支持多种视觉惯性传感器类型（单目相机 + IMU，双目相机 + IMU，甚至仅双目相机）
 8 | - [浙江大学CAD＆CG国家重点实验室的CVG（Computer Vision Group）](http://www.zjucvg.net/)
 9 |     - RKSLAM：用于AR的基于关键帧的鲁棒单目SLAM系统
10 |     - LS-ACTS：大型自动相机跟踪系统，可以处理大型视频/序列数据集 https://github.com/zju3dv/ENFT ，https://github.com/zju3dv/SegmentBA ， https://github.com/zju3dv/ENFT-SfM
11 |     - ACTS：自动相机跟踪系统
12 |     - RDSLAM：是一个实时同步定位和建图系统，它允许场景的一部分是动态的或整个场景逐渐变化。与PTAM相比，RDSLAM不仅可以在动态环境中稳健地工作，而且还可以处理更大规模的场景（重建的3D点的数量可以是数万个）
13 | - [清华大学自动化系宽带网络与数字媒体实验室 BBNC](http://media.au.tsinghua.edu.cn/index/index/index)
14 | - [中科院自动化研究所国家模式识别实验室 Robot Vision Group](http://vision.ia.ac.cn/)
15 | - [英国伦敦大学帝国理工学院 Dyson 机器人实验室](http://www.imperial.ac.uk/dyson-robotics-lab)
16 |     - [ElasticFusion](https://bitbucket.org/dysonroboticslab/elasticfusionpublic/src/master/)：一个实时的稠密的视觉 SLAM 系统，可以利用 RGB-D 相机来对房间进行全局一致的三维稠密重建
17 |     - CodeSLAM：学习一种紧凑且可优化的稠密场景几何表示 (code) ，用于稠密视觉 SLAM
18 |     - [SceneNet RGB-D](https://bitbucket.org/dysonroboticslab/scenenetrgb-d/src/master/)：一种生成室内场景轨迹的大规模照片级真实渲染的系统，数据集地址：https://robotvault.bitbucket.io/scenenet-rgbd.html
19 |     - [SemanticFusion](https://bitbucket.org/dysonroboticslab/semanticfusion/src/master/)：一种实时可视 SLAM 系统，能够使用卷积神经网络在语义上注释密集的 3D 场景
20 | - [英国牛津大学 Active Vision Laboratory](http://www.robots.ox.ac.uk/ActiveVision/index.html)
21 |     - [PTAM](https://github.com/Oxford-PTAM/PTAM-GPL)：（并行跟踪和建图）用于增强现实的相机跟踪系统
22 | - [英国牛津大学 Torr Vision Group](http://www.robots.ox.ac.uk/~tvg/)
23 |     - [交互式实时 3D 场景分割的框架](https://github.com/torrvision/spaint/tree/collaborative)：创建了一个只需使用廉价的硬件，就可以在半小时内捕获并重建整个房屋或实验室的建图系统
24 | - [苏黎世联邦理工学院 Autonomous System Lab](https://asl.ethz.ch/)
25 |     - [libpointmatcher](https://github.com/ethz-asl/libpointmatcher)：一个模块化库，它实现了迭代最近点（ICP）算法，用于配准点云
26 |     - [libnabo](https://github.com/ethz-asl/libnabo)：用于低维空间的快速K最近邻库
27 |     - [ethzasl_sensor_fusion](https://github.com/ethz-asl/ethzasl_sensor_fusion)：基于EKF的时延补偿单传感器和多传感器融合框架
28 |     - [ethzasl_ptam](https://github.com/ethz-asl/ethzasl_ptam)：用于单目SLAM的框架PTAM
29 | - [苏黎世 Robotics and Perception Group](http://rpg.ifi.uzh.ch/)
30 |     - [视觉（惯性）里程计轨迹定量评估方法](https://github.com/uzh-rpg/rpg_trajectory_evaluation)：通过视觉（ 惯性）里程计（VO / VIO）定量评估估计轨迹的质量
31 |     - [基于高效数据的分布式视觉SLAM](https://github.com/uzh-rpg/dslam_open)：该算法可实现使用便宜，轻便和多功能的相机进行分布式通信多机器人建图
32 |     - [Kalibr](https://github.com/ethz-asl/kalibr)：相机惯导标定
33 | - [慕尼黑工业大学 The Computer Vision Group](https://vision.in.tum.de/research)
34 |     - [dvo_slam](https://github.com/tum-vision/dvo_slam)：提供了来自连续图像的RGB-D相机的刚体运动估计的实现方案
35 |     - [LSD-SLAM](https://github.com/tum-vision/lsd_slam): Large-Scale Direct Monocular SLAM：一种直接单目SLAM建图技术
36 |     - [DSO](https://github.com/JakobEngel/dso): Direct Sparse Odometry：一种用于视觉里程计的新的直接稀疏建图方法
37 |     - [Basalt](https://gitlab.com/VladyslavUsenko/basalt): Visual-Inertial Mapping with Non-Linear Factor Recovery：使用非线性因子恢复法从视觉 - 惯性里程计提取信息来进行视觉 - 惯性建图
38 | - [德国弗莱堡大学 Autonomous Intelligent Systems](http://ais.informatik.uni-freiburg.de/index_en.php)
39 |     - [GMapping](https://github.com/OpenSLAM-org/openslam_gmapping) : 基于 RBPF (Rao-Blackwellized 粒子滤波) 算法的滤波 SLAM 框架
40 |     - [RGBD SLAM2](https://github.com/felixendres/rgbdslam_v2)：是一个非常全面优秀的系统，将SLAM领域的图像特征、优化、闭环检测、点云、octomap等技术融为一体，非常适合RGBD SLAM初学者，也可以在其基础上继续开发
41 | - [西班牙萨拉戈萨大学RoPeRT机器人，感知和实时组SLAM实验室](http://robots.unizar.es/slamlab/)
42 | - [明尼苏达大学 Multiple Autonomous Robotic Systems Laboratory（MARS）](http://mars.cs.umn.edu/)
43 | - [卡内基梅隆大学 Robot Perception Lab](http://rpl.ri.cmu.edu/)
44 |     - [isam](https://github.com/ori-drs/isam)：增量平滑和建图（iSAM），这是一种基于快速增量矩阵分解的同时定位和建图问题方法，通过更新自然稀疏平滑信息矩阵的QR分解来实现
45 | - [斯坦福大学人工智能实验室自动驾驶团队](http://driving.stanford.edu/)
46 | - [麻省理工学院计算机科学与人工智能实验室（CSAIL）海洋机器人组](https://marinerobotics.mit.edu/)
47 | - [宾夕法尼亚大学机械工程与应用力学系Vijay Kumar实验室](https://www.kumarrobotics.org/)
48 | - [华盛顿大学 UW Robotics and State Estimation Lab](http://rse-lab.cs.washington.edu/)
49 | - [加拿大谢布鲁克大学 IntRoLab](https://introlab.3it.usherbrooke.ca/mediawiki-introlab/index.php/Main_Page)
50 | 
51 | 
52 | 
53 | ### 开源项目列表
54 | 
55 | 
56 | 
57 | 
58 | 
59 | ### 目录
60 | 
61 | 
62 | 
63 | 
64 | 
65 | ### 参考
66 | 
67 | [^1]: https://zhuanlan.zhihu.com/p/70066976 
68 | [^2]: https://blog.csdn.net/mulinb/article/details/53421864 
69 | [^3]: http://www.computervisionblog.com/2016/01/why-slam-matters-future-of-real-time.html 


--------------------------------------------------------------------------------
/docs/projects/rpg_svo/algorithm_framework.md:
--------------------------------------------------------------------------------
  1 | ## SVO : Semi-Direct Visual Odometry
  2 | 
  3 | 半直接视觉里程计，所谓的半直接是指对图像中提取的特征点图像块进行直接匹配来获取相机的位姿，而不像直接法那样对整个图像使用直接匹配的方式来获取相机位姿。虽然*半直接 (Semi-Direct)* 法使用了特征块，但它的基础思想还是类似于*直接法 (Direct method)* 来获取位姿信息，这点与*特征点法  (Feature-Based method)* 提取一组稀疏特征点，使用特征描述子匹配，通过对极约束来估计位姿是不一样的。然而，半直接法与直接法不同的是它利用了特征块匹配，通过再投影误差最小化来对直接法估计的位姿进行优化。
  4 | 
  5 | !!! tip  
  6 |     虽然 SVO 并不是一个标准的完整 SLAM ，它舍弃了后端优化与回环检测部分，也基本没有建图功能，但是 SVO 的代码结构清晰易于理解，很适合作为第一个入门项目。
  7 | 
  8 | 
  9 | 
 10 | ## SVO 算法架构
 11 | 
 12 | SVO 算法架构主要分成两个部分：位姿估计、深度估计。如下图所示
 13 | 
 14 | ![SVO 框架流程](image/SVO_Structure.png)
 15 | 
 16 | 运动估计线程部分实现了相对姿态估计的半直接法。步骤如下：  
 17 | 
 18 | 1. 通过基于稀疏的图像对齐进行姿态初始化：通过最小化对应于相同 3D 点投影位置的像素之间的光度误差，得到相对于前一帧的相机姿态
 19 | 2. 通过对相应的 feature-patch 进行对齐，对重新投影点对应的 2D 坐标进行优化
 20 | 3. 通过最小化前向特征对准步骤中引入的重投影误差来精炼姿态和空间特征点位置以得到运动估计的结果
 21 | 
 22 | 深度估计部分，为每个待估计相应 3D 点的 2D 特征初始化概率深度滤波器。每当在图像中发现此时的 3D 到 2D 的特征对应少于设定阈值的时候，将选择新的关键帧提取特征点，进而初始化新的深度滤波器。这些滤波器的初始值具有很大的不确定性，在随后的每一帧中，深度估计都以贝叶斯方式更新。当深度滤波器的不确定性足够小时（即收敛），在地图中插入一个新的三维点，并立即用于运动估计。
 23 | 
 24 | 
 25 | 
 26 | ### 运动估计
 27 | 
 28 | SVO 利用直接方法对相机的相对运动和特征对应进行了初步的估计，并以基于特征的非线性重投影误差最小化方法进行了优化。下面将详细介绍其中的每个步骤。
 29 | 
 30 | #### 基于稀疏模型的图像对齐
 31 | 
 32 | 基于稀疏模型的图像对齐 (Sparse Model-based Image Alignment) 使用直接法最小化图像块重投影残差来获取位姿。如下图所示：其中{==红色==}的 $\color{red}{T_{k, k-1}}$ 为相邻帧之间的位姿变换，即待优化变量。
 33 | 
 34 | ![Image Alignment](image/image_alignment.png)
 35 | 
 36 | 这个过程的数学表达为求一个关于刚体运动最大似然估计 $T_{k, k-1}$ ，即可以通过求在两个连续的相机姿态之间亮度残差的最小化负对数似然函数来得到：
 37 | 
 38 | $$
 39 | \mathrm{T}_{k,k-1} = \mathrm{arg\, \mathop{min}\limits_T} \iint_{\bar{\mathcal{R}}} \mathrm{\rho} [\delta I(\mathrm{T}, \mathbf{u})] \mathrm{d} \mathbf{u} \tag{1.1}
 40 | $$
 41 | 
 42 | 因此这个过程可以分解为：
 43 | 
 44 | - **准备工作：**假设相邻帧之间的位姿 $\mathrm{T}_{k, k-1}$ 已知，一般初始化为上一相邻时刻的位姿或者假设为单位矩阵。通过之前多帧之间的特征检测以及深度估计，我们已经知道在第 k-1 帧中的特征点位置 $\mathbf{u}$ 以及它们的深度 $\mathrm{d}_\mathbf{u}$ 。 
 45 | 
 46 | - **重投影：**亮度残差 $\delta I$ 由观测同一个三维空间点的像素间的光度差确定。准备工作中已知了 $I_{k-1}$ 中的某个特征在图像平面中位置 $\mathbf{u}$ 以及它们的深度 $\mathrm{d}_\mathbf{u}$ ，能够将该特征投影到三维空间 $\mathrm{p}_{k-1}$ 。由于该三维空间的坐标系是定义在 $I_{k-1}$ 相机坐标系下的，因此需要通过位姿变换 $T_{k, k-1}$ 将它投影到当前帧 $I_{k}$ 中，得到该点当前帧坐标系中的三维坐标 $\mathrm{p}_{k}$ 。最后通过相机内参，投影到 $I_{k}$ 的图像平面得到坐标 $\mathbf{u}'$ ，完成重投影。亮度残差 $\delta I$ 定义为：
 47 | 
 48 |     $$
 49 | \delta I (\mathrm{T}, \mathbf{u}) = I_k \Big( \underbrace{ \pi \big( \underbrace{ \mathrm{T} \cdot \underbrace{\pi^{-1}(\mathbf{u}, \mathrm{d}_\mathbf{u})}_{1}}_{2} \big) }_{3} \Big) - I_{k-1}(\mathbf{u}) \quad \forall \mathbf{u} \in \bar{\mathcal{R}} \tag{1.2}
 50 |     $$
 51 | 
 52 |     公式中第 1 步为根据前一帧图像特征位置和深度逆投影到三维空间，第 2 步将三维坐标点旋转平移到当前帧坐标系下，第 3 步再将三维坐标点投影回当前帧图像坐标。其中上一帧 $I_{k-1}$ 和当前帧  $I_{k}$ 能共视到的特征集合为 $\bar{\mathcal{R}}$ ，即
 53 |     
 54 |     $$
 55 | \bar{\mathcal{R}} = \{ \mathbf{u} | \mathbf{u} \in \mathcal{R}_{k-1} \wedge \pi (\mathrm{T} \cdot \pi^{-1}(\mathbf{u}, \mathrm{d}_\mathbf{u})) \in \Omega_k \} \tag{1.3}
 56 |     $$
 57 |     
 58 |     当然在优化过程中，亮度残差 $\delta I$ 的计算方式不止这一种形式：有**前向 (forwards)** ，**逆向 (inverse)** 之分，并且还有**叠加式 (additive)** 和**构造式 (compositional)** 之分。这方面可以读读光流法方面的论文 [^2]。选择的方式不同，在迭代优化过程中计算雅克比矩阵的时候就有差别，一般为了减小计算量，都采用的是 **inverse compositional algorithm** 。 (#TODO 抑或是参考计算机视觉基础-光流篇) 
 59 | 
 60 | - **迭代优化更新位姿：**按理来说极短时间内的相邻两帧拍到空间中同一个点的亮度值应该没啥变化。但由于位姿是假设的一个值，所以重投影的点不准确，导致投影前后的亮度值是不相等的。不断优化位姿使得这些以特征点为中心的 $4 \times 4$ 像素块残差最小，就能得到优化后的位姿 $\mathrm{T}_{k, k-1}$ 。
 61 | 
 62 | 将上述过程公式化如下：为简便起见，我们假设亮度残差服从单位方差正态分布，那么负对数最小化似然估计等同于最小二乘问题，即 $\rho[\cdot] \hat{=} \frac{1}{2} \|\cdot \| ^2$ 。因此位姿 $T_{k, k-1}$ 的最小化残差**损失函数 (Cost Function)** 为：
 63 | 
 64 | $$
 65 | \mathrm{T}_{k,k-1} = \arg \min\limits_{\mathrm{T}_{k,k-1}} \frac{1}{2} \sum_{i \in \bar{\mathcal{R}}} \| \delta \mathrm{I}(\mathrm{T}_{k,k-1}, \mathbf{u}_i) \|^2 \tag{1.4}
 66 | $$
 67 | 
 68 | 上面的非线性最小二乘问题，可以用高斯牛顿迭代法求解。设位姿变换的估计值为 $\hat{T}_{k, k-1}$ 、通过**旋量坐标 (twist coordinates)** $\xi = (\omega, \upsilon)^\top \in \mathfrak{se}(3)$ 参数化估计的增量更新 $\mathrm{T}(\xi)$ 。依据图像 $I_{k-1}$ 的计算更新 $\mathrm{T}(\xi)$ ，通过 **inverse compositional** 构造亮度残差：
 69 | 
 70 | $$
 71 | \delta \mathrm{I}(\xi, \mathbf{u}_i) = \mathrm{I}_k \big(\pi(\hat{\mathrm{T}}_{k,k-1} \cdot \mathbf{p}_i) \big) - \mathrm{I}_{k-1} \big(\pi(\mathrm{T}(\xi) \cdot \mathbf{p}_i) \big)\, , \, \mathbf{p}_i = \pi^{-1}(\mathbf{u}_i, \mathrm{d}_{\mathbf{u}_i}) \tag{1.5}
 72 | $$
 73 | 
 74 | 当前的估计值通过下式更新，
 75 | 
 76 | $$
 77 | \hat{\mathrm{T}}_{k,k-1} \gets \hat{\mathrm{T}}_{k,k-1} \cdot {\mathrm{T}}(\xi)^{-1} \tag{1.6}
 78 | $$
 79 | 
 80 | 为了找到最佳的更新量 $\mathrm{T}(\xi)$ ，我们可以通过求式 (1.4) 的偏导数并让它等于零：
 81 | 
 82 | $$
 83 | \sum_{i \in \bar{\mathcal{R}}} \nabla \delta \mathrm{I} (\xi, \mathbf{u}_i)^\top \delta \mathrm{I}(\xi, \mathbf{u}_i) = 0 \tag{1.7}
 84 | $$
 85 | 
 86 | 为了求解上式，我们对当前状态进行线性化：
 87 | 
 88 | $$
 89 | \delta \mathrm{I} (\xi, \mathbf{u}_i) \approx \delta \mathrm{I}(0, \mathbf{u}_i) + \nabla \delta \mathrm{I}(0, \mathbf{u}_i) \cdot \xi \tag{1.8}
 90 | $$
 91 | 
 92 | 其中雅克比矩阵 $\mathbf{J}_i := \nabla \delta \mathrm{I}(0, \mathbf{u}_i)$ 为图像残差对李代数的求导，可以通过链式求导得到:
 93 | 
 94 | $$
 95 | \frac{\partial \delta \mathrm{I} (\xi, \mathbf{u}_i)}{\partial \xi} = \left. \frac{\partial \mathrm{I}_{k-1}(\mathrm{a})}{\partial \mathrm{a}} \right|_{\mathrm{a} = \mathbf{u}_i} \cdot \left. \frac{\partial \pi (\mathrm{b})}{\partial \mathrm{b}} \right|_{\mathrm{b}=\mathbf{p}_i} \cdot \left. \frac{\partial \mathrm{T}(\xi)}{\partial \xi} \right|_{\xi=0} \cdot \mathbf{p}_i \tag{1.9}
 96 | $$
 97 | 
 98 | 其中文章中导数的求解，请参考高博的[直接法](http://www.cnblogs.com/gaoxiang12/p/5689927.html)。（#TODO）
 99 | 
100 | 通过将式 (1.8) 代入式 (1.7) 并通过将雅克比堆叠成矩阵 $\mathbf{J}$ ，我们得到正规方程：
101 | 
102 | $$
103 | \mathbf{J}^\top \mathbf{J} \xi = - \mathbf{J}^\top \delta \mathrm{I}(0) \tag{1.10}
104 | $$
105 | 
106 | 注意，通过使用 inverse compositional 构造亮度残差方法，雅可比可以预先计算，因为它在所有迭代中保持不变，因此降低了计算量。
107 | 
108 | 以上通过当前帧与相邻前一帧反投影解算出了相对位姿 $\mathrm{T}_{k, k-1}$ ，由于三维点的位置不准确，这种 frame-to-frame 估计位姿的方式不可避免的会带来累计误差从而导致漂移，因而通过以上步骤求出的相机的姿态需要进一步优化。由此，进行下一步骤：Relaxation Through Feature Alignment 。
109 | 
110 | #### 通过特征点对齐优化匹配关系
111 | 
112 | 为了减少偏移，相机的姿态应该通过已经建立好的地图模型，来进一步约束当前帧的位姿。利用初始的位姿 $\mathrm{T}_{k, k-1}$ 关系，可以大体的对当前图像中所有可见三维点的特征位置进行初步猜测，将能够观察到地图中已经收敛的特征点投影到当前帧中。但是由于估计位姿存在偏差，导致将地图中特征点重投影到 $\mathrm{I}_k$ 中的位置并不和真正的吻合，也就是还会有残差的存在。如下图所示：（图中 $\mathrm{I}_k$ 帧图像中的灰色特征块为真实位置，蓝色特征块为预测位置）
113 | 
114 | ![Feature Alignment](image/feature_alignment.png)
115 | 
116 | 对于每个地图中重投影的特征点，识别出观察角度最小关键帧 $\mathrm{I}_r$ 上的对应点 $\mathbf{u}_i$ 。由于 3D 点和相机姿态估计不准确，所以利用**特征对齐 (Feature Alignment)** 通过最小化当前图像 $\mathrm{I}_k$ 中 patch (蓝色方块) 与关键帧 $\mathrm{I}_r$ 中的参考 patch 的光度差值来{==优化当前图像中每个 patch 的 2D 位置==} $\color{red}{\mathbf{u'}_i}$ ：
117 | 
118 | $$
119 | \mathbf{u'}_i = \arg \min\limits_{\mathbf{u'}_i} \frac{1}{2} \| \mathrm{I}_k(\mathbf{u'}_i) - \mathrm{A}_i \cdot \mathrm{I}_r(\mathbf{u}_i) \|^2, \quad \forall i. \tag{1.11}
120 | $$
121 | 
122 | 这种对齐使用 inverse compositional  Lucas-Kanade 算法来求解[^2]。并且注意，光度误差的前一部分是当前图像中 $\mathrm{I}_k$ 的亮度值，后一部分不是 $\mathrm{I}_{k-1}$ 而是 $\mathrm{I}_r$ 。由于是特征块对比，并且 3D 点所在的关键帧可能离当前帧比较远，所以光度误差和前面不一样的是还加了一个仿射变换 $\mathrm{A}_i$ ，需要对关键帧中的特征块进行旋转拉伸之类仿射变换后才能和当前帧的特征块对比。 这步不考虑极线约束，因为此时的位姿还是不准确的。这时候的迭代量计算方程和之前是一样的，只不过雅克比矩阵变了，这里的雅克比矩阵很好计算 $\mathbf{J} = \frac{\partial \mathrm{I}(\mathbf{u}_i)}{\partial \mathbf{u}_i}$ ，即为图像横纵两个方向的梯度。
123 | 
124 | 通过这一步我们能够得到优化后的特征点预测位置，它比之前通过相机位姿预测的位置更准，所以反过来，我们利用这个优化后的特征位置，能够进一步去优化相机位姿以及特征点的三维坐标。所以位姿估计的最后一步就是 Pose and Structure Refinement 。
125 | 
126 | #### BA 优化
127 | 
128 | 利用上一步建立的 $(\mathbf{p}_i \, , \, \mathbf{u}_i)$ 的对应关系，再次优化世界坐标系下的位姿 $\mathrm{T}_{k, w}$ ，以最小化重投影残差：
129 | 
130 | $$
131 | \mathrm{T}_{k,w} = \arg \min\limits_{\mathrm{T}_{k,w}} \frac{1}{2} \sum_i \| \mathbf{u}_i - \pi (\mathrm{T}_{k,w} \; {}_w\mathbf{p}_i) \|^2 \tag{1.12}
132 | $$
133 | 
134 | 上式中{==误差变成了像素重投影以后位置的差异 (不是像素值的差异)==} ，优化变量还是相机位姿，雅克比矩阵大小为 2×6 (坐标 $\mathbf{u}_i$ 分别对李代数变量 $\xi = (\omega, \upsilon)^\top \in \mathfrak{se}(3)$ 求导) 。这一步叫做 **motion-only Bundle Adjustment** 。同时根据这个误差定义，我们还能够对获取的三维点的坐标 $[x, y, z]^\top$ 进行优化，还是上面的像素位置误差形式，只不过优化变量变成三维点的坐标，这一步叫 **structure-only Bundle Adjustment** ，优化过程中雅克比矩阵大小为 2×3 (坐标 $\mathbf{u}_i$ 分别对三维点的坐标 $[x, y, z]^\top$ 变量求导) 。
135 | 
136 | 
137 | 
138 | ### 地图构建
139 | 
140 | 地图模型通常用来存储三维空间点，在 SVO 中每一个 Key frame 通过深度估计能够得到特征点的三维坐标，这些收敛的三维坐标点通过特征点在 Key Frame 中进行保存。当新帧被选为关键帧时，它会被立即插入地图。同时，又在这个新的关键帧上检测新的特征点作为深度估计的 seed ，这些 seed 会不断融合之后的图像进行深度估计。但是，如果有些 seed 点 3D 点位置通过深度估计已经收敛了，此时 map 用一个 point_candidates 来保存这些尚未插入地图中的点。所以 SVO 地图上保存的是 Key Frame 以及还未插入地图关键帧中的已经收敛的 3D 点坐标（这些 3D 点坐标是在世界坐标系下的）。
141 | 
142 | #### 深度计算
143 | 
144 | SVO 中的每个新特征点对应一个深度估计，其初值为该帧的平均深度，并被赋予极大的不确定性。通过两帧图像的匹配点就可以计算出这一点的深度值，如果有多幅图像，那就能计算出这一点的多个深度值。这就像对同一个状态变量我们进行了多次测量，因此，可以用贝叶斯估计来对多个测量值进行融合，使得估计的不确定性缩小。如下图所示：
145 | 
146 | ![Depth Estimation](image/depth_estimation.png)
147 | 
148 | 一开始深度估计的不确定性较大 ($\color{cyan}{\mathbf{\text{青色部分}}}$) ，通过三角化得到一个深度估计值以后，能够极大的缩小这个不确定性 ($\color{teal}{\mathbf{\text{墨绿部分}}}$) 。 
149 | 
150 | SVO 关于三角化计算深度的过程，主要是极线搜索确定匹配点。我们知道参考帧 $\mathrm{I}_r$ 中的一个特征的图像位置，假设它的深度值在 $[\mathrm{d}_{min},\mathrm{d}_{max}]$ 之间，那么根据这两个端点深度值，利用对极几何就能够计算出特征点在当前帧 $\mathrm{I}_k$ 中的大概位置 (位于极线段附近，即上图 $\mathrm{I}_k$ 中 $\color{cyan}{\mathbf{\text{青色线段}}}$)  。确定了特征出现的极线段位置，就可以进行特征搜索匹配。如果极线段很短，小于两个像素，那直接使用前面求位姿时提到的 Feature Alignment 光流法就可以比较准确地预测特征位置。如果极线段很长，那分两步走，第一步在极线段上间隔采样，对采样的多个特征块一一和参考帧中的特征块匹配，用 Zero mean Sum of Squared Differences 方法对各采样特征块评分，哪一个特征块得分最高，说明它和参考帧中的特征块最匹配。第二步就是在这个得分最高点附近使用 Feature Alignment 得到亚像素精度的特征点位置。
151 | 
152 | 像素点位置确定了，就可以三角化计算深度了。 SVO 使用三角化计算特征点深度，使用的是中点法，关于这个三角化代码算法的推导见 [Github Issue](https://github.com/uzh-rpg/rpg_svo/issues/62) 。这是多视角几何的基础内容，可以参考《Multiple View Geometry in Computer Vision》，或者白巧克力亦唯心的博客[^4] （or #TODO）。
153 | 
154 | #### 深度值的不确定性计算
155 | 
156 | 在三角化计算深度的时候，还有一个很重要的量需要计算，那就是这个深度值的不确定度。它在后续的利用贝叶斯概率模型更新深度的过程中被用来确定更新权重 (就像卡尔曼滤波器中的协方差矩阵扮演的角色) 。SVO 中对特征点定位不准确导致的三角化深度误差分析如下图所示：
157 | 
158 | ![Depth Uncertainty](image/depth_uncertainty.png)
159 | 
160 | 它是通过假设特征点定位差一个像素偏差，来计算深度估计的不确定性。下面给出 SVO 代码算法推导，也可见参考 5[^5]、6[^6] 。
161 | 
162 | 已知量：$C_r$ 坐标系下的单位长度特征 $\mathbf{f}$ ，位移量 $\overrightarrow{C_r C_k} : \mathbf{t}$  ，特征 $\mathbf{f}$  的计算深度 $z$ ，以及一个像素偏差的误差角度 $\angle{err\_angle} = \arctan (1 /(2.0 * focal\_length))*2.0$ ，则：
163 | 
164 | $$
165 | 向量 \: \overrightarrow{C_k \: {}_r\mathrm{p}} : \mathbf{a} = \mathbf{f} \cdot z - \mathbf{t} \\
166 | \alpha = \arccos \big(\mathbf{f} \cdot \mathbf{t} \div ( \|\mathbf{f}\| \times \|\mathbf{t}\|) \big) \\
167 | \beta = \arccos \big(\mathbf{a} \cdot (-\mathbf{t}) \div ( \|\mathbf{a}\| \times \|\mathbf{t}\|) \big) \\
168 | \beta^+ = \beta + \angle{err\_angle} \\
169 | \gamma^+ =  \pi - \alpha - \beta^+ \\
170 | \frac{z^+}{\sin(\beta^+)} = \frac{\|\mathbf{t}\|}{\sin(\gamma^+)} \quad (正弦定理) \\
171 | \tau = z^+ - z
172 | $$
173 | 
174 | #### 深度值估计更新
175 | 
176 | 有了新的深度估计值和估计不确定量以后，就可以根据贝叶斯概率模型对深度值进行更新。SVO 对深度值的估计分布采用了高斯与均匀混合分布来表示 (见参考 7 [^7]) 。
177 | 
178 | 高斯与均匀混合分布给出：一个好的测量值是在真实深度 $Z$ 为均值的正态分布附近，而一个离群值的测量值是在范围为 $[Z_{min}, Z_{max}]$ 的均匀分布的区间内，因此 $x$ 概率密度函数为：
179 | 
180 | $$
181 | p(x_n | Z, \pi) = \pi \mathcal{N}(x_n | Z, \tau_n^2) + (1-\pi) \mathcal{U}(x_n | Z_{min}, Z_{max}) \tag{2.1}
182 | $$
183 | 
184 | 注意，这里的 $\pi$ 与上文中的不是同一个，其中这里 $\pi$ 是为有效测量的概率，$\tau$ 是上一步计算的深度估计值的不确定量。当我们得到同一 seed 的一系列测量值 $x_1, \dotsc, x_n$ 时，假设这些测量值相互独立。我们想从式 (2.1) 求出 $Z, \pi$ ，最为直观的做法是求解最大似然估计，然而参考 7 [^7] 作者 G. Vogiatzis 认为最大似然估计容易被局部极大值干扰，其结果并不准确，于是选择从最大后验概率求解，等价求解 $\arg \max\limits_{Z,\pi} p(Z, \pi| x_1, \dotsc, x_n)$ 。
185 | 
186 | 下面结合 G. Vogiatzis 论文 (参考 7 [^7] ) 中的 Supplementary material 以及引用参考 8[^8]、9[^9] ，整理出该概率模型的后验概率 $p(Z, \pi| x_1, \dotsc, x_n)$ 可以用 Gaussian×Beta 分布来近似的证明推导，以及后验概率迭代形式的推导。
187 | 
188 | 首先假设 $Z , \pi$ 的先验分布在没有任何其他信息的情况下，这些量在概率上是独立的，因此满足：
189 | 
190 | $$
191 | p(Z,\pi) = p(Z) p(\pi) \tag{2.2}
192 | $$
193 | 
194 | 引入**二进制潜变量 (binary latent variables)** $y_1 \dotsc y_n$ ，那么有：
195 | 
196 | $$
197 | p(x_n | Z, \pi, y_n) = \mathcal{N}(x_n|Z, \tau_n^2)^{y_n} \mathcal{U}(x_n)^{1-y_n} \tag{2.3}
198 | $$
199 | 
200 | 和
201 | 
202 | $$
203 | p(y_n | \pi) = \pi^{y_n}(1-\pi)^{1-y_n} \tag{2.4}
204 | $$
205 | 
206 | 其中，当 $y_n = 1$ 的时候表示第 n 个测量值为内点 (inlier) (即，此次测量满足高斯分布) ，反之，当 $y_n = 0$ 的时候表示此次测量值为离群值 (outlier) (即，此次测量值属于均匀分布) 。当这些潜变量被 (2.3) 和 (2.4) 所描述的模型边缘化时，我们回到 (2.1) 的简单混合模型。令 $\mathcal{X} = [x_1, \dotsc, x_n] , \mathcal{Y} = [y_1, \dotsc, y_n]$ ，那么 $\mathcal{X, Y}, Z, \pi$ 的联合分布为：
207 | 
208 | $$
209 | \begin{align*}
210 | p(\mathcal{X, Y},Z,\pi) &= p(\mathcal{X}|Z, \pi , \mathcal{Y}) p(Z, \pi, \mathcal{Y}) \\
211 | &= p(\mathcal{X}|Z, \pi , \mathcal{Y}) p(\mathcal{Y} |Z, \pi) p(Z,\pi) \\
212 | 带入式子(2.2)得 \: &= p(\mathcal{X}|Z, \pi , \mathcal{Y}) p(\mathcal{Y} |Z, \pi) p(Z) p(\pi) \\
213 | 由于\mathcal{Y}与Z无关 \: &= p(\mathcal{X}|Z, \pi , \mathcal{Y}) p(\mathcal{Y} | \pi) p(Z) p(\pi) \\
214 | &= \Bigg[ \prod\limits_{n=1}^N p(x_n|Z,\pi,y_n)p(y_n|\pi) \Bigg] p(Z)p(\pi)
215 | \end{align*} \tag{2.5}
216 | $$
217 | 
218 | 由于并不知道要求解的后验估计 $p(\mathcal{Y}, Z, \pi | \mathcal{X})$ 是怎么样的形式，因此令 $q(\mathcal{Y}, Z, \pi)$ 是后验估计 $p(\mathcal{Y}, Z, \pi | \mathcal{X})$ 的一个近似推断，且满足以下因式分解形式：
219 | 
220 | $$
221 | q(\mathcal{Y}, Z, \pi) = q_{\mathcal{Y}}(\mathcal{Y}) \, q_{Z,\pi}(Z, \pi) \tag{2.6}
222 | $$
223 | 
224 | 由变分 (calculus of variations) 推断理论，求解后验估计 $p(\mathcal{Y}, Z, \pi | \mathcal{X})$ 的最佳近似分布等价于最小化 $q(\mathcal{Y}, Z, \pi)$ 与 $p(\mathcal{X, Y},Z,\pi)$ 的 Kullback-Leibler 散度，由此推出 $q_{\mathcal{Y}}(\mathcal{Y}) , \: q_{Z,\pi}(Z, \pi)$ 需要满足：（这步未仔细研究，读者可以先看参考10[^10] 中的 10.1.1 章节（变分推断之分解分布）、参考 11[^11] ，#TODO）
225 | 
226 | $$
227 | \ln q_{Z,\pi}(Z, \pi) = E_\mathcal{Y}[\ln p(\mathcal{X, Y}, Z, \pi)] + const \tag{2.7}
228 | $$
229 | 
230 | 和
231 | 
232 | $$
233 | \ln q_{\mathcal{Y}}(\mathcal{Y}) = E_{Z,\pi}[\ln p(\mathcal{X, Y}, Z, \pi)] + const \tag{2.8}
234 | $$
235 | 
236 | 其中 $E_\mathcal{Y}, \, E_{Z,\pi}$ 分别表示关于 $q_{\mathcal{Y}}(\mathcal{Y}), \, q_{Z,\pi}(Z, \pi)$ 的期望，这里我们只关心 $Z, \pi$ 的估计，将式 (2.3) (2.4) (2.5) 代入式 (2.7) 中：
237 | 
238 | $$
239 | \scriptsize {
240 | \begin{align*}
241 | & \small{ \ln q_{Z,\pi}(Z, \pi) } \\
242 | &= E_\mathcal{Y} \Bigg[ \ln \bigg( \Big( \prod\limits_{n=1}^N p(x_n|Z,\pi,y_n) p(y_n|\pi) \Big)p(Z)p(\pi) \bigg) \Bigg] + const \\
243 | &= E_\mathcal{Y} \Bigg[ \ln \Big( \prod\limits_{n=1}^N \mathcal{N}(x_n|Z,\tau_n^2)^{y_n} \mathcal{U}(x_n)^{1-y_n} \pi^{y_n}(1-\pi)^{1-y_n} \Big) + \ln p(Z) + \ln p(\pi) \Bigg] + const \\
244 | &= E_\mathcal{Y} \Bigg[ \sum\limits_{n=1}^N y_n \ln \mathcal{N}(x_n|Z,\tau_n^2) + \sum\limits_{n=1}^N (1-y_n) \ln \mathcal{U}(x_n) + \sum\limits_{n=1}^N y_n \ln \pi + \sum\limits_{n=1}^N (1-y_n)\ln(1-\pi) \Bigg] \\
245 | & \quad + \ln p(Z) + \ln p(\pi) + const \\
246 | &= \sum_{n=1}^N E_\mathcal{Y}[y_n] \big(\ln \mathcal{N}(x_n|Z,\tau_n^2) + \ln \pi \big) + \sum_{n=1}^N E_\mathcal{Y}[1-y_n] \big(\ln \mathcal{U}(x_n) + \ln(1-\pi) \big) \\ 
247 | & \quad + \ln p(Z) + \ln p(\pi) + const
248 | \end{align*}
249 | \tag{2.9}
250 | }
251 | $$
252 | 
253 | 对上式取两边取指数 (exponentiating) 得：
254 | 
255 | $$
256 | q_{Z,\pi}(Z, \pi) \propto \Bigg[\prod_{n=1}^N \mathcal{N}(x_n|Z,\tau_n^2)^{r_n} \Bigg] \pi^S (1-\pi)^{N-S} p(Z) p(\pi) \tag{2.10} \\
257 | 其中, \quad r_n = E_\mathcal{Y}[y_n], \, S = \sum_{n=1}^N r_n
258 | $$
259 | 
260 | 如果我们为 $Z$ 和 $\pi$ 选择共轭先验，就可以证明近似分布式 (2.10) 具有 Gaussian×Beta 形式。因此给出了一个近似真实后验的概率密度函数：
261 | 
262 | $$
263 | q(Z,\pi|a,b,\mu,\sigma^2) \doteq \mathcal{N}(Z|\mu, \sigma^2)Beta(\pi|a,b) \tag{2.11}
264 | $$
265 | 
266 | 这里的 $\mathcal{N}(Z|\mu, \sigma^2)$ 是高斯分布，以及[**贝塔分布**](https://zh.wikipedia.org/wiki/%CE%92%E5%88%86%E5%B8%83) $Beta(\pi|a,b)$ ：
267 | 
268 | $$
269 | Beta(\pi|a,b) = \frac{\Gamma(a+b)}{\Gamma(a)\Gamma(b)} \pi^{a-1}(1-\pi)^{b-1} \tag{2.12}
270 | $$
271 | 
272 | 其中 $\Gamma(\cdot)$ 是 [**Gamma 函数**](https://zh.wikipedia.org/wiki/%CE%93%E5%87%BD%E6%95%B0)，Gamma 函数具有 $\Gamma(x+1) = x\,\Gamma(x)$ 的递归形式。并且给出了该模型的迭代式：
273 | 
274 | $$
275 | q(Z,\pi|a',b',\mu',\sigma') \approx p(x|Z,\pi) q(Z,\pi|a,b,\mu,\sigma^2) \tag{2.13}
276 | $$
277 | 
278 | 其中，这里的 $(\cdot)'$ 表示 n 时刻的参数， $(\cdot)$ 表示 n-1 时刻的参数。由迭代式可以看出新时刻的参数是通过上一时刻的参数和新的深度测量值来更新的。{==之所以是**约等于**是因为上式的右端形式并不是 Gaussian×Beta 分布，而是用 $q(Z,\pi|a',b',\mu',\sigma')$ 关于 $Z$ 和 $\pi$ 的一阶矩和二阶矩相等去近似右端项，且两端的全概率等于 1，进而更新参数（这里类似于 EKF 中的更新，通常一个高斯分布通过非线性变换后分布并非高斯，但是通常利用高斯分布的一阶矩和二阶矩去近似这个变换结果）==}。将式 (2.1) (2.11) 代入上式的右端得：
279 | 
280 | $$
281 | \big(\pi \mathcal{N}(x | Z, \tau^2) + (1-\pi) \mathcal{U}(x) \big) \mathcal{N}(Z|\mu, \sigma^2)Beta(\pi|a,b) \tag{2.14}
282 | $$
283 | 
284 | 由于要求关于 $Z$ 和 $\pi$ 的一阶矩和二阶矩，需要构造以 $Z$ 和 $\pi$ 为变量的式子，因此将上式变换如下 ({==这一步骤的思路是让式 (2.14) 转变成便于求关于 $Z$ 和 $\pi$ 的一阶矩和二阶矩的形式==}) :
285 | 
286 | $$
287 | \fbox{$
288 | \scriptsize{
289 |   \begin{align*}
290 |     & \quad \big(\pi \mathcal{N}(x | Z, \tau^2) + (1-\pi) \mathcal{U}(x) \big) \mathcal{N}(Z|\mu, \sigma^2)Beta(\pi|a,b) \\
291 |     &= \pi \mathcal{N}(x | Z, \tau^2) \mathcal{N}(Z|\mu, \sigma^2)Beta(\pi|a,b) + (1-\pi) \mathcal{U}(x) \mathcal{N}(Z|\mu, \sigma^2) Beta(\pi|a,b) \\
292 |     & 根据 \Gamma(x+1) = x\,\Gamma(x) 的性质有：\\
293 |     & \quad \quad Beta(\pi|a,b) = \frac{1}{\pi} \frac{a}{a+b} Beta(\pi|a+1,b) = \frac{1}{1-\pi} \frac{b}{a+b} Beta(\pi|a,b+1) \\
294 |     &= \underbrace{\frac{a}{a+b} \mathcal{N}(x | Z, \tau^2) \mathcal{N}(Z|\mu, \sigma^2) Beta(\pi|a+1,b)}_{1} + \underbrace{\frac{b}{a+b} \mathcal{U}(x) \mathcal{N}(Z|\mu, \sigma^2) Beta(\pi|a,b+1)}_{2} \\
295 |     & 我们发现上式中的第1项中存在以 Z 或者 \pi 为参数的项 \mathcal{N}(x | Z, \tau^2)，\\
296 |     & 因此将上式第1项中高斯分布乘积中的Z与其他参数分离，进行变形 \\
297 |     & 为了方便只取 \mathcal{N}(x | Z, \tau^2) \mathcal{N}(Z|\mu, \sigma^2) 的指数项做推导: \\
298 |     & \fbox{$ \small{ \begin{align*}
299 |         & \overbrace{-\frac{(x-Z)^2}{2 \tau^2}}^{\mathcal{N}(x | Z, \tau^2)} \overbrace{-\frac{(Z-\mu)^2}{2\sigma^2}}^{\mathcal{N}(Z|\mu, \sigma^2)} \\
300 |         &= -\frac{(\sigma^2+\tau^2)Z^2 - 2(x \sigma^2 + \mu \tau^2)Z + \sigma^2 x^2 + \mu^2\tau^2}{2\tau^2\sigma^2} \\
301 |         &= -\frac{Z^2-2(\frac{x\sigma^2+\mu\tau^2}{\sigma^2+\tau^2})Z + (\frac{x\sigma^2+\mu\tau^2}{\sigma^2+\tau^2})^2 - (\frac{x\sigma^2+\mu\tau^2}{\sigma^2+\tau^2})^2 + \frac{\sigma^2x^2+\tau^2\mu^2}{\sigma^2+\tau^2}}{2\frac{\tau^2\sigma^2}{\sigma^2+\tau^2}} \\
302 |         &=-\frac{Z^2-2(\frac{x\sigma^2+\mu\tau^2}{\sigma^2+\tau^2})Z + (\frac{x\sigma^2+\mu\tau^2}{\sigma^2+\tau^2})^2}{2\frac{\tau^2\sigma^2}{\sigma^2+\tau^2}}-\frac{- (\frac{x\sigma^2+\mu\tau^2}{\sigma^2+\tau^2})^2 + \frac{\sigma^2x^2+\tau^2\mu^2}{\sigma^2+\tau^2}}{2\frac{\tau^2\sigma^2}{\sigma^2+\tau^2}} \\
303 |         &=\underbrace{-\frac{(Z-\frac{x\sigma^2+\mu\tau^2}{\sigma^2+\tau^2})^2}{2\frac{\tau^2\sigma^2}{\sigma^2+\tau^2}}}_{\mathcal{N}\big(Z \big| \color{red}{ \frac{x\sigma^2+\mu\tau^2}{\sigma^2+\tau^2}},\color{blue}{ \frac{\tau^2\sigma^2}{\sigma^2+\tau^2}}\big)} \underbrace{-\frac{(x-\mu)^2}{2(\sigma^2+\tau^2)}}_{\mathcal{N}(x|\mu, \sigma^2+\tau^2)}
304 |       \end{align*}}$}
305 |   \end{align*}
306 | } $}
307 | $$
308 | 
309 | 根据上面的推导我们可以将式 (2.14) 转变成：
310 | 
311 | $$
312 | \small{\begin{align*}\frac{a}{a+b} \mathcal{N}(x | \mu, \sigma^2 + \tau^2) \mathcal{N}(Z| \color{red}{m}, \color{blue}{s^2}) Beta(\pi|a+1,b) \\ + \frac{b}{a+b} \mathcal{U}(x) \mathcal{N}(Z|\mu, \sigma^2) Beta(\pi|a,b+1)\end{align*}} \tag{2.15}
313 | $$
314 | 
315 | 其中有：
316 | 
317 | $$
318 | s^2 = \frac{\tau^2\sigma^2}{\sigma^2+\tau^2} \to \frac{1}{s^2} = \frac{1}{\sigma^2} + \frac{1}{\tau^2} \tag{2.16}
319 | $$
320 | 
321 | 和
322 | 
323 | $$
324 | m = \frac{x\sigma^2+\mu\tau^2}{\sigma^2+\tau^2} \to m = s^2 (\frac{\mu}{\sigma^2} + \frac{x}{\tau^2}) \tag{2.17}
325 | $$
326 | 
327 | 式 (2.15) 令
328 | 
329 | $$
330 | C_1 = \frac{a}{a+b} \mathcal{N}(x | \mu, \sigma^2 + \tau^2), \quad C_2 = \frac{b}{a+b} \mathcal{U}(x) \tag{2.18}
331 | $$
332 | 
333 | 易知 $C_1, C_2$ 是与 $Z, \pi$ 无关的系数，因此式 (2.15) 变为
334 | 
335 | $$
336 | \fbox{$
337 | \scriptsize{ \begin{align*}
338 | & C_1 \mathcal{N}(Z| m, s^2) Beta(\pi|a+1,b)  + C_2 \mathcal{N}(Z|\mu, \sigma^2) Beta(\pi|a,b+1) \\
339 | & 上式的全概率为 \\
340 | & \int C_1 \mathcal{N}(Z| m, s^2) Beta(\pi|a+1,b)  + C_2 \mathcal{N}(Z|\mu, \sigma^2) Beta(\pi|a,b+1) dZ\, d\pi = C_1 +C_2 \\
341 | & 由于全概率需要为1，因此转变成 \\
342 | & \frac{C_1}{C_1 + C_2} \mathcal{N}(Z| m, s^2) Beta(\pi|a+1,b)  + \frac{C_2}{C_1 + C_2} \mathcal{N}(Z|\mu, \sigma^2) Beta(\pi|a,b+1)
343 | \end{align*}
344 | } $}
345 | $$
346 | 
347 | 我们令
348 | 
349 | $$
350 | C'_1 = \frac{C_1}{C_1 + C_2}, \quad C'_2 = \frac{C_2}{C_1 + C_2} \tag{2.19}
351 | $$
352 | 
353 | 由此归一化后的概率密度函数形式为：
354 | 
355 | $$
356 | \small{ C'_1 \mathcal{N}(Z| m, s^2) Beta(\pi|a+1,b)  + C'_2 \mathcal{N}(Z|\mu, \sigma^2) Beta(\pi|a,b+1) } \tag{A}
357 | $$
358 | 
359 | 分别对上式 (A) 和 $q(Z,\pi|a',b',\mu',\sigma')$ 求关于 $Z$ 和 $\pi$ 的一阶矩和二阶矩：
360 | 
361 | $$
362 | \fbox{$
363 | \scriptsize{\begin{align*}
364 |   & 先对 q(Z,\pi|a',b',\mu',\sigma') =\mathcal{N}(Z|\mu', \sigma'^2)Beta(\pi|a',b') 求关于 Z 的一阶矩和二阶矩: \\
365 |   & E_Z = \int Z \, \mathcal{N}(Z|\mu',\sigma'^2)Beta(\pi|a',b') dZ \\
366 |   & \quad = \int Z \, \mathcal{N}(Z|\mu',\sigma'^2) dZ = E[\mathcal{N}(Z|\mu',\sigma'^2)] = \mu' \\
367 |   & D_Z = \int Z^2 \, \mathcal{N}(Z|\mu',\sigma'^2)Beta(\pi|a',b') dZ \\
368 |   & \quad = \underbrace{\int (Z-\mu')^2 \, \mathcal{N}(Z|\mu',\sigma'^2) dZ}_{\small{D[\mathcal{N}(Z|\mu',\sigma'^2)]=\sigma'^2}} + 2\mu' \underbrace{\int Z \, \mathcal{N}(Z|\mu',\sigma'^2) dZ}_{\small{E[\mathcal{N}(Z|\mu',\sigma'^2)]=\mu'}} - \mu'^2 \underbrace{\int \mathcal{N}(Z|\mu',\sigma'^2) dZ}_{\small{=1}}  \\
369 |   & \qquad = \sigma'^2 + \mu'^2 \\ \\
370 |   & 对 q(Z,\pi|a',b',\mu',\sigma') 求关于 \pi 的一阶矩和二阶矩: \\
371 |   & E_\pi = \int \pi \, \mathcal{N}(Z|\mu',\sigma'^2)Beta(\pi|a',b') d\pi \\
372 |   & \quad\! = \underbrace{\int \pi \, Beta(\pi|a',b') d\pi}_{\small{E[Beta(\pi|a',b')] = \frac{a'}{a'+b'}}} = \frac{a'}{a'+b'} \underbrace{\int Beta(\pi|a'+1,b') d\pi}_{\small{=1}} = \frac{a'}{a'+b'} \\
373 |   & D_\pi = \int \pi^2 \, \mathcal{N}(Z|\mu',\sigma'^2)Beta(\pi|a',b') d\pi \\
374 |   & \quad\! = \int \pi^2 \, Beta(\pi|a',b') d\pi = \frac{a'}{a'+b'} \frac{a'+1}{a'+b'+1} \underbrace{\int Beta(\pi|a'+1+1,b') d\pi}_{\small{=1}} \\
375 |   & 或 \; = \underbrace{\int (\pi-E_\pi)^2 \, Beta(\pi|a',b') d\pi}_{\small{D[Beta(\pi|a',b')] = \frac{a'b'}{(a'+b')^2(a'+b'+1)}}} + \underbrace{ 2E_\pi \int \pi \, Beta(\pi|a',b') d\pi}_{\small{2 E^2_\pi = 2\frac{a'^2}{(a'+b')^2}}} - \underbrace{E_\pi^2 \int Beta(\pi|a',b') d\pi}_{\small{E_\pi^2\cdot1 = \frac{a'^2}{(a'+b')^2}}} \\
376 |   & \quad\! = \frac{a'(a'+1)}{(a'+b')(a'+b'+1)} \\ \\ \\
377 |   & 根据上面求解的方式对 式(A) 求关于 Z 和 \pi 的一阶矩和二阶矩有: \\
378 |   & E'_Z = C'_1 m + C'_2\mu \quad D'_Z = C'_1(m^2+s^2) + C'_2(\mu^2+\sigma^2) \\ \\
379 |   & E'_\pi = C'_1\frac{a+1}{a+b+1} + C'_2\frac{a}{a+b+1} \quad D'_\pi = C'_1\frac{(a+1)(a+2)}{(a+b+1)(a+b+2)} + C'_2\frac{a(a+1)}{(a+b+1)(a+b+2)}
380 | \end{align*}}
381 | $}
382 | $$
383 | 
384 | 为了使迭代式 (式 2.13) 成立，需要令式子两端关于 $Z$ 和 $\pi$ 的一阶矩和二阶矩相等，所以有：
385 | 
386 | 令 $E_Z = E'_Z$ 得：
387 | 
388 | $$
389 | \mu' = C'_1 m + C'_2\mu \tag{2.20}
390 | $$
391 | 
392 | 令 $D_Z = D'_Z$ 得：
393 | 
394 | $$
395 | \sigma'^2+\mu'^2 = C'_1(m^2+s^2) + C'_2(\mu^2+\sigma^2) \tag{2.21}
396 | $$
397 | 
398 | 令 $E_\pi = E'_\pi$ 得：
399 | 
400 | $$
401 | \frac{a'}{a'+b'} = C'_1\frac{a+1}{a+b+1} + C'_2\frac{a}{a+b+1} \tag{2.22}
402 | $$
403 | 
404 | 令 $D_\pi = D'_\pi$ 得：
405 | 
406 | $$
407 | \begin{align*}
408 |   \frac{a'(a'+1)}{(a'+b')(a'+b'+1)} &= C'_1\frac{(a+1)(a+2)}{(a+b+1)(a+b+2)} \\
409 |   &\quad + C'_2\frac{a(a+1)}{(a+b+1)(a+b+2)} 
410 | \end{align*} \tag{2.23}
411 | $$
412 | 
413 | 由上面四个等式就可以通过 n-1 时刻的参数 $a, b, \mu, \sigma$ 求解 n 时刻的参数 $a', b', \mu', \sigma'$ 了。因此在加入新的测量时，通过迭代式 (2.13) 近似后验概率分布会得到更新，当 $\sigma$ 小于给定阈值时，认为估计的深度值已经收敛，计算对应特征点的三维坐标，并加入地图。
414 | 
415 | 
416 | 
417 | ## 总结
418 | 
419 | SVO 相对于目前其他开源的方案，它的最大优势是速度极快。这是因为它使用了稀疏的直接法，既不用费力的去计算描述子用于特征匹配，也不必像稠密或半稠密法那样处理大量的信息，因此 SVO 在低端的计算平台上也可以达到实时性。但也由于 SVO 的目标应用平台选为了无人机的俯视相机，围绕该特定应用场景的设计也使得 SVO 的局限性比较大，并且为了计算速度，直接舍弃了后端优化与回环检测，也基本没有重定位能力。因此我们可以称它仅仅是个里程计，而非完整的 SLAM 。
420 | 
421 | 
422 | 
423 | 关于代码中的其他细节，请观看下一篇 「 [SVO 代码解析](code_analysis.md)」。
424 | 
425 | 
426 | 
427 | 
428 | 
429 | 
430 | 
431 | 
432 | 
433 | ## 参考
434 | 
435 | [^1]: Forster, Christian, Matia Pizzoli, and Davide Scaramuzza. "[SVO: Fast semi-direct monocular visual odometry.](http://rpg.ifi.uzh.ch/docs/ICRA14_Forster.pdf)" 2014 IEEE International Conference on Robotics and Automation (ICRA). IEEE, 2014.
436 | 
437 | [^2]: S. Baker and I. Matthews, “[Lucas-Kanade 20 Years On: A Unifying Framework](https://www.cs.cmu.edu/afs/cs/academic/class/15385-s12/www/lec_slides/Baker&Matthews.pdf): Part 1,” International Journal of Computer Vision, vol. 56, no. 3, pp. 221–255, 2002.
438 | 
439 | [^3]: [白巧克力亦唯心 - svo : semi-direct visual odometry 论文解析](https://blog.csdn.net/heyijia0327/article/details/51083398) 
440 | 
441 | [^4]: [Monocular slam 中的理论基础 (2) ](https://blog.csdn.net/heyijia0327/article/details/50774104) 
442 | 
443 | [^5]: [REMODE: Probabilistic, Monocular Dense Reconstruction in Real Time](https://files.ifi.uzh.ch/rpg/website/rpg.ifi.uzh.ch/html/docs/ICRA14_Pizzoli.pdf) 
444 | 
445 | [^6]: 《视觉 SLAM 十四讲》高翔等著：13.2.3 小节
446 | [^7]:  [G. Vogiatzis](http://www.george-vogiatzis.org/) and C. Hernández, “[Video-based, Real-Time Multi View Stereo](http://www.george-vogiatzis.org/publications/ivcj2010.pdf),” Image and Vision Computing, vol. 29, no. 7, 2011.  [Supplementary material](http://www.george-vogiatzis.org/publications/ivcj2010supp.pdf) 
447 | 
448 | [^8]: [路游侠 - SVO 解析](http://www.cnblogs.com/luyb/p/5773691.html) 
449 | [^9]: [可爱的小蚂蚁 - svo 的 Supplementary material 推导过程](https://blog.csdn.net/u013004597/article/details/52069741)
450 | 
451 | [^10]: Christopher M. Bishop. Pattern Recognition and Machine Learning (Information Science and Statistics). Springer, 1 edition, October 2007.
452 | [^11]: [参考 10 的翻译版本](https://mqshen.gitbooks.io/prml/content/Chapter10/variational/factorized_distributions.html)，膜拜大神
453 | 
454 | 
455 | 
456 | --8<--
457 | mathjax.txt
458 | --8<--


--------------------------------------------------------------------------------
/docs/projects/rpg_svo/code_analysis.md:
--------------------------------------------------------------------------------
  1 | ## SVO 运行实践
  2 | 
  3 | SVO 的下载编译可以参照 SVO 作者写的教程：https://github.com/uzh-rpg/rpg_svo/wiki 。如果手上没有摄像头的话，作者提供了数据集，可以参照文档先跑跑试试。
  4 | 
  5 | 下面给出利用 [**MYNT-EYE(S)**](https://mynt-eye-s-sdk.readthedocs.io/zh_CN/latest/index.html) 摄像头运行 SVO 程序的简单实践过程。作者提供的数据集选用的是 ATAN 相机模型，然而 MYNT-EYE 选用 Pinhole 模型。利用 MYNT-EYE 官方提供的 SDK 运行摄像头，关于摄像头标定这部分这里就不展开。根据 live.launch 我们知道需要提供摄像头的 Mono 图像数据，以及该摄像头的标定参数。下面简单给出我的实现过程。
  6 | 
  7 | 在 `svo_ros/param` 文件夹下创建 `MYNT-EYE` 左目摄像头的标定参数文件，命名为 `mynteye_pinhole_left.yaml` ：
  8 | 
  9 | ```yaml
 10 | cam_model: Pinhole
 11 | cam_width: 752
 12 | cam_height: 480
 13 | cam_fx: 357.77341636441826722
 14 | cam_fy: 358.22830460728204116
 15 | cam_cx: 396.35636517871080287
 16 | cam_cy: 249.02802206835875154
 17 | cam_d0: -0.28849480567934699
 18 | cam_d1: 0.06557692100207448
 19 | cam_d2: 0.00058043720553085
 20 | cam_d3: 0.00017708338176132
 21 | ```
 22 | 
 23 | 然后在 `svo_ros/launch` 文件夹下创建启动文件，命名为 `mynteye-live.launch` ：
 24 | 
 25 | ```xml
 26 | <launch>
 27 | 
 28 |   <node pkg="svo_ros" type="vo" name="svo" clear_params="true" output="screen">
 29 |   
 30 |     <!-- Camera topic to subscribe to -->
 31 |     <param name="cam_topic" value="/mynteye/left/image_raw" type="str" />
 32 |     
 33 |     <!-- Camera calibration file -->
 34 |     <rosparam file="$(find svo_ros)/param/mynteye_pinhole_left.yaml" />
 35 |     
 36 |     <!-- Default parameter settings: choose between vo_fast and vo_accurate -->
 37 |     <rosparam file="$(find svo_ros)/param/vo_fast.yaml" />
 38 | 
 39 |   </node>
 40 | 
 41 | <!--  <node pkg="rqt_svo" type="rqt_svo" name="rqt_svo" />-->
 42 |   
 43 |   <node pkg="rviz" type="rviz" name="odometry_rviz" args="-d $(find svo_ros)/rviz_config.rviz"/>
 44 |         
 45 | </launch>
 46 | ```
 47 | 
 48 | 然后分别在两个 `Terminal` 中启动 SVO 和 MYNT-EYE ：
 49 | 
 50 | ```bash
 51 | source $(path-to-svo-catkin-workspace)/devel/setup.bash
 52 | roslaunch svo_ros mynteye-live.launch 
 53 | ```
 54 | 
 55 | ```bash
 56 | source $(path-to-mynteye-catkin-workspace)/devel/setup.bash
 57 | roslaunch mynt_eye_ros_wrapper mynteye.launch
 58 | ```
 59 | 
 60 | 然后运行效果如下：
 61 | 
 62 | <iframe height=480 width=640 src="//player.bilibili.com/player.html?aid=52568604&cid=91997375&page=1" scrolling="no" border="0" frameborder="no" framespacing="0" allowfullscreen="true"> </iframe>
 63 | 从视频效果来看，SVO 的整体表现还是相当不错的。当然这与 MYNT-EYE 为全局曝光有一定的关系，并且我在桌面上提供了相对较多的纹理可观的特征。个人的目标是将 SVO 修改成可以前视并添加多摄像机模型以及添加 IMU 使之成为 SVIO （即表现力为现在未开源的 SVO2 ）。
 64 | 
 65 | 下面为对 SVO 的代码解读，有些内容想分散到各知识结构部分，添加了 #TODO ，但是都对这部分内容给出了目前已有的相关文档链接。
 66 | 
 67 | 
 68 | 
 69 | ## 初始化
 70 | 
 71 | 在介绍初始化前，先简单介绍一下关于 `SVO_ROS` 的节点函数。当运行 `roslaunch` 调用 svo 之后，来到 `rpg_svo/svo_ros/src/vo_node.cpp` 文件下运行 main 函数。
 72 | 
 73 | 1. 初始化 `ROS` ，接着创建一个节点句柄和节点 `VoNode` ，在创建 `VoNode` 的构造函数中开辟了一个线程用于监听控制台输入，然后加载摄像头参数，并初始化可视化的初始位姿，最后创建视觉里程计，并完成一系列初始化操作
 74 | 2. 订阅摄像头消息，每当获取到更新图像信息后回调 `svo::VoNode::imgCb` 函数，进入循环之后，接下来的所有工作都会在这个函数内完成
 75 | 3. 订阅远程输入消息（应该指的就是键盘输入）
 76 | 
 77 | 紧接着我们进入正题。在节点创建的过程中，程序创建了一个里程计算法入口 `svo::FrameHandlerMono` 类的变量，在整个构造函数运行过程中，调用了 initialize 函数来完成一系列算法部件的初始化。其中有几个比较重要的过程：
 78 | 
 79 | - 进行重投影的初始化，由 `Reprojector` (定义在 `reprojector.cpp` 中) 构造函数以及 `initialize` 函数完成，`grid_` 为 `Grid` 类变量，`Grid` 中定义了 `CandidateGrid` 型变量 `cells` ，而 `CandidateGrid` 是一个由 `Candidate` 型的 list（双向链表）组成的 vector（向量）。`grid_.cells.resize` 是设置了 `cells` 的大小，即将图像划分成多少个格子。然后通过 `for_each` 函数对 `cells` 每个链表（即图像每个格子）申请一块内存。之后通过 `for` 函数给每个格子编号，最后调用 `random_shuffle` 函数将格子的编号顺序打乱。
 80 | - 特征检测初始化，创建一个 `FastDetector` 类变量，该类继承于 `AbstractDetector` 类，初始化特征检测格子大小及需要多少行多少列格子，设置特征提取的金字塔层数，和每个格子是否已经存在特征点的 `std::vector<bool> grid_occupancy_` 变量。
 81 | - 通过 `DepthFilter`（深度滤波器）构造函数完成初始化，设置了特征检测器指针、回调函数指针、线程、新关键帧深度的初值，并启动深度滤波器线程。其中这里的回调函数指针需要注意，它的本意为：`depth_filter_cb(_1, _2) => (&map_.point_candidates_)->newCandidatePoint(_1, _2)`
 82 | 
 83 | 
 84 | 
 85 | ### 第一帧
 86 | 
 87 | 图像是通过 `FrameHandlerMono::addImage` 函数加载进里程计算法中的。在每一新建 `Frame` 类指针变量时，对图像进行提取图像金字塔，默认 5 层、尺度因子为 2 。接着进入 `FrameHandlerMono::processFirstFrame` 函数处理第一帧图像。
 88 | 
 89 | 1. 创建一个位姿变换矩阵赋给第一帧的 `T_f_w_` (表示从世界坐标到相机坐标的变换矩阵) 
 90 | 2. 创建一个特征检测器，默认设置特征提取的金字塔为 0~3 层，栅格大小为 30 。调用特征检测函数提取当前帧中的 Fast 特征角点。为了适应多尺度变化，对图像金字塔的多层图像提取 Fast 角点，紧接通过小窗口做非最大值抑制。之后对同一层图像坐落在同一网格中的 Fast 角点求 shiTomasi 得分，取得分最高的强角点。（关于 Fast 角点，与 Shi-Tomasi 角点/得分这部分知识可以参考「计算机视觉基础 - 特征点提取与匹配 - [FAST 角点检测](../../computer_vision/FAST.md) / [Shi-Tomasi 算法](../../computer_vision/Harris.md#shi-tomasi-算法)」）
 91 | 3. 判定特征数与门限值进行比较，如果特征点数少于门限值则下一帧图像重新进行以上操作，否则将提取的所有角点暂存储在向量容器中，用于后续做光流跟踪。并将该帧设置为关键帧，添加到地图中。
 92 | 
 93 | 
 94 | 
 95 | ### 第二帧
 96 | 
 97 | 这里的第二帧，实际上是指在能够与第一帧图像较好的进行三角化并计算出空间点和计算 Homography 矩阵并恢复位姿变换矩阵这段期间的所有帧图像都当作第二帧来处理。
 98 | 
 99 | 1. 利用 LK 光流法跟踪第一帧提取的特征点。作者一直使用第一帧检测的特征点为基准，然后后面来的帧一直用第一帧的特征点做光流跟踪。个人感觉这种做法鲁棒性并不好， 应该改成当前帧和前一帧做光流跟踪，而不是一直只用第一帧跟踪，这样修改之后初始化鲁棒了很多，特别是在畸变较大的摄像头上。
100 | 2. 去掉跟踪丢失的特征点，并计算所有跟踪良好的关联角点的像素距离。如果跟踪点数少于阈值，则退出并返回 FAILURE 。否则，判定所有跟踪良好的关联角点的像素距离的中位数是否大于阈值，如果是，则进行下一步骤，否则返回非关键帧等待下一帧重复以上步骤。
101 | 3. 将第一帧和第二帧关联角点各自图像坐标系下的单位球坐标（图像坐标系下的三维空间单位向量）转换成单位平面坐标来计算 Homography 矩阵。作者利用 OpenCV 自带的 `cv::findHomography` 函数来计算单应矩阵，采用了**随机采样一致性 (RANSAC)**  方式，并设置重投影误差需要小于 2 个像素点。然后利用「 **Faugeras SVD-based decomposition** 」[^1] 方式恢复出两帧之间的变换矩阵。（关于单应矩阵的计算以及从单应矩阵求取变换矩阵的算法可参考 xxx多视几何-单应矩阵xxx  #TODO，我对这部分代码做了注解，可以参考 [`homography.cpp`](https://github.com/LSXiang/SVIO/blob/master/vikit_common/src/homography.cpp) ）
102 | 4. 利用上一步解算的变换矩阵计算特征点在当前帧坐标系下的 3D 位置坐标，以及分离出内点 (inlier) 与外点 (outlier) 并统计个数，其中利用了中点法三角化求解深度值（算法推导参考 [issue](https://github.com/uzh-rpg/rpg_svo/issues/62) ）。如果内点个数少于阈值，则返回错误，否则从这些内点的 3D 位置坐标提取深度信息，然后选择中位数作为均值，调节地图大小以使得地图平均深度等于指定比率。之后，计算当前帧与世界坐标系下变换矩阵，并根据地图深度尺寸比率重新计算当前帧与世界坐标系的位移量。最后为每一个内点创建世界坐标系下的 3D 坐标，并将这些关联世界坐标系的 3D 特征点的角点信息添加到第一帧和当前帧中。
103 | 5. 如果你选择了使用 G2O 则进行两帧之间的 Bundle Adjustment 。设置当前帧为关键帧，并计算当前帧中观察到的 3D 空间特征点到当前图像的深度均值和最小值。之后将当前帧添加到深度滤波器和地图中。
104 | 
105 | 
106 | 
107 | ## 后续帧的里程估计
108 | 
109 | 完成初始化过程之后，接下来就是针对更新的每一帧图像来计算里程了。而这一部分的所有算法过程可以参考「[SVO 算法架构](./algorithm_framework.md#svo-算法架构)」。
110 | 
111 | 1. #### 稀疏直接法
112 |     稀疏直接法设置为在 `Config::kltMaxLevel(), Config::kltMinLevel()` 这几层图像金字塔层之间迭代，默认值为 4 与 2 。如果摄像头原始图像的大小为 752×480 ，那么在默认情况下，直接法从第 4 层金字塔图像（大小为 47×30）开始优化，优化得到一个结果作为初始值传给下个金字塔层进行直接法迭代优化位姿，最后结束迭代的第 2 层金字塔的分辨率是 188×120 。直接法应用了 4×4 的像素块进行对比求取残差，并利用高斯牛顿法进行迭代更新。程序统计直接法最终返回的是成功跟踪计算残差的特征点数。
113 | 
114 |     
115 |     
116 | 2. #### 重投影
117 | 
118 |     重投影过程首先是获取地图中能够与当前帧存在共视的关键帧。共视的判断是通过投影关键帧中的 5 个关键特征点（位于图像中间和四个边角）到当前帧中，判断是这些关键特征点是否能够被当前帧所观察得到。如果存在共视，那么计算参考关键帧与当前帧之间的距离，然后按照距离从小到大排序，默认情况只从投影距离较小的 10 帧关键参考帧中的已经存在空间 3D 坐标的特征点到当前图像中，且通过记录空间点最后投影图像的 id (`point->last_projected_kf_id_`) 确保了同一个空间特征点只投影一次。
119 | 
120 |     > 这里引用 白巧克力亦唯心 (贺博) [博客](https://blog.csdn.net/heyijia0327/article/details/51649082)中关于地图点和关键帧上的图像特征点是如何联系的来捋顺下这部分的描述的。
121 |     >
122 |     > ![Feature Reproject](image/feature_reproject.png) 
123 |     >
124 |     > 如上图所示，首先地图点 $P$ 存储的是他们在世界坐标系中三维坐标，因此，他们可以自由的投放到任意帧上去。另外，地图点记录了哪些帧上的特征点能够观测到它，比如，通过 $P_i^2 , P_j^2$ 能够找到地图点 $P_2$ ，反过来，地图点 $P_2$ 通过变量 `obs_` 记录了它和哪些特征是联系起来了，因此也知道图像中特征 $P_i^2 , P_j^2$ 能够观测到它。 
125 |     >
126 |     > 现在，回到关键帧重投影步骤，找到关键帧后，遍历关键帧上的特征点，通过特征点找到地图点，然后一一投影到当前帧。注意，一个地图 point 点只需要投一次，所以比如 $P_2$ 通过 $\text{refframe}_i$ 投影了一次，就不用通过 $\text{refframe}_j$ 再投一次了。
127 | 
128 |     投影完关键帧中的特征点之后，紧接着是把地图中没有插入到关键帧中的候选点 `point_candidates_` 也全部投影到当前帧中。最后针对重投影坐落于同一个 cell 中空间特征点按照质量（这里的质量是通过这些特征点被观察到的次数来衡量的）进行排序然后进行投影匹配操作。
129 | 
130 |     匹配操作首先是针对该空间投影点 `point` 挑选最好的匹配特征块，即，为地图点 $P_2$ 筛选图像中特征 $P_i^2$ 还是 $P_j^2$ 来进行匹配。挑选的方法是根据该空间点投影到当前帧和参考帧之间的夹角大小来筛选的，挑选最小的投影夹角且该夹角要求小于 60 度。然后判断该空间点筛选出来的特征块是否符合要求。之后计算两帧之间的仿射变换。然后根据求解出来的仿射变换矩阵的行列式大小来判断特征块投影后位于当前帧中的合适尺寸大小，即位于哪一层金字塔上。接着，从参考帧中提取一块参考特征块用于与当前帧进行匹配。最后，通过[特征点对齐优化](algorithm_framework.md#通过特征点对齐优化匹配关系)当前帧特征点的位姿。
131 | 
132 |     如果特征点对齐优化失败，则累加该空间投影点 `point` 失败次数，如果次数超过阈值且状态不是 `TYPE_GOOD` 的话，则删除该空间点。反之，特征点对齐优化成功，则累加空间投影点 `point` 成功次数，如果次数超过阈值且状态为 `TYPE_UNKNOWN` 的话，则状态转变为 `TYPE_GOOD` 。最后为当前帧创建优化后的特征点，并添加到该帧中。
133 | 
134 |     进行完该步骤之后，判断重投影成功的空间点数，如果数量少于阈值的话，则进入重定位，否则继续下一步骤。
135 | 
136 |     
137 | 
138 | 3. #### BA 优化
139 | 
140 |     利用上一步重投影以后的特征点开始进行当前帧的位姿及结构优化。位姿优化用到了核函数 `TukeyWeightFunction` ，根据误差模型来调整误差权重，Tukey's hard re-descending function 可以参考[维基百科](http://en.wikipedia.org/wiki/Redescending_M-estimator)。位姿优化的时候，会根据投影误差丢掉一些误差大的特征点，最后留下来进行位姿优化的这些特征点被变量 `sfba_n_edges_final` 记录下来，利用它来判断跟踪质量好不好。`setTrackingQuality(sfba_n_edges_final)`  函数判断跟踪不好的依据是，用于位姿优化的点数 `sfba_n_edges_final` 小于一个阈值，或者比上一帧中用于优化的点减少了很多。
141 | 
142 |     优化地图点是先寻找（默认为 20 个）空间点中最后一次优化较早的空间点，然后针对每一个空间点优化其三维位置，使得它在它被观察到的每个关键帧上的重投影误差最小。默认每个地图点优化 5 次。如果这次优化的误差平方和小于上次优化的误差平方和，就接受这次的优化结果。
143 | 
144 | 
145 | 
146 | ## 深度估计与建图
147 | 
148 | 根据上面所得到的结果，先获取当前帧图像中空间点的平均深度和最小深度。然后通过判断机制判断该帧是否是关键帧，如果不是，则添加到深度滤波器中用于更新优化关键帧中深度估计未收敛的特征点。如果判断为关键帧，那么先置位关键帧标志位，并寻找关键空间点。然后为当前帧能观察到的空间点的 `obs_` 添加当前帧的特征点为关联关系。并将地图中能被当前帧观察到的已经收敛的空间候选点添加到第一次观察到它们的关键帧中。如果启动 bundle adjustment 则会进行一次关键帧的全局 BA 优化。当深度滤波器添加进去的帧是关键帧的话，那么将为该帧划分栅格，为栅格中还未有特征点的添加 Fast 特征，并利用后续帧对这些特征点进行深度估计。深度估计的整个过程算法可以参考上一篇算法分析中的[地图构建](algorithm_framework.md#地图构建)部分。
149 | 
150 | 在把新关键帧添加到地图中的时候，先判断下地图中存储的关键帧是否已经超过阈值了，如果是则选择地图中与当前关键帧距离最远的关键帧进行删除。
151 | 
152 | 
153 | 
154 | ## 重定位
155 | 
156 | SVO中重定位，实现很简单，就是在跟丢之后，仍然假设当前帧的位姿和前一帧一样，往这个位姿上投地图点，用「后续帧的里程估计 」部分中的方法去优化计算，如果优化成功，就重定位回来，如果优化不成功，就继续下一帧。所以，在跟丢后，只能再回到跟丢时的位置，才能重定位回来。
157 | 
158 | 这样子实现重定位的方法很简单，可重定位的效果就很差了。这地方可以进行改进。 
159 | 
160 | 
161 | 
162 | ## 总结
163 | 
164 | 关于 SVO 跑自己摄像头老丢失特征并重定位的一些分析可以参考贺博的博客[^2] 。对于 SVO 中多处雅克比矩阵的推导可以参考极品巧克力的博客[^3] 。如果你需要更细致的代码注解那么可以参考冯兵的博客[^4] 。
165 | 
166 | 
167 | 
168 | 
169 | 
170 | 
171 | 
172 | 
173 | 
174 | [^1]: [Motion and Structure from Motion in a Piecewise Planar Environment](https://www.researchgate.net/publication/243764888_Motion_and_Structure_from_Motion_in_a_Piecewise_Planar_Environment)
175 | 
176 | [^2]: [SVO 代码笔记](https://blog.csdn.net/heyijia0327/article/details/51649082) 
177 | [^3]: [SVO 详细解读](https://www.cnblogs.com/ilekoaiq/p/8659631.html) 
178 | [^4]: [一步步完善视觉里程计 系列](http://fengbing.net) 
179 | 
180 | 
181 | 
182 | 
183 | 
184 | --8<--
185 | mathjax.txt
186 | --8<--


--------------------------------------------------------------------------------
/docs/projects/rpg_svo/image/SVO_Structure.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/LSXiang/Journey2SLAM/901bf5fd97c93dc68248f74cd5bae1a52bef85a2/docs/projects/rpg_svo/image/SVO_Structure.png


--------------------------------------------------------------------------------
/docs/projects/rpg_svo/image/depth_estimation.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/LSXiang/Journey2SLAM/901bf5fd97c93dc68248f74cd5bae1a52bef85a2/docs/projects/rpg_svo/image/depth_estimation.png


--------------------------------------------------------------------------------
/docs/projects/rpg_svo/image/depth_uncertainty.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/LSXiang/Journey2SLAM/901bf5fd97c93dc68248f74cd5bae1a52bef85a2/docs/projects/rpg_svo/image/depth_uncertainty.png


--------------------------------------------------------------------------------
/docs/projects/rpg_svo/image/feature_alignment.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/LSXiang/Journey2SLAM/901bf5fd97c93dc68248f74cd5bae1a52bef85a2/docs/projects/rpg_svo/image/feature_alignment.png


--------------------------------------------------------------------------------
/docs/projects/rpg_svo/image/feature_reproject.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/LSXiang/Journey2SLAM/901bf5fd97c93dc68248f74cd5bae1a52bef85a2/docs/projects/rpg_svo/image/feature_reproject.png


--------------------------------------------------------------------------------
/docs/projects/rpg_svo/image/image_alignment.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/LSXiang/Journey2SLAM/901bf5fd97c93dc68248f74cd5bae1a52bef85a2/docs/projects/rpg_svo/image/image_alignment.png


--------------------------------------------------------------------------------
/docs/state_estimation_robotics/state_estimation_robotics.md:
--------------------------------------------------------------------------------
1 | #TODO
2 | 
3 | - [Least Squares Estimation and SLAM](http://www.dis.uniroma1.it/~grisetti/teaching/lectures-ls-slam-master/web)
4 | 


--------------------------------------------------------------------------------
/mkdocs.yml:
--------------------------------------------------------------------------------
  1 | site_name: SLAM 之旅
  2 | site_description: SLAM 之旅（Journey to SLAM）是用于记录分享学习 SLAM 知识的平台
  3 | site_author: Jacob.lsx
  4 | site_url: https://lsxiang.github.io/Journey2SLAM
  5 | 
  6 | # Repository
  7 | repo_name: LSXiang/Journey2SLAM
  8 | repo_url: https://github.com/LSXiang/Journey2SLAM
  9 | 
 10 | # Copyright
 11 | copyright: 'Copyright &copy; 2019. All rights reserved.'
 12 | 
 13 | # Configuration
 14 | theme:
 15 |   name: 'material'    # readthedocs material 
 16 |   # 404 page
 17 |   static_templates:
 18 |     - 404.html
 19 |   # Don't include MkDocs' JavaScript
 20 |   include_search_page: false
 21 |   search_index_only: true
 22 |   language: zh
 23 |   logo:
 24 |     icon: 'school'
 25 |   palette:
 26 |     primary: 'Blue'  # default: indigo
 27 |     accent:  'indigo'  # default: indigo
 28 |   feature:
 29 |     tabs: true
 30 |   font:
 31 |     text: Roboto
 32 |     code: Roboto Mono
 33 |   # navigation_depth: 10
 34 | 
 35 | # Path
 36 | docs_dir: docs
 37 | 
 38 | # Navigation 
 39 | nav:
 40 | - 主页: index.md
 41 | - 关于 SLAM: aboutSLAM.md
 42 | - 计算机视觉基础: 
 43 |     - 计算机视觉基础概览: computer_vision/computer_vision.md
 44 |     - 图像滤波器: computer_vision/image_filtering.md
 45 |     - 特征提取与匹配: 
 46 |         - Harris 角点检测: computer_vision/Harris.md
 47 |         - Fast 角点检测: computer_vision/FAST.md
 48 |         - ORB 特征: computer_vision/ORB.md
 49 |         - SIFT 特征: computer_vision/SIFT.md
 50 |         - SUFT 特征: computer_vision/SUFT.md
 51 |         - Haar 特征: computer_vision/Haar.md
 52 | - 多视几何: 
 53 |     - 多视几何概览: multiple_view_geometry/mvg.md
 54 |     - 数学背景 - 线性代数: multiple_view_geometry/linear_algebra.md
 55 |     - 三维运动场景的表示: multiple_view_geometry/RepreOf3DMoveScene.md
 56 | - 状态估计: 
 57 |     - 状态估计概览: state_estimation_robotics/state_estimation_robotics.md
 58 | - 开源项目:
 59 |     - 开源项目概览: projects/projects.md
 60 |     - RPG-SVO:
 61 |       - SVO 算法框架: projects/rpg_svo/algorithm_framework.md
 62 |       - SVO 代码解析: projects/rpg_svo/code_analysis.md
 63 |     - ORB-SLAM:
 64 |       - 跟踪线程: projects/orb_slam/tracking.md
 65 |       - 局部建图线程: projects/orb_slam/local_mapping.md
 66 |       - 回环检测线程: projects/orb_slam/loop_closing.md
 67 | - 其他:
 68 |     - Markdown 写作模版: others/md_template.md
 69 |     # - 待归类整合文稿: 
 70 | 
 71 | # Customization
 72 | extra:
 73 |   version: 1.0
 74 |   search:
 75 |     language: 'jp'
 76 |   manifest: 'manifest.webmanifest'
 77 |   social:
 78 |     - type: 'github'
 79 |       link: 'https://github.com/LSXiang'
 80 | 
 81 | # Google Analytics
 82 | google_analytics:
 83 |   - 'UA-XXXXXXXX-X'
 84 |   - 'auto'
 85 | 
 86 | # Extensions
 87 | markdown_extensions:
 88 |     # The Table of Contents extension generates a Table of Contents from a Markdown document and 
 89 |     # adds it into the resulting HTML document.
 90 |   - toc:
 91 |       # marker:           # Text to find and replace with the Table of Contents. Defaults to [TOC].
 92 |       # title:            # Title to insert in the Table of Contents’ <div>. Defaults to None
 93 |       permalink: "\ue157"   # Set to True or a string to generate permanent links at the end of each header. Useful with Sphinx style sheets.
 94 |       baselevel: 1      # Base level for headers. Defaults to 1.
 95 |       slugify: !!python/name:pymdownx.slugs.uslugify  # Callable to generate anchors.
 96 |       # separator:        # Word separator. Character which replaces white space in id. Defaults to “-”.
 97 |       # toc_depth:        # Define the range of section levels to include in the Table of Contents.Defaults to 6 (bottom).
 98 | 
 99 |     # The Admonition extension adds rST-style admonitions to Markdown documents.
100 |     # directive types: "attention", "caution", "danger", "error", "hint", 
101 |     #                  "important", "note", "tip", "warning", "admonition"
102 |     # eg:
103 |     #     !!! type "optional explicit title within double quotes"
104 |     #     Any number of other indented markdown elements.
105 |     #
106 |     #     This is the second paragraph.
107 |   - admonition:
108 | 
109 |     # The SmartyPants extension converts ASCII dashes, quotes and ellipses to their HTML entity equivalents.
110 |   - smarty:
111 |       # smart_dashes: true          # whether to convert dashes
112 |       # smart_quotes: true  	      # whether to convert straight quotes
113 |       # smart_angled_quotes: false  # whether to convert angled quotes
114 |       # smart_ellipses: true 	      # whether to convert ellipses
115 |       # substitutions: {} 	        # overwrite default substitutions
116 | 
117 |     # BetterEm is a different approach to emphasis than Python Markdown's default.
118 |     # It works similar but handles certain corner cases differently.
119 |   - pymdownx.betterem:
120 |       smart_enable: all
121 | 
122 |     # The Attribute Lists extension adds a syntax to define attributes on the 
123 |     # various HTML elements in markdown’s output.
124 |   - attr_list:
125 | 
126 |     # The Definition Lists extension adds the ability to create definition 
127 |     # lists in Markdown documents.
128 |     # eg:
129 |     #     Apple
130 |     #     :   Pomaceous fruit of plants of the genus Malus in
131 |     #         the family Rosaceae.
132 |   - def_list:
133 | 
134 |     # The Tables extension adds the ability to create tables in Markdown documents.
135 |   - tables:
136 | 
137 |     # The Abbreviations extension adds the ability to define abbreviations.
138 |     # eg:
139 |     #     The HTML specification is maintained by the W3C
140 |     #     *[HTML]: Hyper Text Markup Language
141 |     #     *[W3C]:  World Wide Web Consortium
142 |   - abbr:
143 | 
144 |     # The Footnotes extension adds syntax for defining footnotes in Markdown documents.
145 |     # eg:
146 |     #     Footnotes[^1] have a label[^@#$%] and the footnote's content.
147 |     #     [^1]: This is a footnote content.
148 |     #     [^@#$%]: A footnote on the label: "@#$%".
149 |   - footnotes:
150 | 
151 |     # The CodeHilite extension adds code/syntax highlighting to standard 
152 |     # Python-Markdown code blocks using Pygments.
153 |   - codehilite:
154 |       use_pygments: true  # Defaults to True
155 |       # guess_lang: false   # Automatic language detection, defaults to true.
156 |       # linenums:         # Use line numbers, 'true' or 'false', defaults to None.
157 |       # noclasses:        # Use inline styles instead of CSS classes. Defaults to False.
158 |       # css_class:        # Set CSS class name for the wrapper <div> tag. Defaults to codehilite
159 | 
160 |     # The Fenced Code Blocks extension adds a secondary way to define code blocks,
161 |     # which overcomes a few limitations of the indented code blocks.
162 |   - fenced_code
163 | 
164 |     # The Legacy EM extension restores Markdown’s original behavior for emphasis 
165 |     # and strong syntax when using underscores.
166 |   - legacy_em 
167 | 
168 |     # The Meta-Data extension adds a syntax for defining meta-data about a document. 
169 |     # It is inspired by and follows the syntax of MultiMarkdown. Currently, this extension
170 |     # does not use the meta-data in any way, but simply provides it as a Meta attribute of
171 |     # a Markdown instance for use by other extensions or directly by your python code.
172 |   - meta
173 | 
174 |     # The New-Line-to-Break (nl2br) Extension will cause newlines to be treated as hard breaks
175 |     # like StackOverflow and GitHub flavored Markdown do.
176 |   - nl2br
177 | 
178 |     # The Sane Lists extension alters the behavior of the Markdown List syntax to be less surprising.
179 |   - sane_lists
180 | 
181 |     # The WikiLinks extension adds support for WikiLinks
182 |   - wikilinks
183 | 
184 |     # B64 converts all local images in a document to base64 encoding and embeds them in the document.
185 |   # - b64
186 | 
187 |     # The Legacy Attributes extension restores Python-Markdown’s original attribute 
188 |     # setting syntax. Older versions of Python Markdown (prior to 3.0) included built-in
189 |     # and undocumented support for defining attributes on elements. Most users have never
190 |     # made use of the syntax and it has been deprecated in favor of Attribute Lists. 
191 |     # This extension restores the legacy behavior for users who have existing documents 
192 |     # which use the syntax.
193 |   # - legacy_attr
194 | 
195 | 
196 |   ### PyMdown Extensions is a collection of extensions for Python Markdown.
197 |   ### you can get detail from https://facelessuser.github.io/pymdown-extensions/
198 | 
199 |   - pymdownx.extrarawhtml:
200 | 
201 |     # SuperFences is like Python Markdown's fences, but better. Nest fences under lists, admonitions,
202 |     # and other syntaxes. Combine multiple fences together in tabbed groups, and even create special
203 |     # custom fences for content like UML.
204 |   - pymdownx.superfences:
205 |       highlight_code: true  # Enable or disable code highlighting.
206 |       preserve_tabs: true   # Experimental feature that preserves tabs in fenced code blocks.
207 |       disable_indented_code_blocks: false # Disables Python Markdown's indented code block parsing. This is nice if you only ever use fenced blocks.
208 |       custom_fences:        # Custom fences.
209 |         - name: math
210 |           class: arithmatex
211 |           format: !!python/name:pymdownx.arithmatex.fence_mathjax_format
212 |         - name: flow
213 |           class: uml-flowchart
214 |           format: !!python/name:pymdownx.superfences.fence_code_format
215 |         - name: sequence
216 |           class: uml-sequence-diagram
217 |           format: !!python/name:pymdownx.superfences.fence_code_format
218 |         # - css_class:        # Class name is applied to the wrapper element of the code. 
219 |                             # If configured, this setting will override the css_class option of either CodeHilite or Highlight. 
220 |                             # If nothing is configured here or via CodeHilite or Highlight, the class highlight will be used.
221 | 
222 |     # Highlight allows you to configure the syntax highlighting of SuperFences and InlineHilite.
223 |     # Also passes standard Markdown indented code blocks through the syntax highlighter.
224 |   - pymdownx.highlight:
225 |       css_class: codehilite #'highlight'
226 |       # guess_lang: false
227 |       # pygments_style: 'default'
228 |       # noclasses: false
229 |       # use_pygments: true
230 |       # linenums: false
231 |       # linenums_special: 1
232 |       # linenums_style: 'table' # default 'table'
233 |       # extend_pygments_lang:
234 | 
235 |     # InlineHilite highlights inline code: from module import function as func.
236 |   - pymdownx.inlinehilite:
237 |       custom_inline:
238 |         - name: math
239 |           class: arithmatex
240 |           format: !!python/name:pymdownx.arithmatex.inline_mathjax_format
241 | 
242 |     # MagicLink linkifies URL and email links without having to wrap them in Markdown syntax.
243 |     # It also shortens repository issue, pull request, and commit links.
244 |   - pymdownx.magiclink:
245 |       repo_url_shortener: true
246 |       repo_url_shorthand: true
247 |       social_url_shorthand: true
248 |       user: Jacob.lsx
249 |       repo: Journey2SLAM
250 | 
251 |     # Tilde is syntactically built around the ~ character. 
252 |     # It adds support for inserting subscripts and adds an easy way to place text in a <del> tag.
253 |   - pymdownx.tilde:
254 |       smart_delete: true  # Use smart logic with delete characters.
255 |       delete: true        # Enable delete feature.
256 |       subscript: true     # Enable subscript feature.
257 | 
258 |     # Caret is an extension that is syntactically built around the ^ character. 
259 |     # It adds support for inserting superscripts and adds an easy way to place text in an <ins> tag.
260 |   - pymdownx.caret:
261 | 
262 |     # SmartSymbols inserts commonly used Unicode characters via simple ASCII representations: =/= → ≠.
263 |   - pymdownx.smartsymbols:
264 |       # trademark:       true 	# Add syntax for trademark symbol.
265 |       # copyright:       true 	# Add syntax for copyright symbol.
266 |       # registered:      true 	# Add syntax for registered symbol.
267 |       # care_of:         true 	# Add syntax for care / of.
268 |       # plusminus:       true 	# Add syntax for plus / minus.
269 |       # arrows:          true 	# Add syntax for creating arrows.
270 |       # notequal:        true 	# Add syntax for not equal symbol.
271 |       # fractions:       true   # Add syntax for common fractions.
272 |       # ordinal_numbers: true   # Add syntax for ordinal numbers.
273 | 
274 |     # Emoji makes adding emoji via Markdown easy 😄.
275 |   - pymdownx.emoji:
276 |       emoji_generator: !!python/name:pymdownx.emoji.to_png
277 | 
278 |     # EscapeAll allows the escaping of any character, some with additional effects.
279 |   - pymdownx.escapeall:
280 |       hardbreak: true  # Escaped newlines will be hard breaks: <br>.
281 |       nbsp: true       # Escaped spaces will be non-breaking spaces: &nbsp;.
282 | 
283 |     # Tasklist allows inserting lists with check boxes.
284 |   - pymdownx.tasklist:
285 |       custom_checkbox: true     # Generate task lists in such a way as to allow for styling the check box with CSS.
286 |       # clickable_checkbox: true  # Enable user to interact with checkboxes.
287 | 
288 |     # ProgressBar creates progress bars quick and easy.
289 |   - pymdownx.progressbar:
290 |       # level_class: true       # Enables or disables the level class feature. The level class feature adds level classes in increments defined by progress_increment.
291 |       # add_classes:            # This option accepts a string of classes separated by spaces.
292 |       # progress_increment: 20  # Defines the increment at which the level_class classes are generated.
293 | 
294 |     # Arithmatex is an extension that preserves LaTeX math equations (e.g. $\frac{\sqrt{x}}{y^3}$) during the Markdown 
295 |     # conversion process so that they can be used with MathJax.
296 |   - pymdownx.arithmatex:
297 | 
298 |     # Mark allows you to mark words easily.
299 |   - pymdownx.mark:
300 | 
301 |     # StripHTML can strip out HTML comments and specific tag attributes.
302 |   - pymdownx.striphtml:
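303 |       # strip_comments: true          # Strip HTML comments from the output.
304 |       # strip_js_on_attributes: true  # Strip JavaScript event attributes (those matching on*).
305 |       # strip_attributes: []          # List of additional tag attribute names to strip.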
306 | 
307 |     # Snippets include other Markdown or HTML snippets into the current Markdown file being parsed.
308 |   - pymdownx.snippets:
309 |       base_path: docs/_configurations/_snippets # A string indicating the base path used to resolve snippet locations.
310 |       # encoding: 'utf-8'   # Encoding to use when reading in the snippets.
311 |       # check_paths: false  # Make the build fail if a snippet can't be found.
312 | 
313 |     # Keys makes inserting key inputs into documents as easy as pressing Ctrl＋Alt＋Del.
314 |   - pymdownx.keys:
315 |       separator: "\uff0b"
316 | 
317 |     # Details creates collapsible elements with <details><summary> tags.
318 |   - pymdownx.details:
319 | 
320 |     # Critic adds handling and support of Critic Markup.
321 |   - pymdownx.critic
322 | 
323 |     # PathConverter converts paths to absolute or relative to a given base path.
324 |   - pymdownx.pathconverter
325 | 
326 |     # Extra is just like Python Markdown's Extra package except it uses PyMdown Extensions 
327 |     # to substitute similar extensions.
328 |   # - pymdownx.extra
329 | 
330 | 
331 | extra_javascript:
332 |   - _configurations/_theme_extra/theme_extra.js
333 |   # - js/extra.js
334 | 
335 | extra_css:
336 |   - _configurations/_theme_extra/theme_extra.css
337 |   # - scss/extra.scss
338 | 


--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
1 | mkdocs
2 | Pygments
3 | pymdown-extensions
4 | mkdocs-material


--------------------------------------------------------------------------------