{{ .Title }} 21 |
22 | 25 |{{ .Summary | plainify | htmlUnescape }}
28 |├── .gitignore ├── README.md ├── assets └── style │ ├── _base.scss │ ├── _content.scss │ ├── _header.scss │ ├── _normalize.scss │ ├── _pagination.scss │ └── style.scss ├── config.toml ├── content ├── about.md └── posts │ ├── arrays.md │ ├── big-o-notation.md │ ├── hash-tables.md │ ├── intrusive-linked-lists.md │ └── linked-lists.md ├── i18n └── en.yaml ├── layouts ├── 404.html ├── _default │ ├── list.html │ └── single.html └── partials │ ├── footer.html │ └── header.html ├── package.json ├── static ├── icons │ ├── android-chrome-192x192.png │ ├── android-chrome-512x512.png │ ├── apple-touch-icon.png │ ├── favicon-16x16.png │ ├── favicon-32x32.png │ └── favicon.ico ├── images │ ├── arrays │ │ ├── array-with-bytes.svg │ │ ├── array.svg │ │ └── ruby-array.svg │ ├── big-o │ │ ├── big-o-formal.svg │ │ ├── big-o-graph.svg │ │ └── big-o-relative-growth.svg │ ├── hash-tables │ │ ├── hash-table-chaining.svg │ │ ├── hash-table-clustering.svg │ │ ├── hash-table-cpython-structure-high-level.svg │ │ ├── hash-table-cpython-structure.svg │ │ └── hash-table-open-addressing.svg │ ├── intrusive-linked-lists │ │ ├── circular-doubly-linked-list.svg │ │ ├── doubly-linked-list.svg │ │ ├── intrusive-linked-list-pointers.svg │ │ ├── intrusive-list-memory-address.svg │ │ └── linked-list-pointers.svg │ └── linked-lists │ │ ├── singly-linked-list-with-head.svg │ │ └── singly-linked-list.svg └── manifest.json └── yarn.lock /.gitignore: -------------------------------------------------------------------------------- 1 | public 2 | resources 3 | node_modules 4 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # data structures in practice 2 | 3 | A blog about data structures and how they are used in open source projects. 4 | 5 | https://www.data-structures-in-practice.com/ 6 | -------------------------------------------------------------------------------- /assets/style/_base.scss: -------------------------------------------------------------------------------- 1 | body { 2 | color: $body-color; 3 | font-family: -apple-system, BlinkMacSystemFont, "Segoe UI", Roboto, Oxygen, 4 | Ubuntu, Cantarell, "Open Sans", "Helvetica Neue", sans-serif; 5 | font-size: 1.1rem; 6 | line-height: 1.9; 7 | } 8 | 9 | h1 { 10 | font-size: 2.25rem; 11 | margin: 0; 12 | } 13 | 14 | h2 { 15 | margin-bottom: 1.5rem; 16 | } 17 | 18 | h3 { 19 | margin-bottom: 0.75rem; 20 | } 21 | 22 | p, 23 | ul, 24 | ol, 25 | dl { 26 | margin-bottom: 1.5rem; 27 | } 28 | 29 | ul, 30 | ol { 31 | padding: 0; 32 | padding-left: 1.25rem; 33 | } 34 | 35 | li { 36 | margin-bottom: 0.3125rem; 37 | } 38 | 39 | code { 40 | color: $secondary-color; 41 | } 42 | 43 | pre { 44 | padding: 1.25rem 1.375rem; 45 | margin-bottom: 2rem; 46 | line-height: 1.5; 47 | border-radius: 0.125rem; 48 | font-size: 1rem; 49 | overflow-x: auto; 50 | 51 | // inherit inline styles added by pygment 52 | code { 53 | color: inherit; 54 | font-size: inherit; 55 | } 56 | } 57 | 58 | a { 59 | color: $body-color; 60 | text-decoration: none; 61 | // Remove the gray background on active links in IE 10. 
62 | background-color: transparent; 63 | 64 | &:hover { 65 | color: $primary-color; 66 | text-decoration-color: $primary-color; 67 | } 68 | } 69 | 70 | table { 71 | width: 100%; 72 | border-collapse: collapse; 73 | border-spacing: 0; 74 | margin-bottom: 2rem; 75 | } 76 | 77 | th, 78 | td { 79 | text-align: left; 80 | padding: 0.625rem; 81 | border-bottom: 0.0625rem solid #eee; 82 | } 83 | 84 | img { 85 | display: block; 86 | max-width: 100%; 87 | } 88 | 89 | figure { 90 | margin: 0; 91 | } 92 | 93 | blockquote { 94 | margin: 0 0 0 -1.5rem; 95 | padding: 0 0 0 1.3125rem; 96 | font-style: italic; 97 | border-left: 0.1875rem solid $body-color; 98 | } 99 | 100 | code { 101 | margin-left: 4px; 102 | margin-right: 4px; 103 | padding: 4px 6px; 104 | font-family: Menlo, Monaco, "Courier New", Courier, monospace; 105 | font-size: 0.8em; 106 | white-space: pre-wrap; 107 | background: #f7f7f8; 108 | border-radius: 2px; 109 | } 110 | 111 | pre code { 112 | margin-left: 0; 113 | margin-right: 0; 114 | padding: 0; 115 | white-space: inherit; 116 | } 117 | 118 | .highlight table, 119 | .highlight pre { 120 | margin-bottom: 0; 121 | } 122 | .highlight code { 123 | background: transparent; 124 | } 125 | .highlight table { 126 | width: auto; 127 | } 128 | .highlight table td { 129 | padding: 0; 130 | border-bottom: 0; 131 | } 132 | .highlight table td:first-child pre code { 133 | padding-right: 16px; 134 | color: var(--tertiary); 135 | text-align: right; 136 | } 137 | .highlight table td:last-child pre code { 138 | padding-left: 0; 139 | overflow: hidden; 140 | } 141 | .highlight .ln { 142 | margin-right: 16px; 143 | color: var(--tertiary); 144 | } 145 | -------------------------------------------------------------------------------- /assets/style/_content.scss: -------------------------------------------------------------------------------- 1 | .post-content { 2 | margin-bottom: 3rem; 3 | 4 | a { 5 | text-decoration: underline; 6 | } 7 | } 8 | 9 | .main { 10 | max-width: 40rem; 11 | display: block; 12 | margin: 0 auto; 13 | padding: 2.25rem 1.5rem 0; 14 | } 15 | 16 | .post-meta { 17 | color: $secondary-color; 18 | font-size: 0.8125rem; 19 | letter-spacing: 0.0313rem; 20 | } 21 | 22 | .post-title { 23 | transform: translateX(-1px); 24 | } 25 | 26 | .post-header { 27 | margin-bottom: 2.25rem; 28 | } 29 | 30 | .post-entry { 31 | margin-bottom: 3.75rem; 32 | } 33 | -------------------------------------------------------------------------------- /assets/style/_header.scss: -------------------------------------------------------------------------------- 1 | .header { 2 | overflow: hidden; 3 | padding: 0.5rem 1.5rem 0; 4 | } 5 | 6 | .nav-link { 7 | font-size: 1rem; 8 | } 9 | 10 | .logo { 11 | margin: 0; 12 | font-weight: 700; 13 | font-size: 1.5rem; 14 | float: left; 15 | } 16 | 17 | .nav { 18 | max-width: 64rem; 19 | margin: 0 auto; 20 | } 21 | 22 | .nav-link { 23 | display: block; 24 | padding-top: 0.25rem; 25 | clear: both; 26 | 27 | @media only screen and (min-width: 768px) { 28 | clear: none; 29 | float: right; 30 | } 31 | } 32 | -------------------------------------------------------------------------------- /assets/style/_normalize.scss: -------------------------------------------------------------------------------- 1 | /*! normalize.css v8.0.1 | MIT License | github.com/necolas/normalize.css */ 2 | 3 | /* Document 4 | ========================================================================== */ 5 | 6 | /** 7 | * 1. Correct the line height in all browsers. 8 | * 2. 
Prevent adjustments of font size after orientation changes in iOS. 9 | */ 10 | 11 | html { 12 | line-height: 1.15; /* 1 */ 13 | -webkit-text-size-adjust: 100%; /* 2 */ 14 | } 15 | 16 | /* Sections 17 | ========================================================================== */ 18 | 19 | /** 20 | * Remove the margin in all browsers. 21 | */ 22 | 23 | body { 24 | margin: 0; 25 | } 26 | 27 | /** 28 | * Render the `main` element consistently in IE. 29 | */ 30 | 31 | main { 32 | display: block; 33 | } 34 | 35 | /* Grouping content 36 | ========================================================================== */ 37 | 38 | /** 39 | * 1. Add the correct box sizing in Firefox. 40 | * 2. Show the overflow in Edge and IE. 41 | */ 42 | 43 | hr { 44 | box-sizing: content-box; /* 1 */ 45 | height: 0; /* 1 */ 46 | overflow: visible; /* 2 */ 47 | } 48 | 49 | /** 50 | * 1. Correct the inheritance and scaling of font size in all browsers. 51 | * 2. Correct the odd `em` font sizing in all browsers. 52 | */ 53 | 54 | pre { 55 | font-family: monospace, monospace; /* 1 */ 56 | font-size: 1em; /* 2 */ 57 | } 58 | 59 | /* Text-level semantics 60 | ========================================================================== */ 61 | 62 | /** 63 | * Remove the gray background on active links in IE 10. 64 | */ 65 | 66 | a { 67 | background-color: transparent; 68 | } 69 | 70 | /** 71 | * 1. Remove the bottom border in Chrome 57- 72 | * 2. Add the correct text decoration in Chrome, Edge, IE, Opera, and Safari. 73 | */ 74 | 75 | abbr[title] { 76 | border-bottom: none; /* 1 */ 77 | text-decoration: underline; /* 2 */ 78 | text-decoration: underline dotted; /* 2 */ 79 | } 80 | 81 | /** 82 | * Add the correct font weight in Chrome, Edge, and Safari. 83 | */ 84 | 85 | b, 86 | strong { 87 | font-weight: bolder; 88 | } 89 | 90 | /** 91 | * 1. Correct the inheritance and scaling of font size in all browsers. 92 | * 2. Correct the odd `em` font sizing in all browsers. 93 | */ 94 | 95 | code, 96 | kbd, 97 | samp { 98 | font-family: monospace, monospace; /* 1 */ 99 | font-size: 1em; /* 2 */ 100 | } 101 | 102 | /** 103 | * Add the correct font size in all browsers. 104 | */ 105 | 106 | small { 107 | font-size: 80%; 108 | } 109 | 110 | /** 111 | * Prevent `sub` and `sup` elements from affecting the line height in 112 | * all browsers. 113 | */ 114 | 115 | sub, 116 | sup { 117 | font-size: 75%; 118 | line-height: 0; 119 | position: relative; 120 | vertical-align: baseline; 121 | } 122 | 123 | sub { 124 | bottom: -0.25em; 125 | } 126 | 127 | sup { 128 | top: -0.5em; 129 | } 130 | 131 | /* Embedded content 132 | ========================================================================== */ 133 | 134 | /** 135 | * Remove the border on images inside links in IE 10. 136 | */ 137 | 138 | img { 139 | border-style: none; 140 | } 141 | 142 | /* Forms 143 | ========================================================================== */ 144 | 145 | /** 146 | * 1. Change the font styles in all browsers. 147 | * 2. Remove the margin in Firefox and Safari. 148 | */ 149 | 150 | button, 151 | input, 152 | optgroup, 153 | select, 154 | textarea { 155 | font-family: inherit; /* 1 */ 156 | font-size: 100%; /* 1 */ 157 | line-height: 1.15; /* 1 */ 158 | margin: 0; /* 2 */ 159 | } 160 | 161 | /** 162 | * Show the overflow in IE. 163 | * 1. Show the overflow in Edge. 164 | */ 165 | 166 | button, 167 | input { 168 | /* 1 */ 169 | overflow: visible; 170 | } 171 | 172 | /** 173 | * Remove the inheritance of text transform in Edge, Firefox, and IE. 174 | * 1. 
Remove the inheritance of text transform in Firefox. 175 | */ 176 | 177 | button, 178 | select { 179 | /* 1 */ 180 | text-transform: none; 181 | } 182 | 183 | /** 184 | * Correct the inability to style clickable types in iOS and Safari. 185 | */ 186 | 187 | button, 188 | [type="button"], 189 | [type="reset"], 190 | [type="submit"] { 191 | -webkit-appearance: button; 192 | } 193 | 194 | /** 195 | * Remove the inner border and padding in Firefox. 196 | */ 197 | 198 | button::-moz-focus-inner, 199 | [type="button"]::-moz-focus-inner, 200 | [type="reset"]::-moz-focus-inner, 201 | [type="submit"]::-moz-focus-inner { 202 | border-style: none; 203 | padding: 0; 204 | } 205 | 206 | /** 207 | * Restore the focus styles unset by the previous rule. 208 | */ 209 | 210 | button:-moz-focusring, 211 | [type="button"]:-moz-focusring, 212 | [type="reset"]:-moz-focusring, 213 | [type="submit"]:-moz-focusring { 214 | outline: 1px dotted ButtonText; 215 | } 216 | 217 | /** 218 | * Correct the padding in Firefox. 219 | */ 220 | 221 | fieldset { 222 | padding: 0.35em 0.75em 0.625em; 223 | } 224 | 225 | /** 226 | * 1. Correct the text wrapping in Edge and IE. 227 | * 2. Correct the color inheritance from `fieldset` elements in IE. 228 | * 3. Remove the padding so developers are not caught out when they zero out 229 | * `fieldset` elements in all browsers. 230 | */ 231 | 232 | legend { 233 | box-sizing: border-box; /* 1 */ 234 | color: inherit; /* 2 */ 235 | display: table; /* 1 */ 236 | max-width: 100%; /* 1 */ 237 | padding: 0; /* 3 */ 238 | white-space: normal; /* 1 */ 239 | } 240 | 241 | /** 242 | * Add the correct vertical alignment in Chrome, Firefox, and Opera. 243 | */ 244 | 245 | progress { 246 | vertical-align: baseline; 247 | } 248 | 249 | /** 250 | * Remove the default vertical scrollbar in IE 10+. 251 | */ 252 | 253 | textarea { 254 | overflow: auto; 255 | } 256 | 257 | /** 258 | * 1. Add the correct box sizing in IE 10. 259 | * 2. Remove the padding in IE 10. 260 | */ 261 | 262 | [type="checkbox"], 263 | [type="radio"] { 264 | box-sizing: border-box; /* 1 */ 265 | padding: 0; /* 2 */ 266 | } 267 | 268 | /** 269 | * Correct the cursor style of increment and decrement buttons in Chrome. 270 | */ 271 | 272 | [type="number"]::-webkit-inner-spin-button, 273 | [type="number"]::-webkit-outer-spin-button { 274 | height: auto; 275 | } 276 | 277 | /** 278 | * 1. Correct the odd appearance in Chrome and Safari. 279 | * 2. Correct the outline style in Safari. 280 | */ 281 | 282 | [type="search"] { 283 | -webkit-appearance: textfield; /* 1 */ 284 | outline-offset: -2px; /* 2 */ 285 | } 286 | 287 | /** 288 | * Remove the inner padding in Chrome and Safari on macOS. 289 | */ 290 | 291 | [type="search"]::-webkit-search-decoration { 292 | -webkit-appearance: none; 293 | } 294 | 295 | /** 296 | * 1. Correct the inability to style clickable types in iOS and Safari. 297 | * 2. Change font properties to `inherit` in Safari. 298 | */ 299 | 300 | ::-webkit-file-upload-button { 301 | -webkit-appearance: button; /* 1 */ 302 | font: inherit; /* 2 */ 303 | } 304 | 305 | /* Interactive 306 | ========================================================================== */ 307 | 308 | /* 309 | * Add the correct display in Edge, IE 10+, and Firefox. 310 | */ 311 | 312 | details { 313 | display: block; 314 | } 315 | 316 | /* 317 | * Add the correct display in all browsers. 
318 | */ 319 | 320 | summary { 321 | display: list-item; 322 | } 323 | 324 | /* Misc 325 | ========================================================================== */ 326 | 327 | /** 328 | * Add the correct display in IE 10+. 329 | */ 330 | 331 | template { 332 | display: none; 333 | } 334 | 335 | /** 336 | * Add the correct display in IE 10. 337 | */ 338 | 339 | [hidden] { 340 | display: none; 341 | } 342 | -------------------------------------------------------------------------------- /assets/style/_pagination.scss: -------------------------------------------------------------------------------- 1 | .pagination { 2 | display: flex; 3 | 4 | a { 5 | color: #fff; 6 | font-size: 0.8125rem; 7 | line-height: 2.125rem; 8 | background: $body-color; 9 | border-radius: 2.125rem; 10 | text-decoration: none; 11 | } 12 | 13 | .prev { 14 | padding-left: 1rem; 15 | padding-right: 1.125rem; 16 | } 17 | 18 | .next { 19 | margin-left: auto; 20 | padding-left: 1.125rem; 21 | padding-right: 1rem; 22 | } 23 | } 24 | -------------------------------------------------------------------------------- /assets/style/style.scss: -------------------------------------------------------------------------------- 1 | $primary-color: #0088ff; 2 | $body-color: #272721; 3 | $secondary-color: black; 4 | 5 | @import "./normalize.scss"; 6 | @import "./base.scss"; 7 | @import "./content.scss"; 8 | @import "./header.scss"; 9 | @import "./pagination.scss"; 10 | -------------------------------------------------------------------------------- /config.toml: -------------------------------------------------------------------------------- 1 | title = "Data structures in practice" 2 | baseURL = "https://www.data-structures-in-practice.com/" 3 | languageCode = "en-us" 4 | 5 | pygmentsCodeFences=true 6 | 7 | disqusShortname = "data-structures-in-practice" 8 | googleAnalytics = "UA-74647525-2" 9 | 10 | [Params] 11 | description = "Learn about data structures and how they are used in open source projects." 12 | author = "Edd Yerburgh" 13 | 14 | [menu] 15 | [[menu.main]] 16 | identifier = "about" 17 | name = "about" 18 | url = "/about/" 19 | weight = -110 20 | 21 | [permalinks] 22 | posts = "/:title" 23 | -------------------------------------------------------------------------------- /content/about.md: -------------------------------------------------------------------------------- 1 | --- 2 | title: "About" 3 | type: "page" 4 | comments: false 5 | --- 6 | 7 | This is a series on data structures and how they are used in open source projects, like Linux and Ruby. 8 | -------------------------------------------------------------------------------- /content/posts/arrays.md: -------------------------------------------------------------------------------- 1 | --- 2 | Title: "Arrays" 3 | Summary: Learn what arrays are, why they're useful, and how they are implemented in C and Ruby. 4 | date: 2019-05-18T12:27:04+01:00 5 | --- 6 | 7 | In order to master data structures you need a solid understanding of the basics. In this post you'll learn what arrays are, why they are useful, and how they are implemented in C and Ruby. 8 | 9 | ## What is an array? 10 | 11 | An array is a data structure that holds a collection of elements. 12 | 13 | For example, this is an array in the Ruby programming language: 14 | 15 | ```ruby 16 | arr = ['a','b','c','d','e'] 17 | ``` 18 | 19 | Generally, an array is **a contiguous piece of memory**, where each element exists one after the other. 
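A quick way to see this contiguity is to print the addresses of consecutive elements. Here's a minimal C sketch (assuming a platform where `int` is 4 bytes, so each address is 4 bytes after the previous one):

```c
#include <stdio.h>

int main(void) {
    int arr[5] = {10, 20, 30, 40, 50};

    // Each element sits directly after the previous one in memory,
    // so consecutive addresses differ by sizeof(int) (typically 4 bytes).
    for (int i = 0; i < 5; i++) {
        printf("arr[%d] is at %p\n", i, (void *)&arr[i]);
    }
    return 0;
}
```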
20 | 21 | You can imagine an array as a list of boxes containing a value and a number (an _index_) to identify the box. 22 | 23 |  24 | 25 | Array elements are accessed by their index number. Normally arrays are 0 indexed, meaning the first element is at index `0`: 26 | 27 | ```ruby 28 | arr[0] #=> "a" 29 | arr[1] #=> "b" 30 | ``` 31 | 32 | Arrays provide **random access** to data. Theoretically, it takes the same amount of time to access an element at index `1` as it does to access an element at index `1000`. That's an O(1) operation in [Big O notation](../big-o). 33 | 34 | There are two common array implementations in programming languages: static arrays, and dynamic arrays. Static arrays cannot grow in size once they have been defined, whereas dynamic arrays can. 35 | 36 | _Note: static arrays shouldn't be confused with C arrays defined with the `static` keyword. In this post, I use static to mean an array that can't increase in size once defined._ 37 | 38 | C arrays are static. To define an array in C, you provide the data type that the array will hold (for example `int`) and the size of the array: 39 | 40 | ```c 41 | int arr[5]; 42 | ``` 43 | 44 | Once defined, you can access and set array data using an index: 45 | 46 | ```c 47 | arr[0] = 0; 48 | ``` 49 | 50 | But you can't increase the number of elements that the array can hold. 51 | 52 | In contrast, dynamic arrays grow to accommodate extra elements. Dynamic arrays are often implemented as a class with methods to operate on the array, like the `push` method: 53 | 54 | ```ruby 55 | arr = ['a','b'] 56 | 57 | arr.push('c') #=> ["a", "b", "c"] 58 | ``` 59 | 60 | Arrays are a fundamental data structure, and almost every language implements them, but why should you use them? 61 | 62 | ## Why use arrays? 63 | 64 | Random memory access is the most compelling reason to use an array. Algorithms like [binary search](https://en.wikipedia.org/wiki/Binary_search_algorithm) use random access to speed up tasks by orders of magnitude. 65 | 66 | Although access is fast, other array operations are expensive. Take a look at the worst case complexity table for array operations: 67 | 68 | | Operation | Worst case | 69 | | --------- | ---------- | 70 | | Access | O(1) | 71 | | Search | O(n) | 72 | | Insertion | O(n) | 73 | | Deletion | O(n) | 74 | 75 | Other data structures like stacks, and hash-tables offer improved search, insertion, and deletion times. But random access is so damn powerful that many of the other data structures are built using arrays. 76 | 77 | ## How are arrays implemented? 78 | 79 | Because arrays are often a language feature, you need to look at the level below the language to see how they are implemented. 80 | 81 | In this section you'll learn how static arrays are implemented by looking at the assembly code generated from a C array, and then how dynamic arrays are implemented by looking at the code of a Ruby interpreter. 82 | 83 | ### Implementing static arrays in C 84 | 85 | C is a compiled language. In order to execute a C program, you must first convert it to machine code by running it through a compiler. 86 | 87 | Arrays in C contain _homogenous data_, where each element is of the same data type: 88 | 89 | ```c 90 | int arr[5]; 91 | 92 | arr[0] = 1; 93 | ``` 94 | 95 | A C compiler uses the data type and size of an array to allocate space in memory. For example, on my Mac an `int` is 4 bytes, So an array that can hold 5 `int` values requires 20 bytes of memory. 
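You can check these numbers yourself with `sizeof`. A small sketch (the 4 and 20 below assume a 4-byte `int`, which is typical but not guaranteed by the C standard):

```c
#include <stdio.h>

int main(void) {
    int arr[5];

    printf("%zu\n", sizeof(int));  // 4 on most platforms
    printf("%zu\n", sizeof(arr));  // 5 * sizeof(int), i.e. 20 here
    return 0;
}
```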
96 | 97 |  98 | 99 | When C is compiled to machine code, array elements are accessed by their memory address, which is calculated using the element index and data type. You can see how this works by looking at the assembly code generated from a C program. 100 | 101 | _Note: assembly code is human-readable source code that's very close to actual machine code._ 102 | 103 | Take the following C code that declares an array `arr` with space for 100 `int` elements: 104 | 105 | _Note: `extern` is used to make the assembly easier to understand. `extern` tells the compiler that the memory for `arr` is allocated elsewhere._ 106 | 107 | ```c 108 | extern int arr[100]; 109 | 110 | arr[0] = 1; 111 | arr[1] = 2; 112 | arr[99] = 3; 113 | ``` 114 | 115 | The base memory address of `arr` is represented as `arr(%rip)` in the assembly code. So the following code moves a value of 1 (`$1`) to the array base memory address: 116 | 117 | ```S 118 | movl $1, _arr(%rip) 119 | ``` 120 | 121 | Which is the equivalent of: 122 | 123 | ```c 124 | arr[0] = 1; 125 | ``` 126 | 127 | The assembly accesses array elements at different indexes by adding an _offset_ to the array base address, calculated as `index * data_type_bytes`. For example, index `1` is `4` bytes from the base address, because the index (`1`) multiplied by the number of bytes of the data type (`4`) is `4`: 128 | 129 | ```S 130 | movl $2, _arr+4(%rip) 131 | ``` 132 | 133 | And index `99` is 396 bytes (`99 * 4`) from the base address: 134 | 135 | ```S 136 | movl $3, _arr+396(%rip) 137 | ``` 138 | 139 | As you can see, arrays in C translate very closely to assembly code. 140 | 141 | Because C arrays are static and can't grow in size, they can be difficult to work with. Dynamic arrays solve this problem. 142 | 143 | ### Implementing dynamic arrays in Ruby 144 | 145 | Ruby is an interpreted language. Instead of being compiled to machine code, Ruby programs are run by another program known as an interpreter. There are many different Ruby interpreters, but I'll show you how Ruby arrays are implemented in the original Ruby interpreter written in C (CRuby). 146 | 147 | 148 | _Note: The code examples are from CRuby v1\_0, which uses legacy C syntax. The current CRuby array implementation contains optimizations that make it more difficult to explain, but it works in a similar way._ 149 | 150 | 151 | Unlike C arrays, Ruby arrays are implemented as a class with methods for operating on the array. For example the `push` method, which adds new elements to an array: 152 | 153 | ```ruby 154 | arr = [0,1] 155 | 156 | arr.push(2) #=> [0, 1, 2] 157 | ``` 158 | 159 | Ruby arrays can also handle different data types: 160 | 161 | ```ruby 162 | arr = ['str', 1, nil] 163 | ``` 164 | 165 | So how is this dynamic array implemented? 166 | 167 | The basic idea is to create a wrapper object to manage the array data. The wrapper handles access to the array elements, and reallocates memory if the array needs to increase in size. 168 | 169 | In CRuby an array is defined in a C struct—`RArray`. `RArray` contains a `len` value, which is the current number of elements in the array (known as the _length_). It also contains a `capa` value, which is the maximum capacity of the array, and a `ptr` which is a pointer (reference) to a contiguous chunk of memory. 
170 | 171 | ```c 172 | struct RArray { 173 | struct RBasic basic; 174 | UINT len, capa; 175 | VALUE *ptr; 176 | }; 177 | ``` 178 | 179 | The memory that `ptr` points to contains a contiguous chunk of memory with space for `capa` number of elements. 180 | 181 |  182 | 183 | Notice that the `ptr` data type is `VALUE`: 184 | 185 | ```c 186 | struct RArray { 187 | // .. 188 | VALUE *ptr; 189 | }; 190 | ``` 191 | 192 | `VALUE` represents any valid Ruby object, either as a pointer or as the value itself. This is how Ruby handles multiple data types in the same array. 193 | 194 | Ruby achieves random access to array elements by using the pointer (`ptr`) and accessing the element at the offset. You can see this in the CRuby function that accesses an element at a given offset (`ary_entry`): 195 | 196 | ```c 197 | ary_entry(ary, offset) 198 | struct RArray *ary; 199 | int offset; 200 | { 201 | // .. 202 | 203 | return ary->ptr[offset]; 204 | } 205 | ``` 206 | 207 | To learn how Ruby dynamically increases the array capacity, you can look at the implementation of the `<<` (append) method. `<<` adds a new element to the end of an array: 208 | 209 | ```ruby 210 | arr = [] 211 | arr << 0 #=> [0] 212 | arr << 1 #=> [0,1] 213 | ``` 214 | 215 | Internally, Ruby uses the function `ary_push` to implement the `<<` method. 216 | 217 | `ary_push` calls a helper function `ary_store` to store an element (`item`) at the index of the arrays current length (`ary->len`). This has the effect of appending the element to the array, because the last item in a Ruby array is always at index `length - 1`: 218 | 219 | ```c 220 | ary_push(ary, item) 221 | struct RArray *ary; 222 | VALUE item; 223 | { 224 | ary_store(ary, ary->len, item); 225 | return (VALUE) ary; 226 | } 227 | ``` 228 | 229 | The `ary_store` function is where the magic happens. It attempts to add a new element (`val`) to `ary -> ptr` at the specified index, but will reallocate memory if needed. 230 | 231 | `ary_store` first checks that the index for the new array element (`idx`) can fit in the currently assigned memory (by checking the `ary -> capa` value). If it can't, then `ary_store` allocates more memory using `REALLOC_N` (which uses `realloc` internally): 232 | 233 | _Note: `realloc` expands the current area of memory if possible. If there isn't enough free memory following the existing area `realloc` allocates a new memory block, copies the old memory area to the newly allocated memory, and frees the old memory area_ 234 | 235 | ```c 236 | void ary_store(ary, idx, val) 237 | struct RArray *ary; 238 | int idx; 239 | VALUE val; 240 | { 241 | if (idx >= ary->capa) { 242 | ary->capa = idx + ARY_DEFAULT_SIZE; 243 | REALLOC_N(ary->ptr, VALUE, ary->capa); 244 | } 245 | 246 | // .. 247 | } 248 | ``` 249 | 250 | `ary_store` then updates the `ary -> len` value, and adds the new item to the array `ptr` at the index (`idx`): 251 | 252 | ```c 253 | void ary_store(ary, idx, val) 254 | struct RArray *ary; 255 | int idx; 256 | VALUE val; 257 | { 258 | // .. 
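    /* At this point the buffer is guaranteed to be large enough. If the
       element lands past the current end of the array, extend len, then
       store the value at the requested index. */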
259 | 260 | if (idx >= ary->len) { 261 | ary->len = idx + 1; 262 | } 263 | ary->ptr[idx] = val; 264 | } 265 | ``` 266 | 267 | The full function looks like this: 268 | 269 | ```c 270 | void ary_store(ary, idx, val) 271 | struct RArray *ary; 272 | int idx; 273 | VALUE val; 274 | { 275 | if (idx >= ary->capa) { 276 | ary->capa = idx + ARY_DEFAULT_SIZE; 277 | REALLOC_N(ary->ptr, VALUE, ary->capa); 278 | } 279 | 280 | if (idx >= ary->len) { 281 | ary->len = idx + 1; 282 | } 283 | ary->ptr[idx] = val; 284 | } 285 | ``` 286 | 287 | You might have noticed that insertion to a CRuby array takes a different amount of time depending on whether the array is at capacity or not. Adding a new element takes constant time (O(1)) if the array has capacity for new elements, but if the array does not have capacity it will take O(n) time to allocate new memory and copy over existing values. Instead of using worst-case analysis, we can use a technique called amortized analysis to get a better idea of the cost of an insertion operation. 288 | 289 | ### Amortized analysis 290 | 291 | Amortized analysis is a way of analyzing the run time of an operation on average over many operations, rather than the worst case of a single operation. Amortized analysis is useful when looking at data structures with operations that occasionally require an expensive operation, like allocating memory, but normally take much less time. 292 | 293 | One method of calculating the amortized time is the aggregate method. You analyze how long k operations take in total, and then divide by k. 294 | 295 | For the CRuby array implementation in this blog post, the array capacity begins at 16, and the reallocation adds 16 extra capacity to the array each time it reallocates memory. The amortized cost of this works out to O(n), which is the same as the worse case insertion operation. This means the original CRuby dynamic array wasn't efficient for insertion operations. 296 | 297 | In 2009 CRuby improved performance by doubling the array capacity each time memory is reallocated. This improvement means that the total cost of k operations divided by k is equal to a constant time. So the amortized cost of insertion in the new CRuby implementation is O(1). (for a rigorous explanation of how this works, see these [lecture notes on amortized analysis](https://www2.cs.duke.edu/courses/fall17/compsci330/lecture17note.pdf)) 298 | 299 | ## Conclusion 300 | 301 | Arrays are a fundamental data structure that can be either dynamic or static. Although static arrays are easier to implement in a language, dynamic arrays are easier to use from a programmers perspective. 302 | 303 | Future posts in this series will explore how arrays are used to implement more complicated data structures. 304 | 305 | I hope you enjoyed this post. If you have any questions, please leave a comment. 306 | -------------------------------------------------------------------------------- /content/posts/big-o-notation.md: -------------------------------------------------------------------------------- 1 | --- 2 | title: "Big O notation" 3 | date: 2019-05-18T12:27:04+01:00 4 | --- 5 | 6 | In this post you'll learn how to use big O notation to compare the performance of different algorithms. 7 | 8 | ## Measuring the running time of an algorithm 9 | 10 | There are two ways to measure the running time of an algorithm: 11 | 12 | 1. Experimental analysis 13 | 2. Theoretical analysis 14 | 15 | Experimental analysis involves running a program and measuring how long it takes to complete. 
The problem with this approach is that programs run non-deterministically and at different speeds depending on the hardware, compiler, programming language, and other factors. 16 | 17 | The alternative is theoretical analysis. One approach to theoretical analysis is to approximate the running time by counting the number of steps an algorithm takes for a given input. 18 | 19 | ## Calculating algorithm steps 20 | 21 | Steps are a unit that measure how long an operation takes to complete on a hypothetical computer. 22 | 23 | The rules for this hypothetical computer are: 24 | 25 | 1. Simple operations take 1 step 26 | 2. Loops and subroutines are made up of simple operations 27 | 3. Memory is unlimited and memory access takes 0 steps 28 | 29 | Simple operations can be arithmetic operations like `*`, comparison operations like `==`, assignment operations like `=`, and statements like `return`. They all take 1 step. 30 | 31 | Loops and subroutines are made up of the simple operations performed by them. If a loop performs 3 operations each iteration, then n loops would take a total of 3n steps. 32 | 33 | The final rule is that memory is unlimited and access takes 0 steps. This makes analysis simpler by ignoring the fact that memory access times vary on real-world machines. 34 | 35 | With these rules you can calculate the total number of steps a program will take to run on the hypothetical computer. 36 | 37 | For example, the following `sum` function performs 3 simple operations: `=`, `return`, and `+`. So the function takes 3 steps to complete: 38 | 39 | ```c 40 | int sum(int a, int b) { 41 | int temp = a + b; 42 | return temp; 43 | } 44 | ``` 45 | 46 | You can express the total number of steps an algorithm takes using a mathematical function. A mathematical function expresses the output for a given input. In the case of the `sum` function, the number of steps is always 3. So a mathematical function for the total steps taken by the algorithm for an input of n is f(n) = 3. 47 | 48 | The following `exp` function performs 1 assignment operation and 2 operations for each loop, where the number of loops is based on the input `e`. So the total number of steps is 2e + 1, which can be expressed as f(n) = 2n + 1: 49 | 50 | ```c 51 | int exp(int base, int e) { 52 | int total = 1; 53 | while(e) { 54 | total *= base; 55 | e--; 56 | } 57 | } 58 | ``` 59 | 60 | _Note: It can be tricky to determine what a simple operation is. Is `*=` 1 operation, or 2 operations? Generally it's OK to choose one or the other, as long as you're consistent._ 61 | 62 | What about an algorithm where the number of operations depends on the state of the input? For example, an algorithm that returns either the index of a value if it exists in an array, or `-1` if it does not. If the value being searched for is the first element in the array then the function will return after 1 loop, but if the value doesn't exist in the array the algorithm will loop over each element in the array. How do you decide which case to analyze? 63 | 64 | ## Analyzing the worst-case 65 | 66 | I like to think of myself as a realistic optimist, but when I analyze algorithms I'm usually a pessimist. Why? Consider the cases you can analyze: 67 | 68 | 1. Best-case 69 | 2. Average-case 70 | 3. Worst-case 71 | 72 | The best-case can be misleading. An algorithm might run quickly in the best-case, but take an impossible amount of time in the average-case. 73 | 74 | The average-case is intuitively the best choice, but it's difficult to calculate. 
You might need to use probability theory and make subjective assumptions about the algorithms input. 75 | 76 | The worst-case is both easy to determine and useful in practice. The worst-case is an upper-bounds on the algorithms running time, which guarantees the algorithm will finish on time. 77 | 78 | Using the worst-case you can analyze algorithms like the `find_index` function below. In the worst-case, `find_index` takes 3n + 2 steps (f(n) = 3n + 2), because in the worst-case the value (`val`) doesn't exist in the array (`n`) and the function will need to loop through the array n times. The loop performs 3 operations each loop, hence 3n: 79 | 80 | ```c 81 | int find_index(int arr[], int n, int val) { 82 | for(int i = 0; i < n; i++) { 83 | if(arr[i] == val) { 84 | return i; 85 | } 86 | } 87 | 88 | return -1; 89 | } 90 | ``` 91 | 92 | Expressing algorithms as mathematical functions is a good way to see how the algorithm handles different size inputs, but the detail of the functions can make it difficult to compare them. For example consider the following functions: 93 | 94 | f(n) = 3n + 1 95 | 96 | f(n) = 16n + 3 97 | 98 | f(n) = 2n + 1230 99 | 100 | These functions look quite different, but they grow at roughly the same rate as the input (n) gets large. When you measure algorithms, you normally care about how well it runs as the input gets large. 101 | 102 | ## Using Big O notation 103 | 104 | Big O notation is a way of classifying how quickly mathematical functions grow as their input gets large. 105 | 106 | Big O works by removing clutter from functions to focus on the terms that have the biggest impact on the growth of the function. In big O notation f(n) = 5n + 42 and f(n) = 2n can both be written as O(n), pronounced "order of n". 107 | 108 | The clutter that I'm talking about are the constant factors and lower-order terms in an expression. Constant factors are the constant values that a term is multiplied by. For example in the term 2n, 2 is a constant factor of n. 109 | 110 | If all terms in an expression have the same variable, the leading term is the term with the highest exponent, and the lower-order terms are all other terms in the expression. For example, in the expression 2n³ + n² + 2n + 1, n³ is the leading term, all other terms are lower-order terms. 111 | 112 | You can ignore constant factors and lower-order terms because the leading term always determines how output grows as the input gets large. You can see this by looking at a graph of linear functions f(n) = n, f(n) = 2n, and f(n) = 3n, and a quadratic function f(n) = n². The linear functions barely grow in comparison to the quadratic function. 113 | 114 |  115 | 116 | Since big O is a mathematical concept that's been borrowed by computer scientists, it has a formal definition: 117 | 118 | f(n) = O(g(n)) as n→∞ iff |f(n)| ≤ Kg(n) for all n ≥ n₀. 119 | 120 | A less precise (but easier-to-understand) definition is that: 121 | 122 | > For any two functions that take n as inputs: f and g. f can be written as O(g), if any value of f is less than or equal to g multiplied by some constant (K) for every value of n when n is greater than or equal to some value (n₀). 123 | 124 | For example, take the function f(n) = 2x² + 400. f can be written as O(n²) because there is a function g(n) = n² that when multiplied by a constant K (for example 4) is always bigger than f after a certain value (n₀). 
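To check that against the formal definition: with g(n) = n², K = 4, and n₀ = 15, Kg(15) = 4 · 225 = 900 while f(15) = 2 · 225 + 400 = 850, so Kg(n) ≥ f(n) holds at n₀, and since 4n² grows faster than 2n² + 400, it keeps holding for every larger n.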
125 | 126 |  127 | 128 | Note that using this definition of big O you could describe the function f(n) = 2x² + 400 as O(n²), O(n³), or O(n⁴). In practice, you should use the lowest-order term that satisfies the definition. 129 | 130 | ## Common classes in Big O 131 | 132 | Big O creates classes of algorithm by ignoring the details, which makes it easy to compare algorithms to each other. 133 | 134 | The common classes have their own terminology. For example an O(n) algorithm is said to run in linear time. You should learn the terminology for the most common classes: 135 | 136 | | Big O | Name | 137 | | -------- | ----------- | 138 | | O(1) | constant | 139 | | O(log n) | logarithmic | 140 | | O(n) | linear | 141 | | O(n²) | quadratic | 142 | | O(2ⁿ) | exponential | 143 | 144 | You can see how the classes compare in the following graph. 145 | 146 |  147 | 148 | To make this more concrete, take a look at some examples. 149 | 150 | ### O(1) 151 | 152 | O(1) is constant time. No matter the size of the input the algorithm will take the same amount of time to complete. For example, a `sum` function: 153 | 154 | ```c 155 | int sum(int a, int b) { 156 | return a + b; 157 | } 158 | ``` 159 | 160 | O(1) solutions are the best, although they aren't possible for most problems. 161 | 162 | ### O(log n) 163 | 164 | O(log n) is a logarithmic function. Logarithmic functions grow slowly because they halve the amount of data they work with on each iteration. 165 | 166 | | Input | Steps (rounded up) | 167 | | ----------- | ------------------ | 168 | | 100 | 7 | 169 | | 1,000 | 10 | 170 | | 10,000 | 13 | 171 | | 100,000 | 16 | 172 | | 1,000,000 | 20 | 173 | | 100,000,000 | 27 | 174 | 175 | Even at 100,000,000 input elements, a logarithmic function barely breaks a sweat at 27 steps! 176 | 177 | An example of O(log n) is [binary search](https://en.wikipedia.org/wiki/Binary_search_algorithm), which is an algorithm that searches for a value in a sorted array. 178 | 179 | ### O(n) 180 | 181 | Linear functions grow linearly with `n`. For example an algorithm to calculate the exponent of a number: 182 | 183 | ```c 184 | int exp(int base, int e) { 185 | int total = 1; 186 | while(e) { 187 | total *= base; 188 | e--; 189 | } 190 | } 191 | ``` 192 | 193 | Linear algorithms are pretty good and can handle large inputs. 194 | 195 | ### O(n²) 196 | 197 | Quadratic functions grow quickly, so O(n²) algorithms run slowly. An example is an algorithm that checks for duplicates in an array by looping over each element, and then looping over each of the previous element to check for matches: 198 | 199 | ```c 200 | bool contains_duplicates(int arr[], int n) { 201 | for (int i = 0; i < n; i++) { 202 | for (int j = 0; j < i; j++) { 203 | if (arr[j] == arr[i] && i != j) { 204 | return true; 205 | } 206 | } 207 | } 208 | return false; 209 | } 210 | ``` 211 | 212 | Nested loops are a sign that your algorithm is probably quadratic. 213 | 214 | ### O(2ⁿ) 215 | 216 | Exponential functions grow even faster than quadratic functions, so O(2ⁿ) algorithms are incredibly slow. An example of an exponential O(2ⁿ) algorithm is a recursive algorithm to find the nth term of the fibonacci sequence: 217 | 218 | ```c 219 | int fib(int n) { 220 | if (n <= 1) { 221 | return n; 222 | } 223 | return fib(n - 1) + fib(n - 2); 224 | } 225 | ``` 226 | 227 | ## Downsides of worst-case Big O analysis 228 | 229 | Although worst-case big O is the standard for algorithm analysis there are a couple of problems with it: 230 | 231 | 1. Big O ignores constants 232 | 2. 
The worst-case can be rare 233 | 234 | First, big O ignores constants and constants are sometimes large. They can make a difference if the input is small, and it can be difficult to compare algorithms of the same class because the constants are hidden. 235 | 236 | Secondly, the worst-case can be rare. A good example of this is the sorting algorithm quicksort, which has a worst-case runtime of O(n²), but an average-case of O(n log n). quicksort is often used regardless of its worst-case because the worst-case is so rare in practice. 237 | 238 | Despite its downsides, worst-case big O is the most common method of analyzing algorithms, and I'll use it throughout this series. 239 | 240 | ## Conclusion 241 | 242 | Measuring algorithms experimentally is difficult, so instead you can measure them theoretically. You do this by counting the steps an algorithm will take to complete, and expressing the growth rate of the steps using big O notation. 243 | 244 | I hope you enjoyed this post. If you have any questions, please leave a comment. 245 | -------------------------------------------------------------------------------- /content/posts/intrusive-linked-lists.md: -------------------------------------------------------------------------------- 1 | --- 2 | title: "Intrusive linked lists" 3 | date: 2019-09-23 4 | --- 5 | 6 | This post will teach you what intrusive linked lists are and how they are used to manage processes in Linux. 7 | 8 | ## What are intrusive linked lists? 9 | 10 | Intrusive linked lists are a variation of [linked lists]({{< ref "/linked-lists" >}}) where the links are embedded in the structure that's being linked. 11 | 12 | In a typical linked list implementation, a list node contains a `data` pointer to the linked data and a `next` pointer to the next node in the list. 13 | 14 | {{< figure src="/images/intrusive-linked-lists/linked-list-pointers.svg" title="Figure 1: A linked list" >}} 15 | 16 | In an intrusive linked list implementation, the list node contains `next` pointer to the next list node, but no `data` pointer because the list is embedded in the linked object itself. 17 | 18 | {{< figure src="/images/intrusive-linked-lists/intrusive-linked-list-pointers.svg" title="Figure 2: An intrusive linked list" >}} 19 | 20 | A `list` structure for an intrusive singly linked list contains a single `next` pointer to another list node: 21 | 22 | ```c 23 | typedef struct list { 24 | struct list *next; 25 | } list; 26 | ``` 27 | 28 | The `list` structure is then embedded in the structure that will be linked. For example, you might have a `item` structure with a `val` member: 29 | 30 | ```c 31 | typedef struct item { 32 | int val; 33 | list items; 34 | } item; 35 | ``` 36 | 37 | To add a new item `i2` to the list of `i1`, you set the `items.next` pointer of `i1` to the address of `i2.items`: 38 | 39 | ```c 40 | item* i1 = create_item(16); 41 | item* i2 = create_item(18); 42 | 43 | i1->items.next = &i2->items; 44 | ``` 45 | 46 | You can access the object that contains a list node by first getting the address of the list object (e.g. the value of `i1.items.next`). You then subtract the offset of the list member from the address of the list object. 47 | 48 | The **offset** is the number of bytes a member is positioned from the beginning of its containing object. 
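You can see the offset for yourself by printing the address of an `item` and the address of its embedded `items` member. A minimal sketch, reusing the `list` and `item` structures from above (the exact offset depends on the compiler's padding rules; 8 bytes is typical on a 64-bit build):

```c
#include <stdio.h>

typedef struct list {
    struct list *next;
} list;

typedef struct item {
    int val;
    list items;
} item;

int main(void) {
    item i;

    // The offset is the distance from the start of the containing object
    // to the embedded member (typically 8 bytes on a 64-bit build, because
    // the 4-byte int is padded so the pointer is 8-byte aligned).
    printf("item starts at  %p\n", (void *)&i);
    printf("items member at %p\n", (void *)&i.items);
    printf("offset: %zu bytes\n", (size_t)((char *)&i.items - (char *)&i));
    return 0;
}
```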
49 | 50 | {{< figure src="/images/intrusive-linked-lists/intrusive-list-memory-address.svg" title="Figure 3: The address of an object with an embedded list" >}} 51 | 52 | Consider a `list` object in an object `i2` at memory address 0x18. The `list` member is offset 8 bytes from the beginning of the `item` data structure. Therefore, the beginning address of the `i2` object is 0x18 - 8 = 0x10. 53 | 54 | In GCC-compiled C, you can subtract bytes from a pointer by casting the pointer variable to a void pointer (which has a size of 1 byte when compiled with GCC). You can then subtract the bytes from the pointer value without the number being scaled to `num * sizeof(structure)`: 55 | 56 | ```c 57 | item* _i2 = (void *)(i1->items.next) - 8; 58 | ``` 59 | 60 | _Note: Pointer arithmetic on a void pointer is illegal in C, but is supported by GCC. Linux is compiled using GCC, and so it can perform pointer arithmetic on void pointers._ 61 | 62 | Subtracting an absolute value isn't portable, because data types can be different sizes depending on the CPU architecture. A better way is to use the `offsetof` macro. `offsetof` returns the offset of a member from its containing structure (in bytes): 63 | 64 | ```c 65 | item* _s2 = (void *)(i1->items.next) - (offsetof(item, items)); 66 | ``` 67 | 68 | To summarize: 69 | 70 | * The list node is embedded in a containing object. 71 | * The list node points to another list node embedded in the linked object. 72 | * The base address of the linked object is calculated by subtracting the offset of the list member from the memory address of the linked list object. 73 | 74 | After all that pointer arithmetic, you're probably wondering why anyone in their right mind would use an intrusive linked list over a regular linked list. 75 | 76 | ## Why use intrusive linked lists? 77 | 78 | There are two main reasons to use intrusive lists over non-intrusive linked lists: 79 | 80 | * Fewer memory allocations. 81 | * Less cache thrashing. 82 | 83 | With non-intrusive linked lists, creating a new object and adding it to a list requires two memory allocations: one for the object, and one for the list node. With intrusive linked lists, you only need to allocate one object (since the list node is embedded in the object). This means fewer errors to be handled, because there are half as many cases where memory allocation can fail. 84 | 85 | Intrusive linked lists also suffer less from cache thrashing. Iterating through a non-intrusive list node requires dereferencing a list node, and then dereferencing the list data. Intrusive linked lists only require dereferencing the next list node. 86 | 87 | Before looking at how processes are managed using linked lists in Linux, you need to understand doubly and circular linked lists. 88 | 89 | ## Doubly and circular linked lists 90 | 91 | Doubly linked lists and circular linked lists are variations of singly linked lists. Linux uses circular doubly linked lists, so this section will cover both variations. 92 | 93 | A **doubly linked list** is a linked list that keeps pointers to both the next node and the previous node. 
94 | 95 | {{< figure src="/images/intrusive-linked-lists/doubly-linked-list.svg" title="Figure 4: A doubly linked list" >}} 96 | 97 | The list structure would contain an extra `prev` pointer: 98 | 99 | ```c 100 | typedef struct dlist { 101 | struct dlist *next; 102 | struct dlist *prev; 103 | } dlist; 104 | ``` 105 | 106 | Doubly linked lists make deletion and insertion easier, because you only need a reference to a single node to perform the deletion or insertion. 107 | 108 | Another variant of linked lists are **circular linked lists**. A circular linked list is a linked list that never points to a null value. Instead, the last node points to the first node. In a circular doubly linked list, the first node also points to the last node. 109 | 110 | {{< figure src="/images/intrusive-linked-lists/circular-doubly-linked-list.svg" title="Figure 5: A circular doubly linked list" >}} 111 | 112 | A circular linked list makes it easy to iterate through an entire list from any node, without keeping a reference to a specific list head: 113 | 114 | ```c 115 | void list_print_each(list* node) { 116 | list* start = node; 117 | do { 118 | printf("%d,", node->val); 119 | node = node->next; 120 | } while (node != start); 121 | } 122 | ``` 123 | 124 | The most popular linked lists in Linux are circular doubly linked lists. 125 | 126 | ## Linked lists in Linux 127 | 128 | Linux uses linked lists extensively. They are used for all kinds of tasks, from keeping track of free memory slabs, to iterating through each running process. A search for the `struct list_head` structure returns over 10,000 results in Linux 5.2. 129 | 130 | In Linux, list nodes are added and removed a lot more than they are traversed. An analysis of Linux during normal usage found that traversals were only 6% of total linked list operations. Of those, 28% of traversals either occurred on empty lists, or only visited one node (see Rusty Russel's [analysis of linked lists](http://rusty.ozlabs.org/?p=168) for more info). 131 | 132 | As Rusty's analysis suggests, Linux mainly uses linked lists to keep lists of objects when either traversal is infrequent, or when the list size is small. 133 | 134 | Linux includes a few different list structures. The most popular is an intrusive circular doubly linked list. 135 | 136 | ### Implementing intrusive linked lists 137 | 138 | The Linux circular doubly linked list is defined in [include/linux/list.h](https://elixir.bootlin.com/linux/v5.2/source/include/linux/list.h). 139 | 140 | The list structure is named `list_head`. It contains a `next` and `prev` pointer: 141 | 142 | ```c 143 | struct list_head { 144 | struct list_head *next, *prev; 145 | }; 146 | ``` 147 | 148 | You create a linked list of objects by embedding `list_head` as a member on the struct that will be made into a list: 149 | 150 | ```c 151 | struct atmel_sha_drv { 152 | struct list_head head; 153 | // .. 154 | }; 155 | ``` 156 | 157 | A new list can be either statically or dynamically initialized. 158 | 159 | A statically initialized list can use the `LIST_HEAD_INIT` macro: 160 | 161 | ```c 162 | static struct atmel_sha_drv atmel_sha = { 163 | .dev_list = LIST_HEAD_INIT(atmel_sha.dev_list), 164 | // .. 165 | }; 166 | ``` 167 | 168 | `LIST_HEAD_INIT` expands to set the `next` and `prev` pointers of the list node to point to itself: 169 | 170 | ```plain 171 | #define LIST_HEAD_INIT(name) { &(name), &(name) } 172 | ``` 173 | 174 | To initiate a list dynamically, you can use the `INIT_LIST_HEAD` macro. 
Often a separate `list_head` is kept as a head node: 175 | 176 | ```c 177 | static struct list_head hole_cache; 178 | 179 | IINIT_LIST_HEAD(&hole_cache); 180 | ``` 181 | 182 | `INIT_LIST_HEAD` is called with a pointer to a `list` node. Again, the list's `next` and `prev` pointers are set to point to itself: 183 | 184 | ```c 185 | static inline void INIT_LIST_HEAD(struct list_head *list) 186 | { 187 | WRITE_ONCE(list->next, list); 188 | list->prev = list; 189 | } 190 | ``` 191 | 192 | _Note: the `WRITE_ONCE` macro prevents unwanted compiler optimizations when assigning a value._ 193 | 194 | 195 | After a list has been initialized, new items can be added with `list_add`: 196 | 197 | ```c 198 | struct hole { 199 | // .. 200 | struct list_head list; 201 | }; 202 | 203 | static struct hole initholes[64]; 204 | 205 | // .. 206 | 207 | for(i = 0; i < 64; i++) 208 | list_add(&(initholes[i].list), &hole_cache); 209 | ``` 210 | 211 | `list_add` accepts a head node pointer, and a pointer to the node that should be inserted. It then calls `__list_add` to insert the new node between the `head` node, and `head->next`: 212 | 213 | ```c 214 | static inline void list_add(struct list_head *new, struct list_head *head) 215 | { 216 | __list_add(new, head, head->next); 217 | } 218 | ``` 219 | 220 | `__list_add` reassigns pointers to add the new list node: 221 | 222 | ```c 223 | static inline void __list_add(struct list_head *new, 224 | struct list_head *prev, 225 | struct list_head *next) 226 | { 227 | // .. 228 | 229 | next->prev = new; 230 | new->next = next; 231 | new->prev = prev; 232 | WRITE_ONCE(prev->next, new); 233 | } 234 | ``` 235 | 236 | Linux provides a `list_entry` macro to access the containing data structure of a list node: 237 | 238 | ```c 239 | struct hole *ret; 240 | 241 | ret = list_entry(hole_cache.next, struct hole, list); 242 | ``` 243 | 244 | This uses the `offsetof` trick from earlier in this post. `list_entry` expands to a `container_of` macro: 245 | 246 | ```plain 247 | #define list_entry(ptr, type, member) \ 248 | container_of(ptr, type, member) 249 | ``` 250 | 251 | The `container_of` macro calculates the containing object's address by subtracting the offset of the list node from the address of the `list_head` object: 252 | 253 | ```plain 254 | #define container_of(ptr, type, member) ({ \ 255 | void *__mptr = (void *)(ptr); \ 256 | ((type *)(__mptr - offsetof(type, member))); }) 257 | ``` 258 | 259 | That's the basic implementation of intrusive linked lists in Linux. 260 | 261 | ### Tracking processes 262 | 263 | In POSIX, a process is an executing instance of a program. One of the kernel's key responsibilities is to create processes and schedule them so that each process runs for an appropriate amount of time. 264 | 265 | Internally, Linux refers to processes as **tasks**. As tasks are created, they are added to a **task list**. This list can be used when Linux needs to iterate over every single task, for example when sending a signal to each process. 266 | 267 | Linux represents tasks as a `tast_struct`. A `task_struct` includes a `list_head` member named `tasks` to link between tasks: 268 | 269 | ```c 270 | struct task_struct { 271 | // .. 272 | pid_t pid; 273 | // .. 274 | struct list_head tasks; 275 | // .. 276 | }; 277 | ``` 278 | 279 | The initial task is statically allocated as `init_task`, and the `tasks` field is initialized with itself as the head: 280 | 281 | ```c 282 | struct task_struct init_task = { 283 | // .. 
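	/* LIST_HEAD_INIT points next and prev at the node itself, so the
	   first task starts out as a one-element circular list that later
	   tasks are linked into. */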
284 | .tasks = LIST_HEAD_INIT(init_task.tasks), 285 | }; 286 | ``` 287 | 288 | Future tasks are added to this task list when they are created. 289 | 290 | New tasks are created in Linux by forking. This is implemented in `copy_process`, which creates a new `task_struct` from the currently executing process (`current`) by calling `dup_task_struct`: 291 | 292 | ```c 293 | struct task_struct *copy_process( 294 | // .. 295 | ) 296 | { 297 | struct task_struct *p; 298 | // .. 299 | p = dup_task_struct(current, node); 300 | // .. 301 | } 302 | ``` 303 | 304 | After a new task is created, it's added to the task list by calling `list_add_tail_rcu` with the address of `init_task.tasks`: 305 | 306 | ```c 307 | struct task_struct *copy_process( 308 | // .. 309 | ) 310 | { 311 | // .. 312 | list_add_tail_rcu(&p->tasks, &init_task.tasks); 313 | } 314 | ``` 315 | 316 | `list_add_tail_rcu` is a variation of the `list_add` function from earlier. It uses RCU, which is a synchronization mechanism that supports concurrency between a single writer and multiple readers (no need to go into the details). `list_add_tail_rcu` has the effect of adding the newly created task's `tasks` node to the tail of the `init_task` task list. 317 | 318 | As mentioned, the task list is mainly used when the Kernel needs to perform an action on each task. For example, freezing tasks when a computer is going into hibernate mode, swapping tasks to an updated version of the kernel during a live patch, or when a signal is sent to each process. Most of these uses are rare, and so the efficiency of iterating over each item in a list isn't a major concern. 319 | 320 | One of the times a signal is sent to each process is when the SysRq key and e are pressed together, which terminates all processes. 321 | 322 | _Note: SysReq is a key that was added in the 80s, Linux adds default shortcuts you can use with it._ 323 | 324 | The kernel registers a handler function that's called when the SysReq + e keys are pressed. The handler calls `send_sig_all`, with `SIGTERM` which sends a `SIGTERM` signal to all processes apart from the `init` process and kernel tasks. It does this with the `for_each_process` macro. You can see from the code that it does nothing if the process is a kernel thread or the `init` task, otherwise it calls `do_send_sig_info`. 325 | 326 | ```c 327 | static void send_sig_all(int sig) 328 | { 329 | struct task_struct *p; 330 | 331 | // .. 332 | 333 | for_each_process(p) { 334 | if (p->flags & PF_KTHREAD) 335 | continue; 336 | if (is_global_init(p)) 337 | continue; 338 | 339 | do_send_sig_info(sig, SEND_SIG_PRIV, p, PIDTYPE_MAX); 340 | } 341 | 342 | // .. 343 | } 344 | ``` 345 | 346 | At this point it's macros all the way down. The `for_each_process` macro expands into a `for` loop that loops over each item in the list by changing the value of `p`. Starting at `init_task`, it uses the `next_task` macro to reach the next task in the list: 347 | 348 | ```plain 349 | #define for_each_process(p) \ 350 | for (p = &init_task ; (p = next_task(p)) != &init_task ; ) 351 | ``` 352 | 353 | The `next_task` macro expands to `list_entry_rcu` to get the next `task_struct` of the list head pointer: 354 | 355 | ```plain 356 | #define next_task(p) \ 357 | list_entry_rcu((p)->tasks.next, struct task_struct, `tasks`) 358 | ``` 359 | 360 | `list_entry_rcu` is itself a macro that expands to the `container_of` macro, which then gets the base address of the containing structure. 
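To see how `for_each_process`, `next_task`, and `container_of` fit together, here is a small userspace sketch of the same pattern. It's a simplified illustration rather than kernel code: there is no RCU, the `task` structure and macro names are invented for the example, and the list is wired up by hand:

```c
#include <stddef.h>
#include <stdio.h>

/* Simplified userspace illustration of the kernel's task-list pattern. */

struct list_head { struct list_head *next, *prev; };

struct task {
    int pid;
    struct list_head tasks;
};

/* Same trick as the kernel's container_of: step back from the embedded
   list node to the start of the structure that contains it. */
#define container_of(ptr, type, member) \
    ((type *)((char *)(ptr) - offsetof(type, member)))

#define next_task(p) container_of((p)->tasks.next, struct task, tasks)

/* Mirrors for_each_process: start at the initial task and stop once the
   circular list wraps back around to it. */
#define for_each_task(p, init) \
    for ((p) = (init); ((p) = next_task(p)) != (init); )

int main(void) {
    struct task init_task = { .pid = 0 };
    struct task a = { .pid = 1 }, b = { .pid = 2 };

    /* Build a circular doubly linked list: init_task <-> a <-> b <-> init_task. */
    init_task.tasks.next = &a.tasks;    a.tasks.prev = &init_task.tasks;
    a.tasks.next = &b.tasks;            b.tasks.prev = &a.tasks;
    b.tasks.next = &init_task.tasks;    init_task.tasks.prev = &b.tasks;

    struct task *p;
    for_each_task(p, &init_task)
        printf("pid %d\n", p->pid);  /* prints 1 then 2, skipping init_task */

    return 0;
}
```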
361 | 
362 | It's worth noting that the `tasks` linked list isn't the only way Linux keeps track of tasks. It also maintains a dictionary-like data structure (an IDR) that offers constant-time access, which is used to quickly look up a `task_struct` from a given pid. This is a much more efficient way of accessing a single task than traversing the entire task list.
363 | 
364 | ## Conclusion
365 | 
366 | Intrusive linked lists are an interesting alternative to non-intrusive linked lists: they reduce memory allocations and cache thrashing.
367 | 
368 | Linux uses intrusive linked lists a lot, generally when the lists are short or when they are rarely traversed. If you plan to become a kernel hacker, you should become familiar with intrusive linked lists.
369 | 
--------------------------------------------------------------------------------
/content/posts/linked-lists.md:
--------------------------------------------------------------------------------
1 | ---
2 | title: "Linked lists"
3 | date: 2019-09-13T12:28:04+01:00
4 | ---
5 | 
6 | This post will teach you what linked lists are, why they're useful, and how they're used in cURL to set HTTP headers.
7 | 
8 | ## What are linked lists?
9 | 
10 | A linked list is a collection of data connected via references.
11 | 
12 | {{< figure src="/images/linked-lists/singly-linked-list.svg" title="Figure 1: A linked list" >}}
13 | 
14 | Each element (or **node**) in a list contains a value and a reference to the next node.
15 | 
16 | The simplest linked list is a **singly linked list**, where each node points to the next node in the list. In a singly linked list, the final node points to a null value.
17 | 
18 | You can represent a singly linked list as a `list` structure containing `data` and a `next` pointer to another `list`:
19 | 
20 | ```c
21 | typedef struct list {
22 |     DATA data;
23 |     struct list *next;
24 | } list;
25 | ```
26 | 
27 | To find an element in a list, you need to visit each node one after the other, starting at the first node in the list (the **head**) and finishing at the last node (the **tail**).
28 | 
29 | {{< figure src="/images/linked-lists/singly-linked-list-with-head.svg" title="Figure 2: A linked list with a head and tail" >}}
30 | 
31 | The following `find` function demonstrates how you would search a linked list. Starting from the first node in the list (`head`), `find` loops through each node and checks to see if the node contains the data it's searching for:
32 | 
33 | ```c
34 | list* find(list* head, DATA data) {
35 |     list* node = head;
36 |     while(node) {
37 |         if(node->data == data) {
38 |             return node;
39 |         }
40 |         node = node->next;
41 |     }
42 |     return node;
43 | }
44 | ```
45 | 
46 | An algorithm to find a list node may need to traverse an entire list in the worst case. This makes linked list search an O(n) operation.
47 | 
48 | In contrast, linked list insertion takes constant time (provided you have a reference to the node you want to insert at). An `insert` function can insert a new node by creating a node dynamically and updating the pointers of existing nodes.
49 | 
50 | The following code creates a new node, adds it to the end of a list, and returns the new tail:
51 | 
52 | ```c
53 | list* insert(DATA data, list* tail) {
54 |     list* node = create_node(data);
55 |     tail->next = node;
56 |     return node; // the new tail
57 | }
58 | ```
59 | 
60 | _Note: A linked list node is often dynamically allocated using a function like `malloc`. This adds overhead for each node that's created._
61 | 
62 | Now that you know what linked lists are, the next question is: _why use them_?
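
Before answering that, here is how the pieces above might fit together in a minimal, self-contained program. Two assumptions in this sketch: `DATA` is defined as `int`, and `create_node` is a hypothetical helper (not shown above) that wraps `malloc`:

```c
#include <stdio.h>
#include <stdlib.h>

typedef int DATA; /* assumption: the post leaves DATA abstract */

typedef struct list {
    DATA data;
    struct list *next;
} list;

/* hypothetical helper: allocate and initialize a node */
list* create_node(DATA data) {
    list* node = malloc(sizeof(list));
    node->data = data;
    node->next = NULL;
    return node;
}

/* find and insert, as defined above */
list* find(list* head, DATA data) {
    list* node = head;
    while(node) {
        if(node->data == data) {
            return node;
        }
        node = node->next;
    }
    return node;
}

list* insert(DATA data, list* tail) {
    list* node = create_node(data);
    tail->next = node;
    return node; // the new tail
}

int main(void) {
    list* head = create_node(1);
    list* tail = head;

    /* append 2 and 3 to the end of the list */
    tail = insert(2, tail);
    tail = insert(3, tail);

    /* search for a value */
    if(find(head, 3)) {
        printf("found 3\n");
    }

    /* free every node */
    while(head) {
        list* next = head->next;
        free(head);
        head = next;
    }
    return 0;
}
```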
63 | 
64 | ## Why use linked lists?
65 | 
66 | There are three main benefits of linked lists:
67 | 
68 | 1. They grow dynamically.
69 | 2. They have constant time insertion and deletion.
70 | 3. They're easy to implement.
71 | 
72 | A big plus for linked lists is that they grow and shrink dynamically. You can add and remove nodes on demand without performing costly resizing operations.
73 | 
74 | Another benefit is the speed of insertion and deletion, which are both O(1) operations.
75 | 
76 | You can see the cost of the common operations in the following table:
77 | 
78 | | Operation | Worst case |
79 | | --------- | ---------- |
80 | | Access    | O(n)       |
81 | | Search    | O(n)       |
82 | | Insertion | O(1)       |
83 | | Deletion  | O(1)       |
84 | 
85 | The final benefit is that they're easy to implement (which means less surface area for bugs).
86 | 
87 | Despite these benefits, dynamic arrays often outperform linked lists because of their better cache behavior. Think carefully before you decide to use linked lists in your project.
88 | 
89 | The rest of this post will look at a project that's used linked lists successfully—cURL.
90 | 
91 | ## Linked lists in cURL
92 | 
93 | cURL is a command line tool (curl) and a library (libcurl) used for transferring data with URLs.
94 | 
95 | Commonly, the curl tool is used to make HTTP requests to a server:
96 | 
97 | ```bash
98 | curl http://www.google.com
99 | ```
100 | 
101 | HTTP requests can include a variable number of **headers** in the form `field-name ":" [ field-value ]`.
102 | 
103 | You can add headers with the curl `--header` option:
104 | 
105 | ```bash
106 | curl --header "X-MyHeader: 123" http://www.google.com
107 | ```
108 | 
109 | Both curl and libcurl represent headers as linked lists.
110 | 
111 | ### Implementing a linked list
112 | 
113 | cURL has two linked list data structures: `curl_llist` and `curl_slist`. `curl_slist` is the data structure used to represent HTTP headers.
114 | 
115 | `curl_slist` is a singly linked list that holds strings (hence the name _slist_, short for _string list_). Each node contains a pointer to a string (`data`) and a pointer to the next node in the list (`next`):
116 | 
117 | ```c
118 | struct curl_slist {
119 |     char *data;
120 |     struct curl_slist *next;
121 | };
122 | ```
123 | 
124 | _Note: The code examples in this post are from curl v7.66.1._
125 | 
126 | libcurl provides functions to interact with the list. For example, `curl_slist_append` adds a new string to a list:
127 | 
128 | ```c
129 | struct curl_slist *headers = NULL;
130 | 
131 | headers = curl_slist_append(headers, "X-MyHeader: 123");
132 | ```
133 | 
134 | `curl_slist_append` copies the string (using `strdup`) and then calls `Curl_slist_append_nodup` to add the duplicated string to the list:
135 | 
136 | ```c
137 | struct curl_slist *curl_slist_append(struct curl_slist *list,
138 |                                      const char *data)
139 | {
140 |     char *dupdata = strdup(data);
141 |     // ..
142 |     list = Curl_slist_append_nodup(list, dupdata);
143 |     // ..
144 |     return list;
145 | }
146 | ```
147 | 
148 | _Note: Copying the string means the original string can be overwritten after `curl_slist_append` is called._
149 | 
150 | `Curl_slist_append_nodup` creates a new list node (calling `malloc` to allocate memory). It then adds the data and sets the `next` pointer of the new node to `NULL`.
151 | 
152 | If the current list is `NULL` (and therefore uninitialized), `Curl_slist_append_nodup` returns the new list node.
If the list exists, the last node is retrieved with `slist_get_last`, and its `next` pointer is set to the new node:
153 | 
154 | ```c
155 | struct curl_slist *Curl_slist_append_nodup(struct curl_slist *list, char *data)
156 | {
157 |     struct curl_slist *last;
158 |     struct curl_slist *new_item;
159 |     // ..
160 |     new_item = malloc(sizeof(struct curl_slist));
161 |     // ..
162 |     new_item->next = NULL;
163 |     new_item->data = data;
164 | 
165 |     /* if this is the first item, then new_item *is* the list */
166 |     if(!list)
167 |         return new_item;
168 | 
169 |     last = slist_get_last(list);
170 |     last->next = new_item;
171 |     return list;
172 | }
173 | ```
174 | 
175 | `slist_get_last` loops through the list until it reaches an item whose `next` pointer is `NULL`. This is the last item in the list, so it's returned:
176 | 
177 | ```c
178 | static struct curl_slist *slist_get_last(struct curl_slist *list)
179 | {
180 |     struct curl_slist *item;
181 |     // ..
182 |     item = list;
183 |     while(item->next) {
184 |         item = item->next;
185 |     }
186 |     return item;
187 | }
188 | ```
189 | 
190 | That's an overview of the `curl_slist` API. The interesting part is how it's used to add headers to an HTTP request.
191 | 
192 | Before that, a quick primer on HTTP/1.1.
193 | 
194 | ### HTTP/1.1
195 | 
196 | HTTP/1.1 is an application-layer protocol for sending data between a client and a server. It was created with ease-of-use in mind.
197 | 
198 | HTTP/1.1 is ASCII-encoded, so it's human-readable. A request contains a **request-line** (in the form `Method Request-URI HTTP-Version CRLF`) and a variable-length list of **headers**.
199 | 
200 | An HTTP/1.1 GET request might look like this:
201 | 
202 | ```plain
203 | GET / HTTP/1.1
204 | Host: www.google.com
205 | User-Agent: curl/7.54.0
206 | Accept: */*
207 | ```
208 | 
209 | HTTP is typically sent over TCP. On POSIX-compliant systems, TCP connections are created using the sockets API. **Sockets** are an abstraction that lets you establish a TCP connection with another process (often running on a remote host) and send/receive data over the connection. You can send data to an open socket using the POSIX-defined `send` function.
210 | 
211 | The main takeaway here is that cURL can send an HTTP request by calling `send` with the HTTP request as a string.
212 | 
213 | The next section will look at how headers get converted from command-line options into a linked list, and then into the HTTP request string that `send` is called with.
214 | 
215 | ### Adding custom headers in curl
216 | 
217 | In libcurl, custom headers are set using a `curl_slist` structure, so `curl_slist` is a user-facing data structure.
218 | 
219 | curl (the tool) uses libcurl internally. In fact, most of the work curl does is to convert command-line options into configuration operations for libcurl.
220 | 
221 | libcurl exposes an "easy API" to perform transfers: you create a `CURL` handle (called an "easy handle") to configure the request. You can set options using helper functions, like `curl_easy_setopt`.
222 | 
223 | The following is an example of making a request with a custom header:
224 | 
225 | ```c
226 | CURLcode res;
227 | CURL *curl = curl_easy_init();
228 | 
229 | struct curl_slist *headers = NULL;
230 | 
231 | headers = curl_slist_append(headers, "X-MyHeader: 123");
232 | 
233 | // .. add other options
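// Note: libcurl stores a pointer to this list rather than copying it,
// so the list must stay alive until the transfer finishes (hence
// curl_slist_free_all is only called after curl_easy_perform below).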
234 | 
235 | curl_easy_setopt(curl, CURLOPT_HTTPHEADER, headers);
236 | 
237 | res = curl_easy_perform(curl); // make the request
238 | 
239 | curl_slist_free_all(headers);
240 | ```
241 | 
242 | The curl tool uses the libcurl easy API internally: it converts the command-line options into an easy handle.
243 | 
244 | When curl is called from the command line, it first parses the options passed to it in `argv`. It normalizes option names and enters a switch statement that matches against all possible options. For any `header` options, the option's parameter value is added to a `curl_slist`.
245 | 
246 | Once curl has parsed the options, it creates an easy handle to pass to libcurl before executing the operation.
247 | 
248 | For HTTP requests, libcurl creates a buffer to hold the request string. The request string is built by converting options into the relevant strings. The headers are added by `Curl_add_custom_headers`.
249 | 
250 | `Curl_add_custom_headers` loops over the headers list. Each header is validated (by checking for a colon) and then added to the request buffer along with a CRLF (`\r\n`):
251 | 
252 | ```c
253 | CURLcode Curl_add_custom_headers(
254 |     struct connectdata *conn,
255 |     bool is_connect,
256 |     Curl_send_buffer *req_buffer)
257 | {
258 |     // ..
259 |     char *ptr;
260 |     struct curl_slist *headers;
261 | 
262 |     // ..
263 | 
264 |     // .. assign headers list to headers variable
265 | 
266 |     while(headers) {
267 |         // ..
268 |         ptr = strchr(headers->data, ':');
269 |         // ..
270 |         if(ptr) {
271 |             // ..
272 |             if(*ptr /* .. */ ) {
273 |                 // ..
274 |                 char *compare = /* ... */ headers->data;
275 |                 // ..
276 |                 result = Curl_add_bufferf(
277 |                     &req_buffer,
278 |                     "%s\r\n",
279 |                     compare
280 |                 );
281 |                 // ..
282 |             }
283 |         }
284 |         headers = headers->next;
285 |     }
286 |     // ..
287 | 
288 |     return CURLE_OK;
289 | }
290 | ```
291 | 
292 | Once all the text has been added to an HTTP request, cURL calls `send` to send the data over the TCP connection.
293 | 
294 | As well as being used for request headers, `curl_slist` is also used for other variable-length data, like cookies and trailer headers. The linked list data structure is well-suited to representing objects of varying lengths.
295 | 
296 | ## Problems with linked lists
297 | 
298 | Although linked lists work well for representing variable-length data, they can cause performance problems in time-critical apps due to poor [cache locality](https://en.wikipedia.org/wiki/Locality_of_reference).
299 | 
300 | For example, when DOOM 3 was rewritten to run at 60 FPS, the developers identified the use of linked lists as a major performance issue (see [the DOOM 3 technical note](http://fabiensanglard.net/doom3_documentation/DOOM-3-BFG-Technical-Note.pdf) for details).
301 | 
302 | Despite these potential performance issues, linked lists are a common data structure in C programs. They are good at representing variable-length data and they are simple to implement (which goes a long way in C!).
303 | 
304 | The next blog post will look at a variation of linked lists—intrusive linked lists—and how they are used in Linux.
305 | -------------------------------------------------------------------------------- /i18n/en.yaml: -------------------------------------------------------------------------------- 1 | - id: prev_page 2 | translation: "Prev Page" 3 | 4 | - id: next_page 5 | translation: "Next Page" 6 | -------------------------------------------------------------------------------- /layouts/404.html: -------------------------------------------------------------------------------- 1 | {{ partial "header.html" . }} 2 |