├── graphs ├── Cargo.toml ├── src │ ├── mod.rs │ ├── rc_graph.rs │ ├── ref_graph.rs │ └── ref_graph_generic_mod.rs └── README.md ├── compile_pdf.sh ├── LICENSE.md ├── primitives.md ├── README.md ├── destructuring.md ├── rc-raw.md ├── unique.md ├── control-flow.md ├── hello-world.md ├── borrowed.md ├── destructuring-2.md ├── arrays.md ├── data-types.md └── closures.md /graphs/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "graphs" 3 | version = "0.0.1" 4 | authors = ["nrc "] 5 | 6 | [dependencies] 7 | typed-arena = "2" 8 | 9 | [[bin]] 10 | name = "graphs" 11 | path = "src/mod.rs" 12 | -------------------------------------------------------------------------------- /compile_pdf.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | pandoc -s README.md hello-world.md control-flow.md primitives.md unique.md borrowed.md data-types.md rc-raw.md destructuring.md destructuring-2.md arrays.md graphs/README.md closures.md -o r4cppp.pdf 4 | -------------------------------------------------------------------------------- /graphs/src/mod.rs: -------------------------------------------------------------------------------- 1 | #![feature(rustc_private)] 2 | 3 | extern crate typed_arena; 4 | 5 | mod rc_graph; 6 | mod ref_graph; 7 | 8 | fn main() { 9 | println!("\nRc>:"); 10 | rc_graph::main(); 11 | println!("\n&Node and UnsafeCell:"); 12 | ref_graph::main(); 13 | } 14 | -------------------------------------------------------------------------------- /LICENSE.md: -------------------------------------------------------------------------------- 1 | Copyright 2015 The Rust for C++ programmers Developers. 2 | 3 | Licensed under the Apache License, Version 2.0 or the MIT license 5 | , at your 6 | option. This file may not be copied, modified, or distributed 7 | except according to those terms. 
8 | -------------------------------------------------------------------------------- /graphs/src/rc_graph.rs: -------------------------------------------------------------------------------- 1 | 2 | use std::rc::Rc; 3 | use std::cell::RefCell; 4 | use std::collections::HashSet; 5 | 6 | struct Node { 7 | datum: &'static str, 8 | edges: Vec>>, 9 | } 10 | 11 | impl Node { 12 | fn new(datum: &'static str) -> Rc> { 13 | Rc::new(RefCell::new(Node { 14 | datum: datum, 15 | edges: Vec::new(), 16 | })) 17 | } 18 | 19 | fn traverse(&self, f: &F, seen: &mut HashSet<&'static str>) 20 | where F: Fn(&'static str) 21 | { 22 | if seen.contains(&self.datum) { 23 | return; 24 | } 25 | f(self.datum); 26 | seen.insert(self.datum); 27 | for n in &self.edges { 28 | n.borrow().traverse(f, seen); 29 | } 30 | } 31 | 32 | fn first(&self) -> Rc> { 33 | self.edges[0].clone() 34 | } 35 | } 36 | 37 | fn foo(node: &Node) { 38 | println!("foo: {}", node.datum); 39 | } 40 | 41 | fn init() -> Rc> { 42 | let root = Node::new("A"); 43 | 44 | let b = Node::new("B"); 45 | let c = Node::new("C"); 46 | let d = Node::new("D"); 47 | let e = Node::new("E"); 48 | let f = Node::new("F"); 49 | 50 | { 51 | let mut mut_root = root.borrow_mut(); 52 | mut_root.edges.push(b.clone()); 53 | mut_root.edges.push(c.clone()); 54 | mut_root.edges.push(d.clone()); 55 | 56 | let mut mut_c = c.borrow_mut(); 57 | mut_c.edges.push(e.clone()); 58 | mut_c.edges.push(f.clone()); 59 | mut_c.edges.push(root.clone()); 60 | } 61 | 62 | root 63 | } 64 | 65 | pub fn main() { 66 | let g = init(); 67 | let g = g.borrow(); 68 | g.traverse(&|d| println!("{}", d), &mut HashSet::new()); 69 | let f = g.first(); 70 | foo(&*f.borrow()); 71 | } 72 | -------------------------------------------------------------------------------- /graphs/src/ref_graph.rs: -------------------------------------------------------------------------------- 1 | 2 | use std::cell::UnsafeCell; 3 | use std::collections::HashSet; 4 | use typed_arena::Arena; 5 | 6 | struct 
Node<'a> { 7 | datum: &'static str, 8 | edges: UnsafeCell>>, 9 | } 10 | 11 | impl<'a> Node<'a> { 12 | fn new<'b>(datum: &'static str, arena: &'b Arena>) -> &'b Node<'b> { 13 | arena.alloc(Node { 14 | datum: datum, 15 | edges: UnsafeCell::new(Vec::new()), 16 | }) 17 | } 18 | 19 | fn traverse(&self, f: &F, seen: &mut HashSet<&'static str>) 20 | where F: Fn(&'static str) 21 | { 22 | if seen.contains(&self.datum) { 23 | return; 24 | } 25 | f(self.datum); 26 | seen.insert(self.datum); 27 | unsafe { 28 | for n in &(*self.edges.get()) { 29 | n.traverse(f, seen); 30 | } 31 | } 32 | } 33 | 34 | fn first(&'a self) -> &'a Node<'a> { 35 | unsafe { 36 | (*self.edges.get())[0] 37 | } 38 | } 39 | } 40 | 41 | fn foo<'a>(node: &'a Node<'a>) { 42 | println!("foo: {}", node.datum); 43 | } 44 | 45 | fn init<'a>(arena: &'a Arena>) ->&'a Node<'a> { 46 | let root = Node::new("A", arena); 47 | 48 | let b = Node::new("B", arena); 49 | let c = Node::new("C", arena); 50 | let d = Node::new("D", arena); 51 | let e = Node::new("E", arena); 52 | let f = Node::new("F", arena); 53 | 54 | unsafe { 55 | (*root.edges.get()).push(b); 56 | (*root.edges.get()).push(c); 57 | (*root.edges.get()).push(d); 58 | 59 | (*c.edges.get()).push(e); 60 | (*c.edges.get()).push(f); 61 | (*c.edges.get()).push(root); 62 | } 63 | 64 | root 65 | } 66 | 67 | pub fn main() { 68 | let arena = Arena::new(); 69 | let g = init(&arena); 70 | g.traverse(&|d| println!("{}", d), &mut HashSet::new()); 71 | foo(g.first()); 72 | } 73 | -------------------------------------------------------------------------------- /graphs/src/ref_graph_generic_mod.rs: -------------------------------------------------------------------------------- 1 | // Note that this one is hypothetical future Rust and will not compile today. 2 | 3 | use std::cell::UnsafeCell; 4 | use std::collections::HashSet; 5 | use arena::TypedArena; 6 | 7 | // Module is parameterised with the lifetime of the graph. 
8 | mod graph<'a> { 9 | struct Node { 10 | datum: &'static str, 11 | // The module-level lifetime is used for the lifetime of each Node. 12 | edges: UnsafeCell>, 13 | } 14 | 15 | impl Node { 16 | fn new(datum: &'static str, arena: &'a TypedArena) -> &'a Node { 17 | arena.alloc(Node { 18 | datum: datum, 19 | edges: UnsafeCell::new(Vec::new()), 20 | }) 21 | } 22 | 23 | fn traverse(&self, f: &F, seen: &mut HashSet<&'static str>) 24 | where F: Fn(&'static str) 25 | { 26 | if seen.contains(&self.datum) { 27 | return; 28 | } 29 | f(self.datum); 30 | seen.insert(self.datum); 31 | for n in &self.edges { 32 | unsafe { 33 | for n in &(*self.edges.get()) { 34 | n.traverse(f, seen); 35 | } 36 | } 37 | } 38 | } 39 | 40 | fn first(&self) -> &Node { 41 | unsafe { 42 | (*self.edges.get())[0] 43 | } 44 | } 45 | } 46 | 47 | // It would be nice if we could rely on lifetime elision and remove the `'a` 48 | // on the `foo` and `init` functions. 49 | fn foo(node: &'a Node) { 50 | println!("foo: {}", node.datum); 51 | } 52 | 53 | fn init(arena: &'a TypedArena) -> &'a Node { 54 | let root = Node::new("A", arena); 55 | 56 | let b = Node::new("B", arena); 57 | let c = Node::new("C", arena); 58 | let d = Node::new("D", arena); 59 | let e = Node::new("E", arena); 60 | let f = Node::new("F", arena); 61 | 62 | unsafe { 63 | (*root.edges.get()).push(b); 64 | (*root.edges.get()).push(c); 65 | (*root.edges.get()).push(d); 66 | 67 | (*c.edges.get()).push(e); 68 | (*c.edges.get()).push(f); 69 | (*c.edges.get()).push(root); 70 | } 71 | 72 | root 73 | } 74 | } 75 | 76 | pub fn main() { 77 | let arena = TypedArena::new(); 78 | // The lifetime of the module is inferred here from the lifetime of the 79 | // reference to the arena, i.e., the scope of the main function. 
80 | let g = graph::init(&arena); 81 | g.traverse(&|d| println!("{}", d), &mut HashSet::new()); 82 | foo(g.first()); 83 | } 84 | -------------------------------------------------------------------------------- /primitives.md: -------------------------------------------------------------------------------- 1 | # Primitive types and operators 2 | 3 | Rust has pretty much the same arithmetic and logical operators as C++. `bool` is 4 | the same in both languages (as are the `true` and `false` literals). Rust has 5 | similar concepts of integers, unsigned integers, and floats. However the syntax 6 | is a bit different. Rust uses `isize` to mean an integer and `usize` to mean an 7 | unsigned integer. These types are pointer sized. E.g., on a 32 bit system, 8 | `usize` means a 32 bit unsigned integer. Rust also has explicitly sized types 9 | which are `u` or `i` followed by 8, 16, 32, 64, or 128. So, for example, `u8` is 10 | an 8 bit unsigned integer and `i32` is a 32 bit signed integer. For floats, Rust 11 | has `f32` and `f64`. 12 | 13 | Numeric literals can take suffixes to indicate their type. If no suffix is given, Rust tries to infer the 14 | type. If it can't infer, it uses `i32` or `f64` (if there is a decimal point). 15 | Examples: 16 | 17 | ```rust 18 | fn main() { 19 | let x: bool = true; 20 | let x = 34; // type i32 21 | // let x = 2147483648; // error: literal out of range for `i32` 22 | let x = 34isize; 23 | let x = 34usize; 24 | let x = 34u8; 25 | let x = 34i64; 26 | let x = 34f32; 27 | } 28 | ``` 29 | 30 | As a side note, Rust lets you redefine variables so the above code is legal - 31 | each `let` statement creates a new variable `x` and hides the previous one. This 32 | is more useful than you might expect due to variables being immutable by 33 | default. 34 | 35 | Numeric literals can be given as binary, octal, and hexadecimal, as well as 36 | decimal. Use the `0b`, `0o`, and `0x` prefixes, respectively. 
You can use an 37 | underscore anywhere in a numeric literal and it will be ignored. E.g., 38 | 39 | ```rust 40 | fn main() { 41 | let x = 12; 42 | let x = 0b1100; 43 | let x = 0o14; 44 | let x = 0xc; 45 | let y = 0b_1100_0011_1011_0001; 46 | } 47 | ``` 48 | 49 | Rust has chars and strings, but since they are Unicode, they are a bit different 50 | from C++. I'm going to postpone talking about them until after I've introduced 51 | pointers, references, and vectors (arrays). 52 | 53 | Rust does not implicitly coerce numeric types. In general, Rust has much less 54 | implicit coercion and subtyping than C++. Rust uses the `as` keyword for 55 | explicit coercions and casting. Any numeric value can be cast to another numeric 56 | type. `as` cannot be used to convert from numeric types to boolean types, but 57 | the reverse can be done. E.g., 58 | 59 | ```rust 60 | fn main() { 61 | let x = 34usize as isize; // cast usize to isize 62 | let x = 10 as f32; // integer to float 63 | let x = 10.45f64 as i8; // float to i8 (loses precision) 64 | let x = 4u8 as u64; // gains precision 65 | let x = 400u16 as u8; // 144, loses precision (and thus changes the value) 66 | println!("`400u16 as u8` gives {}", x); 67 | let x = -3i8 as u8; // 253, signed to unsigned (changes sign) 68 | println!("`-3i8 as u8` gives {}", x); 69 | //let x = 45 as bool; // FAILS!
(use 45 != 0 instead) 70 | let x = true as usize; // cast bool to usize (gives a 1) 71 | } 72 | ``` 73 | 74 | Rust has the following operators: 75 | 76 | | Type | Operators | 77 | | --------------------- | -------------------------------- | 78 | | Numeric | `+`, `-`, `*`, `/`, `%` | 79 | | Bitwise | `\|`, `&`, `^`, `<<`, `>>` | 80 | | Comparison | `==`, `!=`, `>`, `<`, `>=`, `<=` | 81 | | Short-circuit logical | `\|\|`, `&&` | 82 | 83 | All of these behave as in C++, however, Rust is a bit stricter about the types 84 | the operators can be applied to - the bitwise operators can only be applied to 85 | integers and the logical operators can only be applied to booleans. Rust has the 86 | `-` unary operator which negates a number. The `!` operator negates a boolean 87 | and inverts every bit on an integer type (equivalent to `~` in C++ in the latter 88 | case). Rust has compound assignment operators as in C++, e.g., `+=`, but does 89 | not have increment or decrement operators (e.g., `++`). 90 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Rust For Systems Programmers 2 | 3 | A Rust tutorial for experienced C and C++ programmers. 4 | 5 | Jump to [contents](#contents). 6 | Jump to [contributing](#contributing). 7 | 8 | This tutorial is intended for programmers who already know how pointers and 9 | references work and are used to systems programming concepts such as integer 10 | widths and memory management. We intend to cover, primarily, the differences 11 | between Rust and C++ to get you writing Rust programs quickly without lots of 12 | fluff you probably already know. 13 | 14 | Hopefully, Rust is a pretty intuitive language for C++ programmers. Most of the 15 | syntax is pretty similar. 
The big difference (in my experience) is that the 16 | sometimes vague concepts of good systems programming are strictly enforced by 17 | the compiler. This can be infuriating at first - there are things you want to 18 | do, but the compiler won't let you (at least in safe code), and sometimes these 19 | things *are* safe, but you can't convince the compiler of that. However, you'll 20 | quickly develop a good intuition for what is allowed. Communicating your own 21 | notions of memory safety to the compiler requires some new and sometimes 22 | complicated type annotations. But if you have a strong idea of lifetimes for 23 | your objects and experience with generic programming, they shouldn't be too 24 | tough to learn. 25 | 26 | This tutorial started as a [series of blog posts](http://featherweightmusings.blogspot.co.nz/search/label/rust-for-c). 27 | Partly as an aid for me (@nrc) learning Rust (there is no better way to 28 | check that you have learnt something than to try and explain it to somebody 29 | else) and partly because I found the existing resources for learning Rust 30 | unsatisfactory - they spent too much time on the basics that I already knew and 31 | used higher level intuitions to describe concepts that could better be explained 32 | to me using lower level intuitions. Since then, the documentation for Rust has 33 | got *much* better, but I still think that existing C++ programmers are an 34 | audience who are a natural target for Rust, but are not particularly well 35 | catered for. 36 | 37 | 38 | ## Contents 39 | 40 | 1. [Introduction - Hello world!](hello-world.md) 41 | 1. [Control flow](control-flow.md) 42 | 1. [Primitive types and operators](primitives.md) 43 | 1. [Unique pointers](unique.md) 44 | 1. [Borrowed pointers](borrowed.md) 45 | 1. [Rc and raw pointers](rc-raw.md) 46 | 1. [Data types](data-types.md) 47 | 1. [Destructuring pt 1](destructuring.md) 48 | 1. [Destructuring pt 2](destructuring-2.md) 49 | 1. 
[Arrays and vecs](arrays.md) 50 | 1. [Graphs and arena allocation](graphs/README.md) 51 | 1. [Closures and first-class functions](closures.md) 52 | 53 | 54 | ## Other resources 55 | 56 | * [The Rust book/guide](http://doc.rust-lang.org/book/) - the best place for 57 | learning Rust in general and probably the best place to go for a second opinion 58 | on stuff here or for stuff not covered. 59 | * [Rust API documentation](http://doc.rust-lang.org/std/index.html) - detailed 60 | documentation for the Rust libraries. 61 | * [The Rust reference manual](https://doc.rust-lang.org/reference/) - a little 62 | out of date in places, but thorough; good for looking up details. 63 | * [Discuss forum](http://users.rust-lang.org/) - general forum for discussion or 64 | questions about using and learning Rust. 65 | * [StackOverflow Rust questions](https://stackoverflow.com/questions/tagged/rust) - answers 66 | to many beginner and advanced questions about Rust, but be careful though - Rust 67 | has changed *a lot* over the years and some of the answers might be very out of date. 68 | * [A Firehose of Rust](https://www.youtube.com/watch?v=IPmRDS0OSxM) - a recorded talk 69 | introducing C++ programmers to how lifetimes, mutable aliasing, and move semantics work 70 | in Rust 71 | 72 | 73 | ## Contributing 74 | 75 | Yes please! 76 | 77 | If you spot a typo or mistake, please submit a PR, don't be shy! Please feel 78 | free to file [an issue](https://github.com/nrc/r4cppp/issues/new) for 79 | larger changes or for new chapters you'd like to see. I'd also be happy to see 80 | re-organisation of existing work or expanded examples, if you feel the tutorial 81 | could be improved in those ways. 82 | 83 | If you'd like to contribute a paragraph, section, or chapter please do! If you 84 | want ideas for things to cover, see the [list of issues](https://github.com/nrc/r4cppp/issues), 85 | in particular those tagged [new material](https://github.com/nrc/r4cppp/labels/new%20material). 
86 | If you're not sure of something, please get in touch by pinging me here 87 | (@nrc) or on irc (nrc, on #rust or #rust-internals). 88 | 89 | 90 | ### Style 91 | 92 | Obviously, the intended audience is C++ programmers. The tutorial should 93 | concentrate on things that will be new to experienced C++ programmers, rather 94 | than a general audience (although, I don't assume the audience is familiar with 95 | the most recent versions of C++). I'd like to avoid too much basic material and 96 | definitely avoid too much overlap with other resources, in particular the Rust 97 | guide/book. 98 | 99 | Work on edge case use cases (e.g., using a different build system from Cargo, or 100 | writing syntax extensions, using unstable APIs) is definitely welcome, as is 101 | in-depth work on topics already covered at a high level. 102 | 103 | I'd like to avoid recipe-style examples for converting C++ code to Rust code, 104 | but small examples of this kind are OK. 105 | 106 | Use of different formats (e.g., question and answer/FAQs, or larger worked 107 | examples) are welcome. 108 | 109 | I don't plan on adding exercises or suggestions for mini-projects, but if you're 110 | interested in that, let me know. 111 | 112 | I'm aiming for a fairly academic tone, but not too dry. All writing should be in 113 | English (British English, not American English; although I would be very happy 114 | to have localisations/translations into any language, including American 115 | English) and be valid GitHub markdown. For advice on writing style, grammar, 116 | punctuation, etc. see the Oxford Style Manual 117 | or [The Economist Style Guide](http://www.economist.com/styleguide/introduction). 118 | Please limit width to 80 columns. I am a fan of the Oxford comma. 119 | 120 | Don't feel like work has to be perfect to be submitted, I'm happy to edit and 121 | I'm sure other people will be in the future. 
122 | -------------------------------------------------------------------------------- /destructuring.md: -------------------------------------------------------------------------------- 1 | # Destructuring 2 | 3 | Last time we looked at Rust's data types. Once you have some data inside a structure, you 4 | will want to get that data out. For structs, Rust has field access, just like 5 | C++. For tuples, tuple structs, and enums you must use destructuring (there are 6 | various convenience functions in the library, but they use destructuring 7 | internally). Destructuring of data structures exists in C++ only since C++17, so 8 | it is most likely familiar from languages such as Python or various functional 9 | languages. The idea is that just as you can initialize a data structure by 10 | filling out its fields with data from a bunch of local variables, you can fill 11 | out a bunch of local variables with data from a data structure. From this 12 | simple beginning, destructuring has become one of Rust's most powerful 13 | features. To put it another way, destructuring combines pattern matching with 14 | assignment into local variables. 15 | 16 | Destructuring is done primarily through the let and match statements. The match 17 | statement is used when the structure being destructured can have different 18 | variants (such as an enum). A let expression pulls the variables out into the 19 | current scope, whereas match introduces a new scope. To compare: 20 | 21 | ```rust 22 | fn foo(pair: (int, int)) { 23 | let (x, y) = pair; 24 | // we can now use x and y anywhere in foo 25 | 26 | match pair { 27 | (x, y) => { 28 | // x and y can only be used in this scope 29 | } 30 | } 31 | } 32 | ``` 33 | 34 | The syntax for patterns (used after `let` and before `=>` in the above example) 35 | in both cases is (pretty much) the same.
You can also use these patterns in 36 | argument position in function declarations: 37 | 38 | ```rust 39 | fn foo((x, y): (int, int)) { 40 | } 41 | ``` 42 | 43 | (Which is more useful for structs or tuple-structs than tuples). 44 | 45 | Most initialisation expressions can appear in a destructuring pattern and they 46 | can be arbitrarily complex. That can include references and primitive literals 47 | as well as data structures. For example, 48 | 49 | ```rust 50 | struct St { 51 | f1: int, 52 | f2: f32 53 | } 54 | 55 | enum En { 56 | Var1, 57 | Var2, 58 | Var3(int), 59 | Var4(int, St, int) 60 | } 61 | 62 | fn foo(x: &En) { 63 | match x { 64 | &Var1 => println!("first variant"), 65 | &Var3(5) => println!("third variant with number 5"), 66 | &Var3(x) => println!("third variant with number {} (not 5)", x), 67 | &Var4(3, St { f1: 3, f2: x }, 45) => { 68 | println!("destructuring an embedded struct, found {} in f2", x) 69 | } 70 | &Var4(_, ref x, _) => { 71 | println!("Some other Var4 with {} in f1 and {} in f2", x.f1, x.f2) 72 | } 73 | _ => println!("other (Var2)") 74 | } 75 | } 76 | ``` 77 | 78 | Note how we destructure through a reference by using `&` in the patterns and how 79 | we use a mix of literals (`5`, `3`, `St { ... }`), wildcards (`_`), and 80 | variables (`x`). 81 | 82 | You can use `_` wherever a variable is expected if you want to ignore a single 83 | item in a pattern, so we could have used `&Var3(_)` if we didn't care about the 84 | integer. In the first `Var4` arm we destructure the embedded struct (a nested 85 | pattern) and in the second `Var4` arm we bind the whole struct to a variable. 86 | You can also use `..` to stand in for all fields of a tuple or struct. So if you 87 | wanted to do something for each enum variant but don't care about the content of 88 | the variants, you could write: 89 | 90 | ```rust 91 | fn foo(x: En) { 92 | match x { 93 | Var1 => println!("first variant"), 94 | Var2 => println!("second variant"), 95 | Var3(..) 
=> println!("third variant"), 96 | Var4(..) => println!("fourth variant") 97 | } 98 | } 99 | ``` 100 | 101 | When destructuring structs, the fields don't need to be in order and you can use 102 | `..` to elide the remaining fields. E.g., 103 | 104 | ```rust 105 | struct Big { 106 | field1: int, 107 | field2: int, 108 | field3: int, 109 | field4: int, 110 | field5: int, 111 | field6: int, 112 | field7: int, 113 | field8: int, 114 | field9: int, 115 | } 116 | 117 | fn foo(b: Big) { 118 | let Big { field6: x, field3: y, ..} = b; 119 | println!("pulled out {} and {}", x, y); 120 | } 121 | ``` 122 | 123 | As a shorthand with structs you can use just the field name which creates a 124 | local variable with that name. The let statement in the above example created 125 | two new local variables `x` and `y`. Alternatively, you could write 126 | 127 | ```rust 128 | fn foo(b: Big) { 129 | let Big { field6, field3, .. } = b; 130 | println!("pulled out {} and {}", field3, field6); 131 | } 132 | ``` 133 | 134 | Now we create local variables with the same names as the fields, in this case 135 | `field3` and `field6`. 136 | 137 | There are a few more tricks to Rust's destructuring. Let's say you want a 138 | reference to a variable in a pattern. You can't use `&` because that matches a 139 | reference, rather than creates one (and thus has the effect of dereferencing the 140 | object). For example, 141 | 142 | ```rust 143 | struct Foo { 144 | field: &'static int 145 | } 146 | 147 | fn foo(x: Foo) { 148 | let Foo { field: &y } = x; 149 | } 150 | ``` 151 | 152 | Here, `y` has type `int` and is a copy of the field in `x`. 153 | 154 | To create a reference to something in a pattern, you use the `ref` keyword.
For 155 | example, 156 | 157 | ```rust 158 | fn foo(b: Big) { 159 | let Big { field3: ref x, ref field6, ..} = b; 160 | println!("pulled out {} and {}", *x, *field6); 161 | } 162 | ``` 163 | 164 | Here, `x` and `field6` both have type `&int` and are references to the fields in `b`. 165 | 166 | One last trick when destructuring is that if you are destructuring a complex 167 | object, you might want to name intermediate objects as well as individual 168 | fields. Going back to an earlier example, we had the pattern `&Var4(3, St{ f1: 169 | 3, f2: x }, 45)`. In that pattern we named one field of the struct, but you 170 | might also want to name the whole struct object. You could write `&Var4(3, s, 171 | 45)` which would bind the struct object to `s`, but then you would have to use 172 | field access for the fields, or if you wanted to only match with a specific 173 | value in a field you would have to use a nested match. That is not fun. Rust 174 | lets you name parts of a pattern using `@` syntax. For example `&Var4(3, s @ St{ 175 | f1: 3, f2: x }, 45)` lets us name both a field (`x`, for `f2`) and the whole 176 | struct (`s`). 177 | 178 | That just about covers your options with Rust pattern matching. There are a few 179 | features I haven't covered, such as matching vectors, but hopefully you know how 180 | to use `match` and `let` and have seen some of the powerful things you can do. 181 | Next time I'll cover some of the subtle interactions between match and borrowing 182 | which tripped me up a fair bit when learning Rust. 183 | -------------------------------------------------------------------------------- /rc-raw.md: -------------------------------------------------------------------------------- 1 | # Reference counted and raw pointers 2 | 3 | TODO add discussion of custom pointers and Deref trait (maybe later, not here) 4 | 5 | So far we've covered unique and borrowed pointers. 
Unique pointers are very 6 | similar to the new std::unique_ptr in C++ and borrowed references are the 7 | 'default' pointer you usually reach for if you would use a pointer or reference 8 | in C++. Rust has a few more, rarer pointers either in the libraries or built in 9 | to the language. These are mostly similar to various kinds of smart pointers you 10 | might be used to in C++. 11 | 12 | This post took a while to write and I still don't like it. There are a lot of 13 | loose ends here, both in my write up and in Rust itself. I hope some will get 14 | better with later posts and some will get better as the language develops. If 15 | you are learning Rust, you might even want to skip this stuff for now, hopefully 16 | you won't need it. It's really here just for completeness after the posts on 17 | other pointer types. 18 | 19 | It might feel like Rust has a lot of pointer types, but it is pretty similar to 20 | C++ once you think about the various kinds of smart pointers available in 21 | libraries. In Rust, however, you are more likely to meet them when you first 22 | start learning the language. Because Rust pointers have compiler support, you 23 | are also much less likely to make errors when using them. 24 | 25 | I'm not going to cover these in as much detail as unique and borrowed references 26 | because, frankly, they are not as important. I might come back to them in more 27 | detail later on. 28 | 29 | ## Rc 30 | 31 | Reference counted pointers come as part of the Rust standard library. They are 32 | in the `std::rc` module (we'll cover modules soon-ish. The modules are the 33 | reason for the `use` incantations in the examples). A reference counted pointer 34 | to an object of type `T` has type `Rc<T>`. You create reference counted pointers 35 | using a static method (which for now you can think of like C++'s, but we'll see 36 | later they are a bit different) - `Rc::new(...)` which takes a value to create 37 | the pointer to.
This constructor method follows Rust's usual move/copy semantics 38 | (like we discussed for unique pointers) - in either case, after calling Rc::new, 39 | you will only be able to access the value via the pointer. 40 | 41 | As with the other pointer types, the `.` operator does all the dereferencing you 42 | need it to. You can use `*` to manually dereference. 43 | 44 | To pass a ref-counted pointer you need to use the `clone` method. This kinda 45 | sucks, and hopefully we'll fix that, but that is not for sure (sadly). You can 46 | take a (borrowed) reference to the pointed at value, so hopefully you don't need 47 | to clone too often. Rust's type system ensures that the ref-counted variable 48 | will not be deleted before any references expire. Taking a reference has the 49 | added advantage that it doesn't need to increment or decrement the ref count, 50 | and so will give better performance (although, that difference is probably 51 | marginal since Rc objects are limited to a single thread and so the ref count 52 | operations don't have to be atomic). As in C++, you can also take a reference to 53 | the Rc pointer. 54 | 55 | An Rc example: 56 | 57 | ```rust 58 | use std::rc::Rc; 59 | 60 | fn bar(x: Rc<i32>) { } 61 | fn baz(x: &i32) { } 62 | 63 | fn foo() { 64 | let x = Rc::new(45); 65 | bar(x.clone()); // Increments the ref-count 66 | baz(&*x); // Does not increment 67 | println!("{}", 100 - *x); 68 | } // Once this scope closes, all Rc pointers are gone, so ref-count == 0 69 | // and the memory will be deleted. 70 | ``` 71 | 72 | Ref counted pointers are always immutable. If you want a mutable ref-counted 73 | object you need to use a RefCell (or Cell) wrapped in an `Rc`. 74 | 75 | ## Cell and RefCell 76 | 77 | Cell and RefCell are structs which allow you to 'cheat' the mutability rules.
78 | This is kind of hard to explain without first covering Rust data structures and 79 | how they work with mutability, so I'm going to come back to these slightly 80 | tricky objects later. For now, you should know that if you want a mutable, ref 81 | counted object you need a Cell or RefCell wrapped in an Rc. As a first 82 | approximation, you probably want Cell for primitive data and RefCell for objects 83 | with move semantics. So, for a mutable, ref-counted int you would use 84 | `Rc<Cell<i32>>`. 85 | 86 | ## \*T - raw pointers 87 | 88 | Finally, Rust has two kinds of raw pointers (aka unsafe pointers): `*const T` 89 | for an immutable raw pointer, and `*mut T` for a mutable raw pointer. They are 90 | created using `&` or `&mut` (you might need to specify a type to get a `*T` 91 | rather than a `&T` since the `&` operator can create either a borrowed reference 92 | or a raw pointer). Raw pointers are like C pointers, just a pointer to memory 93 | with no restrictions on how they are used (you can't do pointer arithmetic 94 | without casting, but you can do it that way if you must). Raw pointers are the 95 | only pointer type in Rust which can be null. There is no automatic dereferencing 96 | of raw pointers (so to call a method you have to write `(*x).foo()`) and no 97 | automatic referencing. The most important restriction is that they can't be 98 | dereferenced (and thus can't be used) outside of an unsafe block. In regular 99 | Rust code you can only pass them around. 100 | 101 | So, what is unsafe code? Rust has strong safety guarantees, and (rarely) they 102 | prevent you doing something you need to do. Since Rust aims to be a systems 103 | language, it has to be able to do anything that is possible and sometimes that 104 | means doing things the compiler can't verify are safe. To accomplish that, Rust 105 | has the concept of unsafe blocks, marked by the `unsafe` keyword.
In unsafe code 106 | you can do unsafe things - dereference a raw pointer, index into an array 107 | without bounds checking, call code written in another language via the FFI, or 108 | cast variables. Obviously, you have to be much more careful writing unsafe code 109 | than writing regular Rust code. In fact, you should only very rarely write 110 | unsafe code. Mostly it is used in very small chunks in libraries, rather than in 111 | client code. In unsafe code you must do all the things you normally do in C++ to 112 | ensure safety. Furthermore, you must manually ensure that you maintain the 113 | invariants which the compiler would usually enforce. Unsafe blocks allow you to 114 | manually enforce Rust's invariants; they do not allow you to break those 115 | invariants. If you do, you can introduce bugs both in safe and unsafe code. 116 | 117 | An example of using a raw pointer: 118 | 119 | ```rust 120 | fn foo() { 121 | let mut x = 5; 122 | let x_p: *mut i32 = &mut x; 123 | println!("x+5={}", add_5(x_p)); 124 | } 125 | 126 | fn add_5(p: *mut i32) -> i32 { 127 | unsafe { 128 | if !p.is_null() { // Note that *-pointers do not auto-deref, so this is 129 | // a method implemented on *i32, not i32. 130 | *p + 5 131 | } else { 132 | -1 // Not a recommended error handling strategy. 133 | } 134 | } 135 | } 136 | ``` 137 | 138 | And that concludes our tour of Rust's pointers. Next time we'll take a break 139 | from pointers and look at Rust's data structures. We'll come back to borrowed 140 | references again in a later post though. 141 | -------------------------------------------------------------------------------- /unique.md: -------------------------------------------------------------------------------- 1 | # Unique pointers 2 | 3 | Rust is a systems language and therefore must give you raw access to memory. It 4 | does this (as in C++) via pointers. Pointers are one area where Rust and C++ are 5 | very different, both in syntax and semantics.
Rust enforces memory safety by 6 | type checking pointers. That is one of its major advantages over other 7 | languages. Although the type system is a bit complex, you get memory safety and 8 | bare-metal performance in return. 9 | 10 | I had intended to cover all of Rust's pointers in one post, but I think the 11 | subject is too large. So this post will cover just one kind - unique pointers - 12 | and other kinds will be covered in follow-up posts. 13 | 14 | First, an example without pointers: 15 | 16 | ```rust 17 | fn foo() { 18 | let x = 75; 19 | 20 | // ... do something with `x` ... 21 | } 22 | ``` 23 | 24 | When we reach the end of `foo`, `x` goes out of scope (in Rust as in C++). That 25 | means the variable can no longer be accessed and the memory for the variable can 26 | be reused. 27 | 28 | In Rust, for every type `T` we can write `Box<T>` for an owning (aka unique) 29 | pointer to `T`. We use `Box::new(...)` to allocate space on the heap and 30 | initialise that space with the supplied value. This is similar to `new` in C++. 31 | For example, 32 | 33 | ```rust 34 | fn foo() { 35 | let x = Box::new(75); 36 | } 37 | ``` 38 | 39 | Here `x` is a pointer to a location on the heap which contains the value `75`. 40 | `x` has type `Box<i32>`; we could have written `let x: Box<i32> = 41 | Box::new(75);`. This is similar to writing `int* x = new int(75);` in C++. 42 | Unlike in C++, Rust will tidy up the memory for us, so there is no need to call 43 | `free` or `delete`[^1]. Unique pointers behave similarly to 44 | values - they are deleted when the variable goes out of scope. In our example, 45 | at the end of the function `foo`, `x` can no longer be accessed and the memory 46 | pointed at by `x` can be reused. 47 | 48 | Owning pointers are dereferenced using the `*` as in C++.
E.g., 49 | 50 | ```rust 51 | fn foo() { 52 | let x = Box::new(75); 53 | println!("`x` points to {}", *x); 54 | } 55 | ``` 56 | 57 | As with primitive types in Rust, owning pointers and the data they point to are 58 | immutable by default. Unlike in C++, you can't have a mutable (unique) pointer to 59 | immutable data or vice versa. Mutability of the data follows from the pointer. 60 | E.g., 61 | 62 | ```rust 63 | fn foo() { 64 | let x = Box::new(75); 65 | let y = Box::new(42); 66 | // x = y; // Not allowed, x is immutable. 67 | // *x = 43; // Not allowed, *x is immutable. 68 | let mut x = Box::new(75); 69 | x = y; // OK, x is mutable. 70 | *x = 43; // OK, *x is mutable. 71 | } 72 | ``` 73 | 74 | Owning pointers can be returned from a function and continue to live on. If they 75 | are returned, then their memory will not be freed, i.e., there are no dangling 76 | pointers in Rust. The memory will not leak. However, it will eventually go out of 77 | scope and then it will be freed. E.g., 78 | 79 | ```rust 80 | fn foo() -> Box { 81 | let x = Box::new(75); 82 | x 83 | } 84 | 85 | fn bar() { 86 | let y = foo(); 87 | // ... use y ... 88 | } 89 | ``` 90 | 91 | Here, memory is initialised in `foo`, and returned to `bar`. `x` is returned 92 | from `foo` and stored in `y`, so it is not deleted. At the end of `bar`, `y` 93 | goes out of scope and so the memory is reclaimed. 94 | 95 | Owning pointers are unique (also called linear) because there can be only one 96 | (owning) pointer to any piece of memory at any time. This is accomplished by 97 | move semantics. When one pointer points at a value, any previous pointer can no 98 | longer be accessed. E.g., 99 | 100 | ```rust 101 | fn foo() { 102 | let x = Box::new(75); 103 | let y = x; 104 | // x can no longer be accessed 105 | // let z = *x; // Error. 
106 | } 107 | ``` 108 | 109 | Likewise, if an owning pointer is passed to another function or stored in a 110 | field, it can no longer be accessed: 111 | 112 | ```rust 113 | fn bar(y: Box<i32>) { 114 | } 115 | 116 | fn foo() { 117 | let x = Box::new(75); 118 | bar(x); 119 | // x can no longer be accessed 120 | // let z = *x; // Error. 121 | } 122 | ``` 123 | 124 | Rust's unique pointers are similar to C++ `std::unique_ptr`s. In Rust, as in 125 | C++, there can be only one unique pointer to a value and that value is deleted 126 | when the pointer goes out of scope. Rust does most of its checking statically 127 | rather than at runtime. So, in C++ accessing a unique pointer whose value has 128 | moved will result in a runtime error (since it will be null). In Rust this 129 | produces a compile-time error and you cannot go wrong at runtime. 130 | 131 | We'll see later that it is possible to create other pointer types which point at 132 | a unique pointer's value in Rust. This is similar to C++. However, in C++ this 133 | allows you to cause errors at runtime by holding a pointer to freed memory. That 134 | is not possible in Rust (we'll see how when we cover Rust's other pointer 135 | types). 136 | 137 | As shown above, owning pointers must be dereferenced to use their values. 138 | However, method calls automatically dereference, so there is no need for a `->` 139 | operator or to use `*` for method calls. In this way, Rust pointers are a bit 140 | similar to both pointers and references in C++. E.g., 141 | 142 | ```rust 143 | fn bar(x: Box<Foo>, y: Box<Box<Box<Box<Foo>>>>) { 144 | x.foo(); 145 | y.foo(); 146 | } 147 | ``` 148 | 149 | Assuming that the type `Foo` has a method `foo()`, both these expressions are OK. 150 | 151 | Calling `Box::new()` with an existing value does not take a reference to that 152 | value, it copies that value.
So, 153 | 154 | ```rust 155 | fn foo() { 156 | let x = 3; 157 | let mut y = Box::new(x); 158 | *y = 45; 159 | println!("x is still {}", x); 160 | } 161 | ``` 162 | 163 | In general, Rust has move rather than copy semantics (as seen above with unique 164 | pointers). Primitive types have copy semantics, so in the above example the 165 | value `3` is copied, but for more complex values it would be moved. We'll cover 166 | this in more detail later. 167 | 168 | Sometimes when programming, however, we need more than one reference to a value. 169 | For that, Rust has borrowed pointers. I'll cover those in the next post. 170 | 171 | 172 | [^1]: The `std::unique_ptr`, introduced in C++11, is similar in some aspects 173 | to Rust's `Box` but there are also significant differences. 174 | 175 | Similarities: 176 | * The memory pointed to by a `std::unique_ptr` in C++11 and a `Box` in Rust 177 | is automatically released once the `std::unique_ptr` goes out of the scope. 178 | * Both C++11's `std::unique_ptr` and Rust's `Box` only exhibit move semantics. 179 | 180 | Differences: 181 | 182 | 1. C++11 allows for a `std::unique_ptr` to be constructed from an existing pointer, 183 | thereby allowing multiple unique pointers to the same memory. 184 | This behaviour is not permitted with `Box`. 185 | 2. Dereferencing a `std::unique_ptr` that has been moved to another variable or function, 186 | causes undefined behavior in C++11. This would be caught at compile time in Rust. 187 | 3. Mutability or immutability does not go "through" `std::unique_ptr` 188 | -- dereferencing a `const std::unique_ptr` still yields a mutable 189 | (non-`const`) reference to the underlying data. In Rust, an immutable 190 | `Box` does not allow mutation of the data it points to. 191 | 192 | `let x = Box::new(75)` in Rust may be interpreted as `const auto x = 193 | std::unique_ptr{new int{75}};` in C++11 and `const auto x = 194 | std::make_unique(75);` in C++14. 
195 | -------------------------------------------------------------------------------- /control-flow.md: -------------------------------------------------------------------------------- 1 | # Control flow 2 | 3 | ## If 4 | 5 | The `if` statement is pretty much the same in Rust as C++. One difference is 6 | that the braces are mandatory, but parentheses around the expression being tested 7 | are not. Another is that `if` is an expression, so you can use it the same way 8 | as the ternary `?:` operator in C++ (remember from the previous section that if the last 9 | expression in a block is not terminated by a semi-colon, then it becomes the 10 | value of the block). There is no ternary `?:` in Rust. So, the following two 11 | functions do the same thing: 12 | 13 | ```rust 14 | fn foo(x: i32) -> &'static str { 15 | let result: &'static str; 16 | if x < 10 { 17 | result = "less than 10"; 18 | } else { 19 | result = "10 or more"; 20 | } 21 | return result; 22 | } 23 | 24 | fn bar(x: i32) -> &'static str { 25 | if x < 10 { 26 | "less than 10" 27 | } else { 28 | "10 or more" 29 | } 30 | } 31 | ``` 32 | 33 | (Why not `mut result`? The code in `foo` makes `result` immutable, it's just initialized in two possible places. Rust can see that by the time of `return result`, it is guaranteed to have been initialized.) 34 | 35 | The first is a fairly literal translation of what you might write in C++. The 36 | second is better Rust style. 37 | 38 | You can also write `let result = if x < 10 ...`, etc. 
39 | 40 | 41 | ## Loops 42 | 43 | Rust has while loops, again just like C++: 44 | 45 | ```rust 46 | fn main() { 47 | let mut x = 10; 48 | while x > 0 { 49 | println!("Current value: {}", x); 50 | x -= 1; 51 | } 52 | } 53 | ``` 54 | 55 | There is no `do...while` loop in Rust, but there is the `loop` statement which 56 | just loops forever: 57 | 58 | ```rust 59 | fn main() { 60 | loop { 61 | println!("Just looping"); 62 | } 63 | } 64 | ``` 65 | 66 | Rust has `break` and `continue` just like C++. 67 | 68 | 69 | ## For loops 70 | 71 | Rust also has `for` loops, but these are a bit different. Let's say you have a 72 | vector of integers and you want to print them all (we'll cover vectors/arrays, 73 | iterators, and generics in more detail in the future. For now, know that a 74 | `Vec` is a sequence of `T`s and `iter()` returns an iterator from anything 75 | you might reasonably want to iterate over). A simple `for` loop would look like: 76 | 77 | ```rust 78 | fn print_all(all: Vec) { 79 | for a in all.iter() { 80 | println!("{}", a); 81 | } 82 | } 83 | ``` 84 | 85 | TODO also &all/all instead of all.iter() 86 | 87 | If we want to index over the indices of `all` (a bit more like a standard C++ 88 | for loop over an array), you could do 89 | 90 | ```rust 91 | fn print_all(all: Vec) { 92 | for i in 0..all.len() { 93 | println!("{}: {}", i, all[i]); 94 | } 95 | } 96 | ``` 97 | 98 | Hopefully, it is obvious what the `len` function does. TODO range notation 99 | 100 | A more Rust-like equivalent of the preceding example would be to use an 101 | enumerating iterator: 102 | 103 | ```rust 104 | fn print_all(all: Vec) { 105 | for (i, a) in all.iter().enumerate() { 106 | println!("{}: {}", i, a); 107 | } 108 | } 109 | ``` 110 | 111 | Where `enumerate()` chains from the iterator `iter()` and yields the current 112 | count and the element during iteration. 
113 | 114 | *The following example incorporates more advanced topics covered in the section 115 | on [Borrowed Pointers](borrowed.md).* Let's say you have a vector of integers 116 | and want to call a function, passing the vector by reference and having the 117 | vector modified in place. Here the `for` loop uses a mutable iterator which 118 | gives mutable references - the `*` dereferencing should be familiar to C++ 119 | programmers: 120 | 121 | ```rust 122 | fn double_all(all: &mut Vec<i32>) { 123 | for a in all.iter_mut() { 124 | *a += *a; 125 | } 126 | } 127 | ``` 128 | 129 | 130 | ## Switch/Match 131 | 132 | Rust has a match expression which is similar to a C++ switch statement, but much 133 | more powerful. This simple version should look pretty familiar: 134 | 135 | ```rust 136 | fn print_some(x: i32) { 137 | match x { 138 | 0 => println!("x is zero"), 139 | 1 => println!("x is one"), 140 | 10 => println!("x is ten"), 141 | y => println!("x is something else {}", y), 142 | } 143 | } 144 | ``` 145 | 146 | There are some syntactic differences - we use `=>` to go from the matched value 147 | to the expression to execute, and the match arms are separated by `,` (that last 148 | `,` is optional). There are also some semantic differences which are not so 149 | obvious: the matched patterns must be exhaustive, that is all possible values of 150 | the matched expression (`x` in the above example) must be covered. Try removing 151 | the `y => ...` line and see what happens; that is because we only have matches 152 | for 0, 1, and 10, but there are obviously lots of other integers which don't get 153 | matched. In that last arm, `y` is bound to the value being matched (`x` in this 154 | case).
We could also write: 155 | 156 | ```rust 157 | fn print_some(x: i32) { 158 | match x { 159 | x => println!("x is something else {}", x) 160 | } 161 | } 162 | ``` 163 | 164 | Here the `x` in the match arm introduces a new variable which hides the argument 165 | `x`, just like declaring a variable in an inner scope. 166 | 167 | If we don't want to name the variable, we can use `_` for an unnamed variable, 168 | which is like having a wildcard match. If we don't want to do anything, we can 169 | provide an empty branch: 170 | 171 | ```rust 172 | fn print_some(x: i32) { 173 | match x { 174 | 0 => println!("x is zero"), 175 | 1 => println!("x is one"), 176 | 10 => println!("x is ten"), 177 | _ => {} 178 | } 179 | } 180 | ``` 181 | 182 | Another semantic difference is that there is no fall through from one arm to the 183 | next so it works like `if...else if...else`. 184 | 185 | We'll see in later posts that match is extremely powerful. For now I want to 186 | introduce just a couple more features - the 'or' operator for values and `if` 187 | clauses on arms. 
Hopefully an example is self-explanatory: 188 | 189 | ```rust 190 | fn print_some_more(x: i32) { 191 | match x { 192 | 0 | 1 | 10 => println!("x is one of zero, one, or ten"), 193 | y if y < 20 => println!("x is less than 20, but not zero, one, or ten"), 194 | y if y == 200 => println!("x is 200 (but this is not very stylish)"), 195 | _ => {} 196 | } 197 | } 198 | ``` 199 | 200 | Just like `if` expressions, `match` statements are actually expressions so we 201 | could re-write the last example as: 202 | 203 | ```rust 204 | fn print_some_more(x: i32) { 205 | let msg = match x { 206 | 0 | 1 | 10 => "one of zero, one, or ten", 207 | y if y < 20 => "less than 20, but not zero, one, or ten", 208 | y if y == 200 => "200 (but this is not very stylish)", 209 | _ => "something else" 210 | }; 211 | 212 | println!("x is {}", msg); 213 | } 214 | ``` 215 | 216 | Note the semi-colon after the closing brace, that is because the `let` statement 217 | is a statement and must take the form `let msg = ...;`. We fill the rhs with a 218 | match expression (which doesn't usually need a semi-colon), but the `let` 219 | statement does. This catches me out all the time. 220 | 221 | Motivation: Rust match statements avoid the common bugs with C++ switch 222 | statements - you can't forget a `break` and unintentionally fall through; if you 223 | add a case to an enum (more later on) the compiler will make sure it is covered 224 | by your `match` statement. 225 | 226 | 227 | ## Method call 228 | 229 | Finally, just a quick note that methods exist in Rust, similarly to C++. They 230 | are always called via the `.` operator (no `->`, more on this in another post). 231 | We saw a few examples above (`len`, `iter`). We'll go into more detail in the 232 | future about how they are defined and called. Most assumptions you might make 233 | from C++ or Java are probably correct. 
234 | -------------------------------------------------------------------------------- /hello-world.md: -------------------------------------------------------------------------------- 1 | # Introduction - hello world! 2 | 3 | If you are using C or C++, it is probably because you have to - either you need 4 | low-level access to the system, or need every last drop of performance, or both. 5 | Rust aims to offer the same level of abstraction around memory, the same 6 | performance, but be safer and make you more productive. 7 | 8 | Concretely, there are many languages out there that you might prefer to use to 9 | C++: Java, Scala, Haskell, Python, and so forth, but you can't because either 10 | the level of abstraction is too high (you don't get direct access to memory, 11 | you are forced to use garbage collection, etc.), or there are performance issues 12 | (either performance is unpredictable or it's simply not fast enough). Rust does 13 | not force you to use garbage collection, and as in C++, you get raw pointers to 14 | memory to play with. Rust subscribes to the 'pay for what you use' philosophy of 15 | C++. If you don't use a feature, then you don't pay any performance overhead for 16 | its existence. Furthermore, all language features in Rust have a predictable (and 17 | usually small) cost. 18 | 19 | Whilst these constraints make Rust a (rare) viable alternative to C++, Rust also 20 | has benefits: it is memory safe - Rust's type system ensures that you don't get 21 | the kind of memory errors which are common in C++ - accessing un-initialised 22 | memory, and dangling pointers - all are impossible in Rust. Furthermore, 23 | whenever other constraints allow, Rust strives to prevent other safety issues 24 | too - for example, all array indexing is bounds checked (of course, if you want 25 | to avoid the cost, you can (at the expense of safety) - Rust allows you to do 26 | this in unsafe blocks, along with many other unsafe things. 
Crucially, Rust 27 | ensures that unsafety in unsafe blocks stays in unsafe blocks and can't affect 28 | the rest of your program). Finally, Rust takes many concepts from modern 29 | programming languages and introduces them to the systems language space. 30 | Hopefully, that makes programming in Rust more productive, efficient, and 31 | enjoyable. 32 | 33 | In the rest of this section we'll download and install Rust, create a minimal 34 | Cargo project, and implement Hello World. 35 | 36 | 37 | ## Getting Rust 38 | 39 | You can get Rust from [http://www.rust-lang.org/tools/install](http://www.rust-lang.org/tools/install). 40 | The downloads from there include the Rust compiler, standard libraries, and 41 | Cargo, which is a package manager and build tool for Rust. 42 | 43 | Rust is available on three channels: stable, beta, and nightly. Rust works on a 44 | rapid-release schedule, with new releases every six weeks. On the release date, 45 | nightly becomes beta and beta becomes stable. 46 | 47 | Nightly is updated every night and is ideal for users who want to experiment with 48 | cutting-edge features and ensure that their libraries will work with future Rust. 49 | 50 | Stable is the right choice for most users. Rust's stability guarantees only 51 | apply to the stable channel. 52 | 53 | Beta is designed to mostly be used in users' CI to check that their code will 54 | continue to work as expected. 55 | 56 | So, you probably want the stable channel. If you're on Linux or OS X, the 57 | easiest way to get it is to run 58 | 59 | ``` 60 | curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh 61 | ``` 62 | 63 | On Windows, a similarly easy way would be to run 64 | 65 | ``` 66 | choco install rust 67 | ``` 68 | 69 | For other ways to install, see [http://www.rust-lang.org/tools/install](http://www.rust-lang.org/tools/install). 70 | 71 | You can find the source at [github.com/rust-lang/rust](https://github.com/rust-lang/rust).
72 | To build the compiler, run `./configure && make rustc`. See 73 | [building-from-source](https://github.com/rust-lang/rust#building-from-source) 74 | for more detailed instructions. 75 | 76 | 77 | ## Hello World! 78 | 79 | The easiest and most common way to build Rust programs is to use Cargo. To start 80 | a project called `hello` using Cargo, run `cargo new --bin hello`. This will 81 | create a new directory called `hello` inside which is a `Cargo.toml` file and 82 | a `src` directory with a file called `main.rs`. 83 | 84 | `Cargo.toml` defines dependencies and other metadata about our project. We'll 85 | come back to it in detail later. 86 | 87 | All our source code will go in the `src` directory. `main.rs` already contains 88 | a Hello World program. It looks like this: 89 | 90 | ```rust 91 | fn main() { 92 | println!("Hello, world!"); 93 | } 94 | ``` 95 | 96 | To build the program, run `cargo build`. To build and run it, `cargo run`. If 97 | you do the latter, you should be greeted in the console. Success! 98 | 99 | Cargo will have made a `target` directory and put the executable in there. 100 | 101 | If you want to use the compiler directly you can run `rustc src/main.rs` which 102 | will create an executable called `main`. See `rustc --help` for lots of 103 | options. 104 | 105 | OK, back to the code. A few interesting points - we use `fn` to define a 106 | function or method. `main()` is the default entry point for our programs (we'll 107 | leave program args for later). There are no separate declarations or header 108 | files as with C++. `println!` is Rust's equivalent of printf. The `!` means that 109 | it is a macro. A subset of the standard library is available without needing to 110 | be explicitly imported/included (the prelude). The `println!` macro is included 111 | as part of that subset. 
112 | 113 | Let's change our example a little bit: 114 | 115 | ```rust 116 | fn main() { 117 | let world = "world"; 118 | println!("Hello {}!", world); 119 | } 120 | ``` 121 | 122 | `let` is used to introduce a variable, world is the variable name and it is a 123 | string (technically the type is `&'static str`, but more on that later). We 124 | don't need to specify the type, it will be inferred for us. 125 | 126 | Using `{}` in the `println!` statement is like using `%s` in printf. In fact, it 127 | is a bit more general than that because Rust will try to convert the variable to 128 | a string if it is not one already[^1] (like `operator<<()` in C++). 129 | You can easily play around with this sort of thing - try multiple strings and 130 | using numbers (integer and float literals will work). 131 | 132 | If you like, you can explicitly give the type of `world`: 133 | 134 | ```rust 135 | let world: &'static str = "world"; 136 | ``` 137 | 138 | In C++ we write `T x` to declare a variable `x` with type `T`. In Rust we write 139 | `x: T`, whether in `let` statements or function signatures, etc. Mostly we omit 140 | explicit types in `let` statements, but they are required for function 141 | arguments. Let's add another function to see it work: 142 | 143 | ```rust 144 | fn foo(_x: &'static str) -> &'static str { 145 | "world" 146 | } 147 | 148 | fn main() { 149 | println!("Hello {}!", foo("bar")); 150 | } 151 | ``` 152 | 153 | The function `foo` has a single argument `_x` which is a string literal (we pass 154 | it "bar" from `main`)[^2]. 155 | 156 | The return type for a function is given after `->`. If the function doesn't 157 | return anything (a void function in C++), we don't need to give a return type at 158 | all (as in `main`). If you want to be super-explicit, you can write `-> ()`, 159 | `()` is the void type in Rust. 
160 | 161 | You don't need the `return` keyword in Rust, if the last expression in a 162 | function body (or any other block, we'll see more of this later) is not finished 163 | with a semicolon, then it is the return value. So `foo` will return 164 | "world". The `return` keyword still exists so we can do early returns. You can 165 | replace `"world"` with `return "world";` and it will have the same effect. 166 | 167 | 168 | ## Why? 169 | 170 | I would like to motivate some of the language features above. Local type 171 | inference is convenient and useful without sacrificing safety or performance 172 | (it's even in modern versions of C++ now). A minor convenience is that language 173 | items are consistently denoted by keyword (`fn`, `let`, etc.), this makes 174 | scanning by eye or by tools easier, in general the syntax of Rust is simpler and 175 | more consistent than C++. The `println!` macro is safer than printf - the number 176 | of arguments is statically checked against the number of 'holes' in the string 177 | and the arguments are type checked. This means you can't make the printf 178 | mistakes of printing memory as if it had a different type or addressing memory 179 | further down the stack by mistake. These are fairly minor things, but I hope 180 | they illustrate the philosophy behind the design of Rust. 181 | 182 | 183 | [^1]: This is a programmer specified conversion which uses the `Display` trait, which 184 | works a bit like `toString` in Java. You can also use `{:?}` which gives a 185 | compiler generated representation which is sometimes useful for debugging. As 186 | with printf, there are many other options. 187 | 188 | [^2]: We don't actually use that argument in `foo`. Usually, 189 | Rust will warn us about this. By prefixing the argument name with `_` we avoid 190 | these warnings. In fact, we don't need to name the argument at all, we could 191 | just use `_`. 
192 | -------------------------------------------------------------------------------- /borrowed.md: -------------------------------------------------------------------------------- 1 | # Borrowed pointers 2 | 3 | In the last post I introduced unique pointers. This time I will talk about 4 | another kind of pointer which is much more common in most Rust programs: 5 | borrowed pointers (aka borrowed references, or just references). 6 | 7 | If we want to have a reference to an existing value (as opposed to creating a 8 | new value on the heap and pointing to it, as with unique pointers), we must use 9 | `&`, a borrowed reference. These are probably the most common kind of pointer in 10 | Rust, and if you want something to fill in for a C++ pointer or reference (e.g., 11 | for passing a parameter to a function by reference), this is probably it. 12 | 13 | We use the `&` operator to create a borrowed reference and to indicate reference 14 | types, and `*` to dereference them. The same rules about automatic dereferencing 15 | apply as for unique pointers. For example, 16 | 17 | ```rust 18 | fn foo() { 19 | let x = &3; // type: &i32 20 | let y = *x; // 3, type: i32 21 | bar(x, *x); 22 | bar(&y, y); 23 | } 24 | 25 | fn bar(z: &i32, i: i32) { 26 | // ... 27 | } 28 | ``` 29 | 30 | The `&` operator does not allocate memory (we can only create a borrowed 31 | reference to an existing value) and if a borrowed reference goes out of scope, 32 | no memory gets deleted. 33 | 34 | Borrowed references are not unique - you can have multiple borrowed references 35 | pointing to the same value. E.g., 36 | 37 | ```rust 38 | fn foo() { 39 | let x = 5; // type: i32 40 | let y = &x; // type: &i32 41 | let z = y; // type: &i32 42 | let w = y; // type: &i32 43 | println!("These should all be 5: {} {} {}", *w, *y, *z); 44 | } 45 | ``` 46 | 47 | Like values, borrowed references are immutable by default. 
You can also use 48 | `&mut` to take a mutable reference, or to denote mutable reference types. 49 | Mutable borrowed references are unique (you can only take a single mutable 50 | reference to a value, and you can only have a mutable reference if there are no 51 | immutable references). You can use a mutable reference where an immutable one is 52 | wanted, but not vice versa. Putting all that together in an example: 53 | 54 | ```rust 55 | fn bar(x: &i32) { ... } 56 | fn bar_mut(x: &mut i32) { ... } // &mut i32 is a reference to an i32 which 57 | // can be mutated 58 | 59 | fn foo() { 60 | let x = 5; 61 | //let xr = &mut x; // Error - can't make a mutable reference to an 62 | // immutable variable 63 | let xr = &x; // Ok (creates an immutable ref) 64 | bar(xr); 65 | //bar_mut(xr); // Error - expects a mutable ref 66 | 67 | let mut x = 5; 68 | let xr = &x; // Ok (creates an immutable ref) 69 | //*xr = 4; // Error - mutating immutable ref 70 | //let xr = &mut x; // Error - there is already an immutable ref, so we 71 | // can't make a mutable one 72 | 73 | let mut x = 5; 74 | let xr = &mut x; // Ok (creates a mutable ref) 75 | *xr = 4; // Ok 76 | //let xr2 = &x; // Error - there is already a mutable ref, so we 77 | // can't make an immutable one 78 | //let xr2 = &mut x; // Error - can only have one mutable ref at a time 79 | bar(xr); // Ok 80 | bar_mut(xr); // Ok 81 | } 82 | ``` 83 | 84 | Note that the reference may be mutable (or not) independently of the mutableness 85 | of the variable holding the reference. This is similar to C++ where pointers can 86 | be const (or not) independently of the data they point to. This is in contrast 87 | to unique pointers, where the mutableness of the pointer is linked to the 88 | mutableness of the data. 
For example, 89 | 90 | ```rust 91 | fn foo() { 92 | let mut x = 5; 93 | let mut y = 6; 94 | let xr = &mut x; 95 | //xr = &mut y; // Error xr is immutable 96 | 97 | let mut x = 5; 98 | let mut y = 6; 99 | let mut xr = &mut x; 100 | xr = &mut y; // Ok 101 | 102 | let x = 5; 103 | let y = 6; 104 | let mut xr = &x; 105 | xr = &y; // Ok - xr is mut, even though the referenced data is not 106 | } 107 | ``` 108 | 109 | If a mutable value is borrowed, it becomes immutable for the duration of the 110 | borrow. Once the borrowed pointer goes out of scope, the value can be mutated 111 | again. This is in contrast to unique pointers, which once moved can never be 112 | used again. For example, 113 | 114 | ```rust 115 | fn foo() { 116 | let mut x = 5; // type: i32 117 | { 118 | let y = &x; // type: &i32 119 | //x = 4; // Error - x has been borrowed 120 | println!("{} {}", y, x); // Ok - x can be read 121 | } 122 | x = 4; // OK - y no longer exists 123 | } 124 | ``` 125 | 126 | The same thing happens if we take a mutable reference to a value - the value 127 | still cannot be modified. In general in Rust, data can only ever be modified via 128 | one variable or pointer. Furthermore, since we have a mutable reference, we 129 | can't take an immutable reference. That limits how we can use the underlying 130 | value: 131 | 132 | ```rust 133 | fn foo() { 134 | let mut x = 5; // type: i32 135 | { 136 | let y = &mut x; // type: &mut i32 137 | //x = 4; // Error - x has been borrowed 138 | //println!("{}", x); // Error - requires borrowing x 139 | } 140 | x = 4; // OK - y no longer exists 141 | } 142 | ``` 143 | 144 | Unlike C++, Rust won't automatically reference a value for you. So if a function 145 | takes a parameter by reference, the caller must reference the actual parameter. 146 | However, pointer types will automatically be converted to a reference: 147 | 148 | ```rust 149 | fn foo(x: &i32) { ... 
} 150 | 151 | fn bar(x: i32, y: Box) { 152 | foo(&x); 153 | // foo(x); // Error - expected &i32, found i32 154 | foo(y); // Ok 155 | foo(&*y); // Also ok, and more explicit, but not good style 156 | } 157 | ``` 158 | 159 | ## `mut` vs `const` 160 | 161 | At this stage it is probably worth comparing `mut` in Rust to `const` in C++. 162 | Superficially they are opposites. Values are immutable by default in Rust and 163 | can be made mutable by using `mut`. Values are mutable by default in C++, but 164 | can be made constant by using `const`. The subtler and more important difference 165 | is that C++ const-ness applies only to the current use of a value, whereas 166 | Rust's immutability applies to all uses of a value. So in C++ if I have a 167 | `const` variable, someone else could have a non-const reference to it and it 168 | could change without me knowing. In Rust if you have an immutable variable, you 169 | are guaranteed it won't change. 170 | 171 | As we mentioned above, all mutable variables are unique. So if you have a 172 | mutable value, you know it is not going to change unless you change it. 173 | Furthermore, you can change it freely since you know that no one else is relying 174 | on it not changing. 175 | 176 | ## Borrowing and lifetimes 177 | 178 | One of the primary safety goals of Rust is to avoid dangling pointers (where a 179 | pointer outlives the memory it points to). In Rust, it is impossible to have a 180 | dangling borrowed reference. It is only legal to create a borrowed reference to 181 | memory which will be alive longer than the reference (well, at least as long as 182 | the reference). In other words, the lifetime of the reference must be shorter 183 | than the lifetime of the referenced value. 184 | 185 | That has been accomplished in all the examples in this post. Scopes introduced 186 | by `{}` or functions are bounds on lifetimes - when a variable goes out of scope 187 | its lifetime ends. 
If we try to take a reference to a shorter lifetime, such as 188 | in a narrower scope, the compiler will give us an error. For example, 189 | 190 | ```rust 191 | fn foo() { 192 | let x = 5; 193 | let mut xr = &x; // Ok - x and xr have the same lifetime 194 | { 195 | let y = 6; 196 | xr = &y // Error - xr will outlive y 197 | } // y is released here 198 | println!("{:?}", xr); // xr is used here so it outlives y. Try to comment out this line. 199 | } // x and xr are released here 200 | ``` 201 | 202 | In the above example, xr and y don't have the same lifetime because y starts 203 | later than xr, but it's the end of lifetimes which is more interesting, since you 204 | can't reference a variable before it exists in any case - something else which 205 | Rust enforces and which makes it safer than C++. 206 | 207 | ## Explicit lifetimes 208 | 209 | After playing with borrowed pointers for a while, you'll probably come across 210 | borrowed pointers with an explicit lifetime. These have the syntax `&'a T` ([cf.](https://en.wikipedia.org/wiki/Cf.) 211 | `&T`). They're kind of a big topic since I need to cover lifetime-polymorphism 212 | at the same time so I'll leave it for another post (there are a few more less 213 | common pointer types to cover first though). For now, I just want to say that 214 | `&T` is a shorthand for `&'a T` where `a` is the current scope, that is the 215 | scope in which the type is declared. 216 | -------------------------------------------------------------------------------- /destructuring-2.md: -------------------------------------------------------------------------------- 1 | # Destructuring pt2 - match and borrowing 2 | 3 | When destructuring there are some surprises in store where borrowing is 4 | concerned. Hopefully, nothing surprising once you understand borrowed references 5 | really well, but worth discussing (it took me a while to figure out, that's for 6 | sure. 
Longer than I realised, in fact, since I screwed up the first version of 7 | this blog post). 8 | 9 | Imagine you have some `&Enum` variable `x` (where `Enum` is some enum type). You 10 | have two choices: you can match `*x` and list all the variants (`Variant1 => 11 | ...`, etc.) or you can match `x` and list reference to variant patterns 12 | (`&Variant1 => ...`, etc.). (As a matter of style, prefer the first form where 13 | possible since there is less syntactic noise). `x` is a borrowed reference and 14 | there are strict rules for how a borrowed reference can be dereferenced; these 15 | interact with match expressions in surprising ways (at least surprising to me), 16 | especially when you are modifying an existing enum in a seemingly innocuous way 17 | and then the compiler explodes on a match somewhere. 18 | 19 | Before we get into the details of the match expression, let's recap Rust's rules 20 | for value passing. In C++, when assigning a value into a variable or passing it 21 | to a function there are two choices - pass-by-value and pass-by-reference. The 22 | former is the default case and means a value is copied either using a copy 23 | constructor or a bitwise copy. If you annotate the destination of the parameter 24 | pass or assignment with `&`, then the value is passed by reference - only a 25 | pointer to the value is copied and when you operate on the new variable, you are 26 | also operating on the old value. 27 | 28 | Rust has the pass-by-reference option, although in Rust the source as well as 29 | the destination must be annotated with `&`. For pass-by-value in Rust, there are 30 | two further choices - copy or move. A copy is the same as C++'s semantics 31 | (except that there are no copy constructors in Rust). A move copies the value 32 | but destroys the old value - Rust's type system ensures you can no longer access 33 | the old value. 
As examples, `i32` has copy semantics and `Box<i32>` has move 34 | semantics: 35 | 36 | ```rust 37 | fn foo() { 38 | let x = 7i32; 39 | let y = x; // x is copied 40 | println!("x is {}", x); // OK 41 | 42 | let x = Box::new(7i32); 43 | let y = x; // x is moved 44 | //println!("x is {}", x); // error: use of moved value: `x` 45 | } 46 | ``` 47 | 48 | You can also choose to have copy semantics for user-defined types 49 | by implementing the `Copy` trait. One straightforward way to do that is 50 | to add `#[derive(Copy, Clone)]` before the definition of the `struct`. Not all 51 | user-defined types are allowed to implement the `Copy` trait. All fields of 52 | a type must implement `Copy` and the type must not have a destructor. 53 | Destructors probably need a post of their own, but for now, an object 54 | in Rust has a destructor if it implements the `Drop` trait. 55 | Just like C++, the destructor is executed just before an object is 56 | destroyed. 57 | 58 | Now, it is important that a borrowed object is not moved, otherwise you would 59 | have a reference to the old object which is no longer valid. This is equivalent 60 | to holding a reference to an object which has been destroyed after going out of 61 | scope - it is a kind of dangling pointer. If you have a pointer to an object, 62 | there could be other references to it. So if an object has move semantics and 63 | you have a pointer to it, it is unsafe to dereference that pointer. (If the 64 | object has copy semantics, dereferencing creates a copy and the old object will 65 | still exist, so other references will be fine). 66 | 67 | OK, back to match expressions. As I said earlier, if you want to match some `x` 68 | with type `&T` you can dereference once in the match clause or match the 69 | reference in every arm of the match expression. Example: 70 | 71 | ```rust 72 | enum Enum1 { 73 | Var1, 74 | Var2, 75 | Var3 76 | } 77 | 78 | fn foo(x: &Enum1) { 79 | match *x { // Option 1: deref here. 
80 | Enum1::Var1 => {} 81 | Enum1::Var2 => {} 82 | Enum1::Var3 => {} 83 | } 84 | 85 | match x { 86 | // Option 2: 'deref' in every arm. 87 | &Enum1::Var1 => {} 88 | &Enum1::Var2 => {} 89 | &Enum1::Var3 => {} 90 | } 91 | } 92 | ``` 93 | 94 | In this case you can take either approach because `Enum1` has copy semantics. 95 | Let's take a closer look at each approach: in the first approach we dereference 96 | `x` to a temporary variable with type `Enum1` (which copies the value in `x`) 97 | and then do a match against the three variants of `Enum1`. This is a 'one level' 98 | match because we don't go deep into the value's type. In the second approach 99 | there is no dereferencing. We match a value with type `&Enum1` against a 100 | reference to each variant. This match goes two levels deep - it matches the type 101 | (always a reference) and looks inside the type to match the referred type (which 102 | is `Enum1`). 103 | 104 | Either way, we must ensure that we (that is, the compiler) respect 105 | Rust's invariants around moves and references - we must not move any 106 | part of an object if it is referenced. If the value being matched has copy 107 | semantics, that is trivial. If it has move semantics then we must make sure that 108 | moves don't happen in any match arm. This is accomplished either by ignoring 109 | data which would move, or making references to it (so we get by-reference 110 | passing rather than by-move). 111 | 112 | ```rust 113 | enum Enum2 { 114 | // Box has a destructor so Enum2 has move semantics. 115 | Var1(Box<i32>), 116 | Var2, 117 | Var3 118 | } 119 | 120 | fn foo(x: &Enum2) { 121 | match *x { 122 | // We're ignoring nested data, so this is OK 123 | Enum2::Var1(..) => {} 124 | // No change to the other arms. 125 | Enum2::Var2 => {} 126 | Enum2::Var3 => {} 127 | } 128 | 129 | match x { 130 | // We're ignoring nested data, so this is OK 131 | &Enum2::Var1(..) => {} 132 | // No change to the other arms. 
133 | &Enum2::Var2 => {} 134 | &Enum2::Var3 => {} 135 | } 136 | } 137 | ``` 138 | 139 | In either approach we don't refer to any of the nested data, so none of it is 140 | moved. In the first approach, even though `x` is referenced, we don't touch its 141 | innards in the scope of the dereference (i.e., the match expression) so nothing 142 | can escape. We also don't bind the whole value (i.e., bind `*x` to a variable), 143 | so we can't move the whole object either. 144 | 145 | We can take a reference to any variant in the second match, but not in the 146 | dereferenced version. So, in the second approach replacing the second arm with `a 147 | @ &Var2 => {}` is OK (`a` is a reference), but under the first approach we 148 | couldn't write `a @ Var2 => {}` since that would mean moving `*x` into `a`. We 149 | could write `ref a @ Var2 => {}` (in which `a` is also a reference), although 150 | it's not a construct you see very often. 151 | 152 | But what about if we want to use the data nested inside `Var1`? We can't write: 153 | 154 | ```rust 155 | match *x { 156 | Enum2::Var1(y) => {} 157 | _ => {} 158 | } 159 | ``` 160 | 161 | or 162 | 163 | ```rust 164 | match x { 165 | &Enum2::Var1(y) => {} 166 | _ => {} 167 | } 168 | ``` 169 | 170 | because in both cases it means moving part of `x` into `y`. We can use the 'ref' 171 | keyword to get a reference to the data in `Var1`: `&Var1(ref y) => {}`. That is 172 | OK, because now we are not dereferencing anywhere and thus not moving any part 173 | of `x`. Instead we are creating a pointer which points into the interior of `x`. 174 | 175 | Alternatively, we could destructure the Box (this match is going three levels 176 | deep): `&Var1(box y) => {}` (note `box` pattern syntax is experimental as of rustc 1.58 177 | and is available only in nightly version of rustc). 
178 | This is OK because `i32` has copy semantics and `y` 179 | is a copy of the `i32` inside the `Box` inside `Var1` (which is 'inside' a 180 | borrowed reference). Since `i32` has copy semantics, we don't need to move any 181 | part of `x`. We could also create a reference to the int rather than copy it: 182 | `&Var1(box ref y) => {}`. Again, this is OK, because we don't do any 183 | dereferencing and thus don't need to move any part of `x`. If the contents of 184 | the Box had move semantics, then we could not write `&Var1(box y) => {}`, we 185 | would be forced to use the reference version. We could also use similar 186 | techniques with the first approach to matching, which look the same but without 187 | the first `&`. For example, `Var1(box ref y) => {}`. 188 | 189 | Now let's get more complex. Let's say you want to match against a pair of 190 | reference-to-enum values. Now we can't use the first approach at all: 191 | 192 | ```rust 193 | fn bar(x: &Enum2, y: &Enum2) { 194 | // Error: x and y are being moved. 195 | // match (*x, *y) { 196 | // (Enum2::Var2, _) => {} 197 | // _ => {} 198 | // } 199 | 200 | // OK. 201 | match (x, y) { 202 | (&Enum2::Var2, _) => {} 203 | _ => {} 204 | } 205 | } 206 | ``` 207 | 208 | The first approach is illegal because the value being matched is created by 209 | dereferencing `x` and `y` and then moving them both into a new tuple object. So 210 | in this circumstance, only the second approach works. And of course, you still 211 | have to follow the rules above for avoiding moving parts of `x` and `y`. 212 | 213 | If you do end up only being able to get a reference to some data and you need 214 | the value itself, you have no option except to copy that data. Usually that 215 | means using `clone()`. If the data doesn't implement clone, you're going to have 216 | to further destructure to make a manual copy or implement clone yourself. 
217 | 218 | What if we don't have a reference to a value with move semantics, but the value 219 | itself? Now moves are OK, because we know no one else has a reference to the 220 | value (the compiler ensures that if they do, we can't use the value). For 221 | example, 222 | 223 | ```rust 224 | fn baz(x: Enum2) { 225 | match x { 226 | Enum2::Var1(y) => {} 227 | _ => {} 228 | } 229 | } 230 | ``` 231 | 232 | There are still a few things to be aware of. Firstly, you can only move to one 233 | place. In the above example we are moving part of `x` into `y` and we'll forget 234 | about the rest. If we wrote `a @ Var1(y) => {}` we would be attempting to move 235 | all of `x` into `a` and part of `x` into `y`. That is not allowed, an arm like 236 | that is illegal. Making one of `a` or `y` a reference (using `ref a`, etc.) is 237 | not an option either, then we'd have the problem described above where we move 238 | whilst holding a reference. We can make both `a` and `y` references and then 239 | we're OK - neither is moving, so `x` remains intact and we have pointers to the 240 | whole and a part of it. 241 | 242 | Similarly (and more common), if we have a variant with multiple pieces of nested 243 | data, we can't take a reference to one datum and move another. For example if we 244 | had a `Var4` declared as `Var4(Box<i32>, Box<i32>)` we can have a match arm 245 | which references both (`Var4(ref y, ref z) => {}`) or a match arm which moves 246 | both (`Var4(y, z) => {}`) but you cannot have a match arm which moves one and 247 | references the other (`Var4(ref y, z) => {}`). This is because a partial move 248 | still destroys the whole object, so the reference would be invalid. 249 | -------------------------------------------------------------------------------- /arrays.md: -------------------------------------------------------------------------------- 1 | # Arrays and Vectors 2 | 3 | Rust arrays are pretty different from C arrays. 
For starters they come in 4 | statically and dynamically sized flavours. These are more commonly known as 5 | fixed length arrays and slices. As we'll see, the former is kind of a bad name 6 | since both kinds of array have fixed (as opposed to growable) length. For a 7 | growable 'array', Rust provides the `Vec` collection. 8 | 9 | 10 | ## Fixed length arrays 11 | 12 | The length of a fixed length array is known statically and features in its 13 | type. E.g., `[i32; 4]` is the type of an array of `i32`s with length four. 14 | 15 | Array literal and array access syntax is the same as C: 16 | 17 | ```rust 18 | let a: [i32; 4] = [1, 2, 3, 4]; // As usual, the type annotation is optional. 19 | println!("The second element is {}", a[1]); 20 | ``` 21 | 22 | You'll notice that array indexing is zero-based, just like C. 23 | 24 | However, unlike C/C++[^1], array indexing is bounds checked. In 25 | fact all access to arrays is bounds checked, which is another way Rust is a 26 | safer language. 27 | 28 | If you try to do `a[4]`, then you will get a runtime panic. Unfortunately, the 29 | Rust compiler is not clever enough to give you a compile time error, even when 30 | it is obvious (as in this example). 31 | 32 | If you like to live dangerously, or just need to get every last ounce of 33 | performance out of your program, you can still get unchecked access to arrays. 34 | To do this, use the `get_unchecked` method on an array. Unchecked array accesses 35 | must be inside an unsafe block. You should only need to do this in the rarest 36 | circumstances. 37 | 38 | Just like other data structures in Rust, arrays are immutable by default and 39 | mutability is inherited. 
Mutation is also done via the indexing syntax: 40 | 41 | ```rust 42 | let mut a = [1, 2, 3, 4]; 43 | a[3] = 5; 44 | println!("{:?}", a); 45 | ``` 46 | 47 | And just like other data, you can borrow an array by taking a reference to it: 48 | 49 | ```rust 50 | fn foo(a: &[i32; 4]) { 51 | println!("First: {}; last: {}", a[0], a[3]); 52 | } 53 | 54 | fn main() { 55 | foo(&[1, 2, 3, 4]); 56 | } 57 | ``` 58 | 59 | Notice that indexing still works on a borrowed array. 60 | 61 | This is a good time to talk about the most interesting aspect of Rust arrays for 62 | C++ programmers - their representation. Rust arrays are value types: they are 63 | allocated on the stack like other values and an array object is a sequence of 64 | values, not a pointer to those values (as in C). So from our examples above, `let 65 | a = [1_i32, 2, 3, 4];` will allocate 16 bytes on the stack and executing `let b 66 | = a;` will copy 16 bytes. If you want a C-like array, you have to explicitly 67 | make a pointer to the array, this will give you a pointer to the first element. 68 | 69 | A final point of difference between arrays in Rust and C++ is that Rust arrays 70 | can implement traits, and thus have methods. To find the length of an array, for 71 | example, you use `a.len()`. 72 | 73 | 74 | ## Slices 75 | 76 | A slice in Rust is just an array whose length is not known at compile time. The 77 | syntax of the type is just like a fixed length array, except there is no length: 78 | e.g., `[i32]` is a slice of 32 bit integers (with no statically known length). 79 | 80 | There is a catch with slices: since the compiler must know the size of all 81 | objects in Rust, and it can't know the size of a slice, then we can never have a 82 | value with slice type. If you try and write `fn foo(x: [i32])`, for example, the 83 | compiler will give you an error. 
84 | 85 | So, you must always have pointers to slices (there are some very technical 86 | exceptions to this rule so that you can implement your own smart pointers, but 87 | you can safely ignore them for now). You must write `fn foo(x: &[i32])` (a 88 | borrowed reference to a slice) or `fn foo(x: *mut [i32])` (a mutable raw pointer 89 | to a slice), etc. 90 | 91 | The simplest way to create a slice is by coercion. There are far fewer implicit 92 | coercions in Rust than there are in C++. One of them is the coercion from fixed 93 | length arrays to slices. Since slices must be pointer values, this is 94 | effectively a coercion between pointers. For example, we can coerce `&[i32; 4]` 95 | to `&[i32]`, e.g., 96 | 97 | ```rust 98 | let a: &[i32] = &[1, 2, 3, 4]; 99 | ``` 100 | 101 | Here the right hand side is a fixed length array of length four, allocated on 102 | the stack. We then take a reference to it (type `&[i32; 4]`). That reference is 103 | coerced to type `&[i32]` and given the name `a` by the let statement. 104 | 105 | Again, access is just like C (using `[...]`), and access is bounds checked. You 106 | can also check the length yourself by using `len()`. So clearly the length of 107 | the array is known somewhere. In fact all arrays of any kind in Rust have known 108 | length, since this is essential for bounds checking, which is an integral part 109 | of memory safety. The size is known dynamically (as opposed to statically in the 110 | case of fixed length arrays), and we say that slice types are dynamically sized 111 | types (DSTs, there are other kinds of dynamically sized types too, they'll be 112 | covered elsewhere). 113 | 114 | Since a slice is just a sequence of values, the size cannot be stored as part of 115 | the slice. Instead it is stored as part of the pointer (remember that slices 116 | must always exist as pointer types). 
A pointer to a slice (like all pointers to 117 | DSTs) is a fat pointer - it is two words wide, rather than one, and contains the 118 | pointer to the data plus a payload. In the case of slices, the payload is the 119 | length of the slice. 120 | 121 | So in the example above, the pointer `a` will be 128 bits wide (on a 64 bit 122 | system). The first 64 bits will store the address of the `1` in the sequence 123 | `[1, 2, 3, 4]`, and the second 64 bits will contain `4`. Usually, as a Rust 124 | programmer, these fat pointers can just be treated as regular pointers. But it 125 | is good to know about (it can affect the things you can do with casts, for 126 | example). 127 | 128 | 129 | ### Slicing notation and ranges 130 | 131 | A slice can be thought of as a (borrowed) view of an array. So far we have only 132 | seen a slice of the whole array, but we can also take a slice of part of an 133 | array. There is a special notation for this which is like the indexing 134 | syntax, but takes a range instead of a single integer. E.g., `a[0..4]`, which 135 | takes a slice of the first four elements of `a`. Note that the range is 136 | exclusive at the top and inclusive at the bottom. Examples: 137 | 138 | ```rust 139 | let a: [i32; 4] = [1, 2, 3, 4]; 140 | let b: &[i32] = &a; // Slice of the whole array. 141 | let c = &a[0..4]; // Another slice of the whole array, also has type &[i32]. 142 | let c = &a[1..3]; // The middle two elements, &[i32]. 143 | let c = &a[1..]; // The last three elements. 144 | let c = &a[..3]; // The first three elements. 145 | let c = &a[..]; // The whole array, again. 146 | let c = &b[1..3]; // We can also slice a slice. 147 | ``` 148 | 149 | Note that in the last example, we still need to borrow the result of slicing. 150 | The slicing syntax produces an unborrowed slice (type: `[i32]`) which we must 151 | then borrow (to give a `&[i32]`), even if we are slicing a borrowed slice. 
152 | 153 | Range syntax can also be used outside of slicing syntax. `a..b` produces an 154 | iterator which runs from `a` to `b-1`. This can be combined with other iterators 155 | in the usual way, or can be used in `for` loops: 156 | 157 | ```rust 158 | // Print all numbers from 1 to 10. 159 | for i in 1..11 { 160 | println!("{}", i); 161 | } 162 | ``` 163 | 164 | ## Vecs 165 | 166 | A vector is heap allocated and is an owning reference. Therefore (and like 167 | `Box<_>`), it has move semantics. We can think of a fixed length array 168 | analogously to a value, a slice to a borrowed reference. Similarly, a vector in 169 | Rust is analogous to a `Box<_>` pointer. 170 | 171 | It helps to think of `Vec<_>` as a kind of smart pointer, just like `Box<_>`, 172 | rather than as a value itself. Similarly to a slice, the length is stored in the 173 | 'pointer', in this case the 'pointer' is the Vec value. 174 | 175 | A vector of `i32`s has type `Vec<i32>`. There are no vector literals, but we can 176 | get the same effect by using the `vec!` macro. We can also create an empty 177 | vector using `Vec::new()`: 178 | 179 | ```rust 180 | let v = vec![1, 2, 3, 4]; // A Vec<i32> with length 4. 181 | let v: Vec<i32> = Vec::new(); // An empty vector of i32s. 182 | ``` 183 | 184 | In the second case above, the type annotation is necessary so the compiler can 185 | know what the vector is a vector of. If we were to use the vector, the type 186 | annotation would probably not be necessary. 187 | 188 | Just like arrays and slices, we can use indexing notation to get a value from 189 | the vector (e.g., `v[2]`). Again, these are bounds checked. We can also use 190 | slicing notation to take a slice of a vector (e.g., `&v[1..3]`). 191 | 192 | The extra feature of vectors is that their size can change - they can get longer 193 | or shorter as needed. For example, `v.push(5)` would add the element `5` to the 194 | end of the vector (this would require that `v` is mutable). 
Note that growing a 195 | vector can cause reallocation, which for large vectors can mean a lot of 196 | copying. To guard against this you can pre-allocate space in a vector using 197 | `with_capacity`, see the [Vec docs](https://doc.rust-lang.org/std/vec/struct.Vec.html) 198 | for more details. 199 | 200 | 201 | ## The `Index` traits 202 | 203 | Note for readers: there is a lot of material in this section that I haven't 204 | covered properly yet. If you're following the tutorial, you can skip this 205 | section, it is a somewhat advanced topic in any case. 206 | 207 | The same indexing syntax used for arrays and vectors is also used for other 208 | collections, such as `HashMap`s. And you can use it yourself for your own 209 | collections. You opt-in to using the indexing (and slicing) syntax by 210 | implementing the `Index` trait. This is a good example of how Rust makes 211 | available nice syntax to user types, as well as built-ins (`Deref` for 212 | dereferencing smart pointers, as well as `Add` and various other traits, work in 213 | a similar way). 214 | 215 | The `Index` trait looks like 216 | 217 | ```rust 218 | pub trait Index<Idx: ?Sized> { 219 | type Output: ?Sized; 220 | 221 | fn index(&self, index: Idx) -> &Self::Output; 222 | } 223 | ``` 224 | 225 | `Idx` is the type used for indexing. For most uses of indexing this is `usize`. 226 | For slicing this is one of the `std::ops::Range` types. `Output` is the type 227 | returned by indexing, this will be different for each collection. For slicing it 228 | will be a slice, rather than the type of a single element. `index` is a method 229 | which does the work of getting the element(s) out of the collection. Note that 230 | the collection is taken by reference and the method returns a reference to the 231 | element with the same lifetime. 
232 | 233 | Let's look at the implementation for `Vec` to see what an implementation 234 | looks like: 235 | 236 | ```rust 237 | impl<T> Index<usize> for Vec<T> { 238 | type Output = T; 239 | 240 | fn index(&self, index: usize) -> &T { 241 | &(**self)[index] 242 | } 243 | } 244 | ``` 245 | 246 | As we said above, indexing is done using `usize`. For a `Vec<T>`, indexing will 247 | return a single element of type `T`, thus the value of `Output`. The 248 | implementation of `index` is a bit weird - `(**self)` gets a view of the whole 249 | vec as a slice, then we use indexing on slices to get the element, and finally 250 | take a reference to it. 251 | 252 | If you have your own collections, you can implement `Index` in a similar way to 253 | get indexing and slicing syntax for your collection. 254 | 255 | 256 | ## Initialiser syntax 257 | 258 | As with all data in Rust, arrays and vectors must be properly initialised. Often 259 | you just want an array full of zeros to start with and using the array literal 260 | syntax is a pain. So Rust gives you a little syntactic sugar to initialise an 261 | array full of a given value: `[value; len]`. So for example to create an array 262 | with length 100 full of zeros, we'd use `[0; 100]`. 263 | 264 | Similarly for vectors, `vec![42; 100]` would give you a vector with 100 265 | elements, each with the value 42. 266 | 267 | The initial value is not limited to integers, it can be any expression. For 268 | array initialisers, the length must be an integer constant expression. For 269 | `vec!`, it can be any expression with type `usize`. 270 | 271 | 272 | [^1]: In C++11 there is `std::array` that provides boundary checking when 273 | `at()` method is used. 274 | -------------------------------------------------------------------------------- /data-types.md: -------------------------------------------------------------------------------- 1 | # Data types 2 | 3 | In this post I'll discuss Rust's data types. 
These are roughly equivalent to 4 | classes, structs, and enums in C++. One difference with Rust is that data and 5 | behaviour are much more strictly separated in Rust than C++ (or Java, or other 6 | OO languages). Behaviour is defined by functions and those can be defined in 7 | traits and `impl`s (implementations), but traits cannot contain data, they are 8 | similar to Java's interfaces in that respect. I'll cover traits and impls in a 9 | later post, this one is all about data. 10 | 11 | ## Structs 12 | 13 | A rust struct is similar to a C struct or a C++ struct without methods. Simply a 14 | list of named fields. The syntax is best seen with an example: 15 | 16 | ```rust 17 | struct S { 18 | field1: i32, 19 | field2: SomeOtherStruct 20 | } 21 | ``` 22 | 23 | Here we define a struct called `S` with two fields. The fields are comma 24 | separated; if you like, you can comma-terminate the last field too. 25 | 26 | Structs introduce a type. In the example, we could use `S` as a type. 27 | `SomeOtherStruct` is assumed to be another struct (used as a type in the 28 | example), and (like C++) it is included by value, that is, there is no pointer 29 | to another struct object in memory. 30 | 31 | Fields in structs are accessed using the `.` operator and their name. An example 32 | of struct use: 33 | 34 | ```rust 35 | fn foo(s1: S, s2: &S) { 36 | let f = s1.field1; 37 | if f == s2.field1 { 38 | println!("field1 matches!"); 39 | } 40 | } 41 | ``` 42 | 43 | Here `s1` is struct object passed by value and `s2` is a struct object passed by 44 | reference. As with method calls, we use the same `.` to access fields in both, no 45 | need for `->`. 46 | 47 | Structs are initialised using struct literals. These are the name of the struct 48 | and values for each field. 
For example, 49 | 50 | ```rust 51 | fn foo(sos: SomeOtherStruct) { 52 | let x = S { field1: 45, field2: sos }; // initialise x with a struct literal 53 | println!("x.field1 = {}", x.field1); 54 | } 55 | ``` 56 | 57 | Structs cannot be recursive; that is, you can't have cycles of struct names 58 | involving definitions and field types. This is because of the value semantics of 59 | structs. So for example, `struct R { r: Option<R> }` is illegal and will cause a 60 | compiler error (see below for more about Option). If you need such a structure 61 | then you should use some kind of pointer; cycles with pointers are allowed: 62 | 63 | ```rust 64 | struct R { 65 | r: Option<Box<R>> 66 | } 67 | ``` 68 | 69 | If we didn't have the `Option` in the above struct, there would be no way to 70 | instantiate the struct and Rust would signal an error. 71 | 72 | Structs with no fields do not use braces in either their definition or literal 73 | use. Definitions do need a terminating semi-colon though, presumably just to 74 | facilitate parsing. 75 | 76 | ```rust 77 | struct Empty; 78 | 79 | fn foo() { 80 | let e = Empty; 81 | } 82 | ``` 83 | 84 | ## Tuples 85 | 86 | Tuples are anonymous, heterogeneous sequences of data. As a type, they are 87 | declared as a sequence of types in parentheses. Since there is no name, they are 88 | identified by structure. For example, the type `(i32, i32)` is a pair of 89 | integers and `(i32, f32, S)` is a triple. Tuple values are initialised in the 90 | same way as tuple types are declared, but with values instead of types for the 91 | components, e.g., `(4, 5)`. 
An example: 92 | 93 | ```rust 94 | // foo takes a struct and returns a tuple 95 | fn foo(x: SomeOtherStruct) -> (i32, f32, S) { 96 | (23, 45.82, S { field1: 54, field2: x }) 97 | } 98 | ``` 99 | 100 | Tuples can be used by destructuring using a `let` expression, e.g., 101 | 102 | ```rust 103 | fn bar(x: (i32, i32)) { 104 | let (a, b) = x; 105 | println!("x was ({}, {})", a, b); 106 | } 107 | ``` 108 | 109 | We'll talk more about destructuring next time. 110 | 111 | 112 | ## Tuple structs 113 | 114 | Tuple structs are named tuples, or alternatively, structs with unnamed fields. 115 | They are declared using the `struct` keyword, a list of types in parentheses, 116 | and a semicolon. Such a declaration introduces their name as a type. Their 117 | fields must be accessed by destructuring (like a tuple), rather than by name. 118 | Tuple structs are not very common. 119 | 120 | ```rust 121 | struct IntPoint (i32, i32); 122 | 123 | fn foo(x: IntPoint) { 124 | let IntPoint(a, b) = x; // Note that we need the name of the tuple 125 | // struct to destructure. 126 | println!("x was ({}, {})", a, b); 127 | } 128 | ``` 129 | 130 | ## Enums 131 | 132 | Enums are types like C++ enums or unions, in that they are types which can take 133 | multiple values. The simplest kind of enum is just like a C++ enum: 134 | 135 | ```rust 136 | enum E1 { 137 | Var1, 138 | Var2, 139 | Var3 140 | } 141 | 142 | fn foo() { 143 | let x: E1 = Var2; 144 | match x { 145 | Var2 => println!("var2"), 146 | _ => {} 147 | } 148 | } 149 | ``` 150 | 151 | However, Rust enums are much more powerful than that. Each variant can contain 152 | data. Like tuples, these are defined by a list of types. In this case they are 153 | more like unions than enums in C++. Rust enums are tagged unions rather than untagged unions (as in C++). 154 | That means you can't mistake one variant of an enum for another at runtime[^1]. 
An example: 155 | 156 | ```rust 157 | enum Expr { 158 | Add(i32, i32), 159 | Or(bool, bool), 160 | Lit(i32) 161 | } 162 | 163 | fn foo() { 164 | let x = Or(true, false); // x has type Expr 165 | } 166 | ``` 167 | 168 | Many simple cases of object-oriented polymorphism are better handled in Rust 169 | using enums. 170 | 171 | To use enums we usually use a match expression. Remember that these are similar 172 | to C++ switch statements. I'll go into more depth on these and other ways to 173 | destructure data next time. Here's an example: 174 | 175 | ```rust 176 | fn bar(e: Expr) { 177 | match e { 178 | Add(x, y) => println!("An `Add` variant: {} + {}", x, y), 179 | Or(..) => println!("An `Or` variant"), 180 | _ => println!("Something else (in this case, a `Lit`)"), 181 | } 182 | } 183 | ``` 184 | 185 | Each arm of the match expression matches a variant of `Expr`. All variants must 186 | be covered. The last case (`_`) covers all remaining variants, although in the 187 | example there is only `Lit`. Any data in a variant can be bound to a variable. 188 | In the `Add` arm we are binding the two i32s in an `Add` to `x` and `y`. If we 189 | don't care about the data, we can use `..` to match any data, as we do for `Or`. 190 | 191 | 192 | ## Option 193 | 194 | One particularly common enum in Rust is `Option`. This has two variants - `Some` 195 | and `None`. `None` has no data and `Some` has a single field with type `T` 196 | (`Option` is a generic enum, which we will cover later, but hopefully the 197 | general idea is clear from C++). Options are used to indicate a value might be 198 | there or might not. Any place you use a null pointer in C++[^2] 199 | to indicate a value which is in some way undefined, uninitialised, or false, 200 | you should probably use an Option in Rust. Using Option is safer because you 201 | must always check it before use; there is no way to do the equivalent of 202 | dereferencing a null pointer. 
They are also more general, you can use them with 203 | values as well as pointers. An example: 204 | 205 | ```rust 206 | use std::rc::Rc; 207 | 208 | struct Node { 209 | parent: Option>, 210 | value: i32 211 | } 212 | 213 | fn is_root(node: Node) -> bool { 214 | match node.parent { 215 | Some(_) => false, 216 | None => true 217 | } 218 | } 219 | ``` 220 | 221 | Here, the parent field could be either a `None` or a `Some` containing an 222 | `Rc`. In the example, we never actually use that payload, but in real life 223 | you usually would. 224 | 225 | 226 | There are also convenience methods on Option, so you could write the body of 227 | `is_root` as `node.parent.is_none()` or `!node.parent.is_some()`. 228 | 229 | ## Inherited mutability and Cell/RefCell 230 | 231 | Local variables in Rust are immutable by default and can be marked mutable using 232 | `mut`. We don't mark fields in structs or enums as mutable, their mutability is 233 | inherited. This means that a field in a struct object is mutable or immutable 234 | depending on whether the object itself is mutable or immutable. Example: 235 | 236 | ```rust 237 | struct S1 { 238 | field1: i32, 239 | field2: S2 240 | } 241 | struct S2 { 242 | field: i32 243 | } 244 | 245 | fn main() { 246 | let s = S1 { field1: 45, field2: S2 { field: 23 } }; 247 | // s is deeply immutable, the following mutations are forbidden 248 | // s.field1 = 46; 249 | // s.field2.field = 24; 250 | 251 | let mut s = S1 { field1: 45, field2: S2 { field: 23 } }; 252 | // s is mutable, these are OK 253 | s.field1 = 46; 254 | s.field2.field = 24; 255 | } 256 | ``` 257 | 258 | Inherited mutability in Rust stops at references. This is similar to C++ where 259 | you can modify a non-const object via a pointer from a const object. 
If you want 260 | a reference field to be mutable, you have to use `&mut` on the field type: 261 | 262 | ```rust 263 | struct S1 { 264 | f: i32 265 | } 266 | struct S2<'a> { 267 | f: &'a mut S1 // mutable reference field 268 | } 269 | struct S3<'a> { 270 | f: &'a S1 // immutable reference field 271 | } 272 | 273 | fn main() { 274 | let mut s1 = S1{f:56}; 275 | let s2 = S2 { f: &mut s1}; 276 | s2.f.f = 45; // legal even though s2 is immutable 277 | // s2.f = &mut s1; // illegal - s2 is not mutable 278 | let s1 = S1{f:56}; 279 | let mut s3 = S3 { f: &s1}; 280 | s3.f = &s1; // legal - s3 is mutable 281 | // s3.f.f = 45; // illegal - s3.f is immutable 282 | } 283 | ``` 284 | 285 | (The `'a` parameter on `S2` and `S3` is a lifetime parameter, we'll cover those soon). 286 | 287 | Sometimes whilst an object is logically immutable, it has parts which need to be 288 | internally mutable. Think of various kinds of caching or a reference count 289 | (which would not give true logical immutability since the effect of changing the 290 | ref count can be observed via destructors). In C++, you would use the `mutable` 291 | keyword to allow such mutation even when the object is const. In Rust we have 292 | the Cell and RefCell structs. These allow parts of immutable objects to be 293 | mutated. Whilst that is useful, it means you need to be aware that when you see 294 | an immutable object in Rust, it is possible that some parts may actually be 295 | mutable. 296 | 297 | RefCell and Cell let you get around Rust's strict rules on mutation and 298 | aliasability. They are safe to use because they ensure that Rust's invariants 299 | are respected dynamically, even though the compiler cannot ensure that those 300 | invariants hold statically. Cell and RefCell are both single threaded objects. 301 | 302 | Use Cell for types which have copy semantics (pretty much just primitive types). 
303 | Cell has `get` and `set` methods for changing the stored value, and a `new` 304 | method to initialise the cell with a value. Cell is a very simple object - it 305 | doesn't need to do anything smart since objects with copy semantics can't keep 306 | references elsewhere (in Rust) and they can't be shared across threads, so there 307 | is not much to go wrong. 308 | 309 | Use RefCell for types which have move semantics, that means nearly everything in 310 | Rust, struct objects are a common example. RefCell is also created using `new` 311 | and has a `set` method. To get the value in a RefCell, you must borrow it using 312 | the borrow methods (`borrow`, `borrow_mut`, `try_borrow`, `try_borrow_mut`) 313 | these will give you a borrowed reference to the object in the RefCell. These 314 | methods follow the same rules as static borrowing - you can only have one 315 | mutable borrow, and can't borrow mutably and immutably at the same time. 316 | However, rather than a compile error you get a runtime failure. The `try_` 317 | variants return an Option - you get `Some(val)` if the value can be borrowed and 318 | `None` if it can't. If a value is borrowed, calling `set` will fail too. 
319 | 320 | Here's an example using a ref-counted pointer to a RefCell (a common use-case): 321 | 322 | ```rust 323 | use std::rc::Rc; 324 | use std::cell::RefCell; 325 | 326 | struct S { 327 | field: i32 328 | } 329 | 330 | fn foo(x: Rc<RefCell<S>>) { 331 | { 332 | let s = x.borrow(); 333 | println!("the field, twice {} {}", s.field, x.borrow().field); 334 | // let s = x.borrow_mut(); // Error - we've already borrowed the contents of x 335 | } 336 | 337 | let mut s = x.borrow_mut(); // OK, the earlier borrows are out of scope 338 | s.field = 45; 339 | // println!("The field {}", x.borrow().field); // Error - can't mut and immut borrow 340 | println!("The field {}", s.field); 341 | } 342 | 343 | fn main() { 344 | let s = S{field:12}; 345 | let x: Rc<RefCell<S>> = Rc::new(RefCell::new(s)); 346 | foo(x.clone()); 347 | 348 | println!("The field {}", x.borrow().field); 349 | } 350 | ``` 351 | 352 | If you're using Cell/RefCell, you should try to put them on the smallest object 353 | you can. That is, prefer to put them on a few fields of a struct, rather than 354 | the whole struct. Think of them like single threaded locks, finer grained 355 | locking is better since you are more likely to avoid colliding on a lock. 356 | 357 | 358 | [^1]: In C++17 there is a `std::variant` type that is closer to Rust enums than unions. 359 | 360 | [^2]: Since C++17 `std::optional` is the best alternative to Option in Rust. 361 | -------------------------------------------------------------------------------- /graphs/README.md: -------------------------------------------------------------------------------- 1 | # Graphs and arena allocation 2 | 3 | (Note you can run the examples in this chapter by downloading this directory and 4 | running `cargo run`). 5 | 6 | Graphs are a bit awkward to construct in Rust because of Rust's stringent 7 | lifetime and mutability requirements. Graphs of objects are very common in OO 8 | programming.
In this tutorial I'm going to go over a few different approaches to 9 | implementation. My preferred approach uses arena allocation and makes slightly 10 | advanced use of explicit lifetimes. I'll finish up by discussing a few potential 11 | Rust features which would make using such an approach easier. 12 | 13 | A [graph](http://en.wikipedia.org/wiki/Graph_%28abstract_data_type%29) is a 14 | collection of nodes with edges between some of those nodes. Graphs are a 15 | generalisation of lists and trees. Each node can have multiple children and 16 | multiple parents (we usually talk about edges into and out of a node, rather 17 | than parents/children). Graphs can be represented by adjacency lists or 18 | adjacency matrices. The former is basically a node object for each node in the 19 | graph, where each node object keeps a list of its adjacent nodes. An adjacency 20 | matrix is a matrix of booleans indicating whether there is an edge from the row 21 | node to the column node. We'll only cover the adjacency list representation, 22 | adjacency matrices have very different issues which are less Rust-specific. 23 | 24 | There are essentially two orthogonal problems: how to handle the lifetime of the 25 | graph and how to handle its mutability. 26 | 27 | The first problem essentially boils down to what kind of pointer to use to point 28 | to other nodes in the graph. Since graph-like data structures are recursive (the 29 | types are recursive, even if the data is not) we are forced to use pointers of 30 | some kind rather than have a totally value-based structure. Since graphs can be 31 | cyclic, and ownership in Rust cannot be cyclic, we cannot use `Box` as our 32 | pointer type (as we might do for tree-like data structures or linked lists). 33 | 34 | No graph is truly immutable. Because there may be cycles, the graph cannot be 35 | created in a single statement. Thus, at the very least, the graph must be mutable 36 | during its initialisation phase.
The usual invariant in Rust is that all 37 | pointers must either be unique or immutable. Graph edges must be mutable (at 38 | least during initialisation) and there can be more than one edge into any node, 39 | thus no edges are guaranteed to be unique. So we're going to have to do 40 | something a little bit advanced to handle mutability. 41 | 42 | One solution is to use mutable raw pointers (`*mut Node`). This is the most 43 | flexible approach, but also the most dangerous. You must handle all the lifetime 44 | management yourself without any help from the type system. You can make very 45 | flexible and efficient data structures this way, but you must be very careful. 46 | This approach handles both the lifetime and mutability issues in one fell swoop. 47 | But it handles them by essentially ignoring all the benefits of Rust - you will 48 | get no help from the compiler here (it's also not particularly ergonomic since 49 | raw pointers don't automatically (de-)reference). Since a graph using raw 50 | pointers is not much different from a graph in C++, I'm not going to cover that 51 | option here. 52 | 53 | The options you have for lifetime management are reference counting (shared 54 | ownership, using `Rc<...>`) or arena allocation (all nodes have the same lifetime, 55 | managed by an arena; using borrowed references `&...`). The former is 56 | more flexible (you can have references from outside the graph to individual 57 | nodes with any lifetime), the latter is better in every other way. 58 | 59 | For managing mutability, you can either use `RefCell`, i.e., make use of Rust's 60 | facility for dynamic, interior mutability, or you can manage the mutability 61 | yourself (in this case you have to use `UnsafeCell` to communicate the interior 62 | mutability to the compiler). The former is safer, the latter is more efficient. 63 | Neither is particularly ergonomic. 
64 | 65 | Note that if your graph might have cycles, then if you use `Rc`, further action 66 | is required to break the cycles and not leak memory. Since Rust has no cycle 67 | collection of `Rc` pointers, if there is a cycle in your graph, the ref counts 68 | will never fall to zero, and the graph will never be deallocated. You can solve 69 | this by using `Weak` pointers in your graph or by manually breaking cycles when 70 | you know the graph should be destroyed. The former is more reliable. We don't 71 | cover either here, in our examples we just leak memory. The approach using 72 | borrowed references and arena allocation does not have this issue and is thus 73 | superior in that respect. 74 | 75 | To compare the different approaches I'll use a pretty simple example. We'll just 76 | have a `Node` object to represent a node in the graph, this will hold some 77 | string data (representative of some more complex data payload) and a `Vec` of 78 | adjacent nodes (`edges`). We'll have an `init` function to create a simple graph 79 | of nodes, and a `traverse` function which does a pre-order, depth-first 80 | traversal of the graph. We'll use this to print the payload of each node in the 81 | graph. Finally, we'll have a `Node::first` method which returns a reference to 82 | the first adjacent node to the `self` node and a function `foo` which prints the 83 | payload of an individual node. These functions stand in for more complex 84 | operations involving manipulation of a node interior to the graph. 85 | 86 | To try and be as informative as possible without boring you, I'll cover two 87 | combinations of possibilities: ref counting and `RefCell`, and arena allocation 88 | and `UnsafeCell`. I'll leave the other two combinations as an exercise. 89 | 90 | 91 | ## `Rc>` 92 | 93 | See [full example](src/rc_graph.rs). 94 | 95 | This is the safer option because there is no unsafe code. It is also the least 96 | efficient and least ergonomic option. 
It is pretty flexible though, nodes of the 97 | graph can be easily reused outside the graph since they are ref-counted. I would 98 | recommend this approach if you need a fully mutable graph, or need your nodes to 99 | exist independently of the graph. 100 | 101 | The node structure looks like 102 | 103 | ```rust 104 | struct Node { 105 | datum: &'static str, 106 | edges: Vec>>, 107 | } 108 | ``` 109 | 110 | Creating a new node is not too bad: `Rc::new(RefCell::new(Node { ... }))`. To 111 | add an edge during initialisation, you have to borrow the start node as mutable, 112 | and clone the end node into the Vec of edges (this clones the pointer, 113 | incrementing the reference count, not the actual node). E.g., 114 | 115 | ```rust 116 | let mut mut_root = root.borrow_mut(); 117 | mut_root.edges.push(b.clone()); 118 | ``` 119 | 120 | The `RefCell` dynamically ensures that we are not already reading or writing the 121 | node when we write it. 122 | 123 | Whenever you access a node, you have to use `.borrow()` to borrow the `RefCell`. 124 | Our `first` method has to return a ref-counted pointer, rather than a borrowed 125 | reference, so callers of `first` also have to borrow: 126 | 127 | ```rust 128 | fn first(&self) -> Rc> { 129 | self.edges[0].clone() 130 | } 131 | 132 | pub fn main() { 133 | let g = ...; 134 | let f = g.first(); 135 | foo(&*f.borrow()); 136 | } 137 | ``` 138 | 139 | 140 | ## `&Node` and `UnsafeCell` 141 | 142 | See [full example](src/ref_graph.rs). 143 | 144 | In this approach we use borrowed references as edges. This is nice and ergonomic 145 | and lets us use our nodes with 'regular' Rust libraries which primarily operate 146 | with borrowed references (note that one nice thing about ref counted objects in 147 | Rust is that they play nicely with the lifetime system. We can create a borrowed 148 | reference into the `Rc` to directly (and safely) reference the data. 
In the 149 | previous example, the `RefCell` prevents us doing this, but an `Rc`/`UnsafeCell` 150 | approach should allow it). 151 | 152 | Destruction is correctly handled too - the only constraint is that all the nodes 153 | must be destroyed at the same time. Destruction and allocation of nodes is 154 | handled using an arena. 155 | 156 | On the other hand, we do need to use quite a few explicit lifetimes. 157 | Unfortunately we don't benefit from lifetime elision here. At the end of the 158 | section I'll discuss some future directions for the language which could make 159 | things better. 160 | 161 | During construction we will mutate our nodes which might be multiply referenced. 162 | This is not possible in safe Rust code, so we must initialise inside an `unsafe` 163 | block. Since our nodes are mutable and multiply referenced, we must use an 164 | `UnsafeCell` to communicate to the Rust compiler that it cannot rely on its 165 | usual invariants. 166 | 167 | When is this approach feasible? The graph must only be mutated during 168 | initialisation. In addition, we require that all nodes in the graph have the 169 | same lifetime (we could relax these constraints somewhat to allow adding nodes 170 | later as long as they can all be destroyed at the same time). Similarly, we 171 | could rely on more complicated invariants for when the nodes can be mutated, but 172 | it pays to keep things simple, since the programmer is responsible for safety 173 | in those respects. 174 | 175 | Arena allocation is a memory management technique where a set of objects have 176 | the same lifetime and can be deallocated at the same time. An arena is an object 177 | responsible for allocating and deallocating the memory. Since large chunks of 178 | memory are allocated and deallocated at once (rather than allocating individual 179 | objects), arena allocation is very efficient. 
Usually, all the objects are 180 | allocated from a contiguous chunk of memory, that improves cache coherency when 181 | you are traversing the graph. 182 | 183 | In Rust, arena allocation is supported by the [libarena](https://doc.rust-lang.org/1.1.0/arena/index.html) 184 | crate and is used throughout the compiler. There are two kinds of arenas - typed 185 | and untyped. The former is more efficient and easier to use, but can only 186 | allocate objects of a single type. The latter is more flexible and can allocate 187 | any object. Arena allocated objects all have the same lifetime, which is a 188 | parameter of the arena object. The type system ensures references to arena 189 | allocated objects cannot live longer than the arena itself. 190 | 191 | Our node struct must now include the lifetime of the graph, `'a`. We wrap our 192 | `Vec` of adjacent nodes in an `UnsafeCell` to indicate that we will mutate it 193 | even when it should be immutable: 194 | 195 | ```rust 196 | struct Node<'a> { 197 | datum: &'static str, 198 | edges: UnsafeCell>>, 199 | } 200 | ``` 201 | 202 | Our new function must also use this lifetime and must take as an argument the 203 | arena which will do the allocation: 204 | 205 | ```rust 206 | fn new<'a>(datum: &'static str, arena: &'a TypedArena>) -> &'a Node<'a> { 207 | arena.alloc(Node { 208 | datum: datum, 209 | edges: UnsafeCell::new(Vec::new()), 210 | }) 211 | } 212 | ``` 213 | 214 | We use the arena to allocate the node. The lifetime of the graph is derived from 215 | the lifetime of the reference to the arena, so the arena must be passed in from 216 | the scope which covers the graph's lifetime. For our examples, that means we 217 | pass it into the `init` method. (One could imagine an extension to the type 218 | system which allows creating values at scopes outside their lexical scope, but 219 | there are no plans to add such a thing any time soon). 
When the arena goes out 220 | of scope, the whole graph is destroyed (Rust's type system ensures that we can't 221 | keep references to the graph beyond that point). 222 | 223 | Adding an edge is a bit different looking: 224 | 225 | ```rust 226 | (*root.edges.get()).push(b); 227 | ``` 228 | 229 | We're essentially doing the obvious `root.edges.push(b)` to push a node (`b`) on 230 | to the list of edges. However, since `edges` is wrapped in an `UnsafeCell`, we 231 | have to call `get()` on it. That gives us a mutable raw pointer to edges (`*mut 232 | Vec<&Node>`), which allows us to mutate `edges`. However, it also requires us to 233 | manually dereference the pointer (raw pointers do not auto-deref), thus the 234 | `(*...)` construction. Finally, dereferencing a raw pointer is unsafe, so the 235 | whole lot has to be wrapped up in an unsafe block. 236 | 237 | The interesting part of `traverse` is: 238 | 239 | ```rust 240 | for n in &(*self.edges.get()) { 241 | n.traverse(f, seen); 242 | } 243 | ``` 244 | 245 | We follow the previous pattern for getting at the edges list, which requires an 246 | unsafe block. In this case we know it is in fact safe because we must be post- 247 | initialisation and thus there will be no mutation. 248 | 249 | Again, the `first` method follows the same pattern for getting at the `edges` 250 | list. And again must be in an unsafe block. However, in contrast to the graph 251 | using `Rc>`, we can return a straightforward borrowed reference to 252 | the node. That is very convenient. We can reason that the unsafe block is safe 253 | because we do no mutation and we are post-initialisation. 254 | 255 | ```rust 256 | fn first(&'a self) -> &'a Node<'a> { 257 | unsafe { 258 | (*self.edges.get())[0] 259 | } 260 | } 261 | ``` 262 | 263 | ### Future language improvements for this approach 264 | 265 | I believe that arena allocation and using borrowed references are an important 266 | pattern in Rust. 
We should do more in the language to make these patterns safer 267 | and easier to use. I hope use of arenas becomes more ergonomic with the ongoing 268 | work on [allocators](https://github.com/rust-lang/rfcs/pull/244). There are 269 | three other improvements I see: 270 | 271 | #### Safe initialisation 272 | 273 | There has been lots of research in the OO world on mechanisms for ensuring 274 | mutability only during initialisation. How exactly this would work in Rust is an 275 | open research question, but it seems that we need to represent a pointer which 276 | is mutable and not unique, but restricted in scope. Outside that scope any 277 | existing pointers would become normal borrowed references, i.e., immutable *or* 278 | unique. 279 | 280 | The advantage of such a scheme is that we have a way to represent the common 281 | pattern of mutable during initialisation, then immutable. It also relies on the 282 | invariant that, while individual objects are multiply owned, the aggregate (in 283 | this case a graph) is uniquely owned. We should then be able to adopt the 284 | reference and `UnsafeCell` approach, without the `UnsafeCell`s and the unsafe 285 | blocks, making that approach more ergonomic and safer. 286 | 287 | Alex Summers and Julian Viereck at ETH Zurich are investigating this 288 | further. 289 | 290 | 291 | #### Generic modules 292 | 293 | The 'lifetime of the graph' is constant for any particular graph. Repeating the 294 | lifetime is just boilerplate. One way to make this more ergonomic would be to 295 | allow the graph module to be parameterised by the lifetime, so it would not need 296 | to be added to every struct, impl, and function. The lifetime of the graph would 297 | still need to be specified from outside the module, but hopefully inference 298 | would take care of most uses (as it does today for function calls). 299 | 300 | See [ref_graph_generic_mod.rs](src/ref_graph_generic_mod.rs) for how that might look.
301 | (We should also be able to use safe initialisation (proposed above) to remove 302 | the unsafe code). 303 | 304 | See also this [RFC issue](https://github.com/rust-lang/rfcs/issues/424). 305 | 306 | This feature would vastly reduce the syntactic overhead of the reference and 307 | `UnsafeCell` approach. 308 | 309 | 310 | #### Lifetime elision 311 | 312 | We currently allow the programmer to elide some lifetimes in function signatures 313 | to improve ergonomics. One reason the `&Node` approach to graphs is a bit ugly 314 | is because it doesn't benefit from any of the lifetime elision rules. 315 | 316 | A common pattern in Rust is data structures with a common lifetime. References 317 | into such data structures give rise to types like `&'a Foo<'a>`, for example 318 | `&'a Node<'a>` in the graph example. It would be nice to have an elision 319 | rule that helps in this case. I'm not really sure how it should work though. 320 | 321 | Looking at the example with generic modules, it doesn't look like we need to 322 | extend the lifetime elision rules very much (I'm not actually sure if 323 | `Node::new` would work without the given lifetimes, but it seems like a fairly 324 | trivial extension to make it work if it doesn't). We might want to add some new 325 | rule to allow elision of module-generic lifetimes if they are the only ones in 326 | scope (other than `'static`), but I'm not sure how that would work with multiple 327 | in- scope lifetimes (see the `foo` and `init` functions, for example). 328 | 329 | If we don't add generic modules, we might still be able to add an elision rule 330 | specifically to target `&'a Node<'a>`, not sure how though. 331 | -------------------------------------------------------------------------------- /closures.md: -------------------------------------------------------------------------------- 1 | # Closures and first-class functions 2 | 3 | Closures and first-class and higher order functions are a core part of Rust. 
In 4 | C and C++ there are function pointers (and those weird member/method pointer 5 | things in C++ that I never got the hang of). However, they are used relatively 6 | rarely and are not very ergonomic. C++11 introduced lambdas, and these are 7 | pretty close to Rust closures, in particular they have a very similar 8 | implementation strategy. 9 | 10 | To start with, I want to establish some intuition for these things. Then, we'll 11 | dive into the details. 12 | 13 | Let's say we have a function `foo`: `pub fn foo() -> u32 { 42 }`. Now let's 14 | imagine another function `bar` which takes a function as an argument (I'll leave 15 | `bar`'s signature for later): `fn bar(f: ...) { ... }`. We can pass `foo` to 16 | `bar` kind of like we would pass a function pointer in C: `bar(foo)`. In the 17 | body of `bar` we can call `f` as if it were a function: `let x = f();`. 18 | 19 | We say that Rust has first-class functions because we can pass them around and 20 | use them like we can with other values. We say `bar` is a higher-order function 21 | because it takes a function as an argument, i.e., it is a function that operates 22 | on functions. 23 | 24 | Closures in Rust are anonymous functions with a nice syntax. A closure `|x| x + 25 | 2` takes an argument and returns it with `2` added. Note that we don't have to 26 | give types for the arguments to a closure (they can usually be inferred). We 27 | also don't need to specify a return type. If we want the closure body to be more 28 | than just one expression, we can use braces: `|x: i32| { let y = x + 2; y }`. We 29 | can pass closures just like functions: `bar(|| 42)`. 30 | 31 | The big difference between closures and other functions is that closures capture 32 | their environment. This means that we can refer to variables outside the closure 33 | from the closure. E.g., 34 | 35 | ```rust 36 | let x = 42; 37 | bar(|| x); 38 | ``` 39 | 40 | Note how `x` is in scope in the closure.
41 | 42 | We've seen closures before, used with iterators, and this is a common use case 43 | for them. E.g., to add a value to each element of a vector: 44 | 45 | ```rust 46 | fn baz(v: Vec<i32>) -> Vec<i32> { 47 | let z = 3; 48 | v.iter().map(|x| x + z).collect() 49 | } 50 | ``` 51 | 52 | Here `x` is an argument to the closure, each member of `v` will be passed as an 53 | `x`. `z` is declared outside of the closure, but because it's a closure, `z` can 54 | be referred to. We could also pass a function to map: 55 | 56 | ```rust 57 | fn add_two(x: i32) -> i32 { 58 | x + 2 59 | } 60 | 61 | fn baz(v: Vec<i32>) -> Vec<i32> { 62 | v.iter().map(add_two).collect() 63 | } 64 | ``` 65 | 66 | Note that Rust also allows declaring functions inside of functions. These are 67 | *not* closures - they can't access their environment. They are merely a 68 | convenience for scoping. 69 | 70 | ```rust 71 | fn qux(x: i32) { 72 | fn quxx() -> i32 { 73 | x // ERROR x is not in scope. 74 | } 75 | 76 | let a = quxx(); 77 | } 78 | ``` 79 | 80 | ## Function types 81 | 82 | Let's introduce a new example function: 83 | 84 | ```rust 85 | fn add_42(x: i32) -> i64 { 86 | x as i64 + 42 87 | } 88 | ``` 89 | 90 | As we saw before, we can store a function in a variable: `let a = add_42;`. The 91 | most precise type of `a` cannot be written in Rust. You'll sometimes see the 92 | compiler render it as `fn(i32) -> i64 {add_42}` in error messages. Each function 93 | has its own unique and anonymous type. `fn add_41(x: i32) -> i64` has a different 94 | type, even though it has the same signature. 95 | 96 | We can write less precise types, for example, `let a: fn(i32) -> i64 = add_42;`. 97 | All function types with the same signature can be coerced to a `fn` type 98 | (which can be written by the programmer). 99 | 100 | `a` is represented by the compiler as a function pointer, however, if the 101 | compiler knows the precise type, it doesn't actually use that function pointer.
102 | A call like `a()` is statically dispatched based on the type of `a`. If the 103 | compiler doesn't know the precise type (e.g., it only knows the `fn` type), then 104 | the call is dispatched using the function pointer in the value. 105 | 106 | There are also `Fn` types (note the capital 'F'). These `Fn` types are bounds, 107 | just like traits (in fact they *are* traits, as we'll see later). `Fn(i32) -> i64` 108 | is a bound on the types of all function-like objects with that signature. When 109 | we take a reference to a function pointer, we're actually creating a trait 110 | object which is represented by a fat pointer (see DSTs). 111 | 112 | To pass a function to another function, or to store the function in a field, we 113 | must write a type. We have two choices: we can use either a `fn` type 114 | or a `Fn` type. The latter is better because it includes closures (and 115 | potentially other function-like things), whereas `fn` types don't. The `Fn` 116 | types are dynamically sized which means we cannot use them as value types. We 117 | must either pass function objects or use generics. Let's look at the generic 118 | approach first. For example, 119 | 120 | ```rust 121 | fn bar<F>(f: F) -> i64 122 | where F: Fn(i32) -> i64 123 | { 124 | f(0) 125 | } 126 | ``` 127 | 128 | `bar` takes any function with the signature `Fn(i32) -> i64`, i.e., we can 129 | instantiate the `F` type parameter with any function-like type. We could call 130 | `bar(add_42)` to pass `add_42` to `bar` which would instantiate `F` with 131 | `add_42`'s anonymous type. We could also call `bar(add_41)` and that would work 132 | too. 133 | 134 | You can also pass closures to `bar`, e.g., `bar(|x| x as i64)`. This works 135 | because closure types are also bounded by the `Fn` bound matching their 136 | signature (like functions, each closure has its own anonymous type).
137 | 138 | Finally, you can pass references to functions or closures too: `bar(&add_42)` or 139 | `bar(&|x| x as i64)`. 140 | 141 | One could also write `bar` as `fn bar(f: &Fn(i32) -> i64) ...`. These two 142 | approaches (generics vs a function/trait object) have quite different semantics. 143 | In the generics case, `bar` will be monomorphised so when code is generated, the 144 | compiler knows the exact type of `f`, that means it can be statically dispatched. 145 | If using a function object, the function is not monomorphised. The exact type of 146 | `f` is not known, and so the compiler must generate a virtual dispatch. The 147 | latter is slower, but the former will produce more code (one monomorphised 148 | function per type parameter instance). 149 | 150 | There are actually more function traits than just `Fn`; there are `FnMut` and 151 | `FnOnce` too. These are used in the same way as `Fn`, e.g., `FnOnce(i32) -> 152 | i64`. A `FnMut` represents an object which can be called and can be mutated 153 | during that call. This doesn't apply to normal functions, but for closures it 154 | means the closure can mutate its environment. `FnOnce` is a function which can 155 | only be called (at most) once. Again, this is only relevant for closures. 156 | 157 | `Fn`, `FnMut`, and `FnOnce` are in a sub-trait hierarchy. `Fn`s are `FnMut`s 158 | (because one can call a `Fn` function with permission to mutate and no harm is 159 | done, but the opposite is not true). `Fn`s and `FnMut`s are `FnOnce`s (because 160 | there is no harm done if a regular function is only called once, but not the 161 | opposite). 162 | 163 | So, to make a higher-order function as flexible as possible, you should use the 164 | `FnOnce` bound, rather than the `Fn` bound (or use the `FnMut` bound if you must 165 | call the function more than once). 166 | 167 | 168 | ### Methods 169 | 170 | You can use methods in the same way as functions - take pointers to them, store 171 | them in variables, etc.
You can't use the dot syntax; you must explicitly name 172 | the method using the fully explicit form of naming (sometimes called UFCS for 173 | universal function call syntax). The `self` parameter is the first argument to 174 | the method. E.g., 175 | 176 | ```rust 177 | struct Foo; 178 | 179 | impl Foo { 180 | fn bar(&self) {} 181 | } 182 | 183 | trait T { 184 | fn baz(&self); 185 | } 186 | 187 | impl T for Foo { 188 | fn baz(&self) {} 189 | } 190 | 191 | fn main() { 192 | // Inherent method. 193 | let x = Foo::bar; 194 | x(&Foo); 195 | 196 | // Trait method, note the fully explicit naming form. 197 | let y = <Foo as T>::baz; 198 | y(&Foo); 199 | } 200 | ``` 201 | 202 | 203 | ### Generic functions 204 | 205 | You can't take a pointer to a generic function and there is no way to express a 206 | generic function type. However, you can take a reference to a function if all 207 | its type parameters are instantiated. E.g., 208 | 209 | ```rust 210 | fn foo<T>(x: &T) {} 211 | 212 | fn main() { 213 | let x = &foo::<i32>; 214 | x(&42); 215 | } 216 | ``` 217 | 218 | There is no way to define a generic closure. If you need a closure to work over 219 | many types you can use trait objects, macros (for generating closures), or pass 220 | a closure which returns closures (each returned closure can operate on a 221 | different type). 222 | 223 | 224 | ### Lifetime-generic functions and higher-ranked types 225 | 226 | It *is* possible to have function types and closures which are generic over 227 | lifetimes. 228 | 229 | Imagine we have a closure which takes a borrowed reference. The closure can work 230 | the same way no matter what lifetime the reference has (and indeed in the 231 | compiled code, the lifetime will have been erased). But, what does the type look 232 | like? 233 | 234 | For example, 235 | 236 | ```rust 237 | fn foo<F>(x: &Bar, f: F) -> &Baz 238 | where F: Fn(&Bar) -> &Baz 239 | { 240 | f(x) 241 | } 242 | ``` 243 | 244 | what are the lifetimes of the references here?
In this simple example, we can 245 | use a single lifetime (no need for a generic closure): 246 | 247 | ```rust 248 | fn foo<'b, F>(x: &'b Bar, f: F) -> &'b Baz 249 | where F: Fn(&'b Bar) -> &'b Baz 250 | { 251 | f(x) 252 | } 253 | ``` 254 | 255 | But what if we want `f` to work on inputs with different lifetimes? Then we need 256 | a generic function type: 257 | 258 | ```rust 259 | fn foo<'b, 'c, F>(x: &'b Bar, y: &'c Bar, f: F) -> (&'b Baz, &'c Baz) 260 | where F: for<'a> Fn(&'a Bar) -> &'a Baz 261 | { 262 | (f(x), f(y)) 263 | } 264 | ``` 265 | 266 | The novelty here is the `for<'a>` syntax; this is used to denote a function type 267 | which is generic over a lifetime. It is read "for all 'a, ...". In theoretical 268 | terms, the function type is universally quantified. 269 | 270 | Note that we cannot hoist up `'a` to `foo` in the above example. Counter-example: 271 | 272 | ```rust 273 | fn foo<'a, 'b, 'c, F>(x: &'b Bar, y: &'c Bar, f: F) -> (&'b Baz, &'c Baz) 274 | where F: Fn(&'a Bar) -> &'a Baz 275 | { 276 | (f(x), f(y)) 277 | } 278 | ``` 279 | 280 | will not compile because when the compiler infers lifetimes for a call to `foo`, 281 | it must pick a single lifetime for `'a`, which it can't do if `'b` and `'c` are 282 | different. 283 | 284 | A function type which is generic in this way is called a higher-ranked type. 285 | Lifetime variables at the outer level have rank one. Because `'a` in the above 286 | example cannot be moved to the outer level, its rank is higher than one. 287 | 288 | Calling functions with higher-ranked function type arguments is easy - the 289 | compiler will infer the lifetime parameters. E.g., `foo(&Bar { ... }, &Bar 290 | {...}, |b| &b.field)`. 291 | 292 | In fact, most of the time you don't even need to worry about such things. The 293 | compiler will allow you to elide the quantified lifetimes in the same way that 294 | you are allowed to elide many lifetimes on function arguments.
For example, the 295 | example above can just be written as 296 | 297 | ```rust 298 | fn foo<'b, 'c, F>(x: &'b Bar, y: &'c Bar, f: F) -> (&'b Baz, &'c Baz) 299 | where F: Fn(&Bar) -> &Baz 300 | { 301 | (f(x), f(y)) 302 | } 303 | ``` 304 | 305 | (and you only need `'b` and `'c` because it is a contrived example). 306 | 307 | Where Rust sees a function type with borrowed references, it will apply the 308 | usual elision rules, and quantify the elided variables at the scope of the 309 | function type (i.e., with higher rank). 310 | 311 | You might be wondering why bother with all this complexity for what looks like a 312 | fairly niche use case. The real motivation is functions which take a function 313 | to operate on some data provided by the outer function. For example, 314 | 315 | ```rust 316 | fn foo<F>(f: F) 317 | where F: Fn(&i32) // Fully explicit type: for<'a> Fn(&'a i32) 318 | { 319 | let data = 42; 320 | f(&data) 321 | } 322 | ``` 323 | 324 | In these cases, we *need* higher-ranked types. If we added a lifetime parameter 325 | to `foo` instead, we could never infer a correct lifetime. To see why, let's 326 | look at how it might work. Consider `fn foo<'a, F: Fn(&'a i32)> ...`. Rust 327 | requires that any lifetime parameter must outlive the item it is declared on (if 328 | this were not the case, an argument with that lifetime could be used inside that 329 | function, where it is not guaranteed to be live). In the body of `foo` we use 330 | `f(&data)`; the lifetime Rust will infer for that reference will last (at most) 331 | from where `data` is declared to where it goes out of scope. Since `'a` must 332 | outlive `foo`, but that inferred lifetime does not, we cannot call `f` in this 333 | way. 334 | 335 | However, with higher-ranked lifetimes `f` can accept any lifetime and so the 336 | anonymous one from `&data` is fine and the function type checks.
337 | 338 | 339 | ### Enum constructors 340 | 341 | This is something of a digression, but it is sometimes a useful trick. All 342 | variants of an enum define a function from the fields of the variant to the enum 343 | type. For example, 344 | 345 | ```rust 346 | enum Foo { 347 | Bar, 348 | Baz(i32), 349 | } 350 | ``` 351 | 352 | defines two functions, `Foo::Bar: Fn() -> Foo` and `Foo::Baz: Fn(i32) -> Foo`. 353 | We don't normally use the variants in this way, we treat them as data types 354 | rather than functions. But sometimes it is useful, for example if we have a list 355 | of `i32`s we can create a list of `Foo`s with 356 | 357 | ```rust 358 | list_of_i32.iter().map(Foo::Baz).collect() 359 | ``` 360 | 361 | 362 | ## Closure flavours 363 | 364 | A closure has two forms of input: the arguments which are passed to it explicitly 365 | and the variables it *captures* from its environment. Usually, everything about 366 | both kinds of input is inferred, but you can have more control if you want it. 367 | 368 | For the arguments, you can declare types instead of letting Rust infer them. You 369 | can also declare a return type. Rather than writing `|x| { ... }` you can write 370 | `|x: i32| -> String { ... }`. Whether an argument is owned or borrowed is 371 | determined by the types (either declared or inferred). 372 | 373 | For the captured variables, the type is mostly known from the environment, but 374 | Rust does a little extra magic. Should a variable be captured by reference or 375 | value? Rust infers this from the body of the closure. If possible, Rust captures 376 | by reference. E.g., 377 | 378 | ```rust 379 | fn foo(x: Bar) { 380 | let f = || { ... x ... }; 381 | } 382 | ``` 383 | 384 | All being well, in the body of `f`, `x` has the type `&Bar` with a lifetime 385 | bounded by the scope of `foo`. However, if `x` is mutated, then Rust will infer 386 | that the capture is by mutable reference, i.e., `x` has type `&mut Bar`. 
If `x` 387 | is moved in `f` (e.g., is stored into a variable or field with value type), then 388 | Rust infers that the variable must be captured by value, i.e., it has the type 389 | `Bar`. 390 | 391 | This can be overridden by the programmer (sometimes necessary if the closure 392 | will be stored in a field or returned from a function) by using the `move` 393 | keyword in front of a closure. Then, all of the captured variables are captured 394 | by value. E.g., in `let f = move || { ... x ... };`, `x` would always have type 395 | `Bar`. 396 | 397 | We talked earlier about the different function kinds: `Fn`, `FnMut`, and `FnOnce`. 398 | We can now explain why we need them. For closures, the mutable-ness and once-ness 399 | refer to the captured variables. If a closure mutates any of the variables it 400 | captures then it will have a `FnMut` type (note that this is completely inferred 401 | by the compiler; no annotation is necessary). If a variable is moved into a 402 | closure, i.e., it is captured by value (either because of an explicit `move` or 403 | due to inference), then the closure will have a `FnOnce` type. It would be unsafe 404 | to call such a closure multiple times because the captured variable would be 405 | moved more than once. 406 | 407 | Rust will do its best to infer the most flexible type for the closure if it can. 408 | 409 | 410 | ## Implementation 411 | 412 | A closure is implemented as an anonymous struct. That struct has a field for 413 | each variable captured by the closure. It is lifetime-parametric with a single 414 | lifetime parameter which is a bound on the lifetime of captured variables. The 415 | anonymous struct implements a `call` method which is called to execute the 416 | closure. 417 | 418 | For example, consider 419 | 420 | ```rust 421 | fn main() { 422 | let x = Foo { ... 
}; 423 | let f = |y| x.get_number() + y; 424 | let z = f(42); 425 | } 426 | ``` 427 | 428 | the compiler treats this as 429 | 430 | ```rust 431 | struct Closure14<'env> { 432 | x: &'env Foo, 433 | } 434 | 435 | // Not actually implemented like this, see below. 436 | impl<'env> Closure14<'env> { 437 | fn call(&self, y: i32) -> i32 { 438 | self.x.get_number() + y 439 | } 440 | } 441 | 442 | fn main() { 443 | let x = Foo { ... }; 444 | let f = Closure14 { x: &x }; 445 | let z = f.call(42); 446 | } 447 | ``` 448 | 449 | As we mentioned above, there are three different function traits - `Fn`, 450 | `FnMut`, and `FnOnce`. In reality the `call` method is required by these traits 451 | rather than being in an inherent impl. `Fn` has a method `call` which takes 452 | `self` by reference, `FnMut` has `call_mut` taking `self` by mutable reference, 453 | and `FnOnce` has `call_once` which takes `self` by value. 454 | 455 | When we've seen function types above, they look like `Fn(i32) -> i32` which 456 | doesn't look much like a trait type. There is a little bit of magic here. Rust allows 457 | this round bracket sugar only for function types. To desugar to a regular type 458 | (an 'angle bracket type'), the argument types are treated as a tuple type and 459 | passed as a type parameter and the return type as an associated type called 460 | `Output`. So, `Fn(i32) -> i32` is desugared to `Fn<(i32,), Output=i32>` and the 461 | `Fn` trait definition looks like 462 | 463 | ```rust 464 | pub trait Fn<Args> : FnMut<Args> { 465 | fn call(&self, args: Args) -> Self::Output; 466 | } 467 | ``` 468 | 469 | The implementation for `Closure14` above would therefore look more like 470 | 471 | ```rust 472 | impl<'env> FnOnce<(i32,)> for Closure14<'env> { 473 | type Output = i32; 474 | fn call_once(self, args: (i32,)) -> i32 { 475 | ... 476 | } 477 | } 478 | impl<'env> FnMut<(i32,)> for Closure14<'env> { 479 | fn call_mut(&mut self, args: (i32,)) -> i32 { 480 | ... 
481 | } 482 | } 483 | impl<'env> Fn<(i32,)> for Closure14<'env> { 484 | fn call(&self, args: (i32,)) -> i32 { 485 | ... 486 | } 487 | } 488 | ``` 489 | 490 | You can find the function traits in 491 | [core::ops](https://dxr.mozilla.org/rust/source/src/libcore/ops.rs). 492 | 493 | We talked above about how using generics gives static dispatch and using trait 494 | objects gives virtual dispatch. We can now see in a bit more detail why. 495 | 496 | When we call `call`, it is a statically dispatched method call; there is no 497 | virtual dispatch. If we pass it to a monomorphised function, we still know the 498 | type statically, and we still get a static dispatch. 499 | 500 | We can make the closure into a trait object, e.g., `&f` or `Box::new(f)` with 501 | types `&Fn(i32)->i32` or `Box<Fn(i32)->i32>`. These are pointer types, and 502 | because they are pointer-to-trait types, the pointers are fat pointers. That 503 | means they consist of the pointer to the data itself and a pointer to a vtable. 504 | The vtable is used to look up the address of `call` (or `call_mut` or whatever). 505 | 506 | You'll sometimes hear these two representations of closures called boxed and 507 | unboxed closures. An unboxed closure is the by-value version with static 508 | dispatch. A boxed version is the trait object version with dynamic dispatch. In 509 | the olden days, Rust only had boxed closures (and the system was quite a bit 510 | different).
511 | 512 | ## References 513 | 514 | * [RFC 114 - Closures](https://github.com/rust-lang/rfcs/blob/master/text/0114-closures.md) 515 | * [Finding Closure in Rust blog post](http://huonw.github.io/blog/2015/05/finding-closure-in-rust/) 516 | * [RFC 387 - Higher ranked trait bounds](https://github.com/rust-lang/rfcs/blob/master/text/0387-higher-ranked-trait-bounds.md) 517 | * [Purging proc blog post](http://smallcultfollowing.com/babysteps/blog/2014/11/26/purging-proc/) 518 | 519 | FIXME: relate to closures in C++ 11 520 | --------------------------------------------------------------------------------