├── graphs
    ├── Cargo.toml
    ├── src
    │   ├── mod.rs
    │   ├── rc_graph.rs
    │   ├── ref_graph.rs
    │   └── ref_graph_generic_mod.rs
    └── README.md
├── compile_pdf.sh
├── LICENSE.md
├── primitives.md
├── README.md
├── destructuring.md
├── rc-raw.md
├── unique.md
├── control-flow.md
├── hello-world.md
├── borrowed.md
├── destructuring-2.md
├── arrays.md
├── data-types.md
└── closures.md


/graphs/Cargo.toml:
--------------------------------------------------------------------------------
 1 | [package]
 2 | name = "graphs"
 3 | version = "0.0.1"
 4 | authors = ["nrc <ncameron@mozilla.com>"]
 5 | 
 6 | [dependencies]
 7 | typed-arena = "2"
 8 | 
 9 | [[bin]]
10 | name = "graphs"
11 | path = "src/mod.rs"
12 | 


--------------------------------------------------------------------------------
/compile_pdf.sh:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env bash
2 | 
3 | # Build the whole tutorial as a single PDF. Chapters are passed to pandoc in
4 | # the same order as the contents list in README.md.
5 | pandoc -s README.md hello-world.md control-flow.md primitives.md unique.md borrowed.md rc-raw.md data-types.md destructuring.md destructuring-2.md arrays.md graphs/README.md closures.md -o r4cppp.pdf
6 | 


--------------------------------------------------------------------------------
/graphs/src/mod.rs:
--------------------------------------------------------------------------------
 1 | //! Runs the two graph examples: one built from `Rc<RefCell<Node>>` and one
 2 | //! built from arena-allocated `&Node` references with `UnsafeCell`.
 3 | 
 4 | extern crate typed_arena;
 5 | 
 6 | mod rc_graph;
 7 | mod ref_graph;
 8 | 
 9 | /// Runs each example's `main`, printing a heading before it.
10 | fn main() {
11 |     println!("\nRc<RefCell<Node>>:");
12 |     rc_graph::main();
13 |     println!("\n&Node and UnsafeCell:");
14 |     ref_graph::main();
15 | }
16 | 


--------------------------------------------------------------------------------
/LICENSE.md:
--------------------------------------------------------------------------------
1 | Copyright 2015 The Rust for C++ programmers Developers.
2 | 
3 | Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
4 | http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
5 | <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
6 | option. This file may not be copied, modified, or distributed
7 | except according to those terms.
8 | 


--------------------------------------------------------------------------------
/graphs/src/rc_graph.rs:
--------------------------------------------------------------------------------
 1 | 
 2 | use std::rc::Rc;
 3 | use std::cell::RefCell;
 4 | use std::collections::HashSet;
 5 | 
 6 | /// A graph node that shares ownership of its neighbours through
 7 | /// `Rc<RefCell<Node>>` handles.
 8 | struct Node {
 9 |     datum: &'static str,
10 |     edges: Vec<Rc<RefCell<Node>>>,
11 | }
12 | 
13 | impl Node {
14 |     /// Creates a node labelled `datum` with no outgoing edges, already wrapped
15 |     /// in the `Rc<RefCell<_>>` handle used by the rest of this file.
16 |     fn new(datum: &'static str) -> Rc<RefCell<Node>> {
17 |         let node = Node {
18 |             datum: datum,
19 |             edges: Vec::new(),
20 |         };
21 |         Rc::new(RefCell::new(node))
22 |     }
23 | 
24 |     /// Depth-first walk starting at `self`: calls `f` with the datum of every
25 |     /// reachable node whose datum is not yet in `seen`, recording each one in
26 |     /// `seen` so that cycles terminate.
27 |     fn traverse<F>(&self, f: &F, seen: &mut HashSet<&'static str>)
28 |         where F: Fn(&'static str)
29 |     {
30 |         if !seen.contains(&self.datum) {
31 |             f(self.datum);
32 |             seen.insert(self.datum);
33 |             for neighbour in self.edges.iter() {
34 |                 neighbour.borrow().traverse(f, seen);
35 |             }
36 |         }
37 |     }
38 | 
39 |     /// Returns a new handle to the first neighbour. Panics if `edges` is empty.
40 |     fn first(&self) -> Rc<RefCell<Node>> {
41 |         Rc::clone(&self.edges[0])
42 |     }
43 | }
44 | 
45 | /// Prints the datum of `node`, prefixed with `foo: `.
46 | fn foo(node: &Node) {
47 |     println!("foo: {}", node.datum);
48 | }
49 | 
50 | /// Builds the example graph and returns its root, `A`.
51 | ///
52 | /// `A` points to `B`, `C`, and `D`; `C` points to `E`, `F`, and back to `A`,
53 | /// which makes the graph cyclic.
54 | fn init() -> Rc<RefCell<Node>> {
55 |     let root = Node::new("A");
56 | 
57 |     let b = Node::new("B");
58 |     let c = Node::new("C");
59 |     let d = Node::new("D");
60 |     let e = Node::new("E");
61 |     let f = Node::new("F");
62 | 
63 |     // Each `borrow_mut` guard is a temporary that is released at the end of
64 |     // its statement.
65 |     root.borrow_mut().edges.extend(vec![b, c.clone(), d]);
66 |     c.borrow_mut().edges.extend(vec![e, f, root.clone()]);
67 | 
68 |     root
69 | }
70 | 
71 | /// Walks the example graph printing every datum, then passes the root's first
72 | /// neighbour to `foo`.
73 | pub fn main() {
74 |     let root = init();
75 |     let g = root.borrow();
76 |     g.traverse(&|d| println!("{}", d), &mut HashSet::new());
77 |     let f = g.first();
78 |     foo(&f.borrow());
79 | }
80 | 


--------------------------------------------------------------------------------
/graphs/src/ref_graph.rs:
--------------------------------------------------------------------------------
 1 | 
 2 | use std::cell::UnsafeCell;
 3 | use std::collections::HashSet;
 4 | use typed_arena::Arena;
 5 | 
 6 | /// A graph node allocated in an arena; its edges are plain references to
 7 | /// other nodes with the same lifetime.
 8 | ///
 9 | /// `edges` is wrapped in an `UnsafeCell` so that neighbours can be added
10 | /// through a shared `&Node`.
11 | struct Node<'a> {
12 |     datum: &'static str,
13 |     edges: UnsafeCell<Vec<&'a Node<'a>>>,
14 | }
15 | 
16 | impl<'a> Node<'a> {
17 |     /// Allocates a node labelled `datum`, with no edges, in `arena`.
18 |     fn new<'b>(datum: &'static str, arena: &'b Arena<Node<'b>>) -> &'b Node<'b> {
19 |         let node = Node {
20 |             datum: datum,
21 |             edges: UnsafeCell::new(Vec::new()),
22 |         };
23 |         arena.alloc(node)
24 |     }
25 | 
26 |     /// Depth-first walk starting at `self`: calls `f` with the datum of every
27 |     /// reachable node whose datum is not yet in `seen`, recording each one in
28 |     /// `seen` so that cycles terminate.
29 |     fn traverse<F>(&self, f: &F, seen: &mut HashSet<&'static str>)
30 |         where F: Fn(&'static str)
31 |     {
32 |         if seen.contains(&self.datum) {
33 |             return;
34 |         }
35 |         f(self.datum);
36 |         seen.insert(self.datum);
37 | 
38 |         // SAFETY: within this file `edges` is only written in `init`, and
39 |         // those writes are finished before anything is traversed, so no
40 |         // mutable reference to the vector is live here.
41 |         let neighbours = unsafe { &*self.edges.get() };
42 |         for neighbour in neighbours {
43 |             neighbour.traverse(f, seen);
44 |         }
45 |     }
46 | 
47 |     /// Returns the first neighbour. Panics if there are no edges.
48 |     fn first(&'a self) -> &'a Node<'a> {
49 |         // SAFETY: only a shared reference is formed, and within this file
50 |         // `edges` is written nowhere but in `init`.
51 |         let neighbours = unsafe { &*self.edges.get() };
52 |         neighbours[0]
53 |     }
54 | }
55 | 
56 | /// Prints the datum of `node`, prefixed with `foo: `.
57 | fn foo<'a>(node: &'a Node<'a>) {
58 |     println!("foo: {}", node.datum);
59 | }
60 | 
61 | /// Builds the example graph in `arena` and returns its root, `A`.
62 | ///
63 | /// `A` points to `B`, `C`, and `D`; `C` points to `E`, `F`, and back to `A`,
64 | /// which makes the graph cyclic.
65 | fn init<'a>(arena: &'a Arena<Node<'a>>) -> &'a Node<'a> {
66 |     let root = Node::new("A", arena);
67 | 
68 |     let b = Node::new("B", arena);
69 |     let c = Node::new("C", arena);
70 |     let d = Node::new("D", arena);
71 |     let e = Node::new("E", arena);
72 |     let f = Node::new("F", arena);
73 | 
74 |     // SAFETY: each mutable reference below is the only reference to its
75 |     // vector while it exists; nothing else reads `edges` until `init` returns.
76 |     unsafe {
77 |         let root_edges = &mut *root.edges.get();
78 |         root_edges.push(b);
79 |         root_edges.push(c);
80 |         root_edges.push(d);
81 | 
82 |         let c_edges = &mut *c.edges.get();
83 |         c_edges.push(e);
84 |         c_edges.push(f);
85 |         c_edges.push(root);
86 |     }
87 | 
88 |     root
89 | }
90 | 
91 | /// Builds the example graph in a fresh arena, prints every datum, then passes
92 | /// the root's first neighbour to `foo`.
93 | pub fn main() {
94 |     let arena = Arena::new();
95 |     let g = init(&arena);
96 |     let mut visited = HashSet::new();
97 |     g.traverse(&|d| println!("{}", d), &mut visited);
98 |     foo(g.first());
99 | }
100 | 


--------------------------------------------------------------------------------
/graphs/src/ref_graph_generic_mod.rs:
--------------------------------------------------------------------------------
 1 | // Note that this one is hypothetical future Rust and will not compile today.
 2 | 
 3 | use std::cell::UnsafeCell;
 4 | use std::collections::HashSet;
 5 | use arena::TypedArena;
 6 | 
 7 | // Module is parameterised with the lifetime of the graph.
 8 | mod graph<'a> {
 9 |     /// A graph node whose edges are references to other nodes.
10 |     struct Node {
11 |         datum: &'static str,
12 |         // The module-level lifetime is used for the lifetime of each Node.
13 |         edges: UnsafeCell<Vec<&'a Node>>,
14 |     }
15 | 
16 |     impl Node {
17 |         /// Allocates a node labelled `datum`, with no edges, in `arena`.
18 |         fn new(datum: &'static str, arena: &'a TypedArena<Node>) -> &'a Node {
19 |             arena.alloc(Node {
20 |                 datum: datum,
21 |                 edges: UnsafeCell::new(Vec::new()),
22 |             })
23 |         }
24 | 
25 |         /// Depth-first walk starting at `self`: calls `f` with the datum of
26 |         /// every reachable node whose datum is not yet in `seen`, recording
27 |         /// each one in `seen` so that cycles terminate.
28 |         fn traverse<F>(&self, f: &F, seen: &mut HashSet<&'static str>)
29 |             where F: Fn(&'static str)
30 |         {
31 |             if seen.contains(&self.datum) {
32 |                 return;
33 |             }
34 |             f(self.datum);
35 |             seen.insert(self.datum);
36 |             // SAFETY: within this file `edges` is only written in `init`,
37 |             // before anything is traversed.
38 |             unsafe {
39 |                 for n in &(*self.edges.get()) {
40 |                     n.traverse(f, seen);
41 |                 }
42 |             }
43 |         }
44 | 
45 |         /// Returns the first neighbour. Panics if there are no edges.
46 |         fn first(&self) -> &Node {
47 |             // SAFETY: only a shared reference to the vector is formed here.
48 |             unsafe {
49 |                 (*self.edges.get())[0]
50 |             }
51 |         }
52 |     }
53 | 
54 |     // It would be nice if we could rely on lifetime elision and remove the `'a`
55 |     // on the `foo` and `init` functions.
56 |     fn foo(node: &'a Node) {
57 |         println!("foo: {}", node.datum);
58 |     }
59 | 
60 |     /// Builds the example graph in `arena` and returns its root, `A`.
61 |     fn init(arena: &'a TypedArena<Node>) -> &'a Node {
62 |         let root = Node::new("A", arena);
63 | 
64 |         let b = Node::new("B", arena);
65 |         let c = Node::new("C", arena);
66 |         let d = Node::new("D", arena);
67 |         let e = Node::new("E", arena);
68 |         let f = Node::new("F", arena);
69 | 
70 |         // SAFETY: each `push` forms a short-lived mutable reference, and
71 |         // nothing else refers to these vectors until `init` returns.
72 |         unsafe {
73 |             (*root.edges.get()).push(b);
74 |             (*root.edges.get()).push(c);
75 |             (*root.edges.get()).push(d);
76 | 
77 |             (*c.edges.get()).push(e);
78 |             (*c.edges.get()).push(f);
79 |             (*c.edges.get()).push(root);
80 |         }
81 | 
82 |         root
83 |     }
84 | }
85 | 
86 | pub fn main() {
87 |     let arena = TypedArena::new();
88 |     // The lifetime of the module is inferred here from the lifetime of the
89 |     // reference to the arena, i.e., the scope of the main function.
90 |     let g = graph::init(&arena);
91 |     g.traverse(&|d| println!("{}", d), &mut HashSet::new());
92 |     graph::foo(g.first());
93 | }
94 | 


--------------------------------------------------------------------------------
/primitives.md:
--------------------------------------------------------------------------------
 1 | # Primitive types and operators
 2 | 
 3 | Rust has pretty much the same arithmetic and logical operators as C++. `bool` is
 4 | the same in both languages (as are the `true` and `false` literals). Rust has
 5 | similar concepts of integers, unsigned integers, and floats. However the syntax
 6 | is a bit different. Rust uses `isize` to mean an integer and `usize` to mean an
 7 | unsigned integer. These types are pointer sized. E.g., on a 32 bit system,
 8 | `usize` means a 32 bit unsigned integer. Rust also has explicitly sized types
 9 | which are `u` or `i` followed by 8, 16, 32, 64, or 128. So, for example, `u8` is
10 | an 8 bit unsigned integer and `i32` is a 32 bit signed integer. For floats, Rust
11 | has `f32` and `f64`.
12 | 
13 | Numeric literals can take suffixes to indicate their type. If no suffix is given, Rust tries to infer the
14 | type. If it can't infer, it uses `i32` or `f64` (if there is a decimal point).
15 | Examples:
16 | 
17 | ```rust
18 | fn main() {
19 |     let x: bool = true;
20 |     let x = 34;   // type i32
21 |     // let x = 2147483648; // error: literal out of range for `i32`
22 |     let x = 34isize;
23 |     let x = 34usize;
24 |     let x = 34u8;
25 |     let x = 34i64;
26 |     let x = 34f32;
27 | }
28 | ```
29 | 
30 | As a side note, Rust lets you redefine variables so the above code is legal -
31 | each `let` statement creates a new variable `x` and hides the previous one. This
32 | is more useful than you might expect due to variables being immutable by
33 | default.
34 | 
35 | Numeric literals can be given as binary, octal, and hexadecimal, as well as
36 | decimal. Use the `0b`, `0o`, and `0x` prefixes, respectively. You can use an
37 | underscore anywhere in a numeric literal and it will be ignored. E.g.,
38 | 
39 | ```rust
40 | fn main() {
41 |     let x = 12;
42 |     let x = 0b1100;
43 |     let x = 0o14;
44 |     let x = 0xc;
45 |     let y = 0b_1100_0011_1011_0001;
46 | }
47 | ```
48 | 
49 | Rust has chars and strings, but since they are Unicode, they are a bit different
50 | from C++. I'm going to postpone talking about them until after I've introduced
51 | pointers, references, and vectors (arrays).
52 | 
53 | Rust does not implicitly coerce numeric types. In general, Rust has much less
54 | implicit coercion and subtyping than C++. Rust uses the `as` keyword for
55 | explicit coercions and casting. Any numeric value can be cast to another numeric
56 | type. `as` cannot be used to convert from numeric types to boolean types, but
57 | the reverse can be done. E.g.,
58 | 
59 | ```rust
60 | fn main() {
61 |     let x = 34usize as isize;   // cast usize to isize
62 |     let x = 10 as f32;      // i32 to float
63 |     let x = 10.45f64 as i8; // float to i8 (loses precision)
64 |     let x = 4u8 as u64;     // gains precision
65 |     let x = 400u16 as u8;   // 144, loses precision (and thus changes the value)
66 |     println!("`400u16 as u8` gives {}", x);
67 |     let x = -3i8 as u8;     // 253, signed to unsigned (changes sign)
68 |     println!("`-3i8 as u8` gives {}", x);
69 |     //let x = 45 as bool;  // FAILS! (use 45 != 0 instead)
70 |     let x = true as usize;  // cast bool to usize (gives a 1)
71 | }
72 | ```
73 | 
74 | Rust has the following operators:
75 | 
76 | |         Type          |            Operators             |
77 | | --------------------- | -------------------------------- |
78 | | Numeric               | `+`, `-`, `*`, `/`, `%`          |
79 | | Bitwise               | `\|`, `&`, `^`, `<<`, `>>`       |
80 | | Comparison            | `==`, `!=`, `>`, `<`, `>=`, `<=` |
81 | | Short-circuit logical | `\|\|`, `&&`                     |
82 | 
83 | All of these behave as in C++, however, Rust is a bit stricter about the types
84 | the operators can be applied to - the bitwise operators can only be applied to
85 | integers and the logical operators can only be applied to booleans. Rust has the
86 | `-` unary operator which negates a number. The `!` operator negates a boolean
87 | and inverts every bit on an integer type (equivalent to `~` in C++ in the latter
88 | case). Rust has compound assignment operators as in C++, e.g., `+=`, but does
89 | not have increment or decrement operators (e.g., `++`).
90 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
  1 | # Rust For Systems Programmers
  2 | 
  3 | A Rust tutorial for experienced C and C++ programmers.
  4 | 
  5 | Jump to [contents](#contents).
  6 | Jump to [contributing](#contributing).
  7 | 
  8 | This tutorial is intended for programmers who already know how pointers and
  9 | references work and are used to systems programming concepts such as integer
 10 | widths and memory management. We intend to cover, primarily, the differences
 11 | between Rust and C++ to get you writing Rust programs quickly without lots of
 12 | fluff you probably already know.
 13 | 
 14 | Hopefully, Rust is a pretty intuitive language for C++ programmers. Most of the
 15 | syntax is pretty similar. The big difference (in my experience) is that the
 16 | sometimes vague concepts of good systems programming are strictly enforced by
 17 | the compiler. This can be infuriating at first - there are things you want to
 18 | do, but the compiler won't let you (at least in safe code), and sometimes these
 19 | things *are* safe, but you can't convince the compiler of that. However, you'll
 20 | quickly develop a good intuition for what is allowed. Communicating your own
 21 | notions of memory safety to the compiler requires some new and sometimes
 22 | complicated type annotations. But if you have a strong idea of lifetimes for
 23 | your objects and experience with generic programming, they shouldn't be too
 24 | tough to learn.
 25 | 
 26 | This tutorial started as a [series of blog posts](http://featherweightmusings.blogspot.co.nz/search/label/rust-for-c),
 27 | partly as an aid for me (@nrc) learning Rust (there is no better way to
 28 | check that you have learnt something than to try and explain it to somebody
 29 | else) and partly because I found the existing resources for learning Rust
 30 | unsatisfactory - they spent too much time on the basics that I already knew and
 31 | used higher level intuitions to describe concepts that could better be explained
 32 | to me using lower level intuitions. Since then, the documentation for Rust has
 33 | got *much* better, but I still think that existing C++ programmers are an
 34 | audience who are a natural target for Rust, but are not particularly well
 35 | catered for.
 36 | 
 37 | 
 38 | ## Contents
 39 | 
 40 | 1. [Introduction - Hello world!](hello-world.md)
 41 | 1. [Control flow](control-flow.md)
 42 | 1. [Primitive types and operators](primitives.md)
 43 | 1. [Unique pointers](unique.md)
 44 | 1. [Borrowed pointers](borrowed.md)
 45 | 1. [Rc and raw pointers](rc-raw.md)
 46 | 1. [Data types](data-types.md)
 47 | 1. [Destructuring pt 1](destructuring.md)
 48 | 1. [Destructuring pt 2](destructuring-2.md)
 49 | 1. [Arrays and vecs](arrays.md)
 50 | 1. [Graphs and arena allocation](graphs/README.md)
 51 | 1. [Closures and first-class functions](closures.md)
 52 | 
 53 | 
 54 | ## Other resources
 55 | 
 56 | * [The Rust book/guide](http://doc.rust-lang.org/book/) - the best place for
 57 |   learning Rust in general and probably the best place to go for a second opinion
 58 |   on stuff here or for stuff not covered.
 59 | * [Rust API documentation](http://doc.rust-lang.org/std/index.html) - detailed
 60 |   documentation for the Rust libraries.
 61 | * [The Rust reference manual](https://doc.rust-lang.org/reference/) - a little
 62 |   out of date in places, but thorough; good for looking up details.
 63 | * [Discuss forum](http://users.rust-lang.org/) - general forum for discussion or
 64 |   questions about using and learning Rust.
 65 | * [StackOverflow Rust questions](https://stackoverflow.com/questions/tagged/rust) - answers
 66 |   to many beginner and advanced questions about Rust, but be careful though - Rust
 67 |   has changed *a lot* over the years and some of the answers might be very out of date.
 68 | * [A Firehose of Rust](https://www.youtube.com/watch?v=IPmRDS0OSxM) - a recorded talk
 69 |   introducing C++ programmers to how lifetimes, mutable aliasing, and move semantics work
 70 |   in Rust
 71 | 
 72 | 
 73 | ## Contributing
 74 | 
 75 | Yes please!
 76 | 
 77 | If you spot a typo or mistake, please submit a PR, don't be shy! Please feel
 78 | free to file [an issue](https://github.com/nrc/r4cppp/issues/new) for
 79 | larger changes or for new chapters you'd like to see. I'd also be happy to see
 80 | re-organisation of existing work or expanded examples, if you feel the tutorial
 81 | could be improved in those ways.
 82 | 
 83 | If you'd like to contribute a paragraph, section, or chapter please do! If you
 84 | want ideas for things to cover, see the [list of issues](https://github.com/nrc/r4cppp/issues),
 85 | in particular those tagged [new material](https://github.com/nrc/r4cppp/labels/new%20material).
 86 | If you're not sure of something, please get in touch by pinging me here
 87 | (@nrc) or on irc (nrc, on #rust or #rust-internals).
 88 | 
 89 | 
 90 | ### Style
 91 | 
 92 | Obviously, the intended audience is C++ programmers. The tutorial should
 93 | concentrate on things that will be new to experienced C++ programmers, rather
 94 | than a general audience (although, I don't assume the audience is familiar with
 95 | the most recent versions of C++). I'd like to avoid too much basic material and
 96 | definitely avoid too much overlap with other resources, in particular the Rust
 97 | guide/book.
 98 | 
 99 | Work on edge case use cases (e.g., using a different build system from Cargo, or
100 | writing syntax extensions, using unstable APIs) is definitely welcome, as is
101 | in-depth work on topics already covered at a high level.
102 | 
103 | I'd like to avoid recipe-style examples for converting C++ code to Rust code,
104 | but small examples of this kind are OK.
105 | 
106 | Use of different formats (e.g., question and answer/FAQs, or larger worked
107 | examples) is welcome.
108 | 
109 | I don't plan on adding exercises or suggestions for mini-projects, but if you're
110 | interested in that, let me know.
111 | 
112 | I'm aiming for a fairly academic tone, but not too dry. All writing should be in
113 | English (British English, not American English; although I would be very happy
114 | to have localisations/translations into any language, including American
115 | English) and be valid GitHub markdown. For advice on writing style, grammar,
116 | punctuation, etc. see the Oxford Style Manual
117 | or [The Economist Style Guide](http://www.economist.com/styleguide/introduction).
118 | Please limit width to 80 columns. I am a fan of the Oxford comma.
119 | 
120 | Don't feel like work has to be perfect to be submitted; I'm happy to edit and
121 | I'm sure other people will be in the future.
122 | 


--------------------------------------------------------------------------------
/destructuring.md:
--------------------------------------------------------------------------------
  1 | # Destructuring
  2 | 
  3 | Last time we looked at Rust's data types. Once you have some data inside a structure, you
  4 | will want to get that data out. For structs, Rust has field access, just like
  5 | C++. For tuples, tuple structs, and enums you must use destructuring (there are
  6 | various convenience functions in the library, but they use destructuring
  7 | internally). Destructuring of data structures exists in C++ only since C++17, so
  8 | it is most likely familiar from languages such as Python or various functional
  9 | languages.  The idea is that just as you can initialize a data structure by
 10 | filling out its fields with data from a bunch of local variables, you can fill
 11 | out a bunch of local variables with data from a data structure.  From this
 12 | simple beginning, destructuring has become one of Rust's most powerful
 13 | features. To put it another way, destructuring combines pattern matching with
 14 | assignment into local variables.
 15 | 
 16 | Destructuring is done primarily through the let and match statements. The match
 17 | statement is used when the structure being destructured can have different
 18 | variants (such as an enum). A let expression pulls the variables out into the
 19 | current scope, whereas match introduces a new scope. To compare:
 20 | 
 21 | ```rust
 22 | fn foo(pair: (i32, i32)) {
 23 |     let (x, y) = pair;
 24 |     // we can now use x and y anywhere in foo
 25 | 
 26 |     match pair {
 27 |         (x, y) => {
 28 |             // x and y can only be used in this scope
 29 |         }
 30 |     }
 31 | }
 32 | ```
 33 | 
 34 | The syntax for patterns (used after `let` and before `=>` in the above example)
 35 | in both cases is (pretty much) the same. You can also use these patterns in
 36 | argument position in function declarations:
 37 | 
 38 | ```rust
 39 | fn foo((x, y): (i32, i32)) {
 40 | }
 41 | ```
 42 | 
 43 | (Which is more useful for structs or tuple-structs than tuples).
 44 | 
 45 | Most initialisation expressions can appear in a destructuring pattern and they
 46 | can be arbitrarily complex. That can include references and primitive literals
 47 | as well as data structures. For example,
 48 | 
 49 | ```rust
 50 | struct St {
 51 |     f1: i32,
 52 |     f2: f32
 53 | }
 54 | 
 55 | enum En {
 56 |     Var1,
 57 |     Var2,
 58 |     Var3(i32),
 59 |     Var4(i32, St, i32)
 60 | }
 61 | 
 62 | fn foo(x: &En) {
 63 |     match x {
 64 |         &Var1 => println!("first variant"),
 65 |         &Var3(5) => println!("third variant with number 5"),
 66 |         &Var3(x) => println!("third variant with number {} (not 5)", x),
 67 |         &Var4(3, St { f1: 3, f2: x }, 45) => {
 68 |             println!("destructuring an embedded struct, found {} in f2", x)
 69 |         }
 70 |         &Var4(_, ref x, _) => {
 71 |             println!("Some other Var4 with {} in f1 and {} in f2", x.f1, x.f2)
 72 |         }
 73 |         _ => println!("other (Var2)")
 74 |     }
 75 | }
 76 | ```
 77 | 
 78 | Note how we destructure through a reference by using `&` in the patterns and how
 79 | we use a mix of literals (`5`, `3`, `St { ... }`), wildcards (`_`), and
 80 | variables (`x`).
 81 | 
 82 | You can use `_` wherever a variable is expected if you want to ignore a single
 83 | item in a pattern, so we could have used `&Var3(_)` if we didn't care about the
 84 | integer. In the first `Var4` arm we destructure the embedded struct (a nested
 85 | pattern) and in the second `Var4` arm we bind the whole struct to a variable.
 86 | You can also use `..` to stand in for all fields of a tuple or struct. So if you
 87 | wanted to do something for each enum variant but don't care about the content of
 88 | the variants, you could write:
 89 | 
 90 | ```rust
 91 | fn foo(x: En) {
 92 |     match x {
 93 |         Var1 => println!("first variant"),
 94 |         Var2 => println!("second variant"),
 95 |         Var3(..) => println!("third variant"),
 96 |         Var4(..) => println!("fourth variant")
 97 |     }
 98 | }
 99 | ```
100 | 
101 | When destructuring structs, the fields don't need to be in order and you can use
102 | `..` to elide the remaining fields. E.g.,
103 | 
104 | ```rust
105 | struct Big {
106 |     field1: i32,
107 |     field2: i32,
108 |     field3: i32,
109 |     field4: i32,
110 |     field5: i32,
111 |     field6: i32,
112 |     field7: i32,
113 |     field8: i32,
114 |     field9: i32,
115 | }
116 | 
117 | fn foo(b: Big) {
118 |     let Big { field6: x, field3: y, ..} = b;
119 |     println!("pulled out {} and {}", x, y);
120 | }
121 | ```
122 | 
123 | As a shorthand with structs you can use just the field name which creates a
124 | local variable with that name. The let statement in the above example created
125 | two new local variables `x` and `y`. Alternatively, you could write
126 | 
127 | ```rust
128 | fn foo(b: Big) {
129 |     let Big { field6, field3, .. } = b;
130 |     println!("pulled out {} and {}", field3, field6);
131 | }
132 | ```
133 | 
134 | Now we create local variables with the same names as the fields, in this case
135 | `field3` and `field6`.
136 | 
137 | There are a few more tricks to Rust's destructuring. Let's say you want a
138 | reference to a variable in a pattern. You can't use `&` because that matches a
139 | reference, rather than creates one (and thus has the effect of dereferencing the
140 | object). For example,
141 | 
142 | ```rust
143 | struct Foo {
144 |     field: &'static i32
145 | }
146 | 
147 | fn foo(x: Foo) {
148 |     let Foo { field: &y } = x;
149 | }
150 | ```
151 | 
152 | Here, `y` has type `i32` and is a copy of the field in `x`.
153 | 
154 | To create a reference to something in a pattern, you use the `ref` keyword. For
155 | example,
156 | 
157 | ```rust
158 | fn foo(b: Big) {
159 |     let Big { field3: ref x, ref field6, ..} = b;
160 |     println!("pulled out {} and {}", *x, *field6);
161 | }
162 | ```
163 | 
164 | Here, `x` and `field6` both have type `&i32` and are references to the fields in `b`.
165 | 
166 | One last trick when destructuring is that if you are destructuring a complex
167 | object, you might want to name intermediate objects as well as individual
168 | fields. Going back to an earlier example, we had the pattern `&Var4(3, St{ f1:
169 | 3, f2: x }, 45)`. In that pattern we named one field of the struct, but you
170 | might also want to name the whole struct object. You could write `&Var4(3, s,
171 | 45)` which would bind the struct object to `s`, but then you would have to use
172 | field access for the fields, or if you wanted to only match with a specific
173 | value in a field you would have to use a nested match. That is not fun. Rust
174 | lets you name parts of a pattern using `@` syntax. For example `&Var4(3, s @ St{
175 | f1: 3, f2: x }, 45)` lets us name both a field (`x`, for `f2`) and the whole
176 | struct (`s`).
177 | 
178 | That just about covers your options with Rust pattern matching. There are a few
179 | features I haven't covered, such as matching vectors, but hopefully you know how
180 | to use `match` and `let` and have seen some of the powerful things you can do.
181 | Next time I'll cover some of the subtle interactions between match and borrowing
182 | which tripped me up a fair bit when learning Rust.
183 | 


--------------------------------------------------------------------------------
/rc-raw.md:
--------------------------------------------------------------------------------
  1 | # Reference counted and raw pointers
  2 | 
  3 | TODO add discussion of custom pointers and Deref trait (maybe later, not here)
  4 | 
  5 | So far we've covered unique and borrowed pointers. Unique pointers are very
  6 | similar to the new std::unique_ptr in C++ and borrowed references are the
  7 | 'default' pointer you usually reach for if you would use a pointer or reference
  8 | in C++. Rust has a few more, rarer pointers either in the libraries or built in
  9 | to the language. These are mostly similar to various kinds of smart pointers you
 10 | might be used to in C++.
 11 | 
 12 | This post took a while to write and I still don't like it. There are a lot of
 13 | loose ends here, both in my write up and in Rust itself. I hope some will get
 14 | better with later posts and some will get better as the language develops. If
 15 | you are learning Rust, you might even want to skip this stuff for now, hopefully
 16 | you won't need it. It's really here just for completeness after the posts on
 17 | other pointer types.
 18 | 
 19 | It might feel like Rust has a lot of pointer types, but it is pretty similar to
 20 | C++ once you think about the various kinds of smart pointers available in
 21 | libraries. In Rust, however, you are more likely to meet them when you first
 22 | start learning the language. Because Rust pointers have compiler support, you
 23 | are also much less likely to make errors when using them.
 24 | 
 25 | I'm not going to cover these in as much detail as unique and borrowed references
 26 | because, frankly, they are not as important. I might come back to them in more
 27 | detail later on.
 28 | 
 29 | ## Rc<T>
 30 | 
 31 | Reference counted pointers come as part of the Rust standard library. They are
 32 | in the `std::rc` module (we'll cover modules soon-ish. The modules are the
 33 | reason for the `use` incantations in the examples). A reference counted pointer
 34 | to an object of type `T` has type `Rc<T>`. You create reference counted pointers
 35 | using a static method (which for now you can think of like C++'s, but we'll see
 36 | later they are a bit different) - `Rc::new(...)` which takes a value to create
 37 | the pointer to. This constructor method follows Rust's usual move/copy semantics
 38 | (like we discussed for unique pointers) - in either case, after calling Rc::new,
 39 | you will only be able to access the value via the pointer.
 40 | 
 41 | As with the other pointer types, the `.` operator does all the dereferencing you
 42 | need it to. You can use `*` to manually dereference.
 43 | 
 44 | To pass a ref-counted pointer you need to use the `clone` method. This kinda
 45 | sucks, and hopefully we'll fix that, but that is not for sure (sadly). You can
 46 | take a (borrowed) reference to the pointed at value, so hopefully you don't need
 47 | to clone too often. Rust's type system ensures that the ref-counted variable
 48 | will not be deleted before any references expire. Taking a reference has the
 49 | added advantage that it doesn't need to increment or decrement the ref count,
 50 | and so will give better performance (although, that difference is probably
 51 | marginal since Rc objects are limited to a single thread and so the ref count
 52 | operations don't have to be atomic). As in C++, you can also take a reference to
 53 | the Rc pointer.
 54 | 
 55 | An Rc example:
 56 | 
 57 | ```rust
 58 | use std::rc::Rc;
 59 | 
 60 | fn bar(x: Rc<i32>) { }
 61 | fn baz(x: &i32) { }
 62 | 
 63 | fn foo() {
 64 |     let x = Rc::new(45);
 65 |     bar(x.clone());   // Increments the ref-count
 66 |     baz(&*x);         // Does not increment
 67 |     println!("{}", 100 - *x);
 68 | }  // Once this scope closes, all Rc pointers are gone, so ref-count == 0
 69 |    // and the memory will be deleted.
 70 | ```
 71 | 
 72 | Ref counted pointers are always immutable. If you want a mutable ref-counted
 73 | object you need to use a RefCell (or Cell) wrapped in an `Rc`.
 74 | 
 75 | ## Cell and RefCell
 76 | 
 77 | Cell and RefCell are structs which allow you to 'cheat' the mutability rules.
 78 | This is kind of hard to explain without first covering Rust data structures and
 79 | how they work with mutability, so I'm going to come back to these slightly
 80 | tricky objects later. For now, you should know that if you want a mutable, ref
 81 | counted object you need a Cell or RefCell wrapped in an Rc. As a first
 82 | approximation, you probably want Cell for primitive data and RefCell for objects
 83 | with move semantics. So, for a mutable, ref-counted integer you would use
 84 | `Rc<Cell<i32>>`.
 85 | 
 86 | ## \*T - raw pointers
 87 | 
 88 | Finally, Rust has two kinds of raw pointers (aka unsafe pointers): `*const T`
 89 | for an immutable raw pointer, and `*mut T` for a mutable raw pointer. They are
 90 | created using `&` or `&mut` (you might need to specify a type to get a `*T`
 91 | rather than a `&T` since the `&` operator can create either a borrowed reference
 92 | or a raw pointer). Raw pointers are like C pointers, just a pointer to memory
 93 | with no restrictions on how they are used (you can't do pointer arithmetic
 94 | without casting, but you can do it that way if you must). Raw pointers are the
 95 | only pointer type in Rust which can be null. There is no automatic dereferencing
 96 | of raw pointers (so to call a method you have to write `(*x).foo()`) and no
 97 | automatic referencing. The most important restriction is that they can't be
 98 | dereferenced (and thus can't be used) outside of an unsafe block. In regular
 99 | Rust code you can only pass them around.
100 | 
101 | So, what is unsafe code? Rust has strong safety guarantees, and (rarely) they
102 | prevent you doing something you need to do. Since Rust aims to be a systems
103 | language, it has to be able to do anything that is possible and sometimes that
104 | means doing things the compiler can't verify is safe. To accomplish that, Rust
105 | has the concept of unsafe blocks, marked by the `unsafe` keyword. In unsafe code
106 | you can do unsafe things - dereference a raw pointer, index into an array
107 | without bounds checking, call code written in another language via the FFI, or
108 | cast variables. Obviously, you have to be much more careful writing unsafe code
109 | than writing regular Rust code. In fact, you should only very rarely write
110 | unsafe code. Mostly it is used in very small chunks in libraries, rather than in
111 | client code. In unsafe code you must do all the things you normally do in C++ to
112 | ensure safety. Furthermore, you must manually ensure that you maintain the
113 | invariants which the compiler would usually enforce. Unsafe blocks allow you to
114 | manually enforce Rust's invariants; they do not allow you to break those
115 | invariants. If you do, you can introduce bugs both in safe and unsafe code.
116 | 
117 | An example of using a raw pointer:
118 | 
119 | ```rust
120 | fn foo() {
121 |     let mut x = 5;
122 |     let x_p: *mut i32 = &mut x;
123 |     println!("x+5={}", add_5(x_p));
124 | }
125 | 
126 | fn add_5(p: *mut i32) -> i32 {
127 |     unsafe {
128 |         if !p.is_null() { // Note that *-pointers do not auto-deref, so this is
129 |                           // a method implemented on *i32, not i32.
130 |             *p + 5
131 |         } else {
132 |             -1            // Not a recommended error handling strategy.
133 |         }
134 |     }
135 | }
136 | ```
137 | 
138 | And that concludes our tour of Rust's pointers. Next time we'll take a break
139 | from pointers and look at Rust's data structures. We'll come back to borrowed
140 | references again in a later post though.
141 | 


--------------------------------------------------------------------------------
/unique.md:
--------------------------------------------------------------------------------
  1 | # Unique pointers
  2 | 
  3 | Rust is a systems language and therefore must give you raw access to memory. It
  4 | does this (as in C++) via pointers. Pointers are one area where Rust and C++ are
  5 | very different, both in syntax and semantics. Rust enforces memory safety by
  6 | type checking pointers. That is one of its major advantages over other
  7 | languages. Although the type system is a bit complex, you get memory safety and
  8 | bare-metal performance in return.
  9 | 
 10 | I had intended to cover all of Rust's pointers in one post, but I think the
 11 | subject is too large. So this post will cover just one kind - unique pointers -
 12 | and other kinds will be covered in follow up posts.
 13 | 
 14 | First, an example without pointers:
 15 | 
 16 | ```rust
 17 | fn foo() {
 18 |     let x = 75;
 19 | 
 20 |     // ... do something with `x` ...
 21 | }
 22 | ```
 23 | 
 24 | When we reach the end of `foo`, `x` goes out of scope (in Rust as in C++). That
 25 | means the variable can no longer be accessed and the memory for the variable can
 26 | be reused.
 27 | 
 28 | In Rust, for every type `T` we can write `Box<T>` for an owning (aka unique)
 29 | pointer to `T`. We use `Box::new(...)` to allocate space on the heap and
 30 | initialise that space with the supplied value. This is similar to `new` in C++.
 31 | For example,
 32 | 
 33 | ```rust
 34 | fn foo() {
 35 |     let x = Box::new(75);
 36 | }
 37 | ```
 38 | 
 39 | Here `x` is a pointer to a location on the heap which contains the value `75`.
 40 | `x` has type `Box<i32>`; we could have written `let x: Box<i32> =
 41 | Box::new(75);`. This is similar to writing `int* x = new int(75);` in C++.
 42 | Unlike in C++, Rust will tidy up the memory for us, so there is no need to call
 43 | `free` or `delete`[^1]. Unique pointers behave similarly to
 44 | values - they are deleted when the variable goes out of scope. In our example,
 45 | at the end of the function `foo`, `x` can no longer be accessed and the memory
 46 | pointed at by `x` can be reused.
 47 | 
 48 | Owning pointers are dereferenced using the `*` as in C++. E.g.,
 49 | 
 50 | ```rust
 51 | fn foo() {
 52 |     let x = Box::new(75);
 53 |     println!("`x` points to {}", *x);
 54 | }
 55 | ```
 56 | 
 57 | As with primitive types in Rust, owning pointers and the data they point to are
 58 | immutable by default. Unlike in C++, you can't have a mutable (unique) pointer to
 59 | immutable data or vice versa. Mutability of the data follows from the pointer.
 60 | E.g.,
 61 | 
 62 | ```rust
 63 | fn foo() {
 64 |     let x = Box::new(75);
 65 |     let y = Box::new(42);
 66 |     // x = y;         // Not allowed, x is immutable.
 67 |     // *x = 43;       // Not allowed, *x is immutable.
 68 |     let mut x = Box::new(75);
 69 |     x = y;            // OK, x is mutable.
 70 |     *x = 43;          // OK, *x is mutable.
 71 | }
 72 | ```
 73 | 
 74 | Owning pointers can be returned from a function and continue to live on. If they
 75 | are returned, then their memory will not be freed, i.e., there are no dangling
 76 | pointers in Rust. The memory will not leak. However, it will eventually go out of
 77 | scope and then it will be freed. E.g.,
 78 | 
 79 | ```rust
 80 | fn foo() -> Box<i32> {
 81 |     let x = Box::new(75);
 82 |     x
 83 | }
 84 | 
 85 | fn bar() {
 86 |     let y = foo();
 87 |     // ... use y ...
 88 | }
 89 | ```
 90 | 
 91 | Here, memory is initialised in `foo`, and returned to `bar`. `x` is returned
 92 | from `foo` and stored in `y`, so it is not deleted. At the end of `bar`, `y`
 93 | goes out of scope and so the memory is reclaimed.
 94 | 
 95 | Owning pointers are unique (also called linear) because there can be only one
 96 | (owning) pointer to any piece of memory at any time. This is accomplished by
 97 | move semantics. When one pointer points at a value, any previous pointer can no
 98 | longer be accessed. E.g.,
 99 | 
100 | ```rust
101 | fn foo() {
102 |     let x = Box::new(75);
103 |     let y = x;
104 |     // x can no longer be accessed
105 |     // let z = *x;   // Error.
106 | }
107 | ```
108 | 
109 | Likewise, if an owning pointer is passed to another function or stored in a
110 | field, it can no longer be accessed:
111 | 
112 | ```rust
113 | fn bar(y: Box<isize>) {
114 | }
115 | 
116 | fn foo() {
117 |     let x = Box::new(75);
118 |     bar(x);
119 |     // x can no longer be accessed
120 |     // let z = *x;   // Error.
121 | }
122 | ```
123 | 
124 | Rust's unique pointers are similar to C++ `std::unique_ptr`s. In Rust, as in
125 | C++, there can be only one unique pointer to a value and that value is deleted
126 | when the pointer goes out of scope. Rust does most of its checking statically
127 | rather than at runtime. So, in C++ accessing a unique pointer whose value has
128 | moved will result in a runtime error (since it will be null). In Rust this
129 | produces a compile time error and you cannot go wrong at runtime.
130 | 
131 | We'll see later that it is possible to create other pointer types which point at
132 | a unique pointer's value in Rust. This is similar to C++. However, in C++ this
133 | allows you to cause errors at runtime by holding a pointer to freed memory. That
134 | is not possible in Rust (we'll see how when we cover Rust's other pointer
135 | types).
136 | 
137 | As shown above, owning pointers must be dereferenced to use their values.
138 | However, method calls automatically dereference, so there is no need for a `->`
139 | operator or to use `*` for method calls. In this way, Rust pointers are a bit
140 | similar to both pointers and references in C++. E.g.,
141 | 
142 | ```rust
143 | fn bar(x: Box<Foo>, y: Box<Box<Box<Box<Foo>>>>) {
144 |     x.foo();
145 |     y.foo();
146 | }
147 | ```
148 | 
149 | Assuming that the type `Foo` has a method `foo()`, both these expressions are OK.
150 | 
151 | Calling `Box::new()` with an existing value does not take a reference to that
152 | value, it copies that value. So,
153 | 
154 | ```rust
155 | fn foo() {
156 |     let x = 3;
157 |     let mut y = Box::new(x);
158 |     *y = 45;
159 |     println!("x is still {}", x);
160 | }
161 | ```
162 | 
163 | In general, Rust has move rather than copy semantics (as seen above with unique
164 | pointers). Primitive types have copy semantics, so in the above example the
165 | value `3` is copied, but for more complex values it would be moved. We'll cover
166 | this in more detail later.
167 | 
168 | Sometimes when programming, however, we need more than one reference to a value.
169 | For that, Rust has borrowed pointers. I'll cover those in the next post.
170 | 
171 | 
172 | [^1]: The `std::unique_ptr<T>`, introduced in C++11, is similar in some aspects
173 |     to Rust's `Box<T>` but there are also significant differences.
174 | 
175 |     Similarities:
176 |     * The memory pointed to by a `std::unique_ptr<T>` in C++11 and a `Box<T>` in Rust
177 |     is automatically released once the pointer goes out of scope.
178 |     * Both C++11's `std::unique_ptr<T>` and Rust's `Box<T>` only exhibit move semantics.
179 | 
180 |     Differences:
181 | 
182 |     1. C++11 allows for a `std::unique_ptr<T>` to be constructed from an existing pointer,
183 |        thereby allowing multiple unique pointers to the same memory.
184 |        This behaviour is not permitted with `Box<T>`.
185 |     2. Dereferencing a `std::unique_ptr<T>` that has been moved to another variable or function,
186 |        causes undefined behavior in C++11. This would be caught at compile time in Rust.
187 |     3. Mutability or immutability does not go "through" `std::unique_ptr<T>`
188 |        -- dereferencing a `const std::unique_ptr<T>` still yields a mutable
189 |        (non-`const`) reference to the underlying data. In Rust, an immutable
190 |        `Box<T>` does not allow mutation of the data it points to.
191 | 
192 |     `let x = Box::new(75)` in Rust may be interpreted as `const auto x =
193 |     std::unique_ptr<const int>{new int{75}};` in C++11 and `const auto x =
194 |     std::make_unique<const int>(75);` in C++14.
195 | 


--------------------------------------------------------------------------------
/control-flow.md:
--------------------------------------------------------------------------------
  1 | # Control flow
  2 | 
  3 | ## If
  4 | 
  5 | The `if` statement is pretty much the same in Rust as C++. One difference is
  6 | that the braces are mandatory, but parentheses around the expression being tested
  7 | are not. Another is that `if` is an expression, so you can use it the same way
  8 | as the ternary `?:` operator in C++ (remember from the previous section that if the last
  9 | expression in a block is not terminated by a semi-colon, then it becomes the
 10 | value of the block). There is no ternary `?:` in Rust. So, the following two
 11 | functions do the same thing:
 12 | 
 13 | ```rust
 14 | fn foo(x: i32) -> &'static str {
 15 |     let result: &'static str;
 16 |     if x < 10 {
 17 |         result = "less than 10";
 18 |     } else {
 19 |         result = "10 or more";
 20 |     }
 21 |     return result;
 22 | }
 23 | 
 24 | fn bar(x: i32) -> &'static str {
 25 |     if x < 10 {
 26 |         "less than 10"
 27 |     } else {
 28 |         "10 or more"
 29 |     }
 30 | }
 31 | ```
 32 | 
 33 | (Why not `mut result`? The code in `foo` makes `result` immutable, it's just initialized in two possible places. Rust can see that by the time of `return result`, it is guaranteed to have been initialized.)
 34 | 
 35 | The first is a fairly literal translation of what you might write in C++. The
 36 | second is better Rust style.
 37 | 
 38 | You can also write `let result = if x < 10 ...`, etc.
 39 | 
 40 | 
 41 | ## Loops
 42 | 
 43 | Rust has while loops, again just like C++:
 44 | 
 45 | ```rust
 46 | fn main() {
 47 |     let mut x = 10;
 48 |     while x > 0 {
 49 |         println!("Current value: {}", x);
 50 |         x -= 1;
 51 |     }
 52 | }
 53 | ```
 54 | 
 55 | There is no `do...while` loop in Rust, but there is the `loop` statement which
 56 | just loops forever:
 57 | 
 58 | ```rust
 59 | fn main() {
 60 |     loop {
 61 |         println!("Just looping");
 62 |     }
 63 | }
 64 | ```
 65 | 
 66 | Rust has `break` and `continue` just like C++.
 67 | 
 68 | 
 69 | ## For loops
 70 | 
 71 | Rust also has `for` loops, but these are a bit different. Let's say you have a
 72 | vector of integers and you want to print them all (we'll cover vectors/arrays,
 73 | iterators, and generics in more detail in the future. For now, know that a
 74 | `Vec<T>` is a sequence of `T`s and `iter()` returns an iterator from anything
 75 | you might reasonably want to iterate over). A simple `for` loop would look like:
 76 | 
 77 | ```rust
 78 | fn print_all(all: Vec<i32>) {
 79 |     for a in all.iter() {
 80 |         println!("{}", a);
 81 |     }
 82 | }
 83 | ```
 84 | 
 85 | TODO also &all/all instead of all.iter()
 86 | 
 87 | If we want to iterate over the indices of `all` (a bit more like a standard C++
 88 | for loop over an array), we could do
 89 | 
 90 | ```rust
 91 | fn print_all(all: Vec<i32>) {
 92 |     for i in 0..all.len() {
 93 |         println!("{}: {}", i, all[i]);
 94 |     }
 95 | }
 96 | ```
 97 | 
 98 | Hopefully, it is obvious what the `len` function does. TODO range notation
 99 | 
100 | A more Rust-like equivalent of the preceding example would be to use an
101 | enumerating iterator:
102 | 
103 | ```rust
104 | fn print_all(all: Vec<i32>) {
105 |     for (i, a) in all.iter().enumerate() {
106 |         println!("{}: {}", i, a);
107 |     }
108 | }
109 | ```
110 | 
111 | Where `enumerate()` chains from the iterator `iter()` and yields the current
112 | count and the element during iteration.
113 | 
114 | *The following example incorporates more advanced topics covered in the section
115 | on [Borrowed Pointers](borrowed.md).* Let's say you have a vector of integers
116 | and want to call the function, passing the vector by reference and have the
117 | vector modified in place. Here the `for` loop uses a mutable iterator which
118 | gives mutable references - the `*` dereferencing should be familiar to C++
119 | programmers:
120 | 
121 | ```rust
122 | fn double_all(all: &mut Vec<i32>) {
123 |     for a in all.iter_mut() {
124 |         *a += *a;
125 |     }
126 | }
127 | ```
128 | 
129 | 
130 | ## Switch/Match
131 | 
132 | Rust has a match expression which is similar to a C++ switch statement, but much
133 | more powerful. This simple version should look pretty familiar:
134 | 
135 | ```rust
136 | fn print_some(x: i32) {
137 |     match x {
138 |         0 => println!("x is zero"),
139 |         1 => println!("x is one"),
140 |         10 => println!("x is ten"),
141 |         y => println!("x is something else {}", y),
142 |     }
143 | }
144 | ```
145 | 
146 | There are some syntactic differences - we use `=>` to go from the matched value
147 | to the expression to execute, and the match arms are separated by `,` (that last
148 | `,` is optional). There are also some semantic differences which are not so
149 | obvious: the matched patterns must be exhaustive, that is all possible values of
150 | the matched expression (`x` in the above example) must be covered. Try removing
151 | the `y => ...` line and see what happens (you get a compile error); that is because we only have matches
152 | for 0, 1, and 10, but there are obviously lots of other integers which don't get
153 | matched. In that last arm, `y` is bound to the value being matched (`x` in this
154 | case). We could also write:
155 | 
156 | ```rust
157 | fn print_some(x: i32) {
158 |     match x {
159 |         x => println!("x is something else {}", x)
160 |     }
161 | }
162 | ```
163 | 
164 | Here the `x` in the match arm introduces a new variable which hides the argument
165 | `x`, just like declaring a variable in an inner scope.
166 | 
167 | If we don't want to name the variable, we can use `_` for an unnamed variable,
168 | which is like having a wildcard match. If we don't want to do anything, we can
169 | provide an empty branch:
170 | 
171 | ```rust
172 | fn print_some(x: i32) {
173 |     match x {
174 |         0 => println!("x is zero"),
175 |         1 => println!("x is one"),
176 |         10 => println!("x is ten"),
177 |         _ => {}
178 |     }
179 | }
180 | ```
181 | 
182 | Another semantic difference is that there is no fall through from one arm to the
183 | next so it works like `if...else if...else`.
184 | 
185 | We'll see in later posts that match is extremely powerful. For now I want to
186 | introduce just a couple more features - the 'or' operator for values and `if`
187 | clauses on arms. Hopefully an example is self-explanatory:
188 | 
189 | ```rust
190 | fn print_some_more(x: i32) {
191 |     match x {
192 |         0 | 1 | 10 => println!("x is one of zero, one, or ten"),
193 |         y if y < 20 => println!("x is less than 20, but not zero, one, or ten"),
194 |         y if y == 200 => println!("x is 200 (but this is not very stylish)"),
195 |         _ => {}
196 |     }
197 | }
198 | ```
199 | 
200 | Just like `if` expressions, `match` statements are actually expressions so we
201 | could re-write the last example as:
202 | 
203 | ```rust
204 | fn print_some_more(x: i32) {
205 |     let msg = match x {
206 |         0 | 1 | 10 => "one of zero, one, or ten",
207 |         y if y < 20 => "less than 20, but not zero, one, or ten",
208 |         y if y == 200 => "200 (but this is not very stylish)",
209 |         _ => "something else"
210 |     };
211 | 
212 |     println!("x is {}", msg);
213 | }
214 | ```
215 | 
216 | Note the semi-colon after the closing brace, that is because the `let` statement
217 | is a statement and must take the form `let msg = ...;`. We fill the rhs with a
218 | match expression (which doesn't usually need a semi-colon), but the `let`
219 | statement does. This catches me out all the time.
220 | 
221 | Motivation: Rust match statements avoid the common bugs with C++ switch
222 | statements - you can't forget a `break` and unintentionally fall through; if you
223 | add a case to an enum (more later on) the compiler will make sure it is covered
224 | by your `match` statement.
225 | 
226 | 
227 | ## Method call
228 | 
229 | Finally, just a quick note that methods exist in Rust, similarly to C++. They
230 | are always called via the `.` operator (no `->`, more on this in another post).
231 | We saw a few examples above (`len`, `iter`). We'll go into more detail in the
232 | future about how they are defined and called. Most assumptions you might make
233 | from C++ or Java are probably correct.
234 | 


--------------------------------------------------------------------------------
/hello-world.md:
--------------------------------------------------------------------------------
  1 | # Introduction - hello world!
  2 | 
  3 | If you are using C or C++, it is probably because you have to - either you need
  4 | low-level access to the system, or need every last drop of performance, or both.
  5 | Rust aims to offer the same level of abstraction around memory, the same
  6 | performance, but be safer and make you more productive.
  7 | 
  8 | Concretely, there are many languages out there that you might prefer to use to
  9 | C++: Java, Scala, Haskell, Python, and so forth, but you can't because either
 10 | the level of abstraction is too high (you don't get direct access to memory,
 11 | you are forced to use garbage collection, etc.), or there are performance issues
 12 | (either performance is unpredictable or it's simply not fast enough). Rust does
 13 | not force you to use garbage collection, and as in C++, you get raw pointers to
 14 | memory to play with. Rust subscribes to the 'pay for what you use' philosophy of
 15 | C++. If you don't use a feature, then you don't pay any performance overhead for
 16 | its existence. Furthermore, all language features in Rust have a predictable (and
 17 | usually small) cost.
 18 | 
 19 | Whilst these constraints make Rust a (rare) viable alternative to C++, Rust also
 20 | has benefits: it is memory safe - Rust's type system ensures that you don't get
 21 | the kind of memory errors which are common in C++ - accessing un-initialised
 22 | memory, and dangling pointers - all are impossible in Rust. Furthermore,
 23 | whenever other constraints allow, Rust strives to prevent other safety issues
 24 | too - for example, all array indexing is bounds checked (of course, if you want
 25 | to avoid the cost, you can (at the expense of safety) - Rust allows you to do
 26 | this in unsafe blocks, along with many other unsafe things. Crucially, Rust
 27 | ensures that unsafety in unsafe blocks stays in unsafe blocks and can't affect
 28 | the rest of your program). Finally, Rust takes many concepts from modern
 29 | programming languages and introduces them to the systems language space.
 30 | Hopefully, that makes programming in Rust more productive, efficient, and
 31 | enjoyable.
 32 | 
 33 | In the rest of this section we'll download and install Rust, create a minimal
 34 | Cargo project, and implement Hello World.
 35 | 
 36 | 
 37 | ## Getting Rust
 38 | 
 39 | You can get Rust from [http://www.rust-lang.org/tools/install](http://www.rust-lang.org/tools/install).
 40 | The downloads from there include the Rust compiler, standard libraries, and
 41 | Cargo, which is a package manager and build tool for Rust.
 42 | 
 43 | Rust is available on three channels: stable, beta, and nightly. Rust works on a
 44 | rapid-release schedule, with new releases every six weeks. On the release date,
 45 | nightly becomes beta and beta becomes stable.
 46 | 
 47 | Nightly is updated every night and is ideal for users who want to experiment with
 48 | cutting edge features and ensure that their libraries will work with future Rust.
 49 | 
 50 | Stable is the right choice for most users. Rust's stability guarantees only
 51 | apply to the stable channel.
 52 | 
 53 | Beta is designed to mostly be used in users' CI to check that their code will
 54 | continue to work as expected.
 55 | 
 56 | So, you probably want the stable channel. If you're on Linux or OS X, the
 57 | easiest way to get it is to run
 58 | 
 59 | ```
 60 | curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh
 61 | ```
 62 | 
 63 | On Windows, a similarly easy way would be to run
 64 | 
 65 | ```
 66 | choco install rust
 67 | ```
 68 | 
 69 | For other ways to install, see [http://www.rust-lang.org/tools/install](http://www.rust-lang.org/tools/install).
 70 | 
 71 | You can find the source at [github.com/rust-lang/rust](https://github.com/rust-lang/rust).
 72 | To build the compiler, run `./configure && make rustc`. See
 73 | [building-from-source](https://github.com/rust-lang/rust#building-from-source)
 74 | for more detailed instructions.
 75 | 
 76 | 
 77 | ## Hello World!
 78 | 
 79 | The easiest and most common way to build Rust programs is to use Cargo. To start
 80 | a project called `hello` using Cargo, run `cargo new --bin hello`. This will
 81 | create a new directory called `hello` inside which is a `Cargo.toml` file and
 82 | a `src` directory with a file called `main.rs`.
 83 | 
 84 | `Cargo.toml` defines dependencies and other metadata about our project. We'll
 85 | come back to it in detail later.
 86 | 
 87 | All our source code will go in the `src` directory. `main.rs` already contains
 88 | a Hello World program. It looks like this:
 89 | 
 90 | ```rust
 91 | fn main() {
 92 |     println!("Hello, world!");
 93 | }
 94 | ```
 95 | 
 96 | To build the program, run `cargo build`. To build and run it, `cargo run`. If
 97 | you do the latter, you should be greeted in the console. Success!
 98 | 
 99 | Cargo will have made a `target` directory and put the executable in there.
100 | 
101 | If you want to use the compiler directly you can run `rustc src/main.rs` which
102 | will create an executable called `main`. See `rustc --help` for lots of
103 | options.
104 | 
105 | OK, back to the code. A few interesting points - we use `fn` to define a
106 | function or method. `main()` is the default entry point for our programs (we'll
107 | leave program args for later). There are no separate declarations or header
108 | files as with C++. `println!` is Rust's equivalent of printf. The `!` means that
109 | it is a macro. A subset of the standard library is available without needing to
110 | be explicitly imported/included (the prelude). The `println!` macro is included
111 | as part of that subset.
112 | 
113 | Let's change our example a little bit:
114 | 
115 | ```rust
116 | fn main() {
117 |     let world = "world";
118 |     println!("Hello {}!", world);
119 | }
120 | ```
121 | 
122 | `let` is used to introduce a variable, `world` is the variable name and it is a
123 | string (technically the type is `&'static str`, but more on that later). We
124 | don't need to specify the type, it will be inferred for us.
125 | 
126 | Using `{}` in the `println!` statement is like using `%s` in printf. In fact, it
127 | is a bit more general than that because Rust will try to convert the variable to
128 | a string if it is not one already[^1] (like `operator<<()` in C++).
129 | You can easily play around with this sort of thing - try multiple strings and
130 | using numbers (integer and float literals will work).
131 | 
132 | If you like, you can explicitly give the type of `world`:
133 | 
134 | ```rust
135 | let world: &'static str = "world";
136 | ```
137 | 
138 | In C++ we write `T x` to declare a variable `x` with type `T`. In Rust we write
139 | `x: T`, whether in `let` statements or function signatures, etc. Mostly we omit
140 | explicit types in `let` statements, but they are required for function
141 | arguments. Let's add another function to see it work:
142 | 
143 | ```rust
144 | fn foo(_x: &'static str) -> &'static str {
145 |     "world"
146 | }
147 | 
148 | fn main() {
149 |     println!("Hello {}!", foo("bar"));
150 | }
151 | ```
152 | 
153 | The function `foo` has a single argument `_x` which is a string literal (we pass
154 | it "bar" from `main`)[^2].
155 | 
156 | The return type for a function is given after `->`. If the function doesn't
157 | return anything (a void function in C++), we don't need to give a return type at
158 | all (as in `main`). If you want to be super-explicit, you can write `-> ()`,
159 | `()` is the void type in Rust.
160 | 
161 | You don't need the `return` keyword in Rust, if the last expression in a
162 | function body (or any other block, we'll see more of this later) is not finished
163 | with a semicolon, then it is the return value. So `foo` will return
164 | "world". The `return` keyword still exists so we can do early returns. You can
165 | replace `"world"` with `return "world";` and it will have the same effect.
166 | 
167 | 
168 | ## Why?
169 | 
170 | I would like to motivate some of the language features above. Local type
171 | inference is convenient and useful without sacrificing safety or performance
172 | (it's even in modern versions of C++ now). A minor convenience is that language
173 | items are consistently denoted by keyword (`fn`, `let`, etc.), this makes
174 | scanning by eye or by tools easier, in general the syntax of Rust is simpler and
175 | more consistent than C++. The `println!` macro is safer than printf - the number
176 | of arguments is statically checked against the number of 'holes' in the string
177 | and the arguments are type checked. This means you can't make the printf
178 | mistakes of printing memory as if it had a different type or addressing memory
179 | further down the stack by mistake. These are fairly minor things, but I hope
180 | they illustrate the philosophy behind the design of Rust.
181 | 
182 | 
183 | [^1]: This is a programmer specified conversion which uses the `Display` trait, which
184 | works a bit like `toString` in Java. You can also use `{:?}` which gives a
185 | compiler generated representation which is sometimes useful for debugging. As
186 | with printf, there are many other options.
187 | 
188 | [^2]: We don't actually use that argument in `foo`. Usually,
189 | Rust will warn us about this. By prefixing the argument name with `_` we avoid
190 | these warnings. In fact, we don't need to name the argument at all, we could
191 | just use `_`.
192 | 


--------------------------------------------------------------------------------
/borrowed.md:
--------------------------------------------------------------------------------
  1 | # Borrowed pointers
  2 | 
  3 | In the last post I introduced unique pointers. This time I will talk about
  4 | another kind of pointer which is much more common in most Rust programs:
  5 | borrowed pointers (aka borrowed references, or just references).
  6 | 
  7 | If we want to have a reference to an existing value (as opposed to creating a
  8 | new value on the heap and pointing to it, as with unique pointers), we must use
  9 | `&`, a borrowed reference. These are probably the most common kind of pointer in
 10 | Rust, and if you want something to fill in for a C++ pointer or reference (e.g.,
 11 | for passing a parameter to a function by reference), this is probably it.
 12 | 
 13 | We use the `&` operator to create a borrowed reference and to indicate reference
 14 | types, and `*` to dereference them. The same rules about automatic dereferencing
 15 | apply as for unique pointers. For example,
 16 | 
 17 | ```rust
 18 | fn foo() {
 19 |     let x = &3;   // type: &i32
 20 |     let y = *x;   // 3, type: i32
 21 |     bar(x, *x);
 22 |     bar(&y, y);
 23 | }
 24 | 
 25 | fn bar(z: &i32, i: i32) {
 26 |     // ...
 27 | }
 28 | ```
 29 | 
 30 | The `&` operator does not allocate memory (we can only create a borrowed
 31 | reference to an existing value) and if a borrowed reference goes out of scope,
 32 | no memory gets deleted.
 33 | 
 34 | Borrowed references are not unique - you can have multiple borrowed references
 35 | pointing to the same value. E.g.,
 36 | 
 37 | ```rust
 38 | fn foo() {
 39 |     let x = 5;                // type: i32
 40 |     let y = &x;               // type: &i32
 41 |     let z = y;                // type: &i32
 42 |     let w = y;                // type: &i32
 43 |     println!("These should all be 5: {} {} {}", *w, *y, *z);
 44 | }
 45 | ```
 46 | 
 47 | Like values, borrowed references are immutable by default. You can also use
 48 | `&mut` to take a mutable reference, or to denote mutable reference types.
 49 | Mutable borrowed references are unique (you can only take a single mutable
 50 | reference to a value, and you can only have a mutable reference if there are no
 51 | immutable references). You can use a mutable reference where an immutable one is
 52 | wanted, but not vice versa. Putting all that together in an example:
 53 | 
 54 | ```rust
 55 | fn bar(x: &i32) { ... }
 56 | fn bar_mut(x: &mut i32) { ... }  // &mut i32 is a reference to an i32 which
 57 |                                  // can be mutated
 58 | 
 59 | fn foo() {
 60 |     let x = 5;
 61 |     //let xr = &mut x;     // Error - can't make a mutable reference to an
 62 |                            // immutable variable
 63 |     let xr = &x;           // Ok (creates an immutable ref)
 64 |     bar(xr);
 65 |     //bar_mut(xr);         // Error - expects a mutable ref
 66 | 
 67 |     let mut x = 5;
 68 |     let xr = &x;           // Ok (creates an immutable ref)
 69 |     //*xr = 4;             // Error - mutating immutable ref
 70 |     //let xr = &mut x;     // Error - there is already an immutable ref, so we
 71 |                            // can't make a mutable one
 72 | 
 73 |     let mut x = 5;
 74 |     let xr = &mut x;       // Ok (creates a mutable ref)
 75 |     *xr = 4;               // Ok
 76 |     //let xr2 = &x;        // Error - there is already a mutable ref, so we
 77 |                            // can't make an immutable one
 78 |     //let xr2 = &mut x;    // Error - can only have one mutable ref at a time
 79 |     bar(xr);               // Ok
 80 |     bar_mut(xr);           // Ok
 81 | }
 82 | ```
 83 | 
 84 | Note that the reference may be mutable (or not) independently of the mutableness
 85 | of the variable holding the reference. This is similar to C++ where pointers can
 86 | be const (or not) independently of the data they point to. This is in contrast
 87 | to unique pointers, where the mutableness of the pointer is linked to the
 88 | mutableness of the data. For example,
 89 | 
 90 | ```rust
 91 | fn foo() {
 92 |     let mut x = 5;
 93 |     let mut y = 6;
 94 |     let xr = &mut x;
 95 |     //xr = &mut y;        // Error xr is immutable
 96 | 
 97 |     let mut x = 5;
 98 |     let mut y = 6;
 99 |     let mut xr = &mut x;
100 |     xr = &mut y;          // Ok
101 | 
102 |     let x = 5;
103 |     let y = 6;
104 |     let mut xr = &x;
105 |     xr = &y;              // Ok - xr is mut, even though the referenced data is not
106 | }
107 | ```
108 | 
109 | If a mutable value is borrowed, it becomes immutable for the duration of the
110 | borrow. Once the borrowed pointer goes out of scope, the value can be mutated
111 | again. This is in contrast to unique pointers, which once moved can never be
112 | used again. For example,
113 | 
114 | ```rust
115 | fn foo() {
116 |     let mut x = 5;            // type: i32
117 |     {
118 |         let y = &x;           // type: &i32
119 |         //x = 4;              // Error - x has been borrowed
120 |         println!("{} {}", y, x);    // Ok - x can be read
121 |     }
122 |     x = 4;                    // OK - y no longer exists
123 | }
124 | ```
125 | 
126 | The same thing happens if we take a mutable reference to a value - the value
127 | still cannot be modified. In general in Rust, data can only ever be modified via
128 | one variable or pointer. Furthermore, since we have a mutable reference, we
129 | can't take an immutable reference. That limits how we can use the underlying
130 | value:
131 | 
132 | ```rust
133 | fn foo() {
134 |     let mut x = 5;            // type: i32
135 |     {
136 |         let y = &mut x;       // type: &mut i32
137 |         //x = 4;              // Error - x has been borrowed
138 |         //println!("{}", x);  // Error - requires borrowing x
139 |     }
140 |     x = 4;                    // OK - y no longer exists
141 | }
142 | ```
143 | 
144 | Unlike C++, Rust won't automatically reference a value for you. So if a function
145 | takes a parameter by reference, the caller must reference the actual parameter.
146 | However, pointer types will automatically be converted to a reference:
147 | 
148 | ```rust
149 | fn foo(x: &i32) { ... }
150 | 
151 | fn bar(x: i32, y: Box<i32>) {
152 |     foo(&x);
153 |     // foo(x);   // Error - expected &i32, found i32
154 |     foo(&y);     // Ok - &Box<i32> is automatically converted to &i32
155 |     foo(&*y);    // Also ok, and more explicit, but not good style
156 | }
157 | ```
158 | 
159 | ## `mut` vs `const`
160 | 
161 | At this stage it is probably worth comparing `mut` in Rust to `const` in C++.
162 | Superficially they are opposites. Values are immutable by default in Rust and
163 | can be made mutable by using `mut`. Values are mutable by default in C++, but
164 | can be made constant by using `const`. The subtler and more important difference
165 | is that C++ const-ness applies only to the current use of a value, whereas
166 | Rust's immutability applies to all uses of a value. So in C++ if I have a
167 | `const` variable, someone else could have a non-const reference to it and it
168 | could change without me knowing. In Rust if you have an immutable variable, you
169 | are guaranteed it won't change.
170 | 
171 | As we mentioned above, all mutable variables are unique. So if you have a
172 | mutable value, you know it is not going to change unless you change it.
173 | Furthermore, you can change it freely since you know that no one else is relying
174 | on it not changing.
175 | 
176 | ## Borrowing and lifetimes
177 | 
178 | One of the primary safety goals of Rust is to avoid dangling pointers (where a
179 | pointer outlives the memory it points to). In Rust, it is impossible to have a
180 | dangling borrowed reference. It is only legal to create a borrowed reference to
181 | memory which will be alive longer than the reference (well, at least as long as
182 | the reference). In other words, the lifetime of the reference must be no longer
183 | than the lifetime of the referenced value.
184 | 
185 | That has been accomplished in all the examples in this post. Scopes introduced
186 | by `{}` or functions are bounds on lifetimes - when a variable goes out of scope
187 | its lifetime ends. If we try to take a reference to a shorter lifetime, such as
188 | in a narrower scope, the compiler will give us an error. For example,
189 | 
190 | ```rust
191 | fn foo() {
192 |     let x = 5;
193 |     let mut xr = &x;        // Ok - x and xr have the same lifetime
194 |     {
195 |         let y = 6;
196 |         xr = &y             // Error - xr will outlive y
197 |     }                       // y is released here
198 |     println!("{:?}", xr);   // xr is used here so it outlives y. Try to comment out this line.
199 | }                           // x and xr are released here
200 | ```
201 | 
202 | In the above example, xr and y don't have the same lifetime because y starts
203 | later than xr, but it's the end of lifetimes which is more interesting, since you
204 | can't reference a variable before it exists in any case - something else which
205 | Rust enforces and which makes it safer than C++.
206 | 
207 | ## Explicit lifetimes
208 | 
209 | After playing with borrowed pointers for a while, you'll probably come across
210 | borrowed pointers with an explicit lifetime. These have the syntax `&'a T` ([cf.](https://en.wikipedia.org/wiki/Cf.)
211 | `&T`). They're kind of a big topic since I need to cover lifetime-polymorphism
212 | at the same time so I'll leave it for another post (there are a few more less
213 | common pointer types to cover first though). For now, I just want to say that
214 | `&T` is a shorthand for `&'a T` where `a` is the current scope, that is the
215 | scope in which the type is declared.
216 | 


--------------------------------------------------------------------------------
/destructuring-2.md:
--------------------------------------------------------------------------------
  1 | # Destructuring pt2 - match and borrowing
  2 | 
  3 | When destructuring there are some surprises in store where borrowing is
  4 | concerned. Hopefully, nothing surprising once you understand borrowed references
  5 | really well, but worth discussing (it took me a while to figure out, that's for
  6 | sure. Longer than I realised, in fact, since I screwed up the first version of
  7 | this blog post).
  8 | 
  9 | Imagine you have some `&Enum` variable `x` (where `Enum` is some enum type). You
 10 | have two choices: you can match `*x` and list all the variants (`Variant1 =>
 11 | ...`, etc.) or you can match `x` and list reference to variant patterns
 12 | (`&Variant1 => ...`, etc.). (As a matter of style, prefer the first form where
 13 | possible since there is less syntactic noise). `x` is a borrowed reference and
 14 | there are strict rules for how a borrowed reference can be dereferenced, these
 15 | interact with match expressions in surprising ways (at least surprising to me),
 16 | especially when you are modifying an existing enum in a seemingly innocuous way
 17 | and then the compiler explodes on a match somewhere.
 18 | 
 19 | Before we get into the details of the match expression, let's recap Rust's rules
 20 | for value passing. In C++, when assigning a value into a variable or passing it
 21 | to a function there are two choices - pass-by-value and pass-by-reference. The
 22 | former is the default case and means a value is copied either using a copy
 23 | constructor or a bitwise copy. If you annotate the destination of the parameter
 24 | pass or assignment with `&`, then the value is passed by reference - only a
 25 | pointer to the value is copied and when you operate on the new variable, you are
 26 | also operating on the old value.
 27 | 
 28 | Rust has the pass-by-reference option, although in Rust the source as well as
 29 | the destination must be annotated with `&`. For pass-by-value in Rust, there are
 30 | two further choices - copy or move. A copy is the same as C++'s semantics
 31 | (except that there are no copy constructors in Rust). A move copies the value
 32 | but destroys the old value - Rust's type system ensures you can no longer access
 33 | the old value. As examples, `i32` has copy semantics and `Box<i32>` has move
 34 | semantics:
 35 | 
 36 | ```rust
 37 | fn foo() {
 38 |     let x = 7i32;
 39 |     let y = x;                // x is copied
 40 |     println!("x is {}", x);   // OK
 41 | 
 42 |     let x = Box::new(7i32);
 43 |     let y = x;                // x is moved
 44 |     //println!("x is {}", x); // error: use of moved value: `x`
 45 | }
 46 | ```
 47 | 
 48 | You can also choose to have copy semantics for user-defined types
 49 | by implementing the `Copy` trait. One straightforward way to do that is 
 50 | to add `#[derive(Copy, Clone)]` (`Copy` requires `Clone`) before the definition of the `struct`. Not all
 51 | user-defined types are allowed to implement the `Copy` trait. All fields of 
 52 | a type must implement `Copy` and the type must not have a destructor. 
 53 | Destructors probably need a post of their own, but for now, an object 
 54 | in Rust has a destructor if it implements the `Drop` trait.
 55 | Just like C++, the destructor is executed just before an object is 
 56 | destroyed.
 57 | 
 58 | Now, it is important that a borrowed object is not moved, otherwise you would
 59 | have a reference to the old object which is no longer valid. This is equivalent
 60 | to holding a reference to an object which has been destroyed after going out of
 61 | scope - it is a kind of dangling pointer. If you have a pointer to an object,
 62 | there could be other references to it. So if an object has move semantics and
 63 | you have a pointer to it, it is unsafe to dereference that pointer. (If the
 64 | object has copy semantics, dereferencing creates a copy and the old object will
 65 | still exist, so other references will be fine).
 66 | 
 67 | OK, back to match expressions. As I said earlier, if you want to match some `x`
 68 | with type `&T` you can dereference once in the match clause or match the
 69 | reference in every arm of the match expression. Example:
 70 | 
 71 | ```rust
 72 | enum Enum1 {
 73 |     Var1,
 74 |     Var2,
 75 |     Var3
 76 | }
 77 | 
 78 | fn foo(x: &Enum1) {
 79 |     match *x {  // Option 1: deref here.
 80 |         Enum1::Var1 => {}
 81 |         Enum1::Var2 => {}
 82 |         Enum1::Var3 => {}
 83 |     }
 84 | 
 85 |     match x {
 86 |         // Option 2: 'deref' in every arm.
 87 |         &Enum1::Var1 => {}
 88 |         &Enum1::Var2 => {}
 89 |         &Enum1::Var3 => {}
 90 |     }
 91 | }
 92 | ```
 93 | 
 94 | In this case you can take either approach because `Enum1` has copy semantics.
 95 | Let's take a closer look at each approach: in the first approach we dereference
 96 | `x` to a temporary variable with type `Enum1` (which copies the value in `x`)
 97 | and then do a match against the three variants of `Enum1`. This is a 'one level'
 98 | match because we don't go deep into the value's type. In the second approach
 99 | there is no dereferencing. We match a value with type `&Enum1` against a
100 | reference to each variant. This match goes two levels deep - it matches the type
101 | (always a reference) and looks inside the type to match the referred type (which
102 | is `Enum1`).
103 | 
104 | Either way, we must ensure that we (that is, the compiler) respect 
105 | Rust's invariants around moves and references - we must not move any
106 | part of an object if it is referenced. If the value being matched has copy
107 | semantics, that is trivial. If it has move semantics then we must make sure that
108 | moves don't happen in any match arm. This is accomplished either by ignoring
109 | data which would move, or making references to it (so we get by-reference
110 | passing rather than by-move).
111 | 
112 | ```rust
113 | enum Enum2 {
114 |     // Box has a destructor so Enum2 has move semantics.
115 |     Var1(Box<i32>),
116 |     Var2,
117 |     Var3
118 | }
119 | 
120 | fn foo(x: &Enum2) {
121 |     match *x {
122 |         // We're ignoring nested data, so this is OK
123 |         Enum2::Var1(..) => {}
124 |         // No change to the other arms.
125 |         Enum2::Var2 => {}
126 |         Enum2::Var3 => {}
127 |     }
128 | 
129 |     match x {
130 |         // We're ignoring nested data, so this is OK
131 |         &Enum2::Var1(..) => {}
132 |         // No change to the other arms.
133 |         &Enum2::Var2 => {}
134 |         &Enum2::Var3 => {}
135 |     }
136 | }
137 | ```
138 | 
139 | In either approach we don't refer to any of the nested data, so none of it is
140 | moved. In the first approach, even though `x` is referenced, we don't touch its
141 | innards in the scope of the dereference (i.e., the match expression) so nothing
142 | can escape. We also don't bind the whole value (i.e., bind `*x` to a variable),
143 | so we can't move the whole object either.
144 | 
145 | We can take a reference to any variant in the second match, but not in the
146 | dereferenced version. So, in the second approach replacing the second arm with `a
147 | @ &Var2 => {}` is OK (`a` is a reference), but under the first approach we
148 | couldn't write `a @ Var2 => {}` since that would mean moving `*x` into `a`. We
149 | could write `ref a @ Var2 => {}` (in which `a` is also a reference), although
150 | it's not a construct you see very often.
151 | 
152 | But what about if we want to use the data nested inside `Var1`? We can't write:
153 | 
154 | ```rust
155 | match *x {
156 |     Enum2::Var1(y) => {}
157 |     _ => {}
158 | }
159 | ```
160 | 
161 | or
162 | 
163 | ```rust
164 | match x {
165 |     &Enum2::Var1(y) => {}
166 |     _ => {}
167 | }
168 | ```
169 | 
170 | because in both cases it means moving part of `x` into `y`. We can use the 'ref'
171 | keyword to get a reference to the data in `Var1`: `&Var1(ref y) => {}`. That is
172 | OK, because now we are not dereferencing anywhere and thus not moving any part
173 | of `x`. Instead we are creating a pointer which points into the interior of `x`.
174 | 
175 | Alternatively, we could destructure the Box (this match is going three levels
176 | deep): `&Var1(box y) => {}` (note that `box` pattern syntax is experimental as of
177 | rustc 1.58 and is available only in nightly versions of rustc).
178 | This is OK because `i32` has copy semantics and `y`
179 | is a copy of the `i32` inside the `Box` inside `Var1` (which is 'inside' a
180 | borrowed reference). Since `i32` has copy semantics, we don't need to move any
181 | part of `x`. We could also create a reference to the int rather than copy it:
182 | `&Var1(box ref y) => {}`. Again, this is OK, because we don't do any
183 | dereferencing and thus don't need to move any part of `x`. If the contents of
184 | the Box had move semantics, then we could not write `&Var1(box y) => {}`, we
185 | would be forced to use the reference version. We could also use similar
186 | techniques with the first approach to matching, which look the same but without
187 | the first `&`. For example, `Var1(box ref y) => {}`.
188 | 
189 | Now let's get more complex. Let's say you want to match against a pair of
190 | reference-to-enum values. Now we can't use the first approach at all:
191 | 
192 | ```rust
193 | fn bar(x: &Enum2, y: &Enum2) {
194 |     // Error: x and y are being moved.
195 |     // match (*x, *y) {
196 |     //     (Enum2::Var2, _) => {}
197 |     //     _ => {}
198 |     // }
199 | 
200 |     // OK.
201 |     match (x, y) {
202 |         (&Enum2::Var2, _) => {}
203 |         _ => {}
204 |     }
205 | }
206 | ```
207 | 
208 | The first approach is illegal because the value being matched is created by
209 | dereferencing `x` and `y` and then moving them both into a new tuple object. So
210 | in this circumstance, only the second approach works. And of course, you still
211 | have to follow the rules above for avoiding moving parts of `x` and `y`.
212 | 
213 | If you do end up only being able to get a reference to some data and you need
214 | the value itself, you have no option except to copy that data. Usually that
215 | means using `clone()`. If the data doesn't implement clone, you're going to have
216 | to further destructure to make a manual copy or implement clone yourself.
217 | 
218 | What if we don't have a reference to a value with move semantics, but the value
219 | itself? Now moves are OK, because we know no one else has a reference to the
220 | value (the compiler ensures that if they do, we can't use the value). For
221 | example,
222 | 
223 | ```rust
224 | fn baz(x: Enum2) {
225 |     match x {
226 |         Enum2::Var1(y) => {}
227 |         _ => {}
228 |     }
229 | }
230 | ```
231 | 
232 | There are still a few things to be aware of. Firstly, you can only move to one
233 | place. In the above example we are moving part of `x` into `y` and we'll forget
234 | about the rest. If we wrote `a @ Var1(y) => {}` we would be attempting to move
235 | all of `x` into `a` and part of `x` into `y`. That is not allowed, an arm like
236 | that is illegal. Making one of `a` or `y` a reference (using `ref a`, etc.) is
237 | not an option either, then we'd have the problem described above where we move
238 | whilst holding a reference. We can make both `a` and `y` references and then
239 | we're OK - neither is moving, so `x` remains intact and we have pointers to the
240 | whole and a part of it.
241 | 
242 | Similarly (and more common), if we have a variant with multiple pieces of nested
243 | data, we can't take a reference to one datum and move another. For example if we
244 | had a `Var4` declared as `Var4(Box<i32>, Box<i32>)` we can have a match arm
245 | which references both (`Var4(ref y, ref z) => {}`) or a match arm which moves
246 | both (`Var4(y, z) => {}`) but you cannot have a match arm which moves one and
247 | references the other (`Var4(ref y, z) => {}`). This is because a partial move
248 | still destroys the whole object, so the reference would be invalid.
249 | 


--------------------------------------------------------------------------------
/arrays.md:
--------------------------------------------------------------------------------
  1 | # Arrays and Vectors
  2 | 
  3 | Rust arrays are pretty different from C arrays. For starters they come in
  4 | statically and dynamically sized flavours. These are more commonly known as
  5 | fixed length arrays and slices. As we'll see, the former is kind of a bad name
  6 | since both kinds of array have fixed (as opposed to growable) length. For a
  7 | growable 'array', Rust provides the `Vec` collection.
  8 | 
  9 | 
 10 | ## Fixed length arrays
 11 | 
 12 | The length of a fixed length array is known statically and features in its
 13 | type. E.g., `[i32; 4]` is the type of an array of `i32`s with length four.
 14 | 
 15 | Array literal and array access syntax is the same as C:
 16 | 
 17 | ```rust
 18 | let a: [i32; 4] = [1, 2, 3, 4];     // As usual, the type annotation is optional.
 19 | println!("The second element is {}", a[1]);
 20 | ```
 21 | 
 22 | You'll notice that array indexing is zero-based, just like C.
 23 | 
 24 | However, unlike C/C++[^1], array indexing is bounds checked. In
 25 | fact all access to arrays is bounds checked, which is another way Rust is a
 26 | safer language.
 27 | 
 28 | If you try to do `a[4]`, then you will get a runtime panic. Unfortunately, the
 29 | Rust compiler is not clever enough to give you a compile time error, even when
 30 | it is obvious (as in this example).
 31 | 
 32 | If you like to live dangerously, or just need to get every last ounce of
 33 | performance out of your program, you can still get unchecked access to arrays.
 34 | To do this, use the `get_unchecked` method on an array. Unchecked array accesses
 35 | must be inside an unsafe block. You should only need to do this in the rarest
 36 | circumstances.
 37 | 
 38 | Just like other data structures in Rust, arrays are immutable by default and
 39 | mutability is inherited. Mutation is also done via the indexing syntax:
 40 | 
 41 | ```rust
 42 | let mut a = [1, 2, 3, 4];
 43 | a[3] = 5;
 44 | println!("{:?}", a);
 45 | ```
 46 | 
 47 | And just like other data, you can borrow an array by taking a reference to it:
 48 | 
 49 | ```rust
 50 | fn foo(a: &[i32; 4]) {
 51 |     println!("First: {}; last: {}", a[0], a[3]);
 52 | }
 53 | 
 54 | fn main() {
 55 |     foo(&[1, 2, 3, 4]);
 56 | }
 57 | ```
 58 | 
 59 | Notice that indexing still works on a borrowed array.
 60 | 
 61 | This is a good time to talk about the most interesting aspect of Rust arrays for
 62 | C++ programmers - their representation. Rust arrays are value types: they are
 63 | allocated on the stack like other values and an array object is a sequence of
 64 | values, not a pointer to those values (as in C). So from our examples above, `let
 65 | a = [1_i32, 2, 3, 4];` will allocate 16 bytes on the stack and executing `let b
 66 | = a;` will copy 16 bytes. If you want a C-like array, you have to explicitly
 67 | make a pointer to the array, this will give you a pointer to the first element.
 68 | 
 69 | A final point of difference between arrays in Rust and C++ is that Rust arrays
 70 | can implement traits, and thus have methods. To find the length of an array, for
 71 | example, you use `a.len()`.
 72 | 
 73 | 
 74 | ## Slices
 75 | 
 76 | A slice in Rust is just an array whose length is not known at compile time. The
 77 | syntax of the type is just like a fixed length array, except there is no length:
 78 | e.g., `[i32]` is a slice of 32 bit integers (with no statically known length).
 79 | 
 80 | There is a catch with slices: since the compiler must know the size of all
 81 | objects in Rust, and it can't know the size of a slice, then we can never have a
 82 | value with slice type. If you try and write `fn foo(x: [i32])`, for example, the
 83 | compiler will give you an error.
 84 | 
 85 | So, you must always have pointers to slices (there are some very technical
 86 | exceptions to this rule so that you can implement your own smart pointers, but
 87 | you can safely ignore them for now). You must write `fn foo(x: &[i32])` (a
 88 | borrowed reference to a slice) or `fn foo(x: *mut [i32])` (a mutable raw pointer
 89 | to a slice), etc.
 90 | 
 91 | The simplest way to create a slice is by coercion. There are far fewer implicit
 92 | coercions in Rust than there are in C++. One of them is the coercion from fixed
 93 | length arrays to slices. Since slices must be pointer values, this is
 94 | effectively a coercion between pointers. For example, we can coerce `&[i32; 4]`
 95 | to `&[i32]`, e.g.,
 96 | 
 97 | ```rust
 98 | let a: &[i32] = &[1, 2, 3, 4];
 99 | ```
100 | 
101 | Here the right hand side is a fixed length array of length four, allocated on
102 | the stack. We then take a reference to it (type `&[i32; 4]`). That reference is
103 | coerced to type `&[i32]` and given the name `a` by the let statement.
104 | 
105 | Again, access is just like C (using `[...]`), and access is bounds checked. You
106 | can also check the length yourself by using `len()`. So clearly the length of
107 | the array is known somewhere. In fact all arrays of any kind in Rust have known
108 | length, since this is essential for bounds checking, which is an integral part
109 | of memory safety. The size is known dynamically (as opposed to statically in the
110 | case of fixed length arrays), and we say that slice types are dynamically sized
111 | types (DSTs, there are other kinds of dynamically sized types too, they'll be
112 | covered elsewhere).
113 | 
114 | Since a slice is just a sequence of values, the size cannot be stored as part of
115 | the slice. Instead it is stored as part of the pointer (remember that slices
116 | must always exist as pointer types). A pointer to a slice (like all pointers to
117 | DSTs) is a fat pointer - it is two words wide, rather than one, and contains the
118 | pointer to the data plus a payload. In the case of slices, the payload is the
119 | length of the slice.
120 | 
121 | So in the example above, the pointer `a` will be 128 bits wide (on a 64 bit
122 | system). The first 64 bits will store the address of the `1` in the sequence
123 | `[1, 2, 3, 4]`, and the second 64 bits will contain `4`. Usually, as a Rust
124 | programmer, these fat pointers can just be treated as regular pointers. But it
125 | is good to know about (it can affect the things you can do with casts, for
126 | example).
127 | 
128 | 
129 | ### Slicing notation and ranges
130 | 
131 | A slice can be thought of as a (borrowed) view of an array. So far we have only
132 | seen a slice of the whole array, but we can also take a slice of part of an
133 | array. There is a special notation for this which is like the indexing
134 | syntax, but takes a range instead of a single integer. E.g., `a[0..4]`, which
135 | takes a slice of the first four elements of `a`. Note that the range is
136 | exclusive at the top and inclusive at the bottom. Examples:
137 | 
138 | ```rust
139 | let a: [i32; 4] = [1, 2, 3, 4];
140 | let b: &[i32] = &a;   // Slice of the whole array.
141 | let c = &a[0..4];     // Another slice of the whole array, also has type &[i32].
142 | let c = &a[1..3];     // The middle two elements, &[i32].
143 | let c = &a[1..];      // The last three elements.
144 | let c = &a[..3];      // The first three elements.
145 | let c = &a[..];       // The whole array, again.
146 | let c = &b[1..3];     // We can also slice a slice.
147 | ```
148 | 
149 | Note that in the last example, we still need to borrow the result of slicing.
150 | The slicing syntax produces an unborrowed slice (type: `[i32]`) which we must
151 | then borrow (to give a `&[i32]`), even if we are slicing a borrowed slice.
152 | 
153 | Range syntax can also be used outside of slicing syntax. `a..b` produces an
154 | iterator which runs from `a` to `b-1`. This can be combined with other iterators
155 | in the usual way, or can be used in `for` loops:
156 | 
157 | ```rust
158 | // Print all numbers from 1 to 10.
159 | for i in 1..11 {
160 |     println!("{}", i);
161 | }
162 | ```
163 | 
164 | ## Vecs
165 | 
166 | A vector is heap allocated and is an owning reference. Therefore (and like
167 | `Box<_>`), it has move semantics. We can think of a fixed length array
168 | analogously to a value, a slice to a borrowed reference. Similarly, a vector in
169 | Rust is analogous to a `Box<_>` pointer.
170 | 
171 | It helps to think of `Vec<_>` as a kind of smart pointer, just like `Box<_>`,
172 | rather than as a value itself. Similarly to a slice, the length is stored in the
173 | 'pointer', in this case the 'pointer' is the Vec value.
174 | 
175 | A vector of `i32`s has type `Vec<i32>`. There are no vector literals, but we can
176 | get the same effect by using the `vec!` macro. We can also create an empty
177 | vector using `Vec::new()`:
178 | 
179 | ```rust
180 | let v = vec![1, 2, 3, 4];      // A Vec<i32> with length 4.
181 | let v: Vec<i32> = Vec::new();  // An empty vector of i32s.
182 | ```
183 | 
184 | In the second case above, the type annotation is necessary so the compiler can
185 | know what the vector is a vector of. If we were to use the vector, the type
186 | annotation would probably not be necessary.
187 | 
188 | Just like arrays and slices, we can use indexing notation to get a value from
189 | the vector (e.g., `v[2]`). Again, these are bounds checked. We can also use
190 | slicing notation to take a slice of a vector (e.g., `&v[1..3]`).
191 | 
192 | The extra feature of vectors is that their size can change - they can get longer
193 | or shorter as needed. For example, `v.push(5)` would add the element `5` to the
194 | end of the vector (this would require that `v` is mutable). Note that growing a
195 | vector can cause reallocation, which for large vectors can mean a lot of
196 | copying. To guard against this you can pre-allocate space in a vector using
197 | `with_capacity`, see the [Vec docs](https://doc.rust-lang.org/std/vec/struct.Vec.html)
198 | for more details.
199 | 
200 | 
201 | ## The `Index` traits
202 | 
203 | Note for readers: there is a lot of material in this section that I haven't
204 | covered properly yet. If you're following the tutorial, you can skip this
205 | section, it is a somewhat advanced topic in any case.
206 | 
207 | The same indexing syntax used for arrays and vectors is also used for other
208 | collections, such as `HashMap`s. And you can use it yourself for your own
209 | collections. You opt-in to using the indexing (and slicing) syntax by
210 | implementing the `Index` trait. This is a good example of how Rust makes
211 | available nice syntax to user types, as well as built-ins (`Deref` for
212 | dereferencing smart pointers, as well as `Add` and various other traits, work in
213 | a similar way).
214 | 
215 | The `Index` trait looks like
216 | 
217 | ```rust
218 | pub trait Index<Idx: ?Sized> {
219 |     type Output: ?Sized;
220 | 
221 |     fn index(&self, index: Idx) -> &Self::Output;
222 | }
223 | ```
224 | 
225 | `Idx` is the type used for indexing. For most uses of indexing this is `usize`.
226 | For slicing this is one of the `std::ops::Range` types. `Output` is the type
227 | returned by indexing, this will be different for each collection. For slicing it
228 | will be a slice, rather than the type of a single element. `index` is a method
229 | which does the work of getting the element(s) out of the collection. Note that
230 | the collection is taken by reference and the method returns a reference to the
231 | element with the same lifetime.
232 | 
233 | Let's look at the implementation for `Vec` to see what an implementation
234 | looks like:
235 | 
236 | ```rust
237 | impl<T> Index<usize> for Vec<T> {
238 |     type Output = T;
239 | 
240 |     fn index(&self, index: usize) -> &T {
241 |         &(**self)[index]
242 |     }
243 | }
244 | ```
245 | 
246 | As we said above, indexing is done using `usize`. For a `Vec<T>`, indexing will
247 | return a single element of type `T`, thus the value of `Output`. The
248 | implementation of `index` is a bit weird - `(**self)` gets a view of the whole
249 | vec as a slice, then we use indexing on slices to get the element, and finally
250 | take a reference to it.
251 | 
252 | If you have your own collections, you can implement `Index` in a similar way to
253 | get indexing and slicing syntax for your collection.
254 | 
255 | 
256 | ## Initialiser syntax
257 | 
258 | As with all data in Rust, arrays and vectors must be properly initialised. Often
259 | you just want an array full of zeros to start with and using the array literal
260 | syntax is a pain. So Rust gives you a little syntactic sugar to initialise an
261 | array full of a given value: `[value; len]`. So for example to create an array
262 | with length 100 full of zeros, we'd use `[0; 100]`.
263 | 
264 | Similarly for vectors, `vec![42; 100]` would give you a vector with 100
265 | elements, each with the value 42.
266 | 
267 | The initial value is not limited to integers, it can be any expression. For
268 | array initialisers, the length must be an integer constant expression. For
269 | `vec!`, it can be any expression with type `usize`.
270 | 
271 | 
272 | [^1]: In C++11 there is `std::array<T, N>`, which provides bounds checking when
273 | the `at()` method is used.
274 | 


--------------------------------------------------------------------------------
/data-types.md:
--------------------------------------------------------------------------------
  1 | # Data types
  2 | 
  3 | In this post I'll discuss Rust's data types. These are roughly equivalent to
  4 | classes, structs, and enums in C++. One difference with Rust is that data and
  5 | behaviour are much more strictly separated in Rust than C++ (or Java, or other
  6 | OO languages). Behaviour is defined by functions and those can be defined in
  7 | traits and `impl`s (implementations), but traits cannot contain data, they are
  8 | similar to Java's interfaces in that respect. I'll cover traits and impls in a
  9 | later post, this one is all about data.
 10 | 
 11 | ## Structs
 12 | 
 13 | A Rust struct is similar to a C struct or a C++ struct without methods: simply a
 14 | list of named fields. The syntax is best seen with an example:
 15 | 
 16 | ```rust
 17 | struct S {
 18 |     field1: i32,
 19 |     field2: SomeOtherStruct
 20 | }
 21 | ```
 22 | 
 23 | Here we define a struct called `S` with two fields. The fields are comma
 24 | separated; if you like, you can comma-terminate the last field too.
 25 | 
 26 | Structs introduce a type. In the example, we could use `S` as a type.
 27 | `SomeOtherStruct` is assumed to be another struct (used as a type in the
 28 | example), and (like C++) it is included by value, that is, there is no pointer
 29 | to another struct object in memory.
 30 | 
 31 | Fields in structs are accessed using the `.` operator and their name. An example
 32 | of struct use:
 33 | 
 34 | ```rust
 35 | fn foo(s1: S, s2: &S) {
 36 |     let f = s1.field1;
 37 |     if f == s2.field1 {
 38 |         println!("field1 matches!");
 39 |     }
 40 | }
 41 | ```
 42 | 
 43 | Here `s1` is a struct object passed by value and `s2` is a struct object passed by
 44 | reference. As with method calls, we use the same `.` to access fields in both, no
 45 | need for `->`.
 46 | 
 47 | Structs are initialised using struct literals. These are the name of the struct
 48 | and values for each field. For example,
 49 | 
 50 | ```rust
 51 | fn foo(sos: SomeOtherStruct) {
 52 |     let x = S { field1: 45, field2: sos };  // initialise x with a struct literal
 53 |     println!("x.field1 = {}", x.field1);
 54 | }
 55 | ```
 56 | 
 57 | Structs cannot be recursive; that is, you can't have cycles of struct names
 58 | involving definitions and field types. This is because of the value semantics of
 59 | structs. So for example, `struct R { r: Option<R> }` is illegal and will cause a
 60 | compiler error (see below for more about Option). If you need such a structure
 61 | then you should use some kind of pointer; cycles with pointers are allowed:
 62 | 
 63 | ```rust
 64 | struct R {
 65 |     r: Option<Box<R>>
 66 | }
 67 | ```
 68 | 
 69 | If we didn't have the `Option` in the above struct, there would be no way to
 70 | instantiate the struct and Rust would signal an error.
 71 | 
 72 | Structs with no fields do not use braces in either their definition or literal
 73 | use. Definitions do need a terminating semi-colon though, presumably just to
 74 | facilitate parsing.
 75 | 
 76 | ```rust
 77 | struct Empty;
 78 | 
 79 | fn foo() {
 80 |     let e = Empty;
 81 | }
 82 | ```
 83 | 
 84 | ## Tuples
 85 | 
 86 | Tuples are anonymous, heterogeneous sequences of data. As a type, they are
 87 | declared as a sequence of types in parentheses. Since there is no name, they are
 88 | identified by structure. For example, the type `(i32, i32)` is a pair of
 89 | integers and `(i32, f32, S)` is a triple. Tuple values are initialised in the
 90 | same way as tuple types are declared, but with values instead of types for the
 91 | components, e.g., `(4, 5)`. An example:
 92 | 
 93 | ```rust
 94 | // foo takes a struct and returns a tuple
 95 | fn foo(x: SomeOtherStruct) -> (i32, f32, S) {
 96 |     (23, 45.82, S { field1: 54, field2: x })
 97 | }
 98 | ```
 99 | 
100 | Tuples can be used by destructuring using a `let` expression, e.g.,
101 | 
102 | ```rust
103 | fn bar(x: (i32, i32)) {
104 |     let (a, b) = x;
105 |     println!("x was ({}, {})", a, b);
106 | }
107 | ```
108 | 
109 | We'll talk more about destructuring next time.
110 | 
111 | 
112 | ## Tuple structs
113 | 
114 | Tuple structs are named tuples, or alternatively, structs with unnamed fields.
115 | They are declared using the `struct` keyword, a list of types in parentheses,
116 | and a semicolon. Such a declaration introduces their name as a type. Their
117 | fields must be accessed by destructuring (like a tuple), rather than by name.
118 | Tuple structs are not very common.
119 | 
120 | ```rust
121 | struct IntPoint (i32, i32);
122 | 
123 | fn foo(x: IntPoint) {
124 |     let IntPoint(a, b) = x;  // Note that we need the name of the tuple
125 |                              // struct to destructure.
126 |     println!("x was ({}, {})", a, b);
127 | }
128 | ```
129 | 
130 | ## Enums
131 | 
132 | Enums are types like C++ enums or unions, in that they are types which can take
133 | multiple values. The simplest kind of enum is just like a C++ enum:
134 | 
135 | ```rust
136 | enum E1 {
137 |     Var1,
138 |     Var2,
139 |     Var3
140 | }
141 | 
142 | fn foo() {
143 |     let x: E1 = Var2;
144 |     match x {
145 |         Var2 => println!("var2"),
146 |         _ => {}
147 |     }
148 | }
149 | ```
150 | 
151 | However, Rust enums are much more powerful than that. Each variant can contain
152 | data. Like tuples, these are defined by a list of types. In this case they are
153 | more like unions than enums in C++. Rust enums are tagged unions rather than untagged unions (as in C++).
154 | That means you can't mistake one variant of an enum for another at runtime[^1]. An example:
155 | 
156 | ```rust
157 | enum Expr {
158 |     Add(i32, i32),
159 |     Or(bool, bool),
160 |     Lit(i32)
161 | }
162 | 
163 | fn foo() {
164 |     let x = Or(true, false);   // x has type Expr
165 | }
166 | ```
167 | 
168 | Many simple cases of object-oriented polymorphism are better handled in Rust
169 | using enums.
170 | 
171 | To use enums we usually use a match expression. Remember that these are similar
172 | to C++ switch statements. I'll go into more depth on these and other ways to
173 | destructure data next time. Here's an example:
174 | 
175 | ```rust
176 | fn bar(e: Expr) {
177 |     match e {
178 |         Add(x, y) => println!("An `Add` variant: {} + {}", x, y),
179 |         Or(..) => println!("An `Or` variant"),
180 |         _ => println!("Something else (in this case, a `Lit`)"),
181 |     }
182 | }
183 | ```
184 | 
185 | Each arm of the match expression matches a variant of `Expr`. All variants must
186 | be covered. The last case (`_`) covers all remaining variants, although in the
187 | example there is only `Lit`. Any data in a variant can be bound to a variable.
188 | In the `Add` arm we are binding the two i32s in an `Add` to `x` and `y`. If we
189 | don't care about the data, we can use `..` to match any data, as we do for `Or`.
190 | 
191 | 
192 | ## Option
193 | 
194 | One particularly common enum in Rust is `Option`. This has two variants - `Some`
195 | and `None`. `None` has no data and `Some` has a single field with type `T`
196 | (`Option` is a generic enum, which we will cover later, but hopefully the
197 | general idea is clear from C++). Options are used to indicate a value might be
198 | there or might not. Any place you use a null pointer in C++[^2]
199 | to indicate a value which is in some way undefined, uninitialised, or false,
200 | you should probably use an Option in Rust. Using Option is safer because you
201 | must always check it before use; there is no way to do the equivalent of
202 | dereferencing a null pointer. They are also more general, you can use them with
203 | values as well as pointers. An example:
204 | 
205 | ```rust
206 | use std::rc::Rc;
207 | 
208 | struct Node {
209 |     parent: Option<Rc<Node>>,
210 |     value: i32
211 | }
212 | 
213 | fn is_root(node: Node) -> bool {
214 |     match node.parent {
215 |         Some(_) => false,
216 |         None => true
217 |     }
218 | }
219 | ```
220 | 
221 | Here, the parent field could be either a `None` or a `Some` containing an
222 | `Rc<Node>`. In the example, we never actually use that payload, but in real life
223 | you usually would.
224 | 
225 | 
226 | There are also convenience methods on Option, so you could write the body of
227 | `is_root` as `node.parent.is_none()` or `!node.parent.is_some()`.
228 | 
229 | ## Inherited mutability and Cell/RefCell
230 | 
231 | Local variables in Rust are immutable by default and can be marked mutable using
232 | `mut`. We don't mark fields in structs or enums as mutable, their mutability is
233 | inherited. This means that a field in a struct object is mutable or immutable
234 | depending on whether the object itself is mutable or immutable. Example:
235 | 
236 | ```rust
237 | struct S1 {
238 |     field1: i32,
239 |     field2: S2
240 | }
241 | struct S2 {
242 |     field: i32
243 | }
244 | 
245 | fn main() {
246 |     let s = S1 { field1: 45, field2: S2 { field: 23 } };
247 |     // s is deeply immutable, the following mutations are forbidden
248 |     // s.field1 = 46;
249 |     // s.field2.field = 24;
250 | 
251 |     let mut s = S1 { field1: 45, field2: S2 { field: 23 } };
252 |     // s is mutable, these are OK
253 |     s.field1 = 46;
254 |     s.field2.field = 24;
255 | }
256 | ```
257 | 
258 | Inherited mutability in Rust stops at references. This is similar to C++ where
259 | you can modify a non-const object via a pointer from a const object. If you want
260 | a reference field to be mutable, you have to use `&mut` on the field type:
261 | 
262 | ```rust
263 | struct S1 {
264 |     f: i32
265 | }
266 | struct S2<'a> {
267 |     f: &'a mut S1   // mutable reference field
268 | }
269 | struct S3<'a> {
270 |     f: &'a S1       // immutable reference field
271 | }
272 | 
273 | fn main() {
274 |     let mut s1 = S1{f:56};
275 |     let s2 = S2 { f: &mut s1};
276 |     s2.f.f = 45;   // legal even though s2 is immutable
277 |     // s2.f = &mut s1; // illegal - s2 is not mutable
278 |     let s1 = S1{f:56};
279 |     let mut s3 = S3 { f: &s1};
280 |     s3.f = &s1;     // legal - s3 is mutable
281 |     // s3.f.f = 45; // illegal - s3.f is immutable
282 | }
283 | ```
284 | 
285 | (The `'a` parameter on `S2` and `S3` is a lifetime parameter, we'll cover those soon).
286 | 
287 | Sometimes whilst an object is logically immutable, it has parts which need to be
288 | internally mutable. Think of various kinds of caching or a reference count
289 | (which would not give true logical immutability since the effect of changing the
290 | ref count can be observed via destructors). In C++, you would use the `mutable`
291 | keyword to allow such mutation even when the object is const. In Rust we have
292 | the Cell and RefCell structs. These allow parts of immutable objects to be
293 | mutated. Whilst that is useful, it means you need to be aware that when you see
294 | an immutable object in Rust, it is possible that some parts may actually be
295 | mutable.
296 | 
297 | RefCell and Cell let you get around Rust's strict rules on mutation and
298 | aliasability. They are safe to use because they ensure that Rust's invariants
299 | are respected dynamically, even though the compiler cannot ensure that those
300 | invariants hold statically. Cell and RefCell are both single threaded objects.
301 | 
302 | Use Cell for types which have copy semantics (pretty much just primitive types).
303 | Cell has `get` and `set` methods for changing the stored value, and a `new`
304 | method to initialise the cell with a value. Cell is a very simple object - it
305 | doesn't need to do anything smart since objects with copy semantics can't keep
306 | references elsewhere (in Rust) and they can't be shared across threads, so there
307 | is not much to go wrong.
308 | 
309 | Use RefCell for types which have move semantics, that means nearly everything in
310 | Rust, struct objects are a common example. RefCell is also created using `new`
311 | and has a `set` method. To get the value in a RefCell, you must borrow it using
312 | the borrow methods (`borrow`, `borrow_mut`, `try_borrow`, `try_borrow_mut`);
313 | these will give you a borrowed reference to the object in the RefCell. These
314 | methods follow the same rules as static borrowing - you can only have one
315 | mutable borrow, and can't borrow mutably and immutably at the same time.
316 | However, rather than a compile error you get a runtime failure. The `try_`
317 | variants return a `Result` - you get `Ok(val)` if the value can be borrowed and
318 | an `Err` if it can't. If a value is borrowed, calling `set` will fail too.
319 | 
320 | Here's an example using a ref-counted pointer to a RefCell (a common use-case):
321 | 
322 | ```rust
323 | use std::rc::Rc;
324 | use std::cell::RefCell;
325 | 
326 | struct S {
327 |     field: i32
328 | }
329 | 
330 | fn foo(x: Rc<RefCell<S>>) {
331 |     {
332 |         let s = x.borrow();
333 |         println!("the field, twice {} {}", s.field, x.borrow().field);
334 |         // let s = x.borrow_mut(); // Error - we've already borrowed the contents of x
335 |     }
336 | 
337 |     let mut s = x.borrow_mut(); // OK, the earlier borrows are out of scope
338 |     s.field = 45;
339 |     // println!("The field {}", x.borrow().field); // Error - can't mut and immut borrow
340 |     println!("The field {}", s.field);
341 | }
342 | 
343 | fn main() {
344 |     let s = S{field:12};
345 |     let x: Rc<RefCell<S>> = Rc::new(RefCell::new(s));
346 |     foo(x.clone());
347 | 
348 |     println!("The field {}", x.borrow().field);
349 | }
350 | ```
351 | 
352 | If you're using Cell/RefCell, you should try to put them on the smallest object
353 | you can. That is, prefer to put them on a few fields of a struct, rather than
354 | the whole struct. Think of them like single threaded locks, finer grained
355 | locking is better since you are more likely to avoid colliding on a lock.
356 | 
357 | 
358 | [^1]: In C++17 there is the `std::variant<Types...>` type, which is closer to Rust enums than unions are.
359 | 
360 | [^2]: Since C++17, `std::optional<T>` is the closest C++ equivalent of Rust's `Option`.
361 | 


--------------------------------------------------------------------------------
/graphs/README.md:
--------------------------------------------------------------------------------
  1 | # Graphs and arena allocation
  2 | 
  3 | (Note you can run the examples in this chapter by downloading this directory and
  4 | running `cargo run`).
  5 | 
  6 | Graphs are a bit awkward to construct in Rust because of Rust's stringent
  7 | lifetime and mutability requirements. Graphs of objects are very common in OO
  8 | programming. In this tutorial I'm going to go over a few different approaches to
  9 | implementation. My preferred approach uses arena allocation and makes slightly
 10 | advanced use of explicit lifetimes. I'll finish up by discussing a few potential
 11 | Rust features which would make using such an approach easier.
 12 | 
 13 | A [graph](http://en.wikipedia.org/wiki/Graph_%28abstract_data_type%29) is a
 14 | collection of nodes with edges between some of those nodes. Graphs are a
 15 | generalisation of lists and trees. Each node can have multiple children and
 16 | multiple parents (we usually talk about edges into and out of a node, rather
 17 | than parents/children). Graphs can be represented by adjacency lists or
 18 | adjacency matrices. The former is basically a node object for each node in the
 19 | graph, where each node object keeps a list of its adjacent nodes. An adjacency
 20 | matrix is a matrix of booleans indicating whether there is an edge from the row
 21 | node to the column node. We'll only cover the adjacency list representation,
 22 | adjacency matrices have very different issues which are less Rust-specific.
 23 | 
 24 | There are essentially two orthogonal problems: how to handle the lifetime of the
 25 | graph and how to handle its mutability.
 26 | 
 27 | The first problem essentially boils down to what kind of pointer to use to point
 28 | to other nodes in the graph. Since graph-like data structures are recursive (the
 29 | types are recursive, even if the data is not) we are forced to use pointers of
 30 | some kind rather than have a totally value-based structure. Since graphs can be
 31 | cyclic, and ownership in Rust cannot be cyclic, we cannot use `Box<Node>` as our
 32 | pointer type (as we might do for tree-like data structures or linked lists).
 33 | 
 34 | No graph is truly immutable. Because there may be cycles, the graph cannot be
 35 | created in a single statement. Thus, at the very least, the graph must be mutable
 36 | during its initialisation phase. The usual invariant in Rust is that all
 37 | pointers must either be unique or immutable. Graph edges must be mutable (at
 38 | least during initialisation) and there can be more than one edge into any node,
 39 | thus no edges are guaranteed to be unique. So we're going to have to do
 40 | something a little bit advanced to handle mutability.
 41 | 
 42 | One solution is to use mutable raw pointers (`*mut Node`). This is the most
 43 | flexible approach, but also the most dangerous. You must handle all the lifetime
 44 | management yourself without any help from the type system. You can make very
 45 | flexible and efficient data structures this way, but you must be very careful.
 46 | This approach handles both the lifetime and mutability issues in one fell swoop.
 47 | But it handles them by essentially ignoring all the benefits of Rust - you will
 48 | get no help from the compiler here (it's also not particularly ergonomic since
 49 | raw pointers don't automatically (de-)reference). Since a graph using raw
 50 | pointers is not much different from a graph in C++, I'm not going to cover that
 51 | option here.
 52 | 
 53 | The options you have for lifetime management are reference counting (shared
 54 | ownership, using `Rc<...>`) or arena allocation (all nodes have the same lifetime,
 55 | managed by an arena; using borrowed references `&...`). The former is
 56 | more flexible (you can have references from outside the graph to individual
 57 | nodes with any lifetime), the latter is better in every other way.
 58 | 
 59 | For managing mutability, you can either use `RefCell`, i.e., make use of Rust's
 60 | facility for dynamic, interior mutability, or you can manage the mutability
 61 | yourself (in this case you have to use `UnsafeCell` to communicate the interior
 62 | mutability to the compiler). The former is safer, the latter is more efficient.
 63 | Neither is particularly ergonomic.
 64 | 
 65 | Note that if your graph might have cycles, then if you use `Rc`, further action
 66 | is required to break the cycles and not leak memory. Since Rust has no cycle
 67 | collection of `Rc` pointers, if there is a cycle in your graph, the ref counts
 68 | will never fall to zero, and the graph will never be deallocated. You can solve
 69 | this by using `Weak` pointers in your graph or by manually breaking cycles when
 70 | you know the graph should be destroyed. The former is more reliable. We don't
 71 | cover either here, in our examples we just leak memory. The approach using
 72 | borrowed references and arena allocation does not have this issue and is thus
 73 | superior in that respect.
 74 | 
 75 | To compare the different approaches I'll use a pretty simple example. We'll just
 76 | have a `Node` object to represent a node in the graph, this will hold some
 77 | string data (representative of some more complex data payload) and a `Vec` of
 78 | adjacent nodes (`edges`). We'll have an `init` function to create a simple graph
 79 | of nodes, and a `traverse` function which does a pre-order, depth-first
 80 | traversal of the graph. We'll use this to print the payload of each node in the
 81 | graph. Finally, we'll have a `Node::first` method which returns a reference to
 82 | the first adjacent node to the `self` node and a function `foo` which prints the
 83 | payload of an individual node. These functions stand in for more complex
 84 | operations involving manipulation of a node interior to the graph.
 85 | 
 86 | To try and be as informative as possible without boring you, I'll cover two
 87 | combinations of possibilities: ref counting and `RefCell`, and arena allocation
 88 | and `UnsafeCell`. I'll leave the other two combinations as an exercise.
 89 | 
 90 | 
 91 | ## `Rc<RefCell<Node>>`
 92 | 
 93 | See [full example](src/rc_graph.rs).
 94 | 
 95 | This is the safer option because there is no unsafe code. It is also the least
 96 | efficient and least ergonomic option. It is pretty flexible though, nodes of the
 97 | graph can be easily reused outside the graph since they are ref-counted. I would
 98 | recommend this approach if you need a fully mutable graph, or need your nodes to
 99 | exist independently of the graph.
100 | 
101 | The node structure looks like
102 | 
103 | ```rust
104 | struct Node {
105 |     datum: &'static str,
106 |     edges: Vec<Rc<RefCell<Node>>>,
107 | }
108 | ```
109 | 
110 | Creating a new node is not too bad: `Rc::new(RefCell::new(Node { ... }))`. To
111 | add an edge during initialisation, you have to borrow the start node as mutable,
112 | and clone the end node into the Vec of edges (this clones the pointer,
113 | incrementing the reference count, not the actual node). E.g.,
114 | 
115 | ```rust
116 | let mut mut_root = root.borrow_mut();
117 | mut_root.edges.push(b.clone());
118 | ```
119 | 
120 | The `RefCell` dynamically ensures that we are not already reading or writing the
121 | node when we write it.
122 | 
123 | Whenever you access a node, you have to use `.borrow()` to borrow the `RefCell`.
124 | Our `first` method has to return a ref-counted pointer, rather than a borrowed
125 | reference, so callers of `first` also have to borrow:
126 | 
127 | ```rust
128 | fn first(&self) -> Rc<RefCell<Node>> {
129 |     self.edges[0].clone()
130 | }
131 | 
132 | pub fn main() {
133 |     let g = ...;
134 |     let f = g.first();
135 |     foo(&*f.borrow());
136 | }
137 | ```
138 | 
139 | 
140 | ## `&Node` and `UnsafeCell`
141 | 
142 | See [full example](src/ref_graph.rs).
143 | 
144 | In this approach we use borrowed references as edges. This is nice and ergonomic
145 | and lets us use our nodes with 'regular' Rust libraries which primarily operate
146 | with borrowed references (note that one nice thing about ref counted objects in
147 | Rust is that they play nicely with the lifetime system. We can create a borrowed
148 | reference into the `Rc` to directly (and safely) reference the data. In the
149 | previous example, the `RefCell` prevents us doing this, but an `Rc`/`UnsafeCell`
150 | approach should allow it).
151 | 
152 | Destruction is correctly handled too - the only constraint is that all the nodes
153 | must be destroyed at the same time. Destruction and allocation of nodes is
154 | handled using an arena.
155 | 
156 | On the other hand, we do need to use quite a few explicit lifetimes.
157 | Unfortunately we don't benefit from lifetime elision here. At the end of the
158 | section I'll discuss some future directions for the language which could make
159 | things better.
160 | 
161 | During construction we will mutate our nodes which might be multiply referenced.
162 | This is not possible in safe Rust code, so we must initialise inside an `unsafe`
163 | block. Since our nodes are mutable and multiply referenced, we must use an
164 | `UnsafeCell` to communicate to the Rust compiler that it cannot rely on its
165 | usual invariants.
166 | 
167 | When is this approach feasible? The graph must only be mutated during
168 | initialisation. In addition, we require that all nodes in the graph have the
169 | same lifetime (we could relax these constraints somewhat to allow adding nodes
170 | later as long as they can all be destroyed at the same time). Similarly, we
171 | could rely on more complicated invariants for when the nodes can be mutated, but
172 | it pays to keep things simple, since the programmer is responsible for safety
173 | in those respects.
174 | 
175 | Arena allocation is a memory management technique where a set of objects have
176 | the same lifetime and can be deallocated at the same time. An arena is an object
177 | responsible for allocating and deallocating the memory. Since large chunks of
178 | memory are allocated and deallocated at once (rather than allocating individual
179 | objects), arena allocation is very efficient. Usually, all the objects are
180 | allocated from a contiguous chunk of memory, which improves cache locality when
181 | you are traversing the graph.
182 | 
183 | In Rust, arena allocation is supported by the [libarena](https://doc.rust-lang.org/1.1.0/arena/index.html)
184 | crate and is used throughout the compiler. There are two kinds of arenas - typed
185 | and untyped. The former is more efficient and easier to use, but can only
186 | allocate objects of a single type. The latter is more flexible and can allocate
187 | any object. Arena allocated objects all have the same lifetime, which is a
188 | parameter of the arena object. The type system ensures references to arena
189 | allocated objects cannot live longer than the arena itself.
190 | 
191 | Our node struct must now include the lifetime of the graph, `'a`. We wrap our
192 | `Vec` of adjacent nodes in an `UnsafeCell` to indicate that we will mutate it
193 | even when it should be immutable:
194 | 
195 | ```rust
196 | struct Node<'a> {
197 |     datum: &'static str,
198 |     edges: UnsafeCell<Vec<&'a Node<'a>>>,
199 | }
200 | ```
201 | 
202 | Our `new` function must also use this lifetime and must take as an argument the
203 | arena which will do the allocation:
204 | 
205 | ```rust
206 | fn new<'a>(datum: &'static str, arena: &'a TypedArena<Node<'a>>) -> &'a Node<'a> {
207 |     arena.alloc(Node {
208 |         datum: datum,
209 |         edges: UnsafeCell::new(Vec::new()),
210 |     })
211 | }
212 | ```
213 | 
214 | We use the arena to allocate the node. The lifetime of the graph is derived from
215 | the lifetime of the reference to the arena, so the arena must be passed in from
216 | the scope which covers the graph's lifetime. For our examples, that means we
217 | pass it into the `init` method. (One could imagine an extension to the type
218 | system which allows creating values at scopes outside their lexical scope, but
219 | there are no plans to add such a thing any time soon). When the arena goes out
220 | of scope, the whole graph is destroyed (Rust's type system ensures that we can't
221 | keep references to the graph beyond that point).
222 | 
223 | Adding an edge is a bit different looking:
224 | 
225 | ```rust
226 | (*root.edges.get()).push(b);
227 | ```
228 | 
229 | We're essentially doing the obvious `root.edges.push(b)` to push a node (`b`) on
230 | to the list of edges. However, since `edges` is wrapped in an `UnsafeCell`, we
231 | have to call `get()` on it. That gives us a mutable raw pointer to edges (`*mut
232 | Vec<&Node>`), which allows us to mutate `edges`. However, it also requires us to
233 | manually dereference the pointer (raw pointers do not auto-deref), thus the
234 | `(*...)` construction. Finally, dereferencing a raw pointer is unsafe, so the
235 | whole lot has to be wrapped up in an unsafe block.
236 | 
237 | The interesting part of `traverse` is:
238 | 
239 | ```rust
240 | for n in &(*self.edges.get()) {
241 |     n.traverse(f, seen);
242 | }
243 | ```
244 | 
245 | We follow the previous pattern for getting at the edges list, which requires an
246 | unsafe block. In this case we know it is in fact safe because we must be
247 | post-initialisation and thus there will be no mutation.
248 | 
249 | Again, the `first` method follows the same pattern for getting at the `edges`
250 | list. And again must be in an unsafe block. However, in contrast to the graph
251 | using `Rc<RefCell<_>>`, we can return a straightforward borrowed reference to
252 | the node. That is very convenient. We can reason that the unsafe block is safe
253 | because we do no mutation and we are post-initialisation.
254 | 
255 | ```rust
256 | fn first(&'a self) -> &'a Node<'a> {
257 |     unsafe {
258 |         (*self.edges.get())[0]
259 |     }
260 | }
261 | ```
262 | 
263 | ### Future language improvements for this approach
264 | 
265 | I believe that arena allocation and using borrowed references are an important
266 | pattern in Rust. We should do more in the language to make these patterns safer
267 | and easier to use. I hope use of arenas becomes more ergonomic with the ongoing
268 | work on [allocators](https://github.com/rust-lang/rfcs/pull/244). There are
269 | three other improvements I see:
270 | 
271 | #### Safe initialisation
272 | 
273 | There has been lots of research in the OO world on mechanisms for ensuring
274 | mutability only during initialisation. How exactly this would work in Rust is an
275 | open research question, but it seems that we need to represent a pointer which
276 | is mutable and not unique, but restricted in scope. Outside that scope any
277 | existing pointers would become normal borrowed references, i.e., immutable *or*
278 | unique.
279 | 
280 | The advantage of such a scheme is that we have a way to represent the common
281 | pattern of mutable during initialisation, then immutable. It also relies on the
282 | invariant that, while individual objects are multiply owned, the aggregate (in
283 | this case a graph) is uniquely owned. We should then be able to adopt the
284 | reference and `UnsafeCell` approach, without the `UnsafeCell`s and the unsafe
285 | blocks, making that approach more ergonomic and safer.
286 | 
287 | Alex Summers and Julian Viereck at ETH Zurich are investigating this
288 | further.
289 | 
290 | 
291 | #### Generic modules
292 | 
293 | The 'lifetime of the graph' is constant for any particular graph. Repeating the
294 | lifetime is just boilerplate. One way to make this more ergonomic would be to
295 | allow the graph module to be parameterised by the lifetime, so it would not need
296 | to be added to every struct, impl, and function. The lifetime of the graph would
297 | still need to be specified from outside the module, but hopefully inference
298 | would take care of most uses (as it does today for function calls).
299 | 
300 | See [ref_graph_generic_mod.rs](src/ref_graph_generic_mod.rs) for how that might look.
301 | (We should also be able to use safe initialisation (proposed above) to remove
302 | the unsafe code).
303 | 
304 | See also this [RFC issue](https://github.com/rust-lang/rfcs/issues/424).
305 | 
306 | This feature would vastly reduce the syntactic overhead of the reference and
307 | `UnsafeCell` approach.
308 | 
309 | 
310 | #### Lifetime elision
311 | 
312 | We currently allow the programmer to elide some lifetimes in function signatures
313 | to improve ergonomics. One reason the `&Node` approach to graphs is a bit ugly
314 | is because it doesn't benefit from any of the lifetime elision rules.
315 | 
316 | A common pattern in Rust is data structures with a common lifetime. References
317 | into such data structures give rise to types like `&'a Foo<'a>`, for example
318 | `&'a Node<'a>` in the graph example. It would be nice to have an elision
319 | rule that helps in this case. I'm not really sure how it should work though.
320 | 
321 | Looking at the example with generic modules, it doesn't look like we need to
322 | extend the lifetime elision rules very much (I'm not actually sure if
323 | `Node::new` would work without the given lifetimes, but it seems like a fairly
324 | trivial extension to make it work if it doesn't). We might want to add some new
325 | rule to allow elision of module-generic lifetimes if they are the only ones in
326 | scope (other than `'static`), but I'm not sure how that would work with multiple
327 | in-scope lifetimes (see the `foo` and `init` functions, for example).
328 | 
329 | If we don't add generic modules, we might still be able to add an elision rule
330 | specifically to target `&'a Node<'a>`, not sure how though.
331 | 


--------------------------------------------------------------------------------
/closures.md:
--------------------------------------------------------------------------------
  1 | # Closures and first-class functions
  2 | 
  3 | Closures and first-class and higher order functions are a core part of Rust. In
  4 | C and C++ there are function pointers (and those weird member/method pointer
  5 | things in C++ that I never got the hang of). However, they are used relatively
  6 | rarely and are not very ergonomic. C++11 introduced lambdas, and these are
  7 | pretty close to Rust closures, in particular they have a very similar
  8 | implementation strategy.
  9 | 
 10 | To start with, I want to establish some intuition for these things. Then, we'll
 11 | dive in to the details.
 12 | 
 13 | Let's say we have a function `foo`: `pub fn foo() -> u32 { 42 }`. Now let's
 14 | imagine another function `bar` which takes a function as an argument (I'll leave
 15 | `bar`'s signature for later): `fn bar(f: ...) { ... }`. We can pass `foo` to
 16 | `bar` kind of like we would pass a function pointer in C: `bar(foo)`. In the
 17 | body of `bar` we can call `f` as if it were a function: `let x = f();`.
 18 | 
 19 | We say that Rust has first-class functions because we can pass them around and
 20 | use them like we can with other values. We say `bar` is a higher-order function
 21 | because it takes a function as an argument, i.e., it is a function that operates
 22 | on functions.
 23 | 
 24 | Closures in Rust are anonymous functions with a nice syntax. A closure `|x| x +
 25 | 2` takes an argument and returns it with `2` added. Note that we don't have to
 26 | give types for the arguments to a closure (they can usually be inferred). We
 27 | also don't need to specify a return type. If we want the closure body to be more
 28 | than just one expression, we can use braces: `|x: i32| { let y = x + 2; y }`. We
 29 | can pass closures just like functions: `bar(|| 42)`.
 30 | 
 31 | The big difference between closures and other functions is that closures capture
 32 | their environment. This means that we can refer to variables outside the closure
 33 | from the closure. E.g.,
 34 | 
 35 | ```rust
 36 | let x = 42;
 37 | bar(|| x);
 38 | ```
 39 | 
 40 | Note how `x` is in scope in the closure.
 41 | 
 42 | We've seen closures before, used with iterators, and this is a common use case
 43 | for them. E.g., to add a value to each element of a vector:
 44 | 
 45 | ```rust
 46 | fn baz(v: Vec<i32>) -> Vec<i32> {
 47 |     let z = 3;
 48 |     v.iter().map(|x| x + z).collect()
 49 | }
 50 | ```
 51 | 
 52 | Here `x` is an argument to the closure, each member of `v` will be passed as an
 53 | `x`. `z` is declared outside of the closure, but because it's a closure, `z` can
 54 | be referred to. We could also pass a function to map:
 55 | 
 56 | ```rust
 57 | fn add_two(x: i32) -> i32 {
 58 |     x + 2
 59 | }
 60 | 
 61 | fn baz(v: Vec<i32>) -> Vec<i32> {
 62 |     v.iter().map(add_two).collect()
 63 | }
 64 | ```
 65 | 
 66 | Note that Rust also allows declaring functions inside of functions. These are
 67 | *not* closures - they can't access their environment. They are merely a
 68 | convenience for scoping.
 69 | 
 70 | ```rust
 71 | fn qux(x: i32) {
 72 |     fn quxx() -> i32 {
 73 |         x // ERROR x is not in scope.
 74 |     }
 75 | 
 76 |     let a = quxx();
 77 | }
 78 | ```
 79 | 
 80 | ## Function types
 81 | 
 82 | Let's introduce a new example function:
 83 | 
 84 | ```rust
 85 | fn add_42(x: i32) -> i64 {
 86 |     x as i64 + 42
 87 | }
 88 | ```
 89 | 
 90 | As we saw before, we can store a function in a variable: `let a = add_42;`. The
 91 | most precise type of `a` cannot be written in Rust. You'll sometimes see the
 92 | compiler render it as `fn(i32) -> i64 {add_42}` in error messages. Each function
 93 | has its own unique and anonymous type. `fn add_41(x: i32) -> i64` has a different
 94 | type, even though it has the same signature.
 95 | 
 96 | We can write less precise types, for example, `let a: fn(i32) -> i64 = add_42;`.
 97 | All function types with the same signature can be coerced to a `fn` type
 98 | (which can be written by the programmer).
 99 | 
100 | `a` is represented by the compiler as a function pointer, however, if the
101 | compiler knows the precise type, it doesn't actually use that function pointer.
102 | A call like `a()` is statically dispatched based on the type of `a`. If the
103 | compiler doesn't know the precise type (e.g., it only knows the `fn` type), then
104 | the call is dispatched using the function pointer in the value.
105 | 
106 | There are also `Fn` types (note the capital 'F'). These `Fn` types are bounds,
107 | just like traits (in fact they *are* traits, as we'll see later). `Fn(i32) -> i64`
108 | is a bound on the types of all function-like objects with that signature. When
109 | we take a reference to a function pointer, we're actually creating a trait
110 | object which is represented by a fat pointer (see DSTs).
111 | 
112 | To pass a function to another function, or to store the function in a field, we
113 | must write a type. We have several choices: we can use either a `fn` type
114 | or a `Fn` type. The latter is better because it includes closures (and
115 | potentially other function-like things), whereas `fn` types don't. The `Fn`
116 | types are dynamically sized which means we cannot use them as value types. We
117 | must either pass function objects or use generics. Let's look at the generic
118 | approach first. For example,
119 | 
120 | ```rust
121 | fn bar<F>(f: F) -> i64
122 |     where F: Fn(i32) -> i64
123 | {
124 |     f(0)
125 | }
126 | ```
127 | 
128 | `bar` takes any function with the signature `Fn(i32) -> i64`, i.e., we can
129 | instantiate the `F` type parameter with any function-like type. We could call
130 | `bar(add_42)` to pass `add_42` to `bar` which would instantiate `F` with
131 | `add_42`'s anonymous type. We could also call `bar(add_41)` and that would work
132 | too.
133 | 
134 | You can also pass closures to `bar`, e.g., `bar(|x| x as i64)`. This works
135 | because closure types are also bounded by the `Fn` bound matching their
136 | signature (like functions, each closure has its own anonymous type).
137 | 
138 | Finally, you can pass references to functions or closures too: `bar(&add_42)` or
139 | `bar(&|x| x as i64)`.
140 | 
141 | One could also write `bar` as `fn bar(f: &Fn(i32) -> i64) ...`. These two
142 | approaches (generics vs a function/trait object) have quite different semantics.
143 | In the generics case, `bar` will be monomorphised so when code is generated, the
144 | compiler knows the exact type of `f`, that means it can be statically dispatched.
145 | If using a function object, the function is not monomorphised. The exact type of
146 | `f` is not known, and so the compiler must generate a virtual dispatch. The
147 | latter is slower, but the former will produce more code (one monomorphised
148 | function per type parameter instance).
149 | 
150 | There are actually more function traits than just `Fn`; there are `FnMut` and
151 | `FnOnce` too. These are used in the same way as `Fn`, e.g., `FnOnce(i32) ->
152 | i64`. A `FnMut` represents an object which can be called and can be mutated
153 | during that call. This doesn't apply to normal functions, but for closures it
154 | means the closure can mutate its environment. `FnOnce` is a function which can
155 | only be called (at most) once. Again, this is only relevant for closures.
156 | 
157 | `Fn`, `FnMut`, and `FnOnce` are in a sub-trait hierarchy. `Fn`s are `FnMut`s
158 | (because one can call a `Fn` function with permission to mutate and no harm is
159 | done, but the opposite is not true). `Fn`s and `FnMut`s are `FnOnce`s (because
160 | there is no harm done if a regular function is only called once, but not the
161 | opposite).
162 | 
163 | So, to make a higher-order function as flexible as possible, you should use the
164 | `FnOnce` bound, rather than the `Fn` bound (or use the `FnMut` bound if you must
165 | call the function more than once).
166 | 
167 | 
168 | ### Methods
169 | 
170 | You can use methods in the same way as functions - take pointers to them, store
171 | them in variables, etc. You can't use the dot syntax, you must explicitly name
172 | the method using the fully explicit form of naming (sometimes called UFCS for
173 | universal function call syntax). The `self` parameter is the first argument to
174 | the method. E.g.,
175 | 
176 | ```rust
177 | struct Foo;
178 | 
179 | impl Foo {
180 |     fn bar(&self) {}
181 | }
182 | 
183 | trait T {
184 |     fn baz(&self);
185 | }
186 | 
187 | impl T for Foo {
188 |     fn baz(&self) {}
189 | }
190 | 
191 | fn main() {
192 |     // Inherent method.
193 |     let x = Foo::bar;
194 |     x(&Foo);
195 |     
196 |     // Trait method, note the fully explicit naming form.
197 |     let y = <Foo as T>::baz;
198 |     y(&Foo);
199 | }
200 | ```
201 | 
202 | 
203 | ### Generic functions
204 | 
205 | You can't take a pointer to a generic function and there is no way to express a
206 | generic function type. However, you can take a reference to a function if all
207 | its type parameters are instantiated. E.g.,
208 | 
209 | ```rust
210 | fn foo<T>(x: &T) {}
211 | 
212 | fn main() {
213 |     let x = &foo::<i32>;
214 |     x(&42);
215 | }
216 | ```
217 | 
218 | There is no way to define a generic closure. If you need a closure to work over
219 | many types you can use trait objects, macros (for generating closures), or pass
220 | a closure which returns closures (each returned closure can operate on a
221 | different type).
222 | 
223 | 
224 | ### Lifetime-generic functions and higher-ranked types
225 | 
226 | It *is* possible to have function types and closures which are generic over
227 | lifetimes. 
228 | 
229 | Imagine we have a closure which takes a borrowed reference. The closure can work
230 | the same way no matter what lifetime the reference has (and indeed in the
231 | compiled code, the lifetime will have been erased). But, what does the type look
232 | like?
233 | 
234 | For example,
235 | 
236 | ```rust
237 | fn foo<F>(x: &Bar, f: F) -> &Baz
238 |     where F: Fn(&Bar) -> &Baz
239 | {
240 |     f(x)
241 | }
242 | ```
243 | 
244 | what are the lifetimes of the references here? In this simple example, we can
245 | use a single lifetime (no need for a generic closure):
246 | 
247 | ```rust
248 | fn foo<'b, F>(x: &'b Bar, f: F) -> &'b Baz
249 |     where F: Fn(&'b Bar) -> &'b Baz
250 | {
251 |     f(x)
252 | }
253 | ```
254 | 
255 | But what if we want `f` to work on inputs with different lifetimes? Then we need
256 | a generic function type:
257 | 
258 | ```rust
259 | fn foo<'b, 'c, F>(x: &'b Bar, y: &'c Bar, f: F) -> (&'b Baz, &'c Baz)
260 |     where F: for<'a> Fn(&'a Bar) -> &'a Baz
261 | {
262 |     (f(x), f(y))
263 | }
264 | ```
265 | 
266 | The novelty here is the `for<'a>` syntax, this is used to denote a function type
267 | which is generic over a lifetime. It is read "for all 'a, ...". In theoretical
268 | terms, the function type is universally quantified.
269 | 
270 | Note that we cannot hoist up `'a` to `foo` in the above example. Counter-example:
271 | 
272 | ```rust
273 | fn foo<'a, 'b, 'c, F>(x: &'b Bar, y: &'c Bar, f: F) -> (&'b Baz, &'c Baz)
274 |     where F: Fn(&'a Bar) -> &'a Baz
275 | {
276 |     (f(x), f(y))
277 | }
278 | ```
279 | 
280 | will not compile because when the compiler infers lifetimes for a call to `foo`,
281 | it must pick a single lifetime for `'a`, which it can't do if `'b` and `'c` are
282 | different.
283 | 
284 | A function type which is generic in this way is called a higher-ranked type.
285 | Lifetime variables at the outer level have rank one. Because `'a` in the above
286 | example cannot be moved to the outer level, its rank is higher than one.
287 | 
288 | Calling functions with higher-ranked function type arguments is easy - the
289 | compiler will infer the lifetime parameters. E.g., `foo(&Bar { ... }, &Bar
290 | {...}, |b| &b.field)`.
291 | 
292 | In fact, most of the time you don't even need to worry about such things. The
293 | compiler will allow you to elide the quantified lifetimes in the same way that
294 | you are allowed to elide many lifetimes on function arguments. For example, the
295 | example above can just be written as
296 | 
297 | ```rust
298 | fn foo<'b, 'c, F>(x: &'b Bar, y: &'c Bar, f: F) -> (&'b Baz, &'c Baz)
299 |     where F: Fn(&Bar) -> &Baz
300 | {
301 |     (f(x), f(y))
302 | }
303 | ```
304 | 
305 | (and you only need `'b` and `'c` because it is a contrived example).
306 | 
307 | Where Rust sees a function type with borrowed references, it will apply the
308 | usual elision rules, and quantify the elided variables at the scope of the
309 | function type (i.e., with higher rank).
310 | 
311 | You might be wondering why bother with all this complexity for what looks like a
312 | fairly niche use case. The real motivation is functions which take a function
313 | to operate on some data provided by the outer function. For example,
314 | 
315 | ```rust
316 | fn foo<F>(f: F)
317 |     where F: Fn(&i32) // Fully explicit type: for<'a> Fn(&'a i32)
318 | {
319 |     let data = 42;
320 |     f(&data)
321 | }
322 | ```
323 | 
324 | In these cases, we *need* higher-ranked types. If we added a lifetime parameter
325 | to `foo` instead, we could never infer a correct lifetime. To see why, let's
326 | look at how it might work. Consider `fn foo<'a, F: Fn(&'a i32)> ...`. Rust
327 | requires that any lifetime parameter must outlive the item it is declared on (if
328 | this were not the case, an argument with that lifetime could be used inside that
329 | function, where it is not guaranteed to be live). In the body of `foo` we use
330 | `f(&data)`, the lifetime Rust will infer for that reference will last (at most)
331 | from where `data` is declared to where it goes out of scope. Since `'a` must
332 | outlive `foo`, but that inferred lifetime does not, we cannot call `f` in this
333 | way.
334 | 
335 | However, with higher-ranked lifetimes `f` can accept any lifetime and so the
336 | anonymous one from `&data` is fine and the function type checks.
337 | 
338 | 
339 | ### Enum constructors
340 | 
341 | This is something of a digression, but it is sometimes a useful trick. All
342 | variants of an enum define a function from the fields of the variant to the enum
343 | type. For example,
344 | 
345 | ```rust
346 | enum Foo {
347 |     Bar,
348 |     Baz(i32),
349 | }
350 | ```
351 | 
352 | defines two functions, `Foo::Bar: Fn() -> Foo` and `Foo::Baz: Fn(i32) -> Foo`.
353 | We don't normally use the variants in this way, we treat them as data types
354 | rather than functions. But sometimes it is useful, for example if we have a list
355 | of `i32`s we can create a list of `Foo`s with
356 | 
357 | ```rust
358 | list_of_i32.iter().map(Foo::Baz).collect()
359 | ```
360 | 
361 | 
362 | ## Closure flavours
363 | 
364 | A closure has two forms of input: the arguments which are passed to it explicitly
365 | and the variables it *captures* from its environment. Usually, everything about
366 | both kinds of input is inferred, but you can have more control if you want it.
367 | 
368 | For the arguments, you can declare types instead of letting Rust infer them. You
369 | can also declare a return type. Rather than writing `|x| { ... }` you can write
370 | `|x: i32| -> String { ... }`. Whether an argument is owned or borrowed is 
371 | determined by the types (either declared or inferred).
372 | 
373 | For the captured variables, the type is mostly known from the environment, but
374 | Rust does a little extra magic. Should a variable be captured by reference or
375 | value? Rust infers this from the body of the closure. If possible, Rust captures
376 | by reference. E.g.,
377 | 
378 | ```rust
379 | fn foo(x: Bar) {
380 |     let f = || { ... x ... };
381 | }
382 | ```
383 | 
384 | All being well, in the body of `f`, `x` has the type `&Bar` with a lifetime
385 | bounded by the scope of `foo`. However, if `x` is mutated, then Rust will infer
386 | that the capture is by mutable reference, i.e., `x` has type `&mut Bar`. If `x`
387 | is moved in `f` (e.g., is stored into a variable or field with value type), then
388 | Rust infers that the variable must be captured by value, i.e., it has the type
389 | `Bar`.
390 | 
391 | This can be overridden by the programmer (sometimes necessary if the closure
392 | will be stored in a field or returned from a function) by using the `move`
393 | keyword in front of a closure. Then, all of the captured variables are captured
394 | by value. E.g., in `let f = move || { ... x ... };`, `x` would always have type
395 | `Bar`.
396 | 
397 | We talked earlier about the different function kinds: `Fn`, `FnMut`, and `FnOnce`.
398 | We can now explain why we need them. For closures, the mutable-ness and once-ness
399 | refer to the captured variables. If a closure mutates any of the variables it
400 | captures then it will have a `FnMut` type (note that this is completely inferred
401 | by the compiler, no annotation is necessary). If a variable is moved into a
402 | closure, i.e., it is captured by value (either because of an explicit `move` or
403 | due to inference), then the closure will have a `FnOnce` type. It would be unsafe
404 | to call such a closure multiple times because the captured variable would be
405 | moved more than once.
406 | 
407 | Rust will do its best to infer the most flexible type for the closure if it can.
408 | 
409 | 
410 | ## Implementation
411 | 
412 | A closure is implemented as an anonymous struct. That struct has a field for
413 | each variable captured by the closure. It is lifetime-parametric with a single
414 | lifetime parameter which is a bound on the lifetime of captured variables. The
415 | anonymous struct implements a `call` method which is called to execute the
416 | closure.
417 | 
418 | For example, consider
419 | 
420 | ```rust
421 | fn main() {
422 |     let x = Foo { ... };
423 |     let f = |y| x.get_number() + y;
424 |     let z = f(42);
425 | }
426 | ```
427 | 
428 | the compiler treats this as
429 | 
430 | ```rust
431 | struct Closure14<'env> {
432 |     x: &'env Foo,
433 | }
434 | 
435 | // Not actually implemented like this, see below.
436 | impl<'env> Closure14<'env> {
437 |     fn call(&self, y: i32) -> i32 {
438 |         self.x.get_number() + y
439 |     }
440 | }
441 | 
442 | fn main() {
443 |     let x = Foo { ... };
444 |     let f = Closure14 { x: &x };
445 |     let z = f.call(42);
446 | }
447 | ```
448 | 
449 | As we mentioned above, there are three different function traits - `Fn`,
450 | `FnMut`, and `FnOnce`. In reality the `call` method is required by these traits
451 | rather than being in an inherent impl. `Fn` has a method `call` which takes
452 | `self` by reference, `FnMut` has `call_mut` taking `self` by mutable reference,
453 | and `FnOnce` has `call_once` which takes `self` by value.
454 | 
455 | When we've seen function types above, they look like `Fn(i32) -> i32` which
456 | doesn't look much like a trait type. There is a little bit of magic here. Rust allows
457 | this round bracket sugar only for function types. To desugar to a regular type
458 | (an 'angle bracket type'), the argument types are treated as a tuple type and
459 | passed as a type parameter and the return type as an associated type called
460 | `Output`. So, `Fn(i32) -> i32` is desugared to `Fn<(i32,), Output=i32>` and the
461 | `Fn` trait definition looks like
462 | 
463 | ```rust
464 | pub trait Fn<Args> : FnMut<Args> {
465 |     fn call(&self, args: Args) -> Self::Output;
466 | }
467 | ```
468 | 
469 | The implementation for `Closure14` above would therefore look more like
470 | 
471 | ```rust
472 | impl<'env> FnOnce<(i32,)> for Closure14<'env> {
473 |     type Output = i32;
474 |     fn call_once(self, args: (i32,)) -> i32 {
475 |         ...
476 |     }
477 | }
478 | impl<'env> FnMut<(i32,)> for Closure14<'env> {
479 |     fn call_mut(&mut self, args: (i32,)) -> i32 {
480 |         ...
481 |     }
482 | }
483 | impl<'env> Fn<(i32,)> for Closure14<'env> {
484 |     fn call(&self, args: (i32,)) -> i32 {
485 |         ...
486 |     }
487 | }
488 | ```
489 | 
490 | You can find the function traits in
491 | [core::ops](https://dxr.mozilla.org/rust/source/src/libcore/ops.rs).
492 | 
493 | We talked above about how using generics gives static dispatch and using trait
494 | objects gives virtual dispatch. We can now see in a bit more detail why.
495 | 
496 | When we call `call`, it is a statically dispatched method call, there is no
497 | virtual dispatch. If we pass it to a monomorphised function, we still know the
498 | type statically, and we still get a static dispatch.
499 | 
500 | We can make the closure into a trait object, e.g., `&f` or `Box::new(f)` with
501 | types `&Fn(i32)->i32` or `Box<Fn(i32)->i32>`. These are pointer types, and
502 | because they are pointer-to-trait types, the pointers are fat pointers. That
503 | means they consist of the pointer to the data itself and a pointer to a vtable.
504 | The vtable is used to lookup the address of `call` (or `call_mut` or whatever).
505 | 
506 | You'll sometimes hear these two representations of closures called boxed and
507 | unboxed closures. An unboxed closure is the by-value version with static
508 | dispatch. A boxed version is the trait object version with dynamic dispatch. In
509 | the olden days, Rust only had boxed closures (and the system was quite a bit
510 | different).
511 | 
512 | ## References
513 | 
514 | * [RFC 114 - Closures](https://github.com/rust-lang/rfcs/blob/master/text/0114-closures.md)
515 | * [Finding Closure in Rust blog post](http://huonw.github.io/blog/2015/05/finding-closure-in-rust/)
516 | * [RFC 387 - Higher ranked trait bounds](https://github.com/rust-lang/rfcs/blob/master/text/0387-higher-ranked-trait-bounds.md)
517 | * [Purging proc blog post](http://smallcultfollowing.com/babysteps/blog/2014/11/26/purging-proc/)
518 | 
519 | FIXME: relate to closures in C++ 11
520 | 


--------------------------------------------------------------------------------