├── tests ├── skeptic.rs ├── util │ ├── mod.rs │ └── drop_log.rs └── array-into-iter.rs ├── dummy-lib.rs ├── .gitignore ├── book.toml ├── src ├── contribute.md ├── SUMMARY.md ├── intro.md ├── ffi.md ├── omnipresent.md └── thread-sync.md ├── .github └── CONTRIBUTING.md ├── Cargo.toml ├── .travis.yml ├── README.md └── LICENSE /tests/skeptic.rs: -------------------------------------------------------------------------------- 1 | include!(concat!(env!("OUT_DIR"), "/skeptic-tests.rs")); 2 | 3 | -------------------------------------------------------------------------------- /tests/util/mod.rs: -------------------------------------------------------------------------------- 1 | pub use drop_log::{DropLog, LogOnDrop}; 2 | mod drop_log; 3 | -------------------------------------------------------------------------------- /dummy-lib.rs: -------------------------------------------------------------------------------- 1 | //! This library isn't actually used. 2 | //! 3 | //! It only exists for its tests. 4 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | book 2 | 3 | # ...skeptic creates this? 4 | /libtest.rmeta 5 | 6 | Cargo.lock 7 | /target 8 | **/*.rs.bk 9 | -------------------------------------------------------------------------------- /book.toml: -------------------------------------------------------------------------------- 1 | [book] 2 | authors = ["Michael Lamparski"] 3 | multilingual = false 4 | src = "src" 5 | title = "Unsafe Gotchas" 6 | -------------------------------------------------------------------------------- /src/contribute.md: -------------------------------------------------------------------------------- 1 | # Contributing 2 | 3 | The source for this book is [hosted on github](https://github.com/exphp-share/unsafe-gotchas). 4 | 5 | Please feel free to contribute your own gotchas! 
6 | -------------------------------------------------------------------------------- /.github/CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | By contributing you agree to have your contributions licensed under the [Creative Commons Zero](https://creativecommons.org/publicdomain/zero/1.0/) license (which is effectively public domain). 2 | -------------------------------------------------------------------------------- /src/SUMMARY.md: -------------------------------------------------------------------------------- 1 | # Summary 2 | 3 | [Introduction](./intro.md) 4 | 5 | - [Omnipresent concerns](./omnipresent.md) 6 | - [Concerns for FFI](./ffi.md) 7 | - [Concerns for thread synchronization](./thread-sync.md) 8 | 9 | [Contributing](./contribute.md) 10 | -------------------------------------------------------------------------------- /Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "unsafe-gotchas-tests" 3 | version = "0.1.0" 4 | authors = ["Michael Lamparski "] 5 | edition = "2018" 6 | publish = false 7 | 8 | [lib] 9 | path = "dummy-lib.rs" 10 | 11 | [dependencies] 12 | libc = "0.2" 13 | 14 | [build-dependencies] 15 | skeptic = "0.13" 16 | 17 | [dev-dependencies] 18 | skeptic = "0.13" 19 | -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | language: rust 2 | sudo: false 3 | 4 | cache: 5 | - cargo 6 | 7 | rust: 8 | - stable 9 | 10 | before_script: 11 | - (test -x $HOME/.cargo/bin/cargo-install-update || cargo install cargo-update) 12 | - (test -x $HOME/.cargo/bin/mdbook || cargo install --vers "^0.2" mdbook) 13 | - cargo install-update -a 14 | 15 | script: 16 | - mdbook build && cargo test 17 | -------------------------------------------------------------------------------- /src/intro.md: 
-------------------------------------------------------------------------------- 1 | # Introduction 2 | 3 | There are plenty of things that you're *obviously* supposed to worry about in `unsafe` code, such as making sure that you don't dereference pointers to invalid data, and that you don't use something after it is freed. But oftentimes there are problems that are not so obvious, and you might forget to think about them even if they are mentioned in the docs of an `unsafe fn`! 4 | 5 | This book is a (modest) collection of those unsafe "gotchas." 6 | 7 | ## About 8 | 9 | The source for this book is [hosted on GitHub](https://github.com/exphp-share/unsafe-gotchas). If you'd like to contribute, just submit a PR! 10 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # `unsafe` Gotchas 2 | 3 | [![Build Status](https://travis-ci.org/exphp-share/unsafe-gotchas.svg?branch=master)](https://travis-ci.org/exphp-share/unsafe-gotchas) 4 | [![License: CC0-1.0](https://img.shields.io/badge/License-CC0%201.0-lightgrey.svg)](http://creativecommons.org/publicdomain/zero/1.0/) 5 | 6 | mdbook about the not-so-obvious things to worry about in unsafe code. 7 | 8 | [View it here!](https://exphp.github.io/unsafe-gotchas/) 9 | 10 | --- 11 | 12 | To build it locally: 13 | 14 | ```sh 15 | git clone https://github.com/exphp-share/unsafe-gotchas 16 | cd unsafe-gotchas 17 | mdbook serve --open 18 | ``` 19 | 20 | This book uses [`skeptic`](https://github.com/budziq/rust-skeptic) to test its snippets. Simply run `cargo test`. 
21 | -------------------------------------------------------------------------------- /tests/util/drop_log.rs: -------------------------------------------------------------------------------- 1 | use std::rc::Rc; 2 | use std::cell::RefCell; 3 | use std::{ops, fmt}; 4 | 5 | pub struct DropLog { 6 | log: Rc>>, 7 | } 8 | 9 | pub struct LogOnDrop { 10 | value: Option, 11 | log: Rc>>, 12 | } 13 | 14 | impl Drop for LogOnDrop { 15 | fn drop(&mut self) { 16 | self.log.borrow_mut().push(self.value.take().unwrap()) 17 | } 18 | } 19 | 20 | impl DropLog { 21 | pub fn new() -> Self 22 | { DropLog { 23 | log: Rc::new(RefCell::new(vec![])), 24 | }} 25 | 26 | pub fn wrap(&self, value: T) -> LogOnDrop 27 | { LogOnDrop { 28 | value: Some(value), 29 | log: self.log.clone(), 30 | }} 31 | 32 | // NOTE: Reads to Vec so that the RefCell lock can be released. 33 | /// Read the log of all values that were dropped after 34 | /// passing through `self.wrap()`. 35 | pub fn read(&self) -> Vec 36 | where T: Clone, 37 | { self.log.borrow().to_vec() } 38 | } 39 | 40 | impl ops::Deref for LogOnDrop { 41 | type Target = T; 42 | 43 | fn deref(&self) -> &T 44 | { self.value.as_ref().unwrap() } 45 | } 46 | 47 | impl ops::DerefMut for LogOnDrop { 48 | fn deref_mut(&mut self) -> &mut T 49 | { self.value.as_mut().unwrap() } 50 | } 51 | 52 | impl PartialEq for LogOnDrop { 53 | fn eq(&self, other: &T) -> bool 54 | { self.value.as_ref().unwrap() == other } 55 | } 56 | 57 | impl fmt::Debug for LogOnDrop { 58 | fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { 59 | f.debug_tuple("LogOnDrop") 60 | .field(&self.value) 61 | .finish() 62 | } 63 | } 64 | -------------------------------------------------------------------------------- /tests/array-into-iter.rs: -------------------------------------------------------------------------------- 1 | //---- Copy the definition from src/omnipresent.md below ----- 2 | use std::mem::ManuallyDrop; 3 | use std::ptr; 4 | 5 | pub struct ArrayIntoIter { 6 | array: 
[ManuallyDrop; 3], 7 | index: usize, 8 | } 9 | 10 | impl ArrayIntoIter { 11 | pub fn new(array: [T; 3]) -> Self { 12 | let [a, b, c] = array; 13 | let wrap = ManuallyDrop::new; 14 | ArrayIntoIter { 15 | array: [wrap(a), wrap(b), wrap(c)], 16 | index: 0, 17 | } 18 | } 19 | } 20 | 21 | impl Iterator for ArrayIntoIter { 22 | type Item = T; 23 | 24 | fn next(&mut self) -> Option { 25 | match self.index { 26 | 3 => None, 27 | i => { 28 | self.index += 1; 29 | Some(ManuallyDrop::into_inner(unsafe { ptr::read(&self.array[i]) })) 30 | } 31 | } 32 | } 33 | } 34 | 35 | impl Drop for ArrayIntoIter { 36 | fn drop(&mut self) { 37 | // Run to completion 38 | self.for_each(drop); 39 | } 40 | } 41 | //------------------------------------------------------------ 42 | 43 | mod util; 44 | 45 | use crate::util::DropLog; 46 | 47 | #[test] 48 | fn no_iteration() { 49 | let log = DropLog::new(); 50 | { 51 | let array = [log.wrap(1), log.wrap(2), log.wrap(3)]; 52 | let _ = ArrayIntoIter::new(array); 53 | } 54 | assert_eq!(log.read(), vec![1, 2, 3]) 55 | } 56 | 57 | #[test] 58 | fn partial_iter() { 59 | let log = DropLog::new(); 60 | { 61 | let array = [log.wrap(1), log.wrap(2), log.wrap(3)]; 62 | let mut iter = ArrayIntoIter::new(array); 63 | assert_eq!(iter.next().unwrap(), 1); 64 | assert_eq!(iter.next().unwrap(), 2); 65 | } 66 | assert_eq!(log.read(), vec![1, 2, 3]) 67 | } 68 | 69 | #[test] 70 | fn over_iter() { 71 | let log = DropLog::new(); 72 | { 73 | let array = [log.wrap(1), log.wrap(2), log.wrap(3)]; 74 | let mut iter = ArrayIntoIter::new(array); 75 | assert_eq!(iter.next().unwrap(), 1); 76 | assert_eq!(iter.next().unwrap(), 2); 77 | assert_eq!(iter.next().unwrap(), 3); 78 | assert!(iter.next().is_none()); 79 | assert!(iter.next().is_none()); 80 | } 81 | assert_eq!(log.read(), vec![1, 2, 3]) 82 | } 83 | -------------------------------------------------------------------------------- /src/ffi.md: -------------------------------------------------------------------------------- 1 | 
Concerns for FFI 2 | ================ 3 | 4 | 5 | 6 | enums are not FFI-safe 7 | ---------------------- 8 | 9 | **What to look for:** `enum`s appearing in signatures of `extern fn`s. 10 | 11 | **Summary:** It is undefined behavior for an `enum` in rust to carry an invalid value. Therefore, do not make it possible for C code to supply the value of an `enum` type. 12 | 13 | **Incorrect:** 14 | 15 | ```rust 16 | # fn main() {} 17 | #[repr(u16)] 18 | pub enum Mode { 19 | Read = 0, 20 | Write = 1, 21 | } 22 | 23 | #[allow(unused)] 24 | extern "C" fn rust_from_c(mode: Mode) { 25 | // ... 26 | } 27 | ``` 28 | 29 | **Also incorrect:** 30 | ```rust,no_run 31 | # #[repr(u16)] 32 | # pub enum Mode { 33 | # Read = 0, 34 | # Write = 1, 35 | # } 36 | # 37 | extern "C" { 38 | fn c_from_rust(mode: *mut Mode); 39 | } 40 | 41 | fn main() { 42 | let mut mode = Mode::Read; 43 | unsafe { c_from_rust(&mut mode); } 44 | } 45 | ``` 46 | 47 | 48 | 49 | `CString::from_raw` 50 | ------------------- 51 | 52 | **Things to look for:** Any usage of `CString::{into_raw, from_raw}`. 53 | 54 | **Summary:** As documented, `CString::from_raw` recomputes the length by scanning for a null byte. What it doesn't (currently) mention is that **this length must match the original length.** 55 | 56 | I think you'll be hard pressed to find any C API function that mutates a `char *` without changing its length! 
57 | 58 | **Incorrect** 59 | 60 | ```rust,no_run 61 | extern crate libc; 62 | 63 | use std::ffi::{CString, CStr}; 64 | 65 | fn main() { 66 | let ptr = CString::new("Hello, world!").unwrap().into_raw(); 67 | let delim = CString::new(" ").unwrap(); 68 | 69 | let first_word_ptr = unsafe { libc::strtok(ptr, delim.as_ptr()) }; 70 | 71 | assert_eq!( 72 | unsafe { CStr::from_ptr(first_word_ptr) }, 73 | &CString::new("Hello,").unwrap()[..], 74 | ); 75 | 76 | drop(unsafe { CString::from_raw(ptr) }); 77 | } 78 | ``` 79 | 80 | This is incorrect because `strtok` inserts a NUL byte after the comma in `"Hello, world!"`, causing the `CString` to have a different length once it is reconstructed. As a result, when the CString is freed, it will pass the wrong size to the allocator. 81 | 82 | The fix is to never use these methods. If a C API needs to modify a string, use a `Vec<u8>` buffer instead. 83 | 84 | **Correct** 85 | 86 | ```rust 87 | extern crate libc; 88 | 89 | use std::ffi::{CString, CStr}; 90 | use libc::c_char; 91 | 92 | fn main() { 93 | let mut buf = CString::new("Hello, world!").unwrap().into_bytes_with_nul(); 94 | let delim = CString::new(" ").unwrap(); 95 | 96 | let first_word_ptr = unsafe { 97 | libc::strtok(buf.as_mut_ptr() as *mut c_char, delim.as_ptr()) 98 | }; 99 | 100 | assert_eq!( 101 | unsafe { CStr::from_ptr(first_word_ptr) }, 102 | &CString::new("Hello,").unwrap()[..], 103 | ); 104 | } 105 | ``` 106 | 107 | 108 | 109 | ### Also: Store a `CString` to a local before calling `as_ptr()` 110 | 111 | Just as an aside, there's another footgun here. If I had written: 112 | 113 | **Incorrect:** 114 | 115 | ```rust,no_run 116 | # use std::ffi::CString; 117 | # fn main() { 118 | let delim = CString::new(" ").unwrap().as_ptr(); 119 | # let _ = delim; 120 | # } 121 | ``` 122 | 123 | the buffer would have been freed immediately. 
124 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | CC0 1.0 Universal 2 | 3 | Statement of Purpose 4 | 5 | The laws of most jurisdictions throughout the world automatically confer 6 | exclusive Copyright and Related Rights (defined below) upon the creator and 7 | subsequent owner(s) (each and all, an "owner") of an original work of 8 | authorship and/or a database (each, a "Work"). 9 | 10 | Certain owners wish to permanently relinquish those rights to a Work for the 11 | purpose of contributing to a commons of creative, cultural and scientific 12 | works ("Commons") that the public can reliably and without fear of later 13 | claims of infringement build upon, modify, incorporate in other works, reuse 14 | and redistribute as freely as possible in any form whatsoever and for any 15 | purposes, including without limitation commercial purposes. These owners may 16 | contribute to the Commons to promote the ideal of a free culture and the 17 | further production of creative, cultural and scientific works, or to gain 18 | reputation or greater distribution for their Work in part through the use and 19 | efforts of others. 20 | 21 | For these and/or other purposes and motivations, and without any expectation 22 | of additional consideration or compensation, the person associating CC0 with a 23 | Work (the "Affirmer"), to the extent that he or she is an owner of Copyright 24 | and Related Rights in the Work, voluntarily elects to apply CC0 to the Work 25 | and publicly distribute the Work under its terms, with knowledge of his or her 26 | Copyright and Related Rights in the Work and the meaning and intended legal 27 | effect of CC0 on those rights. 28 | 29 | 1. Copyright and Related Rights. A Work made available under CC0 may be 30 | protected by copyright and related or neighboring rights ("Copyright and 31 | Related Rights"). 
Copyright and Related Rights include, but are not limited 32 | to, the following: 33 | 34 | i. the right to reproduce, adapt, distribute, perform, display, communicate, 35 | and translate a Work; 36 | 37 | ii. moral rights retained by the original author(s) and/or performer(s); 38 | 39 | iii. publicity and privacy rights pertaining to a person's image or likeness 40 | depicted in a Work; 41 | 42 | iv. rights protecting against unfair competition in regards to a Work, 43 | subject to the limitations in paragraph 4(a), below; 44 | 45 | v. rights protecting the extraction, dissemination, use and reuse of data in 46 | a Work; 47 | 48 | vi. database rights (such as those arising under Directive 96/9/EC of the 49 | European Parliament and of the Council of 11 March 1996 on the legal 50 | protection of databases, and under any national implementation thereof, 51 | including any amended or successor version of such directive); and 52 | 53 | vii. other similar, equivalent or corresponding rights throughout the world 54 | based on applicable law or treaty, and any national implementations thereof. 55 | 56 | 2. Waiver. To the greatest extent permitted by, but not in contravention of, 57 | applicable law, Affirmer hereby overtly, fully, permanently, irrevocably and 58 | unconditionally waives, abandons, and surrenders all of Affirmer's Copyright 59 | and Related Rights and associated claims and causes of action, whether now 60 | known or unknown (including existing as well as future claims and causes of 61 | action), in the Work (i) in all territories worldwide, (ii) for the maximum 62 | duration provided by applicable law or treaty (including future time 63 | extensions), (iii) in any current or future medium and for any number of 64 | copies, and (iv) for any purpose whatsoever, including without limitation 65 | commercial, advertising or promotional purposes (the "Waiver"). 
Affirmer makes 66 | the Waiver for the benefit of each member of the public at large and to the 67 | detriment of Affirmer's heirs and successors, fully intending that such Waiver 68 | shall not be subject to revocation, rescission, cancellation, termination, or 69 | any other legal or equitable action to disrupt the quiet enjoyment of the Work 70 | by the public as contemplated by Affirmer's express Statement of Purpose. 71 | 72 | 3. Public License Fallback. Should any part of the Waiver for any reason be 73 | judged legally invalid or ineffective under applicable law, then the Waiver 74 | shall be preserved to the maximum extent permitted taking into account 75 | Affirmer's express Statement of Purpose. In addition, to the extent the Waiver 76 | is so judged Affirmer hereby grants to each affected person a royalty-free, 77 | non transferable, non sublicensable, non exclusive, irrevocable and 78 | unconditional license to exercise Affirmer's Copyright and Related Rights in 79 | the Work (i) in all territories worldwide, (ii) for the maximum duration 80 | provided by applicable law or treaty (including future time extensions), (iii) 81 | in any current or future medium and for any number of copies, and (iv) for any 82 | purpose whatsoever, including without limitation commercial, advertising or 83 | promotional purposes (the "License"). The License shall be deemed effective as 84 | of the date CC0 was applied by Affirmer to the Work. Should any part of the 85 | License for any reason be judged legally invalid or ineffective under 86 | applicable law, such partial invalidity or ineffectiveness shall not 87 | invalidate the remainder of the License, and in such case Affirmer hereby 88 | affirms that he or she will not (i) exercise any of his or her remaining 89 | Copyright and Related Rights in the Work or (ii) assert any associated claims 90 | and causes of action with respect to the Work, in either case contrary to 91 | Affirmer's express Statement of Purpose. 
92 | 93 | 4. Limitations and Disclaimers. 94 | 95 | a. No trademark or patent rights held by Affirmer are waived, abandoned, 96 | surrendered, licensed or otherwise affected by this document. 97 | 98 | b. Affirmer offers the Work as-is and makes no representations or warranties 99 | of any kind concerning the Work, express, implied, statutory or otherwise, 100 | including without limitation warranties of title, merchantability, fitness 101 | for a particular purpose, non infringement, or the absence of latent or 102 | other defects, accuracy, or the present or absence of errors, whether or not 103 | discoverable, all to the greatest extent permissible under applicable law. 104 | 105 | c. Affirmer disclaims responsibility for clearing rights of other persons 106 | that may apply to the Work or any use thereof, including without limitation 107 | any person's Copyright and Related Rights in the Work. Further, Affirmer 108 | disclaims responsibility for obtaining any necessary consents, permissions 109 | or other rights required for any use of the Work. 110 | 111 | d. Affirmer understands and acknowledges that Creative Commons is not a 112 | party to this document and has no duty or obligation with respect to this 113 | CC0 or use of the Work. 114 | 115 | For more information, please see 116 | 117 | 118 | -------------------------------------------------------------------------------- /src/omnipresent.md: -------------------------------------------------------------------------------- 1 | Omnipresent concerns 2 | ==================== 3 | 4 | These concerns may come up regardless of what kind of `unsafe` code you're writing. 5 | 6 | 7 | 8 | Drop safety 9 | ----------- 10 | 11 | **Things to look for:** 12 | 13 | * Usage of `unsafe` in any generic function that doesn't have `T: Copy` bounds. 14 | * Usage of `unsafe` near code that can panic. 15 | 16 | **Summary:** `unsafe` code often puts data in a state where it would be dangerous for a destructor to run. 
The possibility that code may unwind amplifies this problem immensely. **Most `unsafe` code needs to worry about drop safety at some point.** 17 | 18 | ### Danger: A value read using `std::ptr::read` may get dropped twice 19 | 20 | (This also applies to `<*const T>::read`, which is basically the same function) 21 | 22 | **Incorrect** 23 | 24 | ```rust 25 | # fn main() {} 26 | use std::ptr; 27 | 28 | pub struct ArrayIntoIter { 29 | array: [T; 3], 30 | index: usize, 31 | } 32 | 33 | impl Iterator for ArrayIntoIter { 34 | type Item = T; 35 | 36 | fn next(&mut self) -> Option { 37 | match self.index { 38 | 3 => None, 39 | i => { 40 | self.index += 1; 41 | Some(unsafe { ptr::read(&self.array[i]) }) 42 | } 43 | } 44 | } 45 | } 46 | ``` 47 | 48 | When the `ArrayIntoIter` is dropped, all of the elements will be dropped, even though ownership of some of the elements may have already been given away. 49 | 50 | For this reason, usage of `std::ptr::read` must almost always be paired together with usage of `std::mem::forget`, or, better yet, `std::mem::ManuallyDrop` (available since 1.20.0) which is capable of solving a broader variety of problems. 
(In fact, it is impossible to fix the above example using only `mem::forget`) 51 | 52 | **Correct** 53 | 54 | 58 | 59 | ```rust 60 | # fn main() {} 61 | use std::mem::ManuallyDrop; 62 | use std::ptr; 63 | 64 | pub struct ArrayIntoIter { 65 | array: [ManuallyDrop; 3], 66 | index: usize, 67 | } 68 | 69 | impl ArrayIntoIter { 70 | pub fn new(array: [T; 3]) -> Self { 71 | let [a, b, c] = array; 72 | let wrap = ManuallyDrop::new; 73 | ArrayIntoIter { 74 | array: [wrap(a), wrap(b), wrap(c)], 75 | index: 0, 76 | } 77 | } 78 | } 79 | 80 | impl Iterator for ArrayIntoIter { 81 | type Item = T; 82 | 83 | fn next(&mut self) -> Option { 84 | match self.index { 85 | 3 => None, 86 | i => { 87 | self.index += 1; 88 | Some(ManuallyDrop::into_inner(unsafe { ptr::read(&self.array[i]) })) 89 | } 90 | } 91 | } 92 | } 93 | 94 | impl Drop for ArrayIntoIter { 95 | fn drop(&mut self) { 96 | // Run to completion 97 | self.for_each(drop); 98 | } 99 | } 100 | ``` 101 | 102 | ### Danger: Closures can panic 103 | 104 | **Incorrect** 105 | 106 | ```rust 107 | # fn main() {} 108 | use std::ptr; 109 | 110 | pub fn filter_inplace( 111 | vec: &mut Vec, 112 | mut pred: impl FnMut(&mut T) -> bool, 113 | ) { 114 | let mut write_idx = 0; 115 | 116 | for read_idx in 0..vec.len() { 117 | if pred(&mut vec[read_idx]) { 118 | if read_idx != write_idx { 119 | unsafe { 120 | ptr::copy_nonoverlapping(&vec[read_idx], &mut vec[write_idx], 1); 121 | } 122 | } 123 | write_idx += 1; 124 | } else { 125 | drop(unsafe { ptr::read(&vec[read_idx]) }); 126 | } 127 | } 128 | unsafe { vec.set_len(write_idx); } 129 | } 130 | ``` 131 | 132 | When `pred()` panics, we never reach the final `.set_len()`, and some elements may get dropped twice. 133 | 134 | ### Danger: Any method on any safe trait can panic 135 | 136 | A generalization of the previous point. You can't even trust `clone` to not panic! 
137 | 138 | **Incorrect** 139 | 140 | ```rust 141 | # fn main() {} 142 | pub fn remove_all( 143 | vec: &mut Vec, 144 | target: &T, 145 | ) { 146 | // same as filter_inplace 147 | // but replace if pred(&mut vec[read_idx]) 148 | // with if &vec[read_idx] == target 149 | # let _ = (vec, target); 150 | } 151 | ``` 152 | 153 | ### Danger: Drop can panic! 154 | 155 | This particularly nefarious special case of the prior point will leave you tearing your hair out. 156 | 157 | **Still Incorrect:** 158 | 159 | ```rust 160 | # fn main() {} 161 | /// Marker trait for Eq impls that do not panic. 162 | /// 163 | /// # Safety 164 | /// Behavior is undefined if any of the methods of `Eq` panic. 165 | pub unsafe trait NoPanicEq: Eq {} 166 | 167 | pub fn remove_all( 168 | vec: &mut Vec, 169 | target: &T, 170 | ) { 171 | // same as before 172 | # let _ = (vec, target); 173 | } 174 | ``` 175 | 176 | In this case, the line 177 | 178 | ```rust 179 | # use std::ptr; 180 | # fn main() { 181 | # let read_idx = 0; 182 | # let vec = vec![1]; 183 | drop(unsafe { ptr::read(&vec[read_idx]) }); 184 | # } 185 | ``` 186 | 187 | in the `else` block may still panic. And in this case we should consider ourselves fortunate that the drop is even visible! Most drops will be invisible, hidden at the end of a scope. 188 | 189 | Many of these problems can be solved through extremely liberal use of `std::mem::ManuallyDrop`; basically, whenever you own a `T` or a container of `T`s, put it in a `std::mem::ManuallyDrop` so that it won't drop on unwind. Then you only need to worry about the ones you don't own (anything your function receives by `&mut` reference). 190 | 191 | 192 | 193 | Pointer alignment 194 | ----------------- 195 | 196 | **Things to look for:** Code that parses `&[u8]` into references of other types. 197 | 198 | **Summary:** Any attempt to convert a `*const T` into a `&T` (or to call `std::ptr::read`) requires an aligned pointer, in addition to all the other, more obvious requirements. 
199 | 200 | 201 | 202 | Generic usage of `std::mem::uninitialized` or `std::mem::zeroed` 203 | ---------------------------------------------------------------- 204 | 205 | **Things to look for:** Usage of either `std::mem::uninitialized` or `std::mem::zeroed` in a function with a generic type parameter `T`. 206 | 207 | **Summary:** Sometimes people try to use `std::mem::uninitialized` as a substitute for `T::default()` in cases where they cannot add a `T: Default` bound. This usage is **almost always incorrect** due to multiple edge cases. 208 | 209 | ### Danger: `T` may have a destructor 210 | 211 | Yep, these functions are yet another instance of our mortal enemy, `Drop` unsafety. 212 | 213 | **Incorrect** 214 | 215 | ```rust 216 | # #![allow(unused_assignments)] 217 | # fn main() {} 218 | pub fn call_function( 219 | func: impl FnOnce() -> T, 220 | ) -> T { 221 | let mut out: T; 222 | out = unsafe { std::mem::uninitialized() }; 223 | out = func(); // <---- 224 | out 225 | } 226 | ``` 227 | 228 | This function exhibits UB because, at the marked line, the original, uninitialized value assigned to `out` is dropped. 229 | 230 | **Still Incorrect** 231 | 232 | ```rust 233 | # fn main() {} 234 | pub fn call_function( 235 | func: impl FnOnce() -> T, 236 | ) -> T { 237 | let mut out: T; 238 | out = unsafe { std::mem::uninitialized() }; 239 | unsafe { std::ptr::write(&mut out, func()) }; 240 | out 241 | } 242 | ``` 243 | 244 | This function *still* exhibits UB because `func()` can panic, causing the uninitialized value assigned to `out` to be dropped during unwind. 
245 | 246 | ### Danger: `T` may be uninhabited 247 | 248 | **_Still_ incorrect!!** 249 | 250 | ```rust 251 | # #![allow(unused_assignments)] 252 | # fn main() {} 253 | pub fn call_function( 254 | func: impl FnOnce() -> T, 255 | ) -> T { 256 | let mut out: T; 257 | out = unsafe { std::mem::uninitialized() }; 258 | out = func(); 259 | out 260 | } 261 | ``` 262 | 263 | Here, the `Copy` bound forbids `T` from having a destructor, so we no longer have to worry about drops. However, this function still exhibits undefined behavior in the case where `T` is uninhabited: 264 | 265 | ```rust,no_run 266 | # #![allow(unused_assignments)] 267 | # fn call_function( 268 | # func: impl FnOnce() -> T, 269 | # ) -> T { 270 | # let mut out: T; 271 | # out = unsafe { std::mem::uninitialized() }; 272 | # out = func(); 273 | # out 274 | # } 275 | # 276 | /// A type that is impossible to construct. 277 | #[derive(Copy, Clone)] 278 | enum Never {} 279 | 280 | fn main() { 281 | let _: Never = call_function(|| panic!("Hello, world!")); 282 | } 283 | ``` 284 | 285 | The problem here is that `std::mem::uninitialized::<Never>` successfully returns a value of a type that cannot possibly exist. 286 | 287 | Or at least, it used to. Recent versions of the standard library (early rust `1.3x`) include an explicit check for uninhabited types inside `std::mem::{uninitialized, zeroed}`, and these functions will now panic with a nice error message. 288 | 289 | ### How about `std::mem::MaybeUninit`? 290 | 291 | This new type (on the road to stabilization in 1.36.0) has none of the issues listed above. 292 | 293 | * Dropping a `MaybeUninit<T>` does not run destructors. 294 | * The type `MaybeUninit<T>` is always inhabited even if `T` is not. 295 | 296 | This makes it significantly safer. 
297 | -------------------------------------------------------------------------------- /src/thread-sync.md: -------------------------------------------------------------------------------- 1 | Concerns for thread synchronization 2 | =================================== 3 | 4 | 5 | 6 | Shared mutability without `UnsafeCell` 7 | -------------------------------------- 8 | 9 | **What to look for:** Mutable data that is shared by multiple threads, but isn't 10 | atomic or wrapped in an `UnsafeCell`. Casts from `*const _` to `*mut _`. 11 | 12 | **Summary:** Threads usually exchange data by reading and writing to shared 13 | memory locations. But by default, Rust assumes that non-atomic data accessed via 14 | a shared `&` reference cannot change. This assumption must be suppressed using 15 | an `UnsafeCell` in objects meant for thread synchronization. 16 | 17 | **Incorrect:** 18 | 19 | ```rust 20 | # fn main() {} 21 | use std::sync::atomic::{AtomicBool, Ordering}; 22 | 23 | pub struct SpinLock { 24 | data: T, 25 | locked: AtomicBool, 26 | } 27 | 28 | impl SpinLock { 29 | pub fn new(data: T) -> Self { 30 | Self { 31 | data, 32 | locked: AtomicBool::new(false), 33 | } 34 | } 35 | 36 | pub fn try_lock(&self) -> Option> { 37 | let was_locked = self.locked.swap(true, Ordering::Acquire); 38 | if was_locked { 39 | None 40 | } else { 41 | Some(LockGuard(&self)) 42 | } 43 | } 44 | } 45 | 46 | pub struct LockGuard<'a, T>(&'a SpinLock); 47 | 48 | impl<'a, T> LockGuard<'a, T> { 49 | pub fn get_mut(&mut self) -> &mut T { 50 | let data_ptr = &self.0.data as *const _ as *mut _; 51 | unsafe { &mut *data_ptr } 52 | } 53 | } 54 | 55 | impl<'a, T> Drop for LockGuard<'a, T> { 56 | fn drop(&mut self) { 57 | self.0.locked.store(false, Ordering::Release); 58 | } 59 | } 60 | ``` 61 | 62 | **Correct:** 63 | 64 | ```rust 65 | # fn main() {} 66 | use std::cell::UnsafeCell; 67 | use std::sync::atomic::{AtomicBool, Ordering}; 68 | 69 | pub struct SpinLock { 70 | cell: UnsafeCell, 71 | locked: 
AtomicBool, 72 | } 73 | 74 | impl SpinLock { 75 | pub fn new(data: T) -> Self { 76 | Self { 77 | cell: UnsafeCell::new(data), 78 | locked: AtomicBool::new(false), 79 | } 80 | } 81 | 82 | pub fn try_lock(&self) -> Option> { 83 | let was_locked = self.locked.swap(true, Ordering::Acquire); 84 | if was_locked { 85 | None 86 | } else { 87 | Some(LockGuard(&self)) 88 | } 89 | } 90 | } 91 | 92 | pub struct LockGuard<'a, T>(&'a SpinLock); 93 | 94 | impl<'a, T> LockGuard<'a, T> { 95 | pub fn get_mut(&mut self) -> &mut T { 96 | unsafe { &mut *self.0.cell.get() } 97 | } 98 | } 99 | 100 | impl<'a, T> Drop for LockGuard<'a, T> { 101 | fn drop(&mut self) { 102 | self.0.locked.store(false, Ordering::Release); 103 | } 104 | } 105 | ``` 106 | 107 | 108 | 109 | Multiple `&mut` to the same data 110 | -------------------------------- 111 | 112 | **What to look for:** Multiple `&mut`s to a single piece of data, or APIs that 113 | allow creating them. 114 | 115 | **Summary:** As seen above, to synchronize threads through shared memory, we 116 | need to cheat Rust's "no shared mutability" rule using `UnsafeCell`. This makes 117 | it easy to accidentally expose an API that allows creating multiple `&mut`s to a 118 | single piece of data, which is Undefined Behavior. 
119 | 120 | **Incorrect:** 121 | 122 | ```rust 123 | # fn main() {} 124 | use std::cell::UnsafeCell; 125 | use std::sync::atomic::{AtomicU32, Ordering}; 126 | 127 | pub struct RecursiveSpinLock<T> { 128 | cell: UnsafeCell<T>, 129 | owner_id: AtomicU32, 130 | } 131 | 132 | const NO_THREAD_ID: u32 = 0; 133 | static THREAD_ID_CTR: AtomicU32 = AtomicU32::new(1); 134 | thread_local!(static THREAD_ID: u32 = THREAD_ID_CTR.fetch_add(1, Ordering::Relaxed)); 135 | 136 | impl<T> RecursiveSpinLock<T> { 137 | pub fn new(data: T) -> Self { 138 | Self { 139 | cell: UnsafeCell::new(data), 140 | owner_id: AtomicU32::new(NO_THREAD_ID), 141 | } 142 | } 143 | 144 | pub fn try_lock(&self) -> Option<&mut T> { 145 | THREAD_ID.with(|&my_id| { 146 | let old_id = self.owner_id.compare_and_swap(NO_THREAD_ID, my_id, Ordering::Acquire); 147 | if old_id == NO_THREAD_ID || old_id == my_id { 148 | Some(unsafe { &mut *self.cell.get() }) 149 | } else { 150 | None 151 | } 152 | }) 153 | } 154 | 155 | pub fn unlock(&self) { 156 | THREAD_ID.with(|&my_id| { 157 | let old_id = self.owner_id.compare_and_swap(my_id, NO_THREAD_ID, Ordering::Release); 158 | assert_eq!(old_id, my_id, "Incorrect lock usage detected!"); 159 | }) 160 | } 161 | } 162 | ``` 163 | 164 | Here, a single thread calling `try_lock()` multiple times on a 165 | `RecursiveSpinLock` object (or, for that matter, slyly keeping the `&mut T` 166 | around after calling `unlock()`) can get multiple mutable references to its 167 | inner data, which is illegal in Rust. 168 | 169 | If you really need a recursive lock, you will need to make its API return 170 | a shared `&` reference, or to turn it into an unsafe API that returns a raw 171 | `*mut` pointer (possibly wrapped in `NonNull`). 172 | 173 | 174 | 175 | Data races 176 | ---------- 177 | 178 | **What to look for:** One thread writing to a piece of data in a fashion that is 179 | observable by another thread writing to or reading from it. 
180 | 181 | **Summary:** Even in the presence of an `UnsafeCell`, data races are undefined 182 | behavior. Intuitions of memory accesses based on reading the code may not match 183 | the actual memory access patterns of optimized binaries running on modern 184 | out-of-order CPUs. Please ensure that other threads wait for writes to be 185 | finished before accessing the shared data. 186 | 187 | **Incorrect:** 188 | 189 | ```rust 190 | # fn main() {} 191 | use std::cell::UnsafeCell; 192 | use std::sync::atomic::{AtomicBool, Ordering}; 193 | 194 | pub struct Racey<T> { 195 | cell: UnsafeCell<T>, 196 | writing: AtomicBool, 197 | } 198 | 199 | impl<T> Racey<T> { 200 | pub fn new(data: T) -> Self { 201 | Self { 202 | cell: UnsafeCell::new(data), 203 | writing: AtomicBool::new(false), 204 | } 205 | } 206 | 207 | pub fn read(&self) -> *const T { 208 | self.cell.get() 209 | } 210 | 211 | pub fn try_write(&self) -> Option<WriteGuard<T>> { 212 | let was_writing = self.writing.swap(true, Ordering::Acquire); 213 | if was_writing { 214 | None 215 | } else { 216 | Some(WriteGuard(&self)) 217 | } 218 | } 219 | } 220 | 221 | pub struct WriteGuard<'a, T>(&'a Racey<T>); 222 | 223 | impl<'a, T> WriteGuard<'a, T> { 224 | // Notice the use of &mut self, which prevents multiple &mut T from being created 225 | pub fn get_mut(&mut self) -> &mut T { 226 | unsafe { &mut *self.0.cell.get() } 227 | } 228 | } 229 | 230 | impl<'a, T> Drop for WriteGuard<'a, T> { 231 | fn drop(&mut self) { 232 | self.0.writing.store(false, Ordering::Release); 233 | } 234 | } 235 | ``` 236 | 237 | Although this design correctly prevents multiple writers from acquiring an 238 | `&mut` to the data at the same time (which, as we've seen, is UB even if they 239 | don't use those references), it does not prevent readers from observing the 240 | writes of the writers. 
241 | 242 | For that matter, simply modifying `read` to return a `&T` instead of a 243 | `*const T` would be Undefined Behavior per se, because `&mut` and `&` references 244 | are not allowed to coexist. 245 | 246 | 247 | 248 | Insufficient synchronization 249 | ---------------------------- 250 | 251 | **What to look for:** Insufficient atomic memory orderings and unforeseen 252 | interleavings of thread operations on shared memory. 253 | 254 | **Summary:** Modern optimizing compilers and CPUs will add, remove, and reorder 255 | memory accesses in a fashion that is observable by other threads. It is your 256 | responsibility to tell the compiler which of these alterations should be 257 | prevented so that your code remains correct. 258 | 259 | **Incorrect:** 260 | 261 | ```rust 262 | # fn main() {} 263 | use std::cell::UnsafeCell; 264 | use std::sync::atomic::{AtomicBool, Ordering}; 265 | 266 | pub struct SpinLock<T> { 267 | cell: UnsafeCell<T>, 268 | locked: AtomicBool, 269 | } 270 | 271 | impl<T> SpinLock<T> { 272 | pub fn new(data: T) -> Self { 273 | Self { 274 | cell: UnsafeCell::new(data), 275 | locked: AtomicBool::new(false), 276 | } 277 | } 278 | 279 | pub fn try_lock(&self) -> Option<LockGuard<T>> { 280 | let was_locked = self.locked.swap(true, Ordering::Relaxed); 281 | if was_locked { 282 | None 283 | } else { 284 | Some(LockGuard(&self)) 285 | } 286 | } 287 | } 288 | 289 | pub struct LockGuard<'a, T>(&'a SpinLock<T>); 290 | 291 | impl<'a, T> LockGuard<'a, T> { 292 | pub fn get_mut(&mut self) -> &mut T { 293 | unsafe { &mut *self.0.cell.get() } 294 | } 295 | } 296 | 297 | impl<'a, T> Drop for LockGuard<'a, T> { 298 | fn drop(&mut self) { 299 | self.0.locked.store(false, Ordering::Relaxed); 300 | } 301 | } 302 | ``` 303 | 304 | Use of `Relaxed` memory ordering means that the compiler and CPU are allowed to 305 | move reads and writes to the lock-protected data before the atomic swap that 306 | acquires the lock or after the atomic store that releases the lock. 
This may 307 | result in data races. 308 | 309 | **Correct:** 310 | 311 | ```rust 312 | # fn main() {} 313 | use std::cell::UnsafeCell; 314 | use std::sync::atomic::{AtomicBool, Ordering}; 315 | 316 | pub struct SpinLock<T> { 317 | cell: UnsafeCell<T>, 318 | locked: AtomicBool, 319 | } 320 | 321 | impl<T> SpinLock<T> { 322 | pub fn new(data: T) -> Self { 323 | Self { 324 | cell: UnsafeCell::new(data), 325 | locked: AtomicBool::new(false), 326 | } 327 | } 328 | 329 | pub fn try_lock(&self) -> Option<LockGuard<T>> { 330 | let was_locked = self.locked.swap(true, Ordering::Acquire); 331 | if was_locked { 332 | None 333 | } else { 334 | Some(LockGuard(&self)) 335 | } 336 | } 337 | } 338 | 339 | pub struct LockGuard<'a, T>(&'a SpinLock<T>); 340 | 341 | impl<'a, T> LockGuard<'a, T> { 342 | pub fn get_mut(&mut self) -> &mut T { 343 | unsafe { &mut *self.0.cell.get() } 344 | } 345 | } 346 | 347 | impl<'a, T> Drop for LockGuard<'a, T> { 348 | fn drop(&mut self) { 349 | self.0.locked.store(false, Ordering::Release); 350 | } 351 | } 352 | ``` 353 | 354 | `Acquire` ordering ensures that no reads and writes can be speculatively carried 355 | out on the locked data before the lock has been acquired. `Release` ordering 356 | ensures that all reads and writes to locked data have been flushed to shared 357 | memory before the lock is released. 358 | 359 | Together, these memory orderings guarantee that a thread acquiring the lock will 360 | see the inner data as the thread that previously released the lock saw it. 361 | --------------------------------------------------------------------------------