├── .gitignore
├── docs
    ├── _config.yml
    ├── images
    │   ├── 3-1.png
    │   ├── 4-1.png
    │   ├── 4-2.png
    │   ├── 4-3.png
    │   ├── 7-1.png
    │   ├── 7-2.png
    │   ├── 8-1.png
    │   ├── 8-2.png
    │   ├── ref-filesystem-1.png
    │   └── ref-memory_management-1.png
    ├── reference
    │   ├── IO.md
    │   ├── deadlocks.md
    │   ├── file_systems.md
    │   ├── memory_management.md
    │   └── processes_and_threads.md
    ├── 10_testing_and_debugging_multithreaded_application.md
    ├── index.md
    ├── 09_parallel_algorithm.md
    ├── 05_designing_lock_based_concurrent_data_structure.md
    ├── 01_managing_thread.md
    ├── 06_designing_lock_free_concurrent_data_structure.md
    ├── 02_sharing_data_between_thread.md
    └── 08_advanced_thread_management.md
├── .gitattributes
├── src
    ├── thread_poll.hpp
    ├── concurrent_stack.hpp
    ├── concurrent_list.hpp
    ├── hierarchical_mutex.cpp
    ├── lock_free_stack.hpp
    ├── concurrent_map.hpp
    ├── concurrent_queue.hpp
    ├── lock_free_stack_hazard_pointer.hpp
    └── atm.cpp
├── README.md
└── LICENSE


/.gitignore:
--------------------------------------------------------------------------------
1 | .vscode
2 | 


--------------------------------------------------------------------------------
/docs/_config.yml:
--------------------------------------------------------------------------------
1 | theme: jekyll-theme-cayman
2 | 


--------------------------------------------------------------------------------
/.gitattributes:
--------------------------------------------------------------------------------
1 | # Auto detect text files and perform LF normalization
2 | * text=auto
3 | 


--------------------------------------------------------------------------------
/docs/images/3-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/downdemo/Cpp-Concurrency-in-Action-2ed/HEAD/docs/images/3-1.png


--------------------------------------------------------------------------------
/docs/images/4-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/downdemo/Cpp-Concurrency-in-Action-2ed/HEAD/docs/images/4-1.png


--------------------------------------------------------------------------------
/docs/images/4-2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/downdemo/Cpp-Concurrency-in-Action-2ed/HEAD/docs/images/4-2.png


--------------------------------------------------------------------------------
/docs/images/4-3.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/downdemo/Cpp-Concurrency-in-Action-2ed/HEAD/docs/images/4-3.png


--------------------------------------------------------------------------------
/docs/images/7-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/downdemo/Cpp-Concurrency-in-Action-2ed/HEAD/docs/images/7-1.png


--------------------------------------------------------------------------------
/docs/images/7-2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/downdemo/Cpp-Concurrency-in-Action-2ed/HEAD/docs/images/7-2.png


--------------------------------------------------------------------------------
/docs/images/8-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/downdemo/Cpp-Concurrency-in-Action-2ed/HEAD/docs/images/8-1.png


--------------------------------------------------------------------------------
/docs/images/8-2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/downdemo/Cpp-Concurrency-in-Action-2ed/HEAD/docs/images/8-2.png


--------------------------------------------------------------------------------
/docs/images/ref-filesystem-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/downdemo/Cpp-Concurrency-in-Action-2ed/HEAD/docs/images/ref-filesystem-1.png


--------------------------------------------------------------------------------
/docs/images/ref-memory_management-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/downdemo/Cpp-Concurrency-in-Action-2ed/HEAD/docs/images/ref-memory_management-1.png


--------------------------------------------------------------------------------
/src/thread_poll.hpp:
--------------------------------------------------------------------------------
 1 | #pragma once
 2 | 
#include <condition_variable>
#include <cstddef>
#include <functional>
#include <mutex>
#include <queue>
#include <thread>
#include <utility>
#include <vector>
 9 | 
/// Fixed-size thread pool that runs submitted tasks on worker threads.
///
/// The pool owns its workers and joins them in the destructor. Workers only
/// exit once the queue is empty, so every task submitted before destruction
/// has finished when the destructor returns, and no worker can touch the
/// pool's members after they are destroyed.
class ThreadPool {
 public:
  /// Starts `n` worker threads.
  /// If starting a thread throws, the workers already started are stopped and
  /// joined before the exception propagates.
  explicit ThreadPool(std::size_t n) {
    workers_.reserve(n);
    try {
      for (std::size_t i = 0; i < n; ++i) {
        workers_.emplace_back([this] { work(); });
      }
    } catch (...) {
      // The destructor does not run for a partially constructed object, and
      // destroying a joinable std::thread calls std::terminate.
      shutdown();
      throw;
    }
  }

  /// Signals shutdown, lets the workers drain the queue, then joins them.
  ~ThreadPool() { shutdown(); }

  ThreadPool(const ThreadPool&) = delete;

  ThreadPool& operator=(const ThreadPool&) = delete;

  /// Queues `f` for execution and wakes one waiting worker.
  template <typename F>
  void submit(F&& f) {
    {
      std::lock_guard<std::mutex> l(m_);
      q_.emplace(std::forward<F>(f));
    }
    cv_.notify_one();
  }

 private:
  /// Worker loop: run queued tasks; exit once shutdown was requested and the
  /// queue is empty; otherwise sleep until notified.
  void work() {
    std::unique_lock<std::mutex> l(m_);
    while (true) {
      if (!q_.empty()) {
        auto task = std::move(q_.front());
        q_.pop();
        l.unlock();  // never hold the pool mutex while running user code
        task();
        l.lock();
      } else if (done_) {
        break;
      } else {
        cv_.wait(l);
      }
    }
  }

  void shutdown() {
    {
      // done_ is read by the workers under m_, so it is written under m_ too.
      std::lock_guard<std::mutex> l(m_);
      done_ = true;
    }
    cv_.notify_all();
    for (auto& w : workers_) {
      if (w.joinable()) {
        w.join();
      }
    }
  }

  std::mutex m_;
  std::condition_variable cv_;
  bool done_ = false;
  std::queue<std::function<void()>> q_;
  std::vector<std::thread> workers_;
};
56 | 


--------------------------------------------------------------------------------
/src/concurrent_stack.hpp:
--------------------------------------------------------------------------------
 1 | #pragma once
 2 | 
 3 | #include <exception>
 4 | #include <memory>
 5 | #include <mutex>
 6 | #include <stack>
 7 | #include <utility>
 8 | 
 9 | struct EmptyStack : std::exception {
10 |   const char* what() const noexcept { return "empty stack!"; }
11 | };
12 | 
13 | template <typename T>
14 | class ConcurrentStack {
15 |  public:
16 |   ConcurrentStack() = default;
17 | 
18 |   ConcurrentStack(const ConcurrentStack& rhs) {
19 |     std::lock_guard<std::mutex> l(rhs.m_);
20 |     s_ = rhs.s_;
21 |   }
22 | 
23 |   ConcurrentStack& operator=(const ConcurrentStack&) = delete;
24 | 
25 |   void push(T x) {
26 |     std::lock_guard<std::mutex> l(m_);
27 |     s_.push(std::move(x));
28 |   }
29 | 
30 |   bool empty() const {
31 |     std::lock_guard<std::mutex> l(m_);
32 |     return s_.empty();
33 |   }
34 | 
35 |   std::shared_ptr<T> pop() {
36 |     std::lock_guard<std::mutex> l(m_);
37 |     if (s_.empty()) {
38 |       throw EmptyStack();
39 |     }
40 |     auto res = std::make_shared<T>(std::move(s_.top()));
41 |     s_.pop();
42 |     return res;
43 |   }
44 | 
45 |   void pop(T& res) {
46 |     std::lock_guard<std::mutex> l(m_);
47 |     if (s_.empty()) {
48 |       throw EmptyStack();
49 |     }
50 |     res = std::move(s_.top());
51 |     s_.pop();
52 |   }
53 | 
54 |  private:
55 |   mutable std::mutex m_;
56 |   std::stack<T> s_;
57 | };
58 | 


--------------------------------------------------------------------------------
/src/concurrent_list.hpp:
--------------------------------------------------------------------------------
 1 | #pragma once
 2 | 
 3 | #include <memory>
 4 | #include <mutex>
 5 | #include <utility>
 6 | 
 7 | template <typename T>
 8 | class ConcurrentList {
 9 |  public:
10 |   ConcurrentList() = default;
11 | 
12 |   ~ConcurrentList() {
13 |     remove_if([](const Node&) { return true; });
14 |   }
15 | 
16 |   ConcurrentList(const ConcurrentList&) = delete;
17 | 
18 |   ConcurrentList& operator=(const ConcurrentList&) = delete;
19 | 
20 |   void push_front(const T& x) {
21 |     std::unique_ptr<Node> t(new Node(x));
22 |     std::lock_guard<std::mutex> head_lock(head_.m);
23 |     t->next = std::move(head_.next);
24 |     head_.next = std::move(t);
25 |   }
26 | 
27 |   template <typename F>
28 |   void for_each(F f) {
29 |     Node* cur = &head_;
30 |     std::unique_lock<std::mutex> head_lock(head_.m);
31 |     while (Node* const next = cur->next.get()) {
32 |       std::unique_lock<std::mutex> next_lock(next->m);
33 |       head_lock.unlock();  // 锁住了下一节点，因此可以释放上一节点的锁
34 |       f(*next->data);
35 |       cur = next;                        // 当前节点指向下一节点
36 |       head_lock = std::move(next_lock);  // 转交下一节点锁的所有权，循环上述过程
37 |     }
38 |   }
39 | 
40 |   template <typename F>
41 |   std::shared_ptr<T> find_first_if(F f) {
42 |     Node* cur = &head_;
43 |     std::unique_lock<std::mutex> head_lock(head_.m);
44 |     while (Node* const next = cur->next.get()) {
45 |       std::unique_lock<std::mutex> next_lock(next->m);
46 |       head_lock.unlock();
47 |       if (f(*next->data)) {
48 |         return next->data;  // 返回目标值，无需继续查找
49 |       }
50 |       cur = next;
51 |       head_lock = std::move(next_lock);
52 |     }
53 |     return nullptr;
54 |   }
55 | 
56 |   template <typename F>
57 |   void remove_if(F f) {
58 |     Node* cur = &head_;
59 |     std::unique_lock<std::mutex> head_lock(head_.m);
60 |     while (Node* const next = cur->next.get()) {
61 |       std::unique_lock<std::mutex> next_lock(next->m);
62 |       if (f(*next->data)) {  // 为 true 则移除下一节点
63 |         std::unique_ptr<Node> old_next = std::move(cur->next);
64 |         cur->next = std::move(next->next);  // 下一节点设为下下节点
65 |         next_lock.unlock();
66 |       } else {  // 否则继续转至下一节点
67 |         head_lock.unlock();
68 |         cur = next;
69 |         head_lock = std::move(next_lock);
70 |       }
71 |     }
72 |   }
73 | 
74 |  private:
75 |   struct Node {
76 |     std::mutex m;
77 |     std::shared_ptr<T> data;
78 |     std::unique_ptr<Node> next;
79 |     Node() = default;
80 |     Node(const T& x) : data(std::make_shared<T>(x)) {}
81 |   };
82 | 
83 |   Node head_;
84 | };
85 | 


--------------------------------------------------------------------------------
/src/hierarchical_mutex.cpp:
--------------------------------------------------------------------------------
 1 | #include <iostream>
 2 | #include <mutex>
 3 | #include <stdexcept>
 4 | 
 5 | class HierarchicalMutex {
 6 |  public:
 7 |   explicit HierarchicalMutex(int hierarchy_value)
 8 |       : cur_hierarchy_(hierarchy_value), prev_hierarchy_(0) {}
 9 | 
10 |   void lock() {
11 |     validate_hierarchy();  // 层级错误则抛异常
12 |     m_.lock();
13 |     update_hierarchy();
14 |   }
15 | 
16 |   bool try_lock() {
17 |     validate_hierarchy();
18 |     if (!m_.try_lock()) {
19 |       return false;
20 |     }
21 |     update_hierarchy();
22 |     return true;
23 |   }
24 | 
25 |   void unlock() {
26 |     if (thread_hierarchy_ != cur_hierarchy_) {
27 |       throw std::logic_error("mutex hierarchy violated");
28 |     }
29 |     thread_hierarchy_ = prev_hierarchy_;  // 恢复前一线程的层级值
30 |     m_.unlock();
31 |   }
32 | 
33 |  private:
34 |   void validate_hierarchy() {
35 |     if (thread_hierarchy_ <= cur_hierarchy_) {
36 |       throw std::logic_error("mutex hierarchy violated");
37 |     }
38 |   }
39 | 
40 |   void update_hierarchy() {
41 |     // 先存储当前线程的层级值（用于解锁时恢复）
42 |     prev_hierarchy_ = thread_hierarchy_;
43 |     // 再把其设为锁的层级值
44 |     thread_hierarchy_ = cur_hierarchy_;
45 |   }
46 | 
47 |  private:
48 |   std::mutex m_;
49 |   const int cur_hierarchy_;
50 |   int prev_hierarchy_;
51 |   static thread_local int thread_hierarchy_;  // 所在线程的层级值
52 | };
53 | 
54 | // static thread_local 表示存活于一个线程周期
55 | thread_local int HierarchicalMutex::thread_hierarchy_(INT_MAX);
56 | 
57 | HierarchicalMutex high(10000);
58 | HierarchicalMutex mid(6000);
59 | HierarchicalMutex low(5000);
60 | 
61 | void lf() {  // 最低层函数
62 |   std::lock_guard<HierarchicalMutex> l(low);
63 |   // 调用 low.lock()，thread_hierarchy_ 为 INT_MAX，
64 |   // cur_hierarchy_ 为 5000，thread_hierarchy_ > cur_hierarchy_，
65 |   // 通过检查，上锁，prev_hierarchy_ 更新为 INT_MAX，
66 |   // thread_hierarchy_ 更新为 5000
67 | }  // 调用 low.unlock()，thread_hierarchy_ == cur_hierarchy_，
68 | // 通过检查，thread_hierarchy_ 恢复为 prev_hierarchy_ 保存的 INT_MAX，解锁
69 | 
70 | void hf() {
71 |   std::lock_guard<HierarchicalMutex> l(high);  // high.cur_hierarchy_ 为 10000
72 |   // thread_hierarchy_ 为 10000，可以调用低层函数
73 |   lf();  // thread_hierarchy_ 从 10000 更新为 5000
74 |   //  thread_hierarchy_ 恢复为 10000
75 | }  //  thread_hierarchy_ 恢复为 INT_MAX
76 | 
77 | void mf() {
78 |   std::lock_guard<HierarchicalMutex> l(mid);  // thread_hierarchy_ 为 6000
79 |   hf();  // thread_hierarchy_ < high.cur_hierarchy_，违反了层级结构，抛异常
80 | }
81 | 
82 | int main() {
83 |   lf();
84 |   hf();
85 |   try {
86 |     mf();
87 |   } catch (std::logic_error& ex) {
88 |     std::cout << ex.what();
89 |   }
90 | }
91 | 


--------------------------------------------------------------------------------
/src/lock_free_stack.hpp:
--------------------------------------------------------------------------------
 1 | #pragma once
 2 | 
 3 | #include <atomic>
 4 | #include <memory>
 5 | 
 6 | template <typename T>
 7 | class LockFreeStack {
 8 |  public:
 9 |   ~LockFreeStack() {
10 |     while (pop()) {
11 |     }
12 |   }
13 | 
14 |   void push(const T& x) {
15 |     ReferenceCount t;
16 |     t.p = new Node(x);
17 |     t.external_cnt = 1;
18 |     // 下面比较中 release 保证之前的语句都先执行，因此 load 可以使用 relaxed
19 |     t.p->next = head_.load(std::memory_order_relaxed);
20 |     while (!head_.compare_exchange_weak(t.p->next, t, std::memory_order_release,
21 |                                         std::memory_order_relaxed)) {
22 |     }
23 |   }
24 | 
25 |   std::shared_ptr<T> pop() {
26 |     ReferenceCount t = head_.load(std::memory_order_relaxed);
27 |     while (true) {
28 |       increase_count(t);  // acquire
29 |       Node* p = t.p;
30 |       if (!p) {
31 |         return nullptr;
32 |       }
33 |       if (head_.compare_exchange_strong(t, p->next,
34 |                                         std::memory_order_relaxed)) {
35 |         std::shared_ptr<T> res;
36 |         res.swap(p->v);
37 |         // 将外部计数减 2 后加到内部计数，减 2 是因为，
38 |         // 节点被删除减 1，该线程无法再次访问此节点再减 1
39 |         const int cnt = t.external_cnt - 2;
40 |         // swap 要先于 delete，因此使用 release
41 |         if (p->inner_cnt.fetch_add(cnt, std::memory_order_release) == -cnt) {
42 |           delete p;  // 内外部计数和为 0
43 |         }
44 |         return res;
45 |       }
46 |       if (p->inner_cnt.fetch_sub(1, std::memory_order_relaxed) == 1) {
47 |         p->inner_cnt.load(std::memory_order_acquire);  // 只是用 acquire 来同步
48 |         // acquire 保证 delete 在之后执行
49 |         delete p;  // 内部计数为 0
50 |       }
51 |     }
52 |   }
53 | 
54 |  private:
55 |   struct Node;
56 | 
57 |   struct ReferenceCount {
58 |     int external_cnt;
59 |     Node* p = nullptr;
60 |   };
61 | 
62 |   struct Node {
63 |     std::shared_ptr<T> v;
64 |     std::atomic<int> inner_cnt = 0;
65 |     ReferenceCount next;
66 |     Node(const T& x) : v(std::make_shared<T>(x)) {}
67 |   };
68 | 
69 |   void increase_count(ReferenceCount& old_cnt) {
70 |     ReferenceCount new_cnt;
71 |     do {  // 比较失败不改变当前值，并可以继续循环，因此可以选择 relaxed
72 |       new_cnt = old_cnt;
73 |       ++new_cnt.external_cnt;  // 访问 head_ 时递增外部计数，表示该节点正被使用
74 |     } while (!head_.compare_exchange_strong(old_cnt, new_cnt,
75 |                                             std::memory_order_acquire,
76 |                                             std::memory_order_relaxed));
77 |     old_cnt.external_cnt = new_cnt.external_cnt;
78 |   }
79 | 
80 |  private:
81 |   std::atomic<ReferenceCount> head_;
82 | };
83 | 


--------------------------------------------------------------------------------
/src/concurrent_map.hpp:
--------------------------------------------------------------------------------
 1 | #pragma once
 2 | 
 3 | #include <algorithm>
 4 | #include <functional>
 5 | #include <list>
 6 | #include <map>
 7 | #include <memory>
 8 | #include <mutex>
 9 | #include <shared_mutex>
10 | #include <utility>
11 | #include <vector>
12 | 
/// Hash map with a fixed number of buckets, each protected by its own
/// std::shared_mutex, so operations on different buckets do not contend and
/// reads of the same bucket can run in parallel.
template <typename K, typename V, typename Hash = std::hash<K>>
class ConcurrentMap {
 public:
  /// @param n  number of buckets, 19 by default (a bucket index is
  ///           hash % n, and a prime count spreads keys evenly). 0 is
  ///           treated as 1 so the index computation never divides by zero.
  /// @param h  hash function object.
  ConcurrentMap(std::size_t n = 19, const Hash& h = Hash{})
      : buckets_(n == 0 ? 1 : n), hasher_(h) {
    for (auto& x : buckets_) {
      x = std::make_unique<Bucket>();
    }
  }

  ConcurrentMap(const ConcurrentMap&) = delete;

  ConcurrentMap& operator=(const ConcurrentMap&) = delete;

  /// Returns the value stored for `k`, or `default_value` if `k` is absent.
  V get(const K& k, const V& default_value = V{}) const {
    return get_bucket(k).get(k, default_value);
  }

  /// Inserts `k` -> `v`, or overwrites the value already stored for `k`.
  void set(const K& k, const V& v) { get_bucket(k).set(k, v); }

  /// Removes `k` if present.
  void erase(const K& k) { get_bucket(k).erase(k); }

  /// Returns a consistent snapshot of the whole map as a std::map.
  std::map<K, V> to_map() const {
    // Read-only, so shared locks suffice; holding all of them at once keeps
    // writers out of every bucket while the snapshot is taken.
    std::vector<std::shared_lock<std::shared_mutex>> locks;
    locks.reserve(buckets_.size());
    for (auto& x : buckets_) {
      locks.emplace_back(x->m);
    }
    std::map<K, V> res;
    for (auto& x : buckets_) {
      for (auto& y : x->data) {
        res.emplace(y);
      }
    }
    return res;
  }

 private:
  struct Bucket {
    std::list<std::pair<K, V>> data;
    mutable std::shared_mutex m;  // protects `data`

    V get(const K& k, const V& default_value) const {
      // Modifies nothing, so a shared (reader) lock is enough.
      std::shared_lock<std::shared_mutex> l(m);
      auto it = std::find_if(data.begin(), data.end(),
                             [&](const auto& x) { return x.first == k; });
      return it == data.end() ? default_value : it->second;
    }

    void set(const K& k, const V& v) {
      std::unique_lock<std::shared_mutex> l(m);  // writer: exclusive
      auto it = std::find_if(data.begin(), data.end(),
                             [&](const auto& x) { return x.first == k; });
      if (it == data.end()) {
        data.emplace_back(k, v);
      } else {
        // Assignment may throw, but the value type is the caller's, so the
        // caller is left to handle that.
        it->second = v;
      }
    }

    void erase(const K& k) {
      std::unique_lock<std::shared_mutex> l(m);  // writer: exclusive
      auto it = std::find_if(data.begin(), data.end(),
                             [&](const auto& x) { return x.first == k; });
      if (it != data.end()) {
        data.erase(it);
      }
    }
  };

  // The bucket vector never changes after construction, so no lock is
  // needed to pick a bucket.
  Bucket& get_bucket(const K& k) const {
    return *buckets_[hasher_(k) % buckets_.size()];
  }

 private:
  std::vector<std::unique_ptr<Bucket>> buckets_;
  Hash hasher_;
};
93 | 


--------------------------------------------------------------------------------
/src/concurrent_queue.hpp:
--------------------------------------------------------------------------------
  1 | #pragma once
  2 | 
  3 | #include <condition_variable>
  4 | #include <memory>
  5 | #include <mutex>
  6 | #include <utility>
  7 | 
  8 | template <typename T>
  9 | class ConcurrentQueue {
 10 |  public:
 11 |   ConcurrentQueue() : head_(new Node), tail_(head_.get()) {}
 12 | 
 13 |   ConcurrentQueue(const ConcurrentQueue&) = delete;
 14 | 
 15 |   ConcurrentQueue& operator=(const ConcurrentQueue&) = delete;
 16 | 
 17 |   void push(T x) {
 18 |     auto new_val = std::make_shared<T>(std::move(x));
 19 |     auto new_node = std::make_unique<Node>();
 20 |     Node* new_tail_node = new_node.get();
 21 |     {
 22 |       std::lock_guard<std::mutex> l(tail_mutex_);
 23 |       tail_->v = new_val;
 24 |       tail_->next = std::move(new_node);
 25 |       tail_ = new_tail_node;
 26 |     }
 27 |     cv_.notify_one();
 28 |   }
 29 | 
 30 |   std::shared_ptr<T> try_pop() {
 31 |     std::unique_ptr<Node> head_node = try_pop_head();
 32 |     return head_node ? head_node->v : nullptr;
 33 |   }
 34 | 
 35 |   bool try_pop(T& res) {
 36 |     std::unique_ptr<Node> head_node = try_pop_head(res);
 37 |     return head_node != nullptr;
 38 |   }
 39 | 
 40 |   std::shared_ptr<T> wait_and_pop() {
 41 |     std::unique_ptr<Node> head_node = wait_pop_head();
 42 |     return head_node->v;
 43 |   }
 44 | 
 45 |   void wait_and_pop(T& res) { wait_pop_head(res); }
 46 | 
 47 |   bool empty() const {
 48 |     std::lock_guard<std::mutex> l(head_mutex_);
 49 |     return head_.get() == get_tail();
 50 |   }
 51 | 
 52 |  private:
 53 |   struct Node {
 54 |     std::shared_ptr<T> v;
 55 |     std::unique_ptr<Node> next;
 56 |   };
 57 | 
 58 |  private:
 59 |   std::unique_ptr<Node> try_pop_head() {
 60 |     std::lock_guard<std::mutex> l(head_mutex_);
 61 |     if (head_.get() == get_tail()) {
 62 |       return nullptr;
 63 |     }
 64 |     return pop_head();
 65 |   }
 66 | 
 67 |   std::unique_ptr<Node> try_pop_head(T& res) {
 68 |     std::lock_guard<std::mutex> l(head_mutex_);
 69 |     if (head_.get() == get_tail()) {
 70 |       return nullptr;
 71 |     }
 72 |     res = std::move(*head_->v);
 73 |     return pop_head();
 74 |   }
 75 | 
 76 |   std::unique_ptr<Node> wait_pop_head() {
 77 |     std::unique_lock<std::mutex> l(wait_for_data());
 78 |     return pop_head();
 79 |   }
 80 | 
 81 |   std::unique_ptr<Node> wait_pop_head(T& res) {
 82 |     std::unique_lock<std::mutex> l(wait_for_data());
 83 |     res = std::move(*head_->v);
 84 |     return pop_head();
 85 |   }
 86 | 
 87 |   std::unique_lock<std::mutex> wait_for_data() {
 88 |     std::unique_lock<std::mutex> l(head_mutex_);
 89 |     cv_.wait(l, [this] { return head_.get() != get_tail(); });
 90 |     return l;
 91 |   }
 92 | 
 93 |   std::unique_ptr<Node> pop_head() {
 94 |     std::unique_ptr<Node> head_node = std::move(head_);
 95 |     head_ = std::move(head_node->next);
 96 |     return head_node;
 97 |   }
 98 | 
 99 |   Node* get_tail() {
100 |     std::lock_guard<std::mutex> l(tail_mutex_);
101 |     return tail_;
102 |   }
103 | 
104 |  private:
105 |   std::unique_ptr<Node> head_;
106 |   Node* tail_ = nullptr;
107 |   std::mutex head_mutex_;
108 |   mutable std::mutex tail_mutex_;
109 |   std::condition_variable cv_;
110 | };
111 | 


--------------------------------------------------------------------------------
/docs/reference/IO.md:
--------------------------------------------------------------------------------
 1 | ## I/O 硬件原理
 2 | 
 3 | * I/O 设备就是可以将数据输入到计算机（如鼠标、键盘），或者可以接收计算机输出数据的外部设备（如显示器）
 4 | * I/O 设备按信息交换单位可分为两类
 5 |   * 块设备（block device）：把信息存储在固定大小的块中，每个块都有自己的地址。块设备的基本特征是，传输速率快，可寻址，每个块都能独立于其他块而读写。磁盘就是最常见的块可寻址设备，无论磁盘臂当前处于什么位置，总是能寻址其他柱面并且等待所需要的磁盘块旋转到磁头下面
 6 |   * 字符设备（character device）：以字符为单位发送或接收一个字符流，而不考虑任何块结构，因此传输速率较慢，不可寻址，也没有任何寻道操作，在输入/输出时常采用中断驱动方式。打印机、鼠标就是常见的字符设备
 7 | * I/O 设备一般由机械部件和电子部件两部分组成
 8 |   * 机械部件主要用于执行具体 I/O 操作，如鼠标的按钮、键盘的按键、显示器的屏幕、硬盘的磁盘臂
 9 |   * 电子部件也称作设备控制器（device controller）或适配器（adapter），通常是主板上的芯片，或一块插入主板扩充槽的印刷电路板
10 | * CPU 无法直接控制机械部件，因此需要通过设备控制器作为中介来控制机械部件。设备控制器的主要功能有
11 |   * 接收和识别 CPU 发出的命令：每个控制器有几个寄存器用于与 CPU 通信，通过写入这些寄存器，操作系统可以命令设备发送数据、接收数据、开启或关闭，或者执行其他某些操作
12 |   * 向 CPU 报告设备的状态：通过读取这些寄存器，操作系统可以了解设备的状态，是否准备好接收一个新的命令等
13 |   * 数据交换：除了控制寄存器外，许多设备还有一个操作系统可以读写的数据缓冲区，比如在屏幕上显示像素的常规方法是使用一个视频 RAM，这一 RAM 基本上只是一个数据缓冲区，可供程序或操作系统写入数据
14 |   * 地址识别：为了区分设备控制器中的寄存器，需要给每个寄存器设置一个地址，控制器通过 CPU 提供的地址来判断 CPU 要访问的寄存器
15 | * 设备控制器中有多个寄存器，为这些寄存器编址有两种方式
16 |   * 内存映射 I/O（memory-mapped I/O）：所有设备控制器的寄存器映射到内存空间中，每个控制寄存器被分配一个唯一的内存地址，并且不会有内存被分配到这一地址
17 |   * 寄存器独立编址：每个寄存器被分配一个 I/O 端口（port）号，所有端口号形成 I/O 端口空间（I/O port space），并且受到保护使得普通用户程序不能对其进行访问，只有操作系统可以访问。这一方案中，内存地址空间和 I/O 地址空间是不同且不相关的
18 | 
19 | ## I/O 软件原理
20 | 
21 | * I/O 软件的设计有以下目标
22 |   * 设备独立性（device independence）：允许编写出的程序可以访问任意 I/O 设备而无需事先指定设备，比如读取一个文件作为输入的程序，应该能在硬盘、DVD 或 USB 盘上读取文件，无需为每一种不同的设备修改程序
23 |   * 统一命名（uniform naming）：一个文件或一个设备的名字应该是一个简单的字符串或一个整数，不应依赖于设备
24 |   * 错误处理（error handling）：一般来说，错误应该尽可能在接近硬件的层面得到处理。当控制器发现一个读错误时，如果它能够处理，就应该自己设法纠正错误。如果控制器处理不了，设备驱动程序就应当予以处理，可能只需要重读一次这块数据就正确了
25 |   * 同步（synchronous，即阻塞）和异步（asynchronous，即中断驱动）传输：大多数物理 I/O 是异步的，比如 CPU 启动传输后便转去做其他工作，直到中断发生。如果 I/O 操作是阻塞的，用户程序就更容易编写，比如 read 系统调用之后程序将自动被挂起，直到缓冲区中的数据准备好，而正是操作系统将实际异步的操作变为了在用户程序看来是阻塞式的操作
26 |   * 缓冲（buffering）：数据离开一个设备之后通常不能直接存放到最终目的地，比如从网络上进来一个数据包时，直到将该数据包存放到某个地方，并对其进行检查，操作系统才知道要将其置于何处。缓冲涉及大量复制工作，经常对 I/O 性能有重大影响
27 |   * 共享设备和独占设备：共享设备能同时让多个用户使用（如磁盘），独占设备则只能由单个用户独占使用（如磁带机）。独占设备的引入带来了各种问题（如死锁），操作系统必须能处理共享设备和独占设备以避免问题发生
28 | * I/O 有三种实现方式
29 |   * 程序控制 I/O（programmed I/O）：这是 I/O 的最简单形式。CPU 轮询设备状态，当设备准备好时，CPU 向控制器发出读指令，从 I/O 设备中读取字，再把这些字写入到存储器。这种方式的优点是实现简单，缺点是在完成全部 I/O 之前，CPU 的所有时间都被其占用，如果 CPU 有其他事情要做，轮询就导致了 CPU 利用率低
30 |   * 中断驱动 I/O ：用中断阻塞等待 I/O 的进程，CPU 在等待 I/O 设备就绪时，通过调度程序先执行其他进程。当 I/O 完成后（比如打印机打印完一个字符，准备接收下一个字符），设备控制器将向 CPU 发送一个中断信号，CPU 检测到中断信号后保存当前进程的运行环境信息，然后执行中断驱动程序来处理中断。CPU 从设备控制器读一个字的数据传送到 CPU 寄存器，再写入主存，接着 CPU 恢复其他进程的运行环境并继续执行（打印下一个字符）。中断的优点是提高了 CPU 利用率，缺点是每次只能读一个字，每次都要发生一个中断，频繁的中断处理将浪费一定的 CPU 时间
31 |   * 使用 DMA（Direct Memory Access）的 I/O ：让 DMA 控制器来完成 CPU 要做的工作，使得 CPU 可以在 I/O 期间做其他操作。有了 DMA 控制器，就不用每个字中断一次，而是减少到每个缓冲区一次。DMA 控制器通常比 CPU 慢很多，如果 CPU 在等待 DMA 中断时没有其他事情要做，采用中断驱动 I/O 甚至程序控制 I/O 也许更好
32 | 
33 | ## I/O 软件层次
34 | 
35 | * I/O 软件通常组织成四个层次，从上层到底层依次为
36 |   * 用户级 I/O 软件：实现了与用户交互的接口，为用户提供 I/O 操作相关的库函数接口，如 `printf`
37 |   * 与设备无关的操作系统软件：向用户层提供系统调用，如为 `printf` 提供 `write`，另外还要提供设备保护（设置访问权限）、缓冲、错误报告、分配与释放专用设备、建立逻辑设备名到物理设备名的映射关系等功能
38 |   * 设备驱动程序（device driver）：每个连接到计算机上的 I/O 设备都需要某些设备特定的代码来对其进行控制，这样的代码称为设备驱动程序
39 |   * 中断处理程序：进行中断处理
40 | 
41 | ## 盘
42 | 
43 | * 盘有多种多样的类型，最常用的是磁盘，它具有读写速度同样快的特点，适合作为辅助存储器（用于分页、文件系统等）
44 | * 磁盘被组织成柱面，每一个柱面包含若干磁道，磁道数与垂直堆叠的磁头个数相同，磁道又被分为若干扇区，通过 `(柱面号, 盘面号, 扇区号)` 即可定位一个磁盘块
45 | * 磁盘臂调度算法有
46 |   * 先来先服务算法（First-Come First-Served，FCFS）：按照请求接收顺序完成请求，优点是公平简单易实现，缺点是平均寻道时间较长
47 |   * 最短寻道时间优先算法（Shortest Seek Time First，SSTF）：下一次处理，磁头向所有请求中距离最近的位置移动。缺点是可能出现饥饿现象
48 |   * 扫描算法（SCAN）：也叫电梯算法（elevator algorithm），磁头持续向一个方向移动，直到到达最内侧或最外侧时才改变方向。优点是平均寻道时间较短，不会产生饥饿现象
49 |   * LOOK 调度算法：对扫描算法稍作优化，如果磁头移动方向上已没有需要处理的请求，则直接改变方向
50 |   * 循环扫描算法（C-SCAN）：SCAN 算法对于各个位置磁道的响应频率不平均，靠近磁盘两侧的可能更快被下一次访问。为了解决这个问题，C-SCAN 算法的原理是，只在一个移动方向上处理请求，磁头返回时不处理任何请求
51 |   * C-LOOK：只在一个移动方向上处理请求，如果该方向之后没有要处理的请求，则磁头返回，并且只需要返回到第一个有请求的位置
52 | 


--------------------------------------------------------------------------------
/src/lock_free_stack_hazard_pointer.hpp:
--------------------------------------------------------------------------------
  1 | #pragma once
  2 | 
  3 | #include <atomic>
  4 | #include <functional>
  5 | #include <memory>
  6 | #include <stdexcept>
  7 | #include <thread>
  8 | 
  9 | static constexpr std::size_t MaxSize = 100;
 10 | 
 11 | struct HazardPointer {
 12 |   std::atomic<std::thread::id> id;
 13 |   std::atomic<void*> p;
 14 | };
 15 | 
 16 | static HazardPointer HazardPointers[MaxSize];
 17 | 
 18 | class HazardPointerHelper {
 19 |  public:
 20 |   HazardPointerHelper() {
 21 |     for (auto& x : HazardPointers) {
 22 |       std::thread::id default_id;
 23 |       if (x.id.compare_exchange_strong(default_id,
 24 |                                        std::this_thread::get_id())) {
 25 |         hazard_pointer = &x;  // 取一个未设置过的风险指针
 26 |         break;
 27 |       }
 28 |     }
 29 |     if (!hazard_pointer) {
 30 |       throw std::runtime_error("No hazard pointers available");
 31 |     }
 32 |   }
 33 | 
 34 |   ~HazardPointerHelper() {
 35 |     hazard_pointer->p.store(nullptr);
 36 |     hazard_pointer->id.store(std::thread::id{});
 37 |   }
 38 | 
 39 |   HazardPointerHelper(const HazardPointerHelper&) = delete;
 40 | 
 41 |   HazardPointerHelper operator=(const HazardPointerHelper&) = delete;
 42 | 
 43 |   std::atomic<void*>& get() { return hazard_pointer->p; }
 44 | 
 45 |  private:
 46 |   HazardPointer* hazard_pointer = nullptr;
 47 | };
 48 | 
 49 | std::atomic<void*>& hazard_pointer_for_this_thread() {
 50 |   static thread_local HazardPointerHelper t;
 51 |   return t.get();
 52 | }
 53 | 
 54 | bool is_existing(void* p) {
 55 |   for (auto& x : HazardPointers) {
 56 |     if (x.p.load() == p) {
 57 |       return true;
 58 |     }
 59 |   }
 60 |   return false;
 61 | }
 62 | 
 63 | template <typename T>
 64 | class LockFreeStack {
 65 |  public:
 66 |   void push(const T& x) {
 67 |     Node* t = new Node(x);
 68 |     t->next = head_.load();
 69 |     while (!head_.compare_exchange_weak(t->next, t)) {
 70 |     }
 71 |   }
 72 | 
 73 |   std::shared_ptr<T> pop() {
 74 |     std::atomic<void*>& hazard_pointer = hazard_pointer_for_this_thread();
 75 |     Node* t = head_.load();
 76 |     do {  // 外循环确保 t 为最新的头节点，循环结束后将头节点设为下一节点
 77 |       Node* t2;
 78 |       do {  // 循环至风险指针保存当前最新的头节点
 79 |         t2 = t;
 80 |         hazard_pointer.store(t);
 81 |         t = head_.load();
 82 |       } while (t != t2);
 83 |     } while (t && !head_.compare_exchange_strong(t, t->next));
 84 |     hazard_pointer.store(nullptr);
 85 |     std::shared_ptr<T> res;
 86 |     if (t) {
 87 |       res.swap(t->v);
 88 |       if (is_existing(t)) {
 89 |         append_to_delete_list(new DataToDelete{t});
 90 |       } else {
 91 |         delete t;
 92 |       }
 93 |       try_delete();
 94 |     }
 95 |     return res;
 96 |   }
 97 | 
 98 |  private:
 99 |   struct Node {
100 |     std::shared_ptr<T> v;
101 |     Node* next = nullptr;
102 |     Node(const T& x) : v(std::make_shared<T>(x)) {}
103 |   };
104 | 
105 |   struct DataToDelete {
106 |    public:
107 |     template <typename T>
108 |     DataToDelete(T* p)
109 |         : data(p), deleter([](void* p) { delete static_cast<T*>(p); }) {}
110 | 
111 |     ~DataToDelete() { deleter(data); }
112 | 
113 |     void* data = nullptr;
114 |     std::function<void(void*)> deleter;
115 |     DataToDelete* next = nullptr;
116 |   };
117 | 
118 |  private:
119 |   void append_to_delete_list(DataToDelete* t) {
120 |     t->next = to_delete_list_.load();
121 |     while (!to_delete_list_.compare_exchange_weak(t->next, t)) {
122 |     }
123 |   }
124 | 
125 |   void try_delete() {
126 |     DataToDelete* cur = to_delete_list_.exchange(nullptr);
127 |     while (cur) {
128 |       DataToDelete* t = cur->next;
129 |       if (!is_existing(cur->data)) {
130 |         delete cur;
131 |       } else {
132 |         append_to_delete_list(new DataToDelete{cur});
133 |       }
134 |       cur = t;
135 |     }
136 |   }
137 | 
138 |  private:
139 |   std::atomic<Node*> head_;
140 |   std::atomic<std::size_t> pop_cnt_;
141 |   std::atomic<DataToDelete*> to_delete_list_;
142 | };
143 | 


--------------------------------------------------------------------------------
/docs/reference/deadlocks.md:
--------------------------------------------------------------------------------
  1 | ## 资源死锁（resource deadlock）
  2 | 
  3 | * 资源分为两类
  4 |   * 可抢占资源（preemptable resource）：可以从拥有它的进程中抢占，而不会产生任何副作用，如存储器
  5 |   * 不可抢占资源（nonpreemptable resource）：在不引起相关的计算失败的情况下，无法把它从占有它的进程处抢占过来，如光盘刻录机
  6 | * 死锁主要关心不可抢占资源
  7 | * 如果一个进程集合中，每个进程都在等待集合中的其他进程才能引发的事件，则该进程集合就是死锁的。通常这个事件是其他进程释放自身占有的资源，这种死锁称为资源死锁，这是最常见的死锁类型，但不是唯一的类型
  8 | * 发生资源死锁的四个必要条件是
  9 |   * 互斥条件：每个资源要么分配给一个进程，要么是可用的
 10 |   * 占有和等待条件：已得到某个资源的进程可以再请求新的资源，并且不会释放已有资源
 11 |   * 不可抢占条件：已分配给一个进程的资源不能被强制抢占，只能被占有它的进程显式释放
 12 |   * 环路等待条件：死锁发生时，系统中必然有多个进程组成一条环路，环路中的每个进程都在等待下一个进程所占有的资源
 13 |   
 14 | ## 鸵鸟算法
 15 | 
 16 | * 最简单的解决方法是，把头埋到沙子里，假装根本没有问题发生。不同人对该方法的看法也不同，数学家认为这种方法完全不可接受，无论代价多大都应该彻底防止死锁发生，工程师认为要根据死锁发生的频率、严重程度、系统崩溃次数来决定，如果死锁每五年发生一次，而系统每个月都会因故障崩溃一次，就没有必要用损失性能和可用性的代价去防止死锁
 17 | 
 18 | ## 死锁检测和死锁恢复
 19 | 
 20 | * 第二种技术是死锁检测和恢复，使用这种技术时，系统不阻止死锁的产生，而是允许死锁发生，在检测到死锁发生后再恢复
 21 | * 用 E 表示现有资源向量（existing resource vector），A 表示可用资源向量（available resource vector），用 C 表示当前分配矩阵（current allocation matrix），用 R 表示请求矩阵（request matrix），死锁检测的算法是
 22 |   * 在 R 中查找是否存在某一行（即一个进程）小于等于 A
 23 |   * 如果找到这样一行，就将 C 中相同行数的行（即该进程的已分配资源）加到 A 中，然后标记该进程，再转到上一步
 24 |   * 如果不存在这样一行，则算法终止。算法结束时，所有没标记过的进程都是死锁进程
 25 | * 死锁恢复方法有：抢占、回滚、终止进程
 26 | 
 27 | ## 死锁避免
 28 | 
 29 | * 如果当前状态下没有死锁发生，并且存在某种调度次序能使每个进程都运行完毕，则称该状态是安全的
 30 | * 对于目前有 3 个空闲资源的如下状态，先分配 2 个资源给 B，B 运行完释放 4 个资源，此时有 5 个空闲资源，接着 5 个资源全分配给 C，C 运行结束后释放 7 个资源，此时有 7 个空闲资源，最后将其中 6 个资源分配给 A 即可。按 BCA 的分配顺序可以使得所有进程都能完成，因此这个状态是安全的
 31 | 
 32 | |进程|已分配资源|最大需求|
 33 | |:-:|:-:|:-:|
 34 | |A|3|9|
 35 | |B|2|4|
 36 | |C|2|7|
 37 | 
 38 | * 空闲资源数为 2 时的如下状态就是不安全状态。首先只能先运行 B，B 运行结束后共有 4 个空闲资源，无法再运行 A 或 C
 39 | 
 40 | |进程|已分配资源|最大需求|
 41 | |:-:|:-:|:-:|
 42 | |A|4|9|
 43 | |B|2|4|
 44 | |C|2|7|
 45 | 
 46 | * 安全状态和不安全状态的区别是：从安全状态出发，系统可以保证所有进程都能完成，而从不安全状态出发就没有这样的保证
 47 | * Dijkstra 提出了一种避免死锁的调度算法，称为银行家算法（banker's algorithm），方法是对每一个请求进行检查，如果满足这一请求会到达安全状态，则满足该请求，否则推迟对该请求的满足
 48 | * 之前安全状态的例子考虑的就是单个资源的银行家算法，下面考虑多个资源的银行家算法
 49 | * 已分配资源
 50 | 
 51 | |进程|资源1|资源2|资源3|资源4|
 52 | |:-:|:-:|:-:|:-:|:-:|
 53 | |A|3|0|1|1|
 54 | |B|0|1|0|0|
 55 | |C|1|1|1|0|
 56 | |D|1|1|0|1|
 57 | |E|0|0|0|0|
 58 | 
 59 | * 仍需要的资源
 60 | 
 61 | |进程|资源1|资源2|资源3|资源4|
 62 | |:-:|:-:|:-:|:-:|:-:|
 63 | |A|1|1|0|0|
 64 | |B|0|1|1|2|
 65 | |C|3|1|0|0|
 66 | |D|0|0|1|0|
 67 | |E|2|1|1|0|
 68 | 
 69 | * 对应的当前分配矩阵 C 和请求矩阵 R 为
 70 | 
 71 | ```
 72 | C       R
 73 | 3011    1100
 74 | 0100    0112
 75 | 1110    3100
 76 | 1101    0010
 77 | 0000    2110
 78 | ```
 79 | 
 80 | * 用三个向量表示现有资源 E、已分配资源 P、可用资源 A，计算分配矩阵 C 的每列和得到 `P = (5322)`，以 `E = (6342)` 为例，`A = E - P = (1020)`
 81 | * 检测一个状态是否安全的算法是
 82 |   * 查找一个使用可用资源即可运行的进程，如果找不到则系统就会死锁
 83 |   * 如果找到，则假设该进程获取所需资源并运行结束，将该进程标记为终止，再将其资源加到 A 上
 84 |   * 重复上述两步，如果最后所有进程都被标记为终止，则初始状态是安全的
 85 | * 对于这个例子
 86 |   * 进程 D 仍需要的资源为 `(0010)`，均小于 `(1020)`，因此运行 D，D 最初的已分配资源为 `(1101)`，因此结束后 `A = (1020) + (1101) = (2121)`
 87 |   * 进程 A 仍需要的资源为 `(1100)`，均不大于 `(2121)`，运行 A（此时 E 也满足条件，也可以运行 E），A 最初的已分配资源为 `(3011)`，结束后 `A = (2121) + (3011) = (5132)`
 88 |   * 运行 B，结束后 `A = (5132) + (0100) = (5232)`
 89 |   * 运行 C，结束后 `A = (5232) + (1110) = (6342)`
 90 |   * 运行 E，结束后 `A = (6342) + (0000) = (6342)`
 91 |   * 所有进程都运行结束，因此这个例子的状态是安全的
 92 | 
 93 | ## 死锁预防
 94 | 
 95 | * 死锁避免本质上来说是不可能的，因为它需要获取未来的请求，而这些请求是不可知的
 96 | * 死锁发生时，四个条件必须同时成立，因此破坏其中条件即可预防发生死锁
 97 |   * 破坏互斥条件：如果资源不被一个进程独占，就一定不会发生死锁。实际情况中，如果允许两个进程同时使用打印机就会造成混乱，解决这个问题的方法是假脱机打印机技术（spooling printer）
 98 |   * 破坏占有并等待条件：禁止已持有资源的进程再等待其他资源即可。一种实现方法是，规定所有进程在开始执行前请求所需的全部资源。这种方法的问题是，很多进程在运行时才知道需要多少资源，实际上如果进程知道需要多少资源就可以使用银行家算法。另一种方法是，当进程请求资源时，先暂时释放其占有的资源，再尝试一次获取所需的全部资源
 99 |   * 破坏不可抢占条件：这种方法是可能的
100 |   * 破坏环路等待条件：对资源编号，请求必须按编号升序提出，但问题在于，几乎找不出一种使每个人都满意的编号次序
101 | 
102 | ## 通信死锁（communication deadlock）
103 | 
104 | * 除了最常见的资源死锁，还有通信死锁。通信死锁发生在通信系统（如网络）中，比如进程 A 向进程 B 发送请求信息并阻塞至 B 回复，如果 A 发送的信息丢失，就会导致 A 和 B 均阻塞，从而导致死锁
105 | * 通信死锁可以通过超时来解决，发送者在发送信息时启动计时器，如果计时器在回复到达前停止，则发送者可以认为信息已丢失，并重新发送
106 | 
107 | ## 活锁（livelock）
108 | 
109 | * 活锁不会导致进程阻塞，甚至可以说进程正在活动，因此不是死锁，但实际上进程不会继续往下执行，因此可以称为活锁
110 | 
111 | ```cpp
112 | void process_A() {
113 |   acquire_lock(&resource_1);
114 |   while (!try_lock(&resource_2)) {  // 进程 A 尝试获取资源 2 失败
115 |     release_lock(&resource_1);  // 先释放资源 1，一段时间后再尝试获取资源 2
116 |     wait_fixed_time();  // 若 B 此时也在等待，则两者都让出了资源但对方都未获取
117 |     acquire_lock(&resource_1);  // 两者各自拿回资源，则下次获取对方资源仍会失败
118 |   }                             // 若此过程一直重复就是活锁
119 |   use_both_resources();
120 |   release_lock(&resource_2);
121 |   release_lock(&resource_1);
122 | }
123 | 
124 | void process_B() {
125 |   acquire_lock(&resource_2);
126 |   while (!try_lock(&resource_1)) {
127 |     release_lock(&resource_2);
128 |     wait_fixed_time();
129 |     acquire_lock(&resource_2);
130 |   }
131 |   use_both_resources();
132 |   release_lock(&resource_1);
133 |   release_lock(&resource_2);
134 | }
135 | ```
136 | 


--------------------------------------------------------------------------------
/docs/10_testing_and_debugging_multithreaded_application.md:
--------------------------------------------------------------------------------
  1 | ## 并发相关的 bug 类型
  2 | 
  3 | * 与并发直接相关的 bug 一般可以分为两大类，一是非预期阻塞，二是 race condition
  4 | * 非预期阻塞包含以下几种情况
  5 |   * 死锁（deadlock）：两个线程互相等待，导致均无法完成工作。最明显的情况是，如果负责用户界面的线程死锁，界面将失去响应。也有一些情况是，界面可以保持响应，但一些任务无法完成，比如搜索不返回结果，或者文档不被打印
  6 |   * 活锁（livelock）：类似于死锁，不同的是线程不是阻塞等待，而是在忙碌于一个检查循环中，比如自旋锁。严重时，其表现的症状就和死锁一样，比如程序不进行，此外由于线程仍在运行，CPU 会处于高使用率状态。在不太严重的情况下，活锁最终会被操作系统的随机调度解决，但仍然会造成任务的长时间延迟，并且延迟期间 CPU 使用率很高
  7 |   * I/O 阻塞或其他外部输入：如果线程阻塞等待外部输入，就无法继续处理工作。因此如果一个线程执行的任务会被其他线程等待，就不要让这个线程等待外部输入
  8 | * 许多死锁和活锁都是由于 race condition 造成的，不过很大一部分 race condition 是良性的，比如要处理任务队列的下一个任务，决定用哪个工作线程去处理是无关紧要的。造成问题的 race condition 包含以下几种情况
  9 |   * 数据竞争（data race）：数据竞争是一种特定类型的 race condition，由于对共享内存位置的不同步的并发访问，它将导致未定义行为。数据竞争通常发生于不正确地使用原子操作来同步线程，或者不加锁访问共享数据
 10 |   * 被破坏的不变量（broken invariant）：它可以表现为空悬指针（其他线程可以删除被访问的数据）、随机内存损坏（由于局部更新导致线程读取的值不一致）、双重释放（比如两个线程弹出队列的同一个数据）等。不变量的破坏是暂时的，因为它是基于值的。如果不同线程上的操作要求以一个特定顺序执行，不正确的同步就会导致 race condition，有时就会违反这个执行顺序
 11 |   * 生命周期问题（lifetime issue）：这个问题可以归入 broken invariant，但这里单独提出来。这个问题表现为，线程比其访问的数据活得更长。一般这个问题发生于线程引用了超出范围的局部变量，但也不仅限于此，比如调用 [join](https://en.cppreference.com/w/cpp/thread/thread/join)，要考虑异常抛出时，调用不被跳过
 12 | * 通常可以通过调试器来确认死锁和活锁的线程以及它们争用的同步对象。对于数据竞争、不变量的破坏、生命周期问题，可见症状（如随机崩溃或不正确的输出）可以显示在代码的任何位置，代码可能重写系统其他部分使用的内存，并且很久以后才被触及，这个错误可能在程序执行的后期出现在与 bug 代码完全无关的位置。这就是共享内存的真正祸端，无论如何限制线程对数据的访问和确保正确的同步，任何线程都可以重写其他线程中的数据
 13 | 
 14 | ## 定位 bug 的方法
 15 | 
 16 | ### code review
 17 | 
 18 | * 让其他人或自己过段时间来 code review，因为对代码不熟悉，需要思考代码的工作方式，看待的角度也不一样，更有可能发现潜在的问题。多线程代码一般有以下问题
 19 |   * 哪些数据需要被保护，以避免并发访问
 20 |   * 如何确保数据得到保护
 21 |   * 其他线程此时可能运行到代码的哪个位置
 22 |   * 这个线程持有哪些锁
 23 |   * 其他线程持有哪些锁
 24 |   * 在这个线程中完成的操作和另一个线程中完成的操作之间是否有任何排序要求，如何执行这些要求
 25 |   * 这个线程读的数据是否仍然有效，是否可能被其他线程修改过
 26 |   * 假设另一个线程在修改数据，这意味着什么，如何确保这种情况永远不会发生
 27 | 
 28 | ### 测试
 29 | 
 30 | * 测试多线程程序的困难在于，具体的线程调度顺序是不确定的，对于相同的输入，得到的结果却不一定相同，结果可能有时是正确的，有时是错误的。因此存在潜在的 race condition 也不意味着总会得到失败的结果，有时可能也会成功
 31 | * 由于重现并发相关的 bug 很困难，所以值得仔细设计测试。最好让每个测试运行最小数量的代码，这样在测试失败时可以最好地隔离出错误代码。比如测试一个并发队列，分别测试并发的 push 和 pop 的工作，就直接比测试整个队列的功能要好
 32 | * 为了验证问题是否与并发相关，应该从测试中消除并发性。多线程中的 bug 并不意味着一定是并发相关的，如果一个问题在单线程中也总是出现，这就是一个普通的 bug，而不是并发相关的 bug。如果一个问题在单核系统中消失，而在多核或多处理器系统中总会出现，一般这就可能是一个 race condition，或同步、内存序相关的问题
 33 | * 测试用例
 34 |   * 单线程调用 push() 或 pop()，以验证 queue 的基本功能
 35 |   * 空 queue，一个线程 push()，另一个线程 pop()
 36 |   * 空 queue，多线程 push()
 37 |   * 满 queue，多线程 push()
 38 |   * 空 queue，多线程 pop()
 39 |   * 满 queue，多线程 pop()
 40 |   * 有部分数据但不够所有线程用的 queue，多线程 pop()
 41 |   * 空 queue，一个线程 pop()，多线程 push()
 42 |   * 满 queue，一个线程 pop()，多线程 push()
 43 |   * 空 queue，多线程 pop()，多线程 push()
 44 |   * 满 queue，多线程 pop()，多线程 push()
 45 | * 测试环境
 46 |   * 多线程在每种 case 中具体指多少线程 (3, 4, 1,024?)
 47 |   * 是否有足够的处理器，让每个线程运行在自己的核上
 48 |   * 在哪些处理器架构上进行测试
 49 |   * 如何确保测试中 “while” 部分（即要求同时发生的操作）得到合适的调度（suitable scheduling）
 50 | * 一般满足以下条件的代码就是易于测试的，这些条件单线程和多线程中同样适用
 51 |   * 每个函数和类的责任是清晰的
 52 |   * 函数简明扼要（short and to the point）
 53 |   * 测试可以完全控制被测代码所在环境
 54 |   * 执行特定操作的被测代码在系统中是紧密而非分散的
 55 |   * 代码在写下之前已被考虑过如何测试
 56 | * 为了测试设计并发代码的一个最好方法是消除并发，如果可以把代码分解成负责线程间通信路径的部分，以及在单线程中操作通信数据的部分，就可以极大地简化问题。对于操作通信数据的部分就可以用常规的单线程技术测试，对于负责线程间通信的部分，代码小了很多，测试也更容易
 57 | 
 58 | ### 多线程测试技术
 59 | 
 60 | * 第一种测试技术是压力测试，随着代码运行次数的增加，bug 出现的几率也更高，如果代码运行十亿次都通过，代码就很可能是没有问题的。如果测试是细粒度的（fine-grained），比如前面对并发队列的测试，压力测试就更可靠。如果粒度非常大，可能的组合也非常多，即使十亿次的测试的结果也不算可靠
 61 | * 压力测试的缺点是，如果测试本来就保证了问题不会发生，那么无论测试多少次都不会出现失败的情况，这就会造成误导。比如在单核系统上测试多线程程序，race condition 和乒乓缓存的问题根本不会出现，但这不表示这个程序在多核系统上是没问题的。又比如，不同处理器架构提供了不同的同步和内存序工具，在 x86 和 x86-64 架构上，无论使用 [memory_order_relaxed](https://en.cppreference.com/w/cpp/atomic/memory_order) 还是 [memory_order_seq_cst](https://en.cppreference.com/w/cpp/atomic/memory_order) 内存序，原子 load 操作总是一样的，这意味着在 x86 架构上使用 relaxed 语义总是可行的，但如果换成细粒度内存序指令的系统（比如 SPARC）就会失败
 62 | * 第二种测试技术是组合仿真测试（combination simulation testing），即使用一个特殊的软件来仿真真实的运行时环境。仿真软件将记录数据访问、锁定、原子操作的序列，然后使用 C++ 内存模型的规则来重复运行所有可能的操作组合，以确定 race condition 和死锁
 63 | * 虽然这种详尽的组合测试可以保证找到设计所要检测的所有问题，但会花费大量时间，因为组合的数量随线程数和每个线程执行的操作数呈指数增长，它最好用于单个代码片段的细粒度测试，而非用于整个程序。这种技术的另一个明显缺点是，它要求仿真软件能处理代码中的操作
 64 | * 第三种测试技术是使用专门的库。比如共享数据通常会用 mutex 保护，如果在访问数据时能检查哪些 mutex 被锁定了，就能验证线程在访问数据时是否锁定了相应的 mutex，如果没有锁定就报告失败。库实现也能记录上锁的顺序，如果另一个线程对同一个 mutex 以不同顺序上锁，这就会被记录为潜在的死锁
 65 | * 另一种类型的库是，同步原语的实现允许测试编写者在多线程等待时，可以控制哪个线程来获得锁，或者哪个线程被 [notify_one](https://en.cppreference.com/w/cpp/thread/condition_variable/notify_one) 通知。这就允许设置特定方案，来验证代码是否在这些方案中按预期运行
 66 | * 一些测试工具已经作为标准库实现的一部分提供了，其他的则可以基于标准库的部分手动实现
 67 | 
 68 | ### 构建多线程测试代码
 69 | 
 70 | * 多线程测试代码可以分为以下几部分
 71 |   * 必须先执行的总体设置
 72 |   * 必须运行在每个线程上的线程特定的设置
 73 |   * 要并发运行在每个线程上的代码
 74 |   * 并发执行结束后的状态断言
 75 | * 如下是对一个队列的测试代码
 76 | 
 77 | ```cpp
 78 | void test_concurrent_push_and_pop_on_empty_queue() {
 79 |   ConcurrentQueue<int> q;  // 总体设置：先创建一个队列
 80 |   std::promise<void> go, push_ready, pop_ready;
 81 |   std::shared_future<void> ready(go.get_future());
 82 |   std::future<void> push_done;
 83 |   std::future<int> pop_done;
 84 |   try {
 85 |     push_done = std::async(
 86 |         std::launch::async,  // 指定异步策略保证每个任务运行在自己的线程上
 87 |         [&q, ready, &push_ready]() {
 88 |           push_ready.set_value();
 89 |           ready.wait();
 90 |           q.push(42);  // 线程特定的设置：存入一个 int
 91 |         });
 92 |     pop_done = std::async(std::launch::async, [&q, ready, &pop_ready]() {
 93 |       pop_ready.set_value();
 94 |       ready.wait();
 95 |       return q.try_pop();
 96 |     });
 97 |     push_ready.get_future().wait();  // 等待开始测试的通知
 98 |     pop_ready.get_future().wait();   // 同上
 99 |     go.set_value();                  // 通知开始真正的测试
100 |     push_done.get();                 // 获取结果
101 |     assert(pop_done.get() == 42);    // 获取结果
102 |     assert(q.empty());
103 |   } catch (...) {
104 |     go.set_value();  // 避免空悬指针
105 |     throw;           // 再抛出异常
106 |   }
107 | }
108 | ```
109 | 
110 | ### 测试多线程代码的性能
111 | 
112 | * 使用并发的一个主要目的就是利用多核处理器来提高程序性能，因此测试代码来确保性能确实提升了是很重要的。性能相关的一个主要方面就是可扩展性，性能应该随着核数一起提升。在测试多线程代码性能时，最好在尽可能多的不同配置上进行测试
113 | 


--------------------------------------------------------------------------------
/docs/index.md:
--------------------------------------------------------------------------------
 1 | * C++11 引入了 Boost 线程库作为标准线程库，作者 Anthony Williams 为介绍其特性，于 2012 年出版了 *[C++ Concurrency in Action](https://book.douban.com/subject/4130141/)* 一书，并顺应 C++17 于 2019 年 2 月出版了[第二版](https://book.douban.com/subject/27036085/)。*[C++ Concurrency in Action 2ed](https://learning.oreilly.com/library/view/c-concurrency-in/9781617294693/)* 前五章介绍了[线程支持库](https://en.cppreference.com/w/cpp/thread)的基本用法，后六章从实践角度介绍了并发编程的设计思想，相比第一版多介绍了一些 C++17 特性，如 [std::scoped_lock](https://en.cppreference.com/w/cpp/thread/scoped_lock)、[std::shared_mutex](https://en.cppreference.com/w/cpp/thread/shared_mutex)，并多出一章（第十章）介绍 [C++17 标准库并行算法](https://en.cppreference.com/w/cpp/header/execution)，此外个人会在相应处补充 C++20 相关特性，如 [std::jthread](https://en.cppreference.com/w/cpp/thread/jthread)、[std::counting_semaphore](https://en.cppreference.com/w/cpp/thread/counting_semaphore)、[std::barrier](https://en.cppreference.com/w/cpp/thread/barrier)、[std::latch](https://en.cppreference.com/w/cpp/thread/latch) 等。阅读本书前可参考 [Andrew S. Tanenbaum](https://en.wikipedia.org/wiki/Andrew_S._Tanenbaum) 的 [*Modern Operating Systems*](https://book.douban.com/subject/25864553/)，预备操作系统的基础知识（[进程与线程](reference/processes_and_threads.html)、[死锁](reference/deadlocks.html)、[内存管理](reference/memory_management.html)、[文件系统](reference/file_systems.html)、[I/O](reference/IO.html) 等）。此为个人笔记，仅供参考，更详细内容见[原书](https://learning.oreilly.com/library/view/c-concurrency-in/9781617294693/)。
 2 | 
 3 | ## [线程支持库](https://en.cppreference.com/w/cpp/thread)
 4 | 
 5 | 1. [线程管理（Managing thread）](01_managing_thread.html)：[\<thread\>](https://en.cppreference.com/w/cpp/header/thread)
 6 | 2. [线程间共享数据（Sharing data between thread）](02_sharing_data_between_thread.html)：[\<mutex\>](https://en.cppreference.com/w/cpp/header/mutex)、[\<shared_mutex\>](https://en.cppreference.com/w/cpp/header/shared_mutex)
 7 | 3. [同步并发操作（Synchronizing concurrent operation）](03_synchronizing_concurrent_operation.html)：[\<condition_variable\>](https://en.cppreference.com/w/cpp/header/condition_variable)、[\<semaphore\>](https://en.cppreference.com/w/cpp/header/semaphore)、[\<barrier\>](https://en.cppreference.com/w/cpp/header/barrier)、[\<latch\>](https://en.cppreference.com/w/cpp/header/latch)、[\<future\>](https://en.cppreference.com/w/cpp/header/future)、[\<chrono\>](https://en.cppreference.com/w/cpp/header/chrono)、[\<ratio\>](https://en.cppreference.com/w/cpp/header/ratio)
 8 | 4. [C++ 内存模型和基于原子类型的操作（The C++ memory model and operations on atomic type）](04_the_cpp_memory_model_and_operations_on_atomic_type.html)：[\<atomic\>](https://en.cppreference.com/w/cpp/header/atomic)
 9 | 
10 | ## 并发编程实践
11 | 
12 | 5. [基于锁的并发数据结构的设计（Designing lock-based concurrent data structure）](05_designing_lock_based_concurrent_data_structure.html)
13 | 6. [无锁并发数据结构的设计（Designing lock-free concurrent data structure）](06_designing_lock_free_concurrent_data_structure.html)
14 | 7. [并发代码的设计（Designing concurrent code）](07_designing_concurrent_code.html)
15 | 8. [高级线程管理（Advanced thread management）](08_advanced_thread_management.html)
16 | 9. [并行算法（Parallel algorithm）](09_parallel_algorithm.html)：[\<execution\>](https://en.cppreference.com/w/cpp/header/execution)
17 | 10. [多线程应用的测试与调试（Testing and debugging multithreaded application）](10_testing_and_debugging_multithreaded_application.html)
18 | 
19 | ## 标准库相关头文件
20 | 
21 | |头文件|说明|
22 | |:-:|:-:|
23 | |[\<thread\>](https://en.cppreference.com/w/cpp/header/thread)、[\<stop_token\>](https://en.cppreference.com/w/cpp/header/stop_token)|线程|
24 | |[\<mutex\>](https://en.cppreference.com/w/cpp/header/mutex)、[\<shared_mutex\>](https://en.cppreference.com/w/cpp/header/shared_mutex)|锁|
25 | |[\<condition_variable\>](https://en.cppreference.com/w/cpp/header/condition_variable)|条件变量|
26 | |[\<semaphore\>](https://en.cppreference.com/w/cpp/header/semaphore)|信号量|
27 | |[\<barrier\>](https://en.cppreference.com/w/cpp/header/barrier)、[\<latch\>](https://en.cppreference.com/w/cpp/header/latch)|屏障|
28 | |[\<future\>](https://en.cppreference.com/w/cpp/header/future)|异步处理的结果|
29 | |[\<chrono\>](https://en.cppreference.com/w/cpp/header/chrono)|时钟|
30 | |[\<ratio\>](https://en.cppreference.com/w/cpp/header/ratio)|编译期有理数算术|
31 | |[\<atomic\>](https://en.cppreference.com/w/cpp/header/atomic)|原子类型和原子操作|
32 | |[\<execution\>](https://en.cppreference.com/w/cpp/header/execution)|标准库算法执行策略|
33 | 
34 | ## 并发库对比
35 | 
36 | ### [C++11 Thread](https://en.cppreference.com/w/cpp/thread)
37 | 
38 | |特性|API|
39 | |:-:|:-:|
40 | |thread|[std::thread](https://en.cppreference.com/w/cpp/thread/thread)|
41 | |mutex|[std::mutex](https://en.cppreference.com/w/cpp/thread/mutex)、[std::lock_guard](https://en.cppreference.com/w/cpp/thread/lock_guard)、[std::unique_lock](https://en.cppreference.com/w/cpp/thread/unique_lock)|
42 | |condition variable|[std::condition_variable](https://en.cppreference.com/w/cpp/thread/condition_variable)、[std::condition_variable_any](https://en.cppreference.com/w/cpp/thread/condition_variable_any)|
43 | |atomic|[std::atomic](https://en.cppreference.com/w/cpp/atomic/atomic)、[std::atomic_thread_fence](https://en.cppreference.com/w/cpp/atomic/atomic_thread_fence)|
44 | |future|[std::future](https://en.cppreference.com/w/cpp/thread/future)、[std::shared_future](https://en.cppreference.com/w/cpp/thread/shared_future)|
45 | |interruption|无|
46 | 
47 | ### [Boost Thread](https://www.boost.org/doc/libs/1_87_0/doc/html/thread.html)
48 | 
49 | |特性|API|
50 | |:-:|:-:|
51 | |thread|[boost::thread](https://www.boost.org/doc/libs/1_87_0/doc/html/thread/thread_management.html#thread.thread_management.thread)|
52 | |mutex|[boost::mutex](https://www.boost.org/doc/libs/1_87_0/doc/html/thread/synchronization.html#thread.synchronization.mutex_types.mutex)、[boost::lock_guard](https://www.boost.org/doc/libs/1_87_0/doc/html/thread/synchronization.html#thread.synchronization.lock_guard.lock_guard)、[boost::unique_lock](https://www.boost.org/doc/libs/1_87_0/doc/html/thread/synchronization.html#thread.synchronization.locks.unique_lock)|
53 | |condition variable|[boost::condition_variable](https://www.boost.org/doc/libs/1_87_0/doc/html/thread/synchronization.html#thread.synchronization.condvar_ref.condition_variable)、[boost::condition_variable_any](https://www.boost.org/doc/libs/1_87_0/doc/html/thread/synchronization.html#thread.synchronization.condvar_ref.condition_variable_any)|
54 | |atomic|无|
55 | |future|[boost::future](https://www.boost.org/doc/libs/1_87_0/doc/html/thread/synchronization.html#thread.synchronization.futures.reference.unique_future)、[boost::shared_future](https://www.boost.org/doc/libs/1_87_0/doc/html/thread/synchronization.html#thread.synchronization.futures.reference.shared_future)|
56 | |interruption|[thread::interrupt](https://www.boost.org/doc/libs/1_87_0/doc/html/thread/thread_management.html#thread.thread_management.thread.interrupt)|
57 | 
58 | ### [POSIX Thread](http://pubs.opengroup.org/onlinepubs/9699919799/basedefs/pthread.h.html)
59 | 
60 | |特性|API|
61 | |:-:|:-:|
62 | |thread|[pthread_create](http://pubs.opengroup.org/onlinepubs/9699919799/functions/pthread_create.html)、[pthread_detach](http://pubs.opengroup.org/onlinepubs/9699919799/functions/pthread_detach.html#)、[pthread_join](http://pubs.opengroup.org/onlinepubs/9699919799/functions/pthread_join.html#)|
63 | |mutex|[pthread_mutex_lock、pthread_mutex_unlock](http://pubs.opengroup.org/onlinepubs/9699919799/functions/pthread_mutex_lock.html)|
64 | |condition variable|[pthread_cond_wait](http://pubs.opengroup.org/onlinepubs/9699919799/functions/pthread_cond_wait.html)、[pthread_cond_signal](https://pubs.opengroup.org/onlinepubs/9699919799/functions/pthread_cond_signal.html)|
65 | |atomic|无|
66 | |future|无|
67 | |interruption|[pthread_cancel](http://pubs.opengroup.org/onlinepubs/9699919799/functions/pthread_cancel.html)|
68 | 
69 | ### [Java Thread](https://docs.oracle.com/en/java/javase/23/docs/api/java.base/java/lang/Thread.html)
70 | 
71 | |特性|API|
72 | |:-:|:-:|
73 | |thread|[java.lang.Thread](https://docs.oracle.com/en/java/javase/23/docs/api/java.base/java/lang/Thread.html)|
74 | |mutex|[synchronized blocks](http://tutorials.jenkov.com/java-concurrency/synchronized.html)|
75 | |condition variable|[java.lang.Object.wait](https://docs.oracle.com/en/java/javase/23/docs/api/java.base/java/lang/Object.html#wait())、[java.lang.Object.notify](https://docs.oracle.com/en/java/javase/23/docs/api/java.base/java/lang/Object.html#notify())|
76 | |atomic|volatile 变量、[java.util.concurrent.atomic](https://docs.oracle.com/en/java/javase/23/docs/api/java.base/java/util/concurrent/atomic/package-summary.html)|
77 | |future|[java.util.concurrent.Future](https://docs.oracle.com/en/java/javase/23/docs/api/java.base/java/util/concurrent/Future.html)|
78 | |interruption|[java.lang.Thread.interrupt](https://docs.oracle.com/en/java/javase/23/docs/api/java.base/java/lang/Thread.html#interrupt())|
79 | |线程安全的容器|[java.util.concurrent](https://docs.oracle.com/en/java/javase/23/docs/api/java.base/java/util/concurrent/package-summary.html) 中的容器|
80 | |线程池|[java.util.concurrent.ThreadPoolExecutor](https://docs.oracle.com/en/java/javase/23/docs/api/java.base/java/util/concurrent/ThreadPoolExecutor.html)|
81 | 


--------------------------------------------------------------------------------
/docs/reference/file_systems.md:
--------------------------------------------------------------------------------
  1 | * 进程运行时，可以在自己的地址空间存储信息，但这样保存信息的问题是
  2 |   * 对于一些程序，如银行系统，这样的存储空间太小
  3 |   * 进程终止时，保存的信息就丢失了
  4 |   * 经常需要多个进程访问同一信息，这要求信息独立于任何一个进程
  5 | * 因此，长期存储信息有三个基本要求
  6 |   * 能够存储大量信息
  7 |   * 使用信息的进程终止时，信息仍存在
  8 |   * 允许多个进程并发访问信息
  9 | * 理论上，磁盘（magnetic disk）就能解决长期存储的问题，但实际上，有许多操作不便于实现
 10 |   * 如何找到信息
 11 |   * 如何防止一个用户读取另一个用户的数据
 12 |   * 如何知道哪些块是空闲的
 13 | * 为了解决这个问题，引入文件的概念，它是一个建模于磁盘的抽象概念
 14 | * 文件由操作系统管理，文件的构造、命名、访问、使用、保护、实现、管理方法是操作系统设计的主要内容，操作系统中处理文件的部分称为文件系统（file system）
 15 | 
 16 | ## 文件
 17 | 
 18 | ### 文件命名
 19 | 
 20 | * 各个系统中的文件命名规则不同，现代操作系统都允许用 1 到 8 个字母组成的字符串作为合法的文件名，通常也允许有数字和一些特殊字符
 21 | * 一般操作系统支持文件名用圆点分隔为两部分，如 `main.cpp`，圆点后的部分称为文件扩展名（file extension）。UNIX 中，文件扩展名只是一种约定，Windows 中的扩展名则有特别意义，用户或进程可以在操作系统中注册扩展名，并规定哪个程序拥有该扩展名（即双击该文件则启动此程序并运行该文件）
 22 | 
 23 | ### 文件结构
 24 | 
 25 | * 文件可以有多种构造方式
 26 |   * 常见的一种构造方式是无结构的单字节序列，操作系统见到的就是字节，文件内容的任何含义只在用户程序中解释，UNIX 和 Windows 都采用这种方法。这为操作系统提供了最大的灵活性，用户可以向文件中加入任何内容，以任何形式命名，操作系统不提供帮助也不进行阻碍
 27 |   * 第二种构造方式是固定长度记录的序列，这种方式的中心思想是，读操作返回一个记录，写操作重写或追加一个记录。几十年前，80 列的穿孔卡片是主流时，很多大型机的操作系统使用的就是这种方式，文件由 80 个字符的记录组成，文件系统建立在这种文件基础上
 28 |   * 第三种构造方式是用一棵记录树构成文件，记录的固定位置有一个键，树按键排序，从而可以对键进行快速查找，这种方式被广泛用于处理商业数据的大型计算机
 29 | 
 30 | ### 文件类型
 31 | 
 32 | * 操作系统一般支持多种文件类型，UNIX 和 Windows 都有普通文件（regular file）和目录（directory），此外 UNIX 还有字符特殊文件（character special file）和块特殊文件（block special file）
 33 | * 普通文件一般分为 ASCII 文件和二进制文件
 34 |   * ASCII 文件由多行正文组成，每行用回车符或换行符或两者（如 MS-DOS）结束，其最大优势是可以显示、打印、编辑，如果很多程序都用 ASCII 文件作为输入和输出，就很容易把一个程序的输出作为另一个程序的输入
 35 |   * 二进制文件打印出来是充满乱码的表，通常二进制文件有一定的内部结构，使用该文件的程序才了解这种结构。比如 UNIX 存档文件，每个文件以模块头开始，其中记录了名称、创建日期、所有者、保护码、文件大小，该模块头与可执行文件一样都是二进制数字，打印输出它们毫无意义
 36 | 
 37 | ### 文件访问
 38 | 
 39 | * 早期操作系统只有顺序访问（sequential access）一种文件访问方式，进程可以从头按顺序读取文件的字节，不能跳过某一些内容。在存储介质是磁带而不是磁盘时，顺序访问文件是很方便的
 40 | * 用磁盘存储文件时，就能以任何次序读取文件的字节，能被这种方式访问的文件称为随机访问文件（random access file）。对许多程序来说，随机访问文件必不可少，比如数据库系统，查找一条记录时，不需要先读出之前的成千上万条记录
 41 | 
 42 | ### 文件属性
 43 | 
 44 | * 除了文件名和数据，操作系统还会保存文件相关的信息，如创建日期、文件大小等，这些附加信息称为文件属性（attribute）或元数据（metadata）。不同系统中的文件属性差别很大
 45 | 
 46 | ### 文件操作
 47 | 
 48 | * 使用文件是为了存储信息并方便以后检索，不同的操作系统提供了不同的方式，常见的文件相关的系统调用有 `create`、`delete`、`open`、`close`、`read`、`write`、`append`、`seek`、`get attributes`、`set attributes`、`rename`
 49 | 
 50 | ## 目录
 51 | 
 52 | * 目录系统的最简单形式是单层目录系统，即一个目录中包含所有文件，这个目录通常称为根目录，其优势是简单，且能快速定位文件，常用于简单的嵌入式装置，如电话、数码相机
 53 | * 现在的用户通常有成千上万的文件，用单层目录寻找文件就很困难了，这就需要层次结构（即一个目录树），几乎所有现代文件系统使用的都是层次目录系统。用目录树组织文件系统时，常用绝对路径名（absolute path name）或相对路径名（relative path name）来指明文件名
 54 | * UNIX 中常见的目录操作的系统调用有 `create`、`delete`（只能删除空目录）、`opendir`、`closedir`、`readdir`、`rename`、`link`、`unlink`
 55 | 
 56 | ## 文件系统的实现
 57 | 
 58 | ### 文件系统布局
 59 | 
 60 | * 文件系统存放在磁盘上。多数磁盘划分为一个或多个分区，每个分区中有一个独立的文件系统
 61 | * 磁盘的 0 号扇区称为主引导记录（Master Boot Record，MBR），用来引导计算机
 62 | * MBR 的结尾是分区表，该表给出了每个分区的起始地址和结束地址。表中的一个分区被标记为活动分区，计算机被引导时，BIOS 读入并执行 MBR，MBR 做的第一件事就是确定活动分区，读入第一个块，即引导块（boot block），并执行
 63 | * 除了引导块，磁盘分区的布局通常随文件系统的不同而变化，一个可能的文件系统布局如下
 64 | 
 65 | ```
 66 | |-----------------整个磁盘-----------------|
 67 |   分区表               磁盘分区
 68 |      ↓       ↙     ↙        ↘        ↘
 69 |  __________________________________________
 70 | |MBR||||________|________|________|________|
 71 |                /          \
 72 |               /            \
 73 | 
 74 | |引导块|超级块|空闲空间管理|i节点|根目录|文件和目录|
 75 | ```
 76 | 
 77 | ### 文件的实现
 78 | 
 79 | * 文件存储实现的关键是记录文件用到了哪些磁盘块，不同的操作系统的实现方式不同
 80 | * 最简单的方式是连续分配，每个文件作为一连串连续数据块存储在磁盘上，比如块大小为 1 KB 的磁盘上，50 KB 的文件要分配 50 个连续的块。每个文件都要从一个新的块开始，上一个文件末尾块可能会存在部分被浪费的空间
 81 | * 连续分配的优势是实现简单，只需要为每个文件记录第一块的磁盘地址和使用的块数，另外读操作性能较好，单个操作就可以读出整个文件
 82 | * 缺点是删除文件会在磁盘中留下断断续续的空闲块。压缩磁盘代价太高，不可行。可以维护一个空闲块链表，但创建新文件时，为了选择合适的空闲区，必须先给出文件的最终大小，如果用户要创建一个文档然后录入，用户是无法给出最终大小的。但这在 CD-ROM 中是可行的，因为所有文件的大小都事先定好了，并且后续使用也不会被改变
 83 | * 第二种方式是链式分配，这样不会因为磁盘碎片而浪费存储空间，但随机访问很慢，每次要访问一个块时，都必须从第一个块开始。此外，指向下一个块的指针占用了一些字节，每个磁盘块存储数据的字节数不再是 2 的整数次幂，虽然这个问题不是非常严重，但也会降低系统的运行效率，因为程序一般以长度为 2 的整数次幂来读写磁盘块
 84 | * 第三种方式是把链式分配的指针放到内存的一个表中，这个表称为文件分配表（File Allocation Table，FAT），这样就解决了大小不正常带来的问题，但如果表项过多，比如 1 TB 的磁盘和 1 KB 的块，FAT 有 10 亿项，每项至少占 3 字节，这就占了 3 GB 内存，因此 FAT 在大型磁盘中不实用
 85 | * 最后一种方式是为每个文件赋予一个 i 节点（index-node）的数据结构，其中列出了文件属性和文件块的磁盘地址。给定 i 节点就能找到文件的所有块，这种方式相对于 FAT 的优势是，只有在文件打开时，其 i 节点才在内存中，最终需要的内存与同时打开的最大文件数成正比
 86 | 
 87 | ### 目录的实现
 88 | 
 89 | * 读文件时必须先打开文件，打开文件时，操作系统利用路径名找到目录项，目录项中提供了查找文件磁盘块所需要的信息。这些信息与系统有关，信息可能是整个文件的磁盘地址（对于连续分配的系统）、第一块的编号（链式分配）、i 节点号。文件属性存放的位置可以是目录项或者 i 节点
 90 | * 现代操作系统一般都支持长度可变的长文件名。最简单的实现方式是，给文件名一个长度限制，如 255 个字符，并为每个文件名保留该长度的空间，这种方式简单但浪费了大量目录空间
 91 | * 第二种方式是，每个目录项中开头有一个记录目录项长度的固定部分，接着是文件属性、任意长度的文件名。缺点和连续分配的磁盘碎片问题一样，移除一个个文件后会留下断断续续的空隙。由于整个目录在内存中，只有对目录进行紧凑操作才能节省空间。另一个问题是一个目录项可能会分布在多个页面上，读取文件名时可能发生缺页中断
 92 | * 第三种方式是，使目录项有固定长度，将文件名放在目录后面的堆上，并管理这个堆，这样移除一个目录项后，下一个进来的目录项总可以填满这个空隙
 93 | * 线性查找文件名要从头到尾搜索目录，对于非常长的目录，一个优化方式是在每个目录中使用散列表来映射文件名和对应的目录项
 94 | 
 95 | ### 共享文件
 96 | 
 97 | * 几个用户在同一个项目中工作时常需要共享文件。对于如下文件系统，B 与 C 有一个共享文件，B 的目录与该文件的联系称为一个链接（link）。这样，文件系统本身是一个有向无环图（Directed Acyclic Graph，DAG）而不是一棵树，代价是维护变得复杂
 98 | 
 99 | ![](../images/ref-filesystem-1.png)
100 | 
101 | * 共享文件的问题是，如果目录中包含磁盘地址，链接文件时必须将 C 目录中的磁盘地址复制到 B 目录中，如果 B（或 C）往文件中添加内容，新数据块只会列入 B（或 C）的用户目录中，C（或 B）对此改变是不知道的，这就违背了共享的目的
102 | * 解决这个问题的第一个方法是，磁盘块不列入目录，而是列入一个与文件关联的小型数据结构，目录将指向这个小型数据结构。这是 UNIX 的做法，小型数据结构就是 i 节点
103 | * 这种方法的缺点是，B 链接该共享文件时，i 节点记录的文件所有者仍是 C，只是将 i 节点的链接计数加 1，以让系统知道该文件有多少个指向它的目录项。如果 C 之后删除了这个文件，B 就有一个指向无效的 i 节点的目录项。如果这个 i 节点之后分配给另一个文件，B 的链接将指向一个错误的文件。系统可以通过 i 节点的计数知道文件被引用，但无法找到所有目录项并删除，也不可能把目录项指针存储在 i 节点中，因为可能有无数个这样的目录
104 | * 第二个方法是符号链接（symbolic linking），让系统建立一个 LINK 类型的文件，把该文件放在 B 目录下，使得 B 与 C 的一个文件存在链接。LINK 文件中包含了要链接的文件的路径名，B 读该链接文件时，操作系统发现是 LINK 类型，则找到其链接文件的路径并读取
105 | * 符号链接在文件被删除后，通过路径名查找文件将失败，因此不会有第一种方法的问题。符号链接的问题在于需要额外开销，必须读取包含路径的文件，然后逐步扫描路径直到找到 i 节点，这些操作可能需要很多次额外的磁盘访问
106 | * 此外，所有方式的链接都存在的一个问题是，文件有多个路径，如果查找文件，将多次定位到被链接的文件，如果一个程序的功能是查找某个文件并复制，就可能导致多次复制同一文件
107 | 
108 | ### 日志结构文件系统（Log-structured File System，LFS）
109 | 
110 | * 设计 LFS 的主要原因是，CPU 运行速度越来越快，RAM 内存变得更大，磁盘高速缓存迅速增加，不需要磁盘访问操作，就可能满足直接来自高速缓存的大部分读请求，由此可以推断，未来的磁盘访问多数是写操作，且写操作往往是零碎的，提前读机制并不能获得更好的性能
111 | * 因此 LFS 的设计者决定重新实现一种 UNIX 文件系统，即使面对一个由大部分为零碎的随机写操作组成的任务，也能够充分利用磁盘带宽
112 | * 基本思路是，将整个磁盘结构化为一个日志，最初所有写操作都缓冲在内存中，每隔一段时间或有特殊需要时，被缓冲在内存中未执行的写操作被放到一个单独的段中，作为日志末尾的一个邻接段被写入磁盘
113 | * 但磁盘空间不是无限大的，这种做法最终将导致日志占满整个磁盘，此时就无法再写入新的段。为了解决这个问题，LFS 有一个清理线程，该线程周期性扫描日志进行磁盘压缩。整个磁盘成为一个大的环形缓冲区，写线程将新的段写到前面，清理线程将旧的段从后面移走
114 | * LFS 在处理大量零碎写操作时的性能比 UNIX 好一个数量级，在处理读和大块写操作时的性能也不比 UNIX 差，甚至更好
115 | 
116 | ### 日志文件系统
117 | 
118 | * 由于 LFS 和现有的文件系统不相匹配，所以还未被广泛使用，但其内在的一个思想，即面对出错的鲁棒性，可以被其他文件系统借鉴。这个基本想法是，保存一个用于记录系统下一步要做什么的日志。当系统在完成任务前崩溃时，重新启动后，就能通过查看日志获取崩溃前计划完成的任务。这样的文件系统被称为日志文件系统，并已被实际使用，比如微软的 NTFS、Linux ext3、ReiserFS，OS X 将日志文件系统作为可选项提供
119 | 
120 | ### 虚拟文件系统（Virtual File System，VFS）
121 | 
122 | * 同一台计算机或同一个操作系统中，可以有多个不同的文件系统
123 | * Windows 有一个主要的 NTFS 文件系统，但也有一个包含 FAT-32 或 FAT-16 的驱动器或分区，此外还可能有 CD-ROM 或者 DVD（每一个包含特定文件系统），Windows 通过指定盘符来处理不同的文件系统，进程打开文件时，盘符是显式或隐式存在的，Windows 由此可知向哪个文件系统传递请求，不需要将不同的文件系统整合为统一模式
124 | * 所有现代的 UNIX 尝试将多种文件系统整合到一个统一的结构中。一个 Linux 系统可以用 ext2 作为根文件系统，ext3 分区装载在 `/usr` 下，采用 ReiserFS 的文件系统的硬盘装载在 `/home` 下，ISO 9660 的 CD-ROM 临时装载在 `/mnt` 下。用户视角中，只有一个文件系统层级，但实际上是对用户和进程不可见的多种不相容的文件系统
125 | * 但是多种文件系统的存在在实际应用中是明确可见的，以前大多 UNIX 操作系统都使用 VFS 概念尝试将多种文件系统统一成一个有序结构，其核心思想是抽象出所有文件系统共有的部分为单独一层，这一层通过调用底层的实际文件系统来具体管理数据
126 | * UNIX 中，所有文件相关的系统调用最初都指向 VFS，这些来自用户进程的调用都是标准的 POSIX 系统调用，VFS 对用户进程提供的上层接口就是 POSIX 接口。VFS 也有一个对于实际文件系统的下层接口，即 VFS 接口，当创造一个新的文件系统和 VFS 一起工作时，新系统的设计者必须确定它提供 VFS 所需要的功能调用
127 | 
128 | ```
129 | --------------------------------
130 | 用户进程
131 | --------------------------------
132 | |
133 | |             POSIX 接口
134 | ↓
135 | --------------------------------
136 | VFS
137 | --------------------------------
138 | |    |    |
139 | |    |    |   VFS 接口
140 | ↓    ↓    ↓
141 | --------------------------------
142 | FS1  FS2  FS3 实际文件系统
143 | --------------------------------
144 | ↑    ↑    ↑
145 | |    |    |
146 | ↓    ↓    ↓
147 | --------------------------------
148 | 高速缓冲区
149 | --------------------------------
150 | ```
151 | 
152 | ## 文件系统管理和优化
153 | 
154 | ### 磁盘空间管理
155 | 
156 | * 几乎所有文件系统都将文件分割成固定大小的块存储，各块之间不一定相邻。块的大小是一个需要考虑的问题，块太小则文件块数越多，需要更多次的寻道与旋转延迟才能读出它们，从而降低了性能。块太大，则文件的最后一个块存在空间浪费。从历史观点上来说，一般将块大小设为 1 到 4 KB，但随着现在磁盘超过了 1 TB，磁盘空间已经不再短缺了，将块的大小提升到 64 KB 并接受一些浪费比较好
157 | * 选定块大小后，下一个问题是如何记录空闲块。有两种方法被广泛使用，一是链表，二是位图
158 | * 为了防止占用太多磁盘空间，多用户操作系统通常提供了强制性磁盘配额机制，系统管理员为每个用户分配拥有文件和块的最大数量，操作系统确保每个用户不超过得到的配额
159 | 
160 | ### 文件系统备份
161 | 
162 | * 磁盘转储到磁带上有两种方案
163 |   * 物理转储：从磁盘的第 0 块开始，将全部的磁盘块按序输出到磁带上，直到最后一块复制完毕
164 |   * 逻辑转储：从一个或几个指定的目录开始，递归地转储其自给定日期后有所更改的全部文件和目录
165 | 
166 | ### 文件系统的一致性
167 | 
168 | * 很多文件系统读取磁盘块，修改后再写回磁盘。如果在写回完成前系统崩溃，文件系统可能处于不一致状态。为此，很多计算机都有一个检查文件系统一致性的实用程序，比如 UNIX 的 fsck、Windows 的 scandisk，系统启动时，特别是崩溃后的重启，可以运行该程序
169 | * 一致性检查分两种
170 |   * 块的一致性检查：程序构造两张表，每张表为每个块设立一个计数器，第一张表记录块在文件中的出现次数，第二张记录块在空闲区的出现次数。如果文件系统一致，最终每一个块在其中一张表中的计数器为 1，如果一个块在两张表中的计数器都为 0，则称为块丢失
171 |   * 文件的一致性检查：原理同上，区别是一个文件（而非一个块）对应一个计数器。注意，由于存在硬链接，一个文件可能出现在多个目录中。而遇到符号链接是不计数的，不会对目标文件的计数器加 1
172 | 
173 | ### 文件系统性能
174 | 
175 | * 访问磁盘比访问内存慢很多，如果只需要一个字，内存访问可以比磁盘访问快百万数量级，因此许多文件系统采用了各种优化措施来改善性能
176 | * 最常用的减少磁盘访问次数的技术是块高速缓存（block cache）或缓冲区高速缓存（buffer cache），它们逻辑上属于磁盘，但实际上保存在内存中
177 | * 第二个明显提高性能的技术是块提前读，在需要用到块之前先将块提前写入高速缓存，从而提高命中率。块提前读只适用于顺序读取的文件，如果请求文件系统在某个文件中生成一个块，文件系统将潜在地检查高速缓存，如果下一个块不在缓存中，则为下一个块安排一个预读
178 | * 另一个重要技术是把可能顺序访问的块放在一起，最好是在同一个柱面上，从而减少磁盘臂的移动次数。这个技术仅当磁盘中装有磁盘臂时才有意义，现在固态硬盘（SSD）越来越流行，而它们不带移动部件。固态硬盘采用了和闪存同样的制造技术，使得随机访问与顺序访问在传输速度上已经较为接近，传统硬盘的诸多问题就消失了，但也有一些新问题，比如每一块只可写入有限次数，使用时要十分小心以达到均匀分散磨损的目的
179 | 
180 | ### 磁盘碎片整理
181 | 
182 | * 随着不断创建与删除文件，磁盘会逐渐产生许多碎片，创建一个新文件时，其使用的块会散布在整个磁盘上，造成性能降低
183 | * 一个恢复方式是，移动文件使其相邻，把空闲区放到一个或多个大的连续区域内。Windows 有一个 defrag 程序，就是用于完成这项工作的，Windows 用户应该定期使用它。Linux 文件系统由于其选择磁盘块的方式，在磁盘碎片整理上一般不会遇到 Windows 那样的困难，因此很少需要手动整理磁盘碎片
184 | * 固态硬盘不受磁盘碎片的影响，对其做磁盘碎片整理不仅没有提高性能，反而磨损了硬盘，缩短了使用寿命
185 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | * C++11 引入了 Boost 线程库作为标准线程库，作者 Anthony Williams 为介绍其特性，于 2012 年出版了 *[C++ Concurrency in Action](https://book.douban.com/subject/4130141/)* 一书，并顺应 C++17 于 2019 年 2 月出版了[第二版](https://book.douban.com/subject/27036085/)。*[C++ Concurrency in Action 2ed](https://learning.oreilly.com/library/view/c-concurrency-in/9781617294693/)* 前五章介绍了[线程支持库](https://en.cppreference.com/w/cpp/thread)的基本用法，后六章从实践角度介绍了并发编程的设计思想，相比第一版多介绍了一些 C++17 特性，如 [std::scoped_lock](https://en.cppreference.com/w/cpp/thread/scoped_lock)、[std::shared_mutex](https://en.cppreference.com/w/cpp/thread/shared_mutex)，并多出一章（第十章）介绍 [C++17 标准库并行算法](https://en.cppreference.com/w/cpp/header/execution)，此外个人会在相应处补充 C++20 相关特性，如 [std::jthread](https://en.cppreference.com/w/cpp/thread/jthread)、[std::counting_semaphore](https://en.cppreference.com/w/cpp/thread/counting_semaphore)、[std::barrier](https://en.cppreference.com/w/cpp/thread/barrier)、[std::latch](https://en.cppreference.com/w/cpp/thread/latch) 等。阅读本书前可参考 [Andrew S. Tanenbaum](https://en.wikipedia.org/wiki/Andrew_S._Tanenbaum) 的 [*Modern Operating Systems*](https://book.douban.com/subject/25864553/)，预备操作系统的基础知识（[进程与线程](https://github.com/downdemo/Cpp-Concurrency-in-Action-2ed/blob/master/docs/reference/processes_and_threads.md)、[死锁](https://github.com/downdemo/Cpp-Concurrency-in-Action-2ed/blob/master/docs/reference/deadlocks.md)、[内存管理](https://github.com/downdemo/Cpp-Concurrency-in-Action-2ed/blob/master/docs/reference/memory_management.md)、[文件系统](https://github.com/downdemo/Cpp-Concurrency-in-Action-2ed/blob/master/docs/reference/file_systems.md)、[I/O](https://github.com/downdemo/Cpp-Concurrency-in-Action-2ed/blob/master/docs/reference/IO.md) 等）。此为个人笔记，仅供参考，更详细内容见[原书](https://learning.oreilly.com/library/view/c-concurrency-in/9781617294693/)。
 2 | 
 3 | ## [线程支持库](https://en.cppreference.com/w/cpp/thread)
 4 | 
 5 | 1. [线程管理（Managing thread）](https://github.com/downdemo/Cpp-Concurrency-in-Action-2ed/blob/master/docs/01_managing_thread.md)：[\<thread\>](https://en.cppreference.com/w/cpp/header/thread)
 6 | 2. [线程间共享数据（Sharing data between thread）](https://github.com/downdemo/Cpp-Concurrency-in-Action-2ed/blob/master/docs/02_sharing_data_between_thread.md)：[\<mutex\>](https://en.cppreference.com/w/cpp/header/mutex)、[\<shared_mutex\>](https://en.cppreference.com/w/cpp/header/shared_mutex)
 7 | 3. [同步并发操作（Synchronizing concurrent operation）](https://github.com/downdemo/Cpp-Concurrency-in-Action-2ed/blob/master/docs/03_synchronizing_concurrent_operation.md)：[\<condition_variable\>](https://en.cppreference.com/w/cpp/header/condition_variable)、[\<semaphore\>](https://en.cppreference.com/w/cpp/header/semaphore)、[\<barrier\>](https://en.cppreference.com/w/cpp/header/barrier)、[\<latch\>](https://en.cppreference.com/w/cpp/header/latch)、[\<future\>](https://en.cppreference.com/w/cpp/header/future)、[\<chrono\>](https://en.cppreference.com/w/cpp/header/chrono)、[\<ratio\>](https://en.cppreference.com/w/cpp/header/ratio)
 8 | 4. [C++ 内存模型和基于原子类型的操作（The C++ memory model and operations on atomic type）](https://github.com/downdemo/Cpp-Concurrency-in-Action-2ed/blob/master/docs/04_the_cpp_memory_model_and_operations_on_atomic_type.md)：[\<atomic\>](https://en.cppreference.com/w/cpp/header/atomic)
 9 | 
10 | ## 并发编程实践
11 | 
12 | 5. [基于锁的并发数据结构的设计（Designing lock-based concurrent data structure）](https://github.com/downdemo/Cpp-Concurrency-in-Action-2ed/blob/master/docs/05_designing_lock_based_concurrent_data_structure.md)
13 | 6. [无锁并发数据结构的设计（Designing lock-free concurrent data structure）](https://github.com/downdemo/Cpp-Concurrency-in-Action-2ed/blob/master/docs/06_designing_lock_free_concurrent_data_structure.md)
14 | 7. [并发代码的设计（Designing concurrent code）](https://github.com/downdemo/Cpp-Concurrency-in-Action-2ed/blob/master/docs/07_designing_concurrent_code.md)
15 | 8. [高级线程管理（Advanced thread management）](https://github.com/downdemo/Cpp-Concurrency-in-Action-2ed/blob/master/docs/08_advanced_thread_management.md)
16 | 9. [并行算法（Parallel algorithm）](https://github.com/downdemo/Cpp-Concurrency-in-Action-2ed/blob/master/docs/09_parallel_algorithm.md)：[\<execution\>](https://en.cppreference.com/w/cpp/header/execution)
17 | 10. [多线程应用的测试与调试（Testing and debugging multithreaded application）](https://github.com/downdemo/Cpp-Concurrency-in-Action-2ed/blob/master/docs/10_testing_and_debugging_multithreaded_application.md)
18 | 
19 | ## 标准库相关头文件
20 | 
21 | |头文件|说明|
22 | |:-:|:-:|
23 | |[\<thread\>](https://en.cppreference.com/w/cpp/header/thread)、[\<stop_token\>](https://en.cppreference.com/w/cpp/header/stop_token)|线程|
24 | |[\<mutex\>](https://en.cppreference.com/w/cpp/header/mutex)、[\<shared_mutex\>](https://en.cppreference.com/w/cpp/header/shared_mutex)|锁|
25 | |[\<condition_variable\>](https://en.cppreference.com/w/cpp/header/condition_variable)|条件变量|
26 | |[\<semaphore\>](https://en.cppreference.com/w/cpp/header/semaphore)|信号量|
27 | |[\<barrier\>](https://en.cppreference.com/w/cpp/header/barrier)、[\<latch\>](https://en.cppreference.com/w/cpp/header/latch)|屏障|
28 | |[\<future\>](https://en.cppreference.com/w/cpp/header/future)|异步处理的结果|
29 | |[\<chrono\>](https://en.cppreference.com/w/cpp/header/chrono)|时钟|
30 | |[\<ratio\>](https://en.cppreference.com/w/cpp/header/ratio)|编译期有理数算术|
31 | |[\<atomic\>](https://en.cppreference.com/w/cpp/header/atomic)|原子类型和原子操作|
32 | |[\<execution\>](https://en.cppreference.com/w/cpp/header/execution)|标准库算法执行策略|
33 | 
34 | ## 并发库对比
35 | 
36 | ### [C++11 Thread](https://en.cppreference.com/w/cpp/thread)
37 | 
38 | |特性|API|
39 | |:-:|:-:|
40 | |thread|[std::thread](https://en.cppreference.com/w/cpp/thread/thread)|
41 | |mutex|[std::mutex](https://en.cppreference.com/w/cpp/thread/mutex)、[std::lock_guard](https://en.cppreference.com/w/cpp/thread/lock_guard)、[std::unique_lock](https://en.cppreference.com/w/cpp/thread/unique_lock)|
42 | |condition variable|[std::condition_variable](https://en.cppreference.com/w/cpp/thread/condition_variable)、[std::condition_variable_any](https://en.cppreference.com/w/cpp/thread/condition_variable_any)|
43 | |atomic|[std::atomic](https://en.cppreference.com/w/cpp/atomic/atomic)、[std::atomic_thread_fence](https://en.cppreference.com/w/cpp/atomic/atomic_thread_fence)|
44 | |future|[std::future](https://en.cppreference.com/w/cpp/thread/future)、[std::shared_future](https://en.cppreference.com/w/cpp/thread/shared_future)|
45 | |interruption|无|
46 | 
47 | ### [Boost Thread](https://www.boost.org/doc/libs/1_87_0/doc/html/thread.html)
48 | 
49 | |特性|API|
50 | |:-:|:-:|
51 | |thread|[boost::thread](https://www.boost.org/doc/libs/1_87_0/doc/html/thread/thread_management.html#thread.thread_management.thread)|
52 | |mutex|[boost::mutex](https://www.boost.org/doc/libs/1_87_0/doc/html/thread/synchronization.html#thread.synchronization.mutex_types.mutex)、[boost::lock_guard](https://www.boost.org/doc/libs/1_87_0/doc/html/thread/synchronization.html#thread.synchronization.lock_guard.lock_guard)、[boost::unique_lock](https://www.boost.org/doc/libs/1_87_0/doc/html/thread/synchronization.html#thread.synchronization.locks.unique_lock)|
53 | |condition variable|[boost::condition_variable](https://www.boost.org/doc/libs/1_87_0/doc/html/thread/synchronization.html#thread.synchronization.condvar_ref.condition_variable)、[boost::condition_variable_any](https://www.boost.org/doc/libs/1_87_0/doc/html/thread/synchronization.html#thread.synchronization.condvar_ref.condition_variable_any)|
54 | |atomic|无|
55 | |future|[boost::future](https://www.boost.org/doc/libs/1_87_0/doc/html/thread/synchronization.html#thread.synchronization.futures.reference.unique_future)、[boost::shared_future](https://www.boost.org/doc/libs/1_87_0/doc/html/thread/synchronization.html#thread.synchronization.futures.reference.shared_future)|
56 | |interruption|[thread::interrupt](https://www.boost.org/doc/libs/1_87_0/doc/html/thread/thread_management.html#thread.thread_management.thread.interrupt)|
57 | 
58 | ### [POSIX Thread](http://pubs.opengroup.org/onlinepubs/9699919799/basedefs/pthread.h.html)
59 | 
60 | |特性|API|
61 | |:-:|:-:|
62 | |thread|[pthread_create](http://pubs.opengroup.org/onlinepubs/9699919799/functions/pthread_create.html)、[pthread_detach](http://pubs.opengroup.org/onlinepubs/9699919799/functions/pthread_detach.html#)、[pthread_join](http://pubs.opengroup.org/onlinepubs/9699919799/functions/pthread_join.html#)|
63 | |mutex|[pthread_mutex_lock、pthread_mutex_unlock](http://pubs.opengroup.org/onlinepubs/9699919799/functions/pthread_mutex_lock.html)|
64 | |condition variable|[pthread_cond_wait](http://pubs.opengroup.org/onlinepubs/9699919799/functions/pthread_cond_wait.html)、[pthread_cond_signal](https://pubs.opengroup.org/onlinepubs/9699919799/functions/pthread_cond_signal.html)|
65 | |atomic|无|
66 | |future|无|
67 | |interruption|[pthread_cancel](http://pubs.opengroup.org/onlinepubs/9699919799/functions/pthread_cancel.html)|
68 | 
69 | ### [Java Thread](https://docs.oracle.com/en/java/javase/23/docs/api/java.base/java/lang/Thread.html)
70 | 
71 | |特性|API|
72 | |:-:|:-:|
73 | |thread|[java.lang.Thread](https://docs.oracle.com/en/java/javase/23/docs/api/java.base/java/lang/Thread.html)|
74 | |mutex|[synchronized blocks](http://tutorials.jenkov.com/java-concurrency/synchronized.html)|
75 | |condition variable|[java.lang.Object.wait](https://docs.oracle.com/en/java/javase/23/docs/api/java.base/java/lang/Object.html#wait())、[java.lang.Object.notify](https://docs.oracle.com/en/java/javase/23/docs/api/java.base/java/lang/Object.html#notify())|
76 | |atomic|volatile 变量、[java.util.concurrent.atomic](https://docs.oracle.com/en/java/javase/23/docs/api/java.base/java/util/concurrent/atomic/package-summary.html)|
77 | |future|[java.util.concurrent.Future](https://docs.oracle.com/en/java/javase/23/docs/api/java.base/java/util/concurrent/Future.html)|
78 | |interruption|[java.lang.Thread.interrupt](https://docs.oracle.com/en/java/javase/23/docs/api/java.base/java/lang/Thread.html#interrupt())|
79 | |线程安全的容器|[java.util.concurrent](https://docs.oracle.com/en/java/javase/23/docs/api/java.base/java/util/concurrent/package-summary.html) 中的容器|
80 | |线程池|[java.util.concurrent.ThreadPoolExecutor](https://docs.oracle.com/en/java/javase/23/docs/api/java.base/java/util/concurrent/ThreadPoolExecutor.html)|
81 | 


--------------------------------------------------------------------------------
/docs/09_parallel_algorithm.md:
--------------------------------------------------------------------------------
  1 | ## [执行策略（execution policy）](https://en.cppreference.com/w/cpp/algorithm/execution_policy_tag_t)
  2 | 
  3 | * C++17 对标准库算法重载了并行版本，区别是多了一个指定执行策略的参数
  4 | 
  5 | ```cpp
  6 | std::vector<int> v;
  7 | std::sort(std::execution::par, v.begin(), v.end());
  8 | ```
  9 | 
 10 | * [std::execution::par](https://en.cppreference.com/w/cpp/algorithm/execution_policy_tag) 表示允许多线程并行执行此算法，注意这是一个权限（permission）而非强制要求（requirement），此算法依然可以被单线程执行
 11 | * 另外，如果指定了执行策略，算法复杂度的要求也更宽松，因为并行算法为了利用好系统的并行性通常要做更多工作。比如把工作划分给 100 个处理器，即使总工作是原来的两倍，也仍然能获得原来的五十倍的性能
 12 | * [\<execution\>](https://en.cppreference.com/w/cpp/header/execution) 中指定了如下执行策略类
 13 | 
 14 | ```cpp
 15 | std::execution::sequenced_policy;
 16 | std::execution::parallel_policy;
 17 | std::execution::parallel_unsequenced_policy;
 18 | std::execution::unsequenced_policy;  // C++20
 19 | ```
 20 | 
 21 | * 并指定了对应的全局对象
 22 | 
 23 | ```cpp
 24 | std::execution::seq;
 25 | std::execution::par;
 26 | std::execution::par_unseq;
 27 | std::execution::unseq;  // C++20
 28 | ```
 29 | 
 30 | * 如果使用执行策略，算法的行为就会受执行策略影响，影响方面包括：算法复杂度、抛异常时的行为、算法步骤的执行位置（where）、方式（how）、时刻（when）
 31 | * 除了管理并行执行的调度开销，许多并行算法会执行更多的核心操作（交换、比较、使用函数对象等），这样可以减少总的实际消耗时间，从而全面提升性能。这就是算法复杂度受影响的原因，其具体改变因算法不同而异
 32 | * 在不指定执行策略时，如下对算法的调用，抛出的异常会被传播
 33 | 
 34 | ```cpp
 35 | std::for_each(v.begin(), v.end(), [](auto x) { throw my_exception(); });
 36 | ```
 37 | 
 38 | * 而指定执行策略时，如果算法执行期间抛出异常，则行为结果由执行策略决定。如果有任何未捕获的异常，执行策略将调用 [std::terminate](https://en.cppreference.com/w/cpp/error/terminate) 终止程序，唯一可能抛出异常的情况是，内部操作不能获取足够的内存资源时抛出 [std::bad_alloc](https://en.cppreference.com/w/cpp/memory/new/bad_alloc)。如下操作将调用 [std::terminate](https://en.cppreference.com/w/cpp/error/terminate) 终止程序
 39 | 
 40 | ```cpp
 41 | std::for_each(std::execution::seq, v.begin(), v.end(),
 42 |               [](auto x) { throw my_exception(); });
 43 | ```
 44 | 
 45 | * 不同的执行策略的执行方式也不相同。执行策略会指定执行算法步骤的代理，可以是常规线程、矢量流、GPU 线程或其他任何东西。执行策略也会指定算法步骤运行的顺序限制，比如是否要以特定顺序运行、不同算法步骤的一部分是否可以互相交错或并行运行等。下面对不同的执行策略进行详细解释
 46 | 
 47 | ### [std::execution::sequenced_policy](https://en.cppreference.com/w/cpp/algorithm/execution_policy_tag_t)
 48 | 
 49 | * [std::execution::sequenced_policy](https://en.cppreference.com/w/cpp/algorithm/execution_policy_tag_t) 策略要求不可（may not）并行执行，所有操作将执行在一个线程上。但它也是执行策略，因此与其他执行策略一样会影响算法复杂度和异常行为
 50 | * 所有执行在一个线程上的操作必须以某个确定顺序执行，因此这些操作是不能互相交错的。但不规定具体顺序，因此对于不同的函数调用可能产生不同的顺序
 51 | 
 52 | ```cpp
 53 | std::vector<int> v(1000);
 54 | int n = 0;
 55 | // 把 1-1000 存入容器，存入顺序可能是顺序也可能是乱序
 56 | std::for_each(std::execution::seq, v.begin(), v.end(),
 57 |               [&](int& x) { x = ++n; });
 58 | ```
 59 | 
 60 | * 因此 [std::execution::sequenced_policy](https://en.cppreference.com/w/cpp/algorithm/execution_policy_tag_t) 策略对算法使用的迭代器、值、可调用对象要求很少，它们可以自由地使用同步机制，可以依赖于同一线程上调用的操作，尽管不能依赖于这些操作的顺序
 61 | 
 62 | ### [std::execution::parallel_policy](https://en.cppreference.com/w/cpp/algorithm/execution_policy_tag_t)
 63 | 
 64 | * [std::execution::parallel_policy](https://en.cppreference.com/w/cpp/algorithm/execution_policy_tag_t) 策略提供了基本的跨多个线程的并行执行，操作可以执行在调用算法的线程上，或执行在由库创建的线程上，在一个给定线程上的操作必须以确定顺序执行，并且不能相互交错。同样这个顺序是未指定的，对于不同的调用可能会有不同的顺序。一个给定的操作将在一个固定的线程上运行完整个周期
 65 | * 因此 [std::execution::parallel_policy](https://en.cppreference.com/w/cpp/algorithm/execution_policy_tag_t) 策略对于迭代器、值、可调用对象的使用就有一定要求，它们在并行调用时不能造成数据竞争，并且不能依赖于与其他操作运行在同一线程上，也不能依赖于与其他操作不运行在同一线程上
 66 | * 大多数情况都可以使用 [std::execution::parallel_policy](https://en.cppreference.com/w/cpp/algorithm/execution_policy_tag_t) 策略
 67 | 
 68 | ```cpp
 69 | std::for_each(std::execution::par, v.begin(), v.end(), [](auto& x) { ++x; });
 70 | ```
 71 | 
 72 | * 只有在元素之间有特定顺序或对共享数据的访问不同步时，它才有问题
 73 | 
 74 | ```cpp
 75 | std::vector<int> v(1000);
 76 | int n = 0;
 77 | std::for_each(std::execution::par, v.begin(), v.end(), [&](int& x) {
 78 |   x = ++n;
 79 | });  // 如果多个线程执行 lambda 就会对 n 产生数据竞争
 80 | ```
 81 | 
 82 | * 因此使用 [std::execution::parallel_policy](https://en.cppreference.com/w/cpp/algorithm/execution_policy_tag_t) 策略时，应该事先考虑可能出现的未定义行为。可以用 mutex 或原子变量来解决竞争问题，但这就影响了并发性。不过这个例子只是为了阐述此情况，一般使用 [std::execution::parallel_policy](https://en.cppreference.com/w/cpp/algorithm/execution_policy_tag_t) 策略时都是允许同步访问共享数据的
 83 | 
 84 | ### [std::execution::parallel_unsequenced_policy](https://en.cppreference.com/w/cpp/algorithm/execution_policy_tag_t)
 85 | 
 86 | * [std::execution::parallel_unsequenced_policy](https://en.cppreference.com/w/cpp/algorithm/execution_policy_tag_t) 策略提供了最大可能的并行化，代价是对算法使用的迭代器、值和可调用对象有最严格的要求
 87 | * 使用 [std::execution::parallel_unsequenced_policy](https://en.cppreference.com/w/cpp/algorithm/execution_policy_tag_t) 策略的算法允许以无序的方式在任意未指定的线程中执行，并且在每个线程中彼此不排序。也就是说，操作可以在单个线程上互相交错，同一线程上的第二个操作可以开始于第一个操作结束前，并且可以在线程间迁移，一个给定的操作可以开始于一个线程，运行于另一线程，而完成于第三个线程
 88 | * 使用 [std::execution::parallel_unsequenced_policy](https://en.cppreference.com/w/cpp/algorithm/execution_policy_tag_t) 策略时，提供给算法的迭代器、值、可调用对象上的操作不能使用任何形式的同步，也不能调用与其他代码同步的任何函数。这意味着操作只能作用于相关元素，或任何基于这些元素的可访问数据，并且不能修改任何线程间或元素间的共享数据
 89 | 
 90 | ## 标准库并行算法
 91 | 
 92 | * [\<algorithm\>](https://en.cppreference.com/w/cpp/algorithm) 和 [\<numeric\>](https://en.cppreference.com/w/cpp/header/numeric) 中的大部分算法都重载了并行版本。[std::accumulate](https://en.cppreference.com/w/cpp/algorithm/accumulate) 没有并行版本，但 C++17 提供了 [std::reduce](https://en.cppreference.com/w/cpp/algorithm/reduce)
 93 | 
 94 | ```cpp
 95 | std::accumulate(v.begin(), v.end(), 0);
 96 | std::reduce(std::execution::par, v.begin(), v.end());
 97 | ```
 98 | 
 99 | * 如果常规算法有并行版的重载，则并行版对常规算法原有的所有重载都有一个对应重载版本
100 | 
101 | ```cpp
102 | template <class RandomIt>
103 | void sort(RandomIt first, RandomIt last);
104 | 
105 | template <class RandomIt, class Compare>
106 | void sort(RandomIt first, RandomIt last, Compare comp);
107 | 
108 | // 并行版对应有两个重载
109 | template <class ExecutionPolicy, class RandomIt>
110 | void sort(ExecutionPolicy&& policy, RandomIt first, RandomIt last);
111 | 
112 | template <class ExecutionPolicy, class RandomIt, class Compare>
113 | void sort(ExecutionPolicy&& policy, RandomIt first, RandomIt last,
114 |           Compare comp);
115 | ```
116 | 
117 | * 但并行版的重载对部分算法有一些区别，如果常规版本使用的是输入迭代器（input iterator）或输出迭代器（output iterator），则并行版的重载将使用前向迭代器（forward iterator）
118 | 
119 | ```cpp
120 | template <class InputIt, class OutputIt>
121 | OutputIt copy(InputIt first, InputIt last, OutputIt d_first);
122 | 
123 | template <class ExecutionPolicy, class ForwardIt1, class ForwardIt2>
124 | ForwardIt2 copy(ExecutionPolicy&& policy, ForwardIt1 first, ForwardIt1 last,
125 |                 ForwardIt2 d_first);
126 | ```
127 | 
128 | * 输入迭代器只能用来读取指向的值，迭代器自增后就再也无法访问之前指向的值，它一般用于从控制台或网络输入，或生成序列，比如 [std::istream_iterator](https://en.cppreference.com/w/cpp/iterator/istream_iterator)。同理，输出迭代器一般用来输出到文件，或添加值到容器，也是单向的，比如 [std::ostream_iterator](https://en.cppreference.com/w/cpp/iterator/ostream_iterator)
129 | * 前向迭代器返回元素的引用，因此可以用于读写，它同样只能单向传递，[std::forward_list](https://en.cppreference.com/w/cpp/container/forward_list) 的迭代器就是前向迭代器，虽然它不可以回到之前指向的值，但可以存储一个指向之前元素的拷贝（比如 [std::forward_list::begin](https://en.cppreference.com/w/cpp/container/forward_list/begin)）来重复利用。对于并行性来说，可以重复利用迭代器很重要。此外，前向迭代器的自增不会使其他的迭代器拷贝失效，这样就不用担心其他线程中的迭代器受影响。如果使用输入迭代器，所有线程只能共用一个迭代器，显然无法并行
130 | * [std::execution::par](https://en.cppreference.com/w/cpp/algorithm/execution_policy_tag) 是最常用的策略，除非实现提供了更符合需求的非标准策略。一些情况下也可以使用 [std::execution::par_unseq](https://en.cppreference.com/w/cpp/algorithm/execution_policy_tag)，虽然这不保证更好的并发性，但它给了库通过重排和交错任务来提升性能的可能性，不过代价就是不能使用同步机制，要确保线程安全只能让算法本身不会让多个线程访问同一元素，并在调用该算法的外部使用同步机制来避免其他线程对数据的访问
131 | * 内部带同步机制只能使用 [std::execution::par](https://en.cppreference.com/w/cpp/algorithm/execution_policy_tag)，如果使用 [std::execution::par_unseq](https://en.cppreference.com/w/cpp/algorithm/execution_policy_tag) 会出现未定义行为
132 | 
133 | ```cpp
134 | #include <algorithm>
135 | #include <mutex>
136 | #include <vector>
137 | 
138 | class A {
139 |  public:
140 |   int get() const {
141 |     std::lock_guard<std::mutex> l(m_);
142 |     return n_;
143 |   }
144 | 
145 |   void inc() {
146 |     std::lock_guard<std::mutex> l(m_);
147 |     ++n_;
148 |   }
149 | 
150 |  private:
151 |   mutable std::mutex m_;
152 |   int n_ = 0;
153 | };
154 | 
155 | void f(std::vector<A>& v) {
156 |   std::for_each(std::execution::par, v.begin(), v.end(), [](A& x) { x.inc(); });
157 | }
158 | ```
159 | 
160 | * 如果使用 [std::execution::par_unseq](https://en.cppreference.com/w/cpp/algorithm/execution_policy_tag) 则应该在外部使用同步机制
161 | 
162 | ```cpp
163 | #include <algorithm>
164 | #include <mutex>
165 | #include <vector>
166 | 
167 | class A {
168 |  public:
169 |   int get() const { return n_; }
170 |   void inc() { ++n_; }
171 | 
172 |  private:
173 |   int n_ = 0;
174 | };
175 | 
176 | class B {
177 |  public:
178 |   void lock() { m_.lock(); }
179 |   void unlock() { m_.unlock(); }
180 |   std::vector<A>& get() { return v_; }
181 | 
182 |  private:
183 |   std::mutex m_;
184 |   std::vector<A> v_;
185 | };
186 | 
187 | void f(B& x) {
188 |   std::lock_guard<B> l(x);
189 |   auto& v = x.get();
190 |   std::for_each(std::execution::par_unseq, v.begin(), v.end(),
191 |                 [](A& x) { x.inc(); });
192 | }
193 | ```
194 | 
195 | * 下面是一个更实际的例子。假如有一个网站，访问日志有上百万条，为了方便查看数据需要对日志进行处理。对日志每行的处理是独立的工作，很适合使用并行算法
196 | 
197 | ```cpp
198 | struct Log {
199 |   std::string page;
200 |   time_t visit_time;
201 |   // any other fields
202 | };
203 | 
204 | extern Log parse(const std::string& line);
205 | 
206 | using Map = std::unordered_map<std::string, unsigned long long>;
207 | 
208 | Map f(const std::vector<std::string>& v) {
209 |   struct Combine {
210 |     // log、Map 两个参数有四种组合，所以需要四个重载
211 |     Map operator()(Map lhs, Map rhs) const {
212 |       if (lhs.size() < rhs.size()) {
213 |         std::swap(lhs, rhs);
214 |       }
215 |       for (const auto& x : rhs) {
216 |         lhs[x.first] += x.second;
217 |       }
218 |       return lhs;
219 |     }
220 | 
221 |     Map operator()(Log l, Map m) const {
222 |       ++m[l.page];
223 |       return m;
224 |     }
225 | 
226 |     Map operator()(Map m, Log l) const {
227 |       ++m[l.page];
228 |       return m;
229 |     }
230 | 
231 |     Map operator()(Log lhs, Log rhs) const {
232 |       Map m;
233 |       ++m[lhs.page];
234 |       ++m[rhs.page];
235 |       return m;
236 |     }
237 |   };
238 | 
239 |   return std::transform_reduce(std::execution::par, v.begin(), v.end(),
240 |                                Map{},      // 初始值，一个空的 map
241 |                                Combine{},  // 结合两个元素的二元操作
242 |                                parse);  // 对每个元素执行的一元操作
243 | }
244 | ```
245 | 


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
  1 | Apache License
  2 |                            Version 2.0, January 2004
  3 |                         http://www.apache.org/licenses/
  4 | 
  5 |    TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
  6 | 
  7 |    1. Definitions.
  8 | 
  9 |       "License" shall mean the terms and conditions for use, reproduction,
 10 |       and distribution as defined by Sections 1 through 9 of this document.
 11 | 
 12 |       "Licensor" shall mean the copyright owner or entity authorized by
 13 |       the copyright owner that is granting the License.
 14 | 
 15 |       "Legal Entity" shall mean the union of the acting entity and all
 16 |       other entities that control, are controlled by, or are under common
 17 |       control with that entity. For the purposes of this definition,
 18 |       "control" means (i) the power, direct or indirect, to cause the
 19 |       direction or management of such entity, whether by contract or
 20 |       otherwise, or (ii) ownership of fifty percent (50%) or more of the
 21 |       outstanding shares, or (iii) beneficial ownership of such entity.
 22 | 
 23 |       "You" (or "Your") shall mean an individual or Legal Entity
 24 |       exercising permissions granted by this License.
 25 | 
 26 |       "Source" form shall mean the preferred form for making modifications,
 27 |       including but not limited to software source code, documentation
 28 |       source, and configuration files.
 29 | 
 30 |       "Object" form shall mean any form resulting from mechanical
 31 |       transformation or translation of a Source form, including but
 32 |       not limited to compiled object code, generated documentation,
 33 |       and conversions to other media types.
 34 | 
 35 |       "Work" shall mean the work of authorship, whether in Source or
 36 |       Object form, made available under the License, as indicated by a
 37 |       copyright notice that is included in or attached to the work
 38 |       (an example is provided in the Appendix below).
 39 | 
 40 |       "Derivative Works" shall mean any work, whether in Source or Object
 41 |       form, that is based on (or derived from) the Work and for which the
 42 |       editorial revisions, annotations, elaborations, or other modifications
 43 |       represent, as a whole, an original work of authorship. For the purposes
 44 |       of this License, Derivative Works shall not include works that remain
 45 |       separable from, or merely link (or bind by name) to the interfaces of,
 46 |       the Work and Derivative Works thereof.
 47 | 
 48 |       "Contribution" shall mean any work of authorship, including
 49 |       the original version of the Work and any modifications or additions
 50 |       to that Work or Derivative Works thereof, that is intentionally
 51 |       submitted to Licensor for inclusion in the Work by the copyright owner
 52 |       or by an individual or Legal Entity authorized to submit on behalf of
 53 |       the copyright owner. For the purposes of this definition, "submitted"
 54 |       means any form of electronic, verbal, or written communication sent
 55 |       to the Licensor or its representatives, including but not limited to
 56 |       communication on electronic mailing lists, source code control systems,
 57 |       and issue tracking systems that are managed by, or on behalf of, the
 58 |       Licensor for the purpose of discussing and improving the Work, but
 59 |       excluding communication that is conspicuously marked or otherwise
 60 |       designated in writing by the copyright owner as "Not a Contribution."
 61 | 
 62 |       "Contributor" shall mean Licensor and any individual or Legal Entity
 63 |       on behalf of whom a Contribution has been received by Licensor and
 64 |       subsequently incorporated within the Work.
 65 | 
 66 |    2. Grant of Copyright License. Subject to the terms and conditions of
 67 |       this License, each Contributor hereby grants to You a perpetual,
 68 |       worldwide, non-exclusive, no-charge, royalty-free, irrevocable
 69 |       copyright license to reproduce, prepare Derivative Works of,
 70 |       publicly display, publicly perform, sublicense, and distribute the
 71 |       Work and such Derivative Works in Source or Object form.
 72 | 
 73 |    3. Grant of Patent License. Subject to the terms and conditions of
 74 |       this License, each Contributor hereby grants to You a perpetual,
 75 |       worldwide, non-exclusive, no-charge, royalty-free, irrevocable
 76 |       (except as stated in this section) patent license to make, have made,
 77 |       use, offer to sell, sell, import, and otherwise transfer the Work,
 78 |       where such license applies only to those patent claims licensable
 79 |       by such Contributor that are necessarily infringed by their
 80 |       Contribution(s) alone or by combination of their Contribution(s)
 81 |       with the Work to which such Contribution(s) was submitted. If You
 82 |       institute patent litigation against any entity (including a
 83 |       cross-claim or counterclaim in a lawsuit) alleging that the Work
 84 |       or a Contribution incorporated within the Work constitutes direct
 85 |       or contributory patent infringement, then any patent licenses
 86 |       granted to You under this License for that Work shall terminate
 87 |       as of the date such litigation is filed.
 88 | 
 89 |    4. Redistribution. You may reproduce and distribute copies of the
 90 |       Work or Derivative Works thereof in any medium, with or without
 91 |       modifications, and in Source or Object form, provided that You
 92 |       meet the following conditions:
 93 | 
 94 |       (a) You must give any other recipients of the Work or
 95 |           Derivative Works a copy of this License; and
 96 | 
 97 |       (b) You must cause any modified files to carry prominent notices
 98 |           stating that You changed the files; and
 99 | 
100 |       (c) You must retain, in the Source form of any Derivative Works
101 |           that You distribute, all copyright, patent, trademark, and
102 |           attribution notices from the Source form of the Work,
103 |           excluding those notices that do not pertain to any part of
104 |           the Derivative Works; and
105 | 
106 |       (d) If the Work includes a "NOTICE" text file as part of its
107 |           distribution, then any Derivative Works that You distribute must
108 |           include a readable copy of the attribution notices contained
109 |           within such NOTICE file, excluding those notices that do not
110 |           pertain to any part of the Derivative Works, in at least one
111 |           of the following places: within a NOTICE text file distributed
112 |           as part of the Derivative Works; within the Source form or
113 |           documentation, if provided along with the Derivative Works; or,
114 |           within a display generated by the Derivative Works, if and
115 |           wherever such third-party notices normally appear. The contents
116 |           of the NOTICE file are for informational purposes only and
117 |           do not modify the License. You may add Your own attribution
118 |           notices within Derivative Works that You distribute, alongside
119 |           or as an addendum to the NOTICE text from the Work, provided
120 |           that such additional attribution notices cannot be construed
121 |           as modifying the License.
122 | 
123 |       You may add Your own copyright statement to Your modifications and
124 |       may provide additional or different license terms and conditions
125 |       for use, reproduction, or distribution of Your modifications, or
126 |       for any such Derivative Works as a whole, provided Your use,
127 |       reproduction, and distribution of the Work otherwise complies with
128 |       the conditions stated in this License.
129 | 
130 |    5. Submission of Contributions. Unless You explicitly state otherwise,
131 |       any Contribution intentionally submitted for inclusion in the Work
132 |       by You to the Licensor shall be under the terms and conditions of
133 |       this License, without any additional terms or conditions.
134 |       Notwithstanding the above, nothing herein shall supersede or modify
135 |       the terms of any separate license agreement you may have executed
136 |       with Licensor regarding such Contributions.
137 | 
138 |    6. Trademarks. This License does not grant permission to use the trade
139 |       names, trademarks, service marks, or product names of the Licensor,
140 |       except as required for reasonable and customary use in describing the
141 |       origin of the Work and reproducing the content of the NOTICE file.
142 | 
143 |    7. Disclaimer of Warranty. Unless required by applicable law or
144 |       agreed to in writing, Licensor provides the Work (and each
145 |       Contributor provides its Contributions) on an "AS IS" BASIS,
146 |       WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
147 |       implied, including, without limitation, any warranties or conditions
148 |       of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
149 |       PARTICULAR PURPOSE. You are solely responsible for determining the
150 |       appropriateness of using or redistributing the Work and assume any
151 |       risks associated with Your exercise of permissions under this License.
152 | 
153 |    8. Limitation of Liability. In no event and under no legal theory,
154 |       whether in tort (including negligence), contract, or otherwise,
155 |       unless required by applicable law (such as deliberate and grossly
156 |       negligent acts) or agreed to in writing, shall any Contributor be
157 |       liable to You for damages, including any direct, indirect, special,
158 |       incidental, or consequential damages of any character arising as a
159 |       result of this License or out of the use or inability to use the
160 |       Work (including but not limited to damages for loss of goodwill,
161 |       work stoppage, computer failure or malfunction, or any and all
162 |       other commercial damages or losses), even if such Contributor
163 |       has been advised of the possibility of such damages.
164 | 
165 |    9. Accepting Warranty or Additional Liability. While redistributing
166 |       the Work or Derivative Works thereof, You may choose to offer,
167 |       and charge a fee for, acceptance of support, warranty, indemnity,
168 |       or other liability obligations and/or rights consistent with this
169 |       License. However, in accepting such obligations, You may act only
170 |       on Your own behalf and on Your sole responsibility, not on behalf
171 |       of any other Contributor, and only if You agree to indemnify,
172 |       defend, and hold each Contributor harmless for any liability
173 |       incurred by, or claims asserted against, such Contributor by reason
174 |       of your accepting any such warranty or additional liability.
175 | 
176 |    END OF TERMS AND CONDITIONS
177 | 
178 |    APPENDIX: How to apply the Apache License to your work.
179 | 
180 |       To apply the Apache License to your work, attach the following
181 |       boilerplate notice, with the fields enclosed by brackets "[]"
182 |       replaced with your own identifying information. (Don't include
183 |       the brackets!)  The text should be enclosed in the appropriate
184 |       comment syntax for the file format. We also recommend that a
185 |       file or class name and description of purpose be included on the
186 |       same "printed page" as the copyright notice for easier
187 |       identification within third-party archives.
188 | 
189 |    Copyright [yyyy] [name of copyright owner]
190 | 
191 |    Licensed under the Apache License, Version 2.0 (the "License");
192 |    you may not use this file except in compliance with the License.
193 |    You may obtain a copy of the License at
194 | 
195 |        http://www.apache.org/licenses/LICENSE-2.0
196 | 
197 |    Unless required by applicable law or agreed to in writing, software
198 |    distributed under the License is distributed on an "AS IS" BASIS,
199 |    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
200 |    See the License for the specific language governing permissions and
201 |    limitations under the License.


--------------------------------------------------------------------------------
/docs/reference/memory_management.md:
--------------------------------------------------------------------------------
  1 | ## 无存储器抽象
  2 | 
  3 | * 早期计算机没有存储器抽象，每个程序都直接访问物理内存
  4 | 
  5 | ```asm
  6 | MOV REGISTER1, 1000  ;将位置1000的物理内存中的内容移到 REGISTER1 中
  7 | ```
  8 | 
  9 | * 因此那时呈现给程序员的存储器模型就是简单的物理内存：从 0 到某个上限的地址集合，每个地址对应一个可容纳一定数目（通常是 8 个）二进制位的存储单元
 10 | * 这种情况下，在内存中同时运行两个程序是不可能的，如果一个程序在 2000 的位置写入一个新值，就会擦掉另一个程序在相同位置上的内容，因此无法同时运行两个程序，这两个程序会立刻崩溃
 11 | * 为了运行多个程序，一个解决方法是，操作系统把当前内存中所有内容保存到磁盘，然后把下一个程序读入到内存中再运行即可。同一时刻，只要内存中只有一个程序，就不会发生冲突
 12 | * 但这种方法有一个重要的缺陷，即重定位（即逻辑地址到物理地址的转换）问题。假设有两个程序，第一个程序在 0 处的指令是 `JMP 24`，第二个程序在 0 处的指令是 `JMP 28`，当第一个程序运行一段时间后再运行第二个程序，第二个程序会跳到第一个程序 28 处的指令。由于对内存地址的不正确访问，程序立刻崩溃
 13 | * 一个补救方法是静态重定位，即装入时将逻辑地址转换为物理地址。当一个程序被装载到地址 16384 时，常数 16384 被加到每一个程序地址上。虽然这个机制在不出错误的情况下可行，但不是一种通用的解决方法，同时会减慢装载速度，并且它要求所有的可执行程序提供额外的信息，以区分哪些内存字中存有可重定位的地址，哪些没有
 14 | * 虽然直接引用物理地址对大型计算机、小型计算机、台式计算机、笔记本都已经成为了历史，但在嵌入式系统、智能卡系统中，缺少存储器抽象的情况仍然很常见。像收音机、洗衣机、微波炉都是采用访问绝对内存地址的寻址方式，其中的程序都是事先确定的，用户不能在其上运行自己的软件，因此它们可以正常工作
 15 | * 总之，把物理地址暴露给进程带来的严重问题有：
 16 |   * 如果用户程序可以寻址内存的每个字节，就可以轻易破坏操作系统
 17 |   * 想要运行多个程序很困难
 18 | 
 19 | ## 一种存储器抽象：地址空间
 20 | 
 21 | * 要使多个程序同时存在于内存中并且互不影响，需要解决保护（进程只能访问自己的内存）和重定位两个问题。对前者的一个原始的解决方法是，给内存标记上一个保护键，并且比较执行进程的键和其访问的每个内存字的保护键，比如进程能访问的空间是 0-100，CPU 标记此范围，然后在访问内存时检查是否为该进程可访问空间。不过这种方法并没有解决重定位问题
 22 | * 更好的方法是创造一个新的存储器抽象：地址空间。地址空间是一个进程可用于寻址内存的一套地址集合，每个进程都有一个自己的地址空间，并且这个地址空间独立于其他进程的地址空间（除了一些情况下进程需要共享地址空间）
 23 | * 地址空间的概念非常通用，比如 7 位数字的电话号码的地址空间是 `0 000 000` 到 `9 999 999`，x86 的 I/O 端口的地址空间是 `0` 到 `16383`，IPv4 的地址空间是 `0` 到 `2 ^ 32 - 1`。地址空间也可以是非数字的，比如以 `.com` 结尾的网络域名的集合
 24 | * 比较难的是给每个程序一个独有的地址空间，使得两个程序的相同地址（如地址 28）对应不同的物理地址
 25 | * 一个简单的方法是使用动态重定位，即运行时将逻辑地址转换为物理地址。把每个进程的地址空间映射到物理内存的不同部分，当一个进程运行时，程序的起始物理地址装载到基址寄存器（又称重定位寄存器），程序的长度装载到界限寄存器（又称限长寄存器）。进程访问内存，CPU 在把地址发送到内存总线前会自动把基址加到进程发出的地址值上，同时检查程序提供的地址是否超出了界限寄存器中的值，如果超出了就会产生错误并终止访问。对于之前的例子，比如第二个程序的 `JMP 28`，CPU 会将其解释为 `JMP 16412`
 26 | * 使用基址寄存器和界限寄存器重定位的缺点是，每次访问内存都需要进行加法和比较运算，比较运算可以很快，但加法运算由于进位传递时间的问题，在没有使用特殊电路的情况下会显得很慢
 27 | * 但物理内存是有限的，把所有进程一直保存在内存中需要巨大的内存，内存不足就无法支持这点。处理内存超载有两种通用方法，最简单的是交换（swapping）技术，即把进程完整调入内存运行一段时间，然后把它存回磁盘，这样空闲进程主要存储在磁盘上，不运行就不会占用内存。另一种方法是虚拟内存（virtual memory），它能使程序只有一部分调入内存的情况下运行
 28 | * 交换可能在内存中产生多个空闲区（hole）。把进程尽可能靠近，将这些小的间隙合并成一大块，这种技术称为内存紧缩（memory compaction）。通常不进行这个操作，因为它需要耗费大量 CPU 时间
 29 | * 如果进程的数据段可以增长（比如从堆中动态分配内存），进程与空闲区相邻，则可以把空闲区分配给进程使其增大。如果进程之间紧紧相邻，就需要把要增长的进程移到内存中一个足够大的区域，或者把一个或多个进程交换出去以生成足够大的空闲区。如果进程在内存中不能增长，并且磁盘上的交换区已满，则这个进程只能挂起直到有空间空闲，或者结束
 30 | * 如果大部分进程在运行时需要增长，为了减少因内存区不够而引起的进程交换和移动开销，一种方法是在换入或移动进程时额外分配一些预留内存
 31 | * 动态分配内存时，操作系统必须对其进行管理，一般跟踪内存使用情况有两种方法：位图和空闲区链表
 32 | * 使用位图法时，把内存划分成分配单元（每个单元小到几个字节或大到几千字节），用位图中的一位来记录每个分配单元的使用情况，比如 0 表示空闲 1 表示占用（或者相反）。分配单元越小，位图越大，不过即使 4 个字节大小的分配单元，32 位的内存只需要 1 位位图，位图只占用了 `1 / 32` 的内存
 33 | * 位图法的主要问题是，在决定把一个占 `k` 个分配单元的进程调入内存时，存储管理器必须搜索位图，在位图中找出有 `k` 个连续 0 的串，这个查找操作很耗时，因为在位图中该串可能跨越字的边界
 34 | * 另一个记录内存使用情况的方法是，维护一个记录已分配内存段和空闲内存段的链表，链表中的一个节点包含一个进程或者两个进程间的一块空闲区
 35 | * 使用链表法时，为进程分配内存的最简单的算法是首次适配（first fit）算法，存储管理器沿链表搜索，直到找到一个足够大的空闲区，然后将空闲区分为两部分，一部分为要分配的大小，供进程使用，另一部分形成新的空闲区
 36 | * 对首次适配算法进行小修改可以得到下次适配（next fit）算法，区别是在每次找到合适的空闲区时记录位置，这样下次就可以从上次结束的地方开始搜索。Bays 的仿真程序证明下次适配算法性能略低于首次适配算法
 37 | * 另一个著名并广泛使用的算法是最佳适配（best fit）算法，搜索整个链表，找到能容纳进程的最小空闲区。因为每次都要搜索整个链表，所以它比首次适配算法慢。有些令人意外的是，它比前两种算法浪费更多的内存，因为它会产生大量无用的小空闲区。为了避免分裂出很多非常小的空闲区，可以考虑最差适配（worst fit）算法，即总是分配最大的可用空闲区，但仿真程序表明这也不是一个好方法
 38 | * 一个提高算法速度的方式是，为进程和空闲区分别维护链表，代价是增加复杂度和内存释放速度变慢，因为必须将回收的段从进程链表删除并插入到空闲区链表
 39 | * 如果分别维护进程和空闲区的链表，就可以对空闲区链表按大小排序，以提高最佳适配算法的速度，比如按从小到大排序，第一个合适的空间就是最小的空闲区，就是最佳适配。排序后，首次适配算法与最佳适配算法一样快，下次适配算法无意义
 40 | * 单独维护空闲区链表时可以做一个小优化，利用空闲区存储信息，每个空闲区的第一个字就是空闲区大小，第二个字指向下一空闲区
 41 | * 另一种分配算法是快速分配（quick fit）算法，它为常用大小的空闲区维护单独的链表，比如链表第一项是 4 KB 大小空闲区的链表头指针，第二项是 8 KB 大小空闲区的链表头指针，以此类推。像 21 KB 的空闲区，既可以放在 20 KB 的链表中，也可以放在一个专门存放特殊大小的链表中。这种算法查找指定大小的空闲区很快，但同样存在的缺点是，进程终止或换出时，寻找它的相邻块并查找是否可以合并的过程非常费时，如果不合并，内存将很快分裂出大量无法利用的小空闲区
 42 | 
 43 | ## 虚拟内存
 44 | 
 45 | * 当程序大到内存无法容纳时，交换技术就有所缺陷，一个典型 SATA 磁盘的峰值传输率高达每秒几百兆，交换一个 1 GB 的程序就需要好几秒
 46 | * 程序大于内存的问题在一些应用领域早就存在了，比如模拟宇宙的创建就要花费大量内存。20 世纪 60 年代的解决方案是，将程序分割为多个覆盖区（overlay）。程序开始运行时，将覆盖管理模块装入内存，该模块立刻装入并运行第一个覆盖区，执行完成后，第一个覆盖区通知管理模块装入下一个覆盖区
 47 | * 程序员必须把程序分割成多个片段，这个工作非常费时枯燥，并且易出错。不久后有了虚拟内存（virtual memory）的方法，这些工作都可以交给计算机去做
 48 | * 虚拟内存的基本思想是，程序的地址空间被分割成多个页（page），每一页有连续的地址范围。这些页被映射到物理内存，但并不是所有页必须在内存中才能运行程序。当程序引用到一部分物理内存中的地址空间时，由硬件执行必要的映射。当程序引用到一部分不在物理内存中的地址空间时，由操作系统负责将缺失的部分装入物理内存并重新执行失败的指令
 49 | 
 50 | ## 分页（paging）
 51 | 
 52 | * 大部分虚拟内存系统都使用了分页技术
 53 | * 由程序产生的地址称为虚拟地址（virtual address）
 54 | 
 55 | ```asm
 56 | MOV REG, 1000  ;将地址为 1000 的内存单元的内容复制到 REG，1000 是虚拟地址
 57 | ```
 58 | 
 59 | * 虚拟地址构成了虚拟地址空间（virtual address space）。在没有虚拟内存的计算机上，系统直接将虚拟地址送到内存总线上，读写操作使用相同地址的物理内存字。在使用虚拟内存时，虚拟地址被送到内存管理单元（Memory Management Unit，MMU），MMU 把虚拟地址映射为物理内存地址
 60 | * 页表给出虚拟地址与物理内存地址之间的映射关系
 61 | * 虚拟地址空间按固定大小划分为页面（page），物理内存中对应的单元称为页框（page frame），页面和页框的大小通常相同，页表说明了每个页面对应的页框。RAM 和磁盘之间的交换总是以整个页面为单元进行的
 62 | 
 63 | ![](../images/ref-memory_management-1.png)
 64 | 
 65 | * 对应 64 KB 的虚拟地址空间和 32 KB 的物理内存，可以得到 16 个页面和 8 个页框
 66 | * 比如执行指令访问地址 0 时
 67 | 
 68 | ```asm
 69 | MOV REG, 0
 70 | ```
 71 | 
 72 | * 虚拟地址 0 被送到 MMU，MMU 发现其位于页面 0（0 - 4095），根据映射结果，页面 0 对应页框 2（8192 - 12287），于是 MMU 将地址转换为 8192，并把地址 8192 送到总线上。内存并不需要知道 MMU 做的事，只看到一个访问地址 8192 的请求并执行
 73 | * 当虚拟地址空间比物理内存大时，就会存在未被映射的页面。当程序执行指令访问未映射的页面
 74 | 
 75 | ```asm
 76 | MOV REG, 32780  ;位于页面 8（从 32768 开始）
 77 | ```
 78 | 
 79 | * MMU 发现该页面未被映射，于是使 CPU 陷入（traps）到操作系统，这称为缺页中断（page fault）。操作系统找到一个很少使用的页框并把其内容写入磁盘，比如找到页面 1 对应的页框 1。将页面 1 标记为未映射，再把页面 8 映射到这个页框 1，然后重新启动访问指令，此时虚拟地址 32780 就可以映射到物理地址 4108（4096 + 32780 - 32768）
 80 | * 页面大小一般是 2 的整数次幂。比如页面大小为 4 KB，即 `2 ^ 12`，对于一个 16 位的虚拟地址，即可用前 4 位表示页面的页号，后 12 位表示偏移量。比如虚拟地址 `8192`，二进制为 `0010 0000 0000 0100`，`0010` 即为页号，`0000 0000 0100` 即为偏移，因此 `8192` 位于页号 `2` 偏移 `4` 的位置
 81 | * 页表中，查找页号 `2` 对应的页框号为 `6`，把页框号 `110` 复制到输出寄存器的高 3 位，后 12 位保持不变，`110 0000 0000 0100` 即为物理地址
 82 | * 除了页框号，页表还会有一些其他的位
 83 |   * 有效位，如果该位为 1 则说明存在映射，如果为 0，则访问该页面将引起缺页中断
 84 |   * 保护（protection）位，指出一个页允许的访问方式，比如用一个位表示，0 表示读写，1 表示只读
 85 |   * 修改（modified）位，记录页面使用情况，写入页面后由硬件自动设置修改位，该位也称为脏位（dirty bit），在重新分配页框时很有用，比如一个页是脏的（已被修改过），则必须把它写回磁盘，是干净的则可以直接丢弃
 86 |   * 访问（referenced）位，在页面被访问时设置，主要用来帮助操作系统在发生缺页中断时选择要淘汰的页面
 87 |   * 禁止高速缓存位，该位对于映射到设备寄存器而非常规内存的页面十分重要，比如操作系统持续等待 I/O 设备的响应，必须保证硬件读取的数据来自设备而非高速缓存
 88 | 
 89 | ## 加速分页过程
 90 | 
 91 | * 在任何分页系统中都需要考虑两个问题
 92 |   * 虚拟地址到物理地址的映射必须非常快：每次访问内存都要进行映射，所有的指令最终都来自内存，并且很多指令也会访问内存中的操作数，因此每条指令进行一两次或更多页表访问是必要的。如果指令一条指令要 1 ns，页表查询必须在 0.2 ns 内完成，以避免映射成为主要瓶颈
 93 |   * 如果虚拟地址空间很大，页表也会很大：现代计算机至少使用 32 位虚拟地址，假设页面大小为 4 KB，32 位的地址空间将有 100 万页，页表也必然有 100 万条表项。每个进程都有自己的虚拟地址空间，都需要自己的页表，于是需要为进程分配非常多的连续页框
 94 | * 大多数程序总是对少量页面多次访问，没有必要让将整个页表保存在内存中，由此得出的一种解决方案是，设置一个转换检测缓冲区（Translation Lookaside Buffer，TLB），也称相联存储器（associate memory）或快表，将虚拟内存直接映射到物理地址，而不必再访问页表
 95 | * TLB 通常在 MMU 中，包含少量表项，实际中很少会超过 256 个。将一个虚拟地址放入 MMU 中进行转换时，硬件先将页号与 TLB 中所有表项进行匹配，如果匹配成功且操作不违反保护位，则直接从 TLB 中取出页框号，而不再访问页表。如果匹配失败，则进行正常的页表查询，并从 TLB 淘汰一个表项，然后用新找到的页表项代替它
 96 | * 处理巨大的虚拟地址空间有两种解决方法：多级页表和倒排页表
 97 | * 比如 32 位地址空间中，页面大小为 4 KB，偏移量占 12 位，则页号占 20 位。将页号分组，页表项大小为 4 B，4 KB 的页面就能放 1024 个表项，于是每 1024 个页号分为一组。这样分组得到的页表为二级页表，再用一个顶级页表映射页号到二级页表的物理地址即可
 98 | * 使用多级页表时，32 位的地址划分为 10 位的 PT1 域、10 位的 PT2 域、12 位的 Offset 域。比如对于虚拟地址 `0000 0000 0100 0000 0011 0000 0000 0100`，PT1 为 1，PT2 为 3，Offset 为 4，MMU 先访问顶级页表 1 处，得到二级页表的物理地址，由此访问二级页表 3 处，得到页框号，最后加上 Offset 即为最终的物理地址
 99 | * 二级页表可以扩充为更多级。每级页表大小不能超过一个页面，比如 4 KB 页面，偏移为 12 位，页表项大小为 4 B，每 1024 分为一组，则每级最多 10 位，如果是 40 位，则除去 12 位，剩余可以划分为一级 8 位、二级 10 位、三级 10 位的三级页表
100 | * 单级页表只要进行两次访存（第一次访问页表得到物理地址，第二次访问物理地址），而每多一级页表就要多一次访存（不考虑 TLB）
101 | * 另一种方式是倒排页表（inverted page table），让每个页框（而非页面）对应一个表项。比如对于 64 位虚拟地址，4 KB 的页，4 GB 的 RAM，一个倒排页表仅需要 `2 ^ 20` 个表项，表项记录了一个页框对应的页面（进程）
102 | * 虽然倒排页表节省了大量空间，但从虚拟地址到物理地址的转换变得很困难，必须搜索整个倒排页表来找到页面，每一次搜索都要执行访问操作。这个问题可以通过 TLB 解决
103 | * 倒排页表在 64 位机器中很常见，因为 64 位机器中，即使使用大页面页表项数量也很庞大，比如对于 4 MB 页面和 64 位虚拟地址，需要的页表项目数为 `2 ^ 42`
104 | 
105 | ## 页面置换算法
106 | 
107 | * 发生缺页中断时，操作系统必须换出内存中的一个页面，以腾出空间。如果换出的页面在内存驻留期间被修改过，就必须把它写回磁盘以更新其在磁盘上的副本，如果未被修改过则不需要写回
108 | * 如果一个经常用到的页面被换出内存，短时间内它可能又被调入内存，这会带来不必要的开销。因此发生缺页中断时，如何选择要换出的页面是一个值得考虑的问题
109 | 
110 | ### 最优页面置换算法（OPTimal replacement，OPT）
111 | 
112 | * OPT 算法的思路很简单，从所有页面中选出下次访问时间距现在最久的淘汰
113 | 
114 | ```
115 | 432143543215  // 页面队列
116 | 444444444222  // 页 1
117 |  33333333311  // 页 2
118 |   2111555555  // 页 3
119 | TTTT  T  TT   // 是否发生缺页中断（共发生 7 次缺页中断，4 次页面置换）
120 |    |
121 |    把 2 替换掉，因为 432 中，2 下一次被访问的时间最靠后
122 | ```
123 | 
124 | * 这个算法的唯一问题在于，它是无法实现的，因为发生缺页中断时，操作系统无法得知各个页面下一次在什么时候被访问
125 | * 作为理论最优算法，可以用它衡量其他算法的性能。如果操作系统的页面置换性能只比最优算法差 1%，那么花费大量精力来优化算法就不是特别必要的
126 | 
127 | ### 最近未使用页面置换算法（Not Recently Used，NRU）
128 | 
129 | * 操作系统为每个页面设置了两个状态位，当页面被访问时设置 R 位，被修改时设置 M 位。启动进程时，所有页面的 RM 均设为 0，并且 R 被定期（比如每次时钟中断时）清零
130 | * 发生缺页中断时，根据 RM 位的值，可以将页面分为 4 类
131 |   * 第 0 类：未访问未修改（R 位为 0，M 位为 0）
132 |   * 第 1 类：未访问已修改（R 位为 0，M 位为 1，看起来似乎不可能，实际可以由第 3 类转换而来）
133 |   * 第 2 类：已访问未修改（R 位为 1，M 位为 0）
134 |   * 第 3 类：已访问已修改（R 位为 1，M 位为 1，R 在清零后即变为第 1 类）
135 | * NRU 算法随机从第 0 类中选择一个页面淘汰，如果第 0 类中没有页面则选择第 1 类，以此类推，优先选择编号最小的类
136 | * 这个算法的隐含思想是，淘汰一个未访问已修改页面（第 1 类），比淘汰一个频繁使用的干净页面（第 2 类）好
137 | * NRU 的主要优点是易理解且能有效实现，虽然性能不是最好的，但已经够用了
138 | 
139 | ### 先进先出页面置换算法（First-In First-Out，FIFO）
140 | 
141 | * 顾名思义，淘汰最早进入的页面
142 | * 操作系统维护一个内存中所有当前页面的链表，最新进入的页面放在表尾，淘汰页面就是表头页面
143 | * FIFO 可能淘汰常用页面，甚至可能出现分配页面数增多但缺页率反而提高的异常现象（Belady 异常），因此很少使用纯粹的 FIFO 算法
144 | 
145 | ### 第二次机会页面置换算法（Second-Chance）
146 | 
147 | * 对 FIFO 做一个简单的修改：检查最老页面的 R 位（访问位），如果 R 位是 0 则淘汰，如果是 1 则把 R 位清零，并把该页面放到表尾，然后继续搜索
148 | * 如果所有页面都被访问过，则该算法就简化为纯粹的 FIFO 算法
149 | 
150 | ### 时钟页面置换算法（clock）
151 | 
152 | * 第二次机会算法经常要在链表中移动页面，降低了效率且不是很有必要
153 | * 一个更好的办法是将所有页面保存在在一个类似钟面的环形链表中，一个表针指向最老的页面。发生缺页中断时，检查表针指向的页面，如果 R 位是 0 则淘汰该页面，并在该位置插入新页面，然后表针后移一步。如果 R 位是 1 则把 R 位清零，然后表针后移一步。如果该页已存在，不发生缺页中断，R 位是 0 则改为 1，表针不需要移动
154 | 
155 | ### 最近最少使用页面置换算法（Least Recently Used，LRU）
156 | 
157 | * LRU 是 OPT 的一个近似思路，在前几条指令中频繁使用的页面很可能在后几条指令中被使用，反过来说，很久没使用的页面很可能在之后的长时间内仍然不使用
158 | * LRU 是可实现的，但代价很高。实现 LRU 需要维护一个所有页面的链表，最常使用的位于表头，每次访问时必须更新整个链表，在链表中找到页面删除后再添加到表头
159 | * 有一些使用特殊硬件实现 LRU 的方法，比如要求硬件有一个 64 位计数器，它在每条指令执行完后加 1，每个页表项中有一个足够容纳这个计数器值的域。发生缺页中断时，检查所有页表项的计数值，值最小的就是最近最少使用的
160 | * 只有非常少的计算机有这种硬件，LRU 很优秀但很难实现
161 | 
162 | ### 最不常用页面置换算法（Not Frequently Used，NFU）
163 | 
164 | * NFU 是 LRU 的一个软件实现方案
165 | * NFU 将每个页面与一个软件计数器关联，计数器初值为 0，每次时钟中断时，操作系统扫描内存中所有页面，将每个页面的 R 位值加到计数器上，这个计数器大致跟踪了各个页面被访问的频繁程度。发生缺页中断时，则置换计数器值最小的页面
166 | * NFU 的问题在于，第一遍扫描中频繁使用的页面，第二遍扫描时，计数器值仍然很高。这就会导致后续扫描中，即使该页面使用次数最少，也会由于计数器值较高而不被置换
167 | 
168 | ### 老化（aging）算法
169 | 
170 | * 老化算法对 NFU 做了一些改进，在 R 位加进之前先将计数器右移一位，然后把 R 位加到计数器最左端的位
171 | 
172 | ```
173 | 页面
174 | 0    10000000     11000000     11100000     11110000     01111000
175 | 1    00000000     10000000     11000000     01100000     10110000
176 | 2    10000000     01000000     00100000     00100000     10001000
177 | 3    00000000     00000000     10000000     01000000     00100000
178 | 4    10000000     11000000     01100000     10110000     01011000
179 | 5    10000000     01000000     10100000     01010000     00101000
180 |      |            |            |            |            |
181 |      访问页面 024 访问 014     访问 013     访问 04      访问 12
182 | ```
183 | 
184 | * 发生缺页中断时，置换计数器值最小的页面，因为前面的 0 越多，说明其最近越不常被访问
185 | * 老化算法非常近似 LRU，但有两个区别
186 |   * 比如最后一次访问时，如果发生缺页中断，需要置换一个页面。页面 3 和页面 5 开头都是 001，即前两次未被访问，前第三次被访问，如果前第三次是页面 5 先被访问，则 LRU 会替换页面 5，但这里无法区分两者谁先被访问，而只能替换值较小的页面 3
187 |   * 老化算法计数器位数有限，比如这里是 8 位，只能记录过去 8 次的访问，超过该次数的记录无法得知。不过实践中，如果时钟滴答是 20 ms，8 位一般是够用的，如果一个页面 160 ms 未被访问，则很可能不重要
188 | 
189 | ### 工作集页面置换算法
190 | 
191 | * 在单纯的分页系统中，刚开始启动进程时，内存中没有页面，CPU 尝试取第一条指令时就会产生一次缺页中断，使操作系统装入含第一条指令的页面。一段时间后，进程需要的大部分页面都在内存了，进程开始在较少缺页中断的情况下运行。这个策略称为请求调页（demand paging），因为页面在需要时被调入，而不是预先装入
192 | * 一个进程当前正在使用的页面集合称为它的工作集（Denning），如果整个工作集都被装入内存中，那么进程在运行到下一阶段之前不会产生很多缺页中断。如果内存太小无法容纳整个工作集，进程的运行过程中将产生大量缺页中断，导致运行速度变慢，因为通常执行一条指令只要几纳秒，而从磁盘读入一个页面需要十几毫秒。如果每执行几条指令就发生一次缺页中断，就称这个程序发生了颠簸（Denning）
193 | * 请求调页策略中，每次装入一个进程都要产生大量缺页中断，速度太慢，并且 CPU 花了很多时间处理缺页中断，浪费了许多 CPU 时间，因此不少分页系统会设法跟踪工作集，以确保在进程运行前，工作集已经在内存中了，这个方法称为工作集模型（Denning），也叫预先调页（prepaging），其目的在于大大减少缺页中断率
194 | * 工作集是随着时间变化的，它是最近 k 次访存所访问过的页面集合。为了实现该算法，需要一种精确的方法来确定哪些页面在工作集中，为此必须预先选定 k 值。但有了工作集的定义并不意味着就能计算出工作集
195 | * 假设有一个长度为 k 的移位寄存器，每次访存都把寄存器左移一位，然后在最右端插入刚才访问过的页面号，寄存器中 k 个页面号的集合就是工作集。理论上，发生缺页中断时，只要读出寄存器中的内容并排序，然后删除重复的页面，结果就是工作集。但维护该寄存器并在缺页中断时处理它需要很大的开销，因此该技术从未被使用过
196 | * 有几种近似的方法作为替代，一种常见近似方法是，不向后查找最近 k 次的内存访问，而是查找过去一定时间内，比如过去 10 ms 访存所用到的页面集合
197 | * 基于工作集的页面置换算法是，找出一个不在工作集中的页面并淘汰，为此表项中至少需要包含两条信息，一是上次使用该页面的近似时间，二是 R 位（访问位）
198 | * 处理表项时，如果 R 位是 1，则把上次使用时间改为当前实际时间。如果 R 位是 0，则可以作为置换候选者，计算生存时间（当前实际时间与上次使用时间的差），如果生存时间大于定义工作集范围的时间，则该页面在工作集外，将其置换。如果 R 为 0 且生存时间不超过定义工作集范围的时间，则该页面仍在工作集中，记录该页面。如果扫描完整个页表都没有可淘汰的，则从记录页面中选一个生存时间最长的淘汰，如果记录页面为空，即所有页面 R 位均为 1，则随机选择一个淘汰
199 | 
200 | ### 工作集时钟（WSClock）页面置换算法
201 | 
202 | * 工作集算法需要扫描整个页表，比较费时，结合时钟算法的思路稍作改进，即可得到 WSClock 算法。它实现简单，性能较好，在实际工作中得到了广泛使用
203 | 
204 | ## 分段（Segmentation）
205 | 
206 | * 一个编译器在编译过程中会建立许多表，其中可能包括
207 |   * 被保存起来供打印清单用的源程序正文（用于批处理系统）
208 |   * 包含变量名字和属性的符号表
209 |   * 包含用到的所有整型量和浮点常量的表
210 |   * 包含程序语法分析结果的语法分析树
211 |   * 编译器内部过程调用使用的堆栈
212 | * 在一维地址空间中，当有多个动态增加的表时，就可能发生碰撞。一种能令程序员不用管理表扩张和收缩的方法是，在机器上提供多个互相独立的段（segment）的地址空间，段的长度可以不同，在运行时可以改变，比如堆栈段的长度在数据压入时会增长，在数据弹出时会减小
213 | * 每个段都构成一个独立的地址空间，在内存中占据连续空间，可以独立地增长或减小，而不会影响其他段
214 | * 段是按逻辑功能的划分的实体，程序员使用起来更方便，并且程序的可读性更高。此外，分段有助于共享和保护。分段系统中，可以把共享库放到一个单独的段中由各个进程共享，而不需要在每个进程的地址空间中保存一份。当组成一个程序的所有过程都被编译和链接好以后，如果一个段的过程被修改并重新编译，也不会影响到其他段，因为这个段的起始地址（基址）没有被修改
215 | * 要在分段的存储器中表示一个地址，必须提供一个段号（段名）和一个段内地址（段内偏移量）
216 | 
217 | ```cpp
218 | 31 ... 16 15 ... 0  // 可用 31 - 16 表示段号，15 - 0 表示段内地址
219 | ```
220 | 
221 | * 每个进程需要一张段表，每个段表项记录一个段的起始位置和段的长度。段表项长度是固定的，因此段号可以是隐含的，不占存储空间。查找时，如果段号越界，则产生越界中断。如果段内地址超出段长，则产生越界中断
222 | 
223 | ```
224 | K 号段的段表存放地址 = 段表起始位置 + K * 段表项长度
225 | 
226 | 段号 基址 段长
227 | 0    20K  3K
228 | 1    60K  2K
229 | 2    40K  5K
230 | 
231 | 如果一个逻辑地址段号为 1，段内地址为 1024
232 | 段号 1 的段长为 2K，大于 1024，不产生越界中断
233 | 存放地址 = 60K + 1024 = 61K
234 | ```
235 | 
236 | * 分段管理的缺点是，如果段长过大，则不便于分配连续空间，此外会产生外部碎片。分页管理的内存利用率高，不会产生外部碎片，只会有少量页内碎片。因此，两者结合可以互相弥补，实现段页式管理
237 | * 段页式系统的地址由段号、页号、页内地址（页内偏移量）组成。分段对用户可见，而分页不可见
238 | 
239 | ```
240 | 31 ... 16 15 ... 12 11 ... 0  // 可用 31 - 16 表示段号，15 - 12 表示页号，11 - 0 表示页内地址
241 | ```
242 | 
243 | * 每个段表项记录页表长度、页表起始地址，通过页表起始地址找到页号，通过页号对应的页表项目找到物理地址，一共需要三次访存（如果引入以段号和页号为关键字的 TLB 且命中，则只需要一次访存）。段表项长度是固定的，段号可以是隐含的。同样，每个页表项长度固定，页号是隐含的
244 | 


--------------------------------------------------------------------------------
/docs/05_designing_lock_based_concurrent_data_structure.md:
--------------------------------------------------------------------------------
  1 | * 设计并发数据结构要考虑两点，一是确保访问 thread-safe，二是提高并发度
  2 |   * thread-safe 基本要求如下
  3 |     * 数据结构的不变量（invariant）被一个线程破坏时，确保不被线程看到此状态
  4 |     * 提供操作完整的函数来避免数据结构接口中固有的 race condition
  5 |     * 注意数据结构出现异常时的行为，以确保不变量不被破坏
  6 |     * 限制锁的范围，避免可能的嵌套锁，最小化死锁的概率
  7 |   * 作为数据结构的设计者，要提高数据结构的并发度，可以从以下角度考虑
  8 |     * 部分操作能否在锁的范围外执行
  9 |     * 数据结构的不同部分是否被不同的 mutex 保护
 10 |     * 是否所有操作需要同级别的保护
 11 |     * 在不影响操作语义的前提下，能否对数据结构做简单的修改提高并发度
 12 |   * 总结为一点，即最小化线程对共享数据的轮流访问，最大化真实的并发量
 13 | 
 14 | ## thread-safe queue
 15 | 
 16 | * 之前实现过的 thread-safe stack 和 queue 都是用一把锁定保护整个数据结构，这限制了并发性，多线程在成员函数中阻塞时，同一时间只有一个线程能工作。这种限制主要是因为内部实现使用的是 [std::queue](https://en.cppreference.com/w/cpp/container/queue)，为了支持更高的并发，需要更换内部的实现方式，使用细粒度的（fine-grained）锁。最简单的实现方式是包含头尾指针的单链表，不考虑并发的单链表实现如下
 17 | 
 18 | ```cpp
 19 | #include <memory>
 20 | #include <utility>
 21 | 
 22 | template <typename T>
 23 | class Queue {  // Unsynchronized singly linked FIFO queue; head_ owns the node chain.
 24 |  public:
 25 |   Queue() = default;
 26 | 
 27 |   Queue(const Queue&) = delete;  // non-copyable
 28 |   Queue& operator=(const Queue&) = delete;
 29 | 
 30 |   void push(T x) {  // Appends x at the tail.
 31 |     auto new_node = std::make_unique<Node>(std::move(x));
 32 |     Node* new_tail_node = new_node.get();
 33 |     if (tail_) {
 34 |       tail_->next = std::move(new_node);
 35 |     } else {  // empty queue: the new node is also the head
 36 |       head_ = std::move(new_node);
 37 |     }
 38 |     tail_ = new_tail_node;
 39 |   }
 40 | 
 41 |   std::shared_ptr<T> try_pop() {  // Removes and returns the front value; nullptr if empty.
 42 |     if (!head_) {
 43 |       return nullptr;
 44 |     }
 45 |     auto res = std::make_shared<T>(std::move(head_->v));
 46 |     std::unique_ptr<Node> head_node = std::move(head_);
 47 |     head_ = std::move(head_node->next);
 48 |     if (!head_) tail_ = nullptr;  // last node removed: tail_ would otherwise dangle
 49 |     return res;
 50 |   }
 51 | 
 52 |  private:
 53 |   struct Node {
 54 |     explicit Node(T x) : v(std::move(x)) {}
 55 |     T v;
 56 |     std::unique_ptr<Node> next;  // owns the following node
 57 |   };
 58 | 
 59 |   std::unique_ptr<Node> head_;  // front of the queue; null when empty
 60 |   Node* tail_ = nullptr;        // non-owning pointer to the last node; null when empty
 61 | };
 62 | ```
 63 | 
 64 | * 即使用两个 mutex 分别保护头尾指针，这个实现在多线程下也有明显问题。push 可以同时修改头尾指针，会对两个 mutex 上锁，另外仅有一个元素时头尾指针相等，push 写和 try_pop 读的 next 节点是同一对象，产生了竞争，锁的也是同一个 mutex
 65 | * 该问题很容易解决，在头节点前初始化一个 dummy 节点即可，这样 push 只访问尾节点，不会再与 try_pop 竞争头节点
 66 | 
 67 | ```cpp
 68 | #include <memory>
 69 | #include <utility>
 70 | 
 71 | template <typename T>
 72 | class Queue {  // Unsynchronized FIFO queue whose list always ends in an empty dummy node.
 73 |  public:
 74 |   Queue() : head_(new Node), tail_(head_.get()) {}  // starts with only the dummy node
 75 | 
 76 |   Queue(const Queue&) = delete;
 77 | 
 78 |   Queue& operator=(const Queue&) = delete;
 79 | 
 80 |   void push(T x) {  // Stores x in the current dummy node, then appends a fresh dummy.
 81 |     auto new_val = std::make_shared<T>(std::move(x));
 82 |     auto new_node = std::make_unique<Node>();
 83 |     Node* new_tail_node = new_node.get();
 84 |     tail_->v = new_val;
 85 |     tail_->next = std::move(new_node);
 86 |     tail_ = new_tail_node;
 87 |   }
 88 | 
 89 |   std::shared_ptr<T> try_pop() {  // Returns the front value, or nullptr if empty.
 90 |     if (head_.get() == tail_) {  // head is the dummy node: nothing to pop
 91 |       return nullptr;
 92 |     }
 93 |     std::shared_ptr<T> res = head_->v;  // was `head->v`, an undeclared identifier
 94 |     std::unique_ptr<Node> head_node = std::move(head_);
 95 |     head_ = std::move(head_node->next);
 96 |     return res;
 97 |   }
 98 | 
 99 |  private:
100 |   struct Node {
101 |     std::shared_ptr<T> v;        // null in the dummy node
102 |     std::unique_ptr<Node> next;  // owns the following node
103 |   };
104 | 
105 |   std::unique_ptr<Node> head_;  // front node; equals tail_ when the queue is empty
106 |   Node* tail_ = nullptr;        // non-owning pointer to the trailing dummy node
107 | };
108 | ```
109 | 
110 | * 接着加上锁，锁的范围应该尽可能小
111 | 
112 | ```cpp
113 | #include <memory>
114 | #include <mutex>
115 | #include <utility>
116 | 
117 | template <typename T>
118 | class ConcurrentQueue {  // Dummy-node FIFO queue with separate mutexes for head_ and tail_.
119 |  public:
120 |   ConcurrentQueue() : head_(new Node), tail_(head_.get()) {}  // starts with only the dummy node
121 | 
122 |   ConcurrentQueue(const ConcurrentQueue&) = delete;
123 | 
124 |   ConcurrentQueue& operator=(const ConcurrentQueue&) = delete;
125 | 
126 |   void push(T x) {  // Appends x; the allocations happen before tail_mutex_ is taken.
127 |     auto new_val = std::make_shared<T>(std::move(x));
128 |     auto new_node = std::make_unique<Node>();
129 |     Node* new_tail_node = new_node.get();
130 | 
131 |     std::lock_guard<std::mutex> l(tail_mutex_);
132 |     tail_->v = new_val;
133 |     tail_->next = std::move(new_node);
134 |     tail_ = new_tail_node;
135 |   }
136 | 
137 |   std::shared_ptr<T> try_pop() {  // Returns the front value, or nullptr if empty.
138 |     std::unique_ptr<Node> head_node = pop_head();
139 |     return head_node ? head_node->v : nullptr;  // the detached node is destroyed here, outside any lock
140 |   }
141 | 
142 |  private:
143 |   struct Node {
144 |     std::shared_ptr<T> v;        // null in the dummy node
145 |     std::unique_ptr<Node> next;  // owns the following node
146 |   };
147 | 
148 |  private:
149 |   std::unique_ptr<Node> pop_head() {  // Detaches and returns the front node, or nullptr if empty.
150 |     std::lock_guard<std::mutex> l(head_mutex_);
151 |     if (head_.get() == get_tail()) {  // tail_mutex_ is taken briefly while head_mutex_ is held
152 |       return nullptr;
153 |     }
154 |     std::unique_ptr<Node> head_node = std::move(head_);
155 |     head_ = std::move(head_node->next);
156 |     return head_node;
157 |   }
158 | 
159 |   Node* get_tail() {  // Reads tail_ under tail_mutex_.
160 |     std::lock_guard<std::mutex> l(tail_mutex_);
161 |     return tail_;
162 |   }
163 | 
164 |  private:
165 |   std::unique_ptr<Node> head_;
166 |   Node* tail_ = nullptr;
167 |   std::mutex head_mutex_;  // locked by pop_head
168 |   std::mutex tail_mutex_;  // locked by push and get_tail
169 | };
170 | ```
171 | 
172 | * push 中创建新值和新节点都没上锁，多线程可用并发创建新值和新节点。虽然同时只有一个线程能添加新节点，但这只需要一个指针赋值操作，锁住尾节点的时间很短，try_pop 中对尾节点只是用来做一次比较，持有尾节点的时间同样很短，因此 try_pop 和 push 几乎可以同时调用。try_pop 中锁住头节点所做的也只是指针赋值操作，开销较大的析构在锁外进行，这意味着虽然同时只有一个线程能 pop_head，但允许多线程删除节点并返回数据，提升了 try_pop 的并发调用数量
173 | * 最后再结合 [std::condition_variable](https://en.cppreference.com/w/cpp/thread/condition_variable) 实现 wait_and_pop，即得到与之前接口相同但并发度更高的 thread-safe queue
174 | 
175 | ```cpp
176 | #include <condition_variable>
177 | #include <memory>
178 | #include <mutex>
179 | #include <utility>
180 | 
181 | template <typename T>
182 | class ConcurrentQueue {  // Dummy-node FIFO queue with head/tail mutexes and blocking pops.
183 |  public:
184 |   ConcurrentQueue() : head_(new Node), tail_(head_.get()) {}  // starts with only the dummy node
185 | 
186 |   ConcurrentQueue(const ConcurrentQueue&) = delete;
187 | 
188 |   ConcurrentQueue& operator=(const ConcurrentQueue&) = delete;
189 | 
190 |   void push(T x) {  // Appends x and wakes one waiting consumer.
191 |     auto new_val = std::make_shared<T>(std::move(x));
192 |     auto new_node = std::make_unique<Node>();
193 |     Node* new_tail_node = new_node.get();
194 |     {
195 |       std::lock_guard<std::mutex> l(tail_mutex_);
196 |       tail_->v = new_val;
197 |       tail_->next = std::move(new_node);
198 |       tail_ = new_tail_node;
199 |     }
200 |     cv_.notify_one();  // notify after tail_mutex_ has been released
201 |   }
202 | 
203 |   std::shared_ptr<T> try_pop() {  // Returns the front value, or nullptr if empty.
204 |     std::unique_ptr<Node> head_node = try_pop_head();
205 |     return head_node ? head_node->v : nullptr;
206 |   }
207 | 
208 |   bool try_pop(T& res) {  // Moves the front value into res; returns false if empty.
209 |     std::unique_ptr<Node> head_node = try_pop_head(res);
210 |     return head_node != nullptr;
211 |   }
212 | 
213 |   std::shared_ptr<T> wait_and_pop() {  // Blocks until a value is available, then returns it.
214 |     std::unique_ptr<Node> head_node = wait_pop_head();
215 |     return head_node->v;
216 |   }
217 | 
218 |   void wait_and_pop(T& res) { wait_pop_head(res); }  // blocking variant that moves into res
219 | 
220 |   bool empty() const {  // const: needs the mutable mutexes and the const get_tail() below
221 |     std::lock_guard<std::mutex> l(head_mutex_);
222 |     return head_.get() == get_tail();
223 |   }
224 | 
225 |  private:
226 |   struct Node {
227 |     std::shared_ptr<T> v;        // null in the dummy node
228 |     std::unique_ptr<Node> next;  // owns the following node
229 |   };
230 | 
231 |  private:
232 |   std::unique_ptr<Node> try_pop_head() {  // Detaches the front node, or returns nullptr if empty.
233 |     std::lock_guard<std::mutex> l(head_mutex_);
234 |     if (head_.get() == get_tail()) {
235 |       return nullptr;
236 |     }
237 |     return pop_head();
238 |   }
239 | 
240 |   std::unique_ptr<Node> try_pop_head(T& res) {  // Same, but first moves the value into res.
241 |     std::lock_guard<std::mutex> l(head_mutex_);
242 |     if (head_.get() == get_tail()) {
243 |       return nullptr;
244 |     }
245 |     res = std::move(*head_->v);
246 |     return pop_head();
247 |   }
248 | 
249 |   std::unique_ptr<Node> wait_pop_head() {  // Waits for data, then detaches the front node.
250 |     std::unique_lock<std::mutex> l(wait_for_data());
251 |     return pop_head();
252 |   }
253 | 
254 |   std::unique_ptr<Node> wait_pop_head(T& res) {  // Same, but first moves the value into res.
255 |     std::unique_lock<std::mutex> l(wait_for_data());
256 |     res = std::move(*head_->v);
257 |     return pop_head();
258 |   }
259 | 
260 |   std::unique_lock<std::mutex> wait_for_data() {  // Returns with head_mutex_ held and the queue non-empty.
261 |     std::unique_lock<std::mutex> l(head_mutex_);
262 |     cv_.wait(l, [this] { return head_.get() != get_tail(); });
263 |     return l;
264 |   }
265 | 
266 |   std::unique_ptr<Node> pop_head() {  // Takes no lock itself; every caller above holds head_mutex_.
267 |     std::unique_ptr<Node> head_node = std::move(head_);
268 |     head_ = std::move(head_node->next);
269 |     return head_node;
270 |   }
271 | 
272 |   Node* get_tail() const {  // Reads tail_ under tail_mutex_; const so empty() can call it.
273 |     std::lock_guard<std::mutex> l(tail_mutex_);
274 |     return tail_;
275 |   }
276 | 
277 |  private:
278 |   std::unique_ptr<Node> head_;
279 |   Node* tail_ = nullptr;
280 |   mutable std::mutex head_mutex_;  // mutable: locked inside the const empty()
281 |   mutable std::mutex tail_mutex_;
282 |   std::condition_variable cv_;  // signalled by push, waited on in wait_for_data
283 | };
284 | ```
285 | 
286 | ## thread-safe map
287 | 
288 | * 并发访问 [std::map](https://en.cppreference.com/w/cpp/container/map) 和 [std::unordered_map](https://en.cppreference.com/w/cpp/container/unordered_map) 的接口的问题在于迭代器，其他线程删除元素时会导致迭代器失效，因此 thread-safe map 的接口设计就要跳过迭代器
289 | * 为了使用细粒度锁，就不应该使用标准库容器。可选的关联容器数据结构有三种，一是二叉树（如红黑树），但每次查找修改都要从访问根节点开始，也就表示根节点需要上锁，尽管沿着树向下访问节点时会解锁，但这个比起覆盖整个数据结构的单个锁好不了多少
290 | * 第二种方式是有序数组，这比二叉树还差，因为无法提前得知一个给定的值应该放在哪，于是同样需要一个覆盖整个数组的锁
291 | * 第三种方式是哈希表。假如有一个固定数量的桶，一个 key 属于哪个桶取决于 key 的属性和哈希函数，这意味着可以安全地分开锁住每个桶。如果使用读写锁，就能将并发度提高相当于桶数量的倍数
292 | 
293 | ```cpp
294 | #include <algorithm>
295 | #include <functional>
296 | #include <list>
297 | #include <map>
298 | #include <memory>
299 | #include <mutex>
300 | #include <shared_mutex>
301 | #include <utility>
302 | #include <vector>
303 | 
304 | template <typename K, typename V, typename Hash = std::hash<K>>
305 | class ConcurrentMap {
306 |  public:
307 |   // Defaults to 19 buckets (index = hash % bucket count; a prime count helps spread keys evenly)
308 |   ConcurrentMap(std::size_t n = 19, const Hash& h = Hash{})
309 |       : buckets_(std::max<std::size_t>(n, 1)), hasher_(h) {  // n == 0 would mean % 0 in get_bucket
310 |     for (auto& x : buckets_) {
311 |       x.reset(new Bucket);
312 |     }
313 |   }
314 | 
315 |   ConcurrentMap(const ConcurrentMap&) = delete;
316 | 
317 |   ConcurrentMap& operator=(const ConcurrentMap&) = delete;
318 | 
319 |   V get(const K& k, const V& default_value = V{}) const {
320 |     return get_bucket(k).get(k, default_value);
321 |   }
322 | 
323 |   void set(const K& k, const V& v) { get_bucket(k).set(k, v); }
324 | 
325 |   void erase(const K& k) { get_bucket(k).erase(k); }
326 | 
327 |   // For convenience, returns a snapshot of the contents as a std::map
328 |   std::map<K, V> to_map() const {
329 |     std::vector<std::shared_lock<std::shared_mutex>> locks;  // read-only: shared locks suffice
330 |     for (auto& x : buckets_) {
331 |       locks.emplace_back(x->m);  // all buckets stay locked until the copy below is done
332 |     }
333 |     std::map<K, V> res;
334 |     for (auto& x : buckets_) {
335 |       for (auto& y : x->data) {
336 |         res.emplace(y);
337 |       }
338 |     }
339 |     return res;
340 |   }
341 | 
342 |  private:
343 |   struct Bucket {
344 |     std::list<std::pair<K, V>> data;
345 |     mutable std::shared_mutex m;  // protects this bucket's data
346 | 
347 |     V get(const K& k, const V& default_value) const {
348 |       // Modifies nothing, so it is exception safe
349 |       std::shared_lock<std::shared_mutex> l(m);  // shared lock for reading
350 |       auto it = std::find_if(data.begin(), data.end(),
351 |                              [&](auto& x) { return x.first == k; });
352 |       return it == data.end() ? default_value : it->second;
353 |     }
354 | 
355 |     void set(const K& k, const V& v) {
356 |       std::unique_lock<std::shared_mutex> l(m);  // exclusive lock for writing
357 |       auto it = std::find_if(data.begin(), data.end(),
358 |                              [&](auto& x) { return x.first == k; });
359 |       if (it == data.end()) {
360 |         data.emplace_back(k, v);  // emplace_back is exception safe
361 |       } else {
362 |         it->second = v;  // assignment may throw; V is user-supplied, so that is left to the user
363 |       }
364 |     }
365 | 
366 |     void erase(const K& k) {
367 |       std::unique_lock<std::shared_mutex> l(m);  // exclusive lock for writing
368 |       auto it = std::find_if(data.begin(), data.end(),
369 |                              [&](auto& x) { return x.first == k; });
370 |       if (it != data.end()) {
371 |         data.erase(it);
372 |       }
373 |     }
374 |   };
375 | 
376 |   Bucket& get_bucket(const K& k) const {  // bucket count is fixed, so no lock is needed here
377 |     return *buckets_[hasher_(k) % buckets_.size()];
378 |   }
379 | 
380 |  private:
381 |   std::vector<std::unique_ptr<Bucket>> buckets_;
382 |   Hash hasher_;
383 | };
384 | ```
385 | 
386 | ## thread-safe list
387 | 
388 | ```cpp
389 | #include <memory>
390 | #include <mutex>
391 | #include <utility>
392 | 
393 | template <typename T>
394 | class ConcurrentList {  // Singly linked list with one mutex per node (hand-over-hand locking).
395 |  public:
396 |   ConcurrentList() = default;
397 | 
398 |   ~ConcurrentList() {
399 |     remove_if([](const T&) { return true; });  // predicates receive the stored T, not a Node
400 |   }
401 | 
402 |   ConcurrentList(const ConcurrentList&) = delete;
403 | 
404 |   ConcurrentList& operator=(const ConcurrentList&) = delete;
405 | 
406 |   void push_front(const T& x) {  // Inserts a copy of x right after the sentinel head_.
407 |     std::unique_ptr<Node> t(new Node(x));
408 |     std::lock_guard<std::mutex> head_lock(head_.m);
409 |     t->next = std::move(head_.next);
410 |     head_.next = std::move(t);
411 |   }
412 | 
413 |   template <typename F>
414 |   void for_each(F f) {  // Calls f on every element, front to back.
415 |     Node* cur = &head_;
416 |     std::unique_lock<std::mutex> head_lock(head_.m);
417 |     while (Node* const next = cur->next.get()) {
418 |       std::unique_lock<std::mutex> next_lock(next->m);
419 |       head_lock.unlock();  // next node is locked, so the previous node's lock can be released
420 |       f(*next->data);
421 |       cur = next;                        // advance to the next node
422 |       head_lock = std::move(next_lock);  // take over the next node's lock and repeat
423 |     }
424 |   }
425 | 
426 |   template <typename F>
427 |   std::shared_ptr<T> find_first_if(F f) {  // Returns the first element satisfying f, or nullptr.
428 |     Node* cur = &head_;
429 |     std::unique_lock<std::mutex> head_lock(head_.m);
430 |     while (Node* const next = cur->next.get()) {
431 |       std::unique_lock<std::mutex> next_lock(next->m);
432 |       head_lock.unlock();
433 |       if (f(*next->data)) {
434 |         return next->data;  // found: return it without searching further
435 |       }
436 |       cur = next;
437 |       head_lock = std::move(next_lock);
438 |     }
439 |     return nullptr;
440 |   }
441 | 
442 |   template <typename F>
443 |   void remove_if(F f) {  // Unlinks and destroys every element for which f returns true.
444 |     Node* cur = &head_;
445 |     std::unique_lock<std::mutex> head_lock(head_.m);
446 |     while (Node* const next = cur->next.get()) {
447 |       std::unique_lock<std::mutex> next_lock(next->m);
448 |       if (f(*next->data)) {  // predicate holds: unlink the next node
449 |         std::unique_ptr<Node> old_next = std::move(cur->next);
450 |         cur->next = std::move(next->next);  // link past the removed node
451 |         next_lock.unlock();  // unlock before old_next destroys the node and its mutex
452 |       } else {  // otherwise advance to the next node
453 |         head_lock.unlock();
454 |         cur = next;
455 |         head_lock = std::move(next_lock);
456 |       }
457 |     }
458 |   }
459 | 
460 |  private:
461 |   struct Node {
462 |     std::mutex m;  // guards this node's next pointer
463 |     std::shared_ptr<T> data;  // null in the sentinel head_
464 |     std::unique_ptr<Node> next;
465 |     Node() = default;
466 |     explicit Node(const T& x) : data(std::make_shared<T>(x)) {}  // explicit: no silent T -> Node
467 |   };
468 | 
469 |   Node head_;  // sentinel node; holds no data
470 | };
471 | ```
472 | 


--------------------------------------------------------------------------------
/docs/01_managing_thread.md:
--------------------------------------------------------------------------------
  1 | ## [std::thread](https://en.cppreference.com/w/cpp/thread/thread)
  2 | 
  3 | * 每个程序有一个执行 main() 函数的主线程，将函数添加为 [std::thread](https://en.cppreference.com/w/cpp/thread/thread) 的参数即可启动另一个线程，两个线程会同时运行
  4 | 
  5 | ```cpp
  6 | #include <iostream>
  7 | #include <thread>
  8 | 
  9 | void f() { std::cout << "hello world"; }  // runs on the new thread
 10 | 
 11 | int main() {
 12 |   std::thread t{f};  // starts a thread that executes f
 13 |   t.join();  // wait for the new thread to finish
 14 | }
 15 | ```
 16 | 
 17 | * [std::thread](https://en.cppreference.com/w/cpp/thread/thread) 的参数也可以是函数对象或者 lambda
 18 | 
 19 | ```cpp
 20 | #include <iostream>
 21 | #include <thread>
 22 | 
 23 | struct A {  // callable object used as a thread entry point
 24 |   void operator()() const { std::cout << 1; }
 25 | };
 26 | 
 27 | int main() {
 28 |   A a;
 29 |   std::thread t1(a);  // invokes A's copy constructor
 30 |   std::thread t2(A());  // most vexing parse: declares a function named t2, not a thread
 31 |   std::thread t3{A()};  // braces avoid the vexing parse
 32 |   std::thread t4((A()));  // so do extra parentheses
 33 |   std::thread t5{[] { std::cout << 1; }};  // a lambda works as well
 34 |   t1.join();
 35 |   t3.join();
 36 |   t4.join();
 37 |   t5.join();
 38 | }
 39 | ```
 40 | 
 41 | * 在线程销毁前要对其调用 [join](https://en.cppreference.com/w/cpp/thread/thread/join) 等待线程退出或 [detach](https://en.cppreference.com/w/cpp/thread/thread/detach) 将线程分离，否则 [std::thread](https://en.cppreference.com/w/cpp/thread/thread) 的析构函数会调用 [std::terminate](https://en.cppreference.com/w/cpp/error/terminate) 终止程序，注意分离线程可能出现空悬引用的隐患
 42 | 
 43 | ```cpp
 44 | #include <iostream>
 45 | #include <thread>
 46 | 
 47 | class A {  // callable that keeps a reference to an int owned by someone else
 48 |  public:
 49 |   A(int& x) : x_(x) {}
 50 | 
 51 |   void operator()() const {
 52 |     for (int i = 0; i < 1000000; ++i) {
 53 |       call(x_);  // x_ may dangle once the referenced object has been destroyed
 54 |     }
 55 |   }
 56 | 
 57 |  private:
 58 |   void call(int& x) const {}  // const so the const operator() above can call it
 59 | 
 60 |  private:
 61 |   int& x_;
 62 | };
 63 | 
 64 | void f() {
 65 |   int x = 0;
 66 |   A a{x};
 67 |   std::thread t{a};  // the thread gets a copy of a, which still refers to x
 68 |   t.detach();  // do not wait for t to finish
 69 | }  // t may still be running after f returns, but x is gone, so its x_ dangles
 70 | 
 71 | int main() {
 72 |   std::thread t{f};  // leads to the dangling reference
 73 |   t.join();
 74 | }
 75 | ```
 76 | 
 77 | * [join](https://en.cppreference.com/w/cpp/thread/thread/join) 会在线程结束后清理 [std::thread](https://en.cppreference.com/w/cpp/thread/thread)，使其与完成的线程不再关联，因此对一个线程只能进行一次 [join](https://en.cppreference.com/w/cpp/thread/thread/join)
 78 | 
 79 | ```cpp
 80 | #include <thread>
 81 | 
 82 | int main() {
 83 |   std::thread t([] {});
 84 |   t.join();
 85 |   t.join();  // error: t is no longer joinable after the first join
 86 | }
 87 | ```
 88 | 
 89 | * 如果线程运行过程中发生异常，之后的 [join](https://en.cppreference.com/w/cpp/thread/thread/join) 会被忽略，为此需要捕获异常，并在抛出异常前 [join](https://en.cppreference.com/w/cpp/thread/thread/join)
 90 | 
 91 | ```cpp
 92 | #include <thread>
 93 | 
 94 | int main() {
 95 |   std::thread t([] {});
 96 |   try {
 97 |     throw 0;
 98 |   } catch (int x) {
 99 |     t.join();  // join() before handling the exception
100 |     throw x;   // then throw the exception again
101 |   }
102 |   t.join();  // never reached here because the exception was thrown above
103 | }
104 | ```
105 | 
106 | * C++20 提供了 [std::jthread](https://en.cppreference.com/w/cpp/thread/jthread)，它会在析构函数中对线程 [join](https://en.cppreference.com/w/cpp/thread/thread/join)
107 | 
108 | ```cpp
109 | #include <thread>
110 | 
111 | int main() {
112 |   std::jthread t([] {});  // no explicit join: std::jthread joins in its destructor
113 | }
114 | ```
115 | 
116 | * [detach](https://en.cppreference.com/w/cpp/thread/thread/detach) 分离线程会让线程在后台运行，一般将这种在后台运行的线程称为守护线程，守护线程与主线程无法直接交互，也不能被 [join](https://en.cppreference.com/w/cpp/thread/thread/join)
117 | 
118 | ```cpp
119 | std::thread t([] {});
120 | t.detach();
121 | assert(!t.joinable());
122 | ```
123 | 
124 | * 创建守护线程一般是为了长时间运行，比如有一个文档处理应用，为了同时编辑多个文档，每次新开一个文档，就可以开一个对应的守护线程
125 | 
126 | ```cpp
127 | void edit_document(const std::string& filename) {
128 |   open_document_and_display_gui(filename);
129 |   while (!done_editing()) {
130 |     user_command cmd = get_user_input();
131 |     if (cmd.type == open_new_document) {
132 |       const std::string new_name = get_filename_from_user();
133 |       std::thread t(edit_document, new_name);
134 |       t.detach();
135 |     } else {
136 |       process_user_input(cmd);
137 |     }
138 |   }
139 | }
140 | ```
141 | 
142 | ## 为带参数的函数创建线程
143 | 
144 | * 有参数的函数也能传给 [std::thread](https://en.cppreference.com/w/cpp/thread/thread)，参数的默认实参会被忽略
145 | 
146 | ```cpp
147 | #include <thread>
148 | 
149 | void f(int i = 1) {}  // The default argument is not used when f is passed to std::thread.
150 | 
151 | int main() {
152 |   std::thread t{f, 42};  // std::thread t{f} would be an error, because the default argument is ignored.
153 |   t.join();
154 | }
155 | ```
156 | 
157 | * 参数的引用类型也会被忽略，为此要使用 [std::ref](https://en.cppreference.com/w/cpp/utility/functional/ref)
158 | 
159 | ```cpp
160 | #include <cassert>
161 | #include <thread>
162 | 
163 | void f(int& i) { ++i; }  // Increments the caller's variable through the reference.
164 | 
165 | int main() {
166 |   int i = 1;
167 |   std::thread t{f, std::ref(i)};  // std::ref makes the thread receive i by reference.
168 |   t.join();
169 |   assert(i == 2);  // The increment done on the thread is visible after join().
170 | }
171 | ```
172 | 
173 | * 如果对一个实例的 non-static 成员函数创建线程，第一个参数类型为成员函数指针，第二个参数类型为实例指针，后续参数为函数参数
174 | 
175 | ```cpp
176 | #include <iostream>
177 | #include <thread>
178 | 
179 | class A {
180 |  public:
181 |   void f(int i) { std::cout << i; }  // Prints i to std::cout.
182 | };
183 | 
184 | int main() {
185 |   A a;
186 |   std::thread t1{&A::f, &a, 42};  // Calls a->f(42).
187 |   std::thread t2{&A::f, a, 42};   // Copy-constructs tmp_a, then calls tmp_a.f(42).
188 |   t1.join();
189 |   t2.join();
190 | }
191 | ```
192 | 
193 | * 如果要为参数是 move-only 类型的函数创建线程，则需要使用 [std::move](https://en.cppreference.com/w/cpp/utility/move) 传入参数
194 | 
195 | ```cpp
196 | #include <iostream>
197 | #include <thread>
198 | #include <utility>
199 | 
200 | void f(std::unique_ptr<int> p) { std::cout << *p; }  // Takes ownership of p and prints the pointed-to int.
201 | 
202 | int main() {
203 |   std::unique_ptr<int> p(new int(42));  // NOTE(review): std::unique_ptr lives in <memory>, which this snippet does not include directly.
204 |   std::thread t{f, std::move(p)};  // A move-only argument has to be moved into the thread.
205 |   t.join();
206 | }
207 | ```
208 | 
209 | ## 转移线程所有权
210 | 
211 | * [std::thread](https://en.cppreference.com/w/cpp/thread/thread) 是 move-only 类型，不能拷贝，只能通过移动转移所有权，但不能转移所有权到 joinable 的线程
212 | 
213 | ```cpp
214 | #include <thread>
215 | #include <utility>
216 | 
217 | void f() {}  // Empty thread function.
218 | void g() {}  // Empty thread function.
219 | 
220 | int main() {
221 |   std::thread a{f};
222 |   std::thread b = std::move(a);  // b takes over the thread running f; a owns nothing.
223 |   assert(!a.joinable());
224 |   assert(b.joinable());
225 |   a = std::thread{g};  // a is not joinable here, so it can receive a new thread.
226 |   assert(a.joinable());
227 |   assert(b.joinable());
228 |   // a = std::move(b);  // Error: ownership cannot be moved into a joinable thread.
229 |   a.join();
230 |   a = std::move(b);  // Allowed now that a has been joined.
231 |   assert(a.joinable());
232 |   assert(!b.joinable());
233 |   a.join();
234 | }
235 | ```
236 | 
237 | * 移动操作同样适用于支持移动的容器
238 | 
239 | ```cpp
240 | #include <algorithm>
241 | #include <thread>
242 | #include <vector>
243 | 
244 | int main() {
245 |   std::vector<std::thread> v;
246 |   for (int i = 0; i < 10; ++i) {
247 |     v.emplace_back([] {});  // Constructs each thread directly inside the vector.
248 |   }
249 |   std::for_each(std::begin(v), std::end(v), std::mem_fn(&std::thread::join));  // Joins every thread. NOTE(review): std::mem_fn is declared in <functional>, which is not included here.
250 | }
251 | ```
252 | 
253 | * [std::thread](https://en.cppreference.com/w/cpp/thread/thread) 可以作为函数返回值
254 | 
255 | ```cpp
256 | #include <thread>
257 | 
258 | std::thread f() {
259 |   return std::thread{[] {}};  // Ownership of the new thread passes to the caller.
260 | }
261 | 
262 | int main() {
263 |   std::thread t{f()};  // t owns the thread created inside f.
264 |   t.join();
265 | }
266 | ```
267 | 
268 | * [std::thread](https://en.cppreference.com/w/cpp/thread/thread) 也可以作为函数参数
269 | 
270 | ```cpp
271 | #include <thread>
272 | #include <utility>
273 | 
274 | void f(std::thread t) { t.join(); }  // Takes ownership of the thread and joins it.
275 | 
276 | int main() {
277 |   f(std::thread([] {}));  // A temporary thread can be passed directly.
278 |   std::thread t([] {});
279 |   f(std::move(t));  // A named thread has to be moved in.
280 | }
281 | ```
282 | 
283 | * 实现一个可以直接用 [std::thread](https://en.cppreference.com/w/cpp/thread/thread) 构造的自动清理线程的类
284 | 
285 | ```cpp
286 | #include <stdexcept>
287 | #include <thread>
288 | #include <utility>
289 | 
// Owns a running std::thread for the lifetime of the object and joins it in
// the destructor. Construction fails with std::logic_error("no thread") when
// the thread handed in is not joinable. The type cannot be copied.
class scoped_thread {
  std::thread t_;  // Joinable from successful construction until destruction.

 public:
  scoped_thread(const scoped_thread&) = delete;
  scoped_thread& operator=(const scoped_thread&) = delete;

  // Takes ownership of x; throws std::logic_error if x does not own a thread.
  explicit scoped_thread(std::thread x) : t_{std::move(x)} {
    const bool owns_thread = t_.joinable();
    if (owns_thread) {
      return;
    }
    throw std::logic_error("no thread");
  }

  // Blocks until the owned thread has finished.
  ~scoped_thread() {
    t_.join();
  }
};
304 | 
305 | int main() {
306 |   scoped_thread t{std::thread{[] {}}};  // ~scoped_thread() joins the thread at the end of main.
307 | }
308 | ```
309 | 
310 | * 类似 [std::jthread](https://en.cppreference.com/w/cpp/thread/jthread) 的类
311 | 
312 | ```cpp
313 | #include <thread>
314 | 
// Small stand-in for std::jthread: wraps a std::thread and joins it in the
// destructor and before the wrapper is given a different thread.
class Jthread {
 public:
  // Creates a wrapper that owns no thread.
  Jthread() noexcept = default;

  // Starts a new thread running f(args...), forwarding to std::thread.
  template <typename T, typename... Ts>
  explicit Jthread(T&& f, Ts&&... args)
      : t_(std::forward<T>(f), std::forward<Ts>(args)...) {}

  // Adopts an existing thread.
  explicit Jthread(std::thread x) noexcept : t_(std::move(x)) {}
  Jthread(Jthread&& rhs) noexcept : t_(std::move(rhs.t_)) {}

  // Joins the currently owned thread (if any), then takes over rhs's thread.
  // Self-assignment is a no-op so that it does not block on its own thread.
  Jthread& operator=(Jthread&& rhs) noexcept {
    if (this != &rhs) {
      if (joinable()) {
        join();
      }
      t_ = std::move(rhs.t_);
    }
    return *this;
  }

  // Joins the currently owned thread (if any), then adopts t.
  Jthread& operator=(std::thread t) noexcept {
    if (joinable()) {
      join();
    }
    t_ = std::move(t);
    return *this;
  }

  // Joins the owned thread, if there is one.
  ~Jthread() noexcept {
    if (joinable()) {
      join();
    }
  }

  // Exchanges the owned threads. The lvalue overload is the usual swap
  // signature; the rvalue overload is kept for callers that pass a temporary.
  void swap(Jthread& rhs) noexcept { t_.swap(rhs.t_); }
  void swap(Jthread&& rhs) noexcept { t_.swap(rhs.t_); }
  std::thread::id get_id() const noexcept { return t_.get_id(); }
  bool joinable() const noexcept { return t_.joinable(); }
  void join() { t_.join(); }
  void detach() { t_.detach(); }
  // Direct access to the wrapped std::thread.
  std::thread& as_thread() noexcept { return t_; }
  const std::thread& as_thread() const noexcept { return t_; }

 private:
  std::thread t_;
};
359 | 
360 | int main() {
361 |   Jthread t{[] {}};  // ~Jthread() joins the thread when t goes out of scope.
362 | }
363 | ```
364 | 
365 | ## 查看硬件支持的线程数量
366 | 
367 | * [hardware_concurrency](https://en.cppreference.com/w/cpp/thread/thread/hardware_concurrency) 会返回硬件支持的并发线程数
368 | 
369 | ```cpp
370 | #include <iostream>
371 | #include <thread>
372 | 
373 | int main() {
374 |   unsigned int n = std::thread::hardware_concurrency();  // Number of concurrent threads the hardware supports.
375 |   std::cout << n << " concurrent threads are supported.\n";
376 | }
377 | ```
378 | 
379 | * 并行版本的 [std::accumulate](https://en.cppreference.com/w/cpp/algorithm/accumulate)
380 | 
381 | ```cpp
382 | #include <algorithm>
383 | #include <cassert>
384 | #include <functional>
385 | #include <iterator>
386 | #include <numeric>
387 | #include <thread>
388 | #include <vector>
389 | 
// Worker functor: folds [first, last) into res, starting from the value that
// res already holds.
template <typename Iterator, typename T>
struct accumulate_block {
  void operator()(Iterator first, Iterator last, T& res) {
    res = std::accumulate(first, last, res);
  }
};

// Sums [first, last) plus init using several threads.
// The range is split into equally sized blocks of at least 25 elements; the
// calling thread handles the final block, which also absorbs the remainder.
// Returns init unchanged for an empty range.
template <typename Iterator, typename T>
T parallel_accumulate(Iterator first, Iterator last, T init) {
  const long total = std::distance(first, last);
  if (total == 0) {
    return init;
  }

  const long min_per_thread = 25;
  const long max_threads = (total + min_per_thread - 1) / min_per_thread;
  long hardware_threads = std::thread::hardware_concurrency();
  if (hardware_threads == 0) {
    hardware_threads = 2;  // Fallback when the hardware count is reported as 0.
  }
  const long num_threads = std::min(hardware_threads, max_threads);
  const long block_size = total / num_threads;  // Elements handled per worker.

  std::vector<T> partial_sums(num_threads);
  // One fewer worker than blocks: the calling thread processes the last block.
  std::vector<std::thread> workers(num_threads - 1);

  Iterator block_start = first;
  long slot = 0;
  for (std::thread& worker : workers) {
    Iterator block_end = block_start;
    std::advance(block_end, block_size);  // block_end is one past this block.
    worker = std::thread{accumulate_block<Iterator, T>{}, block_start,
                         block_end, std::ref(partial_sums[slot])};
    block_start = block_end;
    ++slot;
  }
  accumulate_block<Iterator, T>{}(block_start, last, partial_sums[slot]);

  for (std::thread& worker : workers) {
    worker.join();
  }
  return std::accumulate(partial_sums.begin(), partial_sums.end(), init);
}
424 | 
425 | int main() {
426 |   std::vector<long> v(1000000);
427 |   std::iota(std::begin(v), std::end(v), 0);
428 |   long res = std::accumulate(std::begin(v), std::end(v), 0);
429 |   assert(parallel_accumulate(std::begin(v), std::end(v), 0) == res);
430 | }
431 | ```
432 | 
433 | ## 线程号
434 | 
435 | * 可以通过对线程实例调用成员函数 [get_id](https://en.cppreference.com/w/cpp/thread/thread/get_id) 或在当前线程中调用 [std::this_thread::get_id](https://en.cppreference.com/w/cpp/thread/get_id) 获取 [线程号](https://en.cppreference.com/w/cpp/thread/thread/id)，其本质是一个无符号整型的封装，允许拷贝和比较，因此可以将其作为容器的键值，如果两个线程的线程号相等，则两者是同一线程或都是空线程（一般空线程的线程号为 0）
436 | 
437 | ```cpp
438 | #include <iostream>
439 | #include <thread>
440 | 
441 | #ifdef _WIN32
442 | #include <Windows.h>
443 | #elif defined __GNUC__
444 | #include <syscall.h>
445 | #include <unistd.h>
446 | 
447 | #endif
448 | 
449 | void print_current_thread_id() {  // Prints the current thread's id as reported by each API; the trailing comments are sample outputs.
450 | #ifdef _WIN32
451 |   std::cout << std::this_thread::get_id() << std::endl;       // 19840
452 |   std::cout << GetCurrentThreadId() << std::endl;             // 19840
453 |   std::cout << GetThreadId(GetCurrentThread()) << std::endl;  // 19840
454 | #elif defined __GNUC__
455 |   std::cout << std::this_thread::get_id() << std::endl;  // 1
456 |   std::cout << pthread_self() << std::endl;              // 140250646003520
457 |   std::cout << getpid() << std::endl;  // 1502109, the pid that `ps aux` shows
458 |   std::cout << syscall(SYS_gettid) << std::endl;  // 1502109
459 | #endif
460 | }
461 | 
462 | std::thread::id master_thread_id = std::this_thread::get_id();  // Id of the thread that runs this initializer.
463 | 
464 | void f() {
465 |   if (std::this_thread::get_id() == master_thread_id) {  // True only when called on the thread recorded above.
466 |     // do_master_thread_work();
467 |   }
468 |   // do_common_work();
469 | }
470 | 
471 | int main() {
472 |   print_current_thread_id();
473 |   f();  // Called on the main thread.
474 |   std::thread t{f};  // Called again on a newly created thread.
475 |   t.join();
476 | }
477 | ```
478 | 
479 | ## CPU 亲和性（affinity）
480 | 
481 | * 将线程绑定到一个指定的 CPU core 上运行，避免多核 CPU 上下文切换和 cache miss 的开销
482 | 
483 | ```cpp
484 | #ifdef _WIN32
485 | #include <Windows.h>
486 | #elif defined __GNUC__
487 | #include <pthread.h>
488 | #include <sched.h>
489 | #include <string.h>
490 | #endif
491 | 
492 | #include <iostream>
493 | #include <thread>
494 | 
// Pins the thread identified by native handle `t` to the single core `cpu_id`.
// Nothing is returned or thrown; failures are reported on std::cerr.
void affinity_cpu(std::thread::native_handle_type t, int cpu_id) {
#ifdef _WIN32
  // The affinity mask is a DWORD_PTR with one bit per core, so shifting by a
  // negative amount or past its width would be undefined behaviour.
  if (cpu_id < 0 || cpu_id >= static_cast<int>(sizeof(DWORD_PTR) * 8)) {
    std::cerr << "fail to affinity: invalid cpu id " << cpu_id << std::endl;
    return;
  }
  if (!SetThreadAffinityMask(t, static_cast<DWORD_PTR>(1) << cpu_id)) {
    std::cerr << "fail to affinity: " << GetLastError() << std::endl;
  }
#elif defined __GNUC__
  if (cpu_id < 0 || cpu_id >= CPU_SETSIZE) {
    std::cerr << "fail to affinity: invalid cpu id " << cpu_id << std::endl;
    return;
  }
  cpu_set_t cpu_set;
  CPU_ZERO(&cpu_set);
  CPU_SET(cpu_id, &cpu_set);
  const int res = pthread_setaffinity_np(t, sizeof(cpu_set), &cpu_set);
  if (res != 0) {
    // The error number is the return value, so it is passed to strerror
    // directly instead of being routed through errno.
    std::cerr << "fail to affinity: " << strerror(res) << std::endl;
  }
#endif
}
511 | 
512 | void affinity_cpu_on_current_thread(int cpu_id) {
513 | #ifdef _WIN32
514 |   if (!SetThreadAffinityMask(GetCurrentThread(), 1ll << cpu_id)) {
515 |     std::cerr << "fail to affinity" << GetLastError() << std::endl;
516 |   }
517 | #elif defined __GNUC__
518 |   cpu_set_t cpu_set;
519 |   CPU_ZERO(&cpu_set);
520 |   CPU_SET(cpu_id, &cpu_set);
521 |   int res = pthread_setaffinity_np(pthread_self(), sizeof(cpu_set), &cpu_set);
522 |   if (res != 0) {
523 |     errno = res;
524 |     std::cerr << "fail to affinity" << strerror(errno) << std::endl;
525 |   }
526 | #endif
527 | }
528 | 
529 | void f() { affinity_cpu_on_current_thread(0); }  // The calling thread pins itself to CPU 0.
530 | 
531 | int main() {
532 |   std::thread t1{[] {}};
533 |   affinity_cpu(t1.native_handle(), 1);  // Pins t1 to CPU 1 from outside, through its native handle.
534 |   std::thread t2{f};  // t2 pins itself to CPU 0.
535 |   t1.join();
536 |   t2.join();
537 | }
538 | ```
539 | 


--------------------------------------------------------------------------------
/docs/06_designing_lock_free_concurrent_data_structure.md:
--------------------------------------------------------------------------------
  1 | ## 非阻塞数据结构
  2 | 
  3 | * 阻塞的算法和数据结构使用 mutex、条件变量、期值来同步数据，但非阻塞不等价于 lock-free，比如自旋锁没有使用任何阻塞函数的调用，是非阻塞的，但并非 lock-free
  4 | * 非阻塞数据结构由松到严可分为三个等级：obstruction-free、lock-free、wait-free
  5 |   * obstruction-free（无障碍）：如果其他线程都暂停了，任何一个给定的线程都会在有限步数内完成操作。上例就是这种情况，但这种情况很少见，所以满足这个条件只能算一个失败的 lock-free 实现
  6 |   * lock-free（无锁）：如果多线程在同一个数据结构上操作，其中一个将在有限步数内完成操作。满足 lock-free 必定满足 obstruction-free
  7 |   * wait-free（无等待）：如果多线程在同一个数据结构上操作，每个线程都会在有限步数内完成操作。满足 wait-free 必定满足 lock-free，但 wait-free 很难实现，因为要保证有限步数内完成操作，就要保证操作一次通过，并且执行到某一步不能导致其他线程操作失败
  8 | * lock-free 数据结构必须允许多线程并发访问，但不要求这些线程能执行相同的操作，比如一个 lock-free 的 queue 可以只允许一个线程 push、另一个线程 pop，而不支持两个线程同时 push。此外，如果一个访问 lock-free 数据结构的线程被中途挂起，其他线程必须能完成操作而不需要等待挂起的线程
  9 | * 使用 lock-free 数据结构主要是为了最大化并发访问，不需要阻塞。第二个原因是鲁棒性，如果线程在持有锁时死掉就会导致数据结构被永久破坏，而对 lock-free 数据结构来说，除了死掉的线程里的数据，其他的数据都不会丢失。lock-free 没有任何锁，所以一定不会出现死锁
 10 | * 但 lock-free 可能造成更大开销，用于 lock-free 的原子操作比非原子操作慢得多，且 lock-free 数据结构中的原子操作一般比 lock-based 中的多，此外，硬件必须访问同一个原子变量以在线程间同步数据。无论 lock-free 还是 lock-based，性能方面的检查（最坏情况等待时间、平均等待时间、总体执行时间或其他方面）都是非常重要的
 11 | 
 12 | ## lock-free thread-safe stack
 13 | 
 14 | * 最简单的 stack 实现方式是包含头节点指针的链表。push 的过程很简单，创建一个新节点，然后让新节点的 next 指针指向当前 head，最后 head 设为新节点
 15 | * 这里的 race condition 在于，如果两个线程同时 push，让各自的新节点的 next 指针指向当前 head，这样必然导致 head 最终设为二者之一的新节点，而另一个被丢弃
 16 | * 解决方法是，在最后设置 head 时先进行判断，只有当前 head 与新节点的 next 相等，才将 head 设为新节点，如果不等则让 next 指向当前 head 并重新判断。而这个操作必须是原子的，因此就需要使用 [compare_exchange_weak](https://en.cppreference.com/w/cpp/atomic/atomic/compare_exchange)，不需要使用 [compare_exchange_strong](https://en.cppreference.com/w/cpp/atomic/atomic/compare_exchange)，因为 [compare_exchange_weak](https://en.cppreference.com/w/cpp/atomic/atomic/compare_exchange) 在相等时可能替换失败，但替换失败也会返回 false，放在循环里带来的效果是一样的，而 [compare_exchange_weak](https://en.cppreference.com/w/cpp/atomic/atomic/compare_exchange) 在一些机器架构上可以产生比 [compare_exchange_strong](https://en.cppreference.com/w/cpp/atomic/atomic/compare_exchange) 更优化的代码
 17 | 
 18 | ```cpp
 19 | #include <atomic>
 20 | 
 21 | template <typename T>
 22 | class LockFreeStack {
 23 |  public:
 24 |   void push(const T& x) {
 25 |     Node* t = new Node(x);
 26 |     t->next = head_.load();
 27 |     while (!head_.compare_exchange_weak(t->next, t)) {
 28 |     }
 29 |   }
 30 | 
 31 |  private:
 32 |   struct Node {
 33 |     T v;
 34 |     Node* next = nullptr;
 35 |     Node(const T& x) : v(x) {}
 36 |   };
 37 | 
 38 |  private:
 39 |   std::atomic<Node*> head_;
 40 | };
 41 | ```
 42 | 
 43 | * pop 的过程很简单，先存储当前头节点指针，再将头节点设为下一节点，最后返回存储的头节点并删除指针。这里的 race condition 在于，如果两个线程同时 pop，如果一个已经删除了头节点，另一个线程读取头节点的下一节点就访问了空悬指针
 44 | * 先绕开删除指针这一步，考虑前几步的实现
 45 | 
 46 | ```cpp
 47 | template <typename T>
 48 | void LockFreeStack<T>::pop(T& res) {
 49 |   Node* t = head_.load();  // The case where the head is a null pointer is not handled yet.
 50 |   while (!head_.compare_exchange_weak(t, t->next)) {  // A failed exchange reloads t with the current head before retrying.
 51 |   }
 52 |   res = t->v;  // Copies the value out of the removed node; the node itself is not freed.
 53 | }
 54 | ```
 55 | 
 56 | * 传引用来保存结果的原因是，如果直接返回值，返回前一定会先移除元素，如果拷贝返回值时抛出异常，移除的元素就丢失了。但传引用的问题是，如果其他线程移除了节点，被移除的节点不能被解引用，当前线程就无法安全地拷贝数据。因此，如果想安全地返回值，应该返回智能指针
 57 | 
 58 | ```cpp
 59 | #include <atomic>
 60 | #include <memory>
 61 | 
 62 | template <typename T>
 63 | class LockFreeStack {
 64 |  public:
 65 |   void push(const T& x) {
 66 |     Node* t = new Node(x);
 67 |     t->next = head_.load();
 68 |     while (!head_.compare_exchange_weak(t->next, t)) {
 69 |     }
 70 |   }
 71 | 
 72 |   std::shared_ptr<T> pop() {  // 还未考虑释放原来的头节点指针
 73 |     Node* t = head_.load();
 74 |     while (t && !head_.compare_exchange_weak(t, t->next)) {
 75 |     }
 76 |     return t ? t->v : nullptr;
 77 |   }
 78 | 
 79 |  private:
 80 |   struct Node {
 81 |     std::shared_ptr<T> v;
 82 |     Node* next = nullptr;
 83 |     Node(const T& x) : v(std::make_shared<T>(x)) {}
 84 |   };
 85 | 
 86 |  private:
 87 |   std::atomic<Node*> head_;
 88 | };
 89 | ```
 90 | 
 91 | * 释放被移除的节点的难点在于，一个线程在释放内存时，无法得知其他线程是否持有要释放的指针
 92 | * 只要没有其他线程调用 pop，就能安全释放，因此可以用一个计数器来记录调用 pop 的线程数，计数不为 1 时，先把节点添加到待删除节点列表中，计数为 1 则安全释放
 93 | 
 94 | ```cpp
 95 | #include <atomic>
 96 | #include <memory>
 97 | 
 98 | template <typename T>
 99 | class LockFreeStack {
100 |  public:
101 |   void push(const T& x) {
102 |     Node* t = new Node(x);
103 |     t->next = head_.load();
104 |     while (!head_.compare_exchange_weak(t->next, t)) {
105 |     }
106 |   }
107 | 
108 |   std::shared_ptr<T> pop() {
109 |     ++pop_cnt_;
110 |     Node* t = head_.load();
111 |     while (t && !head_.compare_exchange_weak(t, t->next)) {
112 |     }
113 |     std::shared_ptr<T> res;
114 |     if (t) {
115 |       res.swap(t->v);
116 |     }
117 |     try_delete(t);
118 |     return res;
119 |   }
120 | 
121 |  private:
122 |   struct Node {
123 |     std::shared_ptr<T> v;
124 |     Node* next = nullptr;
125 |     Node(const T& x) : v(std::make_shared<T>(x)) {}
126 |   };
127 | 
128 |  private:
129 |   static void delete_list(Node* head) {
130 |     while (head) {
131 |       Node* t = head->next;
132 |       delete head;
133 |       head = t;
134 |     }
135 |   }
136 | 
137 |   void append_to_delete_list(Node* first, Node* last) {
138 |     last->next = to_delete_list_;
139 |     // 确保 last->next 为 to_delete_list_，再设置 first 为新的头节点
140 |     while (!to_delete_list_.compare_exchange_weak(last->next, first)) {
141 |     }
142 |   }
143 | 
144 |   void append_to_delete_list(Node* head) {
145 |     Node* last = head;
146 |     while (Node* t = last->next) {
147 |       last = t;
148 |     }
149 |     append_to_delete_list(head, last);
150 |   }
151 | 
152 |   void try_delete(Node* head) {
153 |     if (pop_cnt_ == 0) {
154 |       return;
155 |     }
156 |     if (pop_cnt_ > 1) {
157 |       append_to_delete_list(head, head);
158 |       --pop_cnt_;
159 |       return;
160 |     }
161 |     Node* t = to_delete_list_.exchange(nullptr);
162 |     if (--pop_cnt_ == 0) {
163 |       delete_list(t);
164 |     } else if (t) {
165 |       append_to_delete_list(t);
166 |     }
167 |     delete head;
168 |   }
169 | 
170 |  private:
171 |   std::atomic<Node*> head_;
172 |   std::atomic<std::size_t> pop_cnt_;
173 |   std::atomic<Node*> to_delete_list_;
174 | };
175 | ```
176 | 
177 | * 如果要释放所有节点，必须有一个时刻计数器为 0。在高负载的情况下，往往不会存在这样的时刻，从而导致待删除节点的列表无限增长
178 | 
179 | ### Hazard Pointer（风险指针）
180 | 
181 | * 另一个释放的思路是，在线程访问节点时，设置一个保存了线程 ID 和该节点的风险指针。用一个全局数组保存所有线程的风险指针，释放节点时，如果数组中不存在包含该节点的风险指针，则可以直接释放，否则将节点添加到待删除列表中。风险指针实现如下
182 | 
183 | ```cpp
184 | #include <atomic>
185 | #include <stdexcept>
186 | #include <thread>
187 | 
188 | static constexpr std::size_t MaxSize = 100;  // Number of slots in the hazard-pointer table below.
189 | 
190 | struct HazardPointer {
191 |   std::atomic<std::thread::id> id;  // Thread that owns the slot; a default-constructed id marks it as free.
192 |   std::atomic<void*> p;             // Pointer the owning thread has published as in use.
193 | };
194 | 
195 | static HazardPointer HazardPointers[MaxSize];  // Table of hazard pointers shared by all threads.
196 | 
197 | class HazardPointerHelper {
198 |  public:
199 |   HazardPointerHelper() {
200 |     for (auto& x : HazardPointers) {
201 |       std::thread::id default_id;
202 |       if (x.id.compare_exchange_strong(default_id,
203 |                                        std::this_thread::get_id())) {
204 |         hazard_pointer = &x;  // 取一个未设置过的风险指针
205 |         break;
206 |       }
207 |     }
208 |     if (!hazard_pointer) {
209 |       throw std::runtime_error("No hazard pointers available");
210 |     }
211 |   }
212 | 
213 |   ~HazardPointerHelper() {
214 |     hazard_pointer->p.store(nullptr);
215 |     hazard_pointer->id.store(std::thread::id{});
216 |   }
217 | 
218 |   HazardPointerHelper(const HazardPointerHelper&) = delete;
219 | 
220 |   HazardPointerHelper operator=(const HazardPointerHelper&) = delete;
221 | 
222 |   std::atomic<void*>& get() { return hazard_pointer->p; }
223 | 
224 |  private:
225 |   HazardPointer* hazard_pointer = nullptr;
226 | };
227 | 
228 | std::atomic<void*>& hazard_pointer_for_this_thread() {
229 |   static thread_local HazardPointerHelper t;  // One helper per thread, so each thread claims its own slot.
230 |   return t.get();
231 | }
232 | 
233 | bool is_existing(void* p) {  // Returns true when any slot of HazardPointers currently holds p.
234 |   for (auto& x : HazardPointers) {
235 |     if (x.p.load() == p) {
236 |       return true;
237 |     }
238 |   }
239 |   return false;
240 | }
241 | ```
242 | 
243 | * 使用风险指针
244 | 
245 | ```cpp
246 | #include <atomic>
247 | #include <functional>
248 | #include <memory>
249 | 
250 | #include "hazard_pointer.hpp"
251 | 
252 | template <typename T>
253 | class LockFreeStack {
254 |  public:
255 |   void push(const T& x) {
256 |     Node* t = new Node(x);
257 |     t->next = head_.load();
258 |     while (!head_.compare_exchange_weak(t->next, t)) {
259 |     }
260 |   }
261 | 
262 |   std::shared_ptr<T> pop() {
263 |     std::atomic<void*>& hazard_pointer = hazard_pointer_for_this_thread();
264 |     Node* t = head_.load();
265 |     do {  // 外循环确保 t 为最新的头节点，循环结束后将头节点设为下一节点
266 |       Node* t2;
267 |       do {  // 循环至风险指针保存当前最新的头节点
268 |         t2 = t;
269 |         hazard_pointer.store(t);
270 |         t = head_.load();
271 |       } while (t != t2);
272 |     } while (t && !head_.compare_exchange_strong(t, t->next));
273 |     hazard_pointer.store(nullptr);
274 |     std::shared_ptr<T> res;
275 |     if (t) {
276 |       res.swap(t->v);
277 |       if (is_existing(t)) {
278 |         append_to_delete_list(new DataToDelete{t});
279 |       } else {
280 |         delete t;
281 |       }
282 |       try_delete();
283 |     }
284 |     return res;
285 |   }
286 | 
287 |  private:
288 |   struct Node {
289 |     std::shared_ptr<T> v;
290 |     Node* next = nullptr;
291 |     Node(const T& x) : v(std::make_shared<T>(x)) {}
292 |   };
293 | 
294 |   struct DataToDelete {
295 |     template <typename T>
296 |     DataToDelete(T* p)
297 |         : data(p), deleter([](void* p) { delete static_cast<T*>(p); }) {}
298 | 
299 |     ~DataToDelete() { deleter(data); }
300 | 
301 |     void* data = nullptr;
302 |     std::function<void(void*)> deleter;
303 |     DataToDelete* next = nullptr;
304 |   };
305 | 
306 |  private:
307 |   void append_to_delete_list(DataToDelete* t) {
308 |     t->next = to_delete_list_.load();
309 |     while (!to_delete_list_.compare_exchange_weak(t->next, t)) {
310 |     }
311 |   }
312 | 
313 |   void try_delete() {
314 |     DataToDelete* cur = to_delete_list_.exchange(nullptr);
315 |     while (cur) {
316 |       DataToDelete* t = cur->next;
317 |       if (!is_existing(cur->data)) {
318 |         delete cur;
319 |       } else {
320 |         append_to_delete_list(new DataToDelete{cur});
321 |       }
322 |       cur = t;
323 |     }
324 |   }
325 | 
326 |  private:
327 |   std::atomic<Node*> head_;
328 |   std::atomic<std::size_t> pop_cnt_;
329 |   std::atomic<DataToDelete*> to_delete_list_;
330 | };
331 | ```
332 | 
333 | * 风险指针实现简单并达到了安全释放的目的，但每次删除节点前后都要遍历数组并原子访问内部指针来检查，增加了很多开销
334 | * 无锁内存回收技术领域十分活跃，大公司都会申请自己的专利，风险指针包含在 IBM 提交的专利申请中，在 GPL 协议下允许免费使用
335 | 
336 | ### 引用计数
337 | 
338 | * 另一个方案是使用引用计数记录访问每个节点的线程数量，[std::shared_ptr](https://en.cppreference.com/w/cpp/memory/shared_ptr) 的操作是原子的，但要检查是否 lock-free
339 | 
340 | ```cpp
341 | std::shared_ptr<int> p(new int(42));
342 | assert(std::atomic_is_lock_free(&p));
343 | ```
344 | 
345 | * 如果是，则可以用于实现 lock-free stack
346 | 
347 | ```cpp
348 | #include <atomic>
349 | #include <memory>
350 | 
// Lock-free stack built on std::shared_ptr and the atomic free functions for
// shared_ptr, so that nodes are released by reference counting.
template <typename T>
class LockFreeStack {
 public:
  // Pops until the stack is empty so that the node chain is released one node
  // at a time.
  ~LockFreeStack() {
    while (pop()) {
    }
  }

  // Links a copy of x in front of the current head.
  void push(const T& x) {
    auto t = std::make_shared<Node>(x);
    t->next = std::atomic_load(&head_);
    // A failed exchange refreshes t->next with the current head.
    while (!std::atomic_compare_exchange_weak(&head_, &t->next, t)) {
    }
  }

  // Removes the top element and returns its value, or nullptr when the stack
  // is empty.
  std::shared_ptr<T> pop() {
    std::shared_ptr<Node> t = std::atomic_load(&head_);
    // next is read with atomic_load because another thread popping the same
    // node clears it with atomic_store below.
    while (t && !std::atomic_compare_exchange_weak(&head_, &t,
                                                   std::atomic_load(&t->next))) {
    }
    if (t) {
      // Drop the link to the rest of the list. The second argument has to be a
      // shared_ptr<Node>: a bare nullptr cannot be deduced as shared_ptr<T>.
      std::atomic_store(&t->next, std::shared_ptr<Node>{});
      return t->v;
    }
    return nullptr;
  }

 private:
  struct Node {
    std::shared_ptr<T> v;
    std::shared_ptr<Node> next;
    Node(const T& x) : v(std::make_shared<T>(x)) {}
  };

 private:
  std::shared_ptr<Node> head_;
};
387 | ```
388 | 
389 | * C++20 支持 [std::atomic\<std::shared_ptr\>](https://en.cppreference.com/w/cpp/memory/shared_ptr/atomic2)
390 | 
391 | ```cpp
392 | #include <atomic>
393 | #include <memory>
394 | 
// Lock-free stack built on C++20 std::atomic<std::shared_ptr<...>>.
template <typename T>
class LockFreeStack {
 public:
  // Pops until the stack is empty so that the node chain is released one node
  // at a time.
  ~LockFreeStack() {
    while (pop()) {
    }
  }

  // Links a copy of x in front of the current head.
  void push(const T& x) {
    auto t = std::make_shared<Node>(x);
    // compare_exchange_weak needs a plain shared_ptr lvalue as its expected
    // value. t->next is an atomic, so the expected head is kept in a local and
    // copied into t->next before every attempt.
    std::shared_ptr<Node> expected = head_.load();
    do {
      t->next = expected;
    } while (!head_.compare_exchange_weak(expected, t));
  }

  // Removes the top element and returns its value, or nullptr when the stack
  // is empty.
  std::shared_ptr<T> pop() {
    std::shared_ptr<Node> t = head_.load();
    while (t && !head_.compare_exchange_weak(t, t->next.load())) {
    }
    if (t) {
      t->next = std::shared_ptr<Node>();  // Drop the link to the rest of the list.
      return t->v;
    }
    return nullptr;
  }

 private:
  struct Node {
    std::shared_ptr<T> v;
    std::atomic<std::shared_ptr<Node>> next;
    Node(const T& x) : v(std::make_shared<T>(x)) {}
  };

 private:
  std::atomic<std::shared_ptr<Node>> head_;
};
431 | ```
432 | 
433 | * 但 VS2022 上测试发现 [std::atomic\<std::shared_ptr\>](https://en.cppreference.com/w/cpp/memory/shared_ptr/atomic2) 并非 lock-free
434 | 
435 | ```cpp
436 | assert(!std::atomic<std::shared_ptr<int>>{}.is_lock_free());
437 | ```
438 | 
439 | * 更通用的方法是手动管理引用计数，为每个节点设置内外部两个引用计数，两者之和就是节点的引用计数，外部计数默认为 1，每次读取指向节点的指针（开始访问）时递增外部计数，访问结束时递减内部计数；节点被移出栈后不再需要外部计数，此时将外部计数减 2 后加到内部计数上，内部计数变为 0 即可删除节点
440 | 
441 | ```cpp
442 | #include <atomic>
443 | #include <memory>
444 | 
// Lock-free stack with manual reference counting. Every node has an external
// count, stored next to the pointer that refers to it, and an internal count,
// stored in the node; the node is deleted when the two cancel out.
template <typename T>
class LockFreeStack {
 public:
  // Starts with an empty head. Without this the head pointer and count are
  // indeterminate, and pop() on an empty stack (including the one made by the
  // destructor) would follow a garbage pointer.
  LockFreeStack() : head_(ReferenceCount{0, nullptr}) {}

  // Pops until the stack is empty so that every node is released.
  ~LockFreeStack() {
    while (pop()) {
    }
  }

  // Links a copy of x in front of the current head.
  void push(const T& x) {
    ReferenceCount t;
    t.p = new Node(x);
    t.external_cnt = 1;  // The reference held by head_ itself.
    t.p->next = head_.load();
    // A failed exchange refreshes t.p->next with the current head.
    while (!head_.compare_exchange_weak(t.p->next, t)) {
    }
  }

  // Removes the top element and returns its value, or nullptr when the stack
  // is empty.
  std::shared_ptr<T> pop() {
    ReferenceCount t = head_.load();
    while (true) {
      increase_count(t);  // The raised external count marks the node as in use,
      Node* p = t.p;      // so it can be accessed safely.
      if (!p) {
        return nullptr;
      }
      if (head_.compare_exchange_strong(t, p->next)) {
        std::shared_ptr<T> res;
        res.swap(p->v);
        // Fold the external count minus 2 into the internal count: one is
        // subtracted because the node left the list, and one because this
        // thread will not access the node again.
        const int cnt = t.external_cnt - 2;
        if (p->inner_cnt.fetch_add(cnt) == -cnt) {
          delete p;  // External and internal counts add up to 0.
        }
        return res;
      }
      // Another thread removed the node first; give back this thread's
      // reference and retry with the head that the failed exchange loaded.
      if (p->inner_cnt.fetch_sub(1) == 1) {
        delete p;  // The internal count reached 0.
      }
    }
  }

 private:
  struct Node;

  // Pointer to a node together with the external count of that node.
  struct ReferenceCount {
    int external_cnt;
    Node* p;
  };

  struct Node {
    std::shared_ptr<T> v;
    std::atomic<int> inner_cnt = 0;
    ReferenceCount next;  // Assigned in push() before the node is published.
    Node(const T& x) : v(std::make_shared<T>(x)) {}
  };

  // Atomically increments the external count stored in head_ and leaves the
  // head that was actually updated, with the new count, in old_cnt.
  void increase_count(ReferenceCount& old_cnt) {
    ReferenceCount new_cnt;
    do {
      new_cnt = old_cnt;
      ++new_cnt.external_cnt;  // Raised on every access to head_ to mark the node as in use.
    } while (!head_.compare_exchange_strong(old_cnt, new_cnt));
    old_cnt.external_cnt = new_cnt.external_cnt;
  }

 private:
  std::atomic<ReferenceCount> head_;
};
514 | ```
515 | 
516 | * 不指定内存序则默认使用开销最大的 `std::memory_order_seq_cst`，下面根据操作间的依赖关系优化为最小内存序
517 | 
518 | ```cpp
519 | #include <atomic>
520 | #include <memory>
521 | 
522 | template <typename T>
523 | class LockFreeStack {
524 |  public:
525 |   ~LockFreeStack() {
526 |     while (pop()) {  // Pops until the stack is empty.
527 |     }
528 |   }
529 | 
530 |   void push(const T& x) {
531 |     ReferenceCount t;
532 |     t.p = new Node(x);
533 |     t.external_cnt = 1;
534 |     // The release in the compare-exchange below guarantees that the statements before it run first, so the load can be relaxed.
535 |     t.p->next = head_.load(std::memory_order_relaxed);
536 |     while (!head_.compare_exchange_weak(t.p->next, t, std::memory_order_release,
537 |                                         std::memory_order_relaxed)) {
538 |     }
539 |   }
540 | 
541 |   std::shared_ptr<T> pop() {
542 |     ReferenceCount t = head_.load(std::memory_order_relaxed);
543 |     while (true) {
544 |       increase_count(t);  // acquire
545 |       Node* p = t.p;
546 |       if (!p) {
547 |         return nullptr;
548 |       }
549 |       if (head_.compare_exchange_strong(t, p->next,
550 |                                         std::memory_order_relaxed)) {
551 |         std::shared_ptr<T> res;
552 |         res.swap(p->v);
553 |         // Fold the external count minus 2 into the internal count. 2 is subtracted because
554 |         // removing the node takes 1, and this thread no longer accessing the node takes another 1.
555 |         const int cnt = t.external_cnt - 2;
556 |         // The swap has to happen before the delete, hence release.
557 |         if (p->inner_cnt.fetch_add(cnt, std::memory_order_release) == -cnt) {
558 |           delete p;  // External and internal counts add up to 0.
559 |         }
560 |         return res;
561 |       }
562 |       if (p->inner_cnt.fetch_sub(1, std::memory_order_relaxed) == 1) {
563 |         p->inner_cnt.load(std::memory_order_acquire);  // The acquire load is used only for synchronization.
564 |         // The acquire guarantees that the delete happens afterwards.
565 |         delete p;  // The internal count reached 0.
566 |       }
567 |     }
568 |   }
569 | 
570 |  private:
571 |   struct Node;
572 | 
573 |   struct ReferenceCount {
574 |     int external_cnt;  // NOTE(review): no initializer here; confirm the initial count of head_ is defined.
575 |     Node* p = nullptr;
576 |   };
577 | 
578 |   struct Node {
579 |     std::shared_ptr<T> v;
580 |     std::atomic<int> inner_cnt = 0;
581 |     ReferenceCount next;
582 |     Node(const T& x) : v(std::make_shared<T>(x)) {}
583 |   };
584 | 
585 |   void increase_count(ReferenceCount& old_cnt) {
586 |     ReferenceCount new_cnt;
587 |     do {  // A failed comparison does not change the stored value and the loop simply continues, so relaxed can be used for failure.
588 |       new_cnt = old_cnt;
589 |       ++new_cnt.external_cnt;  // Raised on every access to head_ to mark the node as in use.
590 |     } while (!head_.compare_exchange_strong(old_cnt, new_cnt,
591 |                                             std::memory_order_acquire,
592 |                                             std::memory_order_relaxed));
593 |     old_cnt.external_cnt = new_cnt.external_cnt;
594 |   }
595 | 
596 |  private:
597 |   std::atomic<ReferenceCount> head_;
598 | };
599 | ```
600 | 


--------------------------------------------------------------------------------
/docs/02_sharing_data_between_thread.md:
--------------------------------------------------------------------------------
  1 | ## 线程间共享数据存在的问题
  2 | 
  3 | * 不变量（invariant）：关于一个特定数据结构总为 true 的语句，比如 `双向链表的两个相邻节点 A 和 B，A 的后指针一定指向 B，B 的前指针一定指向 A`。有时程序为了方便会暂时破坏不变量，这通常发生于更新复杂数据结构的过程中，比如删除双向链表中的一个节点 N，要先让 N 的前一个节点指向 N 的后一个节点（不变量被破坏），再让 N 的后节点指向前节点，最后删除 N（此时不变量重新恢复）
  4 | * 线程修改共享数据时，就会发生破坏不变量的情况，此时如果有其他线程访问，就可能导致不变量被永久性破坏，这就是 race condition
  5 | * 如果线程执行顺序的先后对结果无影响，则为不需要关心的良性竞争。需要关心的是不变量被破坏时产生的 race condition
  6 | * C++ 标准中定义了 data race 的概念，指代一种特定的 race condition，即并发修改单个对象。data race 会造成未定义行为
  7 | * race condition 要求一个线程进行时，另一线程访问同一数据块，出现问题时很难复现，因此编程时需要使用大量复杂操作来避免 race condition
  8 | 
  9 | ## 互斥锁（mutex）
 10 | 
 11 | * 使用 mutex 在访问共享数据前加锁，访问结束后解锁。一个线程用特定的 mutex 锁定后，其他线程必须等待该线程的 mutex 解锁才能访问共享数据
 12 | * C++11 提供了 [std::mutex](https://en.cppreference.com/w/cpp/thread/mutex) 来创建一个 mutex，可通过 [lock](https://en.cppreference.com/w/cpp/thread/mutex/lock) 加锁，通过 [unlock](https://en.cppreference.com/w/cpp/thread/mutex/unlock) 解锁。一般不手动使用这两个成员函数，而是使用 [std::lock_guard](https://en.cppreference.com/w/cpp/thread/lock_guard) 来自动处理加锁与解锁，它在构造时接受一个 mutex，并会调用 mutex.lock()，析构时会调用 mutex.unlock()
 13 | 
 14 | ```cpp
 15 | #include <iostream>
 16 | #include <mutex>
 17 | 
 18 | class A {  // minimal type with lock()/unlock(), enough for std::lock_guard
 19 |  public:
 20 |   void lock() { std::cout << "lock" << std::endl; }
 21 |   void unlock() { std::cout << "unlock" << std::endl; }
 22 | };
 23 | 
 24 | int main() {
 25 |   A a;
 26 |   {
 27 |     std::lock_guard<A> l(a);  // constructor calls a.lock(): prints "lock"
 28 |   }                           // destructor calls a.unlock(): prints "unlock"
 29 | }
 30 | ```
 31 | 
 32 | * C++17 提供了 [std::scoped_lock](https://en.cppreference.com/w/cpp/thread/scoped_lock)，它可以接受任意数量的 mutex，并将这些 mutex 传给 [std::lock](https://en.cppreference.com/w/cpp/thread/lock) 来同时上锁，它会对其中一个 mutex 调用 lock()，对其他调用 try_lock()，若 try_lock() 返回 false 则对已经上锁的 mutex 调用 unlock()，然后重新进行下一轮上锁，标准未规定下一轮的上锁顺序，可能不一致，重复此过程直到所有 mutex 上锁，从而达到同时上锁的效果。C++17 支持类模板实参推断，可以省略模板参数
 33 | 
 34 | ```cpp
 35 | #include <iostream>
 36 | #include <mutex>
 37 | 
 38 | class A {  // lockable stub: every member prints a digit identifying the call
 39 |  public:
 40 |   void lock() { std::cout << 1; }
 41 |   void unlock() { std::cout << 2; }
 42 |   bool try_lock() {
 43 |     std::cout << 3;
 44 |     return true;  // always reports success
 45 |   }
 46 | };
 47 | 
 48 | class B {  // lockable stub printing 4 (lock), 5 (unlock) and 6 (try_lock)
 49 |  public:
 50 |   void lock() { std::cout << 4; }
 51 |   void unlock() { std::cout << 5; }
 52 |   bool try_lock() {
 53 |     std::cout << 6;
 54 |     return true;  // always reports success
 55 |   }
 56 | };
 57 | 
 58 | int main() {
 59 |   A a;
 60 |   B b;
 61 |   {
 62 |     std::scoped_lock l(a, b);  // prints 16: a.lock(), then b.try_lock()
 63 |     std::cout << std::endl;
 64 |   }  // prints 25: a.unlock(), then b.unlock()
 65 | }
 66 | ```
 67 | 
 68 | * 一般 mutex 和要保护的数据一起放在类中，定义为 private 数据成员，而非全局变量，这样能让代码更清晰。但如果某个成员函数返回指向数据成员的指针或引用，则通过这个指针的访问行为不会被 mutex 限制，因此需要谨慎设置接口，确保 mutex 能锁住数据
 69 | 
 70 | ```cpp
 71 | #include <mutex>
 72 | 
 73 | class A {  // stands in for the data that B protects
 74 |  public:
 75 |   void f() {}
 76 | };
 77 | 
 78 | class B {
 79 |  public:
 80 |   A* get_data() {  // leaks a pointer to the protected member
 81 |     std::lock_guard<std::mutex> l(m_);
 82 |     return &data_;  // m_ is released on return, the pointer stays usable
 83 |   }
 84 | 
 85 |  private:
 86 |   std::mutex m_;
 87 |   A data_;
 88 | };
 89 | 
 90 | int main() {
 91 |   B b;
 92 |   A* p = b.get_data();
 93 |   p->f();  // accesses the data without holding the mutex
 94 | }
 95 | ```
 96 | 
 97 | * 即便在很简单的接口中，也可能遇到 race condition
 98 | 
 99 | ```cpp
100 | std::stack<int> s;
101 | if (!s.empty()) {
102 |   int n = s.top();  // if another thread pops at this point, this top is wrong
103 |   s.pop();
104 | }
105 | ```
106 | 
107 | * 上述代码先检查非空再获取栈顶元素，在单线程中是安全的，但在多线程中，检查非空之后，如果其他线程先 pop，就会导致当前线程 top 出错。另一个潜在的竞争是，如果两个线程都未 pop，而是分别获取了 top，虽然不会产生未定义行为，但这种对同一值处理了两次的行为更为严重，因为看起来没有任何错误，很难定位 bug
108 | * 既然如此，为什么不直接让 pop 返回栈顶元素？原因在于，构造返回值的过程可能抛异常，弹出后未返回会导致数据丢失。比如有一个元素为 vector 的 stack，拷贝 vector 需要在堆上分配内存，如果系统负载严重或资源有限（比如 vector 有大量元素），vector 的拷贝构造函数就会抛出 [std::bad_alloc](https://en.cppreference.com/w/cpp/memory/new/bad_alloc) 异常。如果 pop 可以返回栈顶元素值，返回一定是最后执行的语句，stack 在返回前已经弹出了元素，但如果拷贝返回值时抛出异常，就会导致弹出的数据丢失（从栈上移除但拷贝失败）。因此 [std::stack](https://en.cppreference.com/w/cpp/container/stack) 的设计者将这个操作分解为 top 和 pop 两部分
109 | * 下面思考几种把 top 和 pop 合为一步的方法。第一种容易想到的方法是传入一个引用来获取结果值，这种方式的明显缺点是，需要构造一个栈元素类型的实例，这是不现实的，为了获取结果而临时构造一个对象并不划算，元素类型可能不支持赋值（比如用户自定义某个类型），构造函数可能还需要一些参数
110 | 
111 | ```cpp
112 | std::vector<int> res;  // an element instance must exist before the call
113 | s.pop(res);  // the popped element is written into res
114 | ```
115 | 
116 | * 因为 pop 返回值时只担心该过程抛异常，第二种方案是为元素类型设置不抛异常的拷贝或移动构造函数，使用 [std::is_nothrow_copy_constructible](https://en.cppreference.com/w/cpp/types/is_copy_constructible) 和 [std::is_nothrow_move_constructible](https://en.cppreference.com/w/cpp/types/is_move_constructible)。但这种方式过于局限，只支持拷贝或移动不抛异常的类型
117 | * 第三种方案是返回指向弹出元素的指针，指针可以自由拷贝且不会抛异常，[std::shared_ptr](https://en.cppreference.com/w/cpp/memory/shared_ptr) 是个不错的选择，但这个方案的开销太大，尤其是对于内置类型来说，比如 int 为 4 字节， `shared_ptr<int>` 为 16 字节，开销是原来的 4 倍
118 | * 第四种方案是结合方案一二或者一三，比如结合方案一三实现一个线程安全的 stack
119 | 
120 | ```cpp
121 | #include <exception>
122 | #include <memory>
123 | #include <mutex>
124 | #include <stack>
125 | #include <utility>
126 | 
127 | struct EmptyStack : std::exception {  // thrown by pop() on an empty stack
128 |   const char* what() const noexcept override { return "empty stack!"; }
129 | };
130 | 
131 | template <typename T>
132 | class ConcurrentStack {  // std::stack whose operations are serialized by one mutex
133 |  public:
134 |   ConcurrentStack() = default;
135 | 
136 |   ConcurrentStack(const ConcurrentStack& rhs) {
137 |     std::lock_guard<std::mutex> l(rhs.m_);  // lock the source while copying it
138 |     s_ = rhs.s_;
139 |   }
140 | 
141 |   ConcurrentStack& operator=(const ConcurrentStack&) = delete;
142 | 
143 |   void push(T x) {
144 |     std::lock_guard<std::mutex> l(m_);
145 |     s_.push(std::move(x));
146 |   }
147 | 
148 |   bool empty() const {
149 |     std::lock_guard<std::mutex> l(m_);
150 |     return s_.empty();
151 |   }
152 | 
153 |   std::shared_ptr<T> pop() {  // top + pop under one lock; throws EmptyStack
154 |     std::lock_guard<std::mutex> l(m_);
155 |     if (s_.empty()) {
156 |       throw EmptyStack();
157 |     }
158 |     auto res = std::make_shared<T>(std::move(s_.top()));  // may throw: stack untouched
159 |     s_.pop();
160 |     return res;
161 |   }
162 | 
163 |   void pop(T& res) {  // same, but writes the element into res; throws EmptyStack
164 |     std::lock_guard<std::mutex> l(m_);
165 |     if (s_.empty()) {
166 |       throw EmptyStack();
167 |     }
168 |     res = std::move(s_.top());
169 |     s_.pop();
170 |   }
171 | 
172 |  private:
173 |   mutable std::mutex m_;  // mutable so that const members can lock it
174 |   std::stack<T> s_;
175 | };
176 | ```
177 | 
178 | * 之前锁的粒度（锁保护的数据量大小）太小，保护操作覆盖不周全，这里的粒度就较大，覆盖了大量操作。但并非粒度越大越好，如果锁粒度太大，过多线程请求竞争占用资源时，并发的性能就会较差
179 | * 如果给定操作需要对多个 mutex 上锁时，就会引入一个新的潜在问题，即死锁
180 | 
181 | ## 死锁
182 | 
183 | * 死锁的四个必要条件：互斥、占有且等待、不可抢占、循环等待
184 | * 避免死锁通常建议让两个 mutex 以相同顺序上锁，总是先锁 A 再锁 B，但这并不适用所有情况。[std::lock](https://en.cppreference.com/w/cpp/thread/lock) 可以同时对多个 mutex 上锁，并且没有死锁风险，它可能抛异常，此时就不会上锁，因此要么都锁住，要么都不锁
185 | 
186 | ```cpp
187 | #include <mutex>
188 | #include <thread>
189 | 
190 | struct A {  // a value n_ together with the mutex m_ that guards it
191 |   explicit A(int n) : n_(n) {}
192 |   std::mutex m_;
193 |   int n_;
194 | };
195 | 
196 | void f(A &a, A &b, int n) {
197 |   if (&a == &b) {
198 |     return;  // never lock the same object twice
199 |   }
200 |   std::lock(a.m_, b.m_);  // lock both at once to avoid deadlock
201 |   // The guards below adopt the locks in a fixed order, which looks safe,
202 |   // but without std::lock a concurrent f(b, a, n) in another thread would
203 |   // take the two locks in the opposite order and could deadlock
204 |   std::lock_guard<std::mutex> lock1(a.m_, std::adopt_lock);
205 |   std::lock_guard<std::mutex> lock2(b.m_, std::adopt_lock);
206 | 
207 |   // Equivalent: defer locking first, then lock both together
208 |   //   std::unique_lock<std::mutex> lock1(a.m_, std::defer_lock);
209 |   //   std::unique_lock<std::mutex> lock2(b.m_, std::defer_lock);
210 |   //   std::lock(lock1, lock2);
211 | 
212 |   a.n_ -= n;
213 |   b.n_ += n;
214 | }
215 | 
216 | int main() {
217 |   A x{70};
218 |   A y{30};
219 | 
220 |   std::thread t1(f, std::ref(x), std::ref(y), 20);  // moves 20 from x to y
221 |   std::thread t2(f, std::ref(y), std::ref(x), 10);  // opposite order: 10 from y to x
222 | 
223 |   t1.join();
224 |   t2.join();
225 | }
226 | ```
227 | 
228 | * [std::unique_lock](https://en.cppreference.com/w/cpp/thread/unique_lock) 在构造时接受一个 mutex，并会调用 mutex.lock()，析构时会调用 mutex.unlock()
229 | 
230 | ```cpp
231 | #include <iostream>
232 | #include <mutex>
233 | 
234 | class A {  // minimal type with lock()/unlock() that reports each call
235 |  public:
236 |   void lock() { std::cout << "lock" << std::endl; }
237 |   void unlock() { std::cout << "unlock" << std::endl; }
238 | };
239 | 
240 | int main() {
241 |   A a;
242 |   {
243 |     std::unique_lock l(a);  // constructor calls a.lock(): prints "lock"
244 |   }                         // destructor calls a.unlock(): prints "unlock"
245 | }
246 | ```
247 | 
248 | * [std::lock_guard](https://en.cppreference.com/w/cpp/thread/lock_guard) 未提供任何接口且不支持拷贝和移动，而 [std::unique_lock](https://en.cppreference.com/w/cpp/thread/unique_lock) 多提供了一些接口，使用更灵活，占用的空间也多一点。一种要求灵活性的情况是转移锁的所有权到另一个作用域
249 | 
250 | ```cpp
251 | std::unique_lock<std::mutex> get_lock() {
252 |   extern std::mutex m;
253 |   std::unique_lock<std::mutex> l(m);
254 |   prepare_data();
255 |   return l;  // no std::move needed: the compiler invokes the move constructor
256 | }
257 | 
258 | void f() {
259 |   std::unique_lock<std::mutex> l(get_lock());  // takes over the lock acquired in get_lock()
260 |   do_something();
261 | }
262 | ```
263 | 
264 | * 对一些费时的操作上锁可能造成很多操作被阻塞，可以在面对这些操作时先解锁
265 | 
266 | ```cpp
267 | void process_file_data() {
268 |   std::unique_lock<std::mutex> l(m);
269 |   auto data = get_data();
270 |   l.unlock();  // the slow step does not need the lock, so release it first
271 |   auto res = process(data);
272 |   l.lock();  // re-acquire before writing the result
273 |   write_result(data, res);
274 | }
275 | ```
276 | 
277 | * C++17 最优的同时上锁方法是使用 [std::scoped_lock](https://en.cppreference.com/w/cpp/thread/scoped_lock)
278 | * 解决死锁并不简单，如果多个锁是分开获取的，[std::lock](https://en.cppreference.com/w/cpp/thread/lock) 和 [std::scoped_lock](https://en.cppreference.com/w/cpp/thread/scoped_lock) 就帮不上忙，此时避免死锁更依赖于开发者的能力。避免死锁有四个建议
279 |   * 第一个避免死锁的建议是，一个线程已经获取一个锁时就不要获取第二个。如果每个线程只有一个锁，锁上就不会产生死锁（但除了互斥锁，其他方面也可能造成死锁，比如即使无锁，线程间相互等待也可能造成死锁）
280 |   * 第二个建议是，持有锁时避免调用用户提供的代码。用户提供的代码可能做任何事，包括获取锁，如果持有锁时调用用户代码获取锁，就会违反第一个建议，并可能造成死锁。但有时调用用户代码是无法避免的
281 |   * 第三个建议是，按固定顺序获取锁。如果必须获取多个锁且不能用 [std::lock](https://en.cppreference.com/w/cpp/thread/lock) 同时获取，最好在每个线程上用固定顺序获取。上面的例子虽然是按固定顺序获取锁，但如果不同时加锁就会出现死锁，对于这种情况的建议是规定固定的调用顺序
282 |   * 第四个建议是使用层级锁，如果一个锁被低层持有，就不允许在高层再上锁
283 | * 层级锁实现如下
284 | 
285 | ```cpp
286 | #include <climits>
287 | #include <iostream>
288 | #include <mutex>
289 | #include <stdexcept>
290 | 
291 | class HierarchicalMutex {  // lockable only in strictly decreasing level order
292 |  public:
293 |   explicit HierarchicalMutex(int hierarchy_value)
294 |       : cur_hierarchy_(hierarchy_value), prev_hierarchy_(0) {}
295 | 
296 |   void lock() {
297 |     validate_hierarchy();  // throws std::logic_error on a hierarchy violation
298 |     m_.lock();
299 |     update_hierarchy();
300 |   }
301 | 
302 |   bool try_lock() {  // false if m_ is busy; still throws on a hierarchy violation
303 |     validate_hierarchy();
304 |     if (!m_.try_lock()) {
305 |       return false;
306 |     }
307 |     update_hierarchy();
308 |     return true;
309 |   }
310 | 
311 |   void unlock() {  // throws unless this is the thread's most recently locked level
312 |     if (thread_hierarchy_ != cur_hierarchy_) {
313 |       throw std::logic_error("mutex hierarchy violated");
314 |     }
315 |     thread_hierarchy_ = prev_hierarchy_;  // restore this thread's previous level
316 |     m_.unlock();
317 |   }
318 | 
319 |  private:
320 |   void validate_hierarchy() {  // the thread's level must be above this mutex's
321 |     if (thread_hierarchy_ <= cur_hierarchy_) {
322 |       throw std::logic_error("mutex hierarchy violated");
323 |     }
324 |   }
325 | 
326 |   void update_hierarchy() {
327 |     // remember the thread's current level first (restored on unlock)
328 |     prev_hierarchy_ = thread_hierarchy_;
329 |     // then lower the thread's level to this mutex's level
330 |     thread_hierarchy_ = cur_hierarchy_;
331 |   }
332 | 
333 |   std::mutex m_;
334 |   const int cur_hierarchy_;
335 |   int prev_hierarchy_;
336 |   static thread_local int thread_hierarchy_;  // level of the calling thread
337 | };
338 | 
339 | // one value per thread; INT_MAX (from <climits>) lets a fresh thread lock any level
340 | thread_local int HierarchicalMutex::thread_hierarchy_(INT_MAX);
341 | 
342 | HierarchicalMutex high(10000);  // highest level
343 | HierarchicalMutex mid(6000);    // middle level
344 | HierarchicalMutex low(5000);    // lowest level
345 | 
346 | void lf() {  // lowest-level function
347 |   std::lock_guard<HierarchicalMutex> l(low);
348 |   // Called with no lock held: low.lock() sees thread_hierarchy_ == INT_MAX
349 |   // and cur_hierarchy_ == 5000; thread_hierarchy_ > cur_hierarchy_, so the
350 |   // check passes, the mutex is locked, prev_hierarchy_ becomes INT_MAX
351 |   // and thread_hierarchy_ becomes 5000
352 | }  // low.unlock(): thread_hierarchy_ == cur_hierarchy_, so the check passes,
353 | // thread_hierarchy_ is restored to the value saved in prev_hierarchy_, then unlock
354 | 
355 | void hf() {
356 |   std::lock_guard<HierarchicalMutex> l(high);  // high.cur_hierarchy_ is 10000
357 |   // thread_hierarchy_ is now 10000, so lower-level functions may be called
358 |   lf();  // thread_hierarchy_ goes from 10000 to 5000
359 |   //  thread_hierarchy_ is back to 10000
360 | }  //  thread_hierarchy_ is back to INT_MAX
361 | 
362 | void mf() {
363 |   std::lock_guard<HierarchicalMutex> l(mid);  // thread_hierarchy_ is 6000
364 |   hf();  // thread_hierarchy_ < high.cur_hierarchy_: hierarchy violated, throws
365 | }
366 | 
367 | int main() {
368 |   lf();
369 |   hf();
370 |   try {
371 |     mf();  // violates the hierarchy and throws
372 |   } catch (const std::logic_error& ex) {  // const reference: ex is only read
373 |     std::cout << ex.what();
374 |   }
375 | }
376 | ```
377 | 
378 | ### 读写锁（reader-writer mutex）
379 | 
380 | * 有时会希望对一个数据上锁时，根据情况，对某些操作相当于不上锁，可以并发访问，对某些操作保持上锁，同时最多只允许一个线程访问。比如对于需要经常访问但很少更新的缓存数据，用 [std::mutex](https://en.cppreference.com/w/cpp/thread/mutex) 加锁会导致同时最多只有一个线程可以读数据，这就需要用上读写锁，读写锁允许多个线程并发读但仅一个线程写
381 | * C++14 提供了 [std::shared_timed_mutex](https://en.cppreference.com/w/cpp/thread/shared_timed_mutex)，C++17 提供了接口更少性能更高的 [std::shared_mutex](https://en.cppreference.com/w/cpp/thread/shared_mutex)，如果多个线程调用 shared_mutex.lock_shared()，多个线程可以同时读，如果此时有一个写线程调用 shared_mutex.lock()，则读线程均会等待该写线程调用 shared_mutex.unlock()。C++11 没有提供读写锁，可使用 [boost::shared_mutex](https://www.boost.org/doc/libs/1_87_0/doc/html/thread/synchronization.html#thread.synchronization.mutex_types.shared_mutex)
382 | * C++14 提供了 [std::shared_lock](https://en.cppreference.com/w/cpp/thread/shared_lock)，它在构造时接受一个 mutex，并会调用 mutex.lock_shared()，析构时会调用 mutex.unlock_shared()
383 | 
384 | ```cpp
385 | #include <iostream>
386 | #include <shared_mutex>
387 | 
388 | class A {  // minimal type with lock_shared()/unlock_shared() that reports each call
389 |  public:
390 |   void lock_shared() { std::cout << "lock_shared" << std::endl; }
391 |   void unlock_shared() { std::cout << "unlock_shared" << std::endl; }
392 | };
393 | 
394 | int main() {
395 |   A a;
396 |   {
397 |     std::shared_lock l(a);  // constructor calls a.lock_shared()
398 |   }                         // destructor calls a.unlock_shared()
399 | }
400 | ```
401 | 
402 | * 对于 [std::shared_mutex](https://en.cppreference.com/w/cpp/thread/shared_mutex)，通常在读线程中用 [std::shared_lock](https://en.cppreference.com/w/cpp/thread/shared_lock) 管理，在写线程中用 [std::unique_lock](https://en.cppreference.com/w/cpp/thread/unique_lock) 管理
403 | 
404 | ```cpp
405 | class A {
406 |  public:
407 |   int read() const {  // readers take the lock in shared mode
408 |     std::shared_lock reader{m_};
409 |     return n_;
410 |   }
411 | 
412 |   int write() {  // writers take the lock exclusively
413 |     std::unique_lock writer{m_};
414 |     return n_ += 1;
415 |   }
416 | 
417 |  private:
418 |   mutable std::shared_mutex m_;  // mutable so that read() const can lock it
419 |   int n_{0};
420 | };
421 | ```
422 | 
423 | ### 递归锁
424 | 
425 | * [std::mutex](https://en.cppreference.com/w/cpp/thread/mutex) 是不可重入的，未释放前再次上锁是未定义行为
426 | 
427 | ```cpp
428 | #include <mutex>
429 | 
430 | class A {
431 |  public:
432 |   void f() {
433 |     m_.lock();  // a second lock when reached through g()
434 |     m_.unlock();
435 |   }
436 | 
437 |   void g() {
438 |     m_.lock();
439 |     f();  // f() locks m_ again while this thread already holds it
440 |     m_.unlock();
441 |   }
442 | 
443 |  private:
444 |   std::mutex m_;  // non-recursive
445 | };
446 | 
447 | int main() {
448 |   A{}.g();  // Undefined Behavior: std::mutex locked twice by the same thread
449 | }
450 | ```
451 | 
452 | * 为此 C++ 提供了 [std::recursive_mutex](https://en.cppreference.com/w/cpp/thread/recursive_mutex)，它可以在一个线程上多次获取锁，但在其他线程获取锁之前必须释放所有的锁
453 | 
454 | ```cpp
455 | #include <mutex>
456 | 
457 | class A {
458 |  public:
459 |   void f() {
460 |     m_.lock();  // nested lock by the owning thread when reached through g()
461 |     m_.unlock();
462 |   }
463 | 
464 |   void g() {
465 |     m_.lock();
466 |     f();  // locks m_ again on the same thread
467 |     m_.unlock();
468 |   }
469 | 
470 |  private:
471 |   std::recursive_mutex m_;  // may be locked repeatedly by the thread that owns it
472 | };
473 | 
474 | int main() {
475 |   A{}.g();  // OK: the nested lock is allowed with std::recursive_mutex
476 | }
477 | ```
478 | 
479 | * 多数情况下，如果需要递归锁，说明代码设计存在问题。比如一个类的每个成员函数都会上锁，一个成员函数调用另一个成员函数，就可能多次上锁，这种情况用递归锁就可以避免产生未定义行为。但显然这个设计本身是有问题的，更好的办法是提取其中一个函数作为 private 成员并且不上锁，其他成员先上锁再调用该函数
480 | 
481 | ## 对并发初始化的保护
482 | 
483 | * 除了对并发访问共享数据的保护，另一种常见的情况是对并发初始化的保护
484 | 
485 | ```cpp
486 | #include <memory>
487 | #include <mutex>
488 | #include <thread>
489 | 
490 | class A {  // placeholder for the object that is initialized once
491 |  public:
492 |   void f() {}
493 | };
494 | 
495 | std::shared_ptr<A> p;
496 | std::mutex m;
497 | 
498 | void init() {
499 |   std::unique_lock<std::mutex> l(m);  // RAII: m is released even if `new A` throws
500 |   if (!p) {
501 |     p.reset(new A);
502 |   }
503 |   l.unlock();  // only the initialization needs the lock
504 |   p->f();
505 | }
506 | 
507 | int main() {
508 |   std::thread t1{init};  // both threads run init() concurrently
509 |   std::thread t2{init};
510 | 
511 |   t1.join();
512 |   t2.join();
513 | }
514 | ```
515 | 
516 | * 上锁只是为了保护初始化过程，会不必要地影响性能，一种容易想到的优化方式是双重检查锁模式，但这存在潜在的 race condition
517 | 
518 | ```cpp
519 | #include <memory>
520 | #include <mutex>
521 | #include <thread>
522 | 
523 | class A {  // placeholder for the object that is initialized once
524 |  public:
525 |   void f() {}
526 | };
527 | 
528 | std::shared_ptr<A> p;
529 | std::mutex m;
530 | 
531 | void init() {
532 |   if (!p) {  // unlocked read: another thread may be executing #1, so p may already be non-null
533 |     std::lock_guard<std::mutex> l(m);
534 |     if (!p) {
535 |       p.reset(new A);  // 1
536 |       // Expected order: allocate memory, construct the A instance, then point p at it
537 |       // But p may also be pointed at the memory before the A instance is constructed
538 |     }
539 |   }
540 |   p->f();  // p may point at memory with no constructed A yet, which can crash
541 | }
542 | 
543 | int main() {
544 |   std::thread t1{init};  // both threads run init() concurrently
545 |   std::thread t2{init};
546 | 
547 |   t1.join();
548 |   t2.join();
549 | }
550 | ```
551 | 
552 | * 为此，C++11 提供了 [std::once_flag](https://en.cppreference.com/w/cpp/thread/once_flag) 和 [std::call_once](https://en.cppreference.com/w/cpp/thread/call_once) 来保证对某个操作只执行一次
553 | 
554 | ```cpp
555 | #include <memory>
556 | #include <mutex>
557 | #include <thread>
558 | 
559 | class A {  // placeholder for the object that is initialized once
560 |  public:
561 |   void f() {}
562 | };
563 | 
564 | std::shared_ptr<A> p;
565 | std::once_flag flag;
566 | 
567 | void init() {
568 |   std::call_once(flag, [&] { p.reset(new A); });  // the lambda runs only once
569 |   p->f();
570 | }
571 | 
572 | int main() {
573 |   std::thread t1{init};  // both threads run init() concurrently
574 |   std::thread t2{init};
575 | 
576 |   t1.join();
577 |   t2.join();
578 | }
579 | ```
580 | 
581 | * [std::call_once](https://en.cppreference.com/w/cpp/thread/call_once) 也可以用在类中
582 | 
583 | ```cpp
584 | #include <iostream>
585 | #include <mutex>
586 | #include <thread>
587 | 
588 | class A {
589 |  public:
590 |   void f() {
591 |     std::call_once(flag_, &A::print, this);  // print() runs once per object
592 |     std::cout << 2;  // printed on every call
593 |   }
594 | 
595 |  private:
596 |   void print() { std::cout << 1; }
597 | 
598 |  private:
599 |   std::once_flag flag_;
600 | };
601 | 
602 | int main() {
603 |   A a;
604 |   std::thread t1{&A::f, &a};  // both threads call f() on the same object
605 |   std::thread t2{&A::f, &a};
606 |   t1.join();
607 |   t2.join();
608 | }  // prints 122
609 | ```
610 | 
611 | * static 局部变量在控制流首次经过其声明时初始化，在 C++11 之前这存在潜在的 race condition，如果多线程的控制流同时到达 static 局部变量的声明处，即使变量已在一个线程中初始化，其他线程并不知晓，仍会对其尝试初始化。为此，C++11 规定，如果 static 局部变量正在初始化，线程到达此处时，将等待其完成，从而避免了 race condition。只有一个全局实例时，可以直接用 static 而不需要 [std::call_once](https://en.cppreference.com/w/cpp/thread/call_once)
612 | 
613 | ```cpp
614 | template <typename T>
615 | class Singleton {  // gives access to one lazily created, process-wide T
616 |  public:
617 |   static T& Instance();
618 |   Singleton(const Singleton&) = delete;
619 |   Singleton& operator=(const Singleton&) = delete;
620 | 
621 |  protected:  // protected, not private, so that `class X : public Singleton<X>` works
622 |   Singleton() = default;
623 |   ~Singleton() = default;
624 | };
625 | 
626 | template <typename T>
627 | T& Singleton<T>::Instance() {
628 |   static T instance;  // initialized once, thread-safely, on the first call (C++11)
629 |   return instance;
630 | }
631 | ```
632 | 


--------------------------------------------------------------------------------
/src/atm.cpp:
--------------------------------------------------------------------------------
  1 | #include <condition_variable>
  2 | #include <iostream>
  3 | #include <memory>
  4 | #include <mutex>
  5 | #include <queue>
  6 | #include <string>
  7 | #include <thread>
  8 | #include <utility>
  9 | // MessageQueue
 10 | namespace Messaging {
 11 | struct MessageBase {  // common base so that messages of any type share one queue
 12 |   virtual ~MessageBase() = default;
 13 | };
 14 | 
 15 | template <typename Msg>
 16 | struct WrappedMessage : MessageBase {  // carries one message of type Msg
 17 |   explicit WrappedMessage(const Msg& msg) : contents(msg) {}
 18 |   Msg contents;
 19 | };
 20 | 
 21 | class MessageQueue {  // mutex-protected FIFO of type-erased messages
 22 |  public:
 23 |   template <typename T>
 24 |   void push(const T& msg) {  // stores a copy of msg and wakes the waiters
 25 |     std::lock_guard<std::mutex> l(m_);
 26 |     q_.push(std::make_shared<WrappedMessage<T>>(msg));
 27 |     cv_.notify_all();
 28 |   }
 29 | 
 30 |   std::shared_ptr<MessageBase> wait_and_pop() {  // blocks until a message exists
 31 |     std::unique_lock<std::mutex> l(m_);
 32 |     cv_.wait(l, [&] { return !q_.empty(); });
 33 |     auto res = std::move(q_.front());  // move: skips a ref-count increment/decrement
 34 |     q_.pop();
 35 |     return res;
 36 |   }
 37 | 
 38 |  private:
 39 |   std::mutex m_;
 40 |   std::condition_variable cv_;
 41 |   std::queue<std::shared_ptr<MessageBase>> q_;
 42 | };
 43 | }  // namespace Messaging
 44 | 
 45 | //  TemplateDispatcher
 46 | namespace Messaging {
 47 | template <typename PreviousDispatcher, typename Msg, typename F>
 48 | class TemplateDispatcher {  // one chain link: runs f_ for a Msg, else asks prev_
 49 |  public:
 50 |   TemplateDispatcher(const TemplateDispatcher&) = delete;
 51 | 
 52 |   TemplateDispatcher& operator=(const TemplateDispatcher&) = delete;
 53 | 
 54 |   TemplateDispatcher(TemplateDispatcher&& rhs) noexcept
 55 |       : q_(rhs.q_),
 56 |         prev_(rhs.prev_),
 57 |         f_(std::move(rhs.f_)),
 58 |         chained_(rhs.chained_) {
 59 |     rhs.chained_ = true;  // the moved-from object must not dispatch in its destructor
 60 |   }
 61 | 
 62 |   TemplateDispatcher(MessageQueue* q, PreviousDispatcher* prev, F&& f)
 63 |       : q_(q), prev_(prev), f_(std::forward<F>(f)) {
 64 |     prev->chained_ = true;  // the previous link leaves the waiting to this one
 65 |   }
 66 | 
 67 |   template <typename OtherMsg, typename OtherF>
 68 |   TemplateDispatcher<TemplateDispatcher, OtherMsg, OtherF> handle(OtherF&& f) {
 69 |     return TemplateDispatcher<TemplateDispatcher, OtherMsg, OtherF>(
 70 |         q_, this, std::forward<OtherF>(f));
 71 |   }
 72 | 
 73 |   ~TemplateDispatcher() noexcept(false) {  // any dispatcher may throw here
 74 |     if (!chained_) {
 75 |       wait_and_dispatch();  // the dispatching itself happens in the destructor
 76 |     }
 77 |   }
 78 | 
 79 |  private:
 80 |   template <typename Dispatcher, typename OtherMsg, typename OtherF>
 81 |   friend class TemplateDispatcher;  // all TemplateDispatcher instantiations are friends
 82 | 
 83 |   void wait_and_dispatch() {
 84 |     while (true) {
 85 |       auto msg = q_->wait_and_pop();
 86 |       if (dispatch(msg)) {
 87 |         break;  // stop looping once a message has been handled
 88 |       }
 89 |     }
 90 |   }
 91 | 
 92 |   bool dispatch(const std::shared_ptr<MessageBase>& msg) {
 93 |     if (auto wrapper = dynamic_cast<WrappedMessage<Msg>*>(msg.get())) {
 94 |       f_(wrapper->contents);
 95 |       return true;
 96 |     }
 97 |     // the type does not match: pass the message on to the previous dispatcher
 98 |     return prev_->dispatch(msg);
 99 |   }
100 | 
101 |  private:
102 |   MessageQueue* q_ = nullptr;
103 |   PreviousDispatcher* prev_ = nullptr;
104 |   F f_;
105 |   bool chained_ = false;
106 | };
107 | }  // namespace Messaging
108 | 
109 | // Dispatcher
110 | namespace Messaging {
111 | class CloseQueue {};  // message used to shut a queue down
112 | 
113 | class Dispatcher {  // head of a handler chain; only reacts to CloseQueue itself
114 |  public:
115 |   Dispatcher(const Dispatcher&) = delete;
116 | 
117 |   Dispatcher& operator=(const Dispatcher&) = delete;
118 | 
119 |   Dispatcher(Dispatcher&& rhs) noexcept : q_(rhs.q_), chained_(rhs.chained_) {
120 |     rhs.chained_ = true;  // the moved-from object must not dispatch in its destructor
121 |   }
122 | 
123 |   explicit Dispatcher(MessageQueue* q) : q_(q) {}
124 | 
125 |   template <typename Msg, typename F>
126 |   TemplateDispatcher<Dispatcher, Msg, F> handle(F&& f) {
127 |     // wrap f in a TemplateDispatcher that handles messages of type Msg
128 |     return TemplateDispatcher<Dispatcher, Msg, F>(q_, this, std::forward<F>(f));
129 |   }
130 | 
131 |   ~Dispatcher() noexcept(false) {  // may throw CloseQueue
132 |     if (!chained_) {  // the dispatcher returned by Receiver::wait is destroyed right away
133 |       wait_and_dispatch();  // the dispatching itself happens in the destructor
134 |     }
135 |   }
136 | 
137 |  private:
138 |   template <typename Dispatcher, typename Msg, typename F>
139 |   friend class TemplateDispatcher;
140 | 
141 |   void wait_and_dispatch() {
142 |     while (true) {  // only left when dispatch() throws CloseQueue
143 |       auto msg = q_->wait_and_pop();
144 |       dispatch(msg);
145 |     }
146 |   }
147 | 
148 |   bool dispatch(const std::shared_ptr<MessageBase>& msg) {
149 |     if (dynamic_cast<WrappedMessage<CloseQueue>*>(msg.get())) {
150 |       throw CloseQueue();
151 |     }
152 |     return false;  // false means the message was not handled
153 |   }
154 | 
155 |  private:
156 |   MessageQueue* q_ = nullptr;
157 |   bool chained_ = false;
158 | };
159 | }  // namespace Messaging
160 | 
161 | // Sender
162 | namespace Messaging {
163 | class Sender {  // non-owning handle used to push messages onto a MessageQueue
164 |  public:
165 |   Sender() = default;  // not bound to a queue: send() does nothing
166 | 
167 |   explicit Sender(MessageQueue* q) : q_(q) {}
168 | 
169 |   template <typename Msg>
170 |   void send(const Msg& msg) const {  // const: only the queue changes, not the Sender
171 |     if (q_) {
172 |       q_->push(msg);
173 |     }
174 |   }
175 | 
176 |  private:
177 |   MessageQueue* q_ = nullptr;
178 | };
179 | }  // namespace Messaging
180 | 
181 | // Receiver
182 | namespace Messaging {
183 | class Receiver {
184 |  public:
185 |   // Implicit conversion: hands out a Sender bound to this receiver's queue.
186 |   operator Sender() { return Sender{&q_}; }
187 | 
188 |   // Starts a dispatch chain on the queue; the waiting happens when the
189 |   // returned Dispatcher (or the last handler chained onto it) is destroyed.
190 |   Dispatcher wait() { return Dispatcher{&q_}; }
191 | 
192 |  private:
193 |   // The queue is owned by the receiver.
194 |   MessageQueue q_;
195 | };
196 | }  // namespace Messaging
197 | 
198 | // ATM 消息
199 | struct Withdraw {  // ATM -> bank: withdraw `amount` from `account`
200 |   Withdraw(const std::string& _account, unsigned _amount,
201 |            Messaging::Sender _atm_queue)
202 |       : account(_account), amount(_amount), atm_queue(_atm_queue) {}
203 | 
204 |   std::string account;
205 |   unsigned amount;
206 |   mutable Messaging::Sender atm_queue;  // where the bank sends its answer
207 | };
208 | 
209 | struct WithdrawOK {};  // bank -> ATM: the balance covers the withdrawal
210 | 
211 | struct WithdrawDenied {};  // bank -> ATM: the balance is too low
212 | 
213 | struct CancelWithdrawal {  // ATM -> bank: cancel was pressed during a withdrawal
214 |   CancelWithdrawal(const std::string& _account, unsigned _amount)
215 |       : account(_account), amount(_amount) {}
216 | 
217 |   std::string account;
218 |   unsigned amount;
219 | };
220 | 
221 | struct WithdrawalProcessed {  // ATM -> bank: sent after WithdrawOK was handled
222 |   WithdrawalProcessed(const std::string& _account, unsigned _amount)
223 |       : account(_account), amount(_amount) {}
224 | 
225 |   std::string account;
226 |   unsigned amount;
227 | };
228 | 
229 | struct CardInserted {  // -> ATM: handled while waiting for a card
230 |   explicit CardInserted(const std::string& _account) : account(_account) {}
231 | 
232 |   std::string account;
233 | };
234 | 
235 | struct DigitPressed {  // -> ATM: one PIN character, appended to the entered PIN
236 |   explicit DigitPressed(char _digit) : digit(_digit) {}
237 | 
238 |   char digit;
239 | };
240 | 
241 | struct ClearLastPressed {};  // -> ATM: drop the last entered PIN character
242 | 
243 | struct EjectCard {};  // ATM -> interface hardware: sent when processing is done
244 | 
245 | struct WithdrawPressed {  // -> ATM: the user asks to withdraw `amount`
246 |   explicit WithdrawPressed(unsigned _amount) : amount(_amount) {}
247 | 
248 |   unsigned amount;
249 | };
250 | 
251 | struct CancelPressed {};  // -> ATM: handled in every state except waiting for a card
252 | 
253 | struct IssueMoney {  // ATM -> interface hardware: pay out `amount`
254 |   IssueMoney(unsigned _amount) : amount(_amount) {}  // NOTE(review): not explicit, unlike its siblings
255 | 
256 |   unsigned amount;
257 | };
258 | 
259 | struct VerifyPIN {  // ATM -> bank: check `pin` for `account`
260 |   VerifyPIN(const std::string& _account, const std::string& pin_,
261 |             Messaging::Sender _atm_queue)
262 |       : account(_account), pin(pin_), atm_queue(_atm_queue) {}
263 | 
264 |   std::string account;
265 |   std::string pin;
266 |   mutable Messaging::Sender atm_queue;  // where the bank sends its answer
267 | };
268 | 
269 | struct PINVerified {};  // bank -> ATM: the PIN is correct
270 | 
271 | struct PINIncorrect {};  // bank -> ATM: the PIN is wrong
272 | 
273 | struct DisplayEnterPIN {};  // ATM -> interface hardware
274 | 
275 | struct DisplayEnterCard {};  // ATM -> interface hardware
276 | 
277 | struct DisplayInsufficientFunds {};  // ATM -> interface hardware
278 | 
279 | struct DisplayWithdrawalCancelled {};  // ATM -> interface hardware
280 | 
281 | struct DisplayPINIncorrectMessage {};  // ATM -> interface hardware
282 | 
283 | struct DisplayWithdrawalOptions {};  // ATM -> interface hardware
284 | 
285 | struct GetBalance {  // ATM -> bank: request the balance
286 |   GetBalance(const std::string& _account, Messaging::Sender _atm_queue)
287 |       : account(_account), atm_queue(_atm_queue) {}
288 | 
289 |   std::string account;
290 |   mutable Messaging::Sender atm_queue;  // where the bank sends the Balance answer
291 | };
292 | 
293 | struct Balance {  // bank -> ATM: answer to GetBalance
294 |   explicit Balance(unsigned _amount) : amount(_amount) {}
295 | 
296 |   unsigned amount;
297 | };
298 | 
299 | struct DisplayBalance {  // ATM -> interface hardware: show `amount`
300 |   explicit DisplayBalance(unsigned _amount) : amount(_amount) {}
301 | 
302 |   unsigned amount;
303 | };
304 | 
305 | struct BalancePressed {};  // -> ATM: the user asks for the balance
306 | 
307 | // ATM状态机
308 | class ATM {  // message-driven state machine; state_ is the current state's handler
309 |  public:
310 |   ATM(const ATM&) = delete;
311 | 
312 |   ATM& operator=(const ATM&) = delete;
313 | 
314 |   ATM(Messaging::Sender bank, Messaging::Sender interface_hardware)
315 |       : bank_(bank), interface_hardware_(interface_hardware) {}
316 | 
317 |   void done() { get_sender().send(Messaging::CloseQueue()); }  // makes run() return
318 | 
319 |   void run() {
320 |     state_ = &ATM::waiting_for_card;
321 |     try {
322 |       while (true) {
323 |         (this->*state_)();  // each state handles one message and picks the next state
324 |       }
325 |     } catch (const Messaging::CloseQueue&) {  // CloseQueue ends the loop
326 |     }
327 |   }
328 | 
329 |   Messaging::Sender get_sender() { return incoming_; }
330 | 
331 |  private:
332 |   void process_withdrawal() {  // waits for the bank's answer, or for cancel
333 |     incoming_.wait()
334 |         .handle<WithdrawOK>([&](const WithdrawOK& msg) {
335 |           interface_hardware_.send(IssueMoney(withdrawal_amount_));
336 |           bank_.send(WithdrawalProcessed(account_, withdrawal_amount_));
337 |           state_ = &ATM::done_processing;
338 |         })
339 |         .handle<WithdrawDenied>([&](const WithdrawDenied& msg) {
340 |           interface_hardware_.send(DisplayInsufficientFunds());
341 |           state_ = &ATM::done_processing;
342 |         })
343 |         .handle<CancelPressed>([&](const CancelPressed& msg) {
344 |           bank_.send(CancelWithdrawal(account_, withdrawal_amount_));
345 |           interface_hardware_.send(DisplayWithdrawalCancelled());
346 |           state_ = &ATM::done_processing;
347 |         });
348 |   }
349 | 
350 |   void process_balance() {  // waits for the bank's Balance answer, or for cancel
351 |     incoming_.wait()
352 |         .handle<Balance>([&](const Balance& msg) {
353 |           interface_hardware_.send(DisplayBalance(msg.amount));
354 |           state_ = &ATM::wait_for_action;
355 |         })
356 |         .handle<CancelPressed>(
357 |             [&](const CancelPressed& msg) { state_ = &ATM::done_processing; });
358 |   }
359 | 
360 |   void wait_for_action() {  // PIN accepted: the user picks withdraw, balance or cancel
361 |     interface_hardware_.send(DisplayWithdrawalOptions());
362 |     incoming_.wait()
363 |         .handle<WithdrawPressed>([&](const WithdrawPressed& msg) {
364 |           withdrawal_amount_ = msg.amount;
365 |           bank_.send(Withdraw(account_, msg.amount, incoming_));
366 |           state_ = &ATM::process_withdrawal;
367 |         })
368 |         .handle<BalancePressed>([&](const BalancePressed& msg) {
369 |           bank_.send(GetBalance(account_, incoming_));
370 |           state_ = &ATM::process_balance;
371 |         })
372 |         .handle<CancelPressed>(
373 |             [&](const CancelPressed& msg) { state_ = &ATM::done_processing; });
374 |   }
375 | 
376 |   void verifying_pin() {  // waits for the bank's verdict on the PIN, or for cancel
377 |     incoming_.wait()
378 |         .handle<PINVerified>(
379 |             [&](const PINVerified& msg) { state_ = &ATM::wait_for_action; })
380 |         .handle<PINIncorrect>([&](const PINIncorrect& msg) {
381 |           interface_hardware_.send(DisplayPINIncorrectMessage());
382 |           state_ = &ATM::done_processing;
383 |         })
384 |         .handle<CancelPressed>(
385 |             [&](const CancelPressed& msg) { state_ = &ATM::done_processing; });
386 |   }
387 | 
388 |   void getting_pin() {  // collects PIN characters; stays in this state until 4 are entered
389 |     incoming_.wait()
390 |         .handle<DigitPressed>([&](const DigitPressed& msg) {
391 |           const unsigned pin_length = 4;
392 |           pin_ += msg.digit;
393 |           if (pin_.length() == pin_length) {
394 |             bank_.send(VerifyPIN(account_, pin_, incoming_));
395 |             state_ = &ATM::verifying_pin;
396 |           }
397 |         })
398 |         .handle<ClearLastPressed>([&](const ClearLastPressed& msg) {
399 |           if (!pin_.empty()) {
400 |             pin_.pop_back();
401 |           }
402 |         })
403 |         .handle<CancelPressed>(
404 |             [&](const CancelPressed& msg) { state_ = &ATM::done_processing; });
405 |   }
406 | 
407 |   void waiting_for_card() {  // initial state; only CardInserted is handled here
408 |     interface_hardware_.send(DisplayEnterCard());
409 |     incoming_.wait().handle<CardInserted>([&](const CardInserted& msg) {
410 |       account_ = msg.account;
411 |       pin_ = "";
412 |       interface_hardware_.send(DisplayEnterPIN());
413 |       state_ = &ATM::getting_pin;
414 |     });
415 |   }
416 | 
417 |   void done_processing() {  // ejects the card and returns to the initial state
418 |     interface_hardware_.send(EjectCard());
419 |     state_ = &ATM::waiting_for_card;
420 |   }
421 | 
422 |  private:
423 |   Messaging::Receiver incoming_;
424 |   Messaging::Sender bank_;
425 |   Messaging::Sender interface_hardware_;
426 |   void (ATM::*state_)() = nullptr;  // set by run(); initialized so it is never indeterminate
427 |   std::string account_;
428 |   unsigned withdrawal_amount_ = 0;  // set when WithdrawPressed is handled
429 |   std::string pin_;
430 | };
431 | 
432 | // 银行状态机
433 | class BankMachine {  // answers the ATM's requests from a single in-memory balance
434 |  public:
435 |   void done() { get_sender().send(Messaging::CloseQueue()); }
436 | 
437 |   void run() {
438 |     try {
439 |       while (true) {
440 |         incoming_.wait()
441 |             .handle<VerifyPIN>([&](const VerifyPIN& msg) {
442 |               if (msg.pin == "6666") {  // the PIN 6666 passes verification
443 |                 msg.atm_queue.send(PINVerified());
444 |               } else {  // anything else is reported as an incorrect PIN
445 |                 msg.atm_queue.send(PINIncorrect());
446 |               }
447 |             })
448 |             .handle<Withdraw>([&](const Withdraw& msg) {  // withdrawal
449 |               if (balance_ >= msg.amount) {
450 |                 msg.atm_queue.send(WithdrawOK());
451 |                 balance_ -= msg.amount;
452 |               } else {
453 |                 msg.atm_queue.send(WithdrawDenied());
454 |               }
455 |             })
456 |             .handle<GetBalance>([&](const GetBalance& msg) {
457 |               msg.atm_queue.send(::Balance(balance_));
458 |             })
459 |             .handle<WithdrawalProcessed>([&](const WithdrawalProcessed& msg) {})
460 |             .handle<CancelWithdrawal>([&](const CancelWithdrawal& msg) {});
461 |       }
462 |     } catch (const Messaging::CloseQueue&) {  // CloseQueue ends the loop
463 |     }
464 |   }
465 | 
466 |   Messaging::Sender get_sender() { return incoming_; }
467 | 
468 |  private:
469 |   Messaging::Receiver incoming_;
470 |   unsigned balance_ = 199;
471 | };
472 | 
473 | // 用户接口状态机
474 | class InterfaceMachine {
475 |  public:
476 |   void done() { get_sender().send(Messaging::CloseQueue()); }
477 | 
478 |   void run() {
479 |     try {
480 |       while (true) {
481 |         incoming_.wait()
482 |             .handle<IssueMoney>([&](const IssueMoney& msg) {
483 |               {
484 |                 std::lock_guard<std::mutex> l(m_);
485 |                 std::cout << "Issuing " << msg.amount << std::endl;
486 |               }
487 |             })
488 |             .handle<DisplayInsufficientFunds>(
489 |                 [&](const DisplayInsufficientFunds& msg) {
490 |                   {
491 |                     std::lock_guard<std::mutex> l(m_);
492 |                     std::cout << "Insufficient funds" << std::endl;
493 |                   }
494 |                 })
495 |             .handle<DisplayEnterPIN>([&](const DisplayEnterPIN& msg) {
496 |               {
497 |                 std::lock_guard<std::mutex> l(m_);
498 |                 std::cout << "Please enter your PIN (0-9)" << std::endl;
499 |               }
500 |             })
501 |             .handle<DisplayEnterCard>([&](const DisplayEnterCard& msg) {
502 |               {
503 |                 std::lock_guard<std::mutex> l(m_);
504 |                 std::cout << "Please enter your card (I)" << std::endl;
505 |               }
506 |             })
507 |             .handle<DisplayBalance>([&](const DisplayBalance& msg) {
508 |               {
509 |                 std::lock_guard<std::mutex> l(m_);
510 |                 std::cout << "The Balance of your account is " << msg.amount
511 |                           << std::endl;
512 |               }
513 |             })
514 |             .handle<DisplayWithdrawalOptions>(
515 |                 [&](const DisplayWithdrawalOptions& msg) {
516 |                   {
517 |                     std::lock_guard<std::mutex> l(m_);
518 |                     std::cout << "Withdraw 50? (w)" << std::endl;
519 |                     std::cout << "Display Balance? (b)" << std::endl;
520 |                     std::cout << "Cancel? (c)" << std::endl;
521 |                   }
522 |                 })
523 |             .handle<DisplayWithdrawalCancelled>(
524 |                 [&](const DisplayWithdrawalCancelled& msg) {
525 |                   {
526 |                     std::lock_guard<std::mutex> l(m_);
527 |                     std::cout << "Withdrawal cancelled" << std::endl;
528 |                   }
529 |                 })
530 |             .handle<DisplayPINIncorrectMessage>(
531 |                 [&](const DisplayPINIncorrectMessage& msg) {
532 |                   {
533 |                     std::lock_guard<std::mutex> l(m_);
534 |                     std::cout << "PIN incorrect" << std::endl;
535 |                   }
536 |                 })
537 |             .handle<EjectCard>([&](const EjectCard& msg) {
538 |               {
539 |                 std::lock_guard<std::mutex> l(m_);
540 |                 std::cout << "Ejecting card" << std::endl;
541 |               }
542 |             });
543 |       }
544 |     } catch (Messaging::CloseQueue&) {
545 |     }
546 |   }
547 | 
548 |   Messaging::Sender get_sender() { return incoming_; }
549 | 
550 |  private:
551 |   Messaging::Receiver incoming_;
552 |   std::mutex m_;
553 | };
554 | 
555 | int main() {
556 |   BankMachine bank;
557 |   InterfaceMachine interface_hardware;
558 |   ATM machine{bank.get_sender(), interface_hardware.get_sender()};
559 |   std::thread bank_thread{&BankMachine::run, &bank};
560 |   std::thread interface_thread{&InterfaceMachine::run, &interface_hardware};
561 |   std::thread atm_thread{&ATM::run, &machine};
562 |   Messaging::Sender atm_queue{machine.get_sender()};
563 |   bool quit = false;
564 |   while (!quit) {
565 |     char c = getchar();
566 |     switch (c) {
567 |       case '0':
568 |       case '1':
569 |       case '2':
570 |       case '3':
571 |       case '4':
572 |       case '5':
573 |       case '6':
574 |       case '7':
575 |       case '8':
576 |       case '9':
577 |         atm_queue.send(DigitPressed(c));
578 |         break;
579 |       case 'b':  // 显示余额
580 |         atm_queue.send(BalancePressed());
581 |         break;
582 |       case 'w':  // 取钱
583 |         atm_queue.send(WithdrawPressed(50));
584 |         break;
585 |       case 'c':  // 退卡
586 |         atm_queue.send(CancelPressed());
587 |         break;
588 |       case 'q':  // 结束程序
589 |         quit = true;
590 |         break;
591 |       case 'i':  // 插卡
592 |         atm_queue.send(CardInserted("downdemo"));
593 |         break;
594 |     }
595 |   }
596 |   bank.done();
597 |   machine.done();
598 |   interface_hardware.done();
599 |   atm_thread.join();
600 |   bank_thread.join();
601 |   interface_thread.join();
602 | }
603 | 


--------------------------------------------------------------------------------
/docs/reference/processes_and_threads.md:
--------------------------------------------------------------------------------
  1 | ## 进程
  2 | 
  3 | * 在进程模型中，计算机上所有可运行的软件，通常也包括操作系统，被组织成若干顺序进程（sequential process），简称进程（process），一个进程就是一个正在执行程序的实例，包括程序计数器、寄存器和变量的当前值
  4 | * 概念上来说，每个进程有自己的虚拟 CPU，但实际上真正的 CPU（假设只有一个 CPU）在各进程之间来回切换，同一时刻实际只有一个进程在运行
  5 | * 实际只有一个物理程序计数器。每个进程运行时，它的逻辑程序计数器被装入实际的程序计数器。当该进程暂时停止运行（被切换出去）时，物理程序计数器被保存到内存中该进程的逻辑程序计数器中
  6 | * 进程创建主要有四种形式
  7 |   * 系统初始化：启动系统时会创建若干进程，包括和用户交互的前台进程和停在后台的守护进程，守护进程可以通过 UNIX 的 ps 指令或 Windows 的任务管理器查看
  8 |   * 运行中的程序执行创建进程的系统调用：比如启动一个程序，该程序要启动更多进程来分配任务
  9 |   * 用户请求创建一个新进程：比如用户双击图标启动程序
 10 |   * 大型机批处理作业的初始化
 11 | * 创建进程的系统调用在 UNIX 中是 `fork`，在 Windows 中是 `CreateProcess`，进程创建后，父子进程有不同的地址空间
 12 | * 进程终止通常也有四种形式
 13 |   * 正常退出（自愿的）：比如点击浏览器的关闭图标。进程退出的系统调用在 UNIX 中是 `exit`，在 Windows 中是 `ExitProcess`
 14 |   * 出错退出（自愿的）：比如执行 `cc foo.c` 编译 `foo.c` 而该文件不存在
 15 |   * 严重错误（非自愿）：比如执行非法指令、引用不存在的内存、除数是零。UNIX 中进程可以通知操作系统它希望自行处理这类错误，此时进程会收到信号被中断而非终止
 16 |   * 被其他进程杀死（非自愿）：UNIX 中是 `kill`，Windows 中是 `TerminateProcess`
 17 | * UNIX 中，进程和其所有子进程（包括其后裔）组成一个进程组，当用户发出一个键盘信号，该信号会发送给进程组所有成员
 18 | * Windows 中没有进程层次的概念，所有进程地位相同
 19 | * 进程阻塞有两种情况，一是正常情况，比如操作系统调度另一个进程占用 CPU，二是异常情况，比如没有足够的 CPU 可调用
 20 | * 进程有三种状态：运行、就绪、阻塞
 21 | 
 22 | ```
 23 | 运行 <-> 就绪
 24 |   ↘    ↗
 25 |     阻塞
 26 | 
 27 | 运行：该时刻实际占用 CPU
 28 | 就绪：操作系统调度了其他进程运行而暂时停止
 29 | 阻塞：逻辑上不能继续运行，比如等待用户输入
 30 | ```
 31 | 
 32 | * 操作系统通过维护一张进程表（一个结构数组）来实现进程模型，每个进程占一个表项（即进程控制块，Process Control Block）。PCB 包含了进程状态的主要信息，如程序计数器、堆栈指针、内存分配状态、所打开的文件状态、账号和调度信息、进程状态切换时必须保存的信息
 33 | * 所有中断都从保存寄存器开始，通常会保存到当前进程的 PCB 中。一个进程在执行过程中可能中断几千次，但恢复时，被中断的进程都将返回到与中断发生前完全相同的状态
 34 | * 发生中断后，操作系统最底层的工作过程
 35 |   * 中断硬件将程序计数器、程序状态字、寄存器压入堆栈
 36 |   * 硬件从中断向量装入新的程序计数器
 37 |   * 通过汇编保存寄存器值（因为这类操作无法用高级语言完成）
 38 |   * 通过汇编设置新的堆栈
 39 |   * 运行 C 语言（假设操作系统用 C 编写）中断服务例程
 40 |   * 调用调度程序，决定接下来要运行的进程
 41 |   * C 返回到汇编
 42 |   * 通过汇编运行新进程
 43 | * 假设一个进程等待 I/O 操作与其在内存中停留的时间比为 `p`，则 `n` 个进程都在等待（此时 CPU 空转）的概率为 `p ^ n`，CPU 利用率为 `1 - p ^ n`，因此一般（该模型只是粗略情况）I/O 时间越短、运行进程越多，CPU 利用率越高
 44 | 
 45 | ```
 46 | 假如内存为 8G，操作系统和相关表格占 2G，用户程序也占 2G，内存最多容纳 3 个用户程序
 47 | 假设 80% 时间用于等待 I/O 操作
 48 | CPU 利用率 = 1 - 0.8 ^ 3 = 49%
 49 | 如果增加 8G 内存，则最多容纳 7 个用户程序
 50 | CPU 利用率 = 1 - 0.8 ^ 7 = 79%，吞吐量提高为 79% - 49% = 30%
 51 | 如果再增加 8G 内存，则最多容纳 11 个用户程序
 52 | CPU 利用率 = 1 - 0.8 ^ 11 = 91%，吞吐量只提高了 12%，可见第一次增加内存比较划算
 53 | ```
 54 | 
 55 | ## 线程
 56 | 
 57 | * 正如进程提供的抽象使得避免了对中断、定时器、上下文切换的考虑，多线程提供了一种新抽象，即并行实例共享同一地址空间和所有可用数据，这正是多进程模型（地址空间不同）无法表达的
 58 | * 第二个需要多线程的理由是，线程更轻量，创建和撤销都更快（通常创建一个线程比创建一个进程快 10 - 100 倍）
 59 | * 第三个理由是多核 CPU 系统中，多线程为真正的并行提供了可能
 60 | * 线程包含一个程序计数器（记录接下来要执行哪一条指令）、寄存器（保存线程当前的工作变量）、堆栈指针（记录执行历史，每个线程的堆栈有一帧，每一帧保存一个已调用但还未返回的过程，如局部变量、返回地址）
 61 | * 各线程可以访问进程地址空间的每一个内存地址，因此一个线程可以读写甚至清除另一个线程的堆栈。线程之间没有保护，因为不可能，也没必要
 62 | * 除了共享地址空间，线程还共享同一个打开文件集、子进程、定时器及相关信号
 63 | * 线程可以处在运行、就绪、阻塞、终止等状态中的任何一个
 64 | * thread_yield 允许线程自动放弃 CPU 转让给另一个线程运行，提供这个调用是因为，不同于进程，线程库不能利用时钟中断强制线程让出 CPU
 65 | * 实现线程包主要有两种方式，一是用户级线程（User-Level Thread），二是内核级线程（Kernel-Level Thread），另外也有混合实现
 66 | * 用户级线程把整个线程包放在用户空间中，内核对其一无所知，不需要内核支持，可以在不支持线程的操作系统上实现。在用户空间管理线程时，每个进程需要有其专用的线程表（thread table），这些表和内核中的进程表类似，只不过记录的是各个线程的属性，如程序计数器、寄存器、堆栈指针和状态等。该线程表由运行时系统管理，当线程转换到就绪或阻塞状态时，在线程表中存放重启该线程所需的信息，与内核在进程表中存放进程的信息完全一样
 67 | * 用户级线程允许进程有自己定制的调度算法，具有更好的可扩展性（因为内核级线程需要一些固定表格空间和堆栈空间），性能更好。用户级线程的切换需要少量机器指令，而内核级线程需要完整的上下文切换，修改内存映像，使高速缓存失效，这导致了若干数量级的延迟
 68 | * 用户级线程的问题是如何实现阻塞系统调用，比如线程读取键盘，在没有按下任何按键之前不能让该线程实际进行该系统调用，因为这会停止所有线程。另一个问题是，如果一个线程开始运行，则其所在进程的其他线程就不能运行，除非运行线程自动放弃 CPU。而使用内核级线程时，线程阻塞在 I/O 上时，不需要将整个进程挂起
 69 | * 内核级线程的线程表（和用户级线程的线程表一样，记录寄存器、状态和其他信息）存在于内核中，当一个线程希望创建一个新线程或撤销一个已有线程时，将进行一个系统调用，这个系统调用通过对线程表的更新完成创建或撤销工作
 70 | * 当内核级线程阻塞时，内核可以运行同一进程中的另一线程，或者运行另一个进程的线程。而对于用户级线程，运行时系统始终运行其所在进程的线程，直到内核剥夺 CPU（或没有可运行的线程存在）为止
 71 | * 在内核中创建或撤销线程的代价较大，因此内核级线程被撤销时，系统会将其标记为不可运行的，但其内核数据结构未受影响，之后必须创建新线程时就重新启动一个旧线程。用户级线程也可以这样回收，但因为管理代价很小，所以没必要
 72 | 
 73 | ## 进程间通信（Inter Process Communication）
 74 | 
 75 | * 对共享内存进行访问的程序片段称为临界区（critical region、critical section），如果同一时刻临界区只有一个进程，就能避免 race condition
 76 | * 单处理器系统中实现这点的简单做法是，在每个进程刚进入临界区后立即屏蔽所有中断，在即将离开时再打开中断。屏蔽中断后，时钟中断也被屏蔽。CPU 只有发生时钟中断或其他中断才会进行进程切换，这样 CPU 就不会切换到其他进程
 77 | * 但这个方案并不好，因为把屏蔽中断的权力交给用户进程是不明智的，如果一个进程屏蔽中断后不打开，就可能导致整个系统终止。此外如果系统是多处理器，则屏蔽中断只对执行了 disable 指令的 CPU 有效，其他 CPU 仍将运行
 78 | * 对于内核来说，更新变量或列表的几条指令期间屏蔽中断很方便，因此屏蔽中断对操作系统本身是一项很有用的技术，但对用户进程则不是一种合适的互斥机制
 79 | * 第二种方式是一种软件方案，假设有一个共享锁变量，其初始值为 0，当进程要进入临界区时，首先测试锁，如果值为 0 则将锁设为 1 并进入临界区，如果锁的值已经为 1，则进程等待其值为 0
 80 | * 这种方式的问题在于，如果在一个进程检查到锁为 0，并要将锁设为 1 之前，恰好另一个进程被调度运行将锁设为 1，而第一个进程恢复运行时也将把锁设为 1 并进入临界区，此时临界区就有了两个进程
 81 | * 第三种方式是忙等待（busy waiting），用一个循环不断测试变量值，直到变量值改变才进入临界区，用于忙等待的锁称为自旋锁（spin lock）。这种方式的问题是，在循环中浪费了大量 CPU 时间，应该避免，除非等待时间非常短才有使用的理由
 82 | 
 83 | ```cpp
 84 | // 进程 A
 85 | while (true) {
 86 |   while (x) {
 87 |   }
 88 |   critical_region();
 89 |   x = true;  // 允许进程 B 进入临界区
 90 |   noncritical_region();
 91 | }
 92 | 
 93 | // 进程 B
 94 | while (true) {
 95 |   while (!x) {
 96 |   }
 97 |   critical_region();
 98 |   x = false;  // 允许进程 A 进入临界区
 99 |   noncritical_region();
100 | }
101 | ```
102 | 
103 | * 第四种方式是 1981 年由 G. L. Peterson 提出的 Peterson 算法
104 | 
105 | ```cpp
106 | constexpr int N = 2;  // 进程数量为 2
107 | int turn = 0;         // 轮到的进程
108 | vector<bool> interested(N);
109 | 
110 | void enter_region(int process) {
111 |   int other = 1 - process;  // 另一进程（进程号为 0 或 1）
112 |   interested[process] = true;
113 |   turn = process;  // turn 只有一个，即使两个进程调用也只有后一个赋值会保留
114 |   while (turn == process && interested[other]) {
115 |   }
116 | }
117 | 
118 | void leave_region(int process) {  // 调用上述函数完成后调用此函数
119 |   interested[process] = false;
120 | }
121 | 
122 | // 若进程 A 调用 enter_region 则很快返回，
123 | // 此时进程 B 调用将在 while 循环挂起，
124 | // 直到进程 A 调用 leave_region
125 | // 若进程 AB 同时调用 enter_region，
126 | // turn 为后赋值者，
127 | // 则先赋值者退出循环并调用 leave_region，后赋值者再退出循环
128 | ```
129 | 
130 | * 第五种方式是一种硬件方式，需要借助 TSL 指令，即测试并加锁（test and set lock），该指令是一个原子操作，执行 TSL 指令的 CPU 将锁住内存总线以禁止其他 CPU 在指令结束前访问该内存
131 | 
132 | ```
133 | TSL RX, LOCK // 将内存字 LOCK 读到寄存器 RX 中，然后在该内存地址写一个非零值，读写是原子操作
134 | ```
135 | 
136 | * 为了使用 TSL 指令实现互斥，用一个共享变量 `LOCK` 来协调对内存的访问，其值为 0 时任何进程都能用 TSL 指令将值设为 1 并读写共享内存，操作结束时再用 move 指令将值重置为 0
137 | 
138 | ```asm
139 | enter_region:
140 |     TSL REGISTER, LOCK  ;复制锁到寄存器并设置值为 1
141 |     CMP REGISTER, #0    ;值是否为 0
142 |     JNE enter_region    ;不是 0 则循环
143 |     RET                 ;返回，进入临界区
144 | 
145 | leave_region:
146 |     MOVE LOCK, #0
147 |     RET
148 | ```
149 | 
150 | * 可以用 XCHG 指令替代 TSL 指令，它原子交换两个位置的内容
151 | 
152 | ```asm
153 | enter_region:
154 |     MOVE REGISTER, #1    ;在寄存器放一个 1
155 |     XCHG REGISTER, LOCK  ;原子交换寄存器和锁变量的内容
156 |     CMP REGISTER, #0     ;值是否为 0
157 |     JNE enter_region     ;不是 0 则循环
158 |     RET                  ;返回，进入临界区
159 | 
160 | leave_region:
161 |     MOVE LOCK, #0
162 |     RET
163 | ```
164 | 
165 | * Peterson 算法和 TSL 或 XCHG 解法同样都有忙等待的问题，它们的本质都是在进程进入临界区时检查是否允许进入，不允许则原地等待直到允许为止
166 | 
167 | ## 生产者-消费者问题
168 | 
169 | * 两个进程共享一个固定大小的缓冲区，生产者进程将消息放入缓冲区，消费者进程从缓冲区取出消息
170 | 
171 | ```cpp
172 | constexpr int N = 100;  // 缓冲区的槽数
173 | int cnt = 0;            // 缓冲区数据数
174 | 
175 | void producer() {
176 |   while (true) {
177 |     int item = produce_item();  // 生成新数据
178 |     if (cnt == N) {
179 |       sleep();
180 |     }
181 |     insert_item(item);  // 将消息放入缓冲区
182 |     ++cnt;              // 1
183 |     if (cnt == 1) {
184 |       wakeup(consumer);  // 2
185 |     }
186 |   }
187 | }
188 | 
189 | void consumer() {
190 |   while (true) {
191 |     if (!cnt) {
192 |       sleep();  // 3
193 |     }
194 |     int item = remove_item();  // 从缓冲区取一个数据
195 |     --cnt;
196 |     if (cnt == N - 1) {
197 |       wakeup(producer);
198 |     }
199 |     consume_item(item);  // 打印数据
200 |   }
201 | }
202 | 
203 | // 问题在于 cnt 的访问存在 race condition，
204 | // 如果消费者执行到 3 处，cnt 为 0，在即将 sleep 之前，
205 | // 生产者在此之后才执行到 1 处，此时 cnt 为 1，执行到 2 处，调用 wakeup，
206 | // 但此时消费者还未 sleep，因此 wakeup 的信号丢失，没有实际作用，
207 | // 接着消费者 sleep，生产者开始下一轮循环，
208 | // 生产者下一轮循环到 1 处，cnt 为 2，
209 | // 到 2 处，不再调用 wakeup，消费者保持 sleep，
210 | // 生产者继续之后的循环，并且每一轮都不会唤醒消费者，
211 | // 最终生产者执行到 cnt 为 N 时 sleep，两个进程都将永久 sleep
212 | ```
213 | 
214 | ## 信号量（semaphore）
215 | 
216 | * 信号量是由 E. W. Dijkstra 于 1965 年提出的一种方法，它使用一个整型变量作为信号量，值为 0 表示没有保存下来的唤醒操作，值为正数表示唤醒操作的次数
217 | * 信号量有 down 和 up 两种操作，Dijkstra 在论文中称其为 P 和 V 操作（荷兰语中的 Proberen 意为尝试，Verhogen 意为增加或升高）
218 | * down 操作检查值是否大于 0，若大于 0 则减 1 并继续，若为 0 则进程睡眠，并且此时 down 操作未结束
219 | * up 操作对值加 1。如果有进程在信号量上睡眠，无法完成一个先前的 down 操作，则由系统选择其中一个以允许完成其 down 操作。于是，对一个有睡眠进程的信号量执行一次 up 操作，信号量值仍为 0，但睡眠进程少了一个
220 | * down 操作和 up 操作中的所有操作都是原子的，一般作为系统调用实现。操作系统只要在执行测试信号量、更新信号量、使进程睡眠等操作时暂时屏蔽全部中断即可，这些动作只需要几条指令，所以屏蔽中断不会带来什么副作用。如果使用多个 CPU，则每个信号量应由一个锁保护，使用 TSL 或 XCHG 指令来确保同一时刻只有一个 CPU 对信号量进行操作
221 | * 注意，这里使用 TSL 或 XCHG 指令来防止多 CPU 同时访问一个信号量，与生产者或消费者用忙等待来等待对方腾出或填充缓冲区是完全不同的。信号量操作只需要几微秒，而生产者或消费者则可能需要任意长时间
222 | * 使用三个信号量解决生产者-消费者问题：full 记录已充满的缓冲槽数，初值为 0；empty 记录空的缓冲槽数，初值为缓冲区中槽的数目；mutex 确保生产者和消费者不会同时访问缓冲区，初值为 1
223 | * 供多个进程使用的信号量初值为 1，保证同时只有一个进程可以进入临界区，这种信号量称为二元信号量（binary semaphore）。如果每个进程进入临界区前执行一个 down 操作，并在刚退出时执行一个 up 操作，就能实现互斥
224 | 
225 | ```cpp
226 | constexpr int N = 100;  // 缓冲区的槽数
227 | using semaphore = int;
228 | semaphore mutex = 1;
229 | semaphore empty = N;  // 缓冲区空槽数
230 | semaphore full = 0;   // 缓冲区满槽数
231 | 
232 | void producer() {
233 |   while (true) {
234 |     int item = produce_item();
235 |     down(&empty);
236 |     down(&mutex);
237 |     insert_item(item);
238 |     up(&mutex);
239 |     up(&full);
240 |   }
241 | }
242 | 
243 | void consumer() {
244 |   while (true) {
245 |     down(&full);
246 |     down(&mutex);
247 |     int item = remove_item();
248 |     up(&mutex);
249 |     up(&empty);
250 |     consume_item(item);
251 |   }
252 | }
253 | ```
254 | 
255 | * 信号量的另一个作用是实现同步（synchronization），这里 full 和 empty 保证缓冲区满时生产者停止运行，缓冲区空时消费者停止运行
256 | 
257 | ## 互斥量（mutex）
258 | 
259 | * 如果不需要信号量的计数功能，可以使用其称为互斥量的简化版本。互斥量仅适用于管理共享资源或一小段代码。互斥量实现简单且有效，在实现用户空间线程包时十分有用
260 | * 互斥量只有加锁和解锁两种状态，只需要一个二进制位表示，不过实际上一般用整型量，0 表示解锁，其他值表示加锁
261 | * 线程需要访问临界区时调用 mutex_lock，如果互斥量是解锁的则临界区可用，调用成功，线程可以进入临界区，否则线程被阻塞，直到临界区中的线程完成并调用 mutex_unlock。如果多个线程阻塞在该互斥量上，则随机选择一个线程并允许它获得锁
262 | * 用 TSL 或 XCHG 指令就可以很容易地在用户空间实现互斥量
263 | 
264 | ```asm
265 | mutex_lock:
266 |     TSL REGISTER, MUTEX  ;将互斥量复制到寄存器，并将互斥量置为 1
267 |     CMP REGISTER, #0
268 |     JZE ok               ;如果互斥量为 0，它被解锁，所以返回
269 |     CALL thread_yield    ;互斥量忙，调度另一个线程
270 |     JMP mutex_lock       ;稍后再试
271 | ok: RET
272 | 
273 | mutex_unlock:
274 |     MOVE MUTEX, #0       ;将互斥量置0
275 |     RET
276 | ```
277 | 
278 | * thread_yield 只是调用用户空间线程调度程序，运行十分快捷，这样 mutex_lock 和 mutex_unlock 都不需要任何内核调用。用户级线程通过互斥量的这个过程即可实现同步，而同步过程仅需要少量指令
279 | 
280 | ## 管程（monitor）
281 | 
282 | * 如果把生产者代码中的两个 down 操作交换顺序，使得 mutex 在 empty 之前减 1，就会导致死锁，因此使用信号量要十分小心。为了更易于编写正确的程序，Brinch Hansen 和 Hoare 提出了一种称为管程的高级同步原语
283 | * 一个管程是由过程、变量、数据结构等组成的一个集合，它们组成一个特殊的模块或软件包，进程可以在任何需要的时候调用管程中的过程，但不能在管程之外声明的过程中直接访问管程内的数据结构
284 | * 任一时刻管程中只能有一个活跃进程，这一特性使得管程能有效地完成互斥。管程是编程语言的组成部分，编译器知道其特殊性，进入管程时的互斥由编译器负责，通常做法是使用互斥量或二元信号量。这样就不需要程序员安排互斥，出错的可能性就小很多
285 | * 管程提供了互斥的简便途径，但此外还需要一种方法使得进程在无法继续运行时被阻塞，这个方法就是引入条件变量（condition variable）
286 | * 当一个管程过程发现它无法继续运行时（如生产者发现缓冲区满），则会在某个条件变量（如 full）上执行 wait 操作，该操作将阻塞当前进程，并将另一个在管程外的进程调入管程。另一个进程可以通过对同一条件变量执行 signal 操作唤醒阻塞进程
287 | * 为了避免管程中有两个活跃进程，执行 signal 操作之后有两种规则。Hoare 建议让新唤醒的进程运行，挂起另一个进程。Brinch Hansen 建议执行 signal 的进程必须立即退出管程，即 signal 语句只能作为一个管程过程的最后一条语句。后者在概念上更简单，并且更容易实现。第三种方法是，让发信号者继续运行，直到其退出管程，才允许等待的进程开始运行
288 | * 如果一个条件变量上有若干进程正在等待，则对其执行 signal 操作之后，系统调度程序只能选择其中一个恢复运行
289 | * 如果一个条件变量没有等待进程，则对其执行 signal 会丢失信号，因此 wait 操作必须在 signal 之前。这与之前提到的 sleep 和 wakeup 的关键区别是，管程的自动互斥保证了在 wait 完成之前不会先 signal
290 | 
291 | ## 消息传递（message passing）
292 | 
293 | * 管程和信号量通过共享内存解决 CPU 互斥问题，但没有提供不同机器间（比如局域网中的机器）的信息交换方法
294 | * 消息传递使用 send 和 receive 原语来实现进程间通信，它们像信号量而不像管程，是系统调用而非语言成分
295 | 
296 | ```cpp
297 | send(destination, &message);
298 | receive(source, &message);
299 | ```
300 | 
301 | * send 向一个给定目标发送一条消息，receive 从一个给定源（或者任意源）接收一条消息，如果没有消息可用则接收者可能被阻塞直至有一条消息到达，或者带着一个错误码立即返回
302 | * 消息传递系统面临许多设计难点：比如消息可能被网络丢失，需要三次握手来确认信息到达情况；比如发送方未收到确认，因此重发消息导致接收方收到两条相同消息，接收方需要区分新老消息；比如身份认证（authentication）问题，客户端如何确认通信的是一个文件服务器还是冒充者
303 | * 消息传递方式可以有许多变体，一种对消息进行编址的方式是，为每个进程分配一个唯一地址，让消息按进程的地址编址。另一种方式是引入一种称为信箱（mailbox）的数据结构，用来对一定数量的消息进行缓冲。使用信箱时，send 和 receive 调用的地址参数就是信箱而非进程的地址
304 | 
305 | ```cpp
306 | constexpr int N = 100;
307 | 
308 | void producer() {
309 |   message m;  // 消息缓冲区
310 | 
311 |   while (true) {
312 |     int item = produce_item();
313 |     receive(consumer, &m);    // 等待消费者发送空缓冲区
314 |     build_message(&m, item);  // 建立一个待发送的消息
315 |     send(consumer, &m);       // 发送数据项给消费者
316 |   }
317 | }
318 | 
319 | void consumer() {
320 |   message m;
321 | 
322 |   for (int i = 0; i < N; ++i) {
323 |     send(producer, &m);  // 发送 N 个空缓冲区
324 |   }
325 | 
326 |   while (true) {
327 |     receive(producer, &m);        // 接收包含数据项的消息
328 |     int item = extract_item(&m);  // 将数据项从消息中提取出来
329 |     send(producer, &m);           // 将空缓冲区发送回生产者
330 |     consume_item(item);
331 |   }
332 | }
333 | ```
334 | 
335 | * 使用信箱的另一种极端方法是彻底取消缓冲。采取这种方法时，如果 send 在 receive 之前执行则发送进程被阻塞，直到 receive 发生，反之亦然。执行 receive 时，消息可以直接从发送者复制到接收者，不用任何中间缓冲。这种方案常被称为会合（rendezvous），实现起来更容易，但降低了灵活性，因为发送者和接收者一定要以步步紧接的方式运行
336 | * 通常在并行程序设计系统中使用消息传递，一个著名的消息传递系统是消息传递接口（Message-Passing Interface，MPI），它广泛应用于科学计算
337 | 
338 | ## 屏障（barrier）
339 | 
340 | * 屏障是一种用于进程组的同步机制，只有所有进程就绪时才能进入下一阶段。每个阶段的结尾设置一个屏障，当一个进程到达屏障时将被阻拦，直到所有进程到达屏障为止
341 | 
342 | ## 调度
343 | 
344 | * 几乎所有进程的 I/O 请求和计算都是交替突发的，如果进程花费大量时间在计算上，则称为计算密集型（compute-bound），如果大量时间花费在等待 I/O 上，则称为 I/O 密集型（I/O-bound）
345 | * 随着 CPU 变得越来越快，更多的进程倾向为 I/O 密集型。这种现象的原因是 CPU 的改进比磁盘的改进快得多，所以未来对 I/O 密集型进程的调度处理更为重要
346 | * 调度的基本思想是，如果需要运行 I/O 密集型进程，就应该让它尽快得到机会，以便发出磁盘请求并保持磁盘始终忙碌
347 | * 根据如何处理时钟中断，可以把调度算法分为非抢占式和抢占式两类
348 | * 非抢占式调度算法挑选一个进程，然后让该进程运行直至阻塞，或直到该进程自动释放 CPU。即使该进程运行了几个小时也不会被强迫挂起，这样导致时钟中断发生时不会进行调度。在处理完时钟中断后，如果没有更高优先级的进程，则被中断的进程将继续运行
349 | * 抢占式调度算法挑选一个进程，让该进程运行某个固定时段的最大值，时段结束时将挂起该进程，并挑选另一个进程运行。抢占式调度需要在时间间隔的末端发生时钟中断，以便把 CPU 控制返回给调度程序，如果没有可用的时钟，就只能选择非抢占式调度
350 | * 不同的应用领域有不同的目标，也就需要不同的调度算法。环境可以划分为三种
351 |   * 批处理：广泛用于商业领域，比如处理薪水清单、账目收入、账目支出、利息计算，批处理系统不会有用户在旁边急切等待响应，因此通常使用非抢占式算法，或对每个进程都有长时间周期的抢占式算法，这样减少了进程切换从而改进了性能
352 |   * 交互式：必须使用抢占式算法，以避免 CPU 被一个进程霸占而拒绝为其他进程服务。服务器也归于此类，因为通常要服务多个突发的远程用户
353 |   * 实时：有时不需要抢占，因为进程了解它们可能会长时间得不到运行，所以通常很快地完成各自工作并阻塞
354 | 
355 | ## 调度算法的评价指标
356 | 
357 | * 对于批处理系统，调度算法的评价指标主要有三个
358 |   * 吞吐量（throughput）：系统单位时间内完成的作业数量，比如 10 道作业花费 100 秒，则吞吐量为 0.1 道/秒
359 |   * 周转时间（turnaround time）：一个批处理作业从提交开始到完成的统计平均时间
360 |   * CPU 利用率：CPU 忙碌时间相对总时间的占比
361 | * 对于交互式系统，评价指标最重要的是最小响应时间，即从发出命令到得到响应之间的时间
362 | * 实时系统的特点是或多或少必须满足截止时间，多数实时系统中，可预测性十分重要，比如如果多媒体实时系统的音频进程运行错误太多，音质就会明显下降，为此实时系统的调度算法必须是高度可预测和有规律的
363 | 
364 | ## 批处理系统中的调度
365 | 
366 | ### 先来先服务（First-Come First-Served，FCFS）
367 | 
368 | * 非抢占式。进程按照请求 CPU 的先后顺序调度，优点是公平，算法实现简单，不会导致进程饥饿（Starvation，等待时间对进程响应带来明显影响）
369 | 
370 | ```
371 | 进程 到达时间 运行时间
372 | P1   0        7
373 | P2   2        4
374 | P3   4        1
375 | P4   5        4
376 | 
377 | 先到先服务，因此调度顺序为 P1 -> P2 -> P3 -> P4
378 | P1      P2   P3 P4
379 | ------- ---- -  ----
380 | 
381 | 周转时间 = 完成时间 - 到达时间
382 | P1 = 7 - 0 = 7
383 | P2 = 11 - 2 = 9
384 | P3 = 12 - 4 = 8  // 只运行 1，周转时间却为 8（其中等待 7），可见 FCFS 算法对短作业不利
385 | P4 = 16 - 5 = 11
386 | 平均周转时间 = 8.75
387 | 
388 | 带权周转时间 = 周转时间 / 运行时间
389 | P1 = 7 / 7 = 1
390 | P2 = 9 / 4 = 2.25
391 | P3 = 8 / 1 = 8
392 | P4 = 11 / 4 = 2.75
393 | 平均带权周转时间 = 3.5
394 | 
395 | 等待时间 = 周转时间 - 运行时间（不考虑等待 I/O 操作的时间）
396 | P1 = 7 - 7 = 0
397 | P2 = 9 - 4 = 5
398 | P3 = 8 - 1 = 7
399 | P4 = 11 - 4 = 7
400 | 平均等待时间 = 4.75
401 | ```
402 | 
403 | ### 最短作业优先（Shortest Job First，SJF）
404 | 
405 | * 非抢占式。选择已到达的且运行时间最短的进程，运行时间相同则先到达的先运行。目标是追求最短的平均周转时间、平均带权周转时间、平均等待时间，缺点是不公平，对短作业有利，对长作业不利，如果一直有短作业到达可能导致长作业饥饿
406 | 
407 | ```
408 | 进程 到达时间 运行时间
409 | P1   0        7
410 | P2   2        4
411 | P3   4        1
412 | P4   5        4
413 | 
414 | P1 先到达，P1 运行结束时 P2、P3、P4 均到达，P3 运行时间最短先运行
415 | P2、P4 运行时间相同，P2 先到达，因此 P2 先于 P4 运行
416 | 
417 | 最终调度顺序为 P1 -> P3 -> P2 -> P4
418 | P1      P3 P2    P4
419 | ------- -  ----  ----
420 | 
421 | 周转时间 = 完成时间 - 到达时间
422 | P1 = 7 - 0 = 7
423 | P2 = 12 - 2 = 10
424 | P3 = 8 - 4 = 4
425 | P4 = 16 - 5 = 11
426 | 平均周转时间 = 8
427 | 
428 | 带权周转时间 = 周转时间 / 运行时间
429 | P1 = 7 / 7 = 1
430 | P2 = 10 / 4 = 2.5
431 | P3 = 4 / 1 = 4
432 | P4 = 11 / 4 = 2.75
433 | 平均带权周转时间 = 2.56
434 | 
435 | 等待时间 = 周转时间 - 运行时间（不考虑等待 I/O 操作的时间）
436 | P1 = 7 - 7 = 0
437 | P2 = 10 - 4 = 6
438 | P3 = 4 - 1 = 3
439 | P4 = 11 - 4 = 7
440 | 平均等待时间 = 4
441 | ```
442 | 
443 | ### 最短剩余时间优先（Shortest Remaining Time Next，SRTN）
444 | 
445 | * SRTN 是 SJF 的抢占式版本，每当新进程加入时，调度程序总是选择剩余运行时间最短的进程运行，如果当前进程剩余运行时间比新进程长，则挂起当前进程而运行新进程
446 | 
447 | ```
448 | 进程 到达时间 运行时间
449 | P1   0        7
450 | P2   2        4
451 | P3   4        1
452 | P4   5        4
453 | 
454 | P2 到达时，P1 剩余 5，P2 为 4，运行 P2
455 | P3 到达时，P1 剩余 5，P2 剩余 2，P3 为 1，运行 P3
456 | P4 到达时，P3 运行结束，P1 剩余 5，P2 剩余 2，P4 为 4，运行 P2
457 | 最后依次运行 P4 和 P1
458 | 
459 | 最终调度顺序为 P1 -> P2 -> P3 -> P2 -> P4 -> P1
460 | P1 P2 P3 P2 P4    P1
461 | -- -- -  -- ----  -----
462 | 
463 | 周转时间 = 完成时间 - 到达时间
464 | P1 = 16 - 0 = 16
465 | P2 = 7 - 2 = 5
466 | P3 = 5 - 4 = 1
467 | P4 = 11 - 5 = 6
468 | 平均周转时间 = 7
469 | 
470 | 带权周转时间 = 周转时间 / 运行时间
471 | P1 = 16 / 7 = 2.29
472 | P2 = 5 / 4 = 1.25
473 | P3 = 1 / 1 = 1
474 | P4 = 6 / 4 = 1.5
475 | 平均带权周转时间 = 1.51
476 | 
477 | 等待时间 = 周转时间 - 运行时间（不考虑等待 I/O 操作的时间）
478 | P1 = 16 - 7 = 9
479 | P2 = 5 - 4 = 1
480 | P3 = 1 - 1 = 0
481 | P4 = 6 - 4 = 2
482 | 平均等待时间 = 3
483 | ```
484 | 
485 | ### 高响应比优先（Highest Response Ratio Next，HRRN）
486 | 
487 | * 非抢占式。在所有已到达进程中选择响应比（`等待时间 / 运行时间 + 1`）最高的运行，综合 FCFS 和 SJF 的优点，等待时间长、运行时间短的优先，避免长作业饥饿的问题
488 | 
489 | ```
490 | 进程 到达时间 运行时间
491 | P1   0        7
492 | P2   2        4
493 | P3   4        1
494 | P4   5        4
495 | 
496 | 响应比 = （等待时间 + 运行时间） / 运行时间
497 | P1 运行至结束，P2、P3、P4 均到达，响应比分别为
498 | P2 = (5 + 4) / 4 = 2.25
499 | P3 = (3 + 1) / 1 = 4
500 | P4 = (2 + 4) / 4 = 1.5
501 | 运行 P3，P3 结束时，响应比分别为
502 | P2 = (6 + 4) / 4 = 2.5
503 | P4 = (3 + 4) / 4 = 1.75
504 | 运行 P2，最后运行 P4
505 | 
506 | 最终调度顺序为 P1 -> P3 -> P2 -> P4
507 | P1      P3 P2    P4
508 | ------- -  ----  ----
509 | ```
510 | 
511 | ## 交互式系统中的调度
512 | 
513 | ### 时间片轮转调度（Round-Robin Scheduling，RR）
514 | 
515 | * RR 是一种简单公平的抢占式调度算法，并且可以避免饥饿。每个进程被分配一个时间片（quantum）。时间片结束时，如果进程还在运行，则剥夺 CPU 并分配给另一个进程。如果进程在时间片结束前阻塞或结束，则 CPU 立即切换。RR 算法实现很容易，只需要维护一张进程队列表
516 | 
517 | ```
518 | A -> B -> C -> D
519 | 
520 | 若 A 用完时间片，但仍在运行，则插入到队列尾
521 | B -> C -> D -> A
522 | 
523 | 若 B 用完时间片，但仍在运行，并到达一个新进程 E，则先插入新进程
524 | C -> D -> A -> E -> B
525 | 
526 | 若 C 用完时间片之前就结束了，则直接切换到下一个进程
527 | D -> A -> E -> B
528 | ```
529 | 
530 | * 需要考虑的是时间片的长度，假设时间片为 4 ms，上下文切换为 1 ms，则 CPU 完成 4 ms 工作后将浪费 1 ms 进行上下文切换（context switch），即浪费了 20% 的时间。但如果时间片太大，就会退化为 FCFS，导致增大响应时间。通常为了提高 CPU 效率，设置时间片时，切换开销占比应不超过 1%
531 | 
532 | ### 优先级调度
533 | 
534 | * 为每个进程设置优先级，在已到达进程中，选择优先级最高的运行，可以为抢占式或非抢占式
535 | * 比如对于操作系统来说，I/O 密集型进程的优先级应该更高。I/O 密集型进程多数时间用于等待 I/O 结束，因此需要 CPU 时应立即分配给它以便启动下一个 I/O 请求，这样就可以在另一个进程计算的同时执行 I/O 操作
536 | * 一种简单做法是将优先级设置为 `1 / f`，`f` 为该进程在上一时间片中的运行时间占比。比如在 50 ms 时间片中，使用 1 ms 的进程优先级为 50，使用 25 ms 的进程优先级为 2。将进程按优先级分组，再使用 RR 算法调度高优先级组中的进程
537 | 
538 | ### 多级反馈队列调度
539 | 
540 | * CTSS（Compatible Time Sharing System）是最早使用优先级调度的系统之一，但存在进程切换速度太慢的问题，其设计者意识到设置较长的时间片可以减少切换次数，但长时间片又会影响到响应时间。最终的解决方法是多级反馈队列调度，它是对 FCFS、SJF、RR、优先级调度的折中权衡
541 | * 设置多个优先级队列，每个级别对应不同长度的时间片，比如第一级（最高级）时间片为 1，第二级为 2，第三级为 4，以此类推
542 | * 如果一个进程用完当前级别时间片后仍未运行完，则加入下一级队列队尾，如果已经位于最后一级则放回该级队尾
543 | * 高优先级队列为空时，才会调度低优先级队列，因此可能导致低优先级进程饥饿
544 | * 比如一个进程需要 100 个时间片，第一次分配 1 个时间片，第二次分配 2 个，接下来是 4、8、16、32、64，最后一次使用 64 中的 37 个即可结束工作，一共进行 7 次切换。如果使用 RR 算法，则需要 100 次切换
545 | 
546 | ### 最短进程优先
547 | 
548 | * 关键在于如何从可运行进程中找出最短的一个
549 | * 一种方法是根据过去的行为进行预测。假设某终端每条命令的估计运行时间为 `T0`，测量到下一次运行时间为 `T1`，则估计时间可以修正为 `a * T0 + (1 - a) * T1`，比如设 `a` 为 `1 / 2` 可以得到序列如下
550 | 
551 | ```
552 | T0
553 | T0/2 + T1/2
554 | T0/4 + T1/4 + T2/2
555 | T0/8 + T1/8 + T2/4 + T3/2  // T0 在此时估计时间中的占比下降到 1/8
556 | ```
557 | 
558 | ### 保证调度
559 | 
560 | * 向用户作出明确的性能保证，然后实现它。比如有 `n` 个进程运行的单用户系统中，如果所有进程等价，则每个进程获得 `1 / n` 的 CPU 时间，为了实现所作的保证，系统跟踪每个进程已使用的 CPU 时间，并计算应获得的时间，然后转向已用时间最少的进程，直到超过最接近的竞争者
561 | 
562 | ### 彩票调度（Lottery Scheduling）
563 | 
564 | * 保证调度的想法不错，但很难实现。彩票调度可以给出类似的可预测结果，并且实现非常简单。其基本思想是为进程提供各种系统资源（如 CPU 时间）的彩票，一旦需要做出调度决策，就随机抽出一张彩票，拥有该彩票的进程获取该资源
565 | * 比如系统每秒进行 50 次抽奖，每个获奖者可以获得 20 ms 的 CPU 时间作为奖励
566 | * 可以给更重要的进程额外的彩票，以增加其获胜的机会，比如出售 100 张彩票，一个进程持有其中 20 张，则每次抽奖该进程就有 20% 的取胜机会，在较长运行时间中该进程就会得到 20% 的 CPU
567 | * 彩票调度可以解决其他方法很难解决的问题，比如一个视频服务器上有若干提供视频流的进程，每个流的帧率不同，假设帧率分别为 10、20、25，那么给这些进程分别分配 10、20、25 张彩票，它们就会自动按照接近 10:20:25 的比例划分 CPU 的使用
568 | 
569 | ### 公平分享调度
570 | 
571 | * 之前的调度关注的都是进程本身，而没有关注进程所有者。假设两个用户分别启动 9 个进程和 1 个进程，使用 RR 算法，则两者分别得到 90% 和 10% 的 CPU 时间。为了避免这种情况，在调度处理之前应该考虑进程拥有者
572 | 


--------------------------------------------------------------------------------
/docs/08_advanced_thread_management.md:
--------------------------------------------------------------------------------
  1 | ## 线程池
  2 | 
  3 | * 线程池一般会用一个表示线程数的参数来初始化，内部需要一个队列来存储任务。下面是一个最简单的线程池实现
  4 | 
  5 | ```cpp
#include <condition_variable>
#include <functional>
#include <mutex>
#include <queue>
#include <thread>
#include <utility>
#include <vector>
 12 | 
 13 | class ThreadPool {
 14 |  public:
 15 |   explicit ThreadPool(std::size_t n) {
 16 |     for (std::size_t i = 0; i < n; ++i) {
 17 |       std::thread{[this] {
 18 |         std::unique_lock<std::mutex> l(m_);
 19 |         while (true) {
 20 |           if (!q_.empty()) {
 21 |             auto task = std::move(q_.front());
 22 |             q_.pop();
 23 |             l.unlock();
 24 |             task();
 25 |             l.lock();
 26 |           } else if (done_) {
 27 |             break;
 28 |           } else {
 29 |             cv_.wait(l);
 30 |           }
 31 |         }
 32 |       }}.detach();
 33 |     }
 34 |   }
 35 | 
 36 |   ~ThreadPool() {
 37 |     {
 38 |       std::lock_guard<std::mutex> l(m_);
 39 |       done_ = true;  // cv_.wait 使用了 done_ 判断所以要加锁
 40 |     }
 41 |     cv_.notify_all();
 42 |   }
 43 | 
 44 |   template <typename F>
 45 |   void submit(F&& f) {
 46 |     {
 47 |       std::lock_guard<std::mutex> l(m_);
 48 |       q_.emplace(std::forward<F>(f));
 49 |     }
 50 |     cv_.notify_one();
 51 |   }
 52 | 
 53 |  private:
 54 |   std::mutex m_;
 55 |   std::condition_variable cv_;
 56 |   bool done_ = false;
 57 |   std::queue<std::function<void()>> q_;
 58 | };
 59 | ```
 60 | 
 61 | * 如果想让提交的任务带参数会麻烦很多
 62 | 
 63 | ```cpp
 64 | template <class F, class... Args>
 65 | auto ThreadPool::submit(F&& f, Args&&... args) {
 66 |   using RT = std::invoke_result_t<F, Args...>;
 67 |   // std::packaged_task 不允许拷贝构造，不能直接传入 lambda，
 68 |   // 因此要借助 std::shared_ptr
 69 |   auto task = std::make_shared<std::packaged_task<RT()>>(
 70 |       std::bind(std::forward<F>(f), std::forward<Args>(args)...));
 71 |   // 但 std::bind 会按值拷贝实参，因此这个实现不允许任务的实参是 move-only 类型
 72 |   {
 73 |     std::lock_guard<std::mutex> l(m_);
 74 |     q_.emplace([task]() { (*task)(); });  // 捕获指针以传入 std::packaged_task
 75 |   }
 76 |   cv_.notify_one();
 77 |   return task->get_future();
 78 | }
 79 | ```
 80 | 
 81 | * 书上实现的线程池都在死循环中使用了 [std::this_thread::yield](https://en.cppreference.com/w/cpp/thread/yield) 来转让时间片
 82 | 
 83 | ```cpp
 84 | #include <atomic>
 85 | #include <functional>
 86 | #include <thread>
 87 | #include <vector>
 88 | 
 89 | #include "concurrent_queue.hpp"
 90 | 
 91 | class ThreadPool {
 92 |  public:
 93 |   ThreadPool() {
 94 |     std::size_t n = std::thread::hardware_concurrency();
 95 |     try {
 96 |       for (std::size_t i = 0; i < n; ++i) {
 97 |         threads_.emplace_back(&ThreadPool::worker_thread, this);
 98 |       }
 99 |     } catch (...) {
100 |       done_ = true;
101 |       for (auto& x : threads_) {
102 |         if (x.joinable()) {
103 |           x.join();
104 |         }
105 |       }
106 |       throw;
107 |     }
108 |   }
109 | 
110 |   ~ThreadPool() {
111 |     done_ = true;
112 |     for (auto& x : threads_) {
113 |       if (x.joinable()) {
114 |         x.join();
115 |       }
116 |     }
117 |   }
118 | 
119 |   template <typename F>
120 |   void submit(F f) {
121 |     q_.push(std::function<void()>(f));
122 |   }
123 | 
124 |  private:
125 |   void worker_thread() {
126 |     while (!done_) {
127 |       std::function<void()> task;
128 |       if (q_.try_pop(task)) {
129 |         task();
130 |       } else {
131 |         std::this_thread::yield();
132 |       }
133 |     }
134 |   }
135 | 
136 |  private:
137 |   std::atomic<bool> done_ = false;
138 |   ConcurrentQueue<std::function<void()>> q_;
139 |   std::vector<std::thread> threads_;  // 要在 done_ 和 q_ 之后声明
140 | };
141 | ```
142 | 
143 | * 这样做的问题是，如果线程池处于空闲状态，就会无限转让时间片，导致 CPU 使用率达 100%，下面是对书中的线程池的 CPU 使用率测试结果
144 | 
145 | ![](images/8-1.png)
146 | 
147 | * 对相同任务用之前实现的线程池的测试结果
148 | 
149 | ![](images/8-2.png)
150 | 
151 | * 这里还是把书上的内容列出来，下文均为书中内容
152 | * 这个线程池只能执行无参数无返回值的函数，并且可能出现死锁，下面希望能执行无参数但有返回值的函数。为了得到返回值，就应该把函数传递给 [std::packaged_task](https://en.cppreference.com/w/cpp/thread/packaged_task) 再加入队列，并返回 [std::packaged_task](https://en.cppreference.com/w/cpp/thread/packaged_task) 中的 [std::future](https://en.cppreference.com/w/cpp/thread/future)。由于 [std::packaged_task](https://en.cppreference.com/w/cpp/thread/packaged_task) 是 move-only 类型，而 [std::function](https://en.cppreference.com/w/cpp/utility/functional/function) 要求存储的函数实例可以拷贝构造，因此这里需要实现一个支持 move-only 类型的函数包裹类，即一个带 call 操作的类型擦除（type-erasure）类
153 | 
154 | ```cpp
155 | #include <memory>
156 | #include <utility>
157 | 
// Move-only, type-erased holder for a callable that is invoked as `f()`.
// Unlike std::function it does not need the stored callable to be
// copy-constructible, so it can hold move-only callables.
// A default-constructed wrapper holds nothing and must not be invoked.
class FunctionWrapper {
  // Type-erasure interface: a single virtual call hook.
  struct ImplBase {
    virtual ~ImplBase() = default;
    virtual void call() = 0;
  };

  // Concrete holder that owns a callable of type F.
  template <typename F>
  struct ImplType final : ImplBase {
    ImplType(F&& f) noexcept : f_(std::move(f)) {}
    void call() override { f_(); }

    F f_;
  };

 public:
  FunctionWrapper() = default;

  // Takes ownership of `f` by moving from it; intended for rvalue callables.
  template <typename F>
  FunctionWrapper(F&& f) : impl_(std::make_unique<ImplType<F>>(std::move(f))) {}

  // Movable: ownership of the stored callable transfers to the destination.
  FunctionWrapper(FunctionWrapper&&) noexcept = default;
  FunctionWrapper& operator=(FunctionWrapper&&) noexcept = default;

  // Not copyable.
  FunctionWrapper(const FunctionWrapper&) = delete;
  FunctionWrapper& operator=(const FunctionWrapper&) = delete;

  // Invokes the stored callable.
  void operator()() const { impl_->call(); }

 private:
  std::unique_ptr<ImplBase> impl_;
};
196 | ```
197 | 
198 | * 用这个包裹类替代 `std::function<void()>`
199 | 
200 | ```cpp
201 | #include <atomic>
202 | #include <future>
203 | #include <thread>
204 | #include <type_traits>
205 | #include <vector>
206 | 
207 | #include "concurrent_queue.hpp"
208 | #include "function_wrapper.hpp"
209 | 
210 | // Fixed-size thread pool whose workers poll one shared ConcurrentQueue.
211 | // submit() wraps the callable in a std::packaged_task, so the caller gets a
212 | // std::future for the result; tasks are stored as FunctionWrapper because
213 | // std::packaged_task is move-only.
214 | class ThreadPool {
215 |  public:
216 |   // Starts the worker threads. If starting one of them throws, the workers
217 |   // already running are stopped and joined before the exception is rethrown.
218 |   ThreadPool() {
219 |     std::size_t n = std::thread::hardware_concurrency();
220 |     if (n == 0) {
221 |       // hardware_concurrency() returns 0 when the value is not computable;
222 |       // without any worker, submitted tasks would never run.
223 |       n = 1;
224 |     }
225 |     try {
226 |       for (std::size_t i = 0; i < n; ++i) {
227 |         threads_.emplace_back(&ThreadPool::worker_thread, this);
228 |       }
229 |     } catch (...) {
230 |       done_ = true;
231 |       for (auto& x : threads_) {
232 |         if (x.joinable()) {
233 |           x.join();
234 |         }
235 |       }
236 |       throw;
237 |     }
238 |   }
239 | 
240 |   // Asks the workers to stop and joins them. Workers leave their loop as soon
241 |   // as they see done_, so tasks still queued at that point are not executed.
242 |   ~ThreadPool() {
243 |     done_ = true;
244 |     for (auto& x : threads_) {
245 |       if (x.joinable()) {
246 |         x.join();
247 |       }
248 |     }
249 |   }
250 | 
251 |   // Queues f (callable with no arguments) and returns the future through
252 |   // which its result becomes available.
253 |   template <typename F>
254 |   std::future<std::invoke_result_t<F>> submit(F f) {
255 |     std::packaged_task<std::invoke_result_t<F>()> task(std::move(f));
256 |     std::future<std::invoke_result_t<F>> res(task.get_future());
257 |     q_.push(std::move(task));
258 |     return res;
259 |   }
260 | 
261 |  private:
262 |   // Worker loop: runs queued tasks, yielding whenever the queue is empty.
263 |   void worker_thread() {
264 |     while (!done_) {
265 |       FunctionWrapper task;
266 |       if (q_.try_pop(task)) {
267 |         task();
268 |       } else {
269 |         std::this_thread::yield();
270 |       }
271 |     }
272 |   }
273 | 
274 |  private:
275 |   std::atomic<bool> done_ = false;
276 |   ConcurrentQueue<FunctionWrapper> q_;
277 |   std::vector<std::thread> threads_;  // must be declared after done_ and q_
278 | };
263 | ```
264 | 
265 | * 往线程池添加任务会增加任务队列的竞争，lock-free 队列可以避免这点但存在乒乓缓存的问题。为此需要把任务队列拆分为线程独立的本地队列和全局队列，当线程队列无任务时就去全局队列取任务
266 | 
267 | ```cpp
268 | #include <atomic>
269 | #include <future>
270 | #include <memory>
271 | #include <queue>
272 | #include <thread>
273 | #include <type_traits>
274 | #include <vector>
275 | 
276 | #include "concurrent_queue.hpp"
277 | #include "function_wrapper.hpp"
278 | 
279 | // Thread pool with one global queue plus a thread-local queue per worker.
280 | // Tasks submitted from a worker thread go to that worker's own queue, which
281 | // needs no locking; tasks submitted from any other thread go to the global
282 | // queue.
283 | class ThreadPool {
284 |  public:
285 |   // Starts the worker threads. If starting one of them throws, the workers
286 |   // already running are stopped and joined before the exception is rethrown.
287 |   ThreadPool() {
288 |     std::size_t n = std::thread::hardware_concurrency();
289 |     if (n == 0) {
290 |       // hardware_concurrency() returns 0 when the value is not computable;
291 |       // without any worker, submitted tasks would never run.
292 |       n = 1;
293 |     }
294 |     try {
295 |       for (std::size_t i = 0; i < n; ++i) {
296 |         threads_.emplace_back(&ThreadPool::worker_thread, this);
297 |       }
298 |     } catch (...) {
299 |       done_ = true;
300 |       for (auto& x : threads_) {
301 |         if (x.joinable()) {
302 |           x.join();
303 |         }
304 |       }
305 |       throw;
306 |     }
307 |   }
308 | 
309 |   // Asks the workers to stop and joins them. Workers leave their loop as soon
310 |   // as they see done_, so tasks still queued at that point are not executed.
311 |   ~ThreadPool() {
312 |     done_ = true;
313 |     for (auto& x : threads_) {
314 |       if (x.joinable()) {
315 |         x.join();
316 |       }
317 |     }
318 |   }
319 | 
320 |   // Queues f (callable with no arguments) and returns the future for its
321 |   // result. local_queue_ is non-null only on a worker thread, so a task
322 |   // submitted from inside another task lands in that worker's own queue.
323 |   template <typename F>
324 |   std::future<std::invoke_result_t<F>> submit(F f) {
325 |     std::packaged_task<std::invoke_result_t<F>()> task(std::move(f));
326 |     std::future<std::invoke_result_t<F>> res(task.get_future());
327 |     if (local_queue_) {
328 |       local_queue_->push(std::move(task));
329 |     } else {
330 |       pool_queue_.push(std::move(task));
331 |     }
332 |     return res;
333 |   }
334 | 
335 |  private:
336 |   // Worker loop: creates this thread's local queue, then prefers local tasks
337 |   // over global ones and yields when both queues are empty.
338 |   void worker_thread() {
339 |     local_queue_ = std::make_unique<std::queue<FunctionWrapper>>();
340 |     while (!done_) {
341 |       FunctionWrapper task;
342 |       if (local_queue_ && !local_queue_->empty()) {
343 |         task = std::move(local_queue_->front());
344 |         local_queue_->pop();
345 |         task();
346 |       } else if (pool_queue_.try_pop(task)) {
347 |         task();
348 |       } else {
349 |         std::this_thread::yield();
350 |       }
351 |     }
352 |   }
353 | 
354 |  private:
355 |   std::atomic<bool> done_ = false;
356 |   ConcurrentQueue<FunctionWrapper> pool_queue_;
357 |   // One pointer per thread; being static, it is shared by every ThreadPool
358 |   // instance that runs on that thread.
359 |   inline static thread_local std::unique_ptr<std::queue<FunctionWrapper>>
360 |       local_queue_;
361 |   std::vector<std::thread> threads_;
362 | };
343 | ```
344 | 
345 | * 这可以减少对全局任务队列的竞争，但如果任务分配不均，就会导致某个线程的本地队列中有很多任务，而其他线程无事可做，为此应该让没有工作的线程可以从其他线程的队列中获取任务
346 | 
347 | ```cpp
348 | #include <atomic>
349 | #include <deque>
350 | #include <future>
351 | #include <memory>
352 | #include <mutex>
353 | #include <thread>
354 | #include <type_traits>
355 | #include <vector>
356 | 
357 | #include "concurrent_queue.hpp"
358 | #include "function_wrapper.hpp"
359 | 
360 | // Mutex-protected deque of tasks. The owning thread pushes and pops at the
361 | // front, while other threads steal from the back.
362 | class WorkStealingQueue {
363 |  public:
364 |   WorkStealingQueue() = default;
365 | 
366 |   WorkStealingQueue(const WorkStealingQueue&) = delete;
367 | 
368 |   WorkStealingQueue& operator=(const WorkStealingQueue&) = delete;
369 | 
370 |   // Adds a task at the front of the queue.
371 |   void push(FunctionWrapper f) {
372 |     std::lock_guard<std::mutex> guard(m_);
373 |     q_.push_front(std::move(f));
374 |   }
375 | 
376 |   // Reports whether the queue currently holds no task.
377 |   bool empty() const {
378 |     std::lock_guard<std::mutex> guard(m_);
379 |     return q_.empty();
380 |   }
381 | 
382 |   // Moves the front task into res. Returns false, leaving res untouched, when
383 |   // the queue is empty.
384 |   bool try_pop(FunctionWrapper& res) {
385 |     std::lock_guard<std::mutex> guard(m_);
386 |     const bool has_task = !q_.empty();
387 |     if (has_task) {
388 |       res = std::move(q_.front());
389 |       q_.pop_front();
390 |     }
391 |     return has_task;
392 |   }
393 | 
394 |   // Moves the back task into res. Returns false, leaving res untouched, when
395 |   // the queue is empty.
396 |   bool try_steal(FunctionWrapper& res) {
397 |     std::lock_guard<std::mutex> guard(m_);
398 |     const bool has_task = !q_.empty();
399 |     if (has_task) {
400 |       res = std::move(q_.back());
401 |       q_.pop_back();
402 |     }
403 |     return has_task;
404 |   }
405 | 
406 |  private:
407 |   std::deque<FunctionWrapper> q_;
408 |   mutable std::mutex m_;  // mutable so that empty() const can lock it
409 | };
402 | 
403 | // Work-stealing thread pool: every worker owns a WorkStealingQueue, and a
404 | // worker that finds neither local nor global work tries to steal a task from
405 | // the queues of the other workers.
406 | class ThreadPool {
407 |  public:
408 |   // Creates the per-worker queues and starts the workers. If starting a
409 |   // worker throws, the ones already running are stopped and joined before the
410 |   // exception is rethrown.
411 |   ThreadPool() {
412 |     std::size_t n = std::thread::hardware_concurrency();
413 |     if (n == 0) {
414 |       // hardware_concurrency() returns 0 when the value is not computable;
415 |       // without any worker, submitted tasks would never run.
416 |       n = 1;
417 |     }
418 |     try {
419 |       // Build every queue before starting any worker: a running worker reads
420 |       // work_stealing_queue_ (size and elements), so growing the vector while
421 |       // workers are already running would be a data race.
422 |       for (std::size_t i = 0; i < n; ++i) {
423 |         work_stealing_queue_.emplace_back(
424 |             std::make_unique<WorkStealingQueue>());
425 |       }
426 |       for (std::size_t i = 0; i < n; ++i) {
427 |         threads_.emplace_back(&ThreadPool::worker_thread, this, i);
428 |       }
429 |     } catch (...) {
430 |       done_ = true;
431 |       for (auto& x : threads_) {
432 |         if (x.joinable()) {
433 |           x.join();
434 |         }
435 |       }
436 |       throw;
437 |     }
438 |   }
439 | 
440 |   // Asks the workers to stop and joins them. Workers leave their loop as soon
441 |   // as they see done_, so tasks still queued at that point are not executed.
442 |   ~ThreadPool() {
443 |     done_ = true;
444 |     for (auto& x : threads_) {
445 |       if (x.joinable()) {
446 |         x.join();
447 |       }
448 |     }
449 |   }
450 | 
451 |   // Queues f (callable with no arguments) and returns the future for its
452 |   // result. On a worker thread the task goes to that worker's own queue,
453 |   // otherwise to the global queue.
454 |   template <typename F>
455 |   std::future<std::invoke_result_t<F>> submit(F f) {
456 |     std::packaged_task<std::invoke_result_t<F>()> task(std::move(f));
457 |     std::future<std::invoke_result_t<F>> res(task.get_future());
458 |     if (local_queue_) {
459 |       local_queue_->push(std::move(task));
460 |     } else {
461 |       pool_queue_.push(std::move(task));
462 |     }
463 |     return res;
464 |   }
465 | 
466 |  private:
467 |   // Takes a task from the calling thread's own queue, if it has one.
468 |   bool pop_task_from_local_queue(FunctionWrapper& task) {
469 |     return local_queue_ && local_queue_->try_pop(task);
470 |   }
471 | 
472 |   // Takes a task from the global queue.
473 |   bool pop_task_from_pool_queue(FunctionWrapper& task) {
474 |     return pool_queue_.try_pop(task);
475 |   }
476 | 
477 |   // Tries to steal a task, probing the queues in order starting with the one
478 |   // after the calling worker's; the last queue probed is the caller's own.
479 |   bool pop_task_from_other_thread_queue(FunctionWrapper& task) {
480 |     for (std::size_t i = 0; i < work_stealing_queue_.size(); ++i) {
481 |       std::size_t index = (index_ + i + 1) % work_stealing_queue_.size();
482 |       if (work_stealing_queue_[index]->try_steal(task)) {
483 |         return true;
484 |       }
485 |     }
486 |     return false;
487 |   }
488 | 
489 |   // Worker loop: records this worker's index and queue in the thread-local
490 |   // slots, then looks for work locally, globally and finally in the other
491 |   // workers' queues, yielding when nothing is found.
492 |   void worker_thread(std::size_t index) {
493 |     index_ = index;
494 |     local_queue_ = work_stealing_queue_[index_].get();
495 |     while (!done_) {
496 |       FunctionWrapper task;
497 |       if (pop_task_from_local_queue(task) || pop_task_from_pool_queue(task) ||
498 |           pop_task_from_other_thread_queue(task)) {
499 |         task();
500 |       } else {
501 |         std::this_thread::yield();
502 |       }
503 |     }
504 |   }
505 | 
506 |  private:
507 |   std::atomic<bool> done_ = false;
508 |   ConcurrentQueue<FunctionWrapper> pool_queue_;
509 |   std::vector<std::unique_ptr<WorkStealingQueue>> work_stealing_queue_;
510 |   // Declared after the queues, so the queues are still alive while the
511 |   // destructor body joins the workers.
512 |   std::vector<std::thread> threads_;
513 | 
514 |   // Per-thread slots; null / zero on threads that are not pool workers.
515 |   static thread_local WorkStealingQueue* local_queue_;
516 |   static thread_local std::size_t index_;
517 | };
518 | 
519 | thread_local WorkStealingQueue* ThreadPool::local_queue_;
520 | thread_local std::size_t ThreadPool::index_;
490 | ```
491 | 
492 | ## 中断
493 | 
494 | * 可中断线程的简单实现
495 | 
496 | ```cpp
497 | // Interruption flag of a single thread (member functions are only declared
498 | // in this first, simple version).
499 | class InterruptFlag {
500 |  public:
501 |   void set();           // records an interruption request
502 |   bool is_set() const;  // reports whether an interruption was requested
503 | };
504 | 
505 | // Every thread gets its own flag.
506 | thread_local InterruptFlag this_thread_interrupt_flag;
504 | 
505 | // Thread wrapper that can ask the thread it started to stop at its next
506 | // interruption point.
507 | class InterruptibleThread {
508 |  public:
509 |   // Starts a thread running f. The new thread first publishes the address of
510 |   // its own thread-local InterruptFlag through the promise; the constructor
511 |   // blocks until that address has arrived, so p (captured by reference) is
512 |   // still alive when set_value is called.
513 |   template <typename F>
514 |   InterruptibleThread(F f) {
515 |     std::promise<InterruptFlag*> p;
516 |     t = std::thread([f, &p] {
517 |       p.set_value(&this_thread_interrupt_flag);
518 |       f();
519 |     });
520 |     flag = p.get_future().get();
521 |   }
522 | 
523 |   // Sets the interruption flag of the managed thread.
524 |   void interrupt() {
525 |     if (flag) {
526 |       flag->set();
527 |     }
528 |   }
529 | 
530 |   // Forwarders to the underlying std::thread, so callers can wait for the
531 |   // interrupted thread to finish.
532 |   bool joinable() const { return t.joinable(); }
533 | 
534 |   void join() { t.join(); }
535 | 
536 |   void detach() { t.detach(); }
537 | 
538 |  private:
539 |   std::thread t;
540 |   InterruptFlag* flag = nullptr;
541 | };
527 | 
528 | // Throws thread_interrupted if the calling thread's flag has been set;
529 | // otherwise returns immediately.
530 | void interruption_point() {
531 |   const bool interrupted = this_thread_interrupt_flag.is_set();
532 |   if (!interrupted) {
533 |     return;
534 |   }
535 |   throw thread_interrupted();
536 | }
533 | ```
534 | 
535 | * 在函数中使用
536 | 
537 | ```cpp
538 | // Example loop that checks for interruption before handling each item.
539 | void f() {
540 |   for (;;) {
541 |     if (done) {
542 |       break;
543 |     }
544 |     interruption_point();
545 |     process_next_item();
546 |   }
547 | }
544 | ```
545 | 
546 | * 更好的方式是用 [std::condition_variable](https://en.cppreference.com/w/cpp/thread/condition_variable) 来唤醒，而非在循环中持续运行
547 | 
548 | ```cpp
549 | // Interruption flag that can also wake a thread blocked on a
550 | // std::condition_variable registered through set_condition_variable().
551 | class InterruptFlag {
552 |  public:
553 |   // Marks the flag as set, then notifies the registered condition variable
554 |   // (if any) so that a waiting thread wakes up and can see the flag.
555 |   void set() {
556 |     b_.store(true, std::memory_order_relaxed);
557 |     std::lock_guard<std::mutex> l(m_);
558 |     if (cv_) {
559 |       cv_->notify_all();
560 |     }
561 |   }
562 | 
563 |   // Reports whether set() has been called.
564 |   bool is_set() const { return b_.load(std::memory_order_relaxed); }
565 | 
566 |   // Registers the condition variable that set() should notify.
567 |   void set_condition_variable(std::condition_variable& cv) {
568 |     std::lock_guard<std::mutex> l(m_);
569 |     cv_ = &cv;
570 |   }
571 | 
572 |   // Removes the registration made by set_condition_variable().
573 |   void clear_condition_variable() {
574 |     std::lock_guard<std::mutex> l(m_);
575 |     cv_ = nullptr;
576 |   }
577 | 
578 |   // Scope guard: on destruction, clears the condition variable registered on
579 |   // the calling thread's flag, including when the scope is left by exception.
580 |   struct ClearConditionVariableOnDestruct {
581 |     ~ClearConditionVariableOnDestruct() {
582 |       this_thread_interrupt_flag.clear_condition_variable();
583 |     }
584 |   };
585 | 
586 |  private:
587 |   // Explicitly initialized: before C++20 a default-constructed std::atomic
588 |   // holds an indeterminate value.
589 |   std::atomic<bool> b_ = false;
590 |   std::condition_variable* cv_ = nullptr;
591 |   std::mutex m_;  // guards cv_
592 | };
582 | 
583 | // Waits on cv for at most one millisecond, throwing from interruption_point()
584 | // if the calling thread has been interrupted before or after the wait.
585 | // l is passed to cv.wait_for(), so it must be locked by the caller.
586 | void interruptible_wait(std::condition_variable& cv,
587 |                         std::unique_lock<std::mutex>& l) {
588 |   interruption_point();
589 |   this_thread_interrupt_flag.set_condition_variable(cv);
590 |   // The calls below may throw, so an RAII guard clears the registered
591 |   // condition variable again on every exit path.
592 |   InterruptFlag::ClearConditionVariableOnDestruct guard;
593 |   interruption_point();
594 |   // Upper bound on how long the thread waits before it re-checks for an
595 |   // interruption.
596 |   cv.wait_for(l, std::chrono::milliseconds(1));
597 |   interruption_point();
598 | }
594 | 
595 | // Waits on cv until pred() returns true or the calling thread is interrupted,
596 | // re-checking both at least once per millisecond. Throws from
597 | // interruption_point() if the thread was interrupted.
598 | template <typename Predicate>
599 | void interruptible_wait(std::condition_variable& cv,
600 |                         std::unique_lock<std::mutex>& l, Predicate pred) {
601 |   interruption_point();
602 |   this_thread_interrupt_flag.set_condition_variable(cv);
603 |   // Clears the registered condition variable on every exit path.
604 |   InterruptFlag::ClearConditionVariableOnDestruct cleanup;
605 |   const auto poll_interval = std::chrono::milliseconds(1);
606 |   for (;;) {
607 |     if (this_thread_interrupt_flag.is_set() || pred()) {
608 |       break;
609 |     }
610 |     cv.wait_for(l, poll_interval);
611 |   }
612 |   interruption_point();
613 | }
606 | ```
607 | 
608 | * 和 [std::condition_variable](https://en.cppreference.com/w/cpp/thread/condition_variable) 不同的是，[std::condition_variable_any](https://en.cppreference.com/w/cpp/thread/condition_variable_any) 可以使用不限于 [std::unique_lock](https://en.cppreference.com/w/cpp/thread/unique_lock) 的任何类型的锁，这意味着可以使用自定义的锁类型
609 | 
610 | ```cpp
611 | #include <atomic>
612 | #include <condition_variable>
613 | #include <mutex>
614 | 
615 | // Interruption flag extended with an interruptible wait on a
616 | // std::condition_variable_any.
617 | class InterruptFlag {
618 |  public:
619 |   // Marks the flag as set, then notifies whichever condition variable is
620 |   // currently registered so that a waiting thread wakes up.
621 |   void set() {
622 |     b_.store(true, std::memory_order_relaxed);
623 |     std::lock_guard<std::mutex> l(m_);
624 |     if (cv_) {
625 |       cv_->notify_all();
626 |     } else if (cv_any_) {
627 |       cv_any_->notify_all();
628 |     }
629 |   }
630 | 
631 |   // Waits on cv with the caller's lock l, throwing from interruption_point()
632 |   // if the flag is set before or after the wait.
633 |   template <typename Lockable>
634 |   void wait(std::condition_variable_any& cv, Lockable& l) {
635 |     // Lock adapter handed to cv.wait(): it couples the flag's mutex m_ with
636 |     // the caller's lock, so both are released while waiting and re-acquired
637 |     // together afterwards.
638 |     class Mutex {
639 |      public:
640 |       // Locks m_ and registers cv, so that set() blocks on m_ until the wait
641 |       // has released it.
642 |       Mutex(InterruptFlag* self, std::condition_variable_any& cv, Lockable& l)
643 |           : self_(self), lock_(l) {
644 |         self_->m_.lock();
645 |         self_->cv_any_ = &cv;
646 |       }
647 | 
648 |       // Unregisters cv and releases m_; the caller's lock stays locked.
649 |       ~Mutex() {
650 |         self_->cv_any_ = nullptr;
651 |         self_->m_.unlock();
652 |       }
653 | 
654 |       // std::lock acquires both mutexes without risking a deadlock.
655 |       void lock() { std::lock(self_->m_, lock_); }
656 | 
657 |       void unlock() {
658 |         lock_.unlock();
659 |         self_->m_.unlock();
660 |       }
661 | 
662 |      private:
663 |       InterruptFlag* self_;
664 |       Lockable& lock_;
665 |     };
666 | 
667 |     Mutex m(this, cv, l);
668 |     interruption_point();
669 |     cv.wait(m);
670 |     interruption_point();
671 |   }
672 |   // rest as before
673 | 
674 |  private:
675 |   // Explicitly initialized: before C++20 a default-constructed std::atomic
676 |   // holds an indeterminate value.
677 |   std::atomic<bool> b_ = false;
678 |   std::condition_variable* cv_ = nullptr;
679 |   std::condition_variable_any* cv_any_ = nullptr;
680 |   std::mutex m_;  // guards cv_ and cv_any_
681 | };
667 | 
668 | // Interruptible wait on a std::condition_variable_any: forwards to the wait()
669 | // member of the calling thread's InterruptFlag.
670 | template <typename Lockable>
671 | void interruptible_wait(std::condition_variable_any& cv, Lockable& l) {
672 |   this_thread_interrupt_flag.wait(cv, l);
673 | }
672 | ```
673 | 
674 | * 对于其他阻塞调用（比如 mutex、future）的中断，一般也可以像对 [std::condition_variable](https://en.cppreference.com/w/cpp/thread/condition_variable) 一样设置超时时间，因为如果不访问 mutex 或 future 的内部实现，就无法在等待条件未满足时中断等待
675 | 
676 | ```cpp
677 | // Waits until ft is ready or the calling thread is interrupted, polling in
678 | // one-millisecond steps. Throws from interruption_point() if the thread was
679 | // interrupted.
680 | template <typename T>
681 | void interruptible_wait(std::future<T>& ft) {
682 |   const auto poll_interval = std::chrono::milliseconds(1);
683 |   for (;;) {
684 |     if (this_thread_interrupt_flag.is_set()) {
685 |       break;
686 |     }
687 |     if (ft.wait_for(poll_interval) == std::future_status::ready) {
688 |       break;
689 |     }
690 |   }
691 |   interruption_point();
692 | }
687 | ```
688 | 
689 | * 从被中断的线程角度来看，中断就是一个 `thread_interrupted` 异常。因此检查出中断后，可以像异常一样对其进行处理
690 | 
691 | ```cpp
692 | internal_thread = std::thread{[f, &p] {
693 |   p.set_value(&this_thread_interrupt_flag);
694 |   try {
695 |     f();
696 |   } catch (const thread_interrupted&) {
697 |     // An exception that escapes the thread function makes the runtime call
698 |     // std::terminate, so the interruption is caught here to keep the program
699 |     // running.
700 |   }
701 | }};
701 | ```
702 | 
703 | * 假如有一个桌面搜索程序，除了与用户交互，程序还需要监控文件系统的状态，以识别任何更改并更新其索引。为了避免影响 GUI 的响应性，这个处理通常会交给一个后台线程，后台线程需要运行于程序的整个生命周期。这样的程序通常只在机器关闭时退出，而在其他情况下关闭程序，就需要井然有序地关闭后台线程，一个关闭方式就是中断
704 | 
705 | ```cpp
706 | std::mutex config_mutex;
707 | std::vector<InterruptibleThread> background_threads;
708 | 
709 | // Background worker for one disk: repeatedly fetches file-system changes and
710 | // updates the index, checking for interruption at the top of every round.
711 | void background_thread(int disk_id) {
712 |   for (;;) {
713 |     interruption_point();
714 |     fs_change changes = get_fs_changes(disk_id);
715 |     if (!changes.has_changes()) {
716 |       continue;
717 |     }
718 |     update_index(changes);
719 |   }
720 | }
718 | 
719 | // Starts one interruptible background thread per disk. InterruptibleThread
720 | // takes a single callable, so the disk id is bound with a lambda.
721 | void start_background_processing() {
722 |   background_threads.emplace_back([] { background_thread(disk_1); });
723 |   background_threads.emplace_back([] { background_thread(disk_2); });
724 | }
723 | 
724 | // Runs the GUI until exit, then shuts the background threads down in order.
725 | int main() {
726 |   start_background_processing();
727 |   process_gui_until_exit();
728 |   std::unique_lock<std::mutex> l(config_mutex);
729 |   for (auto& x : background_threads) {
730 |     x.interrupt();
731 |   }
732 |   // Join only after every thread has been interrupted.
733 |   for (auto& x : background_threads) {
734 |     if (x.joinable()) {
735 |       x.join();
736 |     }
737 |   }
738 |   // Interrupting and joining in two separate loops is deliberate, for the
739 |   // sake of concurrency: an interruption does not complete immediately. Each
740 |   // thread must first reach its next interruption point and then run its
741 |   // destructors and exception handling code before it exits. Joining each
742 |   // thread right after interrupting it would make the interrupting thread
743 |   // wait even though it could still do useful work, such as interrupting the
744 |   // other threads.
745 | }
743 | ```
744 | 


--------------------------------------------------------------------------------