├── .gitignore ├── .gitmodules ├── LICENSE ├── README.md ├── aliasing └── main.cpp ├── aligned_pointer └── main.cpp ├── alloc ├── Makefile └── list.cpp ├── branch_mispred └── main.cpp ├── cache_latency.cpp ├── cache_latency_hugetbl.cpp ├── char_alias └── main.cpp ├── chunked_array └── main.cpp ├── coincide_connect ├── Makefile ├── add_delay.sh ├── connections.cpp ├── tcp_state_digrame_coincide.svg └── tcp_state_digrame_normal.svg ├── cold_hot ├── Makefile ├── main.cpp ├── main2.cpp └── main3.cpp ├── connection_memory ├── Makefile └── connections.cpp ├── copy_construct └── main.cpp ├── cpuid.cpp ├── encapsulate ├── main.cpp └── main2.cpp ├── float_error └── error.cpp ├── float_int ├── fix_freq.sh └── main.cpp ├── hash_compare ├── Makefile └── main.cpp ├── ht ├── div.cpp ├── memory_latency.cpp └── set_hyper_threading.sh ├── ispc ├── Makefile ├── main.cpp ├── sum_ispc.h └── sum_ispc.ispc ├── kary_tree └── main.cpp ├── lazy_pb ├── Makefile ├── gen.sh ├── main.cpp ├── user.pb.cc ├── user.pb.h └── user.proto ├── likely └── main.cpp ├── lockfree ├── LockFreeQueue.h └── main.cpp ├── matrix_mul └── main.cpp ├── member_variable └── main.cpp ├── memory_bandwidth.cpp ├── memory_issues ├── Makefile ├── memory_corrupt.cpp ├── memory_leak.cpp └── memory_protect.cpp ├── memory_latency.cpp ├── memory_model ├── compile_reorder.cpp ├── gcc │ ├── Makefile │ └── ordering.cpp ├── msvc │ ├── ordering.cpp │ ├── ordering.sln │ └── ordering.vcproj └── relaxed.cpp ├── moon ├── map_moon.jpg ├── moon.py └── moon.txt ├── nan └── nan.cpp ├── optimize ├── set_hyper_threading.sh └── vec.cpp ├── pagetable.cpp ├── partition └── main.cpp ├── perf_tool └── main.cpp ├── pgo └── main.cpp ├── pmu ├── Makefile ├── cpuid.cpp ├── cr4.c ├── enable_fixed.sh ├── iaca-version-v3.0-lin64.zip ├── ipc.cpp ├── msr.txt ├── rdpmc.cpp └── set_msr_ipc.sh ├── ranker └── main.cpp ├── rdpmc.cpp ├── register ├── haswell_block_diagram.svg ├── main.cpp └── main.s ├── simd ├── box_box_scalar.cpp ├── box_box_simd.cpp └── 
make.sh ├── start.sh ├── static_cast └── main.cpp ├── template_expression ├── Makefile ├── calc4.cpp ├── lazy_calc_example.cpp ├── template_expression1.cpp ├── template_expression2.cpp └── template_expression3.cpp ├── thread_local └── main.cpp ├── uninitialized_bool └── main.cpp ├── vec.cpp └── virtual_optimize └── main.cpp /.gitignore: -------------------------------------------------------------------------------- 1 | # Prerequisites 2 | *.d 3 | 4 | # Compiled Object files 5 | *.slo 6 | *.lo 7 | *.o 8 | *.obj 9 | 10 | # Precompiled Headers 11 | *.gch 12 | *.pch 13 | 14 | # Compiled Dynamic libraries 15 | *.so 16 | *.dylib 17 | *.dll 18 | 19 | # Fortran module files 20 | *.mod 21 | *.smod 22 | 23 | # Compiled Static libraries 24 | *.lai 25 | *.la 26 | *.a 27 | *.lib 28 | 29 | # Executables 30 | *.exe 31 | *.out 32 | *.app 33 | -------------------------------------------------------------------------------- /.gitmodules: -------------------------------------------------------------------------------- 1 | [submodule "simd_soa/soa"] 2 | path = simd_soa/soa 3 | url = https://github.com/gongyiling/soa.git 4 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2021 gongyiling 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 
14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # cpp_lecture -------------------------------------------------------------------------------- /aliasing/main.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | __attribute__((noinline)) void foo(int32_t& i, uint32_t& __restrict ui) 5 | { 6 | i = ui + 1; 7 | i = ui + 1; 8 | } 9 | 10 | __attribute__((noinline)) void bar(int32_t& i, int64_t& i64) 11 | { 12 | i = i64 + 1; 13 | i = i64 + 1; 14 | } 15 | 16 | int main() 17 | { 18 | uint32_t ui = 1; 19 | int32_t i = 1; 20 | int64_t i64 = 1; 21 | 22 | foo(i, ui); 23 | bar(i, i64); 24 | 25 | ui = 1; 26 | i = 1; 27 | i64 = 1; 28 | int32_t* pui = reinterpret_cast(&ui); 29 | foo(*pui, ui); 30 | std::cout << ui << std::endl; 31 | 32 | ui = 1; 33 | i = 1; 34 | i64 = 1; 35 | int32_t* pi64 = reinterpret_cast(&i64); 36 | bar(*pi64, i64); 37 | std::cout << i64 << std::endl; 38 | 39 | return 0; 40 | } 41 | 42 | -------------------------------------------------------------------------------- /aligned_pointer/main.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | 6 | struct A 7 | { 8 | A():a(0), b(0), c(0), d(0){} 9 | int64_t a; 10 | int64_t b; 11 | int64_t c; 12 | int64_t d; 13 | }; 14 | 15 | int main() 16 
| { 17 | char* buffer = (char*)__builtin_assume_aligned((char*)malloc(64), 32); 18 | A* a = new(buffer)A; 19 | asm volatile("": :"r,m"(a): "memory"); 20 | return 0; 21 | } 22 | -------------------------------------------------------------------------------- /alloc/Makefile: -------------------------------------------------------------------------------- 1 | all: list 2 | list: list.cpp 3 | g++ list.cpp -std=c++11 -O2 -o list -g 4 | clean: 5 | rm -f list 6 | -------------------------------------------------------------------------------- /alloc/list.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | 8 | struct node 9 | { 10 | int32_t next; 11 | int32_t x; 12 | node& operator=(const node&) = delete; 13 | }; 14 | 15 | static const int N = 1024 * 1024 * 10; 16 | static const int M = 1024 / 100; 17 | static node f[N]; 18 | static int32_t pf[N]; 19 | 20 | void swap_list_element(node& a, node& b, node* array, int32_t* prev) 21 | { 22 | const int32_t ai = &a - array; 23 | const int32_t bi = &b - array; 24 | const int32_t pai = prev[ai]; 25 | const int32_t pbi = prev[bi]; 26 | const int32_t nai = array[ai].next; 27 | const int32_t nbi = array[bi].next; 28 | 29 | // fix prev.next 30 | array[pai].next = bi; 31 | array[pbi].next = ai; 32 | 33 | // fix next.prev 34 | prev[nai] = bi; 35 | prev[nbi] = ai; 36 | 37 | // fix a and b. 
38 | std::swap(a.next, b.next); 39 | std::swap(prev[ai], prev[bi]); 40 | } 41 | 42 | void swap(node& a, node& b) 43 | { 44 | swap_list_element(a, b, f, pf); 45 | } 46 | 47 | void check_list(node* array, int32_t* prev, int N) 48 | { 49 | int32_t p = 0; 50 | for (int i = 0; i < N; i++) 51 | { 52 | assert(prev[array[p].next] == p); 53 | assert(array[prev[p]].next == p); 54 | p = array[p].next; 55 | } 56 | std::cout << p << '\t' << std::endl; 57 | } 58 | 59 | int32_t sum(const node* f, int N) 60 | { 61 | int32_t s = 0; 62 | int32_t p = 0; 63 | for (int i = 0; i < N; i++) 64 | { 65 | s += f[p].x; 66 | p = f[p].next; 67 | } 68 | return s; 69 | } 70 | 71 | void bind_to_cpu(int index) /* Pins the calling thread to CPU `index`. */ 72 | { 73 | cpu_set_t set; 74 | CPU_ZERO(&set); 75 | CPU_SET(index, &set); 76 | sched_setaffinity(0, sizeof(set), &set); /* first argument is a pid (0 = calling thread), not a CPU index; the CPU is selected by the mask */ 77 | } 78 | 79 | void init_data() 80 | { 81 | for (int i = 0; i < N; i++) 82 | { 83 | f[i].x = i; 84 | f[i].next = i + 1; 85 | pf[i] = i - 1; 86 | } 87 | f[N - 1].next = 0; 88 | pf[0] = N - 1; 89 | } 90 | 91 | int main() 92 | { 93 | bind_to_cpu(0); 94 | 95 | for (int step = 256; step < N / 4; step += 256) 96 | { 97 | init_data(); 98 | for (int i = 0; i < N - step; i += step) 99 | { 100 | std::random_shuffle(f + i, f + i + step); 101 | } 102 | int32_t s = 0; 103 | auto start = std::chrono::high_resolution_clock::now(); 104 | for (int i = 0; i < M; i++) 105 | { 106 | s += sum(f, N); 107 | } 108 | auto end = std::chrono::high_resolution_clock::now(); 109 | std::cout << std::chrono::duration_cast(end - start).count() << '\t' << s << std::endl; 110 | } 111 | 112 | return 0; 113 | } 114 | 115 | -------------------------------------------------------------------------------- /branch_mispred/main.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | 9 | typedef std::array bits_t; 10 | 11 | __attribute__((noinline)) static int sum(const bits_t& bits) 12
| { 13 | int sum = 0; 14 | for (char b: bits) 15 | { 16 | if (b) 17 | { 18 | sum++; 19 | } 20 | asm volatile("": :"r,m"(sum): "memory"); 21 | } 22 | return sum; 23 | } 24 | 25 | static bits_t gen_bits(int NP, int P, bool random) /* Fills the array with a repeating NP-element 0/1 pattern: random when `random` is set, otherwise taken from the low NP bits of P. */ 26 | { 27 | bits_t bits{}; /* zero-initialise: the copy loop below leaves the last bits.size() % NP elements unwritten, and sum() reads every element */ 28 | std::vector patterns; 29 | patterns.resize(NP); 30 | for (size_t i = 0; i < NP; i++) 31 | { 32 | if (random) 33 | { 34 | patterns[i] = rand() & 1; 35 | } 36 | else 37 | { 38 | if (P & (1 << i)) 39 | { 40 | patterns[i] = true; 41 | } 42 | else 43 | { 44 | patterns[i] = false; 45 | } 46 | } 47 | //std::cout << (bool)patterns[i]; 48 | } 49 | //std::cout << '\n'; 50 | for (size_t i = 0; i <= bits.size() - patterns.size(); i += patterns.size()) 51 | { 52 | memcpy(&bits[i], &patterns[0], patterns.size()); 53 | } 54 | return bits; 55 | } 56 | 57 | void test(int NP) 58 | { 59 | int N = 0; 60 | bool random = false; 61 | if (NP > 4) 62 | { 63 | N = 1 << 4; 64 | random = true; 65 | } 66 | else 67 | { 68 | N = 1 << NP; 69 | random = false; 70 | } 71 | int sum_milliseconds = 0; 72 | for (int P = 0; P < N; P++) 73 | { 74 | bits_t bits = gen_bits(NP, P, random); 75 | const int M = 10000; 76 | int s = 0; 77 | auto start = std::chrono::high_resolution_clock::now(); 78 | for (int i = 0; i < M; i++) 79 | { 80 | s += sum(bits); 81 | } 82 | auto end = std::chrono::high_resolution_clock::now(); 83 | auto elapsed = std::chrono::duration_cast(end - start).count(); 84 | //std::cout << "elapsed: " << elapsed << " milliseconds" << std::endl; 85 | sum_milliseconds += elapsed; 86 | } 87 | std::cout << NP <<" sum: " << sum_milliseconds / N << " milliseconds" << std::endl; 88 | } 89 | 90 | int main() 91 | { 92 | srand(time(NULL)); 93 | for (int i = 1; i < 32; i++) 94 | { 95 | const int ni = i; 96 | test(ni); 97 | } 98 | return 0; 99 | } 100 | -------------------------------------------------------------------------------- /cache_latency.cpp: -------------------------------------------------------------------------------- 1 | #include 2 |
#include 3 | #include 4 | #include 5 | #include 6 | 7 | #define CACHELINE_SIZE 64 8 | struct Node 9 | { 10 | Node* next; 11 | char paddings[CACHELINE_SIZE - sizeof(Node*)]; 12 | }; 13 | 14 | #define N1(node) node=node->next; 15 | #define N2(node) N1(node);N1(node); 16 | #define N4(node) N2(node);N2(node); 17 | #define N8(node) N4(node);N4(node); 18 | #define N16(node) N8(node);N8(node); 19 | #define N32(node) N16(node);N16(node); 20 | #define N64(node) N32(node);N32(node); 21 | #define N128(node) N64(node);N64(node); 22 | #define N256(node) N128(node);N128(node); 23 | #define N512(node) N256(node);N256(node); 24 | #define N1024(node) N512(node);N512(node); 25 | 26 | const Node* test(int T, const Node* c, int size) 27 | { 28 | auto start = std::chrono::high_resolution_clock::now(); 29 | const Node* node = NULL; 30 | const int M = T / size; 31 | for (int i = 0; i < M; i++) 32 | { 33 | node = c; 34 | const int s = size / 64; 35 | for (int j = 0; j < s; j++) 36 | { 37 | N64(node); 38 | } 39 | } 40 | auto end = std::chrono::high_resolution_clock::now(); 41 | double elapsed = std::chrono::duration_cast(end - start).count() ; 42 | std::cout << "elapsed nanoseconds per elements: " << elapsed / T << std::endl; 43 | std::cerr << elapsed / T << std::endl; 44 | return node; 45 | } 46 | 47 | int main(int argc, char* argv[]) 48 | { 49 | cpu_set_t set; 50 | CPU_ZERO(&set); 51 | CPU_SET(0, &set); 52 | sched_setaffinity(0, sizeof(set), &set); 53 | const int N = 1024 * 1024 * 16; 54 | std::vector va; 55 | va.resize(N); 56 | for (int i = 0; i < N - 1; i++) 57 | { 58 | va[i].next = &(va[i + 1]); 59 | } 60 | va[N - 1].next = &va[0]; 61 | const int T = 1000000; 62 | const int begin_kb = atoi(argv[1]); 63 | const int end_kb = atoi(argv[2]); 64 | std::cout << "begin_kb: " << begin_kb << '\t' << "end_kb: " << end_kb << std::endl; 65 | const Node* node = NULL; 66 | for (int i = begin_kb; i < end_kb; i += 4) 67 | { 68 | std::cout << "test kb: " << i << std::endl; 69 | std::cerr << i << '\t'; 
70 | node = test(T, &va[0], i * 1024 / sizeof(Node)); 71 | } 72 | std::cout << node << '\t' << &va[0] << std::endl; 73 | return 0; 74 | } 75 | 76 | -------------------------------------------------------------------------------- /cache_latency_hugetbl.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | 9 | #define CACHELINE_SIZE 64 10 | struct Node 11 | { 12 | Node* next; 13 | char paddings[CACHELINE_SIZE - sizeof(Node*)]; 14 | }; 15 | 16 | #define N1(node) node=node->next; 17 | #define N2(node) N1(node);N1(node); 18 | #define N4(node) N2(node);N2(node); 19 | #define N8(node) N4(node);N4(node); 20 | #define N16(node) N8(node);N8(node); 21 | #define N32(node) N16(node);N16(node); 22 | #define N64(node) N32(node);N32(node); 23 | #define N128(node) N64(node);N64(node); 24 | #define N256(node) N128(node);N128(node); 25 | #define N512(node) N256(node);N256(node); 26 | #define N1024(node) N512(node);N512(node); 27 | 28 | const Node* test(int T, const Node* c, int size) 29 | { 30 | auto start = std::chrono::high_resolution_clock::now(); 31 | const Node* node = NULL; 32 | const int M = T / size; 33 | for (int i = 0; i < M; i++) 34 | { 35 | node = c; 36 | const int s = size / 64; 37 | for (int j = 0; j < s; j++) 38 | { 39 | N64(node); 40 | } 41 | } 42 | auto end = std::chrono::high_resolution_clock::now(); 43 | double elapsed = std::chrono::duration_cast(end - start).count() ; 44 | std::cout << "elapsed nanoseconds per elements: " << elapsed / T << std::endl; 45 | std::cerr << elapsed / T << std::endl; 46 | return node; 47 | } 48 | 49 | int main(int argc, char* argv[]) 50 | { 51 | cpu_set_t set; 52 | CPU_ZERO(&set); 53 | CPU_SET(0, &set); 54 | sched_setaffinity(0, sizeof(set), &set); 55 | const int N = 1024 * 1024 * 16 / sizeof(Node); 56 | Node* va = (Node*)mmap(NULL, N * sizeof(Node), PROT_READ | PROT_WRITE, MAP_ANONYMOUS | MAP_HUGETLB | 
MAP_PRIVATE, -1, 0); 57 | if (va == MAP_FAILED) 58 | { 59 | std::cerr << strerror(errno) << std::endl; 60 | return -1; 61 | } 62 | for (int i = 0; i < N - 1; i++) 63 | { 64 | va[i].next = &(va[i + 1]); 65 | } 66 | va[N - 1].next = &va[0]; 67 | const int T = 1000000; 68 | const int begin_kb = atoi(argv[1]); 69 | const int end_kb = atoi(argv[2]); 70 | std::cout << "begin_kb: " << begin_kb << '\t' << "end_kb: " << end_kb << std::endl; 71 | const Node* node = NULL; 72 | for (int i = begin_kb; i < end_kb; i += 4) 73 | { 74 | std::cout << "test kb: " << i << std::endl; 75 | std::cerr << i << '\t'; 76 | node = test(T, &va[0], i * 1024 / sizeof(Node)); 77 | } 78 | std::cout << node << '\t' << &va[0] << std::endl; 79 | munmap(va, N * sizeof(Node)); 80 | return 0; 81 | } 82 | 83 | -------------------------------------------------------------------------------- /char_alias/main.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | struct request 5 | { 6 | uint32_t a; 7 | uint32_t b; 8 | uint32_t c; 9 | uint32_t d; 10 | }; 11 | 12 | //typedef const char* char_cp_t; 13 | typedef const char* __restrict char_cp_t; 14 | 15 | static void parse(char_cp_t data, request& req) 16 | { 17 | req.a = *(const uint32_t*)data; data += sizeof(uint32_t); 18 | req.b = *(const uint32_t*)data; data += sizeof(uint32_t); 19 | req.c = *(const uint32_t*)data; data += sizeof(uint32_t); 20 | req.d = *(const uint32_t*)data; data += sizeof(uint32_t); 21 | } 22 | 23 | static __attribute__((noinline)) void test(const char* data, request& req) 24 | { 25 | const int N = 1000000000; 26 | for (int i = 0; i < N; i++) 27 | { 28 | parse(data, req); 29 | asm volatile("": :"r,m"(req): "memory"); 30 | } 31 | } 32 | 33 | int main() 34 | { 35 | char data[128]; 36 | for (int i = 0; i < 128; i++) 37 | { 38 | data[i] = rand(); 39 | } 40 | request req; 41 | test(data, req); 42 | return 0; 43 | } 44 | 45 | 
-------------------------------------------------------------------------------- /chunked_array/main.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | 7 | template 8 | class chunked_array 9 | { 10 | public: 11 | typedef uint32_t size_type; 12 | struct chunk 13 | { 14 | T* data = NULL; 15 | }; 16 | std::vector m_chunks; 17 | size_type m_num; 18 | void resize(size_type num) 19 | { 20 | assert(m_chunks.empty()); 21 | const size_type chunk_num = (num + N - 1) / N; 22 | m_chunks.resize(chunk_num); 23 | for (size_type i = 0; i < chunk_num; i++) 24 | { 25 | m_chunks[i].data = new T[N]; 26 | } 27 | m_num = num; 28 | } 29 | T& operator[](size_type index) 30 | { 31 | // https://homepage.divms.uiowa.edu/%7Ejones/bcd/divide.html 32 | const size_type chunk_index = index / N; 33 | const size_type array_index = index % N; 34 | return m_chunks[chunk_index].data[array_index]; 35 | } 36 | size_type size() const 37 | { 38 | return m_num; 39 | } 40 | }; 41 | 42 | int main() 43 | { 44 | chunked_array ca; 45 | const int N = 1024 * 1024; 46 | ca.resize(N); 47 | 48 | for (int i = 0; i < N; i++) 49 | { 50 | ca[i] = rand(); 51 | } 52 | const int M = 1024; 53 | int s = 0; 54 | auto start = std::chrono::high_resolution_clock::now(); 55 | for (int i = 0; i < M; i++) 56 | { 57 | for (int j = 0; j < N; j++) 58 | { 59 | s += ca[j]; 60 | asm volatile("": :"r,m"(s): "memory"); 61 | } 62 | } 63 | auto end = std::chrono::high_resolution_clock::now(); 64 | auto elapsed = std::chrono::duration_cast(end - start).count(); 65 | std::cout << s << '\t' << elapsed << std::endl; 66 | return 0; 67 | } 68 | 69 | 70 | -------------------------------------------------------------------------------- /coincide_connect/Makefile: -------------------------------------------------------------------------------- 1 | all: connections 2 | 3 | connections: connections.cpp 4 | g++-10 -fcoroutines connections.cpp -o 
connections -O2 -std=c++20 -I/home/fractal/boost_1_77_0/ -lpthread 5 | -------------------------------------------------------------------------------- /coincide_connect/add_delay.sh: -------------------------------------------------------------------------------- 1 | tc qdisc add dev lo root netem delay 100ms 2 | -------------------------------------------------------------------------------- /coincide_connect/connections.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include 10 | 11 | using boost::asio::ip::tcp; 12 | using boost::asio::awaitable; 13 | using boost::asio::co_spawn; 14 | using boost::asio::detached; 15 | using boost::asio::use_awaitable; 16 | 17 | template 18 | awaitable wait_until(boost::asio::io_context& io_context, pred_t pred) 19 | { 20 | while (!pred()) 21 | { 22 | boost::asio::deadline_timer timer(io_context); 23 | timer.expires_from_now(boost::posix_time::milliseconds(100)); 24 | co_await timer.async_wait(boost::asio::use_awaitable); 25 | } 26 | } 27 | 28 | awaitable connect(boost::asio::io_context& io_context, tcp::endpoint local_ep, tcp::endpoint remote_ep, bool receive, int& step) 29 | { 30 | tcp::socket s(io_context); 31 | s.open(tcp::v4()); 32 | s.bind(local_ep); 33 | 34 | co_await wait_until(io_context, [&](){return step == 1;}); 35 | 36 | std::cerr << "connecting" << std::endl; 37 | co_await s.async_connect(remote_ep, boost::asio::use_awaitable); 38 | 39 | co_await wait_until(io_context, [&](){return step == 2;}); 40 | if (receive) 41 | { 42 | std::cerr << "receiving" << std::endl; 43 | char buf[1024]; 44 | const size_t read_bytes = co_await s.async_read_some( 45 | boost::asio::mutable_buffer(buf, sizeof(buf) - 1), /* read at most sizeof(buf) - 1 bytes so the terminator written below stays inside buf */ 46 | boost::asio::use_awaitable); 47 | buf[read_bytes] = '\0'; 48 | std::cerr << buf << std::endl; 49 | } 50 | else 51 | { 52 | std::cerr << "sending" << std::endl; 53 | std::string
req = "hello, world."; 54 | const size_t written_bytes = co_await boost::asio::async_write(s, 55 | boost::asio::const_buffer(req.data(), req.length()), 56 | boost::asio::use_awaitable); 57 | } 58 | 59 | co_await wait_until(io_context, [&](){return step == 3;}); 60 | } 61 | 62 | int main(int argc, char* argv[]) 63 | { 64 | try 65 | { 66 | boost::asio::io_context io_context; 67 | 68 | boost::asio::signal_set signals(io_context, SIGINT, SIGTERM); 69 | 70 | int step = 0; 71 | std::function signal_handle = [&](const boost::system::error_code&, int) 72 | { 73 | if (step < 3) 74 | { 75 | step++; 76 | signals.async_wait(signal_handle); 77 | } 78 | else 79 | { 80 | io_context.stop(); 81 | } 82 | }; 83 | 84 | signals.async_wait(signal_handle); 85 | 86 | tcp::resolver resolver(io_context); 87 | tcp::resolver::results_type endpoints1 = resolver.resolve("192.168.3.38", "8000"); 88 | tcp::resolver::results_type endpoints2 = resolver.resolve("192.168.3.38", "8080"); 89 | tcp::endpoint ep1 = *endpoints1.begin(); 90 | tcp::endpoint ep2 = *endpoints2.begin(); 91 | co_spawn(io_context, connect(io_context, ep1, ep2, false, step), detached); 92 | co_spawn(io_context, connect(io_context, ep2, ep1, true, step), detached); 93 | 94 | io_context.run(); 95 | } 96 | catch(std::exception& e) 97 | { 98 | std::cerr << "Exception: " << e.what() << "\n"; 99 | } 100 | return 0; 101 | } 102 | 103 | -------------------------------------------------------------------------------- /cold_hot/Makefile: -------------------------------------------------------------------------------- 1 | all: main main2 main3 2 | 3 | main: main.cpp 4 | g++ main.cpp -O2 -o main 5 | 6 | main2: main2.cpp 7 | g++ main2.cpp -O2 -o main2 8 | 9 | main3: main3.cpp 10 | g++ main3.cpp -O2 -o main3 11 | 12 | -------------------------------------------------------------------------------- /cold_hot/main.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | 
#include 5 | #include 6 | #include 7 | 8 | struct data 9 | { 10 | int32_t d; 11 | char padding[64 - sizeof(int32_t)]; 12 | int32_t d1; 13 | char padding2[64 - sizeof(int32_t)]; 14 | }; 15 | 16 | void bind_to_cpu(int index) /* Pins the calling thread to CPU `index`. */ 17 | { 18 | cpu_set_t set; 19 | CPU_ZERO(&set); 20 | CPU_SET(index, &set); 21 | sched_setaffinity(0, sizeof(set), &set); /* first argument is a pid (0 = calling thread), not a CPU index; the CPU is selected by the mask */ 22 | } 23 | 24 | static std::array datas; 25 | 26 | int main() 27 | { 28 | bind_to_cpu(0); 29 | for (int i = 0; i < datas.size(); i++) 30 | { 31 | datas[i] = new data(); 32 | datas[i]->d = rand(); 33 | datas[i]->d1 = rand(); 34 | } 35 | 36 | const int N = 100; 37 | int sum = 0; 38 | auto start = std::chrono::high_resolution_clock::now(); 39 | for (int i = 0; i < N; i++) 40 | { 41 | for (int j = 0; j < datas.size(); j++) 42 | { 43 | const data& d = *datas[j]; 44 | sum += d.d; 45 | if (j % 128 == 0) 46 | { 47 | sum += d.d1; 48 | asm volatile("": :"r,m"(sum): "memory"); 49 | } 50 | } 51 | } 52 | auto end = std::chrono::high_resolution_clock::now(); 53 | auto elapsed = std::chrono::duration_cast(end - start).count(); 54 | 55 | std::cout << "elabsed: " << elapsed << " milliseconds, sum: " << sum << std::endl; 56 | return 0; 57 | } 58 | 59 | -------------------------------------------------------------------------------- /cold_hot/main2.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | 8 | struct data 9 | { 10 | int32_t d; 11 | char padding[64 - sizeof(int32_t)]; 12 | int32_t d1; 13 | char padding2[64 - sizeof(int32_t)]; 14 | }; 15 | 16 | void bind_to_cpu(int index) /* Pins the calling thread to CPU `index`. */ 17 | { 18 | cpu_set_t set; 19 | CPU_ZERO(&set); 20 | CPU_SET(index, &set); 21 | sched_setaffinity(0, sizeof(set), &set); /* first argument is a pid (0 = calling thread), not a CPU index; the CPU is selected by the mask */ 22 | } 23 | 24 | static std::array datas; 25 | static std::array data_pool; 26 | 27 | int main() 28 | { 29 | bind_to_cpu(0); 30 | for (int i = 0; i < datas.size(); i++) 31 | { 32 | datas[i] = &data_pool[i]; 33 | datas[i]->d =
rand(); 34 | datas[i]->d1 = rand(); 35 | } 36 | 37 | const int N = 100; 38 | int sum = 0; 39 | auto start = std::chrono::high_resolution_clock::now(); 40 | for (int i = 0; i < N; i++) 41 | { 42 | for (int j = 0; j < datas.size(); j++) 43 | { 44 | const data& d = *datas[j]; 45 | sum += d.d; 46 | if (j % 128 == 0) 47 | { 48 | sum += d.d1; 49 | asm volatile("": :"r,m"(sum): "memory"); 50 | } 51 | } 52 | } 53 | auto end = std::chrono::high_resolution_clock::now(); 54 | auto elapsed = std::chrono::duration_cast(end - start).count(); 55 | 56 | std::cout << "elabsed: " << elapsed << " milliseconds, sum: " << sum << std::endl; 57 | return 0; 58 | } 59 | 60 | -------------------------------------------------------------------------------- /cold_hot/main3.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | 8 | struct data_cold 9 | { 10 | char padding[64 - sizeof(int32_t)]; 11 | int32_t d1; 12 | char padding2[64 - sizeof(int32_t)]; 13 | }; 14 | 15 | struct data 16 | { 17 | int32_t d; 18 | data_cold* cold; 19 | }; 20 | 21 | void bind_to_cpu(int index) /* Pins the calling thread to CPU `index`. */ 22 | { 23 | cpu_set_t set; 24 | CPU_ZERO(&set); 25 | CPU_SET(index, &set); 26 | sched_setaffinity(0, sizeof(set), &set); /* first argument is a pid (0 = calling thread), not a CPU index; the CPU is selected by the mask */ 27 | } 28 | 29 | static std::array datas; 30 | static std::array data_pool; 31 | static std::array data_cold_pool; 32 | 33 | int main() 34 | { 35 | bind_to_cpu(0); 36 | for (int i = 0; i < datas.size(); i++) 37 | { 38 | datas[i] = &data_pool[i]; 39 | datas[i]->cold = &data_cold_pool[i]; 40 | datas[i]->d = rand(); 41 | datas[i]->cold->d1 = rand(); 42 | } 43 | 44 | const int N = 100; 45 | int sum = 0; 46 | auto start = std::chrono::high_resolution_clock::now(); 47 | for (int i = 0; i < N; i++) 48 | { 49 | for (int j = 0; j < datas.size(); j++) 50 | { 51 | const data& d = *datas[j]; 52 | sum += d.d; 53 | if (j % 128 == 0) 54 | { 55 | sum += d.cold->d1; 56 | asm volatile("": :"r,m"(sum):
"memory"); 57 | } 58 | } 59 | } 60 | auto end = std::chrono::high_resolution_clock::now(); 61 | auto elapsed = std::chrono::duration_cast(end - start).count(); 62 | 63 | std::cout << "elabsed: " << elapsed << " milliseconds, sum: " << sum << std::endl; 64 | return 0; 65 | } 66 | 67 | -------------------------------------------------------------------------------- /connection_memory/Makefile: -------------------------------------------------------------------------------- 1 | all: connections 2 | 3 | connections: connections.cpp 4 | g++-10 -fcoroutines connections.cpp -o connections -O2 -std=c++20 -I/home/fractal/boost_1_77_0/ -lpthread 5 | -------------------------------------------------------------------------------- /connection_memory/connections.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include 10 | 11 | using boost::asio::ip::tcp; 12 | using boost::asio::awaitable; 13 | using boost::asio::co_spawn; 14 | using boost::asio::detached; 15 | using boost::asio::use_awaitable; 16 | 17 | template 18 | awaitable wait_until(boost::asio::io_context& io_context, pred_t pred) 19 | { 20 | while (!pred()) 21 | { 22 | boost::asio::deadline_timer timer(io_context); 23 | timer.expires_from_now(boost::posix_time::milliseconds(100)); 24 | co_await timer.async_wait(boost::asio::use_awaitable); 25 | } 26 | } 27 | 28 | awaitable connect(boost::asio::io_context& io_context, tcp::endpoint ep, int& step, std::string fn) /* fn is taken by value: the coroutine suspends before it reads fn, so a reference bound to a caller-side temporary (the caller passes argv[1]) would dangle */ 29 | { 30 | tcp::socket s(io_context); 31 | s.open(tcp::v4()); 32 | 33 | co_await wait_until(io_context, [&](){return step == 1;}); 34 | std::cerr << "set receiving buffer" << std::endl; 35 | 36 | boost::asio::socket_base::receive_buffer_size option(8 * 1024); 37 | s.set_option(option); 38 | 39 | co_await wait_until(io_context, [&](){return step == 2;}); 40 | std::cerr << "connecting" << std::endl; 41 | co_await
s.async_connect(ep, boost::asio::use_awaitable); 42 | 43 | co_await wait_until(io_context, [&](){return step == 3;}); 44 | std::cerr << "writing" << std::endl; 45 | std::string req = "GET /" + fn + " HTTP/1.1\r\n\r\n"; 46 | const size_t written_bytes = co_await boost::asio::async_write(s, 47 | boost::asio::const_buffer(req.data(), req.length()), 48 | boost::asio::use_awaitable); 49 | 50 | co_await wait_until(io_context, [&](){return step == 4;}); 51 | boost::asio::streambuf buf; 52 | const size_t read_bytes = co_await boost::asio::async_read_until(s, 53 | buf, 54 | "\r\n\r\n", 55 | boost::asio::use_awaitable); 56 | std::cerr << "read_bytes: " << read_bytes << std::endl; 57 | 58 | co_await wait_until(io_context, [&](){return step == 5;}); 59 | } 60 | 61 | int main(int argc, char* argv[]) 62 | { 63 | try 64 | { 65 | boost::asio::io_context io_context; 66 | 67 | boost::asio::signal_set signals(io_context, SIGINT, SIGTERM); 68 | 69 | int step = 0; 70 | std::function signal_handle = [&](const boost::system::error_code&, int) 71 | { 72 | if (step < 5) 73 | { 74 | step++; 75 | signals.async_wait(signal_handle); 76 | } 77 | else 78 | { 79 | io_context.stop(); 80 | } 81 | }; 82 | 83 | signals.async_wait(signal_handle); 84 | 85 | tcp::resolver resolver(io_context); 86 | tcp::resolver::results_type endpoints = resolver.resolve("192.168.3.38", "8000"); 87 | const int N = 10; 88 | for (int i = 0; i < N; i++) 89 | { 90 | co_spawn(io_context, connect(io_context, *endpoints.begin(), step, argv[1]), detached); 91 | } 92 | 93 | io_context.run(); 94 | } 95 | catch(std::exception& e) 96 | { 97 | std::cerr << "Exception: " << e.what() << "\n"; 98 | } 99 | return 0; 100 | } 101 | 102 | -------------------------------------------------------------------------------- /copy_construct/main.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | struct FSMStatBase 4 | { 5 | FSMStatBase() 6 | { 7 | std::cout << "base constructor called" << 
std::endl; 8 | } 9 | FSMStatBase(const FSMStatBase& other) 10 | { 11 | std::cout << "base copy constructor called" << std::endl; 12 | } 13 | }; 14 | 15 | struct FSMStat : public FSMStatBase 16 | { 17 | FSMStat() 18 | { 19 | std::cout << "derived constructor called" << std::endl; 20 | } 21 | FSMStat(const FSMStat& other) 22 | { 23 | std::cout << "derived copy constructor called" << std::endl; 24 | } 25 | }; 26 | 27 | void foo(const FSMStatBase& stat) 28 | { 29 | } 30 | 31 | int main() 32 | { 33 | FSMStat* stat = new FSMStat(); 34 | foo(stat != nullptr ? *stat: FSMStatBase()); 35 | return 0; 36 | } 37 | 38 | -------------------------------------------------------------------------------- /cpuid.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | int main() 5 | { 6 | unsigned int level = 0x0a; 7 | unsigned int eax, ebx, ecx, edx; 8 | if (__get_cpuid(level, &eax, &ebx, &ecx, &edx)) 9 | { 10 | printf("eax=%08x\tebx=%08x\tecx=%08x\tedx=%08x\n", eax, ebx, ecx, edx); 11 | } 12 | else 13 | { 14 | printf("__get_cpuid failed\n"); 15 | } 16 | return 0; 17 | } 18 | -------------------------------------------------------------------------------- /encapsulate/main.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | 7 | constexpr int N = 128; 8 | 9 | class hash_table 10 | { 11 | public: 12 | hash_table() 13 | { 14 | for (int i = 0; i < N; i++) 15 | { 16 | m_data[i] = rand() % N; 17 | } 18 | } 19 | 20 | int find_index(int key) const 21 | { 22 | if (key >= 0 && key < N) 23 | { 24 | return m_data[key]; 25 | } 26 | else 27 | { 28 | return -1; 29 | } 30 | } 31 | 32 | __attribute__((noinline)) const int* find(int key) const 33 | { 34 | int index = find_index(key); 35 | if (index == -1) 36 | { 37 | return nullptr; 38 | } 39 | else 40 | { 41 | return &m_data[index]; 42 | } 43 | } 44 | 45 | private: 46 | 47 | std::array 
m_data; 48 | }; 49 | 50 | int main() 51 | { 52 | hash_table h; 53 | std::array keys; 54 | for (int i = 0; i < keys.size(); i++) 55 | { 56 | keys[i] = rand() % (2 * N); 57 | } 58 | auto start = std::chrono::high_resolution_clock::now(); 59 | for (int i = 0; i < 10000000; i++) 60 | { 61 | for (int j = 0; j < keys.size(); j++) 62 | { 63 | const int* p = h.find(keys[j]); 64 | asm volatile("": :"r,m"(p): "memory"); 65 | } 66 | } 67 | auto end = std::chrono::high_resolution_clock::now(); 68 | auto elapsed = std::chrono::duration_cast(end - start).count(); 69 | printf("elapsed %ld milliseconds\n", elapsed); 70 | return 0; 71 | } 72 | 73 | -------------------------------------------------------------------------------- /encapsulate/main2.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | 7 | constexpr int N = 128; 8 | class hash_table 9 | { 10 | public: 11 | hash_table() 12 | { 13 | for (int i = 0; i < N; i++) 14 | { 15 | m_data[i] = rand() % N; 16 | } 17 | } 18 | 19 | template 20 | decltype(auto) find_index(int key, succ_op_t succ_op, failed_op_t failed_op) const 21 | { 22 | if (key >= 0 && key < N) 23 | { 24 | return succ_op(m_data[key]); 25 | } 26 | else 27 | { 28 | return failed_op(); 29 | } 30 | } 31 | 32 | __attribute__((noinline)) const int* find(int key) const 33 | { 34 | return find_index(key, 35 | [this](int index){return &m_data[index];}, 36 | [](){return (const int*)nullptr;} 37 | ); 38 | } 39 | 40 | private: 41 | 42 | std::array m_data; 43 | }; 44 | 45 | int main() 46 | { 47 | hash_table h; 48 | std::array keys; 49 | for (int i = 0; i < keys.size(); i++) 50 | { 51 | keys[i] = rand() % (2 * N); 52 | } 53 | auto start = std::chrono::high_resolution_clock::now(); 54 | for (int i = 0; i < 10000000; i++) 55 | { 56 | for (int j = 0; j < keys.size(); j++) 57 | { 58 | const int* p = h.find(keys[j]); 59 | asm volatile("": :"r,m"(p): "memory"); 60 | } 61 | } 62 | auto 
end = std::chrono::high_resolution_clock::now(); 63 | auto elapsed = std::chrono::duration_cast(end - start).count(); 64 | printf("elapsed %ld milliseconds\n", elapsed); 65 | return 0; 66 | } 67 | 68 | -------------------------------------------------------------------------------- /float_error/error.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | float det(float a, float b, float c ,float d) 5 | { 6 | return a * d - b * c; 7 | } 8 | 9 | int main() 10 | { 11 | float M[2][2] = {{1.0, 1.0}, {1.0, 1.0}}; 12 | float c[2] = {1.0, 1.0}; 13 | uint32_t& u10 = *(uint32_t*)&M[1][0]; 14 | for (int i = 0; i < 100; i++) 15 | { 16 | float d = det(M[0][0], M[0][1], M[1][0], M[1][1]); 17 | float x = det(c[0], M[0][1], c[1], M[1][1]) / d; 18 | float y = det(M[0][0], c[0], M[1][0], c[1]) / d; 19 | printf("det=%e\tx=%e\ty=%e\n", d, x, y); 20 | 21 | u10--; 22 | M[1][1] = 2 - M[1][0]; 23 | } 24 | return 0; 25 | } 26 | 27 | -------------------------------------------------------------------------------- /float_int/fix_freq.sh: -------------------------------------------------------------------------------- 1 | cpupower -c 0 frequency-set -g performance 2 | -------------------------------------------------------------------------------- /float_int/main.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | 6 | void bind_to_cpu(int index) 7 | { 8 | cpu_set_t set; 9 | CPU_ZERO(&set); 10 | CPU_SET(index, &set); 11 | sched_setaffinity(index, sizeof(set), &set); 12 | } 13 | 14 | template 15 | struct unroll 16 | { 17 | static void do_unroll(data_t* totals, const data_t* a) 18 | { 19 | totals[j] += a[j]; 20 | unroll::do_unroll(totals, a); 21 | } 22 | }; 23 | 24 | template 25 | struct unroll 26 | { 27 | static void do_unroll(data_t* totals, const data_t* a) 28 | { 29 | } 30 | }; 31 | 32 | template 33 | __attribute__((noinline)) data_t 
sum(const data_t* a, int count) 34 | { 35 | data_t totals[N] ={0}; 36 | for (int i = 0; i < count; i += N) 37 | { 38 | unroll::do_unroll(totals, a + i); 39 | } 40 | 41 | data_t total = 0; 42 | for (int j = 0; j < N; j++) 43 | { 44 | total += totals[j]; 45 | } 46 | return total; 47 | } 48 | 49 | template 50 | void test(const std::vector& nums) 51 | { 52 | auto start = std::chrono::high_resolution_clock::now(); 53 | data_t s = 0; 54 | for (int i = 0; i < 1024 * 256; i++) 55 | { 56 | s += sum(nums.data(), nums.size()); 57 | asm volatile("": :"r,m"(s): "memory"); 58 | 59 | } 60 | auto end = std::chrono::high_resolution_clock::now(); 61 | auto elapsed = std::chrono::duration_cast(end - start).count(); 62 | std::cout << s << '\t' << N << "\telapsed : " << elapsed << std::endl; 63 | } 64 | 65 | int main() 66 | { 67 | bind_to_cpu(0); 68 | const int N = 1024 * 4; 69 | std::vector numsf(N); 70 | std::vector numsi(N); 71 | for (int i = 0; i < N;i++) 72 | { 73 | numsi[i] = numsf[i] = rand(); 74 | } 75 | test(numsi); 76 | test(numsf); 77 | std::cout << std::endl; 78 | 79 | test(numsi); 80 | test(numsf); 81 | std::cout << std::endl; 82 | 83 | test(numsi); 84 | test(numsf); 85 | std::cout << std::endl; 86 | 87 | test(numsi); 88 | test(numsf); 89 | std::cout << std::endl; 90 | 91 | test(numsi); 92 | test(numsf); 93 | std::cout << std::endl; 94 | 95 | return 0; 96 | } 97 | 98 | -------------------------------------------------------------------------------- /hash_compare/Makefile: -------------------------------------------------------------------------------- 1 | main: main.cpp 2 | g++ main.cpp -O2 -std=c++14 -o main -I../../FHashTable/FHashTable -I../../abseil-cpp -L../../abseil-cpp/build/absl/container -labsl_raw_hash_set -L../../abseil-cpp/build/absl/hash -labsl_hash -labsl_low_level_hash -labsl_city 3 | -------------------------------------------------------------------------------- /hash_compare/main.cpp: 
-------------------------------------------------------------------------------- 1 | #include "fhash_table.h" 2 | #include "absl/container/flat_hash_map.h" 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include 10 | #include 11 | 12 | template 13 | std::vector gen_random_data(int32_t N) 14 | { 15 | std::vector data; 16 | data.reserve(N); 17 | std::unordered_set numbers; 18 | for (int32_t i = 0; i < N; i++) 19 | { 20 | int64_t r = ((int64_t)rand() << 32) | rand(); 21 | if (remove_duplicated) 22 | { 23 | if (numbers.insert(r).second) 24 | { 25 | data.push_back(r); 26 | } 27 | } 28 | else 29 | { 30 | data.push_back(r); 31 | } 32 | } 33 | return data; 34 | } 35 | 36 | static void test_find_success() 37 | { 38 | for (int32_t i = 1; i < 15; i++) 39 | { 40 | const int32_t N = std::pow(3, i); 41 | std::cout << "N = " << N << std::endl; 42 | std::vector data = gen_random_data(N); 43 | { 44 | std::unordered_map m; 45 | for (int64_t i : data) 46 | { 47 | m.emplace(i, i); 48 | } 49 | std::vector shuffled_data = data; 50 | std::random_shuffle(shuffled_data.begin(), shuffled_data.end()); 51 | int64_t sum = 0; 52 | auto start = std::chrono::high_resolution_clock::now(); 53 | for (int32_t i = 0; i < 100000000 / N; i++) 54 | { 55 | for (int64_t i : shuffled_data) 56 | { 57 | sum += m.find(i)->second; 58 | } 59 | } 60 | auto end = std::chrono::high_resolution_clock::now(); 61 | auto elapsed = std::chrono::duration_cast(end - start).count(); 62 | std::cout << "std::unordered_map, elapsed milliseconds: " << elapsed << " sum: " << sum << std::endl; 63 | } 64 | { 65 | absl::flat_hash_map m; 66 | for (int64_t i : data) 67 | { 68 | m.emplace(i, i); 69 | } 70 | std::vector shuffled_data = data; 71 | std::random_shuffle(shuffled_data.begin(), shuffled_data.end()); 72 | int64_t sum = 0; 73 | auto start = std::chrono::high_resolution_clock::now(); 74 | for (int32_t i = 0; i < 100000000 / N; i++) 75 | { 76 | for (int64_t i : shuffled_data) 77 | { 78 | sum += 
m.find(i)->second; 79 | } 80 | } 81 | auto end = std::chrono::high_resolution_clock::now(); 82 | auto elapsed = std::chrono::duration_cast(end - start).count(); 83 | std::cout << "absl::flat_hash_map, elapsed milliseconds: " << elapsed << " sum: " << sum << std::endl; 84 | } 85 | { 86 | fhash_table m; 87 | for (int64_t i : data) 88 | { 89 | m.insert(i, i); 90 | } 91 | std::vector shuffled_data = data; 92 | std::random_shuffle(shuffled_data.begin(), shuffled_data.end()); 93 | auto start = std::chrono::high_resolution_clock::now(); 94 | int64_t sum = 0; 95 | for (int32_t i = 0; i < 100000000 / N; i++) 96 | { 97 | for (int64_t i : shuffled_data) 98 | { 99 | sum += *m.find(i); 100 | } 101 | } 102 | auto end = std::chrono::high_resolution_clock::now(); 103 | auto elapsed = std::chrono::duration_cast(end - start).count(); 104 | std::cout << "fhash_table, elapsed milliseconds: " << elapsed << " sum: " << sum << " load_factor: " << m.load_factor() << std::endl; 105 | } 106 | } 107 | } 108 | 109 | static void perf_test() 110 | { 111 | test_find_success(); 112 | } 113 | 114 | int main() 115 | { 116 | //functional_test(); 117 | perf_test(); 118 | return 0; 119 | } 120 | -------------------------------------------------------------------------------- /ht/div.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | 7 | int div(const std::vector& v, const int d, int M) 8 | { 9 | int s = 0; 10 | for (int i = 0; i < M; i ++) 11 | { 12 | for (auto&& k: v) 13 | { 14 | s += k / d; 15 | } 16 | } 17 | return s; 18 | } 19 | 20 | void set_affinity(pid_t pid, int cpu) 21 | { 22 | cpu_set_t set; 23 | CPU_ZERO(&set); 24 | CPU_SET(cpu, &set); 25 | int ret = sched_setaffinity(pid, sizeof(set), &set); 26 | if (ret != 0) 27 | { 28 | fprintf(stderr, "sched_setaffinity failed\n"); 29 | } 30 | } 31 | 32 | int main(int argc, char* argv[1]) 33 | { 34 | const int M = 1024 * 1024; 35 | const bool 
child_run_hyper_thread = atoi(argv[1]); 36 | std::vector v(1024); 37 | const int d = rand(); 38 | for (auto&& k: v) 39 | { 40 | k = rand(); 41 | } 42 | pid_t pid = fork(); 43 | if (pid == 0) 44 | { 45 | set_affinity(0, 0); 46 | } 47 | else 48 | { 49 | set_affinity(0, child_run_hyper_thread ? 1: 2); 50 | } 51 | auto start = std::chrono::high_resolution_clock::now(); 52 | int s = div(v, d, M); 53 | auto end = std::chrono::high_resolution_clock::now(); 54 | printf("sum: %d, div elapsed milliseconds=%ld\n", 55 | s, std::chrono::duration_cast(end - start).count()); 56 | return 0; 57 | } 58 | 59 | -------------------------------------------------------------------------------- /ht/memory_latency.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | 8 | #define CACHELINE_SIZE 64 9 | struct Node 10 | { 11 | Node* next; 12 | char paddings[CACHELINE_SIZE - sizeof(Node*)]; 13 | }; 14 | 15 | #define N1(node) node=node->next; 16 | #define N2(node) N1(node);N1(node); 17 | #define N4(node) N2(node);N2(node); 18 | #define N8(node) N4(node);N4(node); 19 | #define N16(node) N8(node);N8(node); 20 | #define N32(node) N16(node);N16(node); 21 | #define N64(node) N32(node);N32(node); 22 | #define N128(node) N64(node);N64(node); 23 | #define N256(node) N128(node);N128(node); 24 | #define N512(node) N256(node);N256(node); 25 | #define N1024(node) N512(node);N512(node); 26 | 27 | const Node* test(int M, const std::vector& c) 28 | { 29 | const Node* node = &c[0]; 30 | for (int i = 0; i < M; i++) 31 | { 32 | const size_t s = c.size() / 1024; 33 | for (int j = 0; j < s; j++) 34 | { 35 | N1024(node); 36 | } 37 | } 38 | return node; 39 | } 40 | 41 | void set_affinity(pid_t pid, int cpu) 42 | { 43 | cpu_set_t set; 44 | CPU_ZERO(&set); 45 | CPU_SET(cpu, &set); 46 | int ret = sched_setaffinity(pid, sizeof(set), &set); 47 | if (ret != 0) 48 | { 49 | fprintf(stderr, "sched_setaffinity 
failed\n"); 50 | } 51 | } 52 | 53 | int main(int argc, char* argv[]) 54 | { 55 | const int N = 1024 * 1024 * 16; 56 | std::vector va; 57 | va.resize(N); 58 | for (int i = 0; i < N - 1; i++) 59 | { 60 | va[i].next = &(va[i + 1]); 61 | } 62 | va[N - 1].next = &va[0]; 63 | const int M = 10; 64 | const bool child_run_hyper_thread = true; 65 | pid_t pid = fork(); 66 | if (pid == 0) 67 | { 68 | set_affinity(0, 0); 69 | } 70 | else 71 | { 72 | set_affinity(0, child_run_hyper_thread ? 1: 2); 73 | } 74 | auto start = std::chrono::high_resolution_clock::now(); 75 | const Node* node = test(M, va); 76 | auto end = std::chrono::high_resolution_clock::now(); 77 | printf("%p,%p, memory latency elapsed milliseconds=%ld\n", 78 | node, &node[0], 79 | std::chrono::duration_cast(end - start).count()); 80 | return 0; 81 | } 82 | 83 | -------------------------------------------------------------------------------- /ht/set_hyper_threading.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # NAME: set-hyper-threading 4 | # PATH: /usr/local/bin 5 | # DESC: Turn Hyper threading off or on. 6 | 7 | # DATE: Aug. 5, 2017. 8 | 9 | # NOTE: Written Part of testing for Ubuntu answer: 10 | # https://askubuntu.com/questions/942728/disable-hyper-threading-in-ubuntu/942843#942843 11 | 12 | # PARM: 1="0" turn off hyper threading, "1" turn it on. 13 | 14 | if [[ $# -ne 1 ]]; then 15 | echo 'One argument required. 
0 to turn off hyper-threading or' 16 | echo '1 to turn hyper-threading back on' 17 | exit 1 18 | fi 19 | 20 | echo $1 > /sys/devices/system/cpu/cpu1/online 21 | echo $1 > /sys/devices/system/cpu/cpu3/online 22 | echo $1 > /sys/devices/system/cpu/cpu5/online 23 | echo $1 > /sys/devices/system/cpu/cpu7/online 24 | 25 | grep "" /sys/devices/system/cpu/cpu*/topology/core_id 26 | 27 | grep -q '^flags.*[[:space:]]ht[[:space:]]' /proc/cpuinfo && \ 28 | echo "Hyper-threading is supported" 29 | 30 | grep -E 'model|stepping' /proc/cpuinfo | sort -u 31 | 32 | -------------------------------------------------------------------------------- /ispc/Makefile: -------------------------------------------------------------------------------- 1 | main: main.o sum_ispc.o 2 | g++ sum_ispc.o main.o -o main 3 | 4 | main.o: main.cpp 5 | g++ main.cpp -O2 -c -o main.o 6 | 7 | sum_ispc.o: sum_ispc.ispc 8 | ispc -O2 --target=sse4 sum_ispc.ispc -o sum_ispc.o -h sum_ispc.h 9 | 10 | clean: 11 | rm -f main.o sum_ispc.o main 12 | -------------------------------------------------------------------------------- /ispc/main.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include "sum_ispc.h" 5 | 6 | __attribute__((noinline)) int sum(const int* a, int count) 7 | { 8 | int total = 0; 9 | for (int i = 0; i < count; i++) 10 | { 11 | total += a[i]; 12 | } 13 | return total; 14 | } 15 | 16 | 17 | int main() 18 | { 19 | const int N = 1024*1024*100; 20 | std::vector nums(N); 21 | for (int i = 0; i < N;i++) 22 | { 23 | nums[i] = rand(); 24 | } 25 | auto start = std::chrono::high_resolution_clock::now(); 26 | int s = sum(nums.data(), N); 27 | auto end1 = std::chrono::high_resolution_clock::now(); 28 | int s2 = ispc::sum2(nums.data(), N); 29 | auto end2 = std::chrono::high_resolution_clock::now(); 30 | auto elapsed1 = std::chrono::duration_cast(end1 - start).count(); 31 | auto elapsed2 = std::chrono::duration_cast(end2 - 
end1).count(); 32 | std::cout < 9 | 10 | 11 | 12 | #ifdef __cplusplus 13 | namespace ispc { /* namespace */ 14 | #endif // __cplusplus 15 | 16 | #ifndef __ISPC_ALIGN__ 17 | #if defined(__clang__) || !defined(_MSC_VER) 18 | // Clang, GCC, ICC 19 | #define __ISPC_ALIGN__(s) __attribute__((aligned(s))) 20 | #define __ISPC_ALIGNED_STRUCT__(s) struct __ISPC_ALIGN__(s) 21 | #else 22 | // Visual Studio 23 | #define __ISPC_ALIGN__(s) __declspec(align(s)) 24 | #define __ISPC_ALIGNED_STRUCT__(s) __ISPC_ALIGN__(s) struct 25 | #endif 26 | #endif 27 | 28 | 29 | /////////////////////////////////////////////////////////////////////////// 30 | // Functions exported from ispc code 31 | /////////////////////////////////////////////////////////////////////////// 32 | #if defined(__cplusplus) && (! defined(__ISPC_NO_EXTERN_C) || !__ISPC_NO_EXTERN_C ) 33 | extern "C" { 34 | #endif // __cplusplus 35 | extern int32_t sum2(int32_t * a, int32_t count); 36 | #if defined(__cplusplus) && (! defined(__ISPC_NO_EXTERN_C) || !__ISPC_NO_EXTERN_C ) 37 | } /* end extern C */ 38 | #endif // __cplusplus 39 | 40 | 41 | #ifdef __cplusplus 42 | } /* namespace */ 43 | #endif // __cplusplus 44 | -------------------------------------------------------------------------------- /ispc/sum_ispc.ispc: -------------------------------------------------------------------------------- 1 | 2 | export uniform int sum2(uniform int a[], uniform int count) 3 | { 4 | varying int total = 0; 5 | foreach (i = 0 ... 
count) 6 | { 7 | total += a[i]; 8 | } 9 | return reduce_add(total); 10 | } 11 | -------------------------------------------------------------------------------- /kary_tree/main.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | 7 | struct node 8 | { 9 | const int* b; 10 | int N; 11 | }; 12 | 13 | void build_tree(const int* b, int N, int* & out, int kary) 14 | { 15 | std::queue q; 16 | q.push(node{b, N}); 17 | while (!q.empty()) 18 | { 19 | node nd = q.front(); q.pop(); 20 | const int count = (nd.N - kary + 1) / kary; 21 | for (int i = 0; i < kary; i++) 22 | { 23 | if (i > 0) 24 | { 25 | *out++ = *nd.b; 26 | nd.b++; 27 | } 28 | if (count > 0) 29 | { 30 | q.push(node{nd.b, count}); 31 | nd.b += count; 32 | } 33 | } 34 | } 35 | } 36 | 37 | template 38 | const int* find(const int* b, int index, int value) 39 | { 40 | const int* root = b + index; 41 | int child_base_index = kary * index + kary - 1; 42 | for (int i = 0; i < kary - 1; i++) 43 | { 44 | if (root[i] == value) 45 | { 46 | return &root[i]; 47 | } 48 | else if (root[i] > value) 49 | { 50 | return find(b, child_base_index, value); 51 | } 52 | child_base_index += kary - 1; 53 | } 54 | return find(b, child_base_index, value); 55 | } 56 | 57 | void bind_to_cpu(int index) 58 | { 59 | cpu_set_t set; 60 | CPU_ZERO(&set); 61 | CPU_SET(index, &set); 62 | sched_setaffinity(index, sizeof(set), &set); 63 | } 64 | 65 | int main() 66 | { 67 | bind_to_cpu(0); 68 | const int kary = 1; 69 | // 4^12 - 1 70 | const int N = (1 << (2 * 12)) - 1; 71 | std::vector data, tree; 72 | data.resize(N); 73 | tree.resize(N); 74 | for (int i = 0; i < N; i++) 75 | { 76 | data[i] = i; 77 | } 78 | if (kary == 1) 79 | { 80 | tree = data; 81 | } 82 | else 83 | { 84 | int* p = tree.data(); 85 | build_tree(data.data(), N, p, kary); 86 | } 87 | std::random_shuffle(data.begin(), data.end()); 88 | int s = 0; 89 | auto start = 
std::chrono::high_resolution_clock::now(); 90 | for (int i = 0; i < N; i++) 91 | { 92 | if (kary == 1) 93 | { 94 | int d = data[i]; 95 | s += *std::lower_bound(tree.begin(), tree.end(), d); 96 | } 97 | else 98 | { 99 | int d = data[i]; 100 | s += *find(tree.data(), 0, d); 101 | } 102 | } 103 | auto end = std::chrono::high_resolution_clock::now(); 104 | int n = N; 105 | std::cout << std::chrono::duration_cast(end - start).count() << std::endl; 106 | std::cout << s << '\t' << n * ((n - 1) / 2) << std::endl; 107 | return 0; 108 | } 109 | 110 | -------------------------------------------------------------------------------- /lazy_pb/Makefile: -------------------------------------------------------------------------------- 1 | all: main 2 | 3 | PROTOBUF_SRC=/home/fractal/protobuf/src/ 4 | PROTOBUF_LIB=${PROTOBUF_SRC}/.libs 5 | 6 | main: main.o user.pb.o 7 | g++ main.o user.pb.o -o main -O2 -std=c++11 -I${PROTOBUF_SRC} -L${PROTOBUF_LIB} -lprotobuf 8 | 9 | main.o: main.cpp 10 | g++ main.cpp -c -o main.o -O2 -std=c++11 -I${PROTOBUF_SRC} 11 | 12 | user.pb.o: user.pb.cc user.pb.h 13 | g++ user.pb.cc -c -o user.pb.o -O2 -std=c++11 -I${PROTOBUF_SRC} 14 | clean: 15 | rm -f main main.o user.pb.o 16 | -------------------------------------------------------------------------------- /lazy_pb/gen.sh: -------------------------------------------------------------------------------- 1 | /home/fractal/protobuf/src/protoc --cpp_out=. 
user.proto 2 | -------------------------------------------------------------------------------- /lazy_pb/main.cpp: -------------------------------------------------------------------------------- 1 | #include "user.pb.h" 2 | #include 3 | 4 | int main() 5 | { 6 | Role role; 7 | role.mutable_role_basic()->set_gold(1); 8 | const int N = 1000; 9 | for (int i = 0; i < N; i++) 10 | { 11 | auto* factory = role.mutable_factory_list()->add_factory_list(); 12 | factory->set_id(i); 13 | factory->set_cd(10); 14 | } 15 | std::string data = role.SerializeAsString(); 16 | printf("data.length()=%ld\n", data.length()); 17 | 18 | const int M = 100000; 19 | Role role2; 20 | 21 | auto start = std::chrono::high_resolution_clock::now(); 22 | for (int i = 0; i < M; i++) 23 | { 24 | role2.ParseFromString(data); 25 | role2.mutable_role_basic()->set_gold(role2.role_basic().gold() + 1); 26 | role2.SerializeToString(&data); 27 | } 28 | auto end = std::chrono::high_resolution_clock::now(); 29 | 30 | printf("elapsed milliseconds: %ld\n", std::chrono::duration_cast(end - start).count()); 31 | printf("data.length()=%ld, gold=%d, role2.factory_list_size()=%d\n", data.length(), role2.role_basic().gold(), role2.factory_list().factory_list_size()); 32 | printf("role2.factory_list(10).id()=%d\n", role2.factory_list().factory_list(10).id()); 33 | return 0; 34 | } 35 | 36 | -------------------------------------------------------------------------------- /lazy_pb/user.pb.h: -------------------------------------------------------------------------------- 1 | // Generated by the protocol buffer compiler. DO NOT EDIT! 2 | // source: user.proto 3 | 4 | #ifndef GOOGLE_PROTOBUF_INCLUDED_user_2eproto 5 | #define GOOGLE_PROTOBUF_INCLUDED_user_2eproto 6 | 7 | #include 8 | #include 9 | 10 | #include 11 | #if PROTOBUF_VERSION < 3014000 12 | #error This file was generated by a newer version of protoc which is 13 | #error incompatible with your Protocol Buffer headers. Please update 14 | #error your headers. 
15 | #endif 16 | #if 3014000 < PROTOBUF_MIN_PROTOC_VERSION 17 | #error This file was generated by an older version of protoc which is 18 | #error incompatible with your Protocol Buffer headers. Please 19 | #error regenerate this file with a newer version of protoc. 20 | #endif 21 | 22 | #include 23 | #include 24 | #include 25 | #include 26 | #include 27 | #include 28 | #include 29 | #include 30 | #include 31 | #include // IWYU pragma: export 32 | #include // IWYU pragma: export 33 | #include 34 | // @@protoc_insertion_point(includes) 35 | #include 36 | #define PROTOBUF_INTERNAL_EXPORT_user_2eproto 37 | PROTOBUF_NAMESPACE_OPEN 38 | namespace internal { 39 | class AnyMetadata; 40 | } // namespace internal 41 | PROTOBUF_NAMESPACE_CLOSE 42 | 43 | // Internal implementation detail -- do not use these members. 44 | struct TableStruct_user_2eproto { 45 | static const ::PROTOBUF_NAMESPACE_ID::internal::ParseTableField entries[] 46 | PROTOBUF_SECTION_VARIABLE(protodesc_cold); 47 | static const ::PROTOBUF_NAMESPACE_ID::internal::AuxiliaryParseTableField aux[] 48 | PROTOBUF_SECTION_VARIABLE(protodesc_cold); 49 | static const ::PROTOBUF_NAMESPACE_ID::internal::ParseTable schema[4] 50 | PROTOBUF_SECTION_VARIABLE(protodesc_cold); 51 | static const ::PROTOBUF_NAMESPACE_ID::internal::FieldMetadata field_metadata[]; 52 | static const ::PROTOBUF_NAMESPACE_ID::internal::SerializationTable serialization_table[]; 53 | static const ::PROTOBUF_NAMESPACE_ID::uint32 offsets[]; 54 | }; 55 | extern const ::PROTOBUF_NAMESPACE_ID::internal::DescriptorTable descriptor_table_user_2eproto; 56 | ::PROTOBUF_NAMESPACE_ID::Metadata descriptor_table_user_2eproto_metadata_getter(int index); 57 | class Factory; 58 | struct FactoryDefaultTypeInternal; 59 | extern FactoryDefaultTypeInternal _Factory_default_instance_; 60 | class FactoryList; 61 | struct FactoryListDefaultTypeInternal; 62 | extern FactoryListDefaultTypeInternal _FactoryList_default_instance_; 63 | class Role; 64 | struct 
RoleDefaultTypeInternal; 65 | extern RoleDefaultTypeInternal _Role_default_instance_; 66 | class RoleBasic; 67 | struct RoleBasicDefaultTypeInternal; 68 | extern RoleBasicDefaultTypeInternal _RoleBasic_default_instance_; 69 | PROTOBUF_NAMESPACE_OPEN 70 | template<> ::Factory* Arena::CreateMaybeMessage<::Factory>(Arena*); 71 | template<> ::FactoryList* Arena::CreateMaybeMessage<::FactoryList>(Arena*); 72 | template<> ::Role* Arena::CreateMaybeMessage<::Role>(Arena*); 73 | template<> ::RoleBasic* Arena::CreateMaybeMessage<::RoleBasic>(Arena*); 74 | PROTOBUF_NAMESPACE_CLOSE 75 | 76 | // =================================================================== 77 | 78 | class RoleBasic PROTOBUF_FINAL : 79 | public ::PROTOBUF_NAMESPACE_ID::Message /* @@protoc_insertion_point(class_definition:RoleBasic) */ { 80 | public: 81 | inline RoleBasic() : RoleBasic(nullptr) {} 82 | virtual ~RoleBasic(); 83 | explicit constexpr RoleBasic(::PROTOBUF_NAMESPACE_ID::internal::ConstantInitialized); 84 | 85 | RoleBasic(const RoleBasic& from); 86 | RoleBasic(RoleBasic&& from) noexcept 87 | : RoleBasic() { 88 | *this = ::std::move(from); 89 | } 90 | 91 | inline RoleBasic& operator=(const RoleBasic& from) { 92 | CopyFrom(from); 93 | return *this; 94 | } 95 | inline RoleBasic& operator=(RoleBasic&& from) noexcept { 96 | if (GetArena() == from.GetArena()) { 97 | if (this != &from) InternalSwap(&from); 98 | } else { 99 | CopyFrom(from); 100 | } 101 | return *this; 102 | } 103 | 104 | inline const ::PROTOBUF_NAMESPACE_ID::UnknownFieldSet& unknown_fields() const { 105 | return _internal_metadata_.unknown_fields<::PROTOBUF_NAMESPACE_ID::UnknownFieldSet>(::PROTOBUF_NAMESPACE_ID::UnknownFieldSet::default_instance); 106 | } 107 | inline ::PROTOBUF_NAMESPACE_ID::UnknownFieldSet* mutable_unknown_fields() { 108 | return _internal_metadata_.mutable_unknown_fields<::PROTOBUF_NAMESPACE_ID::UnknownFieldSet>(); 109 | } 110 | 111 | static const ::PROTOBUF_NAMESPACE_ID::Descriptor* descriptor() { 112 | return 
GetDescriptor(); 113 | } 114 | static const ::PROTOBUF_NAMESPACE_ID::Descriptor* GetDescriptor() { 115 | return GetMetadataStatic().descriptor; 116 | } 117 | static const ::PROTOBUF_NAMESPACE_ID::Reflection* GetReflection() { 118 | return GetMetadataStatic().reflection; 119 | } 120 | static const RoleBasic& default_instance() { 121 | return *internal_default_instance(); 122 | } 123 | static inline const RoleBasic* internal_default_instance() { 124 | return reinterpret_cast( 125 | &_RoleBasic_default_instance_); 126 | } 127 | static constexpr int kIndexInFileMessages = 128 | 0; 129 | 130 | friend void swap(RoleBasic& a, RoleBasic& b) { 131 | a.Swap(&b); 132 | } 133 | inline void Swap(RoleBasic* other) { 134 | if (other == this) return; 135 | if (GetArena() == other->GetArena()) { 136 | InternalSwap(other); 137 | } else { 138 | ::PROTOBUF_NAMESPACE_ID::internal::GenericSwap(this, other); 139 | } 140 | } 141 | void UnsafeArenaSwap(RoleBasic* other) { 142 | if (other == this) return; 143 | GOOGLE_DCHECK(GetArena() == other->GetArena()); 144 | InternalSwap(other); 145 | } 146 | 147 | // implements Message ---------------------------------------------- 148 | 149 | inline RoleBasic* New() const final { 150 | return CreateMaybeMessage(nullptr); 151 | } 152 | 153 | RoleBasic* New(::PROTOBUF_NAMESPACE_ID::Arena* arena) const final { 154 | return CreateMaybeMessage(arena); 155 | } 156 | void CopyFrom(const ::PROTOBUF_NAMESPACE_ID::Message& from) final; 157 | void MergeFrom(const ::PROTOBUF_NAMESPACE_ID::Message& from) final; 158 | void CopyFrom(const RoleBasic& from); 159 | void MergeFrom(const RoleBasic& from); 160 | PROTOBUF_ATTRIBUTE_REINITIALIZES void Clear() final; 161 | bool IsInitialized() const final; 162 | 163 | size_t ByteSizeLong() const final; 164 | const char* _InternalParse(const char* ptr, ::PROTOBUF_NAMESPACE_ID::internal::ParseContext* ctx) final; 165 | ::PROTOBUF_NAMESPACE_ID::uint8* _InternalSerialize( 166 | ::PROTOBUF_NAMESPACE_ID::uint8* target, 
::PROTOBUF_NAMESPACE_ID::io::EpsCopyOutputStream* stream) const final; 167 | int GetCachedSize() const final { return _cached_size_.Get(); } 168 | 169 | private: 170 | inline void SharedCtor(); 171 | inline void SharedDtor(); 172 | void SetCachedSize(int size) const final; 173 | void InternalSwap(RoleBasic* other); 174 | friend class ::PROTOBUF_NAMESPACE_ID::internal::AnyMetadata; 175 | static ::PROTOBUF_NAMESPACE_ID::StringPiece FullMessageName() { 176 | return "RoleBasic"; 177 | } 178 | protected: 179 | explicit RoleBasic(::PROTOBUF_NAMESPACE_ID::Arena* arena); 180 | private: 181 | static void ArenaDtor(void* object); 182 | inline void RegisterArenaDtor(::PROTOBUF_NAMESPACE_ID::Arena* arena); 183 | public: 184 | 185 | ::PROTOBUF_NAMESPACE_ID::Metadata GetMetadata() const final; 186 | private: 187 | static ::PROTOBUF_NAMESPACE_ID::Metadata GetMetadataStatic() { 188 | return ::descriptor_table_user_2eproto_metadata_getter(kIndexInFileMessages); 189 | } 190 | 191 | public: 192 | 193 | // nested types ---------------------------------------------------- 194 | 195 | // accessors ------------------------------------------------------- 196 | 197 | enum : int { 198 | kGoldFieldNumber = 1, 199 | }; 200 | // optional int32 gold = 1; 201 | bool has_gold() const; 202 | private: 203 | bool _internal_has_gold() const; 204 | public: 205 | void clear_gold(); 206 | ::PROTOBUF_NAMESPACE_ID::int32 gold() const; 207 | void set_gold(::PROTOBUF_NAMESPACE_ID::int32 value); 208 | private: 209 | ::PROTOBUF_NAMESPACE_ID::int32 _internal_gold() const; 210 | void _internal_set_gold(::PROTOBUF_NAMESPACE_ID::int32 value); 211 | public: 212 | 213 | // @@protoc_insertion_point(class_scope:RoleBasic) 214 | private: 215 | class _Internal; 216 | 217 | template friend class ::PROTOBUF_NAMESPACE_ID::Arena::InternalHelper; 218 | typedef void InternalArenaConstructable_; 219 | typedef void DestructorSkippable_; 220 | ::PROTOBUF_NAMESPACE_ID::internal::HasBits<1> _has_bits_; 221 | mutable 
::PROTOBUF_NAMESPACE_ID::internal::CachedSize _cached_size_; 222 | ::PROTOBUF_NAMESPACE_ID::int32 gold_; 223 | friend struct ::TableStruct_user_2eproto; 224 | }; 225 | // ------------------------------------------------------------------- 226 | 227 | class Factory PROTOBUF_FINAL : 228 | public ::PROTOBUF_NAMESPACE_ID::Message /* @@protoc_insertion_point(class_definition:Factory) */ { 229 | public: 230 | inline Factory() : Factory(nullptr) {} 231 | virtual ~Factory(); 232 | explicit constexpr Factory(::PROTOBUF_NAMESPACE_ID::internal::ConstantInitialized); 233 | 234 | Factory(const Factory& from); 235 | Factory(Factory&& from) noexcept 236 | : Factory() { 237 | *this = ::std::move(from); 238 | } 239 | 240 | inline Factory& operator=(const Factory& from) { 241 | CopyFrom(from); 242 | return *this; 243 | } 244 | inline Factory& operator=(Factory&& from) noexcept { 245 | if (GetArena() == from.GetArena()) { 246 | if (this != &from) InternalSwap(&from); 247 | } else { 248 | CopyFrom(from); 249 | } 250 | return *this; 251 | } 252 | 253 | inline const ::PROTOBUF_NAMESPACE_ID::UnknownFieldSet& unknown_fields() const { 254 | return _internal_metadata_.unknown_fields<::PROTOBUF_NAMESPACE_ID::UnknownFieldSet>(::PROTOBUF_NAMESPACE_ID::UnknownFieldSet::default_instance); 255 | } 256 | inline ::PROTOBUF_NAMESPACE_ID::UnknownFieldSet* mutable_unknown_fields() { 257 | return _internal_metadata_.mutable_unknown_fields<::PROTOBUF_NAMESPACE_ID::UnknownFieldSet>(); 258 | } 259 | 260 | static const ::PROTOBUF_NAMESPACE_ID::Descriptor* descriptor() { 261 | return GetDescriptor(); 262 | } 263 | static const ::PROTOBUF_NAMESPACE_ID::Descriptor* GetDescriptor() { 264 | return GetMetadataStatic().descriptor; 265 | } 266 | static const ::PROTOBUF_NAMESPACE_ID::Reflection* GetReflection() { 267 | return GetMetadataStatic().reflection; 268 | } 269 | static const Factory& default_instance() { 270 | return *internal_default_instance(); 271 | } 272 | static inline const Factory* 
internal_default_instance() { 273 | return reinterpret_cast( 274 | &_Factory_default_instance_); 275 | } 276 | static constexpr int kIndexInFileMessages = 277 | 1; 278 | 279 | friend void swap(Factory& a, Factory& b) { 280 | a.Swap(&b); 281 | } 282 | inline void Swap(Factory* other) { 283 | if (other == this) return; 284 | if (GetArena() == other->GetArena()) { 285 | InternalSwap(other); 286 | } else { 287 | ::PROTOBUF_NAMESPACE_ID::internal::GenericSwap(this, other); 288 | } 289 | } 290 | void UnsafeArenaSwap(Factory* other) { 291 | if (other == this) return; 292 | GOOGLE_DCHECK(GetArena() == other->GetArena()); 293 | InternalSwap(other); 294 | } 295 | 296 | // implements Message ---------------------------------------------- 297 | 298 | inline Factory* New() const final { 299 | return CreateMaybeMessage(nullptr); 300 | } 301 | 302 | Factory* New(::PROTOBUF_NAMESPACE_ID::Arena* arena) const final { 303 | return CreateMaybeMessage(arena); 304 | } 305 | void CopyFrom(const ::PROTOBUF_NAMESPACE_ID::Message& from) final; 306 | void MergeFrom(const ::PROTOBUF_NAMESPACE_ID::Message& from) final; 307 | void CopyFrom(const Factory& from); 308 | void MergeFrom(const Factory& from); 309 | PROTOBUF_ATTRIBUTE_REINITIALIZES void Clear() final; 310 | bool IsInitialized() const final; 311 | 312 | size_t ByteSizeLong() const final; 313 | const char* _InternalParse(const char* ptr, ::PROTOBUF_NAMESPACE_ID::internal::ParseContext* ctx) final; 314 | ::PROTOBUF_NAMESPACE_ID::uint8* _InternalSerialize( 315 | ::PROTOBUF_NAMESPACE_ID::uint8* target, ::PROTOBUF_NAMESPACE_ID::io::EpsCopyOutputStream* stream) const final; 316 | int GetCachedSize() const final { return _cached_size_.Get(); } 317 | 318 | private: 319 | inline void SharedCtor(); 320 | inline void SharedDtor(); 321 | void SetCachedSize(int size) const final; 322 | void InternalSwap(Factory* other); 323 | friend class ::PROTOBUF_NAMESPACE_ID::internal::AnyMetadata; 324 | static ::PROTOBUF_NAMESPACE_ID::StringPiece 
FullMessageName() { 325 | return "Factory"; 326 | } 327 | protected: 328 | explicit Factory(::PROTOBUF_NAMESPACE_ID::Arena* arena); 329 | private: 330 | static void ArenaDtor(void* object); 331 | inline void RegisterArenaDtor(::PROTOBUF_NAMESPACE_ID::Arena* arena); 332 | public: 333 | 334 | ::PROTOBUF_NAMESPACE_ID::Metadata GetMetadata() const final; 335 | private: 336 | static ::PROTOBUF_NAMESPACE_ID::Metadata GetMetadataStatic() { 337 | return ::descriptor_table_user_2eproto_metadata_getter(kIndexInFileMessages); 338 | } 339 | 340 | public: 341 | 342 | // nested types ---------------------------------------------------- 343 | 344 | // accessors ------------------------------------------------------- 345 | 346 | enum : int { 347 | kIdFieldNumber = 1, 348 | kCdFieldNumber = 2, 349 | }; 350 | // optional int32 id = 1; 351 | bool has_id() const; 352 | private: 353 | bool _internal_has_id() const; 354 | public: 355 | void clear_id(); 356 | ::PROTOBUF_NAMESPACE_ID::int32 id() const; 357 | void set_id(::PROTOBUF_NAMESPACE_ID::int32 value); 358 | private: 359 | ::PROTOBUF_NAMESPACE_ID::int32 _internal_id() const; 360 | void _internal_set_id(::PROTOBUF_NAMESPACE_ID::int32 value); 361 | public: 362 | 363 | // optional uint32 cd = 2; 364 | bool has_cd() const; 365 | private: 366 | bool _internal_has_cd() const; 367 | public: 368 | void clear_cd(); 369 | ::PROTOBUF_NAMESPACE_ID::uint32 cd() const; 370 | void set_cd(::PROTOBUF_NAMESPACE_ID::uint32 value); 371 | private: 372 | ::PROTOBUF_NAMESPACE_ID::uint32 _internal_cd() const; 373 | void _internal_set_cd(::PROTOBUF_NAMESPACE_ID::uint32 value); 374 | public: 375 | 376 | // @@protoc_insertion_point(class_scope:Factory) 377 | private: 378 | class _Internal; 379 | 380 | template friend class ::PROTOBUF_NAMESPACE_ID::Arena::InternalHelper; 381 | typedef void InternalArenaConstructable_; 382 | typedef void DestructorSkippable_; 383 | ::PROTOBUF_NAMESPACE_ID::internal::HasBits<1> _has_bits_; 384 | mutable 
::PROTOBUF_NAMESPACE_ID::internal::CachedSize _cached_size_; 385 | ::PROTOBUF_NAMESPACE_ID::int32 id_; 386 | ::PROTOBUF_NAMESPACE_ID::uint32 cd_; 387 | friend struct ::TableStruct_user_2eproto; 388 | }; 389 | // ------------------------------------------------------------------- 390 | 391 | class FactoryList PROTOBUF_FINAL : 392 | public ::PROTOBUF_NAMESPACE_ID::Message /* @@protoc_insertion_point(class_definition:FactoryList) */ { 393 | public: 394 | inline FactoryList() : FactoryList(nullptr) {} 395 | virtual ~FactoryList(); 396 | explicit constexpr FactoryList(::PROTOBUF_NAMESPACE_ID::internal::ConstantInitialized); 397 | 398 | FactoryList(const FactoryList& from); 399 | FactoryList(FactoryList&& from) noexcept 400 | : FactoryList() { 401 | *this = ::std::move(from); 402 | } 403 | 404 | inline FactoryList& operator=(const FactoryList& from) { 405 | CopyFrom(from); 406 | return *this; 407 | } 408 | inline FactoryList& operator=(FactoryList&& from) noexcept { 409 | if (GetArena() == from.GetArena()) { 410 | if (this != &from) InternalSwap(&from); 411 | } else { 412 | CopyFrom(from); 413 | } 414 | return *this; 415 | } 416 | 417 | inline const ::PROTOBUF_NAMESPACE_ID::UnknownFieldSet& unknown_fields() const { 418 | return _internal_metadata_.unknown_fields<::PROTOBUF_NAMESPACE_ID::UnknownFieldSet>(::PROTOBUF_NAMESPACE_ID::UnknownFieldSet::default_instance); 419 | } 420 | inline ::PROTOBUF_NAMESPACE_ID::UnknownFieldSet* mutable_unknown_fields() { 421 | return _internal_metadata_.mutable_unknown_fields<::PROTOBUF_NAMESPACE_ID::UnknownFieldSet>(); 422 | } 423 | 424 | static const ::PROTOBUF_NAMESPACE_ID::Descriptor* descriptor() { 425 | return GetDescriptor(); 426 | } 427 | static const ::PROTOBUF_NAMESPACE_ID::Descriptor* GetDescriptor() { 428 | return GetMetadataStatic().descriptor; 429 | } 430 | static const ::PROTOBUF_NAMESPACE_ID::Reflection* GetReflection() { 431 | return GetMetadataStatic().reflection; 432 | } 433 | static const FactoryList& default_instance() 
{ 434 | return *internal_default_instance(); 435 | } 436 | static inline const FactoryList* internal_default_instance() { 437 | return reinterpret_cast( 438 | &_FactoryList_default_instance_); 439 | } 440 | static constexpr int kIndexInFileMessages = 441 | 2; 442 | 443 | friend void swap(FactoryList& a, FactoryList& b) { 444 | a.Swap(&b); 445 | } 446 | inline void Swap(FactoryList* other) { 447 | if (other == this) return; 448 | if (GetArena() == other->GetArena()) { 449 | InternalSwap(other); 450 | } else { 451 | ::PROTOBUF_NAMESPACE_ID::internal::GenericSwap(this, other); 452 | } 453 | } 454 | void UnsafeArenaSwap(FactoryList* other) { 455 | if (other == this) return; 456 | GOOGLE_DCHECK(GetArena() == other->GetArena()); 457 | InternalSwap(other); 458 | } 459 | 460 | // implements Message ---------------------------------------------- 461 | 462 | inline FactoryList* New() const final { 463 | return CreateMaybeMessage(nullptr); 464 | } 465 | 466 | FactoryList* New(::PROTOBUF_NAMESPACE_ID::Arena* arena) const final { 467 | return CreateMaybeMessage(arena); 468 | } 469 | void CopyFrom(const ::PROTOBUF_NAMESPACE_ID::Message& from) final; 470 | void MergeFrom(const ::PROTOBUF_NAMESPACE_ID::Message& from) final; 471 | void CopyFrom(const FactoryList& from); 472 | void MergeFrom(const FactoryList& from); 473 | PROTOBUF_ATTRIBUTE_REINITIALIZES void Clear() final; 474 | bool IsInitialized() const final; 475 | 476 | size_t ByteSizeLong() const final; 477 | const char* _InternalParse(const char* ptr, ::PROTOBUF_NAMESPACE_ID::internal::ParseContext* ctx) final; 478 | ::PROTOBUF_NAMESPACE_ID::uint8* _InternalSerialize( 479 | ::PROTOBUF_NAMESPACE_ID::uint8* target, ::PROTOBUF_NAMESPACE_ID::io::EpsCopyOutputStream* stream) const final; 480 | int GetCachedSize() const final { return _cached_size_.Get(); } 481 | 482 | private: 483 | inline void SharedCtor(); 484 | inline void SharedDtor(); 485 | void SetCachedSize(int size) const final; 486 | void InternalSwap(FactoryList* 
other); 487 | friend class ::PROTOBUF_NAMESPACE_ID::internal::AnyMetadata; 488 | static ::PROTOBUF_NAMESPACE_ID::StringPiece FullMessageName() { 489 | return "FactoryList"; 490 | } 491 | protected: 492 | explicit FactoryList(::PROTOBUF_NAMESPACE_ID::Arena* arena); 493 | private: 494 | static void ArenaDtor(void* object); 495 | inline void RegisterArenaDtor(::PROTOBUF_NAMESPACE_ID::Arena* arena); 496 | public: 497 | 498 | ::PROTOBUF_NAMESPACE_ID::Metadata GetMetadata() const final; 499 | private: 500 | static ::PROTOBUF_NAMESPACE_ID::Metadata GetMetadataStatic() { 501 | return ::descriptor_table_user_2eproto_metadata_getter(kIndexInFileMessages); 502 | } 503 | 504 | public: 505 | 506 | // nested types ---------------------------------------------------- 507 | 508 | // accessors ------------------------------------------------------- 509 | 510 | enum : int { 511 | kFactoryListFieldNumber = 1, 512 | }; 513 | // repeated .Factory factory_list = 1; 514 | int factory_list_size() const; 515 | private: 516 | int _internal_factory_list_size() const; 517 | public: 518 | void clear_factory_list(); 519 | ::Factory* mutable_factory_list(int index); 520 | ::PROTOBUF_NAMESPACE_ID::RepeatedPtrField< ::Factory >* 521 | mutable_factory_list(); 522 | private: 523 | const ::Factory& _internal_factory_list(int index) const; 524 | ::Factory* _internal_add_factory_list(); 525 | public: 526 | const ::Factory& factory_list(int index) const; 527 | ::Factory* add_factory_list(); 528 | const ::PROTOBUF_NAMESPACE_ID::RepeatedPtrField< ::Factory >& 529 | factory_list() const; 530 | 531 | // @@protoc_insertion_point(class_scope:FactoryList) 532 | private: 533 | class _Internal; 534 | 535 | template friend class ::PROTOBUF_NAMESPACE_ID::Arena::InternalHelper; 536 | typedef void InternalArenaConstructable_; 537 | typedef void DestructorSkippable_; 538 | ::PROTOBUF_NAMESPACE_ID::RepeatedPtrField< ::Factory > factory_list_; 539 | mutable ::PROTOBUF_NAMESPACE_ID::internal::CachedSize _cached_size_; 
540 | friend struct ::TableStruct_user_2eproto; 541 | }; 542 | // ------------------------------------------------------------------- 543 | 544 | class Role PROTOBUF_FINAL : 545 | public ::PROTOBUF_NAMESPACE_ID::Message /* @@protoc_insertion_point(class_definition:Role) */ { 546 | public: 547 | inline Role() : Role(nullptr) {} 548 | virtual ~Role(); 549 | explicit constexpr Role(::PROTOBUF_NAMESPACE_ID::internal::ConstantInitialized); 550 | 551 | Role(const Role& from); 552 | Role(Role&& from) noexcept 553 | : Role() { 554 | *this = ::std::move(from); 555 | } 556 | 557 | inline Role& operator=(const Role& from) { 558 | CopyFrom(from); 559 | return *this; 560 | } 561 | inline Role& operator=(Role&& from) noexcept { 562 | if (GetArena() == from.GetArena()) { 563 | if (this != &from) InternalSwap(&from); 564 | } else { 565 | CopyFrom(from); 566 | } 567 | return *this; 568 | } 569 | 570 | inline const ::PROTOBUF_NAMESPACE_ID::UnknownFieldSet& unknown_fields() const { 571 | return _internal_metadata_.unknown_fields<::PROTOBUF_NAMESPACE_ID::UnknownFieldSet>(::PROTOBUF_NAMESPACE_ID::UnknownFieldSet::default_instance); 572 | } 573 | inline ::PROTOBUF_NAMESPACE_ID::UnknownFieldSet* mutable_unknown_fields() { 574 | return _internal_metadata_.mutable_unknown_fields<::PROTOBUF_NAMESPACE_ID::UnknownFieldSet>(); 575 | } 576 | 577 | static const ::PROTOBUF_NAMESPACE_ID::Descriptor* descriptor() { 578 | return GetDescriptor(); 579 | } 580 | static const ::PROTOBUF_NAMESPACE_ID::Descriptor* GetDescriptor() { 581 | return GetMetadataStatic().descriptor; 582 | } 583 | static const ::PROTOBUF_NAMESPACE_ID::Reflection* GetReflection() { 584 | return GetMetadataStatic().reflection; 585 | } 586 | static const Role& default_instance() { 587 | return *internal_default_instance(); 588 | } 589 | static inline const Role* internal_default_instance() { 590 | return reinterpret_cast( 591 | &_Role_default_instance_); 592 | } 593 | static constexpr int kIndexInFileMessages = 594 | 3; 595 | 596 | 
friend void swap(Role& a, Role& b) { 597 | a.Swap(&b); 598 | } 599 | inline void Swap(Role* other) { 600 | if (other == this) return; 601 | if (GetArena() == other->GetArena()) { 602 | InternalSwap(other); 603 | } else { 604 | ::PROTOBUF_NAMESPACE_ID::internal::GenericSwap(this, other); 605 | } 606 | } 607 | void UnsafeArenaSwap(Role* other) { 608 | if (other == this) return; 609 | GOOGLE_DCHECK(GetArena() == other->GetArena()); 610 | InternalSwap(other); 611 | } 612 | 613 | // implements Message ---------------------------------------------- 614 | 615 | inline Role* New() const final { 616 | return CreateMaybeMessage(nullptr); 617 | } 618 | 619 | Role* New(::PROTOBUF_NAMESPACE_ID::Arena* arena) const final { 620 | return CreateMaybeMessage(arena); 621 | } 622 | void CopyFrom(const ::PROTOBUF_NAMESPACE_ID::Message& from) final; 623 | void MergeFrom(const ::PROTOBUF_NAMESPACE_ID::Message& from) final; 624 | void CopyFrom(const Role& from); 625 | void MergeFrom(const Role& from); 626 | PROTOBUF_ATTRIBUTE_REINITIALIZES void Clear() final; 627 | bool IsInitialized() const final; 628 | 629 | size_t ByteSizeLong() const final; 630 | const char* _InternalParse(const char* ptr, ::PROTOBUF_NAMESPACE_ID::internal::ParseContext* ctx) final; 631 | ::PROTOBUF_NAMESPACE_ID::uint8* _InternalSerialize( 632 | ::PROTOBUF_NAMESPACE_ID::uint8* target, ::PROTOBUF_NAMESPACE_ID::io::EpsCopyOutputStream* stream) const final; 633 | int GetCachedSize() const final { return _cached_size_.Get(); } 634 | 635 | private: 636 | inline void SharedCtor(); 637 | inline void SharedDtor(); 638 | void SetCachedSize(int size) const final; 639 | void InternalSwap(Role* other); 640 | friend class ::PROTOBUF_NAMESPACE_ID::internal::AnyMetadata; 641 | static ::PROTOBUF_NAMESPACE_ID::StringPiece FullMessageName() { 642 | return "Role"; 643 | } 644 | protected: 645 | explicit Role(::PROTOBUF_NAMESPACE_ID::Arena* arena); 646 | private: 647 | static void ArenaDtor(void* object); 648 | inline void 
RegisterArenaDtor(::PROTOBUF_NAMESPACE_ID::Arena* arena); 649 | public: 650 | 651 | ::PROTOBUF_NAMESPACE_ID::Metadata GetMetadata() const final; 652 | private: 653 | static ::PROTOBUF_NAMESPACE_ID::Metadata GetMetadataStatic() { 654 | return ::descriptor_table_user_2eproto_metadata_getter(kIndexInFileMessages); 655 | } 656 | 657 | public: 658 | 659 | // nested types ---------------------------------------------------- 660 | 661 | // accessors ------------------------------------------------------- 662 | 663 | enum : int { 664 | kRoleBasicFieldNumber = 1, 665 | kFactoryListFieldNumber = 2, 666 | }; 667 | // optional .RoleBasic role_basic = 1; 668 | bool has_role_basic() const; 669 | private: 670 | bool _internal_has_role_basic() const; 671 | public: 672 | void clear_role_basic(); 673 | const ::RoleBasic& role_basic() const; 674 | ::RoleBasic* release_role_basic(); 675 | ::RoleBasic* mutable_role_basic(); 676 | void set_allocated_role_basic(::RoleBasic* role_basic); 677 | private: 678 | const ::RoleBasic& _internal_role_basic() const; 679 | ::RoleBasic* _internal_mutable_role_basic(); 680 | public: 681 | void unsafe_arena_set_allocated_role_basic( 682 | ::RoleBasic* role_basic); 683 | ::RoleBasic* unsafe_arena_release_role_basic(); 684 | 685 | // optional .FactoryList factory_list = 2 [lazy = true]; 686 | bool has_factory_list() const; 687 | private: 688 | bool _internal_has_factory_list() const; 689 | public: 690 | void clear_factory_list(); 691 | const ::FactoryList& factory_list() const; 692 | ::FactoryList* release_factory_list(); 693 | ::FactoryList* mutable_factory_list(); 694 | void set_allocated_factory_list(::FactoryList* factory_list); 695 | private: 696 | const ::FactoryList& _internal_factory_list() const; 697 | ::FactoryList* _internal_mutable_factory_list(); 698 | public: 699 | void unsafe_arena_set_allocated_factory_list( 700 | ::FactoryList* factory_list); 701 | ::FactoryList* unsafe_arena_release_factory_list(); 702 | 703 | // 
@@protoc_insertion_point(class_scope:Role) 704 | private: 705 | class _Internal; 706 | 707 | template friend class ::PROTOBUF_NAMESPACE_ID::Arena::InternalHelper; 708 | typedef void InternalArenaConstructable_; 709 | typedef void DestructorSkippable_; 710 | ::PROTOBUF_NAMESPACE_ID::internal::HasBits<1> _has_bits_; 711 | mutable ::PROTOBUF_NAMESPACE_ID::internal::CachedSize _cached_size_; 712 | ::RoleBasic* role_basic_; 713 | ::PROTOBUF_NAMESPACE_ID::LazyMessage<::FactoryList> factory_list_; 714 | friend struct ::TableStruct_user_2eproto; 715 | }; 716 | // =================================================================== 717 | 718 | 719 | // =================================================================== 720 | 721 | #ifdef __GNUC__ 722 | #pragma GCC diagnostic push 723 | #pragma GCC diagnostic ignored "-Wstrict-aliasing" 724 | #endif // __GNUC__ 725 | // RoleBasic 726 | 727 | // optional int32 gold = 1; 728 | inline bool RoleBasic::_internal_has_gold() const { 729 | bool value = (_has_bits_[0] & 0x00000001u) != 0; 730 | return value; 731 | } 732 | inline bool RoleBasic::has_gold() const { 733 | return _internal_has_gold(); 734 | } 735 | inline void RoleBasic::clear_gold() { 736 | gold_ = 0; 737 | _has_bits_[0] &= ~0x00000001u; 738 | } 739 | inline ::PROTOBUF_NAMESPACE_ID::int32 RoleBasic::_internal_gold() const { 740 | return gold_; 741 | } 742 | inline ::PROTOBUF_NAMESPACE_ID::int32 RoleBasic::gold() const { 743 | // @@protoc_insertion_point(field_get:RoleBasic.gold) 744 | return _internal_gold(); 745 | } 746 | inline void RoleBasic::_internal_set_gold(::PROTOBUF_NAMESPACE_ID::int32 value) { 747 | _has_bits_[0] |= 0x00000001u; 748 | gold_ = value; 749 | } 750 | inline void RoleBasic::set_gold(::PROTOBUF_NAMESPACE_ID::int32 value) { 751 | _internal_set_gold(value); 752 | // @@protoc_insertion_point(field_set:RoleBasic.gold) 753 | } 754 | 755 | // ------------------------------------------------------------------- 756 | 757 | // Factory 758 | 759 | // optional 
int32 id = 1; 760 | inline bool Factory::_internal_has_id() const { 761 | bool value = (_has_bits_[0] & 0x00000001u) != 0; 762 | return value; 763 | } 764 | inline bool Factory::has_id() const { 765 | return _internal_has_id(); 766 | } 767 | inline void Factory::clear_id() { 768 | id_ = 0; 769 | _has_bits_[0] &= ~0x00000001u; 770 | } 771 | inline ::PROTOBUF_NAMESPACE_ID::int32 Factory::_internal_id() const { 772 | return id_; 773 | } 774 | inline ::PROTOBUF_NAMESPACE_ID::int32 Factory::id() const { 775 | // @@protoc_insertion_point(field_get:Factory.id) 776 | return _internal_id(); 777 | } 778 | inline void Factory::_internal_set_id(::PROTOBUF_NAMESPACE_ID::int32 value) { 779 | _has_bits_[0] |= 0x00000001u; 780 | id_ = value; 781 | } 782 | inline void Factory::set_id(::PROTOBUF_NAMESPACE_ID::int32 value) { 783 | _internal_set_id(value); 784 | // @@protoc_insertion_point(field_set:Factory.id) 785 | } 786 | 787 | // optional uint32 cd = 2; 788 | inline bool Factory::_internal_has_cd() const { 789 | bool value = (_has_bits_[0] & 0x00000002u) != 0; 790 | return value; 791 | } 792 | inline bool Factory::has_cd() const { 793 | return _internal_has_cd(); 794 | } 795 | inline void Factory::clear_cd() { 796 | cd_ = 0u; 797 | _has_bits_[0] &= ~0x00000002u; 798 | } 799 | inline ::PROTOBUF_NAMESPACE_ID::uint32 Factory::_internal_cd() const { 800 | return cd_; 801 | } 802 | inline ::PROTOBUF_NAMESPACE_ID::uint32 Factory::cd() const { 803 | // @@protoc_insertion_point(field_get:Factory.cd) 804 | return _internal_cd(); 805 | } 806 | inline void Factory::_internal_set_cd(::PROTOBUF_NAMESPACE_ID::uint32 value) { 807 | _has_bits_[0] |= 0x00000002u; 808 | cd_ = value; 809 | } 810 | inline void Factory::set_cd(::PROTOBUF_NAMESPACE_ID::uint32 value) { 811 | _internal_set_cd(value); 812 | // @@protoc_insertion_point(field_set:Factory.cd) 813 | } 814 | 815 | // ------------------------------------------------------------------- 816 | 817 | // FactoryList 818 | 819 | // repeated .Factory 
factory_list = 1; 820 | inline int FactoryList::_internal_factory_list_size() const { 821 | return factory_list_.size(); 822 | } 823 | inline int FactoryList::factory_list_size() const { 824 | return _internal_factory_list_size(); 825 | } 826 | inline void FactoryList::clear_factory_list() { 827 | factory_list_.Clear(); 828 | } 829 | inline ::Factory* FactoryList::mutable_factory_list(int index) { 830 | // @@protoc_insertion_point(field_mutable:FactoryList.factory_list) 831 | return factory_list_.Mutable(index); 832 | } 833 | inline ::PROTOBUF_NAMESPACE_ID::RepeatedPtrField< ::Factory >* 834 | FactoryList::mutable_factory_list() { 835 | // @@protoc_insertion_point(field_mutable_list:FactoryList.factory_list) 836 | return &factory_list_; 837 | } 838 | inline const ::Factory& FactoryList::_internal_factory_list(int index) const { 839 | return factory_list_.Get(index); 840 | } 841 | inline const ::Factory& FactoryList::factory_list(int index) const { 842 | // @@protoc_insertion_point(field_get:FactoryList.factory_list) 843 | return _internal_factory_list(index); 844 | } 845 | inline ::Factory* FactoryList::_internal_add_factory_list() { 846 | return factory_list_.Add(); 847 | } 848 | inline ::Factory* FactoryList::add_factory_list() { 849 | // @@protoc_insertion_point(field_add:FactoryList.factory_list) 850 | return _internal_add_factory_list(); 851 | } 852 | inline const ::PROTOBUF_NAMESPACE_ID::RepeatedPtrField< ::Factory >& 853 | FactoryList::factory_list() const { 854 | // @@protoc_insertion_point(field_list:FactoryList.factory_list) 855 | return factory_list_; 856 | } 857 | 858 | // ------------------------------------------------------------------- 859 | 860 | // Role 861 | 862 | // optional .RoleBasic role_basic = 1; 863 | inline bool Role::_internal_has_role_basic() const { 864 | bool value = (_has_bits_[0] & 0x00000001u) != 0; 865 | PROTOBUF_ASSUME(!value || role_basic_ != nullptr); 866 | return value; 867 | } 868 | inline bool Role::has_role_basic() const { 
869 | return _internal_has_role_basic(); 870 | } 871 | inline void Role::clear_role_basic() { 872 | if (role_basic_ != nullptr) role_basic_->Clear(); 873 | _has_bits_[0] &= ~0x00000001u; 874 | } 875 | inline const ::RoleBasic& Role::_internal_role_basic() const { 876 | const ::RoleBasic* p = role_basic_; 877 | return p != nullptr ? *p : reinterpret_cast( 878 | ::_RoleBasic_default_instance_); 879 | } 880 | inline const ::RoleBasic& Role::role_basic() const { 881 | // @@protoc_insertion_point(field_get:Role.role_basic) 882 | return _internal_role_basic(); 883 | } 884 | inline void Role::unsafe_arena_set_allocated_role_basic( 885 | ::RoleBasic* role_basic) { 886 | if (GetArena() == nullptr) { 887 | delete reinterpret_cast<::PROTOBUF_NAMESPACE_ID::MessageLite*>(role_basic_); 888 | } 889 | role_basic_ = role_basic; 890 | if (role_basic) { 891 | _has_bits_[0] |= 0x00000001u; 892 | } else { 893 | _has_bits_[0] &= ~0x00000001u; 894 | } 895 | // @@protoc_insertion_point(field_unsafe_arena_set_allocated:Role.role_basic) 896 | } 897 | inline ::RoleBasic* Role::release_role_basic() { 898 | _has_bits_[0] &= ~0x00000001u; 899 | ::RoleBasic* temp = role_basic_; 900 | role_basic_ = nullptr; 901 | if (GetArena() != nullptr) { 902 | temp = ::PROTOBUF_NAMESPACE_ID::internal::DuplicateIfNonNull(temp); 903 | } 904 | return temp; 905 | } 906 | inline ::RoleBasic* Role::unsafe_arena_release_role_basic() { 907 | // @@protoc_insertion_point(field_release:Role.role_basic) 908 | _has_bits_[0] &= ~0x00000001u; 909 | ::RoleBasic* temp = role_basic_; 910 | role_basic_ = nullptr; 911 | return temp; 912 | } 913 | inline ::RoleBasic* Role::_internal_mutable_role_basic() { 914 | _has_bits_[0] |= 0x00000001u; 915 | if (role_basic_ == nullptr) { 916 | auto* p = CreateMaybeMessage<::RoleBasic>(GetArena()); 917 | role_basic_ = p; 918 | } 919 | return role_basic_; 920 | } 921 | inline ::RoleBasic* Role::mutable_role_basic() { 922 | // @@protoc_insertion_point(field_mutable:Role.role_basic) 923 | return 
_internal_mutable_role_basic(); 924 | } 925 | inline void Role::set_allocated_role_basic(::RoleBasic* role_basic) { 926 | ::PROTOBUF_NAMESPACE_ID::Arena* message_arena = GetArena(); 927 | if (message_arena == nullptr) { 928 | delete role_basic_; 929 | } 930 | if (role_basic) { 931 | ::PROTOBUF_NAMESPACE_ID::Arena* submessage_arena = 932 | ::PROTOBUF_NAMESPACE_ID::Arena::GetArena(role_basic); 933 | if (message_arena != submessage_arena) { 934 | role_basic = ::PROTOBUF_NAMESPACE_ID::internal::GetOwnedMessage( 935 | message_arena, role_basic, submessage_arena); 936 | } 937 | _has_bits_[0] |= 0x00000001u; 938 | } else { 939 | _has_bits_[0] &= ~0x00000001u; 940 | } 941 | role_basic_ = role_basic; 942 | // @@protoc_insertion_point(field_set_allocated:Role.role_basic) 943 | } 944 | 945 | // optional .FactoryList factory_list = 2 [lazy = true]; 946 | inline bool Role::_internal_has_factory_list() const { 947 | bool value = (_has_bits_[0] & 0x00000002u) != 0; 948 | PROTOBUF_ASSUME(!value || factory_list_ != nullptr); 949 | return value; 950 | } 951 | inline bool Role::has_factory_list() const { 952 | return _internal_has_factory_list(); 953 | } 954 | inline void Role::clear_factory_list() { 955 | if (factory_list_ != nullptr) factory_list_.Clear(); 956 | _has_bits_[0] &= ~0x00000002u; 957 | } 958 | inline const ::FactoryList& Role::_internal_factory_list() const { 959 | const ::FactoryList* p = factory_list_.GetLazyMessage(GetArena()); 960 | return p != nullptr ? 
*p : reinterpret_cast( 961 | ::_FactoryList_default_instance_); 962 | } 963 | inline const ::FactoryList& Role::factory_list() const { 964 | // @@protoc_insertion_point(field_get:Role.factory_list) 965 | return _internal_factory_list(); 966 | } 967 | inline void Role::unsafe_arena_set_allocated_factory_list( 968 | ::FactoryList* factory_list) { 969 | if (GetArena() == nullptr) { 970 | factory_list_.Delete(); 971 | } 972 | factory_list_ = factory_list; 973 | if (factory_list) { 974 | _has_bits_[0] |= 0x00000002u; 975 | } else { 976 | _has_bits_[0] &= ~0x00000002u; 977 | } 978 | // @@protoc_insertion_point(field_unsafe_arena_set_allocated:Role.factory_list) 979 | } 980 | inline ::FactoryList* Role::release_factory_list() { 981 | _has_bits_[0] &= ~0x00000002u; 982 | ::FactoryList* temp = factory_list_.GetLazyMessage(GetArena()); 983 | factory_list_ = nullptr; 984 | if (GetArena() != nullptr) { 985 | temp = ::PROTOBUF_NAMESPACE_ID::internal::DuplicateIfNonNull(temp); 986 | } 987 | return temp; 988 | } 989 | inline ::FactoryList* Role::unsafe_arena_release_factory_list() { 990 | // @@protoc_insertion_point(field_release:Role.factory_list) 991 | _has_bits_[0] &= ~0x00000002u; 992 | ::FactoryList* temp = factory_list_.GetLazyMessage(GetArena()); 993 | factory_list_ = nullptr; 994 | return temp; 995 | } 996 | inline ::FactoryList* Role::_internal_mutable_factory_list() { 997 | _has_bits_[0] |= 0x00000002u; 998 | if (factory_list_ == nullptr) { 999 | auto* p = CreateMaybeMessage<::FactoryList>(GetArena()); 1000 | factory_list_ = p; 1001 | } 1002 | return factory_list_.GetLazyMessage(GetArena()); 1003 | } 1004 | inline ::FactoryList* Role::mutable_factory_list() { 1005 | // @@protoc_insertion_point(field_mutable:Role.factory_list) 1006 | return _internal_mutable_factory_list(); 1007 | } 1008 | inline void Role::set_allocated_factory_list(::FactoryList* factory_list) { 1009 | ::PROTOBUF_NAMESPACE_ID::Arena* message_arena = GetArena(); 1010 | if (message_arena == nullptr) { 
1011 | factory_list_.Delete(); 1012 | } 1013 | if (factory_list) { 1014 | ::PROTOBUF_NAMESPACE_ID::Arena* submessage_arena = 1015 | ::PROTOBUF_NAMESPACE_ID::Arena::GetArena(factory_list); 1016 | if (message_arena != submessage_arena) { 1017 | factory_list = ::PROTOBUF_NAMESPACE_ID::internal::GetOwnedMessage( 1018 | message_arena, factory_list, submessage_arena); 1019 | } 1020 | _has_bits_[0] |= 0x00000002u; 1021 | } else { 1022 | _has_bits_[0] &= ~0x00000002u; 1023 | } 1024 | factory_list_ = factory_list; 1025 | // @@protoc_insertion_point(field_set_allocated:Role.factory_list) 1026 | } 1027 | 1028 | #ifdef __GNUC__ 1029 | #pragma GCC diagnostic pop 1030 | #endif // __GNUC__ 1031 | // ------------------------------------------------------------------- 1032 | 1033 | // ------------------------------------------------------------------- 1034 | 1035 | // ------------------------------------------------------------------- 1036 | 1037 | 1038 | // @@protoc_insertion_point(namespace_scope) 1039 | 1040 | 1041 | // @@protoc_insertion_point(global_scope) 1042 | 1043 | #include 1044 | #endif // GOOGLE_PROTOBUF_INCLUDED_GOOGLE_PROTOBUF_INCLUDED_user_2eproto 1045 | -------------------------------------------------------------------------------- /lazy_pb/user.proto: -------------------------------------------------------------------------------- 1 | syntax = "proto2"; 2 | 3 | message RoleBasic 4 | { 5 | optional int32 gold = 1; 6 | } 7 | 8 | message Factory 9 | { 10 | optional int32 id = 1; 11 | optional uint32 cd = 2; 12 | } 13 | 14 | message FactoryList 15 | { 16 | repeated Factory factory_list = 1; 17 | } 18 | 19 | message Role 20 | { 21 | optional RoleBasic role_basic = 1; 22 | optional FactoryList factory_list = 2[lazy = true]; 23 | // dress 24 | // skin 25 | } 26 | 27 | -------------------------------------------------------------------------------- /likely/main.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 
/*
 * Bounded multi-producer / multi-consumer FIFO backed by a ring of N slots.
 *
 *   read_index_  : number of slots claimed by consumers so far
 *   write_index_ : number of slots claimed by producers so far
 *
 * Both counters only ever grow; the slot that belongs to a counter value is
 * `counter % N`. Every slot carries a `full_` flag that hands the payload
 * over from the producer that filled it to the consumer that drains it, and
 * back again once it has been drained.
 *
 * Memory ordering: every store that publishes state (`full_`, and the CAS on
 * either index) is a release operation, so every load that *decides* whether
 * a slot may be touched is an acquire. Without the acquire side the release
 * stores synchronise with nothing and the accesses to `data_` are a data race.
 */
template <typename T, size_t N>
class LockFreeQueue
{
    static_assert(N > 0, "LockFreeQueue needs at least one slot");

public:
    struct Element
    {
        std::atomic<bool> full_{false};  // true while data_ holds an undelivered value
        T data_;
    };

    LockFreeQueue() : data_(N), read_index_(0), write_index_(0)
    {
    }

    /*
     * Try to append `value`.
     *
     * Returns false without modifying the queue when it is full
     * (write_index_ == read_index_ + data_.size()) or when the target slot is
     * still marked full because a consumer has claimed it but not finished
     * moving the value out. Callers are expected to retry.
     */
    bool Enqueue(T value)
    {
        size_t write_index = 0;
        Element* e = nullptr;
        do
        {
            write_index = write_index_.load(std::memory_order_relaxed);
            // acquire: pairs with the consumers' release CAS on read_index_,
            // so the full_ load below cannot observe a value older than the
            // one the consumer of the previous lap acted on.
            if (write_index >= read_index_.load(std::memory_order_acquire) + data_.size())
            {
                return false;
            }
            e = &data_[write_index % data_.size()];
            // acquire: pairs with the release store of `false` in Dequeue, so
            // the consumer's move-out of data_ happens-before our write to it.
            if (e->full_.load(std::memory_order_acquire))
            {
                return false;
            }
        } while (!write_index_.compare_exchange_weak(write_index, write_index + 1,
                                                     std::memory_order_release,
                                                     std::memory_order_relaxed));

        // The slot is now exclusively ours until full_ is published.
        e->data_ = std::move(value);
        e->full_.store(true, std::memory_order_release);
        return true;
    }

    /*
     * Try to remove the oldest element into `value`.
     *
     * Returns false and leaves `value` untouched when the queue is empty or
     * when the producer that claimed the head slot has not yet published its
     * payload. Callers are expected to retry.
     */
    bool Dequeue(T& value)
    {
        size_t read_index = 0;
        Element* e = nullptr;
        do
        {
            read_index = read_index_.load(std::memory_order_relaxed);
            // acquire: pairs with the producers' release CAS on write_index_.
            if (read_index >= write_index_.load(std::memory_order_acquire))
            {
                return false;
            }
            e = &data_[read_index % data_.size()];
            // acquire: pairs with the release store of `true` in Enqueue, so
            // the producer's write to data_ happens-before our read of it.
            if (!e->full_.load(std::memory_order_acquire))
            {
                return false;
            }
        } while (!read_index_.compare_exchange_weak(read_index, read_index + 1,
                                                    std::memory_order_release,
                                                    std::memory_order_relaxed));

        // The slot is now exclusively ours until full_ is cleared.
        value = std::move(e->data_);
        e->full_.store(false, std::memory_order_release);
        return true;
    }

private:
    std::vector<Element> data_;
    std::atomic<size_t> read_index_;
    std::atomic<size_t> write_index_;
};
/*
 * Fixed-size square matrix of N x N elements stored row-major in `ele`.
 *
 * `element_t` may itself be a matrix, which yields a tiled (block) layout:
 * assignment from a float, `+=` and `operator*` all recurse into the
 * elements, so the same code drives both the flat and the blocked variants.
 */
template <typename element_t, int N>
struct matrix
{
    typedef element_t ele_t;
    element_t ele[N][N];

    // Number of rows (== number of columns).
    inline int size() const
    {
        return N;
    }

    // Flat row-major access: index in [0, N*N).
    inline const element_t& operator[](int index) const
    {
        return *(ele[0] + index);
    }

    inline element_t& operator[](int index)
    {
        return *(ele[0] + index);
    }

    // Fill every element with x (recurses when element_t is a matrix).
    inline void operator=(float x)
    {
        for (int i = 0; i < N; i++)
        {
            for (int j = 0; j < N; j++)
            {
                ele[i][j] = x;
            }
        }
    }

    // Element-wise accumulate.
    inline void operator += (const matrix& o)
    {
        for (int i = 0; i < N; i++)
        {
            for (int j = 0; j < N; j++)
            {
                ele[i][j] += o.ele[i][j];
            }
        }
    }
};

/*
 * Classic i-j-k matrix product. For a matrix of tiles the inner
 * `a.ele[i][k] * b.ele[k][j]` is itself a call to this operator on the tile
 * type, so the multiplication is performed block by block.
 *
 * The result is returned by value; the operands are promised not to alias.
 */
template <typename matrix_t>
matrix_t operator*(const matrix_t& __restrict a, const matrix_t& __restrict b)
{
    const int N = a.size();
    matrix_t r;
    for (int i = 0; i < N; i++)
    {
        for (int j = 0; j < N; j++)
        {
            // Zero the accumulator through operator=(float) so tiles work too.
            r.ele[i][j] = 0.0f;
            for (int k = 0; k < N; k++)
            {
                r.ele[i][j] += a.ele[i][k] * b.ele[k][j];
            }
        }
    }
    return r;
}
// Restricts the calling thread to run only on CPU `index`.
//
// The first argument of sched_setaffinity is a thread/process id, not a CPU
// number; 0 selects the calling thread. The previous code passed `index`
// there, which only worked by accident for index == 0 and otherwise targeted
// an unrelated process.
void bind_to_cpu(int index)
{
    cpu_set_t set;
    CPU_ZERO(&set);
    CPU_SET(index, &set);
    sched_setaffinity(0, sizeof(set), &set);
}
// Two ways of summing an int array into the member m_total.
struct A
{
    int m_total;

    // Accumulates directly into the member: m_total is reset to 0 and then
    // updated on every element.
    __attribute__((noinline)) void sum1(const int* a, int count)
    {
        m_total = 0;
        int pos = 0;
        while (pos < count)
        {
            m_total += a[pos];
            ++pos;
        }
    }

    // Accumulates into a local and writes the member once at the end.
    __attribute__((noinline)) void sum2(const int* a, int count)
    {
        int running = 0;
        for (int left = count; left > 0; --left)
        {
            running += *a++;
        }
        m_total = running;
    }

};
c[j]; 17 | } 18 | } 19 | auto end = std::chrono::high_resolution_clock::now(); 20 | std::cout << "elapsed milliseconds: " << std::chrono::duration_cast(end - start).count() << std::endl; 21 | return sum; 22 | } 23 | 24 | int main() 25 | { 26 | cpu_set_t set; 27 | CPU_ZERO(&set); 28 | CPU_SET(0, &set); 29 | sched_setaffinity(0, sizeof(set), &set); 30 | 31 | const int N = 1024 * 1024 * 16; 32 | std::vector va; 33 | for (int i = 0; i < N; i++) 34 | { 35 | va.push_back(rand()); 36 | } 37 | const int M = 100; 38 | int sum = test(M, va); 39 | std::cout << sum << std::endl; 40 | return 0; 41 | } 42 | 43 | -------------------------------------------------------------------------------- /memory_issues/Makefile: -------------------------------------------------------------------------------- 1 | all: corrupt protect leak 2 | 3 | corrupt: memory_corrupt.cpp 4 | clang++ memory_corrupt.cpp -fsanitize=address -o corrupt 5 | 6 | protect: memory_protect.cpp 7 | clang++ memory_protect.cpp -fsanitize=address -o protect 8 | 9 | # perf record -e page-faults --callgraph dwarf -p pid 10 | # perf script 11 | leak: memory_leak.cpp 12 | clang++ memory_leak.cpp -o leak 13 | 14 | clean: 15 | rm -rf corrupt protect leak 16 | 17 | -------------------------------------------------------------------------------- /memory_issues/memory_corrupt.cpp: -------------------------------------------------------------------------------- 1 | 2 | int main() 3 | { 4 | char buffer[16]; 5 | buffer[16] = '\0'; 6 | return 0; 7 | } 8 | -------------------------------------------------------------------------------- /memory_issues/memory_leak.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | 6 | void leak() 7 | { 8 | char* p = (char*)malloc(128); 9 | memset(p, 0, 128); 10 | printf("leak p=%p\n", p); 11 | } 12 | 13 | int main() 14 | { 15 | while (true) 16 | { 17 | leak(); 18 | usleep(1000*100); 19 | } 20 | return 0; 21 | } 22 
| -------------------------------------------------------------------------------- /memory_issues/memory_protect.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | int main() 4 | { 5 | volatile char buffer[8]; 6 | ASAN_POISON_MEMORY_REGION(buffer, 8); 7 | buffer[0] = '\0'; 8 | return 0; 9 | } 10 | -------------------------------------------------------------------------------- /memory_latency.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | 7 | #define CACHELINE_SIZE 64 8 | struct Node 9 | { 10 | Node* next; 11 | char paddings[CACHELINE_SIZE - sizeof(Node*)]; 12 | }; 13 | 14 | #define N1(node) node=node->next; 15 | #define N2(node) N1(node);N1(node); 16 | #define N4(node) N2(node);N2(node); 17 | #define N8(node) N4(node);N4(node); 18 | #define N16(node) N8(node);N8(node); 19 | #define N32(node) N16(node);N16(node); 20 | #define N64(node) N32(node);N32(node); 21 | #define N128(node) N64(node);N64(node); 22 | #define N256(node) N128(node);N128(node); 23 | #define N512(node) N256(node);N256(node); 24 | #define N1024(node) N512(node);N512(node); 25 | 26 | const Node* test(int M, const std::vector& c) 27 | { 28 | auto start = std::chrono::high_resolution_clock::now(); 29 | const Node* node = &c[0]; 30 | for (int i = 0; i < M; i++) 31 | { 32 | const size_t s = c.size() / 1024; 33 | for (int j = 0; j < s; j++) 34 | { 35 | N1024(node); 36 | } 37 | } 38 | auto end = std::chrono::high_resolution_clock::now(); 39 | std::cout << "elapsed milliseconds: " << std::chrono::duration_cast(end - start).count() << std::endl; 40 | return node; 41 | } 42 | 43 | int main() 44 | { 45 | cpu_set_t set; 46 | CPU_ZERO(&set); 47 | CPU_SET(0, &set); 48 | sched_setaffinity(0, sizeof(set), &set); 49 | const int N = 1024 * 1024 * 16; 50 | std::vector va; 51 | va.resize(N); 52 | for (int i = 0; i < N - 1; i++) 53 | { 54 | va[i].next = 
// Writes two unrelated globals back to back: A gets V + 1, then B gets 1.
// There is no barrier or atomic between the two plain stores, so nothing in
// the source forces them to stay in this order in the generated code.
// thread() in this file asserts that seeing B == 1 implies A == V + 1, which
// is exactly the assumption this function does not guarantee.
// NOTE(review): presumably meant to be inspected in the compiler's assembly
// output (file name: compile_reorder) — confirm.
void plain()
{
    A = V + 1;
    B = 1;
}
// Worker for the second thread of the store/load reordering experiment.
// Mirror image of thread1Func: each round it waits on beginSema2, spins a
// pseudo-random number of times, stores Y = 1, then loads X into r2 and
// signals endSema. What separates the store from the load depends on
// USE_CPU_FENCE: an mfence instruction (with compiler barrier) or a
// compiler-only barrier.
// param is unused. The loop never exits, so the return is unreachable.
void *thread2Func(void *param)
{
    unsigned int seed = 2;  // Per-thread rand_r state; differs from thread 1's seed
    for (;;)
    {
        sem_wait(&beginSema2);  // Wait for signal
        while (rand_r(&seed) % 8 != 0) {}  // Random delay

        // ----- THE TRANSACTION! -----
        Y = 1;
#if USE_CPU_FENCE
        asm volatile("mfence" ::: "memory");  // Prevent CPU reordering
#else
        asm volatile("" ::: "memory");  // Prevent compiler reordering
#endif
        r2 = X;

        sem_post(&endSema);  // Notify transaction complete
    }
    return NULL;  // Never returns
};
82 | cpu_set_t cpus; 83 | CPU_ZERO(&cpus); 84 | CPU_SET(0, &cpus); 85 | pthread_setaffinity_np(thread1, sizeof(cpu_set_t), &cpus); 86 | pthread_setaffinity_np(thread2, sizeof(cpu_set_t), &cpus); 87 | #endif 88 | 89 | // Repeat the experiment ad infinitum 90 | int detected = 0; 91 | for (int iterations = 1; ; iterations++) 92 | { 93 | // Reset X and Y 94 | X = 0; 95 | Y = 0; 96 | // Signal both threads 97 | sem_post(&beginSema1); 98 | sem_post(&beginSema2); 99 | // Wait for both threads 100 | sem_wait(&endSema); 101 | sem_wait(&endSema); 102 | // Check if there was a simultaneous reorder 103 | if (r1 == 0 && r2 == 0) 104 | { 105 | detected++; 106 | printf("%d reorders detected after %d iterations\n", detected, iterations); 107 | } 108 | } 109 | return 0; // Never returns 110 | } 111 | 112 | -------------------------------------------------------------------------------- /memory_model/msvc/ordering.cpp: -------------------------------------------------------------------------------- 1 | #define WIN32_LEAN_AND_MEAN 2 | #include 3 | #include 4 | #include 5 | 6 | // Set either of these to 1 to prevent CPU reordering 7 | #define USE_CPU_FENCE 0 8 | #define USE_SINGLE_HW_THREAD 0 9 | 10 | 11 | //------------------------------------- 12 | // MersenneTwister 13 | // A thread-safe random number generator with good randomness 14 | // in a small number of instructions. We'll use it to introduce 15 | // random timing delays. 
// Produces the next pseudo-random 32-bit value and advances the generator.
// One state word (m_buffer[m_index]) is regenerated per call, m_index moves
// to the next slot with wrap-around at MT_LEN, and the regenerated word is
// returned after a final bit-mixing step.
unsigned int MersenneTwister::integer()
{
    // Indices
    int i = m_index;
    int i2 = m_index + 1; if (i2 >= MT_LEN) i2 = 0; // wrap-around
    int j = m_index + MT_IA; if (j >= MT_LEN) j -= MT_LEN; // wrap-around

    // Twist
    // s takes the top bit of the current word and the low 31 bits of the next.
    unsigned int s = (m_buffer[i] & 0x80000000) | (m_buffer[i2] & 0x7fffffff);
    // (s & 1) * constant: XOR in 0x9908B0DF only when the low bit of s is set.
    unsigned int r = m_buffer[j] ^ (s >> 1) ^ ((s & 1) * 0x9908B0DF);
    m_buffer[m_index] = r;
    m_index = i2;

    // Swizzle
    // Shift/mask XORs applied to the output only; the stored state keeps the
    // un-swizzled value written above.
    r ^= (r >> 11);
    r ^= (r << 7) & 0x9d2c5680UL;
    r ^= (r << 15) & 0xefc60000UL;
    r ^= (r >> 18);
    return r;
}
// Worker for the second thread of the store/load reordering experiment
// (Windows version). Each round it waits on beginSema2, spins a
// pseudo-random number of times, stores Y = 1, then loads X into r2 and
// releases endSema. What separates the store from the load depends on
// USE_CPU_FENCE: MemoryBarrier() or the compiler-only _ReadWriteBarrier().
// param is unused. The loop never exits, so the return is unreachable.
DWORD WINAPI thread2Func(LPVOID param)
{
    MersenneTwister random(2);  // Own generator instance; seed differs from thread 1's
    for (;;)
    {
        WaitForSingleObject(beginSema2, INFINITE);  // Wait for signal
        while (random.integer() % 8 != 0) {}  // Random delay

        // ----- THE TRANSACTION! -----
        Y = 1;
#if USE_CPU_FENCE
        MemoryBarrier();  // Prevent CPU reordering
#else
        _ReadWriteBarrier();  // Prevent compiler reordering only
#endif
        r2 = X;

        ReleaseSemaphore(endSema, 1, NULL);  // Notify transaction complete
    }
    return 0;  // Never returns
};
132 | SetThreadAffinityMask(thread1, 1); 133 | SetThreadAffinityMask(thread2, 1); 134 | #endif 135 | 136 | // Repeat the experiment ad infinitum 137 | int detected = 0; 138 | for (int iterations = 1; ; iterations++) 139 | { 140 | // Reset X and Y 141 | X = 0; 142 | Y = 0; 143 | // Signal both threads 144 | ReleaseSemaphore(beginSema1, 1, NULL); 145 | ReleaseSemaphore(beginSema2, 1, NULL); 146 | // Wait for both threads 147 | WaitForSingleObject(endSema, INFINITE); 148 | WaitForSingleObject(endSema, INFINITE); 149 | // Check if there was a simultaneous reorder 150 | if (r1 == 0 && r2 == 0) 151 | { 152 | detected++; 153 | printf("%d reorders detected after %d iterations\n", detected, iterations); 154 | } 155 | } 156 | return 0; // Never returns 157 | } 158 | 159 | -------------------------------------------------------------------------------- /memory_model/msvc/ordering.sln: -------------------------------------------------------------------------------- 1 |  2 | Microsoft Visual Studio Solution File, Format Version 10.00 3 | # Visual C++ Express 2008 4 | Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "ordering", "ordering.vcproj", "{11BFF8C6-FB6E-411B-88A4-8ACE87E8AF50}" 5 | EndProject 6 | Global 7 | GlobalSection(SolutionConfigurationPlatforms) = preSolution 8 | Debug|Win32 = Debug|Win32 9 | Release|Win32 = Release|Win32 10 | EndGlobalSection 11 | GlobalSection(ProjectConfigurationPlatforms) = postSolution 12 | {11BFF8C6-FB6E-411B-88A4-8ACE87E8AF50}.Debug|Win32.ActiveCfg = Debug|Win32 13 | {11BFF8C6-FB6E-411B-88A4-8ACE87E8AF50}.Debug|Win32.Build.0 = Debug|Win32 14 | {11BFF8C6-FB6E-411B-88A4-8ACE87E8AF50}.Release|Win32.ActiveCfg = Release|Win32 15 | {11BFF8C6-FB6E-411B-88A4-8ACE87E8AF50}.Release|Win32.Build.0 = Release|Win32 16 | EndGlobalSection 17 | GlobalSection(SolutionProperties) = preSolution 18 | HideSolutionNode = FALSE 19 | EndGlobalSection 20 | EndGlobal 21 | -------------------------------------------------------------------------------- 
// Atomic counterpart of plain(): copies VV into A and then into B using two
// separate relaxed atomic loads, where plain() reads the non-atomic V twice.
// NOTE(review): presumably meant for comparing the generated code of the two
// functions (whether the second load is kept or merged) — confirm.
void atom()
{
    A = VV.load(std::memory_order_relaxed);
    B = VV.load(std::memory_order_relaxed);
}
class Projection:
    """Owns the pygame window and draws every registered surface as text glyphs.

    Each surface is an object exposing ``nodes`` (rows of x, y, z, w) and a
    ``rotate(matrix)`` method; one pre-rendered glyph is kept per node.
    """

    def __init__(self, width, height):
        """Open a ``width`` x ``height`` window titled with the module-level ``title``."""
        self.width = width
        self.height = height
        self.screen = pg.display.set_mode((width, height))
        self.background = (10, 60, 60)
        pg.display.set_caption(title)
        self.surfaces = {}

    def addSurface(self, name, surface):
        """Pre-render one glyph per node of ``surface`` and register it under ``name``.

        Node ``i`` gets character ``ascii_chars[i]``; entries flagged as title
        characters use the title font and colour, all others the small font.
        Raises IndexError if the surface has more nodes than ``ascii_chars``.
        """
        text_surfaces = []
        # low == high, so every channel is 250; widen the range for per-glyph
        # colour variation.
        low = 250
        high = 250
        for i in range(len(surface.nodes)):
            text, is_title = ascii_chars[i]
            if is_title:
                render_font = title_font
                c = title_color
            else:
                render_font = text_font
                c = (random.randint(low, high), random.randint(low, high), random.randint(low, high))
            text_surfaces.append(render_font.render(text, False, c))
        surface.text_surfaces = text_surfaces
        self.surfaces[name] = surface

    def display(self):
        """Clear the window and blit the glyph of every node whose y is positive.

        x and z are used as screen offsets from the window centre.
        NOTE(review): y > 0 presumably selects the hemisphere facing the
        viewer - confirm.
        """
        self.screen.fill(self.background)
        # Centre on this projection's own size (not the module globals) and
        # keep the coordinates integral for blit().
        center_x = self.width // 2
        center_y = self.height // 2
        for surface in self.surfaces.values():
            for i, node in enumerate(surface.nodes):
                if node[1] > 0:
                    self.screen.blit(surface.text_surfaces[i], (center_x + int(node[0]), center_y + int(node[2])))

    def rotateAll(self, theta):
        """Rotate every registered surface by ``theta`` radians about the z axis."""
        c = np.cos(theta)
        s = np.sin(theta)
        # Same matrix for all surfaces: build it once, outside the loop.
        matrix = np.array([[c, -s, 0, 0],
                           [s, c, 0, 0],
                           [0, 0, 1, 0],
                           [0, 0, 0, 1]])
        for surface in self.surfaces.values():
            surface.rotate(matrix)
running = True
xyz = []

# Sample a sphere of radius R on a latitude/longitude grid:
# MAP_HEIGHT + 1 latitude rings, MAP_WIDTH + 1 points per ring.
for i in range(MAP_HEIGHT + 1):
    lat = (math.pi / MAP_HEIGHT) * i
    ring_radius = R * math.sin(lat)
    z = round(R * math.cos(lat), 2)
    for j in range(MAP_WIDTH + 1):
        lon = (2 * math.pi / MAP_WIDTH) * j
        x = round(ring_radius * math.cos(lon), 2)
        y = round(ring_radius * math.sin(lon), 2)
        xyz.append((x, y, z))

# Build the scene: one Object holding all sphere points, shown by a Projection.
pv = Projection(WIDTH, HEIGHT)
moon = Object()
moon_nodes = list(xyz)
moon.addNodes(np.array(moon_nodes))
pv.addSurface('moon', moon)
spin = 0.01

# Main loop: rotate by `spin` radians per frame at up to FPS frames per
# second until the window is closed.
while running:
    clock.tick(FPS)
    pv.rotateAll(spin)
    pv.display()
    if any(event.type == pg.QUIT for event in pg.event.get()):
        running = False
    pg.display.update()
://:/:////////:///////+/////++/////////////////////////++++ooooooooooooooo+oo++oo++ooo+++++++++/////////////:////:::::://:::/:///////://:/://:///:///: 7 | /////////::://://///////////+///////:///+//+++++++++++++oooooosssssssssssysssoooooossssooooo++///////++///oo+++//::::::::::::/://////://///////////::/ 8 | ///:/:////:://:////:///////+/////+++//++++++sssooooossssssssooossssssoososossssssssysyyyysss++////o///++++ossoo+/:::::::::::/://///:///////////:////// 9 | ::::::////////://://://+/+///////+/////++++osyyysssyyyyysooo+oooossysossoo++oossssoossssssoo+++++++++//+++++/+/+/::::-:::::::::/://:////////:////::/// 10 | ::::::::/::://:://:::///////////////+/++oosyyyyyyyyyyyysoo+osyyyyhyyyyyyyssoooooooooooooooo++++/+ooo+/++/+++/////:::::::/::::::::::/::///+//://///:/:/ 11 | ::::::::::::::::::::/::://///:::////+++ooossyyhhhyyyyyhssoooyhhhhhhhyyyyyysso+++o++osssoooo+++++o++o++o++/////////::::-::::::::::///:::/:://:://:/:::: 12 | :--::-::::::::--::::/:://////////////+++osyyyhhhhyyhhyyyyssyhyhhhhhhhyyyyyssso+osysssyyssss++++++++/+++++++//////:/:---:::::::::::/::/:::::::::::/:::: 13 | :::::::::::::-:::::::/:::///////++++++++osyhhyyhhhhyhyyyyyyyyyyhhhhhhhyhyysssssyhhhyyyyooo++/+/++///++++/+++////////:::::::::::::/::::::///::::::-:::: 14 | :::/:::--::::-::::::::/:////////+++///+osyyyyyyhhhhhyyssyyyyyyyyyyyyyyyyssssssyhhhhhhhhyo+///+//++++++++//////:////+///:::/::::://::://osyo:::/::::::: 15 | :::-::-:--:---::::::::::://:///////////+ossssyyyhhhyyysssyyyyyyyyyyyyyyyysoooshhhhyhhhhyooo++o+ooossssoo+//////+/+//////://:::::::::::/+o+///:/::::::: 16 | :::::-:-::::::::::::::/::::::://////////++oyyhyyhhhhhyyyyyyyyysssyyyyyyyssoossyyyyyhhhhyssys+//+syhhhhhso++++o++//+//+////::/:::::::::::::///::::::::: 17 | :::-:-::-::/::::::::::::::::::::///////+/++ssyyhhhhhhhhyyyssyysooooooosyyysyyyssysshhhhhhhhyso++syhhhyss++oooyysso++//+///::::::::::::://::/::-:-::--: 18 | 
-::---::::::::::::::::::::::::://///////////+ooyhhyhhhhhhysosysooo++ossyyysosyysosyhhhhhhhhhysso+osoooooo+++ossssoo+//////:-:::::::://:::///:::::----- 19 | -:::::::--:::::::--::/::/:::::::::://////////++oyyhhhhhhhysssyssssssyyyhysso+++/+osyhhhyyyyyysyyysoooosso++++ossso++///::/::---:::::/:::::/::::::::::- 20 | :::::::--::::::::--:::::////:::::::::////+/////osyhhhhhhhhyyyyyssssossysssso+///+++syyhyooosyyyhyhhysssoooo+oyyyyo/+//::://:---:::::::::::::::://::::: 21 | :::/:::::::::::::::::::::////::::::::::////+++so+syhhhyhhhhhhyysyyyssss++++++///////+syso++osyyhyyyysoooooooossso+//////////:---:::::::::/:/:::///:::: 22 | :::::---:::::::::::::::::///:::::::::/://////+o+/osyyyyyhhyhhyyshhyysyyso++++///////++osoo++osyyyyso++oo+++++++++////+//:////::::::::://:/:///:::::::: 23 | //::::::::::::::::::://////:::::::://:///////++++/+osssssssyyhhyyyyyyyyss++++++/////:/+osyso+oooossooo+o++++++///////+/////////::/:/://::////:::/:/::/ 24 | +////+/:///////:/://////:::::::::+sos+////////++//++ooo+ossyyyyysyyyyyyyo++++++/+//////ossso+o+osso+ooooo+++/+/////////+///:////+/////://:///////////+ 25 | o+ooo++++++++//////////::/::::::::+ss/:/::////:::://+++oyhhhhyyyysyhhhhys++++++////+////++/++++++++++++++++o++++++////+++///////+////////::/+/+//+++++ 26 | ssosssoooooooo+//::://///////::::////::::///////://+//++syyyysssysyyyyso+//:///////////////+////////////+ooo+++++++++oo++++////////////////:+++++ooo++ 27 | sssssssoosssssoo/////////////////////:/:///+++///+++////ooooosyss+//++//:::////////////+////++//::::////+++++oooooo++++++//////+///////++++//+oosooooo 28 | ysssyysoysshyssso+///+/////+//////////////+///+++//+++++ooo+++o++/+//::::::://////////////////+/:///+/++++++oosssoooooo+//:////+/++++//oo+/+++oosooosy 29 | ssooyysssyyyysysoo++++o+++//+////++//////////++///+oo++++++++++++///::::::::////+//////+/////////+++++++o+oossosssooooo++////////+/++/:////++oosoo+oso 30 | 
oossyyyssyyysyssssso++o++o++/+//+++++/////////++//++o++oo+++/+++++/:/::/:::::///++++++////+//:/+++++++++oosoosoooooo+++o+++////+/+/++//+++oooossoossso 31 | ssssyyyyyyyyyyyysyysoooo++++//+/+/++o+/+//////+++++++oooooo+++++++///:://://:///++++++/++/+///+++++++ooossssossoo++o++++/o++/+/+++++++++oooossossosyso 32 | yyysyyyyyyyyyyysyysyyssso+ooo/////++++/////++/++++++++ooooo++++///++++/////////++++/++/++/++++++++++++o+osssooooo+++++o+/+++//+/ooooooo+ososssyysysyss 33 | ssssysysyyyyyyssssssssosooo+o+//+/////////////////////////+++++//++o+++/+++//////////++++++++++/++///++++/+++oooo++ooooo/+++////+ooooo+oooosssyyyyyssy 34 | sossysssyyysysssssssssssoooo+o+++/+//+++/++++++++/////////+++////+//////+/////////////////++/////////+++++o++ooo++//+++/++++/+++++//+oooooooooosysssss 35 | ssyyyossyssssssoooo+oooooooo+o+++++//+++++++++++////////+////++++/+////////+///////////+//////:/++//++++++++++++++//++/++++///++++++o++o+ooo++ossoosso 36 | oo+ooooossssss++oooosso++ooooooo+/+++///++++///++//////////++++/+++/+/+++++///+/+//+/+//////+//++++++/+o+++++o++++/o/::/+/+oooooososssooo+/++oo+++ooo+ 37 | +ooooooooooooo+ooo+oooo+++ooooo+///+o++oo++/+oo++++++/++/+++++++/+o+///:++////++////++//o++//////+++++/++o++/://////+//+o++++oo+oo+o++++++++++o/+++o++ 38 | //++/++++oo+++o++oo+++/+/+++/+++//+++++oo++///+o+++++o+o+++//++++++o+/++++/o++o+///+///++//+++++/+/+oo++++/+++o+++++++/++++//+++++++oo+++++++//++/++++ 39 | +++++++++/////////++++++++//://///////++++/+++++++ooooooo++++++//++oooo+ooo++ooooooo++++o+++++ooo+ooooo++++++++++++++++++++++++++////++++++//+++++++++ 40 | -------------------------------------------------------------------------------- /nan/nan.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | 6 | int main() 7 | { 8 | float nan = 0.0f / 0.0f; 9 | uint32_t* u = (uint32_t*)&nan; 10 | std::cout << std::hex << *u << " is nan: " << isnan(nan) << std::endl; 11 | *u = 0xFFFFFFFF; 12 | std::cout << 
// Sums all elements of `c`, M times over, and prints the elapsed wall time.
//
// The running sum is kept as unsigned int so that it wraps modulo 2^32
// instead of hitting signed-overflow undefined behaviour; main() feeds it
// rand() values that overflow an int almost immediately. The result is
// converted back to int on return (two's-complement wrap on GCC/Clang/MSVC),
// so non-overflowing inputs return exactly what they did before.
int test(int M, const std::vector<int>& c)
{
    unsigned int sum = 0;
    auto start = std::chrono::high_resolution_clock::now();
    for (int i = 0; i < M; i++)
    {
        for (auto v: c)
        {
            sum += static_cast<unsigned int>(v);
        }
    }
    auto end = std::chrono::high_resolution_clock::now();
    std::cout << "vector elapsed milliseconds: " << std::chrono::duration_cast<std::chrono::milliseconds>(end - start).count() << std::endl;
    return static_cast<int>(sum);
}
// Sums all elements of the list `c`, M times over, and prints the elapsed
// wall time. Linked-list counterpart of the vector overloads.
//
// The running sum is kept as unsigned int so that it wraps modulo 2^32
// instead of hitting signed-overflow undefined behaviour; main() feeds it
// rand() values that overflow an int almost immediately. The result is
// converted back to int on return (two's-complement wrap on GCC/Clang/MSVC),
// so non-overflowing inputs return exactly what they did before.
int test(int M, const std::list<int>& c)
{
    unsigned int sum = 0;
    auto start = std::chrono::high_resolution_clock::now();
    for (int i = 0; i < M; i++)
    {
        for (auto v: c)
        {
            sum += static_cast<unsigned int>(v);
        }
    }
    auto end = std::chrono::high_resolution_clock::now();
    std::cout << "list elapsed milliseconds: " << std::chrono::duration_cast<std::chrono::milliseconds>(end - start).count() << std::endl;
    return static_cast<int>(sum);
}
entry) 31 | { 32 | return (entry & (1 << 2)) != 0; 33 | } 34 | 35 | void print_entry(FILE* fp, int level, uint64_t entry, uint64_t virtual_address) 36 | { 37 | fprintf(fp, "%d\t0x%016lx\t0x%016lx\t%d\t%d\t%d\n", level, get_phys_address(entry), virtual_address, writable(entry), executable(entry), user_mode(entry)); 38 | } 39 | 40 | void dump(FILE* fp, const Page*& page, int level, uint64_t virtual_address) 41 | { 42 | const Page* cur_page = page++; 43 | for (int i = 0; i < 512; i++) 44 | { 45 | const uint64_t entry = cur_page->entry[i]; 46 | const uint64_t child_virtual_address = (virtual_address << 9) | i; 47 | if (level > 0) 48 | { 49 | if (entry & 1) 50 | { 51 | if (!(entry&(1<<7))) 52 | { 53 | dump(fp, page, level - 1, child_virtual_address); 54 | } 55 | else 56 | { 57 | print_entry(fp, level, entry, child_virtual_address << (level * 9 + 12)); 58 | } 59 | } 60 | } 61 | else 62 | { 63 | 64 | if (entry) 65 | { 66 | print_entry(fp, level, entry, child_virtual_address << 12); 67 | } 68 | } 69 | } 70 | } 71 | 72 | void dump_pagetable(FILE* fp) 73 | { 74 | std::ifstream ifs("/proc/page_table_3", std::ios::binary); 75 | if (!ifs) 76 | { 77 | return; 78 | } 79 | std::string content((std::istreambuf_iterator(ifs)), std::istreambuf_iterator()); 80 | const Page* page = (const Page*)&content[0]; 81 | const Page* end_page = (const Page*)(&content[0] + content.length()); 82 | dump(fp, page, 3, 0); 83 | std::cout << (const void*)end_page << '\t' << (const void*)page << std::endl; 84 | std::flush(std::cout); 85 | } 86 | 87 | int main() 88 | { 89 | const int N = 1024 * 1024 * 8; 90 | const bool hugetable = true; 91 | const bool do_fork = false; 92 | 93 | char* m = (char*)mmap(NULL, N, PROT_READ | PROT_WRITE, MAP_ANONYMOUS | MAP_SHARED | (hugetable ? 
MAP_HUGETLB: 0), -1, 0); 94 | std::cout << *m << std::endl; 95 | FILE* fp = NULL; 96 | if (do_fork) 97 | { 98 | pid_t pid = fork(); 99 | if (pid == 0) 100 | { 101 | fp = fopen("/home/fractal/lecture/child.log", "w"); 102 | } 103 | else 104 | { 105 | fp = fopen("/home/fractal/lecture/father.log", "w"); 106 | } 107 | } 108 | else 109 | { 110 | fp = fopen("/home/fractal/lecture/father.log", "w"); 111 | } 112 | 113 | fprintf(fp, "mmap address: %p\n", m); 114 | dump_pagetable(fp); 115 | 116 | fclose(fp); 117 | while (true) 118 | { 119 | usleep(10000); 120 | } 121 | return 0; 122 | } 123 | -------------------------------------------------------------------------------- /partition/main.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | 7 | static std::array vips; 8 | 9 | int main() 10 | { 11 | for (int i = 0; i < vips.size(); i++) 12 | { 13 | if (rand() % 10 == 0) 14 | { 15 | vips[i] = rand() % 8; 16 | } 17 | else 18 | { 19 | vips[i] = 0; 20 | } 21 | } 22 | 23 | auto start = std::chrono::high_resolution_clock::now(); 24 | 25 | { 26 | auto end_vip0 = std::partition(vips.begin(), vips.end(), [](int vip){return vip == 0;}); 27 | std::sort(end_vip0, vips.end()); 28 | } 29 | 30 | auto end = std::chrono::high_resolution_clock::now(); 31 | auto elapsed = std::chrono::duration_cast(end - start).count(); 32 | std::cout << "is sorted: " << std::is_sorted(vips.begin(), vips.end()) << ", elapsed: " << elapsed << " milliseconds." 
<< std::endl; 33 | return 0; 34 | } 35 | 36 | -------------------------------------------------------------------------------- /perf_tool/main.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include 10 | #include 11 | #include 12 | #include 13 | #include 14 | #include 15 | #include 16 | #include 17 | #include 18 | 19 | #define barrier() __asm__ __volatile__("" : : : "memory") 20 | 21 | static const uint64_t PERF_BUFF_SIZE_SHIFT = 4; 22 | static const uint64_t PERF_MMAP_DATA_SIZE = 10 * 1024 * 1024; 23 | 24 | static int perf_event_open(struct perf_event_attr *hw_event, pid_t pid, int cpu, 25 | int group_fd, unsigned long flags) { 26 | int ret = syscall(__NR_perf_event_open, hw_event, pid, cpu, group_fd, flags); 27 | return ret; 28 | } 29 | 30 | static uint64_t get_page_size() { return sysconf(_SC_PAGESIZE); } 31 | 32 | static uint64_t perf_mmap_size() 33 | { 34 | return ((1U << PERF_BUFF_SIZE_SHIFT) + 1) * get_page_size(); 35 | } 36 | 37 | struct perf_record_time 38 | { 39 | struct perf_event_header header; 40 | uint64_t time; 41 | }; 42 | 43 | struct perf_data 44 | { 45 | perf_event_mmap_page *page = NULL; 46 | uint64_t pages_size = 0; 47 | uint64_t data_offset = 0; 48 | uint64_t data_size = 0; 49 | int fd = -1; 50 | uint8_t* perf_data = NULL; 51 | uint64_t perf_data_length = 0; 52 | }; 53 | 54 | static perf_data g_perf_data; 55 | 56 | static void read_perf_data() 57 | { 58 | perf_event_mmap_page *page = g_perf_data.page; 59 | 60 | uint64_t tail = page->data_tail; 61 | const uint64_t head = page->data_head; 62 | const uint64_t buffer_size = g_perf_data.data_size; 63 | const uint64_t page_size = g_perf_data.data_offset; 64 | uint8_t * perf_data = g_perf_data.perf_data; 65 | uint64_t perf_data_length = g_perf_data.perf_data_length; 66 | auto time = std::chrono::high_resolution_clock::now().time_since_epoch().count(); 67 | 
barrier(); 68 | const uint8_t *base = reinterpret_cast(page) + page_size; 69 | 70 | // https://android.googlesource.com/platform/external/bcc/+/fd247435dfdfe9a6daa159620127f2724f6d1d7a/src/cc/perf_reader.c 71 | while (tail + sizeof(perf_event_header) <= head ) { 72 | const uint8_t *begin = base + tail % buffer_size; 73 | const perf_event_header *e = (const perf_event_header *)begin; 74 | if (e->type == PERF_RECORD_SAMPLE && perf_data_length + e->size <= PERF_MMAP_DATA_SIZE) 75 | { 76 | const uint8_t *end = base + (tail + e->size) % buffer_size; 77 | perf_record_time* record = (perf_record_time*)(perf_data + perf_data_length); 78 | if (end < begin) 79 | { 80 | // perf event wraps around the ring, make a contiguous copy 81 | const uint8_t *sentinel = base + buffer_size; 82 | const size_t len = sentinel - begin; 83 | 84 | memcpy(perf_data + perf_data_length, begin, len); 85 | perf_data_length += len; 86 | 87 | memcpy(perf_data + perf_data_length, base, e->size - len); 88 | perf_data_length += e->size - len; 89 | } 90 | else 91 | { 92 | memcpy(perf_data + perf_data_length, base, e->size); 93 | perf_data_length += e->size; 94 | } 95 | record->time = time; 96 | } 97 | tail += e->size; 98 | } 99 | 100 | barrier(); 101 | page->data_tail = tail; 102 | g_perf_data.perf_data_length = perf_data_length; 103 | } 104 | 105 | static void perf_event_handler(int signum, siginfo_t *info, void *ucontext) 106 | { 107 | if (info->si_code == POLL_IN) 108 | { 109 | read_perf_data(); 110 | } 111 | } 112 | 113 | void worker() 114 | { 115 | auto start = std::chrono::high_resolution_clock::now(); 116 | int s = 0; 117 | for (int i = 0; i < 100000000; i++) 118 | { 119 | s += rand(); 120 | } 121 | auto end = std::chrono::high_resolution_clock::now(); 122 | auto elapsed = std::chrono::duration_cast(end - start).count(); 123 | printf("s: %d, elapsed milliseconds: %ld\n", s, elapsed); 124 | } 125 | 126 | int main() 127 | { 128 | if (access("/proc/sys/kernel/perf_event_paranoid", F_OK) == -1) 129 | 
{ 130 | return -1; 131 | } 132 | 133 | struct sigaction sa; 134 | memset(&sa, 0, sizeof(struct sigaction)); 135 | sa.sa_sigaction = perf_event_handler; 136 | sa.sa_flags = SA_SIGINFO; 137 | int ret = sigaction(SIGIO, &sa, NULL); 138 | assert(ret != -1); 139 | 140 | struct perf_event_attr attr; 141 | memset(&attr, 0, sizeof(attr)); 142 | attr.type = PERF_TYPE_HARDWARE; 143 | attr.sample_type = PERF_SAMPLE_TIME; 144 | attr.size = sizeof(attr); 145 | //attr.config = PERF_COUNT_HW_CPU_CYCLES; 146 | attr.config = PERF_COUNT_HW_INSTRUCTIONS; 147 | attr.sample_period = 10000000ULL; 148 | attr.exclude_kernel = 1; 149 | attr.exclude_hv = 1; 150 | attr.disabled = 1; 151 | 152 | int fd = perf_event_open(&attr, 0, -1, -1, 0); 153 | assert(fd !=0); 154 | 155 | g_perf_data.fd = fd; 156 | fcntl(fd, F_SETFL, O_RDWR | O_NONBLOCK | O_ASYNC); 157 | fcntl(fd, F_SETSIG, SIGIO); 158 | fcntl(fd, F_SETOWN, getpid()); 159 | 160 | const size_t pages_size = perf_mmap_size(); 161 | void *page = mmap(NULL, pages_size, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0); 162 | assert(page != MAP_FAILED); 163 | 164 | g_perf_data.pages_size = pages_size; 165 | g_perf_data.page = reinterpret_cast(page); 166 | g_perf_data.data_offset = get_page_size(); 167 | g_perf_data.data_size = pages_size - g_perf_data.data_offset; 168 | 169 | void *perf_data = mmap(NULL, PERF_MMAP_DATA_SIZE, 170 | PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS | MAP_NORESERVE | MAP_POPULATE, -1, 0); 171 | assert(perf_data != MAP_FAILED); 172 | 173 | g_perf_data.perf_data = (uint8_t*)perf_data; 174 | g_perf_data.perf_data_length = 0; 175 | 176 | uint64_t time = std::chrono::high_resolution_clock::now().time_since_epoch().count(); 177 | ioctl(fd, PERF_EVENT_IOC_ENABLE); 178 | worker(); 179 | ioctl(fd, PERF_EVENT_IOC_DISABLE); 180 | 181 | const perf_record_time* r = (const perf_record_time*)g_perf_data.perf_data; 182 | const perf_record_time* e = (const perf_record_time*)(g_perf_data.perf_data + g_perf_data.perf_data_length); 183 | 
184 | int sample_count = 0; 185 | for (; r < e; r++) 186 | { 187 | printf("%d, %ld\n", r->header.size, (r->time - time) / (1000 * 1000)); 188 | sample_count++; 189 | } 190 | printf("sample_count=%d\n", sample_count); 191 | return 0; 192 | } 193 | 194 | -------------------------------------------------------------------------------- /pgo/main.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | class B 5 | { 6 | public: 7 | virtual int foo() = 0; 8 | }; 9 | 10 | class D1 : public B 11 | { 12 | public: 13 | virtual int foo() override 14 | { 15 | return 1; 16 | } 17 | }; 18 | 19 | class D2 : public B 20 | { 21 | public: 22 | virtual int foo() override 23 | { 24 | return 2; 25 | } 26 | }; 27 | 28 | int main() 29 | { 30 | const int N = 1024; 31 | const int M = 1024 * 1024; 32 | B* b[N]; 33 | for (int i = 0; i < N; i++) 34 | { 35 | b[i] = new D2(); 36 | } 37 | int s = 0; 38 | auto start = std::chrono::high_resolution_clock::now(); 39 | for (int i = 0; i < M; i++) 40 | { 41 | for (int j = 0; j < N; j++) 42 | { 43 | s += b[j]->foo(); 44 | } 45 | } 46 | auto end = std::chrono::high_resolution_clock::now(); 47 | std::cout << std::chrono::duration_cast(end - start).count() << std::endl; 48 | return s; 49 | } 50 | 51 | -------------------------------------------------------------------------------- /pmu/Makefile: -------------------------------------------------------------------------------- 1 | obj-m+=cr4.o 2 | 3 | all: 4 | make -C /lib/modules/$(shell uname -r)/build/ M=$(PWD) modules 5 | clean: 6 | make -C /lib/modules/$(shell uname -r)/build/ M=$(PWD) clean 7 | -------------------------------------------------------------------------------- /pmu/cpuid.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | int main() 5 | { 6 | unsigned int level = 0x0a; 7 | unsigned int eax, ebx, ecx, edx; 8 | if (__get_cpuid(level, &eax, &ebx, &ecx, &edx)) 9 
| { 10 | printf("eax=%08x\tebx=%08x\tecx=%08x\tedx=%08x\n", eax, ebx, ecx, edx); 11 | } 12 | else 13 | { 14 | printf("__get_cpuid failed\n"); 15 | } 16 | return 0; 17 | } 18 | 19 | -------------------------------------------------------------------------------- /pmu/cr4.c: -------------------------------------------------------------------------------- 1 | /* 2 | * Read PMC in kernel mode. 3 | */ 4 | #include /* Needed by all modules */ 5 | #include /* Needed for KERN_INFO */ 6 | 7 | static void printc4(void) { 8 | typedef long unsigned int uint64_t; 9 | uint64_t output; 10 | // Read back CR4 to check the bit. 11 | __asm__("\t mov %%cr4,%0" : "=r"(output)); 12 | printk(KERN_INFO "%lu", output); 13 | } 14 | 15 | static void setc4b8(void * info) { 16 | // Set CR4, Bit 8 (9th bit from the right) to enable 17 | __asm__("push %rax\n\t" 18 | "mov %cr4,%rax;\n\t" 19 | "or $(1 << 8),%rax;\n\t" 20 | "mov %rax,%cr4;\n\t" 21 | "wbinvd\n\t" 22 | "pop %rax" 23 | ); 24 | 25 | // Check which CPU we are on: 26 | printk(KERN_INFO "Ran on Processor %d", smp_processor_id()); 27 | printc4(); 28 | } 29 | 30 | static void clearc4b8(void * info) { 31 | printc4(); 32 | __asm__("push %rax\n\t" 33 | "push %rbx\n\t" 34 | "mov %cr4,%rax;\n\t" 35 | "mov $(1 << 8), %rbx\n\t" 36 | "not %rbx\n\t" 37 | "and %rbx, %rax;\n\t" 38 | "mov %rax,%cr4;\n\t" 39 | "wbinvd\n\t" 40 | "pop %rbx\n\t" 41 | "pop %rax\n\t" 42 | ); 43 | printk(KERN_INFO "Ran on Processor %d", smp_processor_id()); 44 | } 45 | 46 | int init_module(void) 47 | { 48 | on_each_cpu(setc4b8, NULL, 0); 49 | return 0; 50 | } 51 | 52 | void cleanup_module(void) 53 | { 54 | on_each_cpu(clearc4b8, NULL, 0); 55 | } 56 | 57 | -------------------------------------------------------------------------------- /pmu/enable_fixed.sh: -------------------------------------------------------------------------------- 1 | wrmsr -p 0 0x38d 0x333 # enable all three fixed counts. 
/*
 2 |
--------------------------------------------------------------------------------
/pmu/iaca-version-v3.0-lin64.zip:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gongyiling/cpp_lecture/91ec4233a8b8175db94f7430604ace82fa51cefb/pmu/iaca-version-v3.0-lin64.zip
--------------------------------------------------------------------------------
/pmu/ipc.cpp:
--------------------------------------------------------------------------------
*/
#include <sched.h>

#include <cinttypes>
#include <cstdint>
#include <cstdio>
#include <cstdlib>

// Reads one performance-monitoring counter with the x86 `rdpmc` instruction.
//   index - counter number, placed in the low bits of ECX.
//   fixed - shifted into bit 30 of ECX; per the parameter name this selects
//           the fixed-function counter bank when non-zero.
// Returns the 64-bit value assembled from EDX (high half) and EAX (low half).
// NOTE(review): user-mode rdpmc faults unless the kernel allows it; pmu/cr4.c
// in this repository sets CR4 bit 8 for that purpose - confirm it is loaded.
uint64_t rdpmc(int index, int fixed)
{
    uint32_t a, d;
    const uint32_t c = (fixed << 30) + index;
    __asm__ volatile ("rdpmc": "=a"(a), "=d"(d): "c"(c));
    return ((uint64_t)a | ((uint64_t)d << 32));
}

// Adds `v` to a running total `M` times and returns the total.
// The empty asm statement takes the total as an operand on every iteration and
// acts as an optimisation barrier, so the additions are really executed.
int sum(int v, int M)
{
    // Accumulate in unsigned arithmetic: main() passes v = rand() and M = 2^30,
    // which exceeds the range of int. Signed overflow is undefined behaviour,
    // whereas unsigned arithmetic wraps modulo 2^32.
    unsigned int s = 0;
    for (int i = 0; i < M; i++)
    {
        s += static_cast<unsigned int>(v);
        asm volatile("": :"r,m"(s): "memory");
    }
    // Conversion back to int is modular on GCC/Clang (and by rule since C++20).
    return static_cast<int>(s);
}

// Pins the process to CPU 0, runs sum() between two pairs of counter reads and
// prints instructions retired, cycles and their ratio.
// Counter 0 / counter 1 are expected to be programmed as "instruction retired"
// and "unhalted core cycles" on CPU 0 by pmu/set_msr_ipc.sh.
int main()
{
    cpu_set_t set;
    CPU_ZERO(&set);
    CPU_SET(0, &set);
    // The counters are per-CPU and only CPU 0 is programmed, so a failed pin
    // would silently measure the wrong counters.
    if (sched_setaffinity(0, sizeof(set), &set) != 0)
    {
        perror("sched_setaffinity");
        return 1;
    }
    const int fixed_count = false;

    const int v = rand();
    const int M = 1024 * 1024 * 1024;
    const uint64_t start_instruction_retired = rdpmc(0, fixed_count);
    const uint64_t start_cycles = rdpmc(1, fixed_count);
    const int s = sum(v, M);
    const uint64_t end_instruction_retired = rdpmc(0, fixed_count);
    const uint64_t end_cycles = rdpmc(1, fixed_count);
    const uint64_t instruction_retired = end_instruction_retired - start_instruction_retired;
    const uint64_t cycles = end_cycles - start_cycles;
    // double keeps the full counter precision; guard against a zero cycle delta
    // (e.g. counters not enabled) instead of printing inf/nan.
    const double ipc = cycles != 0 ? static_cast<double>(instruction_retired) / static_cast<double>(cycles) : 0.0;
    printf("sum: %d, instruction retired: %" PRIu64 ", cycles: %" PRIu64 ", instructions per cycle: %f\n",
            s, instruction_retired, cycles, ipc);
    return 0;
}

/*
--------------------------------------------------------------------------------
/pmu/msr.txt:
*/
/*
--------------------------------------------------------------------------------
 1 | RDPMC 1715
 2 | C1H IA32_PMC0 4567
 3 | 186H IA32_PERFEVTSEL0 4571
 4 |
 5 | 309H IA32_FIXED_CTR0 4633 instructions, core cycles, reference cycles.
 6 | 38DH IA32_FIXED_CTR_CTRL 4584
 7 | 38FH IA32_PERF_GLOBAL_CTRL 4586
 8 |
 9 | Pre-defined Architectural Performance Events 3475
10 |
11 |
--------------------------------------------------------------------------------
/pmu/rdpmc.cpp:
--------------------------------------------------------------------------------
*/
#include <sched.h>

#include <cinttypes>
#include <cstdint>
#include <cstdio>
#include <cstdlib>
#include <vector>

// Reads one performance-monitoring counter with the x86 `rdpmc` instruction.
//   index - counter number, placed in the low bits of ECX.
//   fixed - shifted into bit 30 of ECX; per the parameter name this selects
//           the fixed-function counter bank when non-zero.
// Returns the 64-bit value assembled from EDX (high half) and EAX (low half).
// NOTE(review): user-mode rdpmc faults unless the kernel allows it; pmu/cr4.c
// in this repository sets CR4 bit 8 for that purpose - confirm it is loaded.
uint64_t rdpmc(int index, int fixed)
{
    uint32_t a, d;
    const uint32_t c = (fixed << 30) + index;
    __asm__ volatile ("rdpmc": "=a"(a), "=d"(d): "c"(c));
    return ((uint64_t)a | ((uint64_t)d << 32));
}

// Sums every element of `v`, repeating the whole pass `M` times, and returns
// the total.
// NOTE(review): the element type is taken to be int (the vector is filled with
// rand() and summed into an int) - the template argument is missing from this
// listing, confirm against the original file.
int sum(const std::vector<int>& v, int M)
{
    // Accumulate in unsigned arithmetic: main() sums 1024 * 1024 rand() values,
    // which exceeds the range of int. Signed overflow is undefined behaviour,
    // whereas unsigned arithmetic wraps modulo 2^32.
    unsigned int s = 0;
    for (int i = 0; i < M; i++)
    {
        for (const int k: v)
        {
            s += static_cast<unsigned int>(k);
        }
    }
    // Conversion back to int is modular on GCC/Clang (and by rule since C++20).
    return static_cast<int>(s);
}

// Pins the process to CPU 0, runs sum() over 1024 random ints between two
// pairs of counter reads and prints instructions retired, cycles and their
// ratio. Counter 0 / counter 1 are expected to be programmed as "instruction
// retired" and "unhalted core cycles" on CPU 0 by pmu/set_msr_ipc.sh.
int main()
{
    cpu_set_t set;
    CPU_ZERO(&set);
    CPU_SET(0, &set);
    // The counters are per-CPU and only CPU 0 is programmed, so a failed pin
    // would silently measure the wrong counters.
    if (sched_setaffinity(0, sizeof(set), &set) != 0)
    {
        perror("sched_setaffinity");
        return 1;
    }
    std::vector<int> v(1024);
    const int M = 1024;
    for (auto&& k: v)
    {
        k = rand();
    }
    const uint64_t start_instruction_retired = rdpmc(0, 0);
    const uint64_t start_cycles = rdpmc(1, 0);
    const int s = sum(v, M);
    const uint64_t end_instruction_retired = rdpmc(0, 0);
    const uint64_t end_cycles = rdpmc(1, 0);
    const uint64_t instruction_retired = end_instruction_retired - start_instruction_retired;
    const uint64_t cycles = end_cycles - start_cycles;
    // double keeps the full counter precision; guard against a zero cycle delta
    // (e.g. counters not enabled) instead of printing inf/nan.
    const double ipc = cycles != 0 ? static_cast<double>(instruction_retired) / static_cast<double>(cycles) : 0.0;
    printf("sum: %d, instruction retired: %" PRIu64 ", cycles: %" PRIu64 ", instructions per cycle: %f\n",
            s, instruction_retired, cycles, ipc);
    return 0;
}

/*
--------------------------------------------------------------------------------
/pmu/set_msr_ipc.sh:
*/
-------------------------------------------------------------------------------- 1 | wrmsr -p 0 0x186 0x4100c0 # instruction retired 2 | wrmsr -p 0 0x187 0x41003c # unhalted core cycles 3 | -------------------------------------------------------------------------------- /ranker/main.cpp: -------------------------------------------------------------------------------- 1 | 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include 10 | 11 | namespace bmi = boost::multi_index; 12 | namespace bi = boost::interprocess; 13 | 14 | typedef bi::managed_shared_memory managed_shared_memory_t; 15 | 16 | struct rank_info 17 | { 18 | uint32_t uid; 19 | uint32_t score; 20 | char name[16]; 21 | }; 22 | 23 | typedef bi::node_allocator node_allocator_t; 24 | 25 | struct tag_uid{}; 26 | struct tag_score{}; 27 | 28 | typedef bmi::multi_index_container, bmi::member >, 31 | bmi::ranked_non_unique, bmi::member, std::greater > 32 | >, 33 | node_allocator_t 34 | > container_t; 35 | 36 | void gen_rand_name(char name[16]) 37 | { 38 | snprintf(name, 16, "%04x%04x%04x", rand() & 0xffff, rand() & 0xffff, rand() & 0xffff); 39 | } 40 | 41 | void insert_data(container_t& c, int n) 42 | { 43 | for (int i = 0; i < n; i++) 44 | { 45 | rank_info info; 46 | info.uid = i; 47 | info.score = rand(); 48 | auto pr = c.insert(info); 49 | gen_rand_name(const_cast(pr.first->name)); 50 | } 51 | } 52 | 53 | std::vector get_ranks(const container_t& c, int start_rank, int count) 54 | { 55 | std::vector ranks; 56 | if (count <= 0) 57 | { 58 | return ranks; 59 | } 60 | auto& index = c.get(); 61 | ranks.reserve(count); 62 | for (auto it = index.nth(start_rank); it != index.end() && count-- > 0; ++it) 63 | { 64 | ranks.push_back(&(*it)); 65 | } 66 | return ranks; 67 | } 68 | 69 | uint32_t get_rank_by_uid(const container_t& c, uint32_t uid) 70 | { 71 | const auto& uid_index = c.get(); 72 | auto it = uid_index.find(uid); 73 | if (it == uid_index.end()) 74 | { 75 | return -1; 
76 | } 77 | const auto& score_index = c.get(); 78 | return score_index.rank(c.project(it)); 79 | } 80 | 81 | void modify_score(container_t& c, uint32_t uid, uint32_t score) 82 | { 83 | auto& uid_index = c.get(); 84 | auto it = uid_index.find(uid); 85 | if (it == uid_index.end()) 86 | { 87 | return; 88 | } 89 | c.modify(it, [score](rank_info& info){info.score = score;}); 90 | } 91 | 92 | int main() 93 | { 94 | static const int DATA_SIZE = 1000000; 95 | static const int MEMORY_SIZE = DATA_SIZE * sizeof(rank_info) * 10; 96 | managed_shared_memory_t shared_memory(bi::open_or_create, "__Ranker__", MEMORY_SIZE); 97 | node_allocator_t alloc(shared_memory.get_segment_manager()); 98 | container_t* c = shared_memory.find_or_construct("__Ranker__")(alloc); 99 | if (c->empty()) 100 | { 101 | insert_data(*c, DATA_SIZE); 102 | } 103 | printf("free memory: %ld\n", shared_memory.get_free_memory()); 104 | 105 | std::vector ranks = get_ranks(*c, 0, 100); 106 | for (size_t i = 0; i < ranks.size(); i++) 107 | { 108 | printf("%d\t%d\t%s\n", ranks[i]->score, ranks[i]->uid, ranks[i]->name); 109 | } 110 | printf("uid 41, rank: %d\n", get_rank_by_uid(*c, 41)); 111 | modify_score(*c, 41, 2147481194 + 1); 112 | printf("uid 41, after modify rank: %d\n", get_rank_by_uid(*c, 41)); 113 | ranks = get_ranks(*c, 0, 100); 114 | for (size_t i = 0; i < ranks.size(); i++) 115 | { 116 | printf("%d\t%d\t%s\n", ranks[i]->score, ranks[i]->uid, ranks[i]->name); 117 | } 118 | return 0; 119 | } 120 | -------------------------------------------------------------------------------- /rdpmc.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | 5 | 6 | unsigned long rdpmc_instructions() 7 | { 8 | 9 | unsigned int a, d, c; 10 | c = (1 << 30) + 2; 11 | __asm__ volatile ("rdpmc": "=a"(a), "=d"(d): "c"(c)); 12 | return ((unsigned long)a | ((unsigned long)d << 32)); 13 | } 14 | 15 | int main() 16 | { 17 | cpu_set_t set; 18 | CPU_ZERO(&set); 19 | 
CPU_SET(0, &set); 20 | sched_setaffinity(0, sizeof(set), &set); 21 | std::cout < 2 | #include 3 | #include 4 | 5 | static const int N = 1024 * 1024 * 1024; 6 | 7 | int test_register() 8 | { 9 | int a = 0; 10 | int b = 0; 11 | int c = 0; 12 | int d = 0; 13 | int e = rand(); 14 | int f = rand(); 15 | int g = rand(); 16 | int h = rand(); 17 | for (int i = 0; i < N; i++) 18 | { 19 | a += e; 20 | b += f; 21 | c += g; 22 | d += h; 23 | } 24 | return a + b + c + d; 25 | } 26 | 27 | int test_memory() 28 | { 29 | int a = 0; 30 | int b = 0; 31 | int c = 0; 32 | int d = 0; 33 | volatile int e = rand(); 34 | volatile int f = rand(); 35 | volatile int g = rand(); 36 | volatile int h = rand(); 37 | for (int i = 0; i < N; i++) 38 | { 39 | a += e; 40 | b += f; 41 | c += g; 42 | d += h; 43 | } 44 | return a + b + c + d; 45 | } 46 | 47 | int main() 48 | { 49 | auto start = std::chrono::high_resolution_clock::now(); 50 | test_register(); 51 | auto end_register = std::chrono::high_resolution_clock::now(); 52 | test_memory(); 53 | auto end_all = std::chrono::high_resolution_clock::now(); 54 | std::cout << std::chrono::duration_cast(end_register - start).count() << std::endl; 55 | std::cout << std::chrono::duration_cast(end_all - end_register).count() << std::endl; 56 | return 0; 57 | } 58 | 59 | -------------------------------------------------------------------------------- /register/main.s: -------------------------------------------------------------------------------- 1 | .file "main.cpp" 2 | .text 3 | .type _Z41__static_initialization_and_destruction_0ii, @function 4 | _Z41__static_initialization_and_destruction_0ii: 5 | .LFB2393: 6 | .cfi_startproc 7 | cmpl $1, %edi 8 | je .L7 9 | .L4: 10 | ret 11 | .L7: 12 | cmpl $65535, %esi 13 | jne .L4 14 | subq $8, %rsp 15 | .cfi_def_cfa_offset 16 16 | leaq _ZStL8__ioinit(%rip), %rdi 17 | call _ZNSt8ios_base4InitC1Ev@PLT 18 | leaq __dso_handle(%rip), %rdx 19 | leaq _ZStL8__ioinit(%rip), %rsi 20 | movq _ZNSt8ios_base4InitD1Ev@GOTPCREL(%rip), 
%rdi 21 | call __cxa_atexit@PLT 22 | addq $8, %rsp 23 | .cfi_def_cfa_offset 8 24 | ret 25 | .cfi_endproc 26 | .LFE2393: 27 | .size _Z41__static_initialization_and_destruction_0ii, .-_Z41__static_initialization_and_destruction_0ii 28 | .globl _Z13test_registerv 29 | .type _Z13test_registerv, @function 30 | _Z13test_registerv: 31 | .LFB1876: 32 | .cfi_startproc 33 | endbr64 34 | pushq %r12 35 | .cfi_def_cfa_offset 16 36 | .cfi_offset 12, -16 37 | pushq %rbp 38 | .cfi_def_cfa_offset 24 39 | .cfi_offset 6, -24 40 | pushq %rbx 41 | .cfi_def_cfa_offset 32 42 | .cfi_offset 3, -32 43 | call rand@PLT 44 | movl %eax, %r12d 45 | call rand@PLT 46 | movl %eax, %ebp 47 | call rand@PLT 48 | movl %eax, %ebx 49 | call rand@PLT 50 | movl %eax, %r8d 51 | movl $0, %edx 52 | movl $0, %esi 53 | movl $0, %edi 54 | movl $0, %eax 55 | movl $0, %ecx 56 | .L10: 57 | cmpl $1073741823, %edx 58 | jg .L9 59 | addl %r12d, %ecx 60 | addl %ebp, %eax 61 | addl %ebx, %edi 62 | addl %r8d, %esi 63 | addl $1, %edx 64 | jmp .L10 65 | .L9: 66 | addl %ecx, %eax 67 | addl %edi, %eax 68 | addl %esi, %eax 69 | popq %rbx 70 | .cfi_def_cfa_offset 24 71 | popq %rbp 72 | .cfi_def_cfa_offset 16 73 | popq %r12 74 | .cfi_def_cfa_offset 8 75 | ret 76 | .cfi_endproc 77 | .LFE1876: 78 | .size _Z13test_registerv, .-_Z13test_registerv 79 | .globl _Z11test_memoryv 80 | .type _Z11test_memoryv, @function 81 | _Z11test_memoryv: 82 | .LFB1877: 83 | .cfi_startproc 84 | endbr64 85 | subq $24, %rsp 86 | .cfi_def_cfa_offset 32 87 | call rand@PLT 88 | movl %eax, (%rsp) 89 | call rand@PLT 90 | movl %eax, 4(%rsp) 91 | call rand@PLT 92 | movl %eax, 8(%rsp) 93 | call rand@PLT 94 | movl %eax, 12(%rsp) 95 | movl $0, %eax 96 | movl $0, %ecx 97 | movl $0, %esi 98 | movl $0, %edi 99 | movl $0, %edx 100 | .L14: 101 | cmpl $1073741823, %eax 102 | jg .L13 103 | addl (%rsp), %edx 104 | addl 4(%rsp), %edi 105 | addl 8(%rsp), %esi 106 | addl 12(%rsp), %ecx 107 | addl $1, %eax 108 | jmp .L14 109 | .L13: 110 | leal (%rdx,%rdi), %eax 111 | addl 
%esi, %eax 112 | addl %ecx, %eax 113 | addq $24, %rsp 114 | .cfi_def_cfa_offset 8 115 | ret 116 | .cfi_endproc 117 | .LFE1877: 118 | .size _Z11test_memoryv, .-_Z11test_memoryv 119 | .section .text._ZNSt6chronomiINS_3_V212system_clockENS_8durationIlSt5ratioILl1ELl1000000000EEEES6_EENSt11common_typeIJT0_T1_EE4typeERKNS_10time_pointIT_S8_EERKNSC_ISD_S9_EE,"axG",@progbits,_ZNSt6chronomiINS_3_V212system_clockENS_8durationIlSt5ratioILl1ELl1000000000EEEES6_EENSt11common_typeIJT0_T1_EE4typeERKNS_10time_pointIT_S8_EERKNSC_ISD_S9_EE,comdat 120 | .weak _ZNSt6chronomiINS_3_V212system_clockENS_8durationIlSt5ratioILl1ELl1000000000EEEES6_EENSt11common_typeIJT0_T1_EE4typeERKNS_10time_pointIT_S8_EERKNSC_ISD_S9_EE 121 | .type _ZNSt6chronomiINS_3_V212system_clockENS_8durationIlSt5ratioILl1ELl1000000000EEEES6_EENSt11common_typeIJT0_T1_EE4typeERKNS_10time_pointIT_S8_EERKNSC_ISD_S9_EE, @function 122 | _ZNSt6chronomiINS_3_V212system_clockENS_8durationIlSt5ratioILl1ELl1000000000EEEES6_EENSt11common_typeIJT0_T1_EE4typeERKNS_10time_pointIT_S8_EERKNSC_ISD_S9_EE: 123 | .LFB1885: 124 | .cfi_startproc 125 | endbr64 126 | movq (%rdi), %rax 127 | subq (%rsi), %rax 128 | ret 129 | .cfi_endproc 130 | .LFE1885: 131 | .size _ZNSt6chronomiINS_3_V212system_clockENS_8durationIlSt5ratioILl1ELl1000000000EEEES6_EENSt11common_typeIJT0_T1_EE4typeERKNS_10time_pointIT_S8_EERKNSC_ISD_S9_EE, .-_ZNSt6chronomiINS_3_V212system_clockENS_8durationIlSt5ratioILl1ELl1000000000EEEES6_EENSt11common_typeIJT0_T1_EE4typeERKNS_10time_pointIT_S8_EERKNSC_ISD_S9_EE 132 | .text 133 | .globl main 134 | .type main, @function 135 | main: 136 | .LFB1878: 137 | .cfi_startproc 138 | endbr64 139 | pushq %r12 140 | .cfi_def_cfa_offset 16 141 | .cfi_offset 12, -16 142 | pushq %rbp 143 | .cfi_def_cfa_offset 24 144 | .cfi_offset 6, -24 145 | pushq %rbx 146 | .cfi_def_cfa_offset 32 147 | .cfi_offset 3, -32 148 | subq $32, %rsp 149 | .cfi_def_cfa_offset 64 150 | movl $40, %ebp 151 | movq %fs:0(%rbp), %rax 152 | movq %rax, 24(%rsp) 153 | xorl 
%eax, %eax 154 | call _ZNSt6chrono3_V212system_clock3nowEv@PLT 155 | movq %rax, (%rsp) 156 | call _Z13test_registerv 157 | call _ZNSt6chrono3_V212system_clock3nowEv@PLT 158 | movq %rax, 8(%rsp) 159 | call _Z11test_memoryv 160 | call _ZNSt6chrono3_V212system_clock3nowEv@PLT 161 | movq %rax, 16(%rsp) 162 | movq %rsp, %rsi 163 | leaq 8(%rsp), %r12 164 | movq %r12, %rdi 165 | call _ZNSt6chronomiINS_3_V212system_clockENS_8durationIlSt5ratioILl1ELl1000000000EEEES6_EENSt11common_typeIJT0_T1_EE4typeERKNS_10time_pointIT_S8_EERKNSC_ISD_S9_EE 166 | movq %rax, %rcx 167 | movabsq $4835703278458516699, %rbx 168 | imulq %rbx 169 | sarq $18, %rdx 170 | sarq $63, %rcx 171 | subq %rcx, %rdx 172 | movq %rdx, %rsi 173 | leaq _ZSt4cout(%rip), %rdi 174 | call _ZNSo9_M_insertIlEERSoT_@PLT 175 | movq %rax, %rdi 176 | call _ZSt4endlIcSt11char_traitsIcEERSt13basic_ostreamIT_T0_ES6_@PLT 177 | leaq 16(%rsp), %rdi 178 | movq %r12, %rsi 179 | call _ZNSt6chronomiINS_3_V212system_clockENS_8durationIlSt5ratioILl1ELl1000000000EEEES6_EENSt11common_typeIJT0_T1_EE4typeERKNS_10time_pointIT_S8_EERKNSC_ISD_S9_EE 180 | movq %rax, %rcx 181 | imulq %rbx 182 | sarq $18, %rdx 183 | sarq $63, %rcx 184 | movq %rdx, %rsi 185 | subq %rcx, %rsi 186 | leaq _ZSt4cout(%rip), %rdi 187 | call _ZNSo9_M_insertIlEERSoT_@PLT 188 | movq %rax, %rdi 189 | call _ZSt4endlIcSt11char_traitsIcEERSt13basic_ostreamIT_T0_ES6_@PLT 190 | movq 24(%rsp), %rax 191 | xorq %fs:0(%rbp), %rax 192 | jne .L20 193 | movl $0, %eax 194 | addq $32, %rsp 195 | .cfi_remember_state 196 | .cfi_def_cfa_offset 32 197 | popq %rbx 198 | .cfi_def_cfa_offset 24 199 | popq %rbp 200 | .cfi_def_cfa_offset 16 201 | popq %r12 202 | .cfi_def_cfa_offset 8 203 | ret 204 | .L20: 205 | .cfi_restore_state 206 | call __stack_chk_fail@PLT 207 | .cfi_endproc 208 | .LFE1878: 209 | .size main, .-main 210 | .type _GLOBAL__sub_I__Z13test_registerv, @function 211 | _GLOBAL__sub_I__Z13test_registerv: 212 | .LFB2394: 213 | .cfi_startproc 214 | endbr64 215 | subq $8, %rsp 216 | 
.cfi_def_cfa_offset 16 217 | movl $65535, %esi 218 | movl $1, %edi 219 | call _Z41__static_initialization_and_destruction_0ii 220 | addq $8, %rsp 221 | .cfi_def_cfa_offset 8 222 | ret 223 | .cfi_endproc 224 | .LFE2394: 225 | .size _GLOBAL__sub_I__Z13test_registerv, .-_GLOBAL__sub_I__Z13test_registerv 226 | .section .init_array,"aw" 227 | .align 8 228 | .quad _GLOBAL__sub_I__Z13test_registerv 229 | .local _ZStL8__ioinit 230 | .comm _ZStL8__ioinit,1,1 231 | .hidden __dso_handle 232 | .ident "GCC: (Ubuntu 9.3.0-17ubuntu1~20.04) 9.3.0" 233 | .section .note.GNU-stack,"",@progbits 234 | .section .note.gnu.property,"a" 235 | .align 8 236 | .long 1f - 0f 237 | .long 4f - 1f 238 | .long 5 239 | 0: 240 | .string "GNU" 241 | 1: 242 | .align 8 243 | .long 0xc0000002 244 | .long 3f - 2f 245 | 2: 246 | .long 0x3 247 | 3: 248 | .align 8 249 | 4: 250 | -------------------------------------------------------------------------------- /simd/box_box_scalar.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | 5 | struct Point2D 6 | { 7 | float x; 8 | float y; 9 | }; 10 | 11 | struct Rect2D 12 | { 13 | Point2D minimum; 14 | Point2D maximum; 15 | }; 16 | 17 | #define AND && 18 | 19 | inline bool intersect1d(float a_minimum, float a_maxinum, float b_minimum, float b_maximum) 20 | { 21 | return (a_minimum <= b_maximum) AND (b_minimum <= a_maxinum); 22 | } 23 | 24 | inline bool intersect2d(const Rect2D& a, const Rect2D& b) 25 | { 26 | return intersect1d(a.minimum.x, a.maximum.x, b.minimum.x, b.maximum.x) AND 27 | intersect1d(a.minimum.y, a.maximum.y, b.minimum.y, b.maximum.y); 28 | } 29 | 30 | static const int N = 4; 31 | __attribute__((noinline)) int test(const Rect2D* boxes1, const Rect2D* boxes2) 32 | { 33 | int intersects = 0; 34 | for (int i = 0; i < N; i++) 35 | { 36 | intersects += intersect2d(boxes1[i], boxes2[i]); 37 | asm volatile("": :"r,m"(intersects): "memory"); 38 | } 39 | return intersects; 40 | } 41 
| 42 | int main() 43 | { 44 | Rect2D box{{0, 0}, {1, 1}}; 45 | Rect2D boxes1[N] = {box, box, box, box}; 46 | Rect2D boxes2[N] = {box, box, box, box}; 47 | 48 | static const int M = 1024 * 1024 * 100; 49 | int sum = 0; 50 | auto start = std::chrono::high_resolution_clock::now(); 51 | for (int i = 0; i < M; i++) 52 | { 53 | sum += test(boxes1, boxes2); 54 | } 55 | auto end = std::chrono::high_resolution_clock::now(); 56 | std::cout << sum << "\telapsed milliseconds: " << std::chrono::duration_cast(end - start).count() << std::endl; 57 | return 0; 58 | } 59 | 60 | -------------------------------------------------------------------------------- /simd/box_box_simd.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | 5 | typedef __m128 float4; 6 | 7 | struct Point2D 8 | { 9 | float4 x; 10 | float4 y; 11 | }; 12 | 13 | struct Rect2D 14 | { 15 | Point2D minimum; 16 | Point2D maximum; 17 | }; 18 | 19 | inline float4 intersect1d(float4 a_minimum, float4 a_maxinum, float4 b_minimum, float4 b_maximum) 20 | { 21 | float4 i = _mm_cmple_ps(a_minimum, b_maximum); 22 | float4 j = _mm_cmple_ps(b_minimum, a_maxinum); 23 | return _mm_and_ps(i, j); 24 | } 25 | 26 | inline int intersect2d(const Rect2D& a, const Rect2D& b) 27 | { 28 | float4 i = intersect1d(a.minimum.x, a.maximum.x, b.minimum.x, b.maximum.x); 29 | float4 j = intersect1d(a.minimum.y, a.maximum.y, b.minimum.y, b.maximum.y); 30 | return __builtin_popcount(_mm_movemask_ps(_mm_and_ps(i, j))); 31 | } 32 | 33 | __attribute__((noinline)) int test(const Rect2D& boxes1, const Rect2D& boxes2) 34 | { 35 | int intersects = intersect2d(boxes1, boxes2); 36 | return intersects; 37 | } 38 | 39 | int main() 40 | { 41 | float a_minimum[] = {0, 0, 0, 0}; 42 | float b_minimum[] = {0, 0, 0, 0}; 43 | float a_maximum[] = {1, 1, 1, 1}; 44 | float b_maximum[] = {1, 1, 1, 1}; 45 | Rect2D boxes1 = {{_mm_load_ps(a_minimum), _mm_load_ps(b_minimum)}, {_mm_load_ps(a_maximum), 
_mm_load_ps(b_maximum)}}; 46 | Rect2D boxes2 = boxes1; 47 | 48 | static const int M = 1024 * 1024 * 100; 49 | int sum = 0; 50 | auto start = std::chrono::high_resolution_clock::now(); 51 | for (int i = 0; i < M; i++) 52 | { 53 | sum += test(boxes1, boxes2); 54 | asm volatile("": :"r,m"(sum): "memory"); 55 | } 56 | auto end = std::chrono::high_resolution_clock::now(); 57 | std::cout << sum << "\telapsed milliseconds: " << std::chrono::duration_cast(end - start).count() << std::endl; 58 | return 0; 59 | } 60 | 61 | -------------------------------------------------------------------------------- /simd/make.sh: -------------------------------------------------------------------------------- 1 | g++ box_box_simd.cpp -O2 -std=c++11 -march=native -o simd 2 | g++ box_box_scalar.cpp -O2 -std=c++11 -march=native -o scalar 3 | -------------------------------------------------------------------------------- /start.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | for i in {1..2} 3 | do 4 | ./a.out & 5 | done 6 | -------------------------------------------------------------------------------- /static_cast/main.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | 5 | struct B 6 | { 7 | int i; 8 | }; 9 | 10 | struct D : B 11 | { 12 | int j; 13 | virtual ~D(){} 14 | }; 15 | 16 | int main() 17 | { 18 | std::aligned_storage_t buffer; 19 | 20 | B* b = reinterpret_cast(new (&buffer) D); 21 | D* d = static_cast(b); 22 | 23 | const int offset = reinterpret_cast(d) - reinterpret_cast(&buffer); 24 | printf("offset=%d\n", offset); 25 | 26 | return 0; 27 | } 28 | 29 | -------------------------------------------------------------------------------- /template_expression/Makefile: -------------------------------------------------------------------------------- 1 | all: calc4 2 | 3 | calc4: calc4.cpp 4 | g++ calc4.cpp -o calc4 -O2 -std=c++11 
-I/home/fractal/boost_1_75_0/ 5 | -------------------------------------------------------------------------------- /template_expression/calc4.cpp: -------------------------------------------------------------------------------- 1 | /*============================================================================= 2 | Copyright (c) 2001-2011 Joel de Guzman 3 | 4 | Distributed under the Boost Software License, Version 1.0. (See accompanying 5 | file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) 6 | =============================================================================*/ 7 | /////////////////////////////////////////////////////////////////////////////// 8 | // 9 | // A Calculator example demonstrating generation of AST. The AST, 10 | // once created, is traversed, 1) To print its contents and 11 | // 2) To evaluate the result. 12 | // 13 | // [ JDG April 28, 2008 ] For BoostCon 2008 14 | // [ JDG February 18, 2011 ] Pure attributes. No semantic actions. 15 | // 16 | /////////////////////////////////////////////////////////////////////////////// 17 | 18 | // Spirit v2.5 allows you to suppress automatic generation 19 | // of predefined terminals to speed up complation. With 20 | // BOOST_SPIRIT_NO_PREDEFINED_TERMINALS defined, you are 21 | // responsible in creating instances of the terminals that 22 | // you need (e.g. see qi::uint_type uint_ below). 
23 | #define BOOST_SPIRIT_NO_PREDEFINED_TERMINALS 24 | 25 | #if defined(_MSC_VER) 26 | # pragma warning(disable: 4345) 27 | #endif 28 | 29 | #include 30 | #include 31 | #include 32 | #include 33 | #include 34 | #include 35 | 36 | #include 37 | #include 38 | 39 | namespace client { namespace ast 40 | { 41 | /////////////////////////////////////////////////////////////////////////// 42 | // The AST 43 | /////////////////////////////////////////////////////////////////////////// 44 | struct nil {}; 45 | struct signed_; 46 | struct program; 47 | 48 | typedef boost::variant< 49 | nil 50 | , unsigned int 51 | , boost::recursive_wrapper 52 | , boost::recursive_wrapper 53 | > 54 | operand; 55 | 56 | struct signed_ 57 | { 58 | char sign; 59 | operand operand_; 60 | }; 61 | 62 | struct operation 63 | { 64 | char operator_; 65 | operand operand_; 66 | }; 67 | 68 | struct program 69 | { 70 | operand first; 71 | std::list rest; 72 | }; 73 | }} 74 | 75 | BOOST_FUSION_ADAPT_STRUCT( 76 | client::ast::signed_, 77 | (char, sign) 78 | (client::ast::operand, operand_) 79 | ) 80 | 81 | BOOST_FUSION_ADAPT_STRUCT( 82 | client::ast::operation, 83 | (char, operator_) 84 | (client::ast::operand, operand_) 85 | ) 86 | 87 | BOOST_FUSION_ADAPT_STRUCT( 88 | client::ast::program, 89 | (client::ast::operand, first) 90 | (std::list, rest) 91 | ) 92 | 93 | namespace client { namespace ast 94 | { 95 | /////////////////////////////////////////////////////////////////////////// 96 | // The AST Printer 97 | /////////////////////////////////////////////////////////////////////////// 98 | struct printer 99 | { 100 | typedef void result_type; 101 | 102 | void operator()(nil) const {} 103 | void operator()(unsigned int n) const { std::cout << n; } 104 | 105 | void operator()(operation const& x) const 106 | { 107 | boost::apply_visitor(*this, x.operand_); 108 | switch (x.operator_) 109 | { 110 | case '+': std::cout << " add"; break; 111 | case '-': std::cout << " subt"; break; 112 | case '*': std::cout << " 
mult"; break; 113 | case '/': std::cout << " div"; break; 114 | } 115 | } 116 | 117 | void operator()(signed_ const& x) const 118 | { 119 | boost::apply_visitor(*this, x.operand_); 120 | switch (x.sign) 121 | { 122 | case '-': std::cout << " neg"; break; 123 | case '+': std::cout << " pos"; break; 124 | } 125 | } 126 | 127 | void operator()(program const& x) const 128 | { 129 | boost::apply_visitor(*this, x.first); 130 | BOOST_FOREACH(operation const& oper, x.rest) 131 | { 132 | std::cout << ' '; 133 | (*this)(oper); 134 | } 135 | } 136 | }; 137 | 138 | /////////////////////////////////////////////////////////////////////////// 139 | // The AST evaluator 140 | /////////////////////////////////////////////////////////////////////////// 141 | struct eval 142 | { 143 | typedef int result_type; 144 | 145 | int operator()(nil) const { BOOST_ASSERT(0); return 0; } 146 | int operator()(unsigned int n) const { return n; } 147 | 148 | int operator()(operation const& x, int lhs) const 149 | { 150 | int rhs = boost::apply_visitor(*this, x.operand_); 151 | switch (x.operator_) 152 | { 153 | case '+': return lhs + rhs; 154 | case '-': return lhs - rhs; 155 | case '*': return lhs * rhs; 156 | case '/': return lhs / rhs; 157 | } 158 | BOOST_ASSERT(0); 159 | return 0; 160 | } 161 | 162 | int operator()(signed_ const& x) const 163 | { 164 | int rhs = boost::apply_visitor(*this, x.operand_); 165 | switch (x.sign) 166 | { 167 | case '-': return -rhs; 168 | case '+': return +rhs; 169 | } 170 | BOOST_ASSERT(0); 171 | return 0; 172 | } 173 | 174 | int operator()(program const& x) const 175 | { 176 | int state = boost::apply_visitor(*this, x.first); 177 | BOOST_FOREACH(operation const& oper, x.rest) 178 | { 179 | state = (*this)(oper, state); 180 | } 181 | return state; 182 | } 183 | }; 184 | }} 185 | 186 | namespace client 187 | { 188 | namespace qi = boost::spirit::qi; 189 | namespace ascii = boost::spirit::ascii; 190 | 191 | 
/////////////////////////////////////////////////////////////////////////////// 192 | // The calculator grammar 193 | /////////////////////////////////////////////////////////////////////////////// 194 | template 195 | struct calculator : qi::grammar 196 | { 197 | calculator() : calculator::base_type(expression) 198 | { 199 | qi::uint_type uint_; 200 | qi::char_type char_; 201 | 202 | expression = 203 | term 204 | >> *( (char_('+') >> term) 205 | | (char_('-') >> term) 206 | ) 207 | ; 208 | 209 | term = 210 | factor 211 | >> *( (char_('*') >> factor) 212 | | (char_('/') >> factor) 213 | ) 214 | ; 215 | 216 | factor = 217 | uint_ 218 | | '(' >> expression >> ')' 219 | | (char_('-') >> factor) 220 | | (char_('+') >> factor) 221 | ; 222 | } 223 | 224 | qi::rule expression; 225 | qi::rule term; 226 | qi::rule factor; 227 | }; 228 | } 229 | 230 | /////////////////////////////////////////////////////////////////////////////// 231 | // Main program 232 | /////////////////////////////////////////////////////////////////////////////// 233 | int 234 | main() 235 | { 236 | std::cout << "/////////////////////////////////////////////////////////\n\n"; 237 | std::cout << "Expression parser...\n\n"; 238 | std::cout << "/////////////////////////////////////////////////////////\n\n"; 239 | std::cout << "Type an expression...or [q or Q] to quit\n\n"; 240 | 241 | typedef std::string::const_iterator iterator_type; 242 | typedef client::calculator calculator; 243 | typedef client::ast::program ast_program; 244 | typedef client::ast::printer ast_print; 245 | typedef client::ast::eval ast_eval; 246 | 247 | std::string str; 248 | while (std::getline(std::cin, str)) 249 | { 250 | if (str.empty() || str[0] == 'q' || str[0] == 'Q') 251 | break; 252 | 253 | calculator calc; // Our grammar 254 | ast_program program; // Our program (AST) 255 | ast_print print; // Prints the program 256 | ast_eval eval; // Evaluates the program 257 | 258 | std::string::const_iterator iter = str.begin(); 259 | 
std::string::const_iterator end = str.end(); 260 | boost::spirit::ascii::space_type space; 261 | bool r = phrase_parse(iter, end, calc, space, program); 262 | 263 | if (r && iter == end) 264 | { 265 | std::cout << "-------------------------\n"; 266 | std::cout << "Parsing succeeded\n"; 267 | print(program); 268 | std::cout << "\nResult: " << eval(program) << std::endl; 269 | std::cout << "-------------------------\n"; 270 | } 271 | else 272 | { 273 | std::string rest(iter, end); 274 | std::cout << "-------------------------\n"; 275 | std::cout << "Parsing failed\n"; 276 | std::cout << "stopped at: \" " << rest << "\"\n"; 277 | std::cout << "-------------------------\n"; 278 | } 279 | } 280 | 281 | std::cout << "Bye... :-) \n\n"; 282 | return 0; 283 | } 284 | 285 | 286 | -------------------------------------------------------------------------------- /template_expression/lazy_calc_example.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | int very_heavy_calc() 4 | { 5 | return 42; 6 | } 7 | 8 | typedef int (*func_t)(); 9 | 10 | int bad(int a, int b) 11 | { 12 | return a? b: 0; 13 | } 14 | 15 | int good(int a, func_t b) 16 | { 17 | return a? 
b(): 0; 18 | } 19 | 20 | int main() 21 | { 22 | int ret1 = bad(0, very_heavy_calc()); 23 | int ret2 = good(0, very_heavy_calc); 24 | return 0; 25 | } 26 | 27 | -------------------------------------------------------------------------------- /template_expression/template_expression1.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | struct factor 4 | { 5 | int f; 6 | }; 7 | 8 | struct term 9 | { 10 | 11 | factor a; 12 | factor b; 13 | }; 14 | 15 | term operator+(factor a, factor b) 16 | { 17 | return term{a, b}; 18 | } 19 | 20 | int calc(term t) 21 | { 22 | return t.a.f + t.b.f; 23 | } 24 | 25 | int main() 26 | { 27 | factor a{1}, b{2}; 28 | term t = a + b; 29 | std::cout << calc(t) << std::endl; 30 | return 0; 31 | } 32 | 33 | -------------------------------------------------------------------------------- /template_expression/template_expression2.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | // term = factor factor | term factor 4 | struct factor 5 | { 6 | int f; 7 | }; 8 | 9 | template 10 | struct term 11 | { 12 | term_or_factor_t a; 13 | factor b; 14 | }; 15 | 16 | template 17 | term operator+(term_or_factor_t a, factor b) 18 | { 19 | return term{a, b}; 20 | } 21 | 22 | template 23 | int calc(term t) 24 | { 25 | return calc(t.a) + t.b.f; 26 | } 27 | 28 | template <> 29 | int calc(term t) 30 | { 31 | return t.a.f + t.b.f; 32 | } 33 | 34 | int main() 35 | { 36 | factor a{1}, b{2}; 37 | auto t = a + b + b; 38 | //b + b => 2 * b 39 | std::cout << calc(t) << std::endl; 40 | return 0; 41 | } 42 | 43 | -------------------------------------------------------------------------------- /template_expression/template_expression3.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | 4 | struct factor 5 | { 6 | int f; 7 | }; 8 | 9 | template 10 | struct term 11 | { 12 | term_or_factor_t a; 13 | factor b; 14 | 
}; 15 | 16 | template 17 | term operator+(term_or_factor_t a, factor b) 18 | { 19 | return term{a, b}; 20 | } 21 | 22 | template 23 | int calc(term t) 24 | { 25 | return calc(t.a) + t.b.f; 26 | } 27 | 28 | template <> 29 | int calc(term t) 30 | { 31 | return t.a.f + t.b.f; 32 | } 33 | 34 | typedef term term2_t; 35 | typedef term term3_t; 36 | template <> 37 | int calc(term3_t t) 38 | { 39 | if (t.b.f == t.a.b.f) 40 | { 41 | std::cout << "transformed" << std::endl; 42 | return 2 * t.b.f + t.a.a.f; 43 | } 44 | else 45 | { 46 | std::cout << "not transformed" << std::endl; 47 | return t.b.f + t.a.b.f + t.a.a.f; 48 | } 49 | } 50 | 51 | int main() 52 | { 53 | factor a{1}, b{2}; 54 | auto t = a + b + b; 55 | std::cout << calc(t) << std::endl; 56 | return 0; 57 | } 58 | 59 | -------------------------------------------------------------------------------- /thread_local/main.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | 8 | static unsigned long readfsbase() 9 | { 10 | unsigned long addr; 11 | syscall(SYS_arch_prctl, ARCH_GET_FS, &addr); 12 | return addr; 13 | } 14 | 15 | static void foo() 16 | { 17 | thread_local int i = 0; 18 | i++; 19 | printf("i=%d, tid=%d, fs=%lx\n", i, gettid(), readfsbase()); 20 | } 21 | 22 | int main() 23 | { 24 | foo(); 25 | foo(); 26 | 27 | std::thread t([](){ 28 | foo(); 29 | foo(); 30 | foo(); 31 | }); 32 | 33 | t.join(); 34 | return 0; 35 | } 36 | 37 | -------------------------------------------------------------------------------- /uninitialized_bool/main.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | int main() 5 | { 6 | uint8_t buf1 = 1; 7 | uint8_t buf255 = 255; 8 | volatile bool* b1 = new(&buf1) bool(); 9 | volatile bool* b255 = new (&buf255) bool(); 10 | std::cout << (*b1 == *b255) << std::endl; 11 | return 0; 12 | } 13 | 14 | 
// -----------------------------------------------------------------------------
// /vec.cpp
// -----------------------------------------------------------------------------
// NOTE(review): the four original header names and the container element types
// were stripped in this copy; they are restored from usage. <cstdlib> is added
// for rand().
#include <chrono>
#include <cstdlib>
#include <iostream>
#include <list>
#include <vector>

namespace
{
/// Sums every element of `c`, M times over, and prints the elapsed time.
///
/// The running total is kept in an unsigned int: adding 10000 rand() values
/// exceeds INT_MAX, which is undefined behaviour for a signed accumulator.
/// Unsigned arithmetic wraps instead.
template <typename Container>
int timed_sum(int M, const Container& c)
{
    unsigned int sum = 0;
    auto start = std::chrono::high_resolution_clock::now();
    for (int i = 0; i < M; i++)
    {
        for (auto v : c)
        {
            // llvm-mca region markers are kept exactly where the original
            // placed them: BEGIN inside the inner loop, END after it.
            __asm volatile("#LLVM-MCA-BEGIN test");
            sum += static_cast<unsigned int>(v);
        }
        __asm volatile("#LLVM-MCA-END");
    }
    auto end = std::chrono::high_resolution_clock::now();
    // NOTE(review): duration_cast target restored from the printed label.
    std::cout << "elapsed milliseconds: "
              << std::chrono::duration_cast<std::chrono::milliseconds>(end - start).count() << std::endl;
    return static_cast<int>(sum);
}
} // namespace

/// Times M passes over a std::vector<int>; returns the wrapped sum.
int test(int M, const std::vector<int>& c)
{
    return timed_sum(M, c);
}

/// Times M passes over a std::list<int>; returns the wrapped sum.
int test(int M, const std::list<int>& c)
{
    return timed_sum(M, c);
}

int main()
{
    const int N = 10000;
    std::vector<int> va;
    std::list<int> la;
    for (int i = 0; i < N; i++)
    {
        va.push_back(rand());
        la.push_back(rand());
    }
    const int M = 100000;
    // Combine the two results without signed overflow; the value is printed
    // only so the work cannot be optimised away.
    unsigned int sum = static_cast<unsigned int>(test(M, va));
    sum += static_cast<unsigned int>(test(M, la));
    std::cout << static_cast<int>(sum) << std::endl;
    return 0;
}

// -----------------------------------------------------------------------------
// /virtual_optimize/main.cpp
// -----------------------------------------------------------------------------
// NOTE(review): the three original header names and the template parameter
// list were stripped in this copy; restored from usage.
#include <chrono>
#include <iostream>
#include <vector>

/// Converts a pointer-to-virtual-member-function into a vtable slot index.
///
/// The member pointer is written to one union member and its leading bytes
/// are read back as an int; slot = (value - 1) / sizeof(void*).
/// NOTE(review): this presumably relies on the Itanium C++ ABI encoding of
/// virtual member pointers (1 + byte offset into the vtable) and on a
/// little-endian target; reading the inactive union member is not portable
/// C++ -- confirm the target ABI.
template <typename T>
int get_function_vtable_offset(T t)
{
    union
    {
        T ptr;
        int offset;
    };
    ptr = t;
    return (offset - 1) / sizeof(void*);
}

class A;

| 19 | void* get_virtual_function_address(const A* a, int offset) 20 | { 21 | void** vtable = *(void***)a; 22 | return vtable[offset]; 23 | } 24 | 25 | class A 26 | { 27 | public: 28 | void init() 29 | { 30 | static const int offset = get_function_vtable_offset(&A::update); 31 | static const A proto_type; 32 | static const void* a_update_address = get_virtual_function_address(&proto_type, offset); 33 | has_update = a_update_address != get_virtual_function_address(this, offset); 34 | } 35 | virtual void update(){} 36 | bool has_update = false; 37 | }; 38 | 39 | static int count = 0; 40 | class B : public A 41 | { 42 | public: 43 | virtual void update() override 44 | { 45 | count++; 46 | } 47 | }; 48 | 49 | int main() 50 | { 51 | const int M = 10000; 52 | const int N = 100000; 53 | std::vector va; 54 | va.reserve(M); 55 | for (int i = 0; i < M - 1; i++) 56 | { 57 | va.push_back(new A()); 58 | va.back()->init(); 59 | } 60 | va.push_back(new A()); 61 | va.back()->init(); 62 | auto start = std::chrono::high_resolution_clock::now(); 63 | for (int i = 0; i < N; i++) 64 | { 65 | for (int j = 0; j < M; j++) 66 | { 67 | if (va[j]->has_update) 68 | va[j]->update(); 69 | } 70 | } 71 | auto end = std::chrono::high_resolution_clock::now(); 72 | std::cout << count << '\t'<< std::chrono::duration_cast(end - start).count() << std::endl; 73 | return 0; 74 | } 75 | 76 | 77 | --------------------------------------------------------------------------------