├── .gitignore ├── test_u01.cpp ├── example.cpp ├── Makefile ├── misc └── gen_constant.cpp ├── README.md ├── include └── qrand.h └── LICENSE /.gitignore: -------------------------------------------------------------------------------- 1 | .vscode/ 2 | test_u01 3 | example -------------------------------------------------------------------------------- /test_u01.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include "include/qrand.h" 6 | 7 | extern "C" { 8 | #include 9 | } 10 | 11 | uint32_t gen32(){ 12 | static qrand rng(123); 13 | return rng(); 14 | } 15 | 16 | 17 | int main(){ 18 | std::string genName = "qrand"; 19 | unif01_Gen* gen = unif01_CreateExternGenBits((char*) genName.c_str(), gen32); 20 | 21 | swrite_Basic = FALSE; 22 | bbattery_SmallCrush(gen); 23 | fflush(stdout); 24 | 25 | bbattery_Crush(gen); 26 | fflush(stdout); 27 | 28 | bbattery_BigCrush(gen); 29 | fflush(stdout); 30 | 31 | unif01_DeleteExternGenBits(gen); 32 | 33 | return 0; 34 | } -------------------------------------------------------------------------------- /example.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include "include/qrand.h" 4 | 5 | int main(){ 6 | std::uniform_int_distribution dist(0, 1000); 7 | std::uniform_real_distribution distf(0.0, 1.0); 8 | 9 | std::cout << "Random" << std::endl; 10 | qrand rng; 11 | for(int i = 0; i < 4; i++) 12 | std::cout << dist(rng) << " " << distf(rng) << std::endl; 13 | 14 | std::cout << "Seeded" << std::endl; 15 | qrand rng_seeded(1); 16 | for(int i = 0; i < 2; i++) 17 | std::cout << dist(rng_seeded) << " " << distf(rng_seeded) << std::endl; 18 | qrand rng_seeded2(12345); 19 | for(int i = 0; i < 2; i++) 20 | std::cout << dist(rng_seeded2) << " " << distf(rng_seeded2) << std::endl; 21 | } -------------------------------------------------------------------------------- /Makefile: 
-------------------------------------------------------------------------------- 1 | # Copyright (C) 2023 Chunqing Shan 2 | # 3 | # qrand is free software: you can redistribute it and/or modify 4 | # it under the terms of the GNU Lesser General Public License as published by 5 | # the Free Software Foundation, either version 3 of the License, or 6 | # (at your option) any later version. 7 | # 8 | # qrand is distributed in the hope that it will be useful, 9 | # but WITHOUT ANY WARRANTY; without even the implied warranty of 10 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 11 | # GNU Lesser General Public License for more details. 12 | # 13 | # You should have received a copy of the GNU Lesser General Public License 14 | # along with qrand. If not, see . 15 | 16 | CXX = g++ 17 | CXXFLAGS = -Wall -O3 -std=c++11 -march=native 18 | LDFLAGS = -ltestu01 -ltestu01probdist -ltestu01mylib 19 | 20 | example: example.cpp include/qrand.h 21 | $(CXX) $(CXXFLAGS) -o example example.cpp 22 | 23 | test_u01: test_u01.cpp include/qrand.h 24 | $(CXX) $(CXXFLAGS) -o test_u01 test_u01.cpp $(LDFLAGS) 25 | 26 | .PHONY: clean test 27 | 28 | clean: 29 | rm -f test_u01 example 30 | 31 | test: test_u01 32 | ./test_u01 33 | -------------------------------------------------------------------------------- /misc/gen_constant.cpp: -------------------------------------------------------------------------------- 1 | // Copyright (C) 2023 Chunqing Shan 2 | // 3 | // qrand is free software: you can redistribute it and/or modify 4 | // it under the terms of the GNU Lesser General Public License as published by 5 | // the Free Software Foundation, either version 3 of the License, or 6 | // (at your option) any later version. 7 | // 8 | // qrand is distributed in the hope that it will be useful, 9 | // but WITHOUT ANY WARRANTY; without even the implied warranty of 10 | // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>

/// Returns a 16-bit value with exactly 8 bits set ("balanced"), drawn from
/// /dev/urandom by rejection: words are read until one has popcount 8.
///
/// The device is opened and closed within the call, so no FILE handle
/// outlives it. If the device cannot be opened or a read comes up short,
/// a diagnostic is written to stderr and the process exits with
/// EXIT_FAILURE instead of dereferencing a null stream or spinning forever
/// on a stale value.
uint16_t balanced_rand16(){
    FILE* fp = fopen("/dev/urandom", "rb");
    if(fp == nullptr){
        perror("/dev/urandom");
        exit(EXIT_FAILURE);
    }

    // 0 has popcount 0, so the loop always performs at least one read.
    uint16_t ret = 0;
    while(__builtin_popcount(ret) != 8){
        if(fread(&ret, sizeof(uint16_t), 1, fp) != 1){
            fprintf(stderr, "short read from /dev/urandom\n");
            fclose(fp);
            exit(EXIT_FAILURE);
        }
    }

    fclose(fp);
    return ret;
}

/// Concatenates four independent balanced 16-bit words into one 64-bit
/// value (first word ends up in the most significant 16 bits), so the
/// result has exactly 32 bits set, 8 in each 16-bit quarter.
uint64_t balanced_rand64(){
    uint64_t ret = 0;
    for(int i = 0; i < 4; i++)
        ret = (ret << 16) | balanced_rand16();
    return ret;
}

It's intended to be used as a drop-in replacement for STL random number
generators with better statistical properties and faster speed.

## Usage

The qrand library is compatible with STL random number generators. It can
be used as a drop-in replacement for STL random number generators.

```c++
#include <iostream>
#include <random>
#include "qrand.h"

int main(){
    std::uniform_int_distribution<int> dist(0, 100);
    std::uniform_real_distribution<double> distf(0.0, 1.0);

    qrand rng;
    for(int i = 0; i < 10; i++){
        std::cout << dist(rng) << " " << distf(rng) << std::endl;
    }
}
```

Or you can use the callable object as a drop-in replacement for the `rand` function:

```c++
#include <iostream>
#include "qrand.h"

int main(){
    qrand randq;
    for(int i = 0; i < 10; i++){
        std::cout << randq() << " " << randq() % 100 << std::endl;
    }
}
```

There is no global state, so it is thread-safe as long as each instance is thread-local:

```c++
void some_thread_function(){
    static thread_local qrand randq;
    // call randq
}
```

Or in OpenMP:

```c++
extern qrand randq;
#pragma omp threadprivate(randq)
qrand randq;
```

To use the qrand library, simply include the header file `qrand.h`.
qrand uses VAES or AES-NI instructions to accelerate random number
generation, so it needs to be compiled with either the `-maes` or the `-mvaes` option.

The library uses AVX2/AVX512F instructions if they are available, so it can
be compiled with the `-mavx2` or `-mavx512f` option to improve performance.

The easiest way to compile is to use the `-march=native` option (as the
provided Makefile does) on a modern x86 CPU.

## Algorithm

The qrand library uses AES rounds with four 128-bit keys to generate 512 bits
of random numbers at a time.
The counter is incremented by a balanced random delta each 91 | round. 92 | 93 | ```text 94 | ! All variable is 512 bit 95 | ! += is 64 bit addition 96 | 97 | const add, add0, add1 98 | 99 | ct += add 100 | buf = AES_ROUND(ct, key) 101 | key += add0 102 | buf = AES_ROUND(buf, key) 103 | key += add1 104 | buf = AES_ROUND(buf, key) 105 | key = buf 106 | ``` 107 | 108 | ## Tested Platforms 109 | 110 | The qrand library is tested on following platforms: 111 | 112 | ```text 113 | Intel Xeon E5-2650 (Sandy Bridge EP, AES-NI+SSE) 114 | Intel Xeon E5-2670 v2 (Ivy Bridge EP, AES-NI+SSE) 115 | Intel Xeon E5-2686 v4 (Broadwell EP, AES-NI+AVX2) 116 | Intel Xeon Platinum 8259CL (Cascade Lake, AES-NI+AVX512F) 117 | Intel Xeon Platinum 8375C (Ice Lake, VAES+AVX512F) 118 | 119 | AMD EPYC 7571 (Zen/Naples, AES-NI+AVX2) 120 | AMD EPYC 7R32 (Zen2/Rome, AES-NI+AVX2) 121 | AMD EPYC 7R13 (Zen3/Milan, VAES+AVX2) 122 | 123 | Apple M1 (Rosetta 2/GCC, AES-NI+SSE) 124 | ``` 125 | 126 | 142 | -------------------------------------------------------------------------------- /include/qrand.h: -------------------------------------------------------------------------------- 1 | // Copyright (C) 2023 Chunqing Shan 2 | // 3 | // qrand is free software: you can redistribute it and/or modify 4 | // it under the terms of the GNU Lesser General Public License as published by 5 | // the Free Software Foundation, either version 3 of the License, or 6 | // (at your option) any later version. 7 | // 8 | // qrand is distributed in the hope that it will be useful, 9 | // but WITHOUT ANY WARRANTY; without even the implied warranty of 10 | // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 11 | // GNU Lesser General Public License for more details. 12 | // 13 | // You should have received a copy of the GNU Lesser General Public License 14 | // along with qrand. If not, see . 
#pragma once
#include <cstdint>
#include <cstdio>
#include <cstring>
#include <stdexcept>
#include <immintrin.h>

/// Applies one AES encryption round (AESENC) to each of the four 128-bit
/// lanes of `dt`, using the matching 128-bit lane of `key` as the round key.
/// The result is written back into `dt`.
///
/// The widest instruction the build enables is used: one 512-bit VAES op,
/// two 256-bit VAES ops, or four 128-bit AES-NI ops.
static inline void _qrand_vaes_round(__m512i& dt, __m512i key) __attribute__ ((always_inline));
static inline void _qrand_vaes_round(__m512i& dt, __m512i key)
{
#if defined(__VAES__) && defined(__AVX512F__)
    dt = _mm512_aesenc_epi128(dt, key);
#elif defined(__VAES__)
    // AMD Zen 3 (Milan) supports VAES but not AVX512F
    for(int i = 0; i < 2; i++)
        *(((__m256i*)&dt) + i) = _mm256_aesenc_epi128(*(((__m256i*)&dt) + i), *(((__m256i*)&key) + i));
#else
    for(int i = 0; i < 4; i++)
        *(((__m128i*)&dt) + i) = _mm_aesenc_si128(*(((__m128i*)&dt) + i), *(((__m128i*)&key) + i));
#endif
}

/// Adds eight 64-bit constants lane-wise to the 512-bit vector `v`, in place.
///
/// Argument order follows `_mm512_set_epi64`: `a` is added to the highest
/// 64-bit lane and `h` to the lowest. The AVX2 and SSE fallbacks build the
/// same layout from 256-bit and 128-bit pieces, lowest piece first.
static inline void _qrand_vadd_epi64(__m512i& v, std::uint64_t a, std::uint64_t b, std::uint64_t c, std::uint64_t d,
                                     std::uint64_t e, std::uint64_t f, std::uint64_t g, std::uint64_t h)
                                     __attribute__ ((always_inline));
static inline void _qrand_vadd_epi64(__m512i& v, std::uint64_t a, std::uint64_t b, std::uint64_t c, std::uint64_t d,
                                     std::uint64_t e, std::uint64_t f, std::uint64_t g, std::uint64_t h)
{
    __m512i add;
#if defined(__AVX512F__)
    add = _mm512_set_epi64(a, b, c, d, e, f, g, h);
    v = _mm512_add_epi64(v, add);
#elif defined(__AVX2__)
    *(((__m256i*)&add) + 0) = _mm256_set_epi64x(e, f, g, h);
    *(((__m256i*)&v) + 0) = _mm256_add_epi64(*(((__m256i*)&v) + 0), *(((__m256i*)&add) + 0));
    *(((__m256i*)&add) + 1) = _mm256_set_epi64x(a, b, c, d);
    *(((__m256i*)&v) + 1) = _mm256_add_epi64(*(((__m256i*)&v) + 1), *(((__m256i*)&add) + 1));
#else
    *(((__m128i*)&add) + 0) = _mm_set_epi64x(g, h);
    *(((__m128i*)&v) + 0) = _mm_add_epi64(*(((__m128i*)&v) + 0), *(((__m128i*)&add) + 0));
    *(((__m128i*)&add) + 1) = _mm_set_epi64x(e, f);
    *(((__m128i*)&v) + 1) = _mm_add_epi64(*(((__m128i*)&v) + 1), *(((__m128i*)&add) + 1));
    *(((__m128i*)&add) + 2) = _mm_set_epi64x(c, d);
    *(((__m128i*)&v) + 2) = _mm_add_epi64(*(((__m128i*)&v) + 2), *(((__m128i*)&add) + 2));
    *(((__m128i*)&add) + 3) = _mm_set_epi64x(a, b);
    *(((__m128i*)&v) + 3) = _mm_add_epi64(*(((__m128i*)&v) + 3), *(((__m128i*)&add) + 3));
#endif
}

/// Random number engine producing 32-bit values, usable with the standard
/// <random> distributions (provides result_type, min(), max(), operator()).
///
/// State is two 512-bit vectors plus a read index:
///  - `ct`     : a counter advanced by fixed constants on every refill;
///  - `keybuf` : the AES round key for the next refill AND the buffer the
///               current sixteen 32-bit outputs are served from;
///  - `buf_i`  : index of the next 32-bit word to hand out (0..16).
class qrand{
protected:
    typedef std::uint64_t v512 __attribute__ ((vector_size (64)));
    v512 keybuf, ct;
    std::uint_fast8_t buf_i;

    /// Produces the next 512-bit block: advance the counter, run it through
    /// three AES rounds (bumping the key by fixed constants before the
    /// second and third), then store the result in `keybuf`, where it serves
    /// as both the output buffer and the key for the following refill.
    void fill() __attribute__ ((optimize("vect-cost-model=unlimited")))
    {
        __m512i dt;

        _qrand_vadd_epi64((__m512i&)ct,
            0x45d36387550f4d99, 0x5b1c11bd09ee7529, 0x5b4a5a66c29deab1, 0xbb0a9761e05b3ec1,
            0xa4cb9b1aab19e271, 0x6d1c8aab307ec70f, 0xf0e16e85425f1e95, 0x5f8486d6745a89e9);

        dt = (__m512i)ct;
        _qrand_vaes_round(dt, (__m512i)keybuf);
        _qrand_vadd_epi64((__m512i&)keybuf,
            0xa8b53596c9a68e2b, 0xcae2958d699a4cec, 0xb45a6c1bb9b0b456, 0xdc154b5c2e6a2d9a,
            0xe29a16a7ceb0bf04, 0x23756c3c1ad30fd1, 0x0dba4b531e33ad0b, 0xb1596c8ee3c475a4);
        _qrand_vaes_round(dt, (__m512i)keybuf);
        _qrand_vadd_epi64((__m512i&)keybuf,
            0x4976b84e93a6ad70, 0x89e6756894aec8ad, 0xb165ad2cb31a91b6, 0xc6d40aed4af89273,
            0x38973de0b03b7a23, 0xd56499172d9a4f54, 0x7d88cce4361d4aea, 0xcc95b93865b1f829);
        _qrand_vaes_round(dt, (__m512i)keybuf);

        keybuf = (v512)dt;

        buf_i = 0;
    }
public:
    using result_type = std::uint32_t;
    static constexpr result_type min() { return 0; }
    static constexpr result_type max() { return UINT32_MAX; }

    /// Seeds the key from 64 bytes of /dev/urandom and zeroes the counter.
    ///
    /// @throws std::runtime_error if /dev/urandom cannot be opened or does
    ///         not deliver the full 64 bytes. Previously a failed open led
    ///         to a null FILE* being passed to fread, and a short read left
    ///         the key partly uninitialized.
    qrand(){
        std::FILE* urandom = std::fopen("/dev/urandom", "rb");
        if(urandom == nullptr)
            throw std::runtime_error("qrand: cannot open /dev/urandom");
        const std::size_t items = std::fread(&keybuf, sizeof(keybuf), 1, urandom);
        std::fclose(urandom);
        if(items != 1)
            throw std::runtime_error("qrand: cannot read seed from /dev/urandom");

        for(int i = 0; i < 8; i++)
            ct[i] = 0;
        // Run one refill so the raw seed bytes are never handed out directly.
        fill();
    }

    /// Deterministic construction: every 64-bit lane of the key is set to
    /// `seed`, the counter is zeroed, and one refill is run. Two engines
    /// built with the same seed yield the same sequence.
    qrand(std::uint64_t seed){
        for(int i = 0; i < 8; i++){
            ct[i] = 0;
            keybuf[i] = seed;
        }
        fill();
    }

    /// Returns the next 32-bit word of the current block, refilling the
    /// block first once all sixteen words have been consumed.
    result_type operator()(){
        if(buf_i == 16) fill();
        // Copy the word out with memcpy rather than reading the uint64_t
        // vector through a uint32_t*: same bytes, no strict-aliasing UB.
        result_type word;
        std::memcpy(&word,
                    reinterpret_cast<const unsigned char*>(&keybuf) + sizeof(word) * buf_i,
                    sizeof(word));
        ++buf_i;
        return word;
    }
};
36 | 37 | The "Corresponding Application Code" for a Combined Work means the 38 | object code and/or source code for the Application, including any data 39 | and utility programs needed for reproducing the Combined Work from the 40 | Application, but excluding the System Libraries of the Combined Work. 41 | 42 | 1. Exception to Section 3 of the GNU GPL. 43 | 44 | You may convey a covered work under sections 3 and 4 of this License 45 | without being bound by section 3 of the GNU GPL. 46 | 47 | 2. Conveying Modified Versions. 48 | 49 | If you modify a copy of the Library, and, in your modifications, a 50 | facility refers to a function or data to be supplied by an Application 51 | that uses the facility (other than as an argument passed when the 52 | facility is invoked), then you may convey a copy of the modified 53 | version: 54 | 55 | a) under this License, provided that you make a good faith effort to 56 | ensure that, in the event an Application does not supply the 57 | function or data, the facility still operates, and performs 58 | whatever part of its purpose remains meaningful, or 59 | 60 | b) under the GNU GPL, with none of the additional permissions of 61 | this License applicable to that copy. 62 | 63 | 3. Object Code Incorporating Material from Library Header Files. 64 | 65 | The object code form of an Application may incorporate material from 66 | a header file that is part of the Library. You may convey such object 67 | code under terms of your choice, provided that, if the incorporated 68 | material is not limited to numerical parameters, data structure 69 | layouts and accessors, or small macros, inline functions and templates 70 | (ten or fewer lines in length), you do both of the following: 71 | 72 | a) Give prominent notice with each copy of the object code that the 73 | Library is used in it and that the Library and its use are 74 | covered by this License. 
75 | 76 | b) Accompany the object code with a copy of the GNU GPL and this license 77 | document. 78 | 79 | 4. Combined Works. 80 | 81 | You may convey a Combined Work under terms of your choice that, 82 | taken together, effectively do not restrict modification of the 83 | portions of the Library contained in the Combined Work and reverse 84 | engineering for debugging such modifications, if you also do each of 85 | the following: 86 | 87 | a) Give prominent notice with each copy of the Combined Work that 88 | the Library is used in it and that the Library and its use are 89 | covered by this License. 90 | 91 | b) Accompany the Combined Work with a copy of the GNU GPL and this license 92 | document. 93 | 94 | c) For a Combined Work that displays copyright notices during 95 | execution, include the copyright notice for the Library among 96 | these notices, as well as a reference directing the user to the 97 | copies of the GNU GPL and this license document. 98 | 99 | d) Do one of the following: 100 | 101 | 0) Convey the Minimal Corresponding Source under the terms of this 102 | License, and the Corresponding Application Code in a form 103 | suitable for, and under terms that permit, the user to 104 | recombine or relink the Application with a modified version of 105 | the Linked Version to produce a modified Combined Work, in the 106 | manner specified by section 6 of the GNU GPL for conveying 107 | Corresponding Source. 108 | 109 | 1) Use a suitable shared library mechanism for linking with the 110 | Library. A suitable mechanism is one that (a) uses at run time 111 | a copy of the Library already present on the user's computer 112 | system, and (b) will operate properly with a modified version 113 | of the Library that is interface-compatible with the Linked 114 | Version. 
115 | 116 | e) Provide Installation Information, but only if you would otherwise 117 | be required to provide such information under section 6 of the 118 | GNU GPL, and only to the extent that such information is 119 | necessary to install and execute a modified version of the 120 | Combined Work produced by recombining or relinking the 121 | Application with a modified version of the Linked Version. (If 122 | you use option 4d0, the Installation Information must accompany 123 | the Minimal Corresponding Source and Corresponding Application 124 | Code. If you use option 4d1, you must provide the Installation 125 | Information in the manner specified by section 6 of the GNU GPL 126 | for conveying Corresponding Source.) 127 | 128 | 5. Combined Libraries. 129 | 130 | You may place library facilities that are a work based on the 131 | Library side by side in a single library together with other library 132 | facilities that are not Applications and are not covered by this 133 | License, and convey such a combined library under terms of your 134 | choice, if you do both of the following: 135 | 136 | a) Accompany the combined library with a copy of the same work based 137 | on the Library, uncombined with any other library facilities, 138 | conveyed under the terms of this License. 139 | 140 | b) Give prominent notice with the combined library that part of it 141 | is a work based on the Library, and explaining where to find the 142 | accompanying uncombined form of the same work. 143 | 144 | 6. Revised Versions of the GNU Lesser General Public License. 145 | 146 | The Free Software Foundation may publish revised and/or new versions 147 | of the GNU Lesser General Public License from time to time. Such new 148 | versions will be similar in spirit to the present version, but may 149 | differ in detail to address new problems or concerns. 150 | 151 | Each version is given a distinguishing version number. 
If the 152 | Library as you received it specifies that a certain numbered version 153 | of the GNU Lesser General Public License "or any later version" 154 | applies to it, you have the option of following the terms and 155 | conditions either of that published version or of any later version 156 | published by the Free Software Foundation. If the Library as you 157 | received it does not specify a version number of the GNU Lesser 158 | General Public License, you may choose any version of the GNU Lesser 159 | General Public License ever published by the Free Software Foundation. 160 | 161 | If the Library as you received it specifies that a proxy can decide 162 | whether future versions of the GNU Lesser General Public License shall 163 | apply, that proxy's public statement of acceptance of any version is 164 | permanent authorization for you to choose that version for the 165 | Library. --------------------------------------------------------------------------------