├── .gitignore ├── src ├── aes-stream.h └── aes-stream.c ├── LICENSE └── README.md /.gitignore: -------------------------------------------------------------------------------- 1 | # Prerequisites 2 | *.d 3 | 4 | # Object files 5 | *.o 6 | *.ko 7 | *.obj 8 | *.elf 9 | 10 | # Linker output 11 | *.ilk 12 | *.map 13 | *.exp 14 | 15 | # Precompiled Headers 16 | *.gch 17 | *.pch 18 | 19 | # Libraries 20 | *.lib 21 | *.a 22 | *.la 23 | *.lo 24 | 25 | # Shared objects (inc. Windows DLLs) 26 | *.dll 27 | *.so 28 | *.so.* 29 | *.dylib 30 | 31 | # Executables 32 | *.exe 33 | *.out 34 | *.app 35 | *.i*86 36 | *.x86_64 37 | *.hex 38 | 39 | # Debug files 40 | *.dSYM/ 41 | *.su 42 | *.idb 43 | *.pdb 44 | 45 | # Kernel Module Compile Results 46 | *.mod* 47 | *.cmd 48 | .tmp_versions/ 49 | modules.order 50 | Module.symvers 51 | Mkfile.old 52 | dkms.conf 53 | -------------------------------------------------------------------------------- /src/aes-stream.h: -------------------------------------------------------------------------------- 1 | /* Public interface of the AES-based stream generator. */ #ifndef aes_stream_H 2 | #define aes_stream_H 3 | 4 | /* NOTE(review): the header name after #include is missing in this copy; size_t is used below, so presumably <stddef.h> - confirm. */ #include 5 | 6 | /* CRYPTO_ALIGN(x): compiler-specific spelling of "align this type to x bytes"; may be predefined by the includer. */ #ifndef CRYPTO_ALIGN 7 | # if defined(__INTEL_COMPILER) || defined(_MSC_VER) 8 | # define CRYPTO_ALIGN(x) __declspec(align(x)) 9 | # else 10 | # define CRYPTO_ALIGN(x) __attribute__((aligned(x))) 11 | # endif 12 | #endif 13 | 14 | /* Number of AES rounds; defaults to 10 unless defined before this header is included. */ #ifndef AES_STREAM_ROUNDS 15 | # define AES_STREAM_ROUNDS 10 16 | #endif 17 | 18 | /* Opaque, 16-byte-aligned generator state: room for (AES_STREAM_ROUNDS + 1) 16-byte round keys plus one further 16-byte block. */ typedef struct CRYPTO_ALIGN(16) aes_stream_state { 19 | unsigned char opaque[((AES_STREAM_ROUNDS) + 1) * 16 + 16]; 20 | } aes_stream_state; 21 | 22 | /* Size in bytes of the seed array taken by aes_stream_init(). */ #define AES_STREAM_SEEDBYTES 32 23 | 24 | /* Initializes *st from the AES_STREAM_SEEDBYTES-byte seed. */ void aes_stream_init(aes_stream_state *st, 25 | const unsigned char seed[AES_STREAM_SEEDBYTES]); 26 | 27 | /* Writes buf_len generated bytes to buf and updates *st. */ void aes_stream(aes_stream_state *st, unsigned char *buf, size_t buf_len); 28 | 29 | #endif 30 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | 
BSD 2-Clause License 2 | 3 | Copyright (c) 2017-2023, Frank Denis 4 | All rights reserved. 5 | 6 | Redistribution and use in source and binary forms, with or without 7 | modification, are permitted provided that the following conditions are met: 8 | 9 | * Redistributions of source code must retain the above copyright notice, this 10 | list of conditions and the following disclaimer. 11 | 12 | * Redistributions in binary form must reproduce the above copyright notice, 13 | this list of conditions and the following disclaimer in the documentation 14 | and/or other materials provided with the distribution. 15 | 16 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 17 | AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 18 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 19 | DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE 20 | FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 21 | DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 22 | SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 23 | CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 24 | OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 25 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 26 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | AES-STREAM 2 | ========== 3 | 4 | A simple, but fast AES-PRF-based random number generator. 5 | 6 | Fast, designed to fill large buffers with random data. 7 | Does fast key erasure. 8 | 9 | Requires a modern Intel or AMD CPU with AES-NI support. 
10 | 11 | API 12 | === 13 | 14 | Pretty straightforward: 15 | 16 | ```c 17 | #include "aes-stream.h" 18 | 19 | #define AES_STREAM_SEEDBYTES 32 20 | 21 | void aes_stream_init(aes_stream_state *st, const unsigned char seed[AES_STREAM_SEEDBYTES]); 22 | 23 | void aes_stream(aes_stream_state *st, unsigned char *buf, size_t buf_len); 24 | ``` 25 | 26 | Call `aes_stream_init()` with a seed, then `aes_stream()` to fill 27 | `buf` with `buf_len` random bytes. 28 | 29 | `aes_stream()` can be called indefinitely without having to reseed the 30 | generator. 31 | 32 | Compilation 33 | =========== 34 | 35 | Do not forget to tell your compiler to enable support for AES opcodes 36 | with the `-maes` flag. 37 | 38 | Recommended: `-Ofast -maes -march=native` 39 | 40 | Clang 7 appears to produce faster code than gcc 8. 41 | 42 | Key erasure is performed after every call to `aes_stream()`. If you are 43 | dealing with many short keys, implement a pool on top of this. 44 | 45 | Uses AES-128 by default. Define `AES_STREAM_ROUNDS=14` in order to use 46 | AES-256 instead. 47 | 48 | References 49 | ========== 50 | 51 | * [Cryptanalysis of AES-PRF and its Dual](https://tosc.iacr.org/index.php/ToSC/article/view/892/843) 52 | (Patrick Derbez, Tetsu Iwata, Ling Sun, Siwei Sun, Yosuke Todo, Haoyang Wang and Meiqin Wang) 53 | * [Optimal PRFs from blockcipher designs](https://eprint.iacr.org/2017/812.pdf) 54 | (Bart Mennink and Samuel Neves) 55 | * [Fast-key-erasure random-number generators](https://blog.cr.yp.to/20170723-random.html) 56 | (Daniel J. Bernstein) 57 | -------------------------------------------------------------------------------- /src/aes-stream.c: -------------------------------------------------------------------------------- 1 | #include "aes-stream.h" 2 | 3 | #if defined(__GNUC__) && !defined(__clang__) 4 | #pragma GCC target("ssse3") 5 | #pragma GCC target("aes") 6 | #endif 7 | 8 | #include 9 | #include 10 | #include 11 | 12 | #define COMPILER_ASSERT(X) (void) sizeof(char[(X) ? 
1 : -1]) 13 | 14 | #ifdef __IBMC__ 15 | # pragma pack(1) 16 | #elif defined(__SUNPRO_C) || defined(__SUNPRO_CC) 17 | # pragma pack(1) 18 | #else 19 | # pragma pack(push, 1) 20 | #endif 21 | 22 | typedef struct CRYPTO_ALIGN(16) _aes_stream_state { 23 | __m128i round_keys[AES_STREAM_ROUNDS + 1]; 24 | __m128i counter; 25 | } _aes_stream_state; 26 | 27 | #ifdef __IBMC__ 28 | # pragma pack(pop) 29 | #elif defined(__SUNPRO_C) || defined(__SUNPRO_CC) 30 | # pragma pack() 31 | #else 32 | # pragma pack(pop) 33 | #endif 34 | 35 | #define DRC(ROUND, RC) \ 36 | do { \ 37 | s = _mm_aeskeygenassist_si128(t1, (RC)); \ 38 | round_keys[ROUND] = t1; \ 39 | t1 = _mm_xor_si128(t1, _mm_slli_si128(t1, 4)); \ 40 | t1 = _mm_xor_si128(t1, _mm_slli_si128(t1, 8)); \ 41 | t1 = _mm_xor_si128(t1, _mm_shuffle_epi32(s, 0xff)); \ 42 | } while (0) 43 | 44 | #define DRC1(ROUND, RC) \ 45 | do { \ 46 | s = _mm_aeskeygenassist_si128(t2, (RC)); \ 47 | round_keys[ROUND] = t2; \ 48 | t1 = _mm_xor_si128(t1, _mm_slli_si128(t1, 4)); \ 49 | t1 = _mm_xor_si128(t1, _mm_slli_si128(t1, 8)); \ 50 | t1 = _mm_xor_si128(t1, _mm_shuffle_epi32(s, 0xff)); \ 51 | } while (0) 52 | 53 | #define DRC2(ROUND, RC) \ 54 | do { \ 55 | s = _mm_aeskeygenassist_si128(t1, (RC)); \ 56 | round_keys[ROUND] = t1; \ 57 | t2 = _mm_xor_si128(t2, _mm_slli_si128(t2, 4)); \ 58 | t2 = _mm_xor_si128(t2, _mm_slli_si128(t2, 8)); \ 59 | t2 = _mm_xor_si128(t2, _mm_shuffle_epi32(s, 0xaa)); \ 60 | } while (0) 61 | 62 | #if AES_STREAM_ROUNDS == 10 63 | static void 64 | _aes_key_expand_128(__m128i round_keys[AES_STREAM_ROUNDS + 1], __m128i t1) 65 | { 66 | __m128i s; 67 | 68 | DRC(0, 1); 69 | DRC(1, 2); 70 | DRC(2, 4); 71 | DRC(3, 8); 72 | DRC(4, 16); 73 | DRC(5, 32); 74 | DRC(6, 64); 75 | DRC(7, 128); 76 | DRC(8, 27); 77 | DRC(9, 54); 78 | round_keys[10] = t1; 79 | } 80 | 81 | #elif AES_STREAM_ROUNDS == 14 82 | 83 | static void 84 | _aes_key_expand_256(__m128i round_keys[AES_STREAM_ROUNDS + 1], __m128i t1, __m128i t2) 85 | { 86 | __m128i s; 87 | 88 | 
round_keys[0] = t1; 89 | DRC1(1, 1); 90 | DRC2(2, 1); 91 | DRC1(3, 2); 92 | DRC2(4, 2); 93 | DRC1(5, 4); 94 | DRC2(6, 4); 95 | DRC1(7, 8); 96 | DRC2(8, 8); 97 | DRC1(9, 16); 98 | DRC2(10, 16); 99 | DRC1(11, 32); 100 | DRC2(12, 32); 101 | DRC1(13, 64); 102 | round_keys[14] = t1; 103 | } 104 | #endif 105 | 106 | static void 107 | _aes_stream(_aes_stream_state *_st, unsigned char *buf, size_t buf_len) 108 | { 109 | CRYPTO_ALIGN(16) unsigned char t[16]; 110 | const __m128i one = _mm_set_epi64x(0, 1); 111 | const __m128i two = _mm_set_epi64x(0, 2); 112 | __m128i * round_keys = _st->round_keys; 113 | __m128i c0, c1, c2, c3, c4, c5, c6, c7; 114 | __m128i r0, r1, r2, r3, r4, r5, r6, r7; 115 | __m128i s0, s1, s2, s3, s4, s5, s6, s7; 116 | size_t i; 117 | size_t remaining; 118 | 119 | #if AES_STREAM_ROUNDS == 10 120 | #define COMPUTE_AES_STREAM_ROUNDS(N) \ 121 | do { \ 122 | r##N = _mm_xor_si128(c##N, round_keys[0]); \ 123 | r##N = _mm_aesenc_si128(_mm_aesenc_si128(r##N, round_keys[1]), round_keys[2]); \ 124 | r##N = _mm_aesenc_si128(_mm_aesenc_si128(r##N, round_keys[3]), round_keys[4]); \ 125 | s##N = r##N; \ 126 | r##N = _mm_aesenc_si128(_mm_aesenc_si128(r##N, round_keys[5]), round_keys[6]); \ 127 | r##N = _mm_aesenc_si128(_mm_aesenc_si128(r##N, round_keys[7]), round_keys[8]); \ 128 | r##N = _mm_aesenclast_si128(_mm_aesenc_si128(r##N, round_keys[9]), round_keys[10]); \ 129 | r##N = _mm_xor_si128(s##N, r##N); \ 130 | } while (0) 131 | 132 | #elif AES_STREAM_ROUNDS == 14 133 | 134 | #define COMPUTE_AES_STREAM_ROUNDS(N) \ 135 | do { \ 136 | r##N = _mm_xor_si128(c##N, round_keys[0]); \ 137 | r##N = _mm_aesenc_si128(_mm_aesenc_si128(r##N, round_keys[1]), round_keys[2]); \ 138 | r##N = _mm_aesenc_si128(_mm_aesenc_si128(r##N, round_keys[3]), round_keys[4]); \ 139 | r##N = _mm_aesenc_si128(_mm_aesenc_si128(r##N, round_keys[5]), round_keys[6]); \ 140 | s##N = r##N; \ 141 | r##N = _mm_aesenc_si128(_mm_aesenc_si128(r##N, round_keys[7]), round_keys[8]); \ 142 | r##N = 
_mm_aesenc_si128(_mm_aesenc_si128(r##N, round_keys[9]), round_keys[10]); \ 143 | r##N = _mm_aesenc_si128(_mm_aesenc_si128(r##N, round_keys[11]), round_keys[12]); \ 144 | r##N = _mm_aesenclast_si128(_mm_aesenc_si128(r##N, round_keys[13]), round_keys[14]); \ 145 | r##N = _mm_xor_si128(s##N, r##N); \ 146 | } while (0) 147 | #endif 148 | 149 | c0 = _st->counter; 150 | remaining = buf_len; 151 | while (remaining > 128) { 152 | c1 = _mm_add_epi64(c0, one); 153 | c2 = _mm_add_epi64(c0, two); 154 | c3 = _mm_add_epi64(c2, one); 155 | c4 = _mm_add_epi64(c2, two); 156 | c5 = _mm_add_epi64(c4, one); 157 | c6 = _mm_add_epi64(c4, two); 158 | c7 = _mm_add_epi64(c6, one); 159 | COMPUTE_AES_STREAM_ROUNDS(0); 160 | COMPUTE_AES_STREAM_ROUNDS(1); 161 | COMPUTE_AES_STREAM_ROUNDS(2); 162 | COMPUTE_AES_STREAM_ROUNDS(3); 163 | COMPUTE_AES_STREAM_ROUNDS(4); 164 | COMPUTE_AES_STREAM_ROUNDS(5); 165 | COMPUTE_AES_STREAM_ROUNDS(6); 166 | COMPUTE_AES_STREAM_ROUNDS(7); 167 | c0 = _mm_add_epi64(c7, one); 168 | _mm_storeu_si128((__m128i *) (void *) (buf + 0), r0); 169 | _mm_storeu_si128((__m128i *) (void *) (buf + 16), r1); 170 | _mm_storeu_si128((__m128i *) (void *) (buf + 32), r2); 171 | _mm_storeu_si128((__m128i *) (void *) (buf + 48), r3); 172 | _mm_storeu_si128((__m128i *) (void *) (buf + 64), r4); 173 | _mm_storeu_si128((__m128i *) (void *) (buf + 80), r5); 174 | _mm_storeu_si128((__m128i *) (void *) (buf + 96), r6); 175 | _mm_storeu_si128((__m128i *) (void *) (buf + 112), r7); 176 | buf += 128; 177 | remaining -= 128; 178 | } 179 | while (remaining > 32) { 180 | c1 = _mm_add_epi64(c0, one); 181 | COMPUTE_AES_STREAM_ROUNDS(0); 182 | COMPUTE_AES_STREAM_ROUNDS(1); 183 | c0 = _mm_add_epi64(c1, one); 184 | _mm_storeu_si128((__m128i *) (void *) (buf + 0), r0); 185 | _mm_storeu_si128((__m128i *) (void *) (buf + 16), r1); 186 | buf += 32; 187 | remaining -= 32; 188 | } 189 | while (remaining > 16) { 190 | COMPUTE_AES_STREAM_ROUNDS(0); 191 | c0 = _mm_add_epi64(c0, one); 192 | 
_mm_storeu_si128((__m128i *) (void *) buf, r0); 193 | buf += 16; 194 | remaining -= 16; 195 | } 196 | if (remaining > (size_t) 0U) { 197 | COMPUTE_AES_STREAM_ROUNDS(0); 198 | c0 = _mm_add_epi64(c0, one); 199 | _mm_store_si128((__m128i *) (void *) t, r0); 200 | for (i = 0; i < remaining; i++) { 201 | buf[i] = t[i]; 202 | } 203 | } 204 | _st->counter = c0; 205 | 206 | c0 = _mm_xor_si128(c0, _mm_set_epi64x(1ULL << 63, 0)); 207 | 208 | #if AES_STREAM_ROUNDS == 10 209 | COMPUTE_AES_STREAM_ROUNDS(0); 210 | _aes_key_expand_128(round_keys, r0); 211 | 212 | #elif AES_STREAM_ROUNDS == 14 213 | 214 | c1 = _mm_add_epi64(c0, one); 215 | COMPUTE_AES_STREAM_ROUNDS(0); 216 | COMPUTE_AES_STREAM_ROUNDS(1); 217 | _aes_key_expand_256(round_keys, r0, r1); 218 | #endif 219 | } 220 | 221 | void 222 | aes_stream_init(aes_stream_state *st, const unsigned char seed[AES_STREAM_SEEDBYTES]) 223 | { 224 | _aes_stream_state *_st = (_aes_stream_state *) (void *) st; 225 | 226 | COMPILER_ASSERT(sizeof *st >= sizeof *_st); 227 | 228 | #if AES_STREAM_ROUNDS == 10 229 | _aes_key_expand_128(_st->round_keys, _mm_loadu_si128((const __m128i *) (const void *) seed)); 230 | _st->counter = _mm_loadu_si128((const __m128i *) (const void *) (seed + 16)); 231 | 232 | #elif AES_STREAM_ROUNDS == 14 233 | 234 | _aes_key_expand_256(_st->round_keys, _mm_loadu_si128((const __m128i *) (const void *) seed), 235 | _mm_loadu_si128((const __m128i *) (const void *) (seed + 16))); 236 | _st->counter = _mm_setzero_si128(); 237 | #endif 238 | } 239 | 240 | void 241 | aes_stream(aes_stream_state *st, unsigned char *buf, size_t buf_len) 242 | { 243 | _aes_stream((_aes_stream_state *) (void *) st, buf, buf_len); 244 | } 245 | --------------------------------------------------------------------------------