├── poly1305-donna.h ├── example-poly1305.c ├── README.md ├── poly1305-donna-8.h ├── poly1305-donna.c ├── poly1305-donna-16.h ├── poly1305-donna-64.h └── poly1305-donna-32.h /poly1305-donna.h: -------------------------------------------------------------------------------- 1 | #ifndef POLY1305_DONNA_H 2 | #define POLY1305_DONNA_H 3 | 4 | #include 5 | 6 | typedef struct poly1305_context { 7 | size_t aligner; 8 | unsigned char opaque[136]; 9 | } poly1305_context; 10 | 11 | void poly1305_init(poly1305_context *ctx, const unsigned char key[32]); 12 | void poly1305_update(poly1305_context *ctx, const unsigned char *m, size_t bytes); 13 | void poly1305_finish(poly1305_context *ctx, unsigned char mac[16]); 14 | void poly1305_auth(unsigned char mac[16], const unsigned char *m, size_t bytes, const unsigned char key[32]); 15 | 16 | int poly1305_verify(const unsigned char mac1[16], const unsigned char mac2[16]); 17 | int poly1305_power_on_self_test(void); 18 | 19 | #endif /* POLY1305_DONNA_H */ 20 | 21 | -------------------------------------------------------------------------------- /example-poly1305.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include "poly1305-donna.h" 3 | 4 | int 5 | main(void) { 6 | const unsigned char expected[16] = {0xdd,0xb9,0xda,0x7d,0xdd,0x5e,0x52,0x79,0x27,0x30,0xed,0x5c,0xda,0x5f,0x90,0xa4}; 7 | unsigned char key[32]; 8 | unsigned char mac[16]; 9 | unsigned char msg[73]; 10 | size_t i; 11 | int success = poly1305_power_on_self_test(); 12 | 13 | printf("poly1305 self test: %s\n", success ? 
"successful" : "failed"); 14 | if (!success) 15 | return 1; 16 | 17 | for (i = 0; i < sizeof(key); i++) 18 | key[i] = (unsigned char)(i + 221); 19 | for (i = 0; i < sizeof(msg); i++) 20 | msg[i] = (unsigned char)(i + 121); 21 | poly1305_auth(mac, msg, sizeof(msg), key); 22 | 23 | printf("sample mac is "); 24 | for (i = 0; i < sizeof(mac); i++) 25 | printf("%02x", mac[i]); 26 | printf(" (%s)\n", poly1305_verify(expected, mac) ? "correct" : "incorrect"); 27 | 28 | return 0; 29 | } 30 | 31 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | "A state-of-the-art message-authentication code" 2 | 3 | # ABOUT 4 | 5 | See: [http://cr.yp.to/mac.html](http://cr.yp.to/mac.html) and [http://cr.yp.to/mac/poly1305-20050329.pdf](http://cr.yp.to/mac/poly1305-20050329.pdf) 6 | 7 | These are quite portable implementations of increasing efficiency depending on the size of the multiplier available. 8 | Optimized implementations have been moved to [poly1305-opt](https://github.com/floodyberry/poly1305-opt) 9 | 10 | # BUILDING 11 | 12 | ## Default 13 | 14 | If compiled with no options, `poly1305-donna.c` will select between the 32 bit and 64 bit implementations based 15 | on what it can tell the compiler supports 16 | 17 | gcc poly1305-donna.c -O3 -o poly1305.o 18 | 19 | ## Selecting a specific version 20 | 21 | gcc poly1305-donna.c -O3 -o poly1305.o -DPOLY1305_XXBIT 22 | 23 | Where `-DPOLY1305_XXBIT` is one of 24 | 25 | * `-DPOLY1305_8BIT`, 8->16 bit multiplies, 32 bit additions 26 | * `-DPOLY1305_16BIT`, 16->32 bit multiples, 32 bit additions 27 | * `-DPOLY1305_32BIT`, 32->64 bit multiplies, 64 bit additions 28 | * `-DPOLY1305_64BIT`, 64->128 bit multiplies, 128 bit additions 29 | 30 | 8 bit and 16 bit versions were written to keep the code size small, 32 bit and 64 bit versions are mildly optimized due 31 | to needing fewer multiplications. 
All 4 can be made faster at the expense of increased code size and complexity, which
is not the intention of this project.

# USAGE

See: [http://nacl.cace-project.eu/onetimeauth.html](http://nacl.cace-project.eu/onetimeauth.html), specifically (slightly plagiarized):

The poly1305_auth function, viewed as a function of the message for a uniform random key, is
designed to meet the standard notion of unforgeability after a single message. After the sender
authenticates one message, an attacker cannot find authenticators for any other messages.

The sender **MUST NOT** use poly1305_auth to authenticate more than one message under the same key.
Authenticators for two messages under the same key should be expected to reveal enough information
to allow forgeries of authenticators on other messages.

## Functions

`poly1305_context` is declared in [poly1305-donna.h](poly1305-donna.h) and is an opaque structure large enough to support
every underlying platform specific implementation. It should be size_t aligned, which should be handled already
with the size_t member `aligner`.

`void poly1305_init(poly1305_context *ctx, const unsigned char key[32]);`

where

`key` is the 32 byte key that is **only used for this message and is discarded immediately after**

`void poly1305_update(poly1305_context *ctx, const unsigned char *m, size_t bytes);`

where `m` is a pointer to the message fragment to be processed, and

`bytes` is the length of the message fragment

`void poly1305_finish(poly1305_context *ctx, unsigned char mac[16]);`

where `mac` is the buffer which receives the 16 byte authenticator. After calling finish, the underlying
implementation will zero out `ctx`.

`void poly1305_auth(unsigned char mac[16], const unsigned char *m, size_t bytes, const unsigned char key[32]);`

where `mac` is the buffer which receives the 16 byte authenticator,

`m` is a pointer to the message to be processed,

`bytes` is the number of bytes in the message, and

`key` is the 32 byte key that is **only used for this message and is discarded immediately after**.

`int poly1305_verify(const unsigned char mac1[16], const unsigned char mac2[16]);`

where `mac1` is compared to `mac2` in constant time and returns `1` if they are equal and `0` if they are not

`int poly1305_power_on_self_test(void);`

tests the underlying implementation to verify it is working correctly. It returns `1` if all tests pass, and `0` if
any tests fail.

## Example

### Simple

    #include "poly1305-donna.h"

    unsigned char key[32] = {...}, mac[16];
    unsigned char msg[] = {...};

    poly1305_auth(mac, msg, sizeof(msg), key);

### Full

[example-poly1305.c](example-poly1305.c) is a simple example of how to verify the underlying implementation is producing
the correct results, compute an authenticator, and test it against an expected value.

# LICENSE

[MIT](http://www.opensource.org/licenses/mit-license.php) or PUBLIC DOMAIN


# NAMESAKE

I borrowed the idea for these from Adam Langley's [curve25519-donna](http://github.com/agl/curve25519-donna), hence
the name.
-------------------------------------------------------------------------------- /poly1305-donna-8.h: -------------------------------------------------------------------------------- 1 | /* 2 | poly1305 implementation using 8 bit * 8 bit = 16 bit multiplication and 32 bit addition 3 | 4 | based on the public domain reference version in supercop by djb 5 | */ 6 | 7 | #if defined(_MSC_VER) 8 | #define POLY1305_NOINLINE __declspec(noinline) 9 | #elif defined(__GNUC__) 10 | #define POLY1305_NOINLINE __attribute__((noinline)) 11 | #else 12 | #define POLY1305_NOINLINE 13 | #endif 14 | 15 | #define poly1305_block_size 16 16 | 17 | /* 17 + sizeof(size_t) + 51*sizeof(unsigned char) */ 18 | typedef struct poly1305_state_internal_t { 19 | unsigned char buffer[poly1305_block_size]; 20 | size_t leftover; 21 | unsigned char h[17]; 22 | unsigned char r[17]; 23 | unsigned char pad[17]; 24 | unsigned char final; 25 | } poly1305_state_internal_t; 26 | 27 | void 28 | poly1305_init(poly1305_context *ctx, const unsigned char key[32]) { 29 | poly1305_state_internal_t *st = (poly1305_state_internal_t *)ctx; 30 | size_t i; 31 | 32 | st->leftover = 0; 33 | 34 | /* h = 0 */ 35 | for (i = 0; i < 17; i++) 36 | st->h[i] = 0; 37 | 38 | /* r &= 0xffffffc0ffffffc0ffffffc0fffffff */ 39 | st->r[ 0] = key[ 0] & 0xff; 40 | st->r[ 1] = key[ 1] & 0xff; 41 | st->r[ 2] = key[ 2] & 0xff; 42 | st->r[ 3] = key[ 3] & 0x0f; 43 | st->r[ 4] = key[ 4] & 0xfc; 44 | st->r[ 5] = key[ 5] & 0xff; 45 | st->r[ 6] = key[ 6] & 0xff; 46 | st->r[ 7] = key[ 7] & 0x0f; 47 | st->r[ 8] = key[ 8] & 0xfc; 48 | st->r[ 9] = key[ 9] & 0xff; 49 | st->r[10] = key[10] & 0xff; 50 | st->r[11] = key[11] & 0x0f; 51 | st->r[12] = key[12] & 0xfc; 52 | st->r[13] = key[13] & 0xff; 53 | st->r[14] = key[14] & 0xff; 54 | st->r[15] = key[15] & 0x0f; 55 | st->r[16] = 0; 56 | 57 | /* save pad for later */ 58 | for (i = 0; i < 16; i++) 59 | st->pad[i] = key[i + 16]; 60 | st->pad[16] = 0; 61 | 62 | st->final = 0; 63 | } 64 | 65 | static void 66 | 
poly1305_add(unsigned char h[17], const unsigned char c[17]) { 67 | unsigned short u; 68 | unsigned int i; 69 | for (u = 0, i = 0; i < 17; i++) { 70 | u += (unsigned short)h[i] + (unsigned short)c[i]; 71 | h[i] = (unsigned char)u & 0xff; 72 | u >>= 8; 73 | } 74 | } 75 | 76 | static void 77 | poly1305_squeeze(unsigned char h[17], unsigned long hr[17]) { 78 | unsigned long u; 79 | unsigned int i; 80 | u = 0; 81 | for (i = 0; i < 16; i++) { 82 | u += hr[i]; 83 | h[i] = (unsigned char)u & 0xff; 84 | u >>= 8; 85 | } 86 | u += hr[16]; 87 | h[16] = (unsigned char)u & 0x03; 88 | u >>= 2; 89 | u += (u << 2); /* u *= 5; */ 90 | for (i = 0; i < 16; i++) { 91 | u += h[i]; 92 | h[i] = (unsigned char)u & 0xff; 93 | u >>= 8; 94 | } 95 | h[16] += (unsigned char)u; 96 | } 97 | 98 | static void 99 | poly1305_freeze(unsigned char h[17]) { 100 | static const unsigned char minusp[17] = { 101 | 0x05,0x00,0x00,0x00,0x00,0x00,0x00,0x00, 102 | 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, 103 | 0xfc 104 | }; 105 | unsigned char horig[17], negative; 106 | unsigned int i; 107 | 108 | /* compute h + -p */ 109 | for (i = 0; i < 17; i++) 110 | horig[i] = h[i]; 111 | poly1305_add(h, minusp); 112 | 113 | /* select h if h < p, or h + -p if h >= p */ 114 | negative = -(h[16] >> 7); 115 | for (i = 0; i < 17; i++) 116 | h[i] ^= negative & (horig[i] ^ h[i]); 117 | } 118 | 119 | static void 120 | poly1305_blocks(poly1305_state_internal_t *st, const unsigned char *m, size_t bytes) { 121 | const unsigned char hibit = st->final ^ 1; /* 1 << 128 */ 122 | 123 | while (bytes >= poly1305_block_size) { 124 | unsigned long hr[17], u; 125 | unsigned char c[17]; 126 | unsigned int i, j; 127 | 128 | /* h += m */ 129 | for (i = 0; i < 16; i++) 130 | c[i] = m[i]; 131 | c[16] = hibit; 132 | poly1305_add(st->h, c); 133 | 134 | /* h *= r */ 135 | for (i = 0; i < 17; i++) { 136 | u = 0; 137 | for (j = 0; j <= i ; j++) { 138 | u += (unsigned short)st->h[j] * st->r[i - j]; 139 | } 140 | for (j = i + 1; j < 17; j++) { 141 | 
unsigned long v = (unsigned short)st->h[j] * st->r[i + 17 - j]; 142 | v = ((v << 8) + (v << 6)); /* v *= (5 << 6); */ 143 | u += v; 144 | } 145 | hr[i] = u; 146 | } 147 | 148 | /* (partial) h %= p */ 149 | poly1305_squeeze(st->h, hr); 150 | 151 | m += poly1305_block_size; 152 | bytes -= poly1305_block_size; 153 | } 154 | } 155 | 156 | POLY1305_NOINLINE void 157 | poly1305_finish(poly1305_context *ctx, unsigned char mac[16]) { 158 | poly1305_state_internal_t *st = (poly1305_state_internal_t *)ctx; 159 | size_t i; 160 | 161 | /* process the remaining block */ 162 | if (st->leftover) { 163 | size_t i = st->leftover; 164 | st->buffer[i++] = 1; 165 | for (; i < poly1305_block_size; i++) 166 | st->buffer[i] = 0; 167 | st->final = 1; 168 | poly1305_blocks(st, st->buffer, poly1305_block_size); 169 | } 170 | 171 | /* fully reduce h */ 172 | poly1305_freeze(st->h); 173 | 174 | /* h = (h + pad) % (1 << 128) */ 175 | poly1305_add(st->h, st->pad); 176 | for (i = 0; i < 16; i++) 177 | mac[i] = st->h[i]; 178 | 179 | /* zero out the state */ 180 | for (i = 0; i < 17; i++) 181 | st->h[i] = 0; 182 | for (i = 0; i < 17; i++) 183 | st->r[i] = 0; 184 | for (i = 0; i < 17; i++) 185 | st->pad[i] = 0; 186 | } 187 | -------------------------------------------------------------------------------- /poly1305-donna.c: -------------------------------------------------------------------------------- 1 | #include "poly1305-donna.h" 2 | 3 | #if defined(POLY1305_8BIT) 4 | #include "poly1305-donna-8.h" 5 | #elif defined(POLY1305_16BIT) 6 | #include "poly1305-donna-16.h" 7 | #elif defined(POLY1305_32BIT) 8 | #include "poly1305-donna-32.h" 9 | #elif defined(POLY1305_64BIT) 10 | #include "poly1305-donna-64.h" 11 | #else 12 | 13 | /* auto detect between 32bit / 64bit */ 14 | #define HAS_SIZEOF_INT128_64BIT (defined(__SIZEOF_INT128__) && defined(__LP64__)) 15 | #define HAS_MSVC_64BIT (defined(_MSC_VER) && defined(_M_X64)) 16 | #define HAS_GCC_4_4_64BIT (defined(__GNUC__) && defined(__LP64__) && 
((__GNUC__ > 4) || ((__GNUC__ == 4) && (__GNUC_MINOR__ >= 4)))) 17 | 18 | #if (HAS_SIZEOF_INT128_64BIT || HAS_MSVC_64BIT || HAS_GCC_4_4_64BIT) 19 | #include "poly1305-donna-64.h" 20 | #else 21 | #include "poly1305-donna-32.h" 22 | #endif 23 | 24 | #endif 25 | 26 | void 27 | poly1305_update(poly1305_context *ctx, const unsigned char *m, size_t bytes) { 28 | poly1305_state_internal_t *st = (poly1305_state_internal_t *)ctx; 29 | size_t i; 30 | 31 | /* handle leftover */ 32 | if (st->leftover) { 33 | size_t want = (poly1305_block_size - st->leftover); 34 | if (want > bytes) 35 | want = bytes; 36 | for (i = 0; i < want; i++) 37 | st->buffer[st->leftover + i] = m[i]; 38 | bytes -= want; 39 | m += want; 40 | st->leftover += want; 41 | if (st->leftover < poly1305_block_size) 42 | return; 43 | poly1305_blocks(st, st->buffer, poly1305_block_size); 44 | st->leftover = 0; 45 | } 46 | 47 | /* process full blocks */ 48 | if (bytes >= poly1305_block_size) { 49 | size_t want = (bytes & ~(poly1305_block_size - 1)); 50 | poly1305_blocks(st, m, want); 51 | m += want; 52 | bytes -= want; 53 | } 54 | 55 | /* store leftover */ 56 | if (bytes) { 57 | for (i = 0; i < bytes; i++) 58 | st->buffer[st->leftover + i] = m[i]; 59 | st->leftover += bytes; 60 | } 61 | } 62 | 63 | void 64 | poly1305_auth(unsigned char mac[16], const unsigned char *m, size_t bytes, const unsigned char key[32]) { 65 | poly1305_context ctx; 66 | poly1305_init(&ctx, key); 67 | poly1305_update(&ctx, m, bytes); 68 | poly1305_finish(&ctx, mac); 69 | } 70 | 71 | int 72 | poly1305_verify(const unsigned char mac1[16], const unsigned char mac2[16]) { 73 | size_t i; 74 | unsigned int dif = 0; 75 | for (i = 0; i < 16; i++) 76 | dif |= (mac1[i] ^ mac2[i]); 77 | dif = (dif - 1) >> ((sizeof(unsigned int) * 8) - 1); 78 | return (dif & 1); 79 | } 80 | 81 | 82 | /* test a few basic operations */ 83 | int 84 | poly1305_power_on_self_test(void) { 85 | /* example from nacl */ 86 | static const unsigned char nacl_key[32] = { 87 | 
0xee,0xa6,0xa7,0x25,0x1c,0x1e,0x72,0x91, 88 | 0x6d,0x11,0xc2,0xcb,0x21,0x4d,0x3c,0x25, 89 | 0x25,0x39,0x12,0x1d,0x8e,0x23,0x4e,0x65, 90 | 0x2d,0x65,0x1f,0xa4,0xc8,0xcf,0xf8,0x80, 91 | }; 92 | 93 | static const unsigned char nacl_msg[131] = { 94 | 0x8e,0x99,0x3b,0x9f,0x48,0x68,0x12,0x73, 95 | 0xc2,0x96,0x50,0xba,0x32,0xfc,0x76,0xce, 96 | 0x48,0x33,0x2e,0xa7,0x16,0x4d,0x96,0xa4, 97 | 0x47,0x6f,0xb8,0xc5,0x31,0xa1,0x18,0x6a, 98 | 0xc0,0xdf,0xc1,0x7c,0x98,0xdc,0xe8,0x7b, 99 | 0x4d,0xa7,0xf0,0x11,0xec,0x48,0xc9,0x72, 100 | 0x71,0xd2,0xc2,0x0f,0x9b,0x92,0x8f,0xe2, 101 | 0x27,0x0d,0x6f,0xb8,0x63,0xd5,0x17,0x38, 102 | 0xb4,0x8e,0xee,0xe3,0x14,0xa7,0xcc,0x8a, 103 | 0xb9,0x32,0x16,0x45,0x48,0xe5,0x26,0xae, 104 | 0x90,0x22,0x43,0x68,0x51,0x7a,0xcf,0xea, 105 | 0xbd,0x6b,0xb3,0x73,0x2b,0xc0,0xe9,0xda, 106 | 0x99,0x83,0x2b,0x61,0xca,0x01,0xb6,0xde, 107 | 0x56,0x24,0x4a,0x9e,0x88,0xd5,0xf9,0xb3, 108 | 0x79,0x73,0xf6,0x22,0xa4,0x3d,0x14,0xa6, 109 | 0x59,0x9b,0x1f,0x65,0x4c,0xb4,0x5a,0x74, 110 | 0xe3,0x55,0xa5 111 | }; 112 | 113 | static const unsigned char nacl_mac[16] = { 114 | 0xf3,0xff,0xc7,0x70,0x3f,0x94,0x00,0xe5, 115 | 0x2a,0x7d,0xfb,0x4b,0x3d,0x33,0x05,0xd9 116 | }; 117 | 118 | /* generates a final value of (2^130 - 2) == 3 */ 119 | static const unsigned char wrap_key[32] = { 120 | 0x02,0x00,0x00,0x00,0x00,0x00,0x00,0x00, 121 | 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, 122 | 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, 123 | 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, 124 | }; 125 | 126 | static const unsigned char wrap_msg[16] = { 127 | 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff, 128 | 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff 129 | }; 130 | 131 | static const unsigned char wrap_mac[16] = { 132 | 0x03,0x00,0x00,0x00,0x00,0x00,0x00,0x00, 133 | 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, 134 | }; 135 | 136 | /* 137 | mac of the macs of messages of length 0 to 256, where the key and messages 138 | have all their values set to the length 139 | */ 140 | static const unsigned char total_key[32] 
= { 141 | 0x01,0x02,0x03,0x04,0x05,0x06,0x07, 142 | 0xff,0xfe,0xfd,0xfc,0xfb,0xfa,0xf9, 143 | 0xff,0xff,0xff,0xff,0xff,0xff,0xff, 144 | 0xff,0xff,0xff,0xff,0xff,0xff,0xff 145 | }; 146 | 147 | static const unsigned char total_mac[16] = { 148 | 0x64,0xaf,0xe2,0xe8,0xd6,0xad,0x7b,0xbd, 149 | 0xd2,0x87,0xf9,0x7c,0x44,0x62,0x3d,0x39 150 | }; 151 | 152 | poly1305_context ctx; 153 | poly1305_context total_ctx; 154 | unsigned char all_key[32]; 155 | unsigned char all_msg[256]; 156 | unsigned char mac[16]; 157 | size_t i, j; 158 | int result = 1; 159 | 160 | for (i = 0; i < sizeof(mac); i++) 161 | mac[i] = 0; 162 | poly1305_auth(mac, nacl_msg, sizeof(nacl_msg), nacl_key); 163 | result &= poly1305_verify(nacl_mac, mac); 164 | 165 | for (i = 0; i < sizeof(mac); i++) 166 | mac[i] = 0; 167 | poly1305_init(&ctx, nacl_key); 168 | poly1305_update(&ctx, nacl_msg + 0, 32); 169 | poly1305_update(&ctx, nacl_msg + 32, 64); 170 | poly1305_update(&ctx, nacl_msg + 96, 16); 171 | poly1305_update(&ctx, nacl_msg + 112, 8); 172 | poly1305_update(&ctx, nacl_msg + 120, 4); 173 | poly1305_update(&ctx, nacl_msg + 124, 2); 174 | poly1305_update(&ctx, nacl_msg + 126, 1); 175 | poly1305_update(&ctx, nacl_msg + 127, 1); 176 | poly1305_update(&ctx, nacl_msg + 128, 1); 177 | poly1305_update(&ctx, nacl_msg + 129, 1); 178 | poly1305_update(&ctx, nacl_msg + 130, 1); 179 | poly1305_finish(&ctx, mac); 180 | result &= poly1305_verify(nacl_mac, mac); 181 | 182 | for (i = 0; i < sizeof(mac); i++) 183 | mac[i] = 0; 184 | poly1305_auth(mac, wrap_msg, sizeof(wrap_msg), wrap_key); 185 | result &= poly1305_verify(wrap_mac, mac); 186 | 187 | poly1305_init(&total_ctx, total_key); 188 | for (i = 0; i < 256; i++) { 189 | /* set key and message to 'i,i,i..' 
*/ 190 | for (j = 0; j < sizeof(all_key); j++) 191 | all_key[j] = i; 192 | for (j = 0; j < i; j++) 193 | all_msg[j] = i; 194 | poly1305_auth(mac, all_msg, i, all_key); 195 | poly1305_update(&total_ctx, mac, 16); 196 | } 197 | poly1305_finish(&total_ctx, mac); 198 | result &= poly1305_verify(total_mac, mac); 199 | 200 | return result; 201 | } 202 | -------------------------------------------------------------------------------- /poly1305-donna-16.h: -------------------------------------------------------------------------------- 1 | /* 2 | poly1305 implementation using 16 bit * 16 bit = 32 bit multiplication and 32 bit addition 3 | */ 4 | 5 | #if defined(_MSC_VER) 6 | #define POLY1305_NOINLINE __declspec(noinline) 7 | #elif defined(__GNUC__) 8 | #define POLY1305_NOINLINE __attribute__((noinline)) 9 | #else 10 | #define POLY1305_NOINLINE 11 | #endif 12 | 13 | #define poly1305_block_size 16 14 | 15 | /* 17 + sizeof(size_t) + 18*sizeof(unsigned short) */ 16 | typedef struct poly1305_state_internal_t { 17 | unsigned char buffer[poly1305_block_size]; 18 | size_t leftover; 19 | unsigned short r[10]; 20 | unsigned short h[10]; 21 | unsigned short pad[8]; 22 | unsigned char final; 23 | } poly1305_state_internal_t; 24 | 25 | /* interpret two 8 bit unsigned integers as a 16 bit unsigned integer in little endian */ 26 | static unsigned short 27 | U8TO16(const unsigned char *p) { 28 | return 29 | (((unsigned short)(p[0] & 0xff) ) | 30 | ((unsigned short)(p[1] & 0xff) << 8)); 31 | } 32 | 33 | /* store a 16 bit unsigned integer as two 8 bit unsigned integers in little endian */ 34 | static void 35 | U16TO8(unsigned char *p, unsigned short v) { 36 | p[0] = (v ) & 0xff; 37 | p[1] = (v >> 8) & 0xff; 38 | } 39 | 40 | void 41 | poly1305_init(poly1305_context *ctx, const unsigned char key[32]) { 42 | poly1305_state_internal_t *st = (poly1305_state_internal_t *)ctx; 43 | unsigned short t0,t1,t2,t3,t4,t5,t6,t7; 44 | size_t i; 45 | 46 | /* r &= 0xffffffc0ffffffc0ffffffc0fffffff */ 47 | t0 
= U8TO16(&key[ 0]); st->r[0] = ( t0 ) & 0x1fff; 48 | t1 = U8TO16(&key[ 2]); st->r[1] = ((t0 >> 13) | (t1 << 3)) & 0x1fff; 49 | t2 = U8TO16(&key[ 4]); st->r[2] = ((t1 >> 10) | (t2 << 6)) & 0x1f03; 50 | t3 = U8TO16(&key[ 6]); st->r[3] = ((t2 >> 7) | (t3 << 9)) & 0x1fff; 51 | t4 = U8TO16(&key[ 8]); st->r[4] = ((t3 >> 4) | (t4 << 12)) & 0x00ff; 52 | st->r[5] = ((t4 >> 1) ) & 0x1ffe; 53 | t5 = U8TO16(&key[10]); st->r[6] = ((t4 >> 14) | (t5 << 2)) & 0x1fff; 54 | t6 = U8TO16(&key[12]); st->r[7] = ((t5 >> 11) | (t6 << 5)) & 0x1f81; 55 | t7 = U8TO16(&key[14]); st->r[8] = ((t6 >> 8) | (t7 << 8)) & 0x1fff; 56 | st->r[9] = ((t7 >> 5) ) & 0x007f; 57 | 58 | /* h = 0 */ 59 | for (i = 0; i < 10; i++) 60 | st->h[i] = 0; 61 | 62 | /* save pad for later */ 63 | for (i = 0; i < 8; i++) 64 | st->pad[i] = U8TO16(&key[16 + (2 * i)]); 65 | 66 | st->leftover = 0; 67 | st->final = 0; 68 | } 69 | 70 | static void 71 | poly1305_blocks(poly1305_state_internal_t *st, const unsigned char *m, size_t bytes) { 72 | const unsigned short hibit = (st->final) ? 
0 : (1 << 11); /* 1 << 128 */ 73 | unsigned short t0,t1,t2,t3,t4,t5,t6,t7; 74 | unsigned long d[10]; 75 | unsigned long c; 76 | 77 | while (bytes >= poly1305_block_size) { 78 | size_t i, j; 79 | 80 | /* h += m[i] */ 81 | t0 = U8TO16(&m[ 0]); st->h[0] += ( t0 ) & 0x1fff; 82 | t1 = U8TO16(&m[ 2]); st->h[1] += ((t0 >> 13) | (t1 << 3)) & 0x1fff; 83 | t2 = U8TO16(&m[ 4]); st->h[2] += ((t1 >> 10) | (t2 << 6)) & 0x1fff; 84 | t3 = U8TO16(&m[ 6]); st->h[3] += ((t2 >> 7) | (t3 << 9)) & 0x1fff; 85 | t4 = U8TO16(&m[ 8]); st->h[4] += ((t3 >> 4) | (t4 << 12)) & 0x1fff; 86 | st->h[5] += ((t4 >> 1) ) & 0x1fff; 87 | t5 = U8TO16(&m[10]); st->h[6] += ((t4 >> 14) | (t5 << 2)) & 0x1fff; 88 | t6 = U8TO16(&m[12]); st->h[7] += ((t5 >> 11) | (t6 << 5)) & 0x1fff; 89 | t7 = U8TO16(&m[14]); st->h[8] += ((t6 >> 8) | (t7 << 8)) & 0x1fff; 90 | st->h[9] += ((t7 >> 5) ) | hibit; 91 | 92 | /* h *= r, (partial) h %= p */ 93 | for (i = 0, c = 0; i < 10; i++) { 94 | d[i] = c; 95 | for (j = 0; j < 10; j++) { 96 | d[i] += (unsigned long)st->h[j] * ((j <= i) ? 
st->r[i - j] : (5 * st->r[i + 10 - j])); 97 | /* Sum(h[i] * r[i] * 5) will overflow slightly above 6 products with an unclamped r, so carry at 5 */ 98 | if (j == 4) { 99 | c = (d[i] >> 13); 100 | d[i] &= 0x1fff; 101 | } 102 | } 103 | c += (d[i] >> 13); 104 | d[i] &= 0x1fff; 105 | } 106 | c = ((c << 2) + c); /* c *= 5 */ 107 | c += d[0]; 108 | d[0] = ((unsigned short)c & 0x1fff); 109 | c = (c >> 13); 110 | d[1] += c; 111 | 112 | for (i = 0; i < 10; i++) 113 | st->h[i] = (unsigned short)d[i]; 114 | 115 | m += poly1305_block_size; 116 | bytes -= poly1305_block_size; 117 | } 118 | } 119 | 120 | POLY1305_NOINLINE void 121 | poly1305_finish(poly1305_context *ctx, unsigned char mac[16]) { 122 | poly1305_state_internal_t *st = (poly1305_state_internal_t *)ctx; 123 | unsigned short c; 124 | unsigned short g[10]; 125 | unsigned short mask; 126 | unsigned long f; 127 | size_t i; 128 | 129 | /* process the remaining block */ 130 | if (st->leftover) { 131 | size_t i = st->leftover; 132 | st->buffer[i++] = 1; 133 | for (; i < poly1305_block_size; i++) 134 | st->buffer[i] = 0; 135 | st->final = 1; 136 | poly1305_blocks(st, st->buffer, poly1305_block_size); 137 | } 138 | 139 | /* fully carry h */ 140 | c = st->h[1] >> 13; 141 | st->h[1] &= 0x1fff; 142 | for (i = 2; i < 10; i++) { 143 | st->h[i] += c; 144 | c = st->h[i] >> 13; 145 | st->h[i] &= 0x1fff; 146 | } 147 | st->h[0] += (c * 5); 148 | c = st->h[0] >> 13; 149 | st->h[0] &= 0x1fff; 150 | st->h[1] += c; 151 | c = st->h[1] >> 13; 152 | st->h[1] &= 0x1fff; 153 | st->h[2] += c; 154 | 155 | /* compute h + -p */ 156 | g[0] = st->h[0] + 5; 157 | c = g[0] >> 13; 158 | g[0] &= 0x1fff; 159 | for (i = 1; i < 10; i++) { 160 | g[i] = st->h[i] + c; 161 | c = g[i] >> 13; 162 | g[i] &= 0x1fff; 163 | } 164 | 165 | /* select h if h < p, or h + -p if h >= p */ 166 | mask = (c ^ 1) - 1; 167 | for (i = 0; i < 10; i++) 168 | g[i] &= mask; 169 | mask = ~mask; 170 | for (i = 0; i < 10; i++) 171 | st->h[i] = (st->h[i] & mask) | g[i]; 172 | 173 | /* h 
= h % (2^128) */ 174 | st->h[0] = ((st->h[0] ) | (st->h[1] << 13) ) & 0xffff; 175 | st->h[1] = ((st->h[1] >> 3) | (st->h[2] << 10) ) & 0xffff; 176 | st->h[2] = ((st->h[2] >> 6) | (st->h[3] << 7) ) & 0xffff; 177 | st->h[3] = ((st->h[3] >> 9) | (st->h[4] << 4) ) & 0xffff; 178 | st->h[4] = ((st->h[4] >> 12) | (st->h[5] << 1) | (st->h[6] << 14)) & 0xffff; 179 | st->h[5] = ((st->h[6] >> 2) | (st->h[7] << 11) ) & 0xffff; 180 | st->h[6] = ((st->h[7] >> 5) | (st->h[8] << 8) ) & 0xffff; 181 | st->h[7] = ((st->h[8] >> 8) | (st->h[9] << 5) ) & 0xffff; 182 | 183 | /* mac = (h + pad) % (2^128) */ 184 | f = (unsigned long)st->h[0] + st->pad[0]; 185 | st->h[0] = (unsigned short)f; 186 | for (i = 1; i < 8; i++) { 187 | f = (unsigned long)st->h[i] + st->pad[i] + (f >> 16); 188 | st->h[i] = (unsigned short)f; 189 | } 190 | 191 | for (i = 0; i < 8; i++) 192 | U16TO8(mac + (i * 2), st->h[i]); 193 | 194 | /* zero out the state */ 195 | for (i = 0; i < 10; i++) 196 | st->h[i] = 0; 197 | for (i = 0; i < 10; i++) 198 | st->r[i] = 0; 199 | for (i = 0; i < 8; i++) 200 | st->pad[i] = 0; 201 | } 202 | -------------------------------------------------------------------------------- /poly1305-donna-64.h: -------------------------------------------------------------------------------- 1 | /* 2 | poly1305 implementation using 64 bit * 64 bit = 128 bit multiplication and 128 bit addition 3 | */ 4 | 5 | #if defined(_MSC_VER) 6 | #include 7 | 8 | typedef struct uint128_t { 9 | unsigned long long lo; 10 | unsigned long long hi; 11 | } uint128_t; 12 | 13 | #define MUL(out, x, y) out.lo = _umul128((x), (y), &out.hi) 14 | #define ADD(out, in) { unsigned long long t = out.lo; out.lo += in.lo; out.hi += (out.lo < t) + in.hi; } 15 | #define ADDLO(out, in) { unsigned long long t = out.lo; out.lo += in; out.hi += (out.lo < t); } 16 | #define SHR(in, shift) (__shiftright128(in.lo, in.hi, (shift))) 17 | #define LO(in) (in.lo) 18 | 19 | #define POLY1305_NOINLINE __declspec(noinline) 20 | #elif defined(__GNUC__) 
21 | #if defined(__SIZEOF_INT128__) 22 | typedef unsigned __int128 uint128_t; 23 | #else 24 | typedef unsigned uint128_t __attribute__((mode(TI))); 25 | #endif 26 | 27 | #define MUL(out, x, y) out = ((uint128_t)x * y) 28 | #define ADD(out, in) out += in 29 | #define ADDLO(out, in) out += in 30 | #define SHR(in, shift) (unsigned long long)(in >> (shift)) 31 | #define LO(in) (unsigned long long)(in) 32 | 33 | #define POLY1305_NOINLINE __attribute__((noinline)) 34 | #endif 35 | 36 | #define poly1305_block_size 16 37 | 38 | /* 17 + sizeof(size_t) + 8*sizeof(unsigned long long) */ 39 | typedef struct poly1305_state_internal_t { 40 | unsigned long long r[3]; 41 | unsigned long long h[3]; 42 | unsigned long long pad[2]; 43 | size_t leftover; 44 | unsigned char buffer[poly1305_block_size]; 45 | unsigned char final; 46 | } poly1305_state_internal_t; 47 | 48 | /* interpret eight 8 bit unsigned integers as a 64 bit unsigned integer in little endian */ 49 | static unsigned long long 50 | U8TO64(const unsigned char *p) { 51 | return 52 | (((unsigned long long)(p[0] & 0xff) ) | 53 | ((unsigned long long)(p[1] & 0xff) << 8) | 54 | ((unsigned long long)(p[2] & 0xff) << 16) | 55 | ((unsigned long long)(p[3] & 0xff) << 24) | 56 | ((unsigned long long)(p[4] & 0xff) << 32) | 57 | ((unsigned long long)(p[5] & 0xff) << 40) | 58 | ((unsigned long long)(p[6] & 0xff) << 48) | 59 | ((unsigned long long)(p[7] & 0xff) << 56)); 60 | } 61 | 62 | /* store a 64 bit unsigned integer as eight 8 bit unsigned integers in little endian */ 63 | static void 64 | U64TO8(unsigned char *p, unsigned long long v) { 65 | p[0] = (v ) & 0xff; 66 | p[1] = (v >> 8) & 0xff; 67 | p[2] = (v >> 16) & 0xff; 68 | p[3] = (v >> 24) & 0xff; 69 | p[4] = (v >> 32) & 0xff; 70 | p[5] = (v >> 40) & 0xff; 71 | p[6] = (v >> 48) & 0xff; 72 | p[7] = (v >> 56) & 0xff; 73 | } 74 | 75 | void 76 | poly1305_init(poly1305_context *ctx, const unsigned char key[32]) { 77 | poly1305_state_internal_t *st = (poly1305_state_internal_t *)ctx; 
78 | unsigned long long t0,t1; 79 | 80 | /* r &= 0xffffffc0ffffffc0ffffffc0fffffff */ 81 | t0 = U8TO64(&key[0]); 82 | t1 = U8TO64(&key[8]); 83 | 84 | st->r[0] = ( t0 ) & 0xffc0fffffff; 85 | st->r[1] = ((t0 >> 44) | (t1 << 20)) & 0xfffffc0ffff; 86 | st->r[2] = ((t1 >> 24) ) & 0x00ffffffc0f; 87 | 88 | /* h = 0 */ 89 | st->h[0] = 0; 90 | st->h[1] = 0; 91 | st->h[2] = 0; 92 | 93 | /* save pad for later */ 94 | st->pad[0] = U8TO64(&key[16]); 95 | st->pad[1] = U8TO64(&key[24]); 96 | 97 | st->leftover = 0; 98 | st->final = 0; 99 | } 100 | 101 | static void 102 | poly1305_blocks(poly1305_state_internal_t *st, const unsigned char *m, size_t bytes) { 103 | const unsigned long long hibit = (st->final) ? 0 : ((unsigned long long)1 << 40); /* 1 << 128 */ 104 | unsigned long long r0,r1,r2; 105 | unsigned long long s1,s2; 106 | unsigned long long h0,h1,h2; 107 | unsigned long long c; 108 | uint128_t d0,d1,d2,d; 109 | 110 | r0 = st->r[0]; 111 | r1 = st->r[1]; 112 | r2 = st->r[2]; 113 | 114 | h0 = st->h[0]; 115 | h1 = st->h[1]; 116 | h2 = st->h[2]; 117 | 118 | s1 = r1 * (5 << 2); 119 | s2 = r2 * (5 << 2); 120 | 121 | while (bytes >= poly1305_block_size) { 122 | unsigned long long t0,t1; 123 | 124 | /* h += m[i] */ 125 | t0 = U8TO64(&m[0]); 126 | t1 = U8TO64(&m[8]); 127 | 128 | h0 += (( t0 ) & 0xfffffffffff); 129 | h1 += (((t0 >> 44) | (t1 << 20)) & 0xfffffffffff); 130 | h2 += (((t1 >> 24) ) & 0x3ffffffffff) | hibit; 131 | 132 | /* h *= r */ 133 | MUL(d0, h0, r0); MUL(d, h1, s2); ADD(d0, d); MUL(d, h2, s1); ADD(d0, d); 134 | MUL(d1, h0, r1); MUL(d, h1, r0); ADD(d1, d); MUL(d, h2, s2); ADD(d1, d); 135 | MUL(d2, h0, r2); MUL(d, h1, r1); ADD(d2, d); MUL(d, h2, r0); ADD(d2, d); 136 | 137 | /* (partial) h %= p */ 138 | c = SHR(d0, 44); h0 = LO(d0) & 0xfffffffffff; 139 | ADDLO(d1, c); c = SHR(d1, 44); h1 = LO(d1) & 0xfffffffffff; 140 | ADDLO(d2, c); c = SHR(d2, 42); h2 = LO(d2) & 0x3ffffffffff; 141 | h0 += c * 5; c = (h0 >> 44); h0 = h0 & 0xfffffffffff; 142 | h1 += c; 143 | 144 | m += 
poly1305_block_size;
145 | 		bytes -= poly1305_block_size;
146 | 	}
147 |
148 | 	st->h[0] = h0;
149 | 	st->h[1] = h1;
150 | 	st->h[2] = h2;
151 | }
152 |
153 |
/*
 * poly1305_finish (64 bit limb variant): produce the 16 byte tag and wipe
 * the key dependent state.
 *
 *   ctx - caller context, reinterpreted as poly1305_state_internal_t
 *   mac - receives the 16 byte tag through two U64TO8 stores
 *         (U64TO8 is defined outside this excerpt; presumably a little
 *         endian 64 bit store - TODO confirm)
 *
 * Steps: pad and absorb any buffered partial block, fully carry h across
 * its 44/44/42 bit limbs, subtract p = 2^130 - 5 without branching when
 * h >= p, add the pad, emit the low 128 bits, then zero h, r and pad.
 * buffer, leftover and final are not cleared here.
 */
154 | POLY1305_NOINLINE void
155 | poly1305_finish(poly1305_context *ctx, unsigned char mac[16]) {
156 | 	poly1305_state_internal_t *st = (poly1305_state_internal_t *)ctx;
157 | 	unsigned long long h0,h1,h2,c;
158 | 	unsigned long long g0,g1,g2;
159 | 	unsigned long long t0,t1;
160 |
161 | 	/* process the remaining block */
	/* a short block is terminated with a single 1 byte and then zero filled;
	   final is set before the call so poly1305_blocks can tell this block is
	   already padded (the start of the 64 bit poly1305_blocks is outside this
	   excerpt - NOTE(review): confirm it tests final the way the 32 bit one does) */
162 | 	if (st->leftover) {
163 | 		size_t i = st->leftover;
164 | 		st->buffer[i] = 1;
165 | 		for (i = i + 1; i < poly1305_block_size; i++)
166 | 			st->buffer[i] = 0;
167 | 		st->final = 1;
168 | 		poly1305_blocks(st, st->buffer, poly1305_block_size);
169 | 	}
170 |
171 | 	/* fully carry h */
172 | 	h0 = st->h[0];
173 | 	h1 = st->h[1];
174 | 	h2 = st->h[2];
175 |
	/* limbs are 44, 44 and 42 bits wide; a carry out of the top limb has
	   weight 2^130, which equals 5 modulo p, so it re-enters limb 0 as c * 5 */
176 | 	c = (h1 >> 44); h1 &= 0xfffffffffff;
177 | 	h2 += c; c = (h2 >> 42); h2 &= 0x3ffffffffff;
178 | 	h0 += c * 5; c = (h0 >> 44); h0 &= 0xfffffffffff;
179 | 	h1 += c; c = (h1 >> 44); h1 &= 0xfffffffffff;
180 | 	h2 += c; c = (h2 >> 42); h2 &= 0x3ffffffffff;
181 | 	h0 += c * 5; c = (h0 >> 44); h0 &= 0xfffffffffff;
182 | 	h1 += c;
183 |
184 | 	/* compute h + -p */
	/* g = h + 5 - 2^130; the final subtraction wraps g2 below zero exactly
	   when h < p */
185 | 	g0 = h0 + 5; c = (g0 >> 44); g0 &= 0xfffffffffff;
186 | 	g1 = h1 + c; c = (g1 >> 44); g1 &= 0xfffffffffff;
187 | 	g2 = h2 + c - ((unsigned long long)1 << 42);
188 |
189 | 	/* select h if h < p, or h + -p if h >= p */
	/* top bit of g2 set (h < p): c becomes 0 and h is kept;
	   top bit clear (h >= p): c becomes all ones and g replaces h */
190 | 	c = (g2 >> ((sizeof(unsigned long long) * 8) - 1)) - 1;
191 | 	g0 &= c;
192 | 	g1 &= c;
193 | 	g2 &= c;
194 | 	c = ~c;
195 | 	h0 = (h0 & c) | g0;
196 | 	h1 = (h1 & c) | g1;
197 | 	h2 = (h2 & c) | g2;
198 |
199 | 	/* h = (h + pad) */
	/* t0 and t1 are used as the low and high 64 bits of the pad and are
	   split across the 44/44/42 bit limbs before the limb wise add */
200 | 	t0 = st->pad[0];
201 | 	t1 = st->pad[1];
202 |
203 | 	h0 += (( t0 ) & 0xfffffffffff) ; c = (h0 >> 44); h0 &= 0xfffffffffff;
204 | 	h1 += (((t0 >> 44) | (t1 << 20)) & 0xfffffffffff) + c; c = (h1 >> 44); h1 &= 0xfffffffffff;
205 | 	h2 += (((t1 >> 24) ) & 0x3ffffffffff) + c; h2 &= 0x3ffffffffff;
206 |
207 | 	/* mac = h % (2^128) */
	/* repack the three limbs into two 64 bit words; bits of h2 above
	   position 128 fall off the top of the shift */
208 | 	h0 = ((h0 ) | (h1 << 44));
209 | 	h1 = ((h1 >> 20) | (h2 << 24));
210 |
211 | 	U64TO8(&mac[0], h0);
212 | 	U64TO8(&mac[8], h1);
213 |
214 | 	/* zero out the state */
215 | 	st->h[0] = 0;
216 | 	st->h[1] = 0;
217 | 	st->h[2] = 0;
218 | 	st->r[0] = 0;
219 | 	st->r[1] = 0;
220 | 	st->r[2] = 0;
221 | 	st->pad[0] = 0;
222 | 	st->pad[1] = 0;
223 | }
224 |
225 |
--------------------------------------------------------------------------------
/poly1305-donna-32.h:
--------------------------------------------------------------------------------
1 | /*
2 | 	poly1305 implementation using 32 bit * 32 bit = 64 bit multiplication and 64 bit addition
3 | */
4 |
/* POLY1305_NOINLINE expands to the noinline attribute of MSVC or of GCC
   compatible compilers, and to nothing on any other compiler. */
5 | #if defined(_MSC_VER)
6 | 	#define POLY1305_NOINLINE __declspec(noinline)
7 | #elif defined(__GNUC__)
8 | 	#define POLY1305_NOINLINE __attribute__((noinline))
9 | #else
10 | 	#define POLY1305_NOINLINE
11 | #endif
12 |
/* the message is consumed in blocks of this many bytes */
13 | #define poly1305_block_size 16
14 |
/*
 * Internal state. Every entry point casts the public poly1305_context
 * pointer to this type, so the struct has to fit inside poly1305_context
 * (a size_t followed by 136 opaque bytes in poly1305-donna.h).
 *
 *   r        - multiplier taken from key[0..15] and clamped, five 26 bit limbs
 *   h        - accumulator, five 26 bit limbs
 *   pad      - key[16..31] as four little endian 32 bit words, added in finish
 *   leftover - number of bytes currently waiting in buffer
 *   buffer   - holds a partial block until it is padded and absorbed
 *   final    - nonzero once the padded last block is being absorbed
 */
15 | /* 17 + sizeof(size_t) + 14*sizeof(unsigned long) */
16 | typedef struct poly1305_state_internal_t {
17 | 	unsigned long r[5];
18 | 	unsigned long h[5];
19 | 	unsigned long pad[4];
20 | 	size_t leftover;
21 | 	unsigned char buffer[poly1305_block_size];
22 | 	unsigned char final;
23 | } poly1305_state_internal_t;
24 |
25 | /* interpret four 8 bit unsigned integers as a 32 bit unsigned integer in little endian */
/* p must point at 4 readable bytes; the result is returned in an unsigned long
   (which may be wider than 32 bits) with only its low 32 bits populated */
26 | static unsigned long
27 | U8TO32(const unsigned char *p) {
28 | 	return
29 | 		(((unsigned long)(p[0] & 0xff) ) |
30 | 		 ((unsigned long)(p[1] & 0xff) << 8) |
31 | 		 ((unsigned long)(p[2] & 0xff) << 16) |
32 | 		 ((unsigned long)(p[3] & 0xff) << 24));
33 | }
34 |
35 | /* store a 32 bit unsigned integer as four 8 bit unsigned integers in little endian */
/* p must point at 4 writable bytes; any bits of v above bit 31 are dropped
   by the byte masks */
36 | static void
37 | U32TO8(unsigned char *p, unsigned long v) {
38 | 	p[0] = (v ) & 0xff;
39 | 	p[1] = (v >> 8) & 0xff;
40 | 	p[2] = (v >> 16) & 0xff;
41 | 	p[3] = (v >> 24) & 0xff;
42 | }
43 |
/*
 * poly1305_init: load a 32 byte key into the state.
 *
 *   ctx - caller context, reinterpreted as poly1305_state_internal_t
 *   key - key[0..15] becomes the clamped multiplier r,
 *         key[16..31] is saved as the pad
 *
 * Also resets the accumulator h, the buffered byte count and the final flag.
 */
44 | void
45 | poly1305_init(poly1305_context *ctx, const unsigned char key[32]) {
46 | 	poly1305_state_internal_t *st = (poly1305_state_internal_t *)ctx;
47 |
48 | 	/* r &= 0xffffffc0ffffffc0ffffffc0fffffff */
	/* overlapping 4 byte loads at offsets 0, 3, 6, 9, 12 shifted right by
	   0, 2, 4, 6, 8 pick out successive 26 bit limbs of key[0..15]; each
	   constant is the limb mask with the clamp bits from the line above
	   already cleared */
49 | 	st->r[0] = (U8TO32(&key[ 0]) ) & 0x3ffffff;
50 | 	st->r[1] = (U8TO32(&key[ 3]) >> 2) & 0x3ffff03;
51 | 	st->r[2] = (U8TO32(&key[ 6]) >> 4) & 0x3ffc0ff;
52 | 	st->r[3] = (U8TO32(&key[ 9]) >> 6) & 0x3f03fff;
53 | 	st->r[4] = (U8TO32(&key[12]) >> 8) & 0x00fffff;
54 |
55 | 	/* h = 0 */
56 | 	st->h[0] = 0;
57 | 	st->h[1] = 0;
58 | 	st->h[2] = 0;
59 | 	st->h[3] = 0;
60 | 	st->h[4] = 0;
61 |
62 | 	/* save pad for later */
63 | 	st->pad[0] = U8TO32(&key[16]);
64 | 	st->pad[1] = U8TO32(&key[20]);
65 | 	st->pad[2] = U8TO32(&key[24]);
66 | 	st->pad[3] = U8TO32(&key[28]);
67 |
68 | 	st->leftover = 0;
69 | 	st->final = 0;
70 | }
71 |
/*
 * poly1305_blocks: absorb whole 16 byte blocks into the accumulator,
 * computing h = (h + block) * r with a partial reduction modulo 2^130 - 5
 * after each block.
 *
 *   st    - state; r and final are read, h is read and written back
 *   m     - message bytes
 *   bytes - number of bytes available at m; the loop stops as soon as fewer
 *           than 16 remain, so a trailing partial block is not consumed here
 */
72 | static void
73 | poly1305_blocks(poly1305_state_internal_t *st, const unsigned char *m, size_t bytes) {
	/* a full block gets an extra 1 bit just above its 128 message bits
	   (bit 24 of the top limb); the padded final block already contains its
	   terminating 1 byte, so nothing is added for it */
74 | 	const unsigned long hibit = (st->final) ? 0 : (1UL << 24); /* 1 << 128 */
75 | 	unsigned long r0,r1,r2,r3,r4;
76 | 	unsigned long s1,s2,s3,s4;
77 | 	unsigned long h0,h1,h2,h3,h4;
78 | 	unsigned long long d0,d1,d2,d3,d4;
79 | 	unsigned long c;
80 |
81 | 	r0 = st->r[0];
82 | 	r1 = st->r[1];
83 | 	r2 = st->r[2];
84 | 	r3 = st->r[3];
85 | 	r4 = st->r[4];
86 |
	/* r limbs premultiplied by 5: product terms whose weight reaches 2^130
	   wrap around to the low limbs multiplied by 5 */
87 | 	s1 = r1 * 5;
88 | 	s2 = r2 * 5;
89 | 	s3 = r3 * 5;
90 | 	s4 = r4 * 5;
91 |
92 | 	h0 = st->h[0];
93 | 	h1 = st->h[1];
94 | 	h2 = st->h[2];
95 | 	h3 = st->h[3];
96 | 	h4 = st->h[4];
97 |
98 | 	while (bytes >= poly1305_block_size) {
99 | 		/* h += m[i] */
		/* same overlapping load pattern as poly1305_init: five 26 bit limbs
		   taken from the 16 block bytes */
100 | 		h0 += (U8TO32(m+ 0) ) & 0x3ffffff;
101 | 		h1 += (U8TO32(m+ 3) >> 2) & 0x3ffffff;
102 | 		h2 += (U8TO32(m+ 6) >> 4) & 0x3ffffff;
103 | 		h3 += (U8TO32(m+ 9) >> 6) & 0x3ffffff;
104 | 		h4 += (U8TO32(m+12) >> 8) | hibit;
105 |
106 | 		/* h *= r */
		/* schoolbook limb product; each d accumulates five 32 x 32 to 64 bit
		   partial products, with the wrapped terms using the s values */
107 | 		d0 = ((unsigned long long)h0 * r0) + ((unsigned long long)h1 * s4) + ((unsigned long long)h2 * s3) + ((unsigned long long)h3 * s2) + ((unsigned long long)h4 * s1);
108 | 		d1 = ((unsigned long long)h0 * r1) + ((unsigned long long)h1 * r0) + ((unsigned long long)h2 * s4) + ((unsigned long long)h3 * s3) + ((unsigned long long)h4 * s2);
109 | 		d2 = ((unsigned long long)h0 * r2) + ((unsigned long long)h1 * r1) + ((unsigned long long)h2 * r0) + ((unsigned long long)h3 * s4) + ((unsigned long long)h4 * s3);
110 | 		d3 = ((unsigned long long)h0 * r3) + ((unsigned long long)h1 * r2) + ((unsigned long long)h2 * r1) + ((unsigned long long)h3 * r0) + ((unsigned long long)h4 * s4);
111 | 		d4 = ((unsigned long long)h0 * r4) + ((unsigned long long)h1 * r3) + ((unsigned long long)h2 * r2) + ((unsigned long long)h3 * r1) + ((unsigned long long)h4 * r0);
112 |
113 | 		/* (partial) h %= p */
		/* the carry chain ends by adding into h1 without masking it, so h1
		   may exceed 26 bits here; poly1305_finish completes the carry */
114 | 		c = (unsigned long)(d0 >> 26); h0 = (unsigned long)d0 & 0x3ffffff;
115 | 		d1 += c; c = (unsigned long)(d1 >> 26); h1 = (unsigned long)d1 & 0x3ffffff;
116 | 		d2 += c; c = (unsigned long)(d2 >> 26); h2 = (unsigned long)d2 & 0x3ffffff;
117 | 		d3 += c; c = (unsigned long)(d3 >> 26); h3 = (unsigned long)d3 & 0x3ffffff;
118 | 		d4 += c; c = (unsigned long)(d4 >> 26); h4 = (unsigned long)d4 & 0x3ffffff;
119 | 		h0 += c * 5; c = (h0 >> 26); h0 = h0 & 0x3ffffff;
120 | 		h1 += c;
121 |
122 | 		m += poly1305_block_size;
123 | 		bytes -= poly1305_block_size;
124 | 	}
125 |
126 | 	st->h[0] = h0;
127 | 	st->h[1] = h1;
128 | 	st->h[2] = h2;
129 | 	st->h[3] = h3;
130 | 	st->h[4] = h4;
131 | }
132 |
/*
 * poly1305_finish (32 bit limb variant): produce the 16 byte tag and wipe
 * the key dependent state.
 *
 *   ctx - caller context, reinterpreted as poly1305_state_internal_t
 *   mac - receives the 16 byte tag as four little endian 32 bit words
 *
 * Steps: pad and absorb any buffered partial block, carry h across its five
 * 26 bit limbs, subtract p = 2^130 - 5 without branching when h >= p, repack
 * into four 32 bit words, add the pad modulo 2^128, store, then zero h, r
 * and pad. buffer, leftover and final are not cleared here.
 */
133 | POLY1305_NOINLINE void
134 | poly1305_finish(poly1305_context *ctx, unsigned char mac[16]) {
135 | 	poly1305_state_internal_t *st = (poly1305_state_internal_t *)ctx;
136 | 	unsigned long h0,h1,h2,h3,h4,c;
137 | 	unsigned long g0,g1,g2,g3,g4;
138 | 	unsigned long long f;
139 | 	unsigned long mask;
140 |
141 | 	/* process the remaining block */
	/* terminate the short block with a single 1 byte, zero fill the rest,
	   and set final so poly1305_blocks does not add its own high bit */
142 | 	if (st->leftover) {
143 | 		size_t i = st->leftover;
144 | 		st->buffer[i++] = 1;
145 | 		for (; i < poly1305_block_size; i++)
146 | 			st->buffer[i] = 0;
147 | 		st->final = 1;
148 | 		poly1305_blocks(st, st->buffer, poly1305_block_size);
149 | 	}
150 |
151 | 	/* fully carry h */
152 | 	h0 = st->h[0];
153 | 	h1 = st->h[1];
154 | 	h2 = st->h[2];
155 | 	h3 = st->h[3];
156 | 	h4 = st->h[4];
157 |
	/* starts at h1 because that is where poly1305_blocks left its last
	   carry; a carry out of h4 has weight 2^130 and re-enters h0 as c * 5 */
158 | 	c = h1 >> 26; h1 = h1 & 0x3ffffff;
159 | 	h2 += c; c = h2 >> 26; h2 = h2 & 0x3ffffff;
160 | 	h3 += c; c = h3 >> 26; h3 = h3 & 0x3ffffff;
161 | 	h4 += c; c = h4 >> 26; h4 = h4 & 0x3ffffff;
162 | 	h0 += c * 5; c = h0 >> 26; h0 = h0 & 0x3ffffff;
163 | 	h1 += c;
164 |
165 | 	/* compute h + -p */
	/* g = h + 5 - 2^130; the final subtraction wraps g4 below zero exactly
	   when h < p */
166 | 	g0 = h0 + 5; c = g0 >> 26; g0 &= 0x3ffffff;
167 | 	g1 = h1 + c; c = g1 >> 26; g1 &= 0x3ffffff;
168 | 	g2 = h2 + c; c = g2 >> 26; g2 &= 0x3ffffff;
169 | 	g3 = h3 + c; c = g3 >> 26; g3 &= 0x3ffffff;
170 | 	g4 = h4 + c - (1UL << 26);
171 |
172 | 	/* select h if h < p, or h + -p if h >= p */
	/* the shift count is derived from sizeof so the top bit test works for
	   any width of unsigned long; top bit set gives mask 0 (keep h), top bit
	   clear gives mask all ones (take g) */
173 | 	mask = (g4 >> ((sizeof(unsigned long) * 8) - 1)) - 1;
174 | 	g0 &= mask;
175 | 	g1 &= mask;
176 | 	g2 &= mask;
177 | 	g3 &= mask;
178 | 	g4 &= mask;
179 | 	mask = ~mask;
180 | 	h0 = (h0 & mask) | g0;
181 | 	h1 = (h1 & mask) | g1;
182 | 	h2 = (h2 & mask) | g2;
183 | 	h3 = (h3 & mask) | g3;
184 | 	h4 = (h4 & mask) | g4;
185 |
186 | 	/* h = h % (2^128) */
	/* repack five 26 bit limbs into four 32 bit words; the explicit
	   0xffffffff masks trim each word when unsigned long is wider than 32 bits */
187 | 	h0 = ((h0 ) | (h1 << 26)) & 0xffffffff;
188 | 	h1 = ((h1 >> 6) | (h2 << 20)) & 0xffffffff;
189 | 	h2 = ((h2 >> 12) | (h3 << 14)) & 0xffffffff;
190 | 	h3 = ((h3 >> 18) | (h4 << 8)) & 0xffffffff;
191 |
192 | 	/* mac = (h + pad) % (2^128) */
	/* word wise add in a 64 bit temporary; f >> 32 is the carry into the
	   next word and the carry out of the last word is discarded */
193 | 	f = (unsigned long long)h0 + st->pad[0] ; h0 = (unsigned long)f;
194 | 	f = (unsigned long long)h1 + st->pad[1] + (f >> 32); h1 = (unsigned long)f;
195 | 	f = (unsigned long long)h2 + st->pad[2] + (f >> 32); h2 = (unsigned long)f;
196 | 	f = (unsigned long long)h3 + st->pad[3] + (f >> 32); h3 = (unsigned long)f;
197 |
198 | 	U32TO8(mac + 0, h0);
199 | 	U32TO8(mac + 4, h1);
200 | 	U32TO8(mac + 8, h2);
201 | 	U32TO8(mac + 12, h3);
202 |
203 | 	/* zero out the state */
204 | 	st->h[0] = 0;
205 | 	st->h[1] = 0;
206 | 	st->h[2] = 0;
207 | 	st->h[3] = 0;
208 | 	st->h[4] = 0;
209 | 	st->r[0] = 0;
210 | 	st->r[1] = 0;
211 | 	st->r[2] = 0;
212 | 	st->r[3] = 0;
213 | 	st->r[4] = 0;
214 | 	st->pad[0] = 0;
215 | 	st->pad[1] = 0;
216 | 	st->pad[2] = 0;
217 | 	st->pad[3] = 0;
218 | }
219 |
220 |
--------------------------------------------------------------------------------