├── README.md
├── curve25519-donna
    ├── README.md
    ├── curve25519-donna.cpp
    ├── curve25519-donna.h
    ├── examples
    │   └── curve25519_timing_test
    │   │   └── curve25519_timing_test.ino
    └── keywords.txt
├── esp8266-trng
    ├── README.md
    ├── esp8266-trng.h
    └── examples
    │   └── esp8266-trng
    │       └── esp8266-trng.ino
└── sha256
    ├── README.md
    ├── examples
        └── sha256-test
        │   └── sha256-test.ino
    ├── sha256.cpp
    └── sha256.h


/README.md:
--------------------------------------------------------------------------------
 1 | # ESP8266-Arduino-cryptolibs
 2 | Collection of strong crypto libs that can be included in Arduino-core for ESP8266
 3 | 
 4 |   - curve25519-donna : strong ECC key agreement function
 5 |   - esp8266-trng : simple wrapper to the HW TRNG
 6 |   - sha256 : SHA2-256 class
 7 | 
 8 | All these libs require that you already have:
 9 |   - installed [Arduino IDE](https://www.arduino.cc/en/Main/Software) for your platform; minimum release tested is 1.6.5, but should work with anything 1.5+
10 |   - installed the [Arduino-core](https://github.com/esp8266/Arduino)
11 |   - applied the [patch](https://github.com/esp8266/Arduino/commit/fe04165bbeafd0dcf5356172b2580655396279b0) if you get an error about iram0 filling up when you compile an example
12 |   - copied the lib you need under the right folder (~/Arduino/libraries in Linux)
13 | 


--------------------------------------------------------------------------------
/curve25519-donna/README.md:
--------------------------------------------------------------------------------
 1 | Arduino-esp8266 porting of curve25519-donna.
 2 | 
 3 | curve25519 is an elliptic curve function for KEY AGREEMENT, like
 4 | Diffie-Hellman. No encryption. No signature. Just key setup!
 5 | 
 6 | Calling curve25519_donna() takes about 270ms, but yields 32 times so
 7 | that wifi and IP stacks can run.
 8 | 
 9 | * Copyright 2008, Google Inc.
10 | * All rights reserved.
11 | *
12 | * Redistribution and use in source and binary forms, with or without
13 | * modification, are permitted provided that the following conditions are
14 | * met:
15 | *
16 | *     * Redistributions of source code must retain the above copyright
17 | * notice, this list of conditions and the following disclaimer.
18 | *     * Redistributions in binary form must reproduce the above
19 | * copyright notice, this list of conditions and the following disclaimer
20 | * in the documentation and/or other materials provided with the
21 | * distribution.
22 | *     * Neither the name of Google Inc. nor the names of its
23 | * contributors may be used to endorse or promote products derived from
24 | * this software without specific prior written permission.
25 | *
26 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
27 | * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
28 | * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
29 | * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
30 | * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
31 | * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
32 | * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
33 | * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
34 | * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
35 | * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
36 | * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
37 | *
38 | * curve25519-donna: Curve25519 elliptic curve, public key function
39 | *
40 | * http://code.google.com/p/curve25519-donna/
41 | *
42 | * Adam Langley <agl@imperialviolet.org>
43 | *
44 | * Derived from public domain C code by Daniel J. Bernstein <djb@cr.yp.to>
45 | *
46 | * More information about curve25519 can be found here
47 | *   http://cr.yp.to/ecdh.html
48 | *
49 | * djb's sample implementation of curve25519 is written in a special assembly
50 | * language called qhasm and uses the floating point registers.
51 | *
52 | * This is, almost, a clean room reimplementation from the curve25519 paper. It
53 | * uses many of the tricks described therein. Only the crecip function is taken
54 | * from the sample implementation.
55 | 
56 | Arduino-esp8266 port by Diego Zuccato <ndk.clanbo@gmail.com>
57 | Main changes:
58 | - renamed curve25519-donna.c to curve25519-donna.cpp
59 | - moved datatypes definitions to curve25519-donna.h
60 | - added yield() prototype in curve25519-donna.cpp
61 | - added curve25519_donna_debug() that does a step at a time, to be able to
62 |   obtain more finely-grained timing data
63 | - ardulibrarization
64 | 


--------------------------------------------------------------------------------
/curve25519-donna/curve25519-donna.cpp:
--------------------------------------------------------------------------------
  1 | /* Copyright 2008, Google Inc.
  2 |  * All rights reserved.
  3 |  *
  4 |  * Redistribution and use in source and binary forms, with or without
  5 |  * modification, are permitted provided that the following conditions are
  6 |  * met:
  7 |  *
  8 |  *     * Redistributions of source code must retain the above copyright
  9 |  * notice, this list of conditions and the following disclaimer.
 10 |  *     * Redistributions in binary form must reproduce the above
 11 |  * copyright notice, this list of conditions and the following disclaimer
 12 |  * in the documentation and/or other materials provided with the
 13 |  * distribution.
 14 |  *     * Neither the name of Google Inc. nor the names of its
 15 |  * contributors may be used to endorse or promote products derived from
 16 |  * this software without specific prior written permission.
 17 |  *
 18 |  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 19 |  * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 20 |  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 21 |  * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 22 |  * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 23 |  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 24 |  * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 25 |  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 26 |  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 27 |  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 28 |  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 29 |  *
 30 |  * curve25519-donna: Curve25519 elliptic curve, public key function
 31 |  *
 32 |  * http://code.google.com/p/curve25519-donna/
 33 |  *
 34 |  * Adam Langley <agl@imperialviolet.org>
 35 |  *
 36 |  * Derived from public domain C code by Daniel J. Bernstein <djb@cr.yp.to>
 37 |  *
 38 |  * More information about curve25519 can be found here
 39 |  *   http://cr.yp.to/ecdh.html
 40 |  *
 41 |  * djb's sample implementation of curve25519 is written in a special assembly
 42 |  * language called qhasm and uses the floating point registers.
 43 |  *
 44 |  * This is, almost, a clean room reimplementation from the curve25519 paper. It
 45 |  * uses many of the tricks described therein. Only the crecip function is taken
 46 |  * from the sample implementation. */
 47 | 
 48 | #include <string.h>
 49 | #include "curve25519-donna.h"
 50 | 
 51 | #ifdef _MSC_VER
 52 | #define inline __inline
 53 | #endif
 54 | #define inline 
 55 | 
 56 | // ESP8266-Arduino specific definition
 57 | extern "C" void yield();
 58 | 
 59 | /* Field element representation:
 60 |  *
 61 |  * Field elements are written as an array of signed, 64-bit limbs, least
 62 |  * significant first. The value of the field element is:
 63 |  *   x[0] + 2^26·x[1] + x^51·x[2] + 2^102·x[3] + ...
 64 |  *
 65 |  * i.e. the limbs are 26, 25, 26, 25, ... bits wide. */
 66 | 
 67 | /* Sum two numbers: output += in */
 68 | static void fsum(limb *output, const limb *in) {
 69 |   unsigned i;
 70 |   for (i = 0; i < 10; i += 2) {
 71 |     output[0+i] = output[0+i] + in[0+i];
 72 |     output[1+i] = output[1+i] + in[1+i];
 73 |   }
 74 | }
 75 | 
 76 | /* Find the difference of two numbers: output = in - output
 77 |  * (note the order of the arguments!). */
 78 | static void fdifference(limb *output, const limb *in) {
 79 |   unsigned i;
 80 |   for (i = 0; i < 10; ++i) {
 81 |     output[i] = in[i] - output[i];
 82 |   }
 83 | }
 84 | 
 85 | /* Multiply a number by a scalar: output = in * scalar */
 86 | static void fscalar_product(limb *output, const limb *in, const limb scalar) {
 87 |   unsigned i;
 88 |   for (i = 0; i < 10; ++i) {
 89 |     output[i] = in[i] * scalar;
 90 |   }
 91 | }
 92 | 
 93 | /* Multiply two numbers: output = in2 * in
 94 |  *
 95 |  * output must be distinct to both inputs. The inputs are reduced coefficient
 96 |  * form, the output is not.
 97 |  *
 98 |  * output[x] <= 14 * the largest product of the input limbs. */
 99 | static void fproduct(limb *output, const limb *in2, const limb *in) {
100 |   output[0] =       ((limb) ((s32) in2[0])) * ((s32) in[0]);
101 |   output[1] =       ((limb) ((s32) in2[0])) * ((s32) in[1]) +
102 |                     ((limb) ((s32) in2[1])) * ((s32) in[0]);
103 |   output[2] =  2 *  ((limb) ((s32) in2[1])) * ((s32) in[1]) +
104 |                     ((limb) ((s32) in2[0])) * ((s32) in[2]) +
105 |                     ((limb) ((s32) in2[2])) * ((s32) in[0]);
106 |   output[3] =       ((limb) ((s32) in2[1])) * ((s32) in[2]) +
107 |                     ((limb) ((s32) in2[2])) * ((s32) in[1]) +
108 |                     ((limb) ((s32) in2[0])) * ((s32) in[3]) +
109 |                     ((limb) ((s32) in2[3])) * ((s32) in[0]);
110 |   output[4] =       ((limb) ((s32) in2[2])) * ((s32) in[2]) +
111 |                2 * (((limb) ((s32) in2[1])) * ((s32) in[3]) +
112 |                     ((limb) ((s32) in2[3])) * ((s32) in[1])) +
113 |                     ((limb) ((s32) in2[0])) * ((s32) in[4]) +
114 |                     ((limb) ((s32) in2[4])) * ((s32) in[0]);
115 |   output[5] =       ((limb) ((s32) in2[2])) * ((s32) in[3]) +
116 |                     ((limb) ((s32) in2[3])) * ((s32) in[2]) +
117 |                     ((limb) ((s32) in2[1])) * ((s32) in[4]) +
118 |                     ((limb) ((s32) in2[4])) * ((s32) in[1]) +
119 |                     ((limb) ((s32) in2[0])) * ((s32) in[5]) +
120 |                     ((limb) ((s32) in2[5])) * ((s32) in[0]);
121 |   output[6] =  2 * (((limb) ((s32) in2[3])) * ((s32) in[3]) +
122 |                     ((limb) ((s32) in2[1])) * ((s32) in[5]) +
123 |                     ((limb) ((s32) in2[5])) * ((s32) in[1])) +
124 |                     ((limb) ((s32) in2[2])) * ((s32) in[4]) +
125 |                     ((limb) ((s32) in2[4])) * ((s32) in[2]) +
126 |                     ((limb) ((s32) in2[0])) * ((s32) in[6]) +
127 |                     ((limb) ((s32) in2[6])) * ((s32) in[0]);
128 |   output[7] =       ((limb) ((s32) in2[3])) * ((s32) in[4]) +
129 |                     ((limb) ((s32) in2[4])) * ((s32) in[3]) +
130 |                     ((limb) ((s32) in2[2])) * ((s32) in[5]) +
131 |                     ((limb) ((s32) in2[5])) * ((s32) in[2]) +
132 |                     ((limb) ((s32) in2[1])) * ((s32) in[6]) +
133 |                     ((limb) ((s32) in2[6])) * ((s32) in[1]) +
134 |                     ((limb) ((s32) in2[0])) * ((s32) in[7]) +
135 |                     ((limb) ((s32) in2[7])) * ((s32) in[0]);
136 |   output[8] =       ((limb) ((s32) in2[4])) * ((s32) in[4]) +
137 |                2 * (((limb) ((s32) in2[3])) * ((s32) in[5]) +
138 |                     ((limb) ((s32) in2[5])) * ((s32) in[3]) +
139 |                     ((limb) ((s32) in2[1])) * ((s32) in[7]) +
140 |                     ((limb) ((s32) in2[7])) * ((s32) in[1])) +
141 |                     ((limb) ((s32) in2[2])) * ((s32) in[6]) +
142 |                     ((limb) ((s32) in2[6])) * ((s32) in[2]) +
143 |                     ((limb) ((s32) in2[0])) * ((s32) in[8]) +
144 |                     ((limb) ((s32) in2[8])) * ((s32) in[0]);
145 |   output[9] =       ((limb) ((s32) in2[4])) * ((s32) in[5]) +
146 |                     ((limb) ((s32) in2[5])) * ((s32) in[4]) +
147 |                     ((limb) ((s32) in2[3])) * ((s32) in[6]) +
148 |                     ((limb) ((s32) in2[6])) * ((s32) in[3]) +
149 |                     ((limb) ((s32) in2[2])) * ((s32) in[7]) +
150 |                     ((limb) ((s32) in2[7])) * ((s32) in[2]) +
151 |                     ((limb) ((s32) in2[1])) * ((s32) in[8]) +
152 |                     ((limb) ((s32) in2[8])) * ((s32) in[1]) +
153 |                     ((limb) ((s32) in2[0])) * ((s32) in[9]) +
154 |                     ((limb) ((s32) in2[9])) * ((s32) in[0]);
155 |   output[10] = 2 * (((limb) ((s32) in2[5])) * ((s32) in[5]) +
156 |                     ((limb) ((s32) in2[3])) * ((s32) in[7]) +
157 |                     ((limb) ((s32) in2[7])) * ((s32) in[3]) +
158 |                     ((limb) ((s32) in2[1])) * ((s32) in[9]) +
159 |                     ((limb) ((s32) in2[9])) * ((s32) in[1])) +
160 |                     ((limb) ((s32) in2[4])) * ((s32) in[6]) +
161 |                     ((limb) ((s32) in2[6])) * ((s32) in[4]) +
162 |                     ((limb) ((s32) in2[2])) * ((s32) in[8]) +
163 |                     ((limb) ((s32) in2[8])) * ((s32) in[2]);
164 |   output[11] =      ((limb) ((s32) in2[5])) * ((s32) in[6]) +
165 |                     ((limb) ((s32) in2[6])) * ((s32) in[5]) +
166 |                     ((limb) ((s32) in2[4])) * ((s32) in[7]) +
167 |                     ((limb) ((s32) in2[7])) * ((s32) in[4]) +
168 |                     ((limb) ((s32) in2[3])) * ((s32) in[8]) +
169 |                     ((limb) ((s32) in2[8])) * ((s32) in[3]) +
170 |                     ((limb) ((s32) in2[2])) * ((s32) in[9]) +
171 |                     ((limb) ((s32) in2[9])) * ((s32) in[2]);
172 |   output[12] =      ((limb) ((s32) in2[6])) * ((s32) in[6]) +
173 |                2 * (((limb) ((s32) in2[5])) * ((s32) in[7]) +
174 |                     ((limb) ((s32) in2[7])) * ((s32) in[5]) +
175 |                     ((limb) ((s32) in2[3])) * ((s32) in[9]) +
176 |                     ((limb) ((s32) in2[9])) * ((s32) in[3])) +
177 |                     ((limb) ((s32) in2[4])) * ((s32) in[8]) +
178 |                     ((limb) ((s32) in2[8])) * ((s32) in[4]);
179 |   output[13] =      ((limb) ((s32) in2[6])) * ((s32) in[7]) +
180 |                     ((limb) ((s32) in2[7])) * ((s32) in[6]) +
181 |                     ((limb) ((s32) in2[5])) * ((s32) in[8]) +
182 |                     ((limb) ((s32) in2[8])) * ((s32) in[5]) +
183 |                     ((limb) ((s32) in2[4])) * ((s32) in[9]) +
184 |                     ((limb) ((s32) in2[9])) * ((s32) in[4]);
185 |   output[14] = 2 * (((limb) ((s32) in2[7])) * ((s32) in[7]) +
186 |                     ((limb) ((s32) in2[5])) * ((s32) in[9]) +
187 |                     ((limb) ((s32) in2[9])) * ((s32) in[5])) +
188 |                     ((limb) ((s32) in2[6])) * ((s32) in[8]) +
189 |                     ((limb) ((s32) in2[8])) * ((s32) in[6]);
190 |   output[15] =      ((limb) ((s32) in2[7])) * ((s32) in[8]) +
191 |                     ((limb) ((s32) in2[8])) * ((s32) in[7]) +
192 |                     ((limb) ((s32) in2[6])) * ((s32) in[9]) +
193 |                     ((limb) ((s32) in2[9])) * ((s32) in[6]);
194 |   output[16] =      ((limb) ((s32) in2[8])) * ((s32) in[8]) +
195 |                2 * (((limb) ((s32) in2[7])) * ((s32) in[9]) +
196 |                     ((limb) ((s32) in2[9])) * ((s32) in[7]));
197 |   output[17] =      ((limb) ((s32) in2[8])) * ((s32) in[9]) +
198 |                     ((limb) ((s32) in2[9])) * ((s32) in[8]);
199 |   output[18] = 2 *  ((limb) ((s32) in2[9])) * ((s32) in[9]);
200 | }
201 | 
202 | /* Reduce a long form to a short form by taking the input mod 2^255 - 19.
203 |  *
204 |  * On entry: |output[i]| < 14*2^54
205 |  * On exit: |output[0..8]| < 280*2^54 */
206 | static void freduce_degree(limb *output) {
207 |   /* Each of these shifts and adds ends up multiplying the value by 19.
208 |    *
209 |    * For output[0..8], the absolute entry value is < 14*2^54 and we add, at
210 |    * most, 19*14*2^54 thus, on exit, |output[0..8]| < 280*2^54. */
211 |   output[8] += output[18] << 4;
212 |   output[8] += output[18] << 1;
213 |   output[8] += output[18];
214 |   output[7] += output[17] << 4;
215 |   output[7] += output[17] << 1;
216 |   output[7] += output[17];
217 |   output[6] += output[16] << 4;
218 |   output[6] += output[16] << 1;
219 |   output[6] += output[16];
220 |   output[5] += output[15] << 4;
221 |   output[5] += output[15] << 1;
222 |   output[5] += output[15];
223 |   output[4] += output[14] << 4;
224 |   output[4] += output[14] << 1;
225 |   output[4] += output[14];
226 |   output[3] += output[13] << 4;
227 |   output[3] += output[13] << 1;
228 |   output[3] += output[13];
229 |   output[2] += output[12] << 4;
230 |   output[2] += output[12] << 1;
231 |   output[2] += output[12];
232 |   output[1] += output[11] << 4;
233 |   output[1] += output[11] << 1;
234 |   output[1] += output[11];
235 |   output[0] += output[10] << 4;
236 |   output[0] += output[10] << 1;
237 |   output[0] += output[10];
238 | }
239 | 
240 | #if (-1 & 3) != 3
241 | #error "This code only works on a two's complement system"
242 | #endif
243 | 
244 | /* return v / 2^26, using only shifts and adds.
245 |  *
246 |  * On entry: v can take any value. */
247 | static inline limb
248 | div_by_2_26(const limb v)
249 | {
250 |   /* High word of v; no shift needed. */
251 |   const uint32_t highword = (uint32_t) (((uint64_t) v) >> 32);
252 |   /* Set to all 1s if v was negative; else set to 0s. */
253 |   const int32_t sign = ((int32_t) highword) >> 31;
254 |   /* Set to 0x3ffffff if v was negative; else set to 0. */
255 |   const int32_t roundoff = ((uint32_t) sign) >> 6;
256 |   /* Should return v / (1<<26) */
257 |   return (v + roundoff) >> 26;
258 | }
259 | 
260 | /* return v / (2^25), using only shifts and adds.
261 |  *
262 |  * On entry: v can take any value. */
263 | static inline limb
264 | div_by_2_25(const limb v)
265 | {
266 |   /* High word of v; no shift needed*/
267 |   const uint32_t highword = (uint32_t) (((uint64_t) v) >> 32);
268 |   /* Set to all 1s if v was negative; else set to 0s. */
269 |   const int32_t sign = ((int32_t) highword) >> 31;
270 |   /* Set to 0x1ffffff if v was negative; else set to 0. */
271 |   const int32_t roundoff = ((uint32_t) sign) >> 7;
272 |   /* Should return v / (1<<25) */
273 |   return (v + roundoff) >> 25;
274 | }
275 | 
276 | /* Reduce all coefficients of the short form input so that |x| < 2^26.
277 |  *
278 |  * On entry: |output[i]| < 280*2^54 */
279 | static void freduce_coefficients(limb *output) {
280 |   unsigned i;
281 | 
282 |   output[10] = 0;
283 | 
284 |   for (i = 0; i < 10; i += 2) {
285 |     limb over = div_by_2_26(output[i]);
286 |     /* The entry condition (that |output[i]| < 280*2^54) means that over is, at
287 |      * most, 280*2^28 in the first iteration of this loop. This is added to the
288 |      * next limb and we can approximate the resulting bound of that limb by
289 |      * 281*2^54. */
290 |     output[i] -= over << 26;
291 |     output[i+1] += over;
292 | 
293 |     /* For the first iteration, |output[i+1]| < 281*2^54, thus |over| <
294 |      * 281*2^29. When this is added to the next limb, the resulting bound can
295 |      * be approximated as 281*2^54.
296 |      *
297 |      * For subsequent iterations of the loop, 281*2^54 remains a conservative
298 |      * bound and no overflow occurs. */
299 |     over = div_by_2_25(output[i+1]);
300 |     output[i+1] -= over << 25;
301 |     output[i+2] += over;
302 |   }
303 |   /* Now |output[10]| < 281*2^29 and all other coefficients are reduced. */
304 |   output[0] += output[10] << 4;
305 |   output[0] += output[10] << 1;
306 |   output[0] += output[10];
307 | 
308 |   output[10] = 0;
309 | 
310 |   /* Now output[1..9] are reduced, and |output[0]| < 2^26 + 19*281*2^29
311 |    * So |over| will be no more than 2^16. */
312 |   {
313 |     limb over = div_by_2_26(output[0]);
314 |     output[0] -= over << 26;
315 |     output[1] += over;
316 |   }
317 | 
318 |   /* Now output[0,2..9] are reduced, and |output[1]| < 2^25 + 2^16 < 2^26. The
319 |    * bound on |output[1]| is sufficient to meet our needs. */
320 | }
321 | 
322 | /* A helpful wrapper around fproduct: output = in * in2.
323 |  *
324 |  * On entry: |in[i]| < 2^27 and |in2[i]| < 2^27.
325 |  *
326 |  * output must be distinct to both inputs. The output is reduced degree
327 |  * (indeed, one need only provide storage for 10 limbs) and |output[i]| < 2^26. */
328 | static void
329 | fmul(limb *output, const limb *in, const limb *in2) {
330 |   limb t[19];
331 |   fproduct(t, in, in2);
332 |   /* |t[i]| < 14*2^54 */
333 |   freduce_degree(t);
334 |   freduce_coefficients(t);
335 |   /* |t[i]| < 2^26 */
336 |   memcpy(output, t, sizeof(limb) * 10);
337 | }
338 | 
339 | /* Square a number: output = in**2
340 |  *
341 |  * output must be distinct from the input. The inputs are reduced coefficient
342 |  * form, the output is not.
343 |  *
344 |  * output[x] <= 14 * the largest product of the input limbs. */
345 | static void fsquare_inner(limb *output, const limb *in) {
346 |   output[0] =       ((limb) ((s32) in[0])) * ((s32) in[0]);
347 |   output[1] =  2 *  ((limb) ((s32) in[0])) * ((s32) in[1]);
348 |   output[2] =  2 * (((limb) ((s32) in[1])) * ((s32) in[1]) +
349 |                     ((limb) ((s32) in[0])) * ((s32) in[2]));
350 |   output[3] =  2 * (((limb) ((s32) in[1])) * ((s32) in[2]) +
351 |                     ((limb) ((s32) in[0])) * ((s32) in[3]));
352 |   output[4] =       ((limb) ((s32) in[2])) * ((s32) in[2]) +
353 |                4 *  ((limb) ((s32) in[1])) * ((s32) in[3]) +
354 |                2 *  ((limb) ((s32) in[0])) * ((s32) in[4]);
355 |   output[5] =  2 * (((limb) ((s32) in[2])) * ((s32) in[3]) +
356 |                     ((limb) ((s32) in[1])) * ((s32) in[4]) +
357 |                     ((limb) ((s32) in[0])) * ((s32) in[5]));
358 |   output[6] =  2 * (((limb) ((s32) in[3])) * ((s32) in[3]) +
359 |                     ((limb) ((s32) in[2])) * ((s32) in[4]) +
360 |                     ((limb) ((s32) in[0])) * ((s32) in[6]) +
361 |                2 *  ((limb) ((s32) in[1])) * ((s32) in[5]));
362 |   output[7] =  2 * (((limb) ((s32) in[3])) * ((s32) in[4]) +
363 |                     ((limb) ((s32) in[2])) * ((s32) in[5]) +
364 |                     ((limb) ((s32) in[1])) * ((s32) in[6]) +
365 |                     ((limb) ((s32) in[0])) * ((s32) in[7]));
366 |   output[8] =       ((limb) ((s32) in[4])) * ((s32) in[4]) +
367 |                2 * (((limb) ((s32) in[2])) * ((s32) in[6]) +
368 |                     ((limb) ((s32) in[0])) * ((s32) in[8]) +
369 |                2 * (((limb) ((s32) in[1])) * ((s32) in[7]) +
370 |                     ((limb) ((s32) in[3])) * ((s32) in[5])));
371 |   output[9] =  2 * (((limb) ((s32) in[4])) * ((s32) in[5]) +
372 |                     ((limb) ((s32) in[3])) * ((s32) in[6]) +
373 |                     ((limb) ((s32) in[2])) * ((s32) in[7]) +
374 |                     ((limb) ((s32) in[1])) * ((s32) in[8]) +
375 |                     ((limb) ((s32) in[0])) * ((s32) in[9]));
376 |   output[10] = 2 * (((limb) ((s32) in[5])) * ((s32) in[5]) +
377 |                     ((limb) ((s32) in[4])) * ((s32) in[6]) +
378 |                     ((limb) ((s32) in[2])) * ((s32) in[8]) +
379 |                2 * (((limb) ((s32) in[3])) * ((s32) in[7]) +
380 |                     ((limb) ((s32) in[1])) * ((s32) in[9])));
381 |   output[11] = 2 * (((limb) ((s32) in[5])) * ((s32) in[6]) +
382 |                     ((limb) ((s32) in[4])) * ((s32) in[7]) +
383 |                     ((limb) ((s32) in[3])) * ((s32) in[8]) +
384 |                     ((limb) ((s32) in[2])) * ((s32) in[9]));
385 |   output[12] =      ((limb) ((s32) in[6])) * ((s32) in[6]) +
386 |                2 * (((limb) ((s32) in[4])) * ((s32) in[8]) +
387 |                2 * (((limb) ((s32) in[5])) * ((s32) in[7]) +
388 |                     ((limb) ((s32) in[3])) * ((s32) in[9])));
389 |   output[13] = 2 * (((limb) ((s32) in[6])) * ((s32) in[7]) +
390 |                     ((limb) ((s32) in[5])) * ((s32) in[8]) +
391 |                     ((limb) ((s32) in[4])) * ((s32) in[9]));
392 |   output[14] = 2 * (((limb) ((s32) in[7])) * ((s32) in[7]) +
393 |                     ((limb) ((s32) in[6])) * ((s32) in[8]) +
394 |                2 *  ((limb) ((s32) in[5])) * ((s32) in[9]));
395 |   output[15] = 2 * (((limb) ((s32) in[7])) * ((s32) in[8]) +
396 |                     ((limb) ((s32) in[6])) * ((s32) in[9]));
397 |   output[16] =      ((limb) ((s32) in[8])) * ((s32) in[8]) +
398 |                4 *  ((limb) ((s32) in[7])) * ((s32) in[9]);
399 |   output[17] = 2 *  ((limb) ((s32) in[8])) * ((s32) in[9]);
400 |   output[18] = 2 *  ((limb) ((s32) in[9])) * ((s32) in[9]);
401 | }
402 | 
403 | /* fsquare sets output = in^2.
404 |  *
405 |  * On entry: The |in| argument is in reduced coefficients form and |in[i]| <
406 |  * 2^27.
407 |  *
408 |  * On exit: The |output| argument is in reduced coefficients form (indeed, one
409 |  * need only provide storage for 10 limbs) and |out[i]| < 2^26. */
410 | static void
411 | fsquare(limb *output, const limb *in) {
412 |   limb t[19];
413 |   fsquare_inner(t, in);
414 |   /* |t[i]| < 14*2^54 because the largest product of two limbs will be <
415 |    * 2^(27+27) and fsquare_inner adds together, at most, 14 of those
416 |    * products. */
417 |   freduce_degree(t);
418 |   freduce_coefficients(t);
419 |   /* |t[i]| < 2^26 */
420 |   memcpy(output, t, sizeof(limb) * 10);
421 | }
422 | 
423 | /* Take a little-endian, 32-byte number and expand it into polynomial form */
424 | static void
425 | fexpand(limb *output, const u8 *input) {
426 | #define F(n,start,shift,mask) \
427 |   output[n] = ((((limb) input[start + 0]) | \
428 |                 ((limb) input[start + 1]) << 8 | \
429 |                 ((limb) input[start + 2]) << 16 | \
430 |                 ((limb) input[start + 3]) << 24) >> shift) & mask;
431 |   F(0, 0, 0, 0x3ffffff);
432 |   F(1, 3, 2, 0x1ffffff);
433 |   F(2, 6, 3, 0x3ffffff);
434 |   F(3, 9, 5, 0x1ffffff);
435 |   F(4, 12, 6, 0x3ffffff);
436 |   F(5, 16, 0, 0x1ffffff);
437 |   F(6, 19, 1, 0x3ffffff);
438 |   F(7, 22, 3, 0x1ffffff);
439 |   F(8, 25, 4, 0x3ffffff);
440 |   F(9, 28, 6, 0x1ffffff);
441 | #undef F
442 | }
443 | 
444 | #if (-32 >> 1) != -16
445 | #error "This code only works when >> does sign-extension on negative numbers"
446 | #endif
447 | 
448 | /* s32_eq returns 0xffffffff iff a == b and zero otherwise. */
449 | static s32 s32_eq(s32 a, s32 b) {
450 |   a = ~(a ^ b);
451 |   a &= a << 16;
452 |   a &= a << 8;
453 |   a &= a << 4;
454 |   a &= a << 2;
455 |   a &= a << 1;
456 |   return a >> 31;
457 | }
458 | 
459 | /* s32_gte returns 0xffffffff if a >= b and zero otherwise, where a and b are
460 |  * both non-negative. */
461 | static s32 s32_gte(s32 a, s32 b) {
462 |   a -= b;
463 |   /* a >= 0 iff a >= b. */
464 |   return ~(a >> 31);
465 | }
466 | 
467 | /* Take a fully reduced polynomial form number and contract it into a
468 |  * little-endian, 32-byte array.
469 |  *
470 |  * On entry: |input_limbs[i]| < 2^26 */
471 | static void
472 | fcontract(u8 *output, limb *input_limbs) {
473 |   int i;
474 |   int j;
475 |   s32 input[10];
476 |   s32 mask;
477 | 
478 |   /* |input_limbs[i]| < 2^26, so it's valid to convert to an s32. */
479 |   for (i = 0; i < 10; i++) {
480 |     input[i] = input_limbs[i];
481 |   }
482 | 
483 |   for (j = 0; j < 2; ++j) {
484 |     for (i = 0; i < 9; ++i) {
485 |       if ((i & 1) == 1) {
486 |         /* This calculation is a time-invariant way to make input[i]
487 |          * non-negative by borrowing from the next-larger limb. */
488 |         const s32 mask = input[i] >> 31;
489 |         const s32 carry = -((input[i] & mask) >> 25);
490 |         input[i] = input[i] + (carry << 25);
491 |         input[i+1] = input[i+1] - carry;
492 |       } else {
493 |         const s32 mask = input[i] >> 31;
494 |         const s32 carry = -((input[i] & mask) >> 26);
495 |         input[i] = input[i] + (carry << 26);
496 |         input[i+1] = input[i+1] - carry;
497 |       }
498 |     }
499 | 
500 |     /* There's no greater limb for input[9] to borrow from, but we can multiply
501 |      * by 19 and borrow from input[0], which is valid mod 2^255-19. */
502 |     {
503 |       const s32 mask = input[9] >> 31;
504 |       const s32 carry = -((input[9] & mask) >> 25);
505 |       input[9] = input[9] + (carry << 25);
506 |       input[0] = input[0] - (carry * 19);
507 |     }
508 | 
509 |     /* After the first iteration, input[1..9] are non-negative and fit within
510 |      * 25 or 26 bits, depending on position. However, input[0] may be
511 |      * negative. */
512 |   }
513 | 
514 |   /* The first borrow-propagation pass above ended with every limb
515 |      except (possibly) input[0] non-negative.
516 | 
517 |      If input[0] was negative after the first pass, then it was because of a
518 |      carry from input[9]. On entry, input[9] < 2^26 so the carry was, at most,
519 |      one, since (2**26-1) >> 25 = 1. Thus input[0] >= -19.
520 | 
521 |      In the second pass, each limb is decreased by at most one. Thus the second
522 |      borrow-propagation pass could only have wrapped around to decrease
523 |      input[0] again if the first pass left input[0] negative *and* input[1]
524 |      through input[9] were all zero.  In that case, input[1] is now 2^25 - 1,
525 |      and this last borrow-propagation step will leave input[1] non-negative. */
526 |   {
527 |     const s32 mask = input[0] >> 31;
528 |     const s32 carry = -((input[0] & mask) >> 26);
529 |     input[0] = input[0] + (carry << 26);
530 |     input[1] = input[1] - carry;
531 |   }
532 | 
533 |   /* All input[i] are now non-negative. However, there might be values between
534 |    * 2^25 and 2^26 in a limb which is, nominally, 25 bits wide. */
535 |   for (j = 0; j < 2; j++) {
536 |     for (i = 0; i < 9; i++) {
537 |       if ((i & 1) == 1) {
538 |         const s32 carry = input[i] >> 25;
539 |         input[i] &= 0x1ffffff;
540 |         input[i+1] += carry;
541 |       } else {
542 |         const s32 carry = input[i] >> 26;
543 |         input[i] &= 0x3ffffff;
544 |         input[i+1] += carry;
545 |       }
546 |     }
547 | 
548 |     {
549 |       const s32 carry = input[9] >> 25;
550 |       input[9] &= 0x1ffffff;
551 |       input[0] += 19*carry;
552 |     }
553 |   }
554 | 
555 |   /* If the first carry-chain pass, just above, ended up with a carry from
556 |    * input[9], and that caused input[0] to be out-of-bounds, then input[0] was
557 |    * < 2^26 + 2*19, because the carry was, at most, two.
558 |    *
559 |    * If the second pass carried from input[9] again then input[0] is < 2*19 and
560 |    * the input[9] -> input[0] carry didn't push input[0] out of bounds. */
561 | 
562 |   /* It still remains the case that input might be between 2^255-19 and 2^255.
563 |    * In this case, input[1..9] must take their maximum value and input[0] must
564 |    * be >= (2^255-19) & 0x3ffffff, which is 0x3ffffed. */
565 |   mask = s32_gte(input[0], 0x3ffffed);
566 |   for (i = 1; i < 10; i++) {
567 |     if ((i & 1) == 1) {
568 |       mask &= s32_eq(input[i], 0x1ffffff);
569 |     } else {
570 |       mask &= s32_eq(input[i], 0x3ffffff);
571 |     }
572 |   }
573 | 
574 |   /* mask is either 0xffffffff (if input >= 2^255-19) and zero otherwise. Thus
575 |    * this conditionally subtracts 2^255-19. */
576 |   input[0] -= mask & 0x3ffffed;
577 | 
578 |   for (i = 1; i < 10; i++) {
579 |     if ((i & 1) == 1) {
580 |       input[i] -= mask & 0x1ffffff;
581 |     } else {
582 |       input[i] -= mask & 0x3ffffff;
583 |     }
584 |   }
585 | 
586 |   input[1] <<= 2;
587 |   input[2] <<= 3;
588 |   input[3] <<= 5;
589 |   input[4] <<= 6;
590 |   input[6] <<= 1;
591 |   input[7] <<= 3;
592 |   input[8] <<= 4;
593 |   input[9] <<= 6;
594 | #define F(i, s) \
595 |   output[s+0] |=  input[i] & 0xff; \
596 |   output[s+1]  = (input[i] >> 8) & 0xff; \
597 |   output[s+2]  = (input[i] >> 16) & 0xff; \
598 |   output[s+3]  = (input[i] >> 24) & 0xff;
599 |   output[0] = 0;
600 |   output[16] = 0;
601 |   F(0,0);
602 |   F(1,3);
603 |   F(2,6);
604 |   F(3,9);
605 |   F(4,12);
606 |   F(5,16);
607 |   F(6,19);
608 |   F(7,22);
609 |   F(8,25);
610 |   F(9,28);
611 | #undef F
612 | }
613 | 
614 | /* Input: Q, Q', Q-Q'
615 |  * Output: 2Q, Q+Q'
616 |  *
617 |  *   x2 z3: long form
618 |  *   x3 z3: long form
619 |  *   x z: short form, destroyed
620 |  *   xprime zprime: short form, destroyed
621 |  *   qmqp: short form, preserved
622 |  *
623 |  * On entry and exit, the absolute value of the limbs of all inputs and outputs
624 |  * are < 2^26. */
625 | static void fmonty(limb *x2, limb *z2,  /* output 2Q */
626 |                    limb *x3, limb *z3,  /* output Q + Q' */
627 |                    limb *x, limb *z,    /* input Q */
628 |                    limb *xprime, limb *zprime,  /* input Q' */
629 |                    const limb *qmqp /* input Q - Q' */) {
630 |   limb origx[10], origxprime[10], zzz[19], xx[19], zz[19], xxprime[19],
631 |         zzprime[19], zzzprime[19], xxxprime[19];
632 | 
633 |   memcpy(origx, x, 10 * sizeof(limb));
634 |   fsum(x, z);
635 |   /* |x[i]| < 2^27 */
636 |   fdifference(z, origx);  /* does x - z */
637 |   /* |z[i]| < 2^27 */
638 | 
639 |   memcpy(origxprime, xprime, sizeof(limb) * 10);
640 |   fsum(xprime, zprime);
641 |   /* |xprime[i]| < 2^27 */
642 |   fdifference(zprime, origxprime);
643 |   /* |zprime[i]| < 2^27 */
644 |   fproduct(xxprime, xprime, z);
645 |   /* |xxprime[i]| < 14*2^54: the largest product of two limbs will be <
646 |    * 2^(27+27) and fproduct adds together, at most, 14 of those products.
647 |    * (Approximating that to 2^58 doesn't work out.) */
648 |   fproduct(zzprime, x, zprime);
649 |   /* |zzprime[i]| < 14*2^54 */
650 |   freduce_degree(xxprime);
651 |   freduce_coefficients(xxprime);
652 |   /* |xxprime[i]| < 2^26 */
653 |   freduce_degree(zzprime);
654 |   freduce_coefficients(zzprime);
655 |   /* |zzprime[i]| < 2^26 */
656 |   memcpy(origxprime, xxprime, sizeof(limb) * 10);
657 |   fsum(xxprime, zzprime);
658 |   /* |xxprime[i]| < 2^27 */
659 |   fdifference(zzprime, origxprime);
660 |   /* |zzprime[i]| < 2^27 */
661 |   fsquare(xxxprime, xxprime);
662 |   /* |xxxprime[i]| < 2^26 */
663 |   fsquare(zzzprime, zzprime);
664 |   /* |zzzprime[i]| < 2^26 */
665 |   fproduct(zzprime, zzzprime, qmqp);
666 |   /* |zzprime[i]| < 14*2^52 */
667 |   freduce_degree(zzprime);
668 |   freduce_coefficients(zzprime);
669 |   /* |zzprime[i]| < 2^26 */
670 |   memcpy(x3, xxxprime, sizeof(limb) * 10);
671 |   memcpy(z3, zzprime, sizeof(limb) * 10);
672 | 
673 |   fsquare(xx, x);
674 |   /* |xx[i]| < 2^26 */
675 |   fsquare(zz, z);
676 |   /* |zz[i]| < 2^26 */
677 |   fproduct(x2, xx, zz);
678 |   /* |x2[i]| < 14*2^52 */
679 |   freduce_degree(x2);
680 |   freduce_coefficients(x2);
681 |   /* |x2[i]| < 2^26 */
682 |   fdifference(zz, xx);  // does zz = xx - zz
683 |   /* |zz[i]| < 2^27 */
684 |   memset(zzz + 10, 0, sizeof(limb) * 9);
685 |   fscalar_product(zzz, zz, 121665);
686 |   /* |zzz[i]| < 2^(27+17) */
687 |   /* No need to call freduce_degree here:
688 |      fscalar_product doesn't increase the degree of its input. */
689 |   freduce_coefficients(zzz);
690 |   /* |zzz[i]| < 2^26 */
691 |   fsum(zzz, xx);
692 |   /* |zzz[i]| < 2^27 */
693 |   fproduct(z2, zz, zzz);
694 |   /* |z2[i]| < 14*2^(26+27) */
695 |   freduce_degree(z2);
696 |   freduce_coefficients(z2);
697 |   /* |z2|i| < 2^26 */
698 | }
699 | 
700 | /* Conditionally swap two reduced-form limb arrays if 'iswap' is 1, but leave
701 |  * them unchanged if 'iswap' is 0.  Runs in data-invariant time to avoid
702 |  * side-channel attacks.
703 |  *
704 |  * NOTE that this function requires that 'iswap' be 1 or 0; other values give
705 |  * wrong results.  Also, the two limb arrays must be in reduced-coefficient,
706 |  * reduced-degree form: the values in a[10..19] or b[10..19] aren't swapped,
707 |  * and all all values in a[0..9],b[0..9] must have magnitude less than
708 |  * INT32_MAX. */
709 | static void
710 | swap_conditional(limb a[19], limb b[19], limb iswap) {
711 |   unsigned i;
712 |   const s32 swap = (s32) -iswap;
713 | 
714 |   for (i = 0; i < 10; ++i) {
715 |     const s32 x = swap & ( ((s32)a[i]) ^ ((s32)b[i]) );
716 |     a[i] = ((s32)a[i]) ^ x;
717 |     b[i] = ((s32)b[i]) ^ x;
718 |   }
719 | }
720 | 
721 | /* Calculates nQ where Q is the x-coordinate of a point on the curve
722 |  *
723 |  *   resultx/resultz: the x coordinate of the resulting curve point (short form)
724 |  *   n: a little endian, 32-byte number
725 |  *   q: a point of the curve (short form) */
726 | static void
727 | cmult(limb *resultx, limb *resultz, const u8 *n, const limb *q) {
728 |   limb a[19] = {0}, b[19] = {1}, c[19] = {1}, d[19] = {0};
729 |   limb *nqpqx = a, *nqpqz = b, *nqx = c, *nqz = d, *t;
730 |   limb e[19] = {0}, f[19] = {1}, g[19] = {0}, h[19] = {1};
731 |   limb *nqpqx2 = e, *nqpqz2 = f, *nqx2 = g, *nqz2 = h;
732 | 
733 |   unsigned i, j;
734 | 
735 |   memcpy(nqpqx, q, sizeof(limb) * 10);
736 | 
737 |   for (i = 0; i < 32; ++i) {
738 |     u8 byte = n[31 - i];
739 |     for (j = 0; j < 8; ++j) {
740 |       const limb bit = byte >> 7;
741 | 
742 |       swap_conditional(nqx, nqpqx, bit);
743 |       swap_conditional(nqz, nqpqz, bit);
744 |       fmonty(nqx2, nqz2,
745 |              nqpqx2, nqpqz2,
746 |              nqx, nqz,
747 |              nqpqx, nqpqz,
748 |              q);
749 |       swap_conditional(nqx2, nqpqx2, bit);
750 |       swap_conditional(nqz2, nqpqz2, bit);
751 | 
752 |       t = nqx;
753 |       nqx = nqx2;
754 |       nqx2 = t;
755 |       t = nqz;
756 |       nqz = nqz2;
757 |       nqz2 = t;
758 |       t = nqpqx;
759 |       nqpqx = nqpqx2;
760 |       nqpqx2 = t;
761 |       t = nqpqz;
762 |       nqpqz = nqpqz2;
763 |       nqpqz2 = t;
764 | 
765 |       byte <<= 1;
766 |     }
767 |     yield(); // yield so that wifi handling routines can be served
768 |   }
769 | 
770 |   memcpy(resultx, nqx, sizeof(limb) * 10);
771 |   memcpy(resultz, nqz, sizeof(limb) * 10);
772 | }
773 | 
774 | // -----------------------------------------------------------------------------
775 | // Shamelessly copied from djb's code
776 | // -----------------------------------------------------------------------------
777 | static void
778 | crecip(limb *out, const limb *z) {
779 |   limb z2[10];
780 |   limb z9[10];
781 |   limb z11[10];
782 |   limb z2_5_0[10];
783 |   limb z2_10_0[10];
784 |   limb z2_20_0[10];
785 |   limb z2_50_0[10];
786 |   limb z2_100_0[10];
787 |   limb t0[10];
788 |   limb t1[10];
789 |   int i;
790 | 
791 |   /* 2 */ fsquare(z2,z);
792 |   /* 4 */ fsquare(t1,z2);
793 |   /* 8 */ fsquare(t0,t1);
794 |   /* 9 */ fmul(z9,t0,z);
795 |   /* 11 */ fmul(z11,z9,z2);
796 |   /* 22 */ fsquare(t0,z11);
797 |   /* 2^5 - 2^0 = 31 */ fmul(z2_5_0,t0,z9);
798 | 
799 |   /* 2^6 - 2^1 */ fsquare(t0,z2_5_0);
800 |   /* 2^7 - 2^2 */ fsquare(t1,t0);
801 |   /* 2^8 - 2^3 */ fsquare(t0,t1);
802 |   /* 2^9 - 2^4 */ fsquare(t1,t0);
803 |   /* 2^10 - 2^5 */ fsquare(t0,t1);
804 |   /* 2^10 - 2^0 */ fmul(z2_10_0,t0,z2_5_0);
805 | 
806 |   /* 2^11 - 2^1 */ fsquare(t0,z2_10_0);
807 |   /* 2^12 - 2^2 */ fsquare(t1,t0);
808 |   /* 2^20 - 2^10 */ for (i = 2;i < 10;i += 2) { fsquare(t0,t1); fsquare(t1,t0); }
809 |   /* 2^20 - 2^0 */ fmul(z2_20_0,t1,z2_10_0);
810 | 
811 |   /* 2^21 - 2^1 */ fsquare(t0,z2_20_0);
812 |   /* 2^22 - 2^2 */ fsquare(t1,t0);
813 |   /* 2^40 - 2^20 */ for (i = 2;i < 20;i += 2) { fsquare(t0,t1); fsquare(t1,t0); }
814 |   /* 2^40 - 2^0 */ fmul(t0,t1,z2_20_0);
815 | 
816 |   /* 2^41 - 2^1 */ fsquare(t1,t0);
817 |   /* 2^42 - 2^2 */ fsquare(t0,t1);
818 |   /* 2^50 - 2^10 */ for (i = 2;i < 10;i += 2) { fsquare(t1,t0); fsquare(t0,t1); }
819 |   /* 2^50 - 2^0 */ fmul(z2_50_0,t0,z2_10_0);
820 | 
821 |   /* 2^51 - 2^1 */ fsquare(t0,z2_50_0);
822 |   /* 2^52 - 2^2 */ fsquare(t1,t0);
823 |   /* 2^100 - 2^50 */ for (i = 2;i < 50;i += 2) { fsquare(t0,t1); fsquare(t1,t0); }
824 |   /* 2^100 - 2^0 */ fmul(z2_100_0,t1,z2_50_0);
825 | 
826 |   /* 2^101 - 2^1 */ fsquare(t1,z2_100_0);
827 |   /* 2^102 - 2^2 */ fsquare(t0,t1);
828 |   /* 2^200 - 2^100 */ for (i = 2;i < 100;i += 2) { fsquare(t1,t0); fsquare(t0,t1); }
829 |   /* 2^200 - 2^0 */ fmul(t1,t0,z2_100_0);
830 | 
831 |   /* 2^201 - 2^1 */ fsquare(t0,t1);
832 |   /* 2^202 - 2^2 */ fsquare(t1,t0);
833 |   /* 2^250 - 2^50 */ for (i = 2;i < 50;i += 2) { fsquare(t0,t1); fsquare(t1,t0); }
834 |   /* 2^250 - 2^0 */ fmul(t0,t1,z2_50_0);
835 | 
836 |   /* 2^251 - 2^1 */ fsquare(t1,t0);
837 |   /* 2^252 - 2^2 */ fsquare(t0,t1);
838 |   /* 2^253 - 2^3 */ fsquare(t1,t0);
839 |   /* 2^254 - 2^4 */ fsquare(t0,t1);
840 |   /* 2^255 - 2^5 */ fsquare(t1,t0);
841 |   /* 2^255 - 21 */ fmul(out,t1,z11);
842 | }
843 | 
844 | void curve25519_donna(u8 *mypublic, const u8 *secret, const u8 *basepoint) {
845 |   limb bp[10], x[10], z[11], zmone[10];
846 |   uint8_t e[32];
847 |   int i;
848 | 
849 |   for (i = 0; i < 32; ++i) e[i] = secret[i];
850 |   e[0] &= 248;
851 |   e[31] &= 127;
852 |   e[31] |= 64;
853 | 
854 |   fexpand(bp, basepoint);
855 |   cmult(x, z, e, bp);
856 |   crecip(zmone, z);
857 |   fmul(z, x, zmone);
858 |   fcontract(mypublic, z);
859 | }
860 | 
861 | u8 curve25519_donna_debug(u8 *mypublic, const u8 *secret, const u8 *basepoint) {
862 |   static limb bp[10], x[10], z[11], zmone[10];
863 |   static uint8_t e[32];
864 | 
865 |   static u8 state=0;
866 | 
867 |   switch(state) {
868 |     case 0:
869 | 	for (int i = 0; i < 32; ++i) e[i] = secret[i];
870 | 	e[0] &= 248;
871 | 	e[31] &= 127;
872 | 	e[31] |= 64;
873 | 	++state;
874 | 	break;
875 |   case 1:
876 | 	fexpand(bp, basepoint);
877 | 	++state;
878 | 	break;
879 |   case 2:
880 | 	cmult(x, z, e, bp);
881 | 	++state;
882 | 	break;
883 |   case 3:
884 | 	crecip(zmone, z);
885 | 	++state;
886 | 	break;
887 |   case 4:
888 | 	fmul(z, x, zmone);
889 | 	++state;
890 | 	break;
891 |   case 5:
892 | 	fcontract(mypublic, z);
893 | 	state=0;
894 | 	break;
895 |     }
896 |   return state;
897 | }
898 | 


--------------------------------------------------------------------------------
/curve25519-donna/curve25519-donna.h:
--------------------------------------------------------------------------------
 1 | /* From https://github.com/agl/curve25519-donna
 2 | *   USAGE:
 3 | *
 4 | *   The usage is exactly the same as djb's code (as described at
 5 | *   http://cr.yp.to/ecdh.html) expect that the function is called curve25519_donna.
 6 | *
 7 | *   In short:
 8 | *
 9 | *   To generate a private key just generate 32 random bytes.
10 | *
11 | *   To generate the public key, just do:
12 | *
13 | *     static const uint8_t basepoint[32] = {9};
14 | *     curve25519_donna(mypublic, mysecret, basepoint);
15 | *
16 | *   To generate a shared secret do:
17 | *
18 | *     uint8_t shared[32];
19 | *     curve25519_donna(shared, mysecret, theirpublic);
20 | *
21 | *   And hash the sender's public key, the recipient's public key, and shared_key
22 | *   with a cryptographic hash function before using.
23 | */
24 | 
25 | // Requires fixes from https://github.com/esp8266/Arduino/commit/fe04165bbeafd0dcf5356172b2580655396279b0
26 | #if !defined(CURVE25519_DONNA)
27 | #define CURVE25519_DONNA
28 | 
29 | #include <stdint.h>
30 | 
31 | typedef uint8_t u8;
32 | typedef int32_t s32;
33 | typedef int64_t limb;
34 | 
35 | void curve25519_donna(u8 *mypublic, const u8 *secret, const u8 *basepoint);
36 | 
37 | #if DEBUG
38 | u8 curve25519_donna_debug(u8 *mypublic, const u8 *secret, const u8 *basepoint);
39 | #endif
40 | 
41 | #endif
42 | 


--------------------------------------------------------------------------------
/curve25519-donna/examples/curve25519_timing_test/curve25519_timing_test.ino:
--------------------------------------------------------------------------------
 1 | // Be chatty over Serial
 2 | #define DEBUG 1
 3 | 
 4 | #include <curve25519-donna.h>
 5 | 
 6 | int counter=0;
 7 | 
 8 | void setup() {
 9 |   Serial.begin(115200); // Serial is on GPIO1(tx) and GPIO3(rx)
10 | }
11 | 
12 | void doit(u8 *ek,const u8 *e,const u8 *k)
13 | {
14 |   int i;
15 | 
16 |   int start=millis();
17 |   while(u8 s=curve25519_donna_debug(ek,e,k)) {
18 |     int now=millis();
19 |     Serial.print(s, DEC);
20 |     Serial.print(" took ");
21 |     Serial.println(now-start, DEC);
22 |     start=now;
23 |   }
24 |   for (i = 0;i < 32;++i) Serial.print((unsigned int) e[i],HEX); Serial.print(" ");
25 |   for (i = 0;i < 32;++i) Serial.print((unsigned int) k[i], HEX); Serial.print(" ");
26 |   for (i = 0;i < 32;++i) Serial.print((unsigned int) ek[i], HEX); Serial.println("");
27 | }
28 | 
29 | u8 e1k[32];
30 | u8 e2k[32];
31 | u8 e1e2k[32];
32 | u8 e2e1k[32];
33 | u8 e1[32] = {3};
34 | u8 e2[32] = {5};
35 | u8 k[32] = {9};
36 | 
37 | int cnt=0, err=0;
38 | 
39 | void loop() {
40 |   int i;
41 |   long start=millis();
42 | 
43 |   if(!err) {
44 |     doit(e1k,e1,k);
45 |     delay(0);
46 |     doit(e2e1k,e2,e1k);
47 |     delay(0);
48 |     doit(e2k,e2,k);
49 |     delay(0);
50 |     doit(e1e2k,e1,e2k);
51 |     delay(0);
52 |     for (i = 0;i < 32;++i) if (e1e2k[i] != e2e1k[i]) {
53 |       Serial.print("Fail at pass ");
54 |       Serial.println(cnt, DEC);
55 |       err=1;
56 |     } else {
57 |       ++cnt;
58 |       for (i = 0;i < 32;++i) e1[i] ^= e2k[i];
59 |       for (i = 0;i < 32;++i) e2[i] ^= e1k[i];
60 |       for (i = 0;i < 32;++i) k[i] ^= e1e2k[i];
61 |       long now=millis();
62 |       Serial.print("Pass ");
63 |       Serial.print(cnt, DEC);
64 |       Serial.print(" COMPLETED in ");
65 |       Serial.print(now-start, DEC);
66 |       Serial.println("ms");
67 |     }
68 |   }
69 | }
70 | 


--------------------------------------------------------------------------------
/curve25519-donna/keywords.txt:
--------------------------------------------------------------------------------
1 | curve25519_donna	KEYWORD2
2 | curve25519_donna_debug	KEYWORD2
3 | 


--------------------------------------------------------------------------------
/esp8266-trng/README.md:
--------------------------------------------------------------------------------
1 | Simple access to ESP8266's TRNG as documented on
2 | http://esp8266-re.foogod.com/wiki/Random_Number_Generator
3 | 
4 | To use, just call trng() from your code: you'll get 4 completely random bytes.
5 | 
6 | 


--------------------------------------------------------------------------------
/esp8266-trng/esp8266-trng.h:
--------------------------------------------------------------------------------
1 | #if !defined(ESP8266_TRNG)
2 | #define ESP8266_TRNG
3 | 
4 | inline volatile int trng() {
5 |     return *((volatile int*)0x3FF20E44);
6 | }
7 | 
8 | #endif
9 | 


--------------------------------------------------------------------------------
/esp8266-trng/examples/esp8266-trng/esp8266-trng.ino:
--------------------------------------------------------------------------------
 1 | #include <esp8266-trng.h>
 2 | 
 3 | int counter=0;
 4 | 
 5 | void setup() {
 6 |   Serial.begin(115200);
 7 | }
 8 | 
 9 | void loop() {
10 |   if(!(counter&65535))	// slow down a bit...
11 |     Serial.println(trng(), HEX);
12 |   ++counter;
13 | }
14 | 
15 | 


--------------------------------------------------------------------------------
/sha256/README.md:
--------------------------------------------------------------------------------
 1 | Hash SHA-256
 2 | Original source by Brad Conte (brad AT bradconte.com)
 3 | Ported to Arduino library and objectified by Diego Zuccato (ndk.clanbo@gmail.com)
 4 | 
 5 | Passed validations (from http://www.di-mgt.com.au/sha_testvectors.html ) :
 6 |   - "" => E3B0C44298FC1C149AFBF4C8996FB92427AE41E4649B934CA495991B7852B855
 7 |   - "abc" => BA7816BF8F01CFEA414140DE5DAE2223B00361A396177A9CB410FF61F20015AD
 8 |   - "abcdbcdecdefdefgefghfghighijhijkijkljklmklmnlmnomnopnopq" => 248D6A61D20638B8E5C026930C3E6039A33CE45964FF2167F6ECEDD419DB06C1
 9 |   - "abcdefghbcdefghicdefghijdefghijkefghijklfghijklmghijklmnhijklmnoijklmnopjklmnopqklmnopqrlmnopqrsmnopqrstnopqrstu" => CF5B16A778AF8380036CE59E7B0492370B249B11E8F07A51AFAC45037AFEE9D1
10 |   - one million (1,000,000) repetitions of the character "a" (0x61) => CDC76E5C9914FB9281A1C7E284D73E67F1809A48A497200E046D39CCC7112CD0
11 |   - the extremely-long message "abcdefghbcdefghicdefghijdefghijkefghijklfghijklmghijklmnhijklmno" repeated 16,777,216 times: a bit string of length 2^33 bits => 50E72A0E26442FE2552DC3938AC58658228C0CBFB1D2CA872AE435266FCD055E
12 | 
13 | No failures in tested validation vectors.


--------------------------------------------------------------------------------
/sha256/examples/sha256-test/sha256-test.ino:
--------------------------------------------------------------------------------
 1 | #include <sha256.h>
 2 | 
 3 | void setup() {
 4 |   BYTE hash[SHA256_BLOCK_SIZE];
 5 |   char texthash[2*SHA256_BLOCK_SIZE+1];
 6 | 
 7 |   Serial.begin(115200);
 8 | 
 9 |   Sha256* sha256Instance=new Sha256();
10 |   BYTE text[]="abc";
11 |   sha256Instance->update(text, strlen((const char*)text));
12 |   sha256Instance->final(hash);
13 | 
14 |   for(int i=0; i<SHA256_BLOCK_SIZE; ++i)
15 |     sprintf(texthash+2*i, "%02X", hash[i]);
16 |   Serial.println(texthash);
17 |   
18 |   delete sha256Instance;
19 | }
20 | 
21 | void loop() {
22 |   delay(10);
23 | }
24 | 


--------------------------------------------------------------------------------
/sha256/sha256.cpp:
--------------------------------------------------------------------------------
  1 | /*********************************************************************
  2 | * Filename:   sha256.c
  3 | * Author:     Brad Conte (brad AT bradconte.com)
  4 | * Copyright:
  5 | * Disclaimer: This code is presented "as is" without any guarantees.
  6 | * Details:    Implementation of the SHA-256 hashing algorithm.
  7 |               SHA-256 is one of the three algorithms in the SHA2
  8 |               specification. The others, SHA-384 and SHA-512, are not
  9 |               offered in this implementation.
 10 |               Algorithm specification can be found here:
 11 |                * http://csrc.nist.gov/publications/fips/fips180-2/fips180-2withchangenotice.pdf
 12 |               This implementation uses little endian byte order.
 13 | 
 14 |               Ported to Arduino and objectified by Diego Zuccato <ndk.clanbo@gmail.com>
 15 | 
 16 | *********************************************************************/
 17 | 
 18 | /*************************** HEADER FILES ***************************/
 19 | //#include <stdlib.h>
 20 | //#include <memory.h>
 21 | #include <string.h>
 22 | #include "sha256.h"
 23 | 
 24 | /****************************** MACROS ******************************/
 25 | #define ROTLEFT(a,b) (((a) << (b)) | ((a) >> (32-(b))))
 26 | #define ROTRIGHT(a,b) (((a) >> (b)) | ((a) << (32-(b))))
 27 | 
 28 | #define CH(x,y,z) (((x) & (y)) ^ (~(x) & (z)))
 29 | #define MAJ(x,y,z) (((x) & (y)) ^ ((x) & (z)) ^ ((y) & (z)))
 30 | #define EP0(x) (ROTRIGHT(x,2) ^ ROTRIGHT(x,13) ^ ROTRIGHT(x,22))
 31 | #define EP1(x) (ROTRIGHT(x,6) ^ ROTRIGHT(x,11) ^ ROTRIGHT(x,25))
 32 | #define SIG0(x) (ROTRIGHT(x,7) ^ ROTRIGHT(x,18) ^ ((x) >> 3))
 33 | #define SIG1(x) (ROTRIGHT(x,17) ^ ROTRIGHT(x,19) ^ ((x) >> 10))
 34 | 
 35 | /**************************** VARIABLES *****************************/
 36 | static const WORD k[64] = {
 37 | 	0x428a2f98,0x71374491,0xb5c0fbcf,0xe9b5dba5,0x3956c25b,0x59f111f1,0x923f82a4,0xab1c5ed5,
 38 | 	0xd807aa98,0x12835b01,0x243185be,0x550c7dc3,0x72be5d74,0x80deb1fe,0x9bdc06a7,0xc19bf174,
 39 | 	0xe49b69c1,0xefbe4786,0x0fc19dc6,0x240ca1cc,0x2de92c6f,0x4a7484aa,0x5cb0a9dc,0x76f988da,
 40 | 	0x983e5152,0xa831c66d,0xb00327c8,0xbf597fc7,0xc6e00bf3,0xd5a79147,0x06ca6351,0x14292967,
 41 | 	0x27b70a85,0x2e1b2138,0x4d2c6dfc,0x53380d13,0x650a7354,0x766a0abb,0x81c2c92e,0x92722c85,
 42 | 	0xa2bfe8a1,0xa81a664b,0xc24b8b70,0xc76c51a3,0xd192e819,0xd6990624,0xf40e3585,0x106aa070,
 43 | 	0x19a4c116,0x1e376c08,0x2748774c,0x34b0bcb5,0x391c0cb3,0x4ed8aa4a,0x5b9cca4f,0x682e6ff3,
 44 | 	0x748f82ee,0x78a5636f,0x84c87814,0x8cc70208,0x90befffa,0xa4506ceb,0xbef9a3f7,0xc67178f2
 45 | };
 46 | 
 47 | /*********************** ACTUAL IMPLEMENTATION ***********************/
 48 | Sha256::Sha256() {
 49 |     this->datalen = 0;
 50 |     this->bitlen = 0;
 51 |     this->state[0] = 0x6a09e667;
 52 |     this->state[1] = 0xbb67ae85;
 53 |     this->state[2] = 0x3c6ef372;
 54 |     this->state[3] = 0xa54ff53a;
 55 |     this->state[4] = 0x510e527f;
 56 |     this->state[5] = 0x9b05688c;
 57 |     this->state[6] = 0x1f83d9ab;
 58 |     this->state[7] = 0x5be0cd19;
 59 | }
 60 | 
 61 | void Sha256::update(const BYTE data[], size_t len) {
 62 |     WORD i;
 63 | 
 64 |     for (i = 0; i < len; ++i) {
 65 | 	this->data[this->datalen] = data[i];
 66 | 	this->datalen++;
 67 | 	if (this->datalen == 64) {
 68 | 	    this->transform();
 69 | 	    this->bitlen += 512;
 70 | 	    this->datalen = 0;
 71 | 	}
 72 |     }
 73 | }
 74 | 
 75 | void Sha256::final(BYTE hash[]) {
 76 |     WORD i;
 77 | 
 78 |     i = this->datalen;
 79 | 
 80 |     // Pad whatever data is left in the buffer.
 81 |     if (this->datalen < 56) {
 82 | 	this->data[i++] = 0x80;
 83 | 	while (i < 56) //@@@ optimize with memset
 84 | 	    this->data[i++] = 0x00;
 85 |     } else {
 86 | 	this->data[i++] = 0x80;
 87 | 	while (i < 64) //@@@ optimize with memset
 88 | 	    this->data[i++] = 0x00;
 89 | 	this->transform();
 90 | 	memset(this->data, 0, 56);
 91 |     }
 92 | 
 93 |     // Append to the padding the total message's length in bits and transform.
 94 |     this->bitlen += this->datalen * 8;
 95 |     this->data[63] = this->bitlen;
 96 |     this->data[62] = this->bitlen >> 8;
 97 |     this->data[61] = this->bitlen >> 16;
 98 |     this->data[60] = this->bitlen >> 24;
 99 |     this->data[59] = this->bitlen >> 32;
100 |     this->data[58] = this->bitlen >> 40;
101 |     this->data[57] = this->bitlen >> 48;
102 |     this->data[56] = this->bitlen >> 56;
103 |     this->transform();
104 | 
105 |     // Since this implementation uses little endian byte ordering and SHA uses big endian,
106 |     // reverse all the bytes when copying the final state to the output hash.
107 |     for (i = 0; i < 4; ++i) {
108 | 	hash[i]      = (this->state[0] >> (24 - i * 8)) & 0x000000ff;
109 | 	hash[i + 4]  = (this->state[1] >> (24 - i * 8)) & 0x000000ff;
110 | 	hash[i + 8]  = (this->state[2] >> (24 - i * 8)) & 0x000000ff;
111 | 	hash[i + 12] = (this->state[3] >> (24 - i * 8)) & 0x000000ff;
112 | 	hash[i + 16] = (this->state[4] >> (24 - i * 8)) & 0x000000ff;
113 | 	hash[i + 20] = (this->state[5] >> (24 - i * 8)) & 0x000000ff;
114 | 	hash[i + 24] = (this->state[6] >> (24 - i * 8)) & 0x000000ff;
115 | 	hash[i + 28] = (this->state[7] >> (24 - i * 8)) & 0x000000ff;
116 |     }
117 | }
118 | 
119 | void Sha256::transform() {
120 |     WORD a, b, c, d, e, f, g, h, i, j, t1, t2, m[64];
121 | 
122 |     for (i = 0, j = 0; i < 16; ++i, j += 4)
123 | 	m[i] = (this->data[j] << 24) | (this->data[j + 1] << 16) | (this->data[j + 2] << 8) | (this->data[j + 3]);
124 |     for ( ; i < 64; ++i)
125 | 	m[i] = SIG1(m[i - 2]) + m[i - 7] + SIG0(m[i - 15]) + m[i - 16];
126 | 
127 |     a = this->state[0];
128 |     b = this->state[1];
129 |     c = this->state[2];
130 |     d = this->state[3];
131 |     e = this->state[4];
132 |     f = this->state[5];
133 |     g = this->state[6];
134 |     h = this->state[7];
135 | 
136 |     for (i = 0; i < 64; ++i) {
137 | 	t1 = h + EP1(e) + CH(e,f,g) + k[i] + m[i];
138 | 	t2 = EP0(a) + MAJ(a,b,c);
139 | 	h = g;
140 | 	g = f;
141 | 	f = e;
142 | 	e = d + t1;
143 | 	d = c;
144 | 	c = b;
145 | 	b = a;
146 | 	a = t1 + t2;
147 |     }
148 | 
149 |     this->state[0] += a;
150 |     this->state[1] += b;
151 |     this->state[2] += c;
152 |     this->state[3] += d;
153 |     this->state[4] += e;
154 |     this->state[5] += f;
155 |     this->state[6] += g;
156 |     this->state[7] += h;
157 | }
158 | 


--------------------------------------------------------------------------------
/sha256/sha256.h:
--------------------------------------------------------------------------------
 1 | /*********************************************************************
 2 | * Filename:   sha256.h
 3 | * Author:     Brad Conte (brad AT bradconte.com)
 4 | * Copyright:
 5 | * Disclaimer: This code is presented "as is" without any guarantees.
 6 | * Details:    Defines the API for the corresponding SHA1 implementation.
 7 | *********************************************************************/
 8 | 
 9 | #ifndef SHA256_H
10 | #define SHA256_H
11 | 
12 | /*************************** HEADER FILES ***************************/
13 | #include <stddef.h>
14 | 
15 | /****************************** MACROS ******************************/
16 | #define SHA256_BLOCK_SIZE 32            // SHA256 outputs a 32 byte digest
17 | 
18 | /**************************** DATA TYPES ****************************/
19 | typedef unsigned char BYTE;             // 8-bit byte
20 | typedef unsigned int  WORD;             // 32-bit word, change to "long" for 16-bit machines
21 | 
22 | class Sha256 {
23 |     public:
24 | 	Sha256();
25 | 	void update(const BYTE data[], size_t len);
26 | 	void final(BYTE hash[]);
27 |     private:
28 | 	BYTE data[64];
29 | 	WORD datalen;
30 | 	unsigned long long bitlen;
31 | 	WORD state[8];
32 | 	void transform();
33 | };
34 | 
35 | #endif   // SHA256_H
36 | 


--------------------------------------------------------------------------------