├── .gitignore
├── LICENSE.txt
├── README.md
├── asm_arm.inc
├── asm_arm_mult_square.inc
├── asm_arm_mult_square_umaal.inc
├── asm_avr.inc
├── asm_avr_mult_square.inc
├── curve-specific.inc
├── emk_project.py
├── emk_rules.py
├── examples
    └── ecc_test
    │   └── ecc_test.ino
├── library.properties
├── platform-specific.inc
├── scripts
    ├── mult_arm.py
    ├── mult_avr.py
    ├── mult_avr_extra.py
    ├── square_arm.py
    └── square_avr.py
├── test
    ├── ecdsa_test_vectors.c
    ├── emk_rules.py
    ├── public_key_test_vectors.c
    ├── test_compress.c
    ├── test_compute.c
    ├── test_ecdh.c
    ├── test_ecdsa.c
    └── test_ecdsa_deterministic.c.example
├── types.h
├── uECC.c
├── uECC.h
└── uECC_vli.h


/.gitignore:
--------------------------------------------------------------------------------
 1 | __build__/
 2 | __pycache__
 3 | *.pyc
 4 | *.pyo
 5 | *.pyd
 6 | *.pyz
 7 | *.egg-info/
 8 | *.a
 9 | *.o
10 | *.so
11 | .DS_Store
12 | 


--------------------------------------------------------------------------------
/LICENSE.txt:
--------------------------------------------------------------------------------
 1 | Copyright (c) 2014, Kenneth MacKay
 2 | All rights reserved.
 3 | 
 4 | Redistribution and use in source and binary forms, with or without modification,
 5 | are permitted provided that the following conditions are met:
 6 |  * Redistributions of source code must retain the above copyright notice, this
 7 |    list of conditions and the following disclaimer.
 8 |  * Redistributions in binary form must reproduce the above copyright notice,
 9 |    this list of conditions and the following disclaimer in the documentation
10 |    and/or other materials provided with the distribution.
11 | 
12 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
13 | ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
14 | WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
15 | DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR
16 | ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
17 | (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
18 | LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
19 | ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
20 | (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
21 | SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
22 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | micro-ecc
 2 | ==========
 3 | 
 4 | A small and fast ECDH and ECDSA implementation for 8-bit, 32-bit, and 64-bit processors.
 5 | 
 6 | The static version of micro-ecc (ie, where the curve was selected at compile-time) can be found in the "static" branch.
 7 | 
 8 | Features
 9 | --------
10 | 
11 |  * Resistant to known side-channel attacks.
12 |  * Written in C, with optional GCC inline assembly for AVR, ARM and Thumb platforms.
13 |  * Supports 8, 32, and 64-bit architectures.
14 |  * Small code size.
15 |  * No dynamic memory allocation.
16 |  * Support for 5 standard curves: secp160r1, secp192r1, secp224r1, secp256r1, and secp256k1.
17 |  * BSD 2-clause license.
18 | 
19 | Usage Notes
20 | -----------
21 | ### Point Representation ###
22 | Compressed points are represented in the standard format as defined in http://www.secg.org/sec1-v2.pdf; uncompressed points are represented in standard format, but without the `0x04` prefix. All functions except `uECC_decompress()` only accept uncompressed points; use `uECC_compress()` and `uECC_decompress()` to convert between compressed and uncompressed point representations.
23 | 
24 | Private keys are represented in the standard format.
25 | 
26 | ### Using the Code ###
27 | 
28 | I recommend just copying (or symlinking) the uECC files into your project. Then just `#include "uECC.h"` to use the micro-ecc functions.
29 | 
30 | For use with Arduino, you can use the Library Manager to download micro-ecc (**Sketch**=>**Include Library**=>**Manage Libraries**). You can then use uECC just like any other Arduino library (uECC should show up in the **Sketch**=>**Import Library** submenu).
31 | 
32 | See uECC.h for documentation for each function.
33 | 
34 | ### Compilation Notes ###
35 | 
36 |  * Should compile with any C/C++ compiler that supports stdint.h (this includes Visual Studio 2013).
37 |  * If you want to change the defaults for any of the uECC compile-time options (such as `uECC_OPTIMIZATION_LEVEL`), you must change them in your Makefile or similar so that uECC.c is compiled with the desired values (ie, compile uECC.c with `-DuECC_OPTIMIZATION_LEVEL=3` or whatever).
38 |  * When compiling for a Thumb-1 platform, you must use the `-fomit-frame-pointer` GCC option (this is enabled by default when compiling with `-O1` or higher).
39 |  * When compiling for an ARM/Thumb-2 platform with `uECC_OPTIMIZATION_LEVEL` >= 3, you must use the `-fomit-frame-pointer` GCC option (this is enabled by default when compiling with `-O1` or higher).
40 |  * When compiling for AVR, you must have optimizations enabled (compile with `-O1` or higher).
41 |  * When building for Windows, you will need to link in the `advapi32.lib` system library.
42 | 


--------------------------------------------------------------------------------
/asm_arm.inc:
--------------------------------------------------------------------------------
  1 | /* Copyright 2015, Kenneth MacKay. Licensed under the BSD 2-clause license. */
  2 | 
  3 | #ifndef _UECC_ASM_ARM_H_
  4 | #define _UECC_ASM_ARM_H_
  5 | /* uECC_MIN_WORDS: each enabled block below overrides the previous one, so the final value is the smallest word count among the enabled curves. */
  6 | #if (uECC_SUPPORTS_secp256r1 || uECC_SUPPORTS_secp256k1)
  7 |     #define uECC_MIN_WORDS 8
  8 | #endif
  9 | #if uECC_SUPPORTS_secp224r1
 10 |     #undef uECC_MIN_WORDS
 11 |     #define uECC_MIN_WORDS 7
 12 | #endif
 13 | #if uECC_SUPPORTS_secp192r1
 14 |     #undef uECC_MIN_WORDS
 15 |     #define uECC_MIN_WORDS 6
 16 | #endif
 17 | #if uECC_SUPPORTS_secp160r1
 18 |     #undef uECC_MIN_WORDS
 19 |     #define uECC_MIN_WORDS 5
 20 | #endif
 21 | /* Operand constraints for the asm blocks below: "l" on Thumb-1, "r" otherwise (per GCC's ARM constraints, "l" means r0-r7 in Thumb state). */
 22 | #if (uECC_PLATFORM == uECC_arm_thumb)
 23 |     #define REG_RW "+&l"
 24 |     #define REG_WRITE "=&l"
 25 | #else
 26 |     #define REG_RW "+&r"
 27 |     #define REG_WRITE "=&r"
 28 | #endif
 29 | /* Same idea, but the _LO variants also select "l" on Thumb-2. */
 30 | #if (uECC_PLATFORM == uECC_arm_thumb || uECC_PLATFORM == uECC_arm_thumb2)
 31 |     #define REG_RW_LO "+&l"
 32 |     #define REG_WRITE_LO "=&l"
 33 | #else
 34 |     #define REG_RW_LO "+&r"
 35 |     #define REG_WRITE_LO "=&r"
 36 | #endif
 37 | /* RESUME_SYNTAX closes the asm blocks in this file (each opens with ".syntax unified"): empty on Thumb-2, otherwise it switches the assembler back to divided syntax. */
 38 | #if (uECC_PLATFORM == uECC_arm_thumb2)
 39 |     #define RESUME_SYNTAX
 40 | #else
 41 |     #define RESUME_SYNTAX ".syntax divided \n\t"
 42 | #endif
 43 | 
 44 | #if (uECC_OPTIMIZATION_LEVEL >= 2)
 45 | /* Unrolled add (uECC_OPTIMIZATION_LEVEL >= 2): result = left + right over num_words 32-bit words; returns the carry out of the last word (0 or 1). */
 46 | uECC_VLI_API uECC_word_t uECC_vli_add(uECC_word_t *result,
 47 |                                       const uECC_word_t *left,
 48 |                                       const uECC_word_t *right,
 49 |                                       wordcount_t num_words) {
 50 | #if (uECC_MAX_WORDS != uECC_MIN_WORDS)
 51 |   #if (uECC_PLATFORM == uECC_arm_thumb) || (uECC_PLATFORM == uECC_arm_thumb2)
 52 |     uint32_t jump = (uECC_MAX_WORDS - num_words) * 4 * 2 + 1; /* bytes to skip: 4 instructions per unrolled step, counted as 2 bytes each; + 1 sets bit 0 of the bx target (Thumb state) */
 53 |   #else /* ARM */
 54 |     uint32_t jump = (uECC_MAX_WORDS - num_words) * 4 * 4; /* bytes to skip: 4 instructions per unrolled step, 4 bytes each */
 55 |   #endif
 56 | #endif
 57 |     uint32_t carry;
 58 |     uint32_t left_word;
 59 |     uint32_t right_word;
 60 |     /* left_word/right_word are scratch registers only; the three pointers are read-write because ldmia/stmia post-increment them. */
 61 |     __asm__ volatile (
 62 |         ".syntax unified \n\t"
 63 |         "movs %[carry], #0 \n\t"
 64 |     #if (uECC_MAX_WORDS != uECC_MIN_WORDS)
 65 |         "adr %[left], 1f \n\t" /* scratch = address of label 1 (start of the unrolled steps) */
 66 |         ".align 4 \n\t"
 67 |         "adds %[jump], %[left] \n\t" /* jump = label 1 + byte offset of the first step that is needed */
 68 |     #endif
 69 |         /* First word: plain adds, which also establishes the carry flag for the adcs chain. */
 70 |         "ldmia %[lptr]!, {%[left]} \n\t"
 71 |         "ldmia %[rptr]!, {%[right]} \n\t"
 72 |         "adds %[left], %[right] \n\t"
 73 |         "stmia %[dptr]!, {%[left]} \n\t"
 74 |         /* When fewer than uECC_MAX_WORDS words are in use, branch into the middle of the unrolled steps. */
 75 |     #if (uECC_MAX_WORDS != uECC_MIN_WORDS)
 76 |         "bx %[jump] \n\t"
 77 |     #endif
 78 |         "1: \n\t"
 79 |         REPEAT(DEC(uECC_MAX_WORDS),
 80 |             "ldmia %[lptr]!, {%[left]} \n\t"
 81 |             "ldmia %[rptr]!, {%[right]} \n\t"
 82 |             "adcs %[left], %[right] \n\t"
 83 |             "stmia %[dptr]!, {%[left]} \n\t")
 84 |         /* REPEAT/DEC are defined elsewhere; presumably they emit the step above uECC_MAX_WORDS - 1 times -- confirm. */
 85 |         "adcs %[carry], %[carry] \n\t" /* carry = 0 + 0 + C, i.e. capture the final carry flag */
 86 |         RESUME_SYNTAX
 87 |         : [dptr] REG_RW_LO (result), [lptr] REG_RW_LO (left), [rptr] REG_RW_LO (right),
 88 |     #if (uECC_MAX_WORDS != uECC_MIN_WORDS)
 89 |           [jump] REG_RW_LO (jump),
 90 |     #endif
 91 |           [carry] REG_WRITE_LO (carry), [left] REG_WRITE_LO (left_word),
 92 |           [right] REG_WRITE_LO (right_word)
 93 |         :
 94 |         : "cc", "memory"
 95 |     );
 96 |     return carry;
 97 | }
 98 | #define asm_add 1
 99 | /* Unrolled subtract (uECC_OPTIMIZATION_LEVEL >= 2): result = left - right over num_words 32-bit words; returns 1 if the subtraction borrowed, 0 otherwise. */
100 | uECC_VLI_API uECC_word_t uECC_vli_sub(uECC_word_t *result,
101 |                                       const uECC_word_t *left,
102 |                                       const uECC_word_t *right,
103 |                                       wordcount_t num_words) {
104 | #if (uECC_MAX_WORDS != uECC_MIN_WORDS)
105 |   #if (uECC_PLATFORM == uECC_arm_thumb) || (uECC_PLATFORM == uECC_arm_thumb2)
106 |     uint32_t jump = (uECC_MAX_WORDS - num_words) * 4 * 2 + 1; /* bytes to skip: 4 instructions per unrolled step, counted as 2 bytes each; + 1 sets bit 0 of the bx target (Thumb state) */
107 |   #else /* ARM */
108 |     uint32_t jump = (uECC_MAX_WORDS - num_words) * 4 * 4; /* bytes to skip: 4 instructions per unrolled step, 4 bytes each */
109 |   #endif
110 | #endif
111 |     uint32_t carry;
112 |     uint32_t left_word;
113 |     uint32_t right_word;
114 |     /* left_word/right_word are scratch registers only; the three pointers are read-write because ldmia/stmia post-increment them. */
115 |     __asm__ volatile (
116 |         ".syntax unified \n\t"
117 |         "movs %[carry], #0 \n\t"
118 |     #if (uECC_MAX_WORDS != uECC_MIN_WORDS)
119 |         "adr %[left], 1f \n\t" /* scratch = address of label 1 (start of the unrolled steps) */
120 |         ".align 4 \n\t"
121 |         "adds %[jump], %[left] \n\t" /* jump = label 1 + byte offset of the first step that is needed */
122 |     #endif
123 |         /* First word: plain subs, which also establishes the carry flag for the sbcs chain. */
124 |         "ldmia %[lptr]!, {%[left]} \n\t"
125 |         "ldmia %[rptr]!, {%[right]} \n\t"
126 |         "subs %[left], %[right] \n\t"
127 |         "stmia %[dptr]!, {%[left]} \n\t"
128 |         /* When fewer than uECC_MAX_WORDS words are in use, branch into the middle of the unrolled steps. */
129 |     #if (uECC_MAX_WORDS != uECC_MIN_WORDS)
130 |         "bx %[jump] \n\t"
131 |     #endif
132 |         "1: \n\t"
133 |         REPEAT(DEC(uECC_MAX_WORDS),
134 |             "ldmia %[lptr]!, {%[left]} \n\t"
135 |             "ldmia %[rptr]!, {%[right]} \n\t"
136 |             "sbcs %[left], %[right] \n\t"
137 |             "stmia %[dptr]!, {%[left]} \n\t")
138 |         /* REPEAT/DEC are defined elsewhere; presumably they emit the step above uECC_MAX_WORDS - 1 times -- confirm. */
139 |         "adcs %[carry], %[carry] \n\t" /* carry = 0 + 0 + C, i.e. capture the final carry flag */
140 |         RESUME_SYNTAX
141 |         : [dptr] REG_RW_LO (result), [lptr] REG_RW_LO (left), [rptr] REG_RW_LO (right),
142 |     #if (uECC_MAX_WORDS != uECC_MIN_WORDS)
143 |           [jump] REG_RW_LO (jump),
144 |     #endif
145 |           [carry] REG_WRITE_LO (carry), [left] REG_WRITE_LO (left_word),
146 |           [right] REG_WRITE_LO (right_word)
147 |         :
148 |         : "cc", "memory"
149 |     );
150 |     return !carry; /* Note that on ARM, carry flag set means "no borrow" when subtracting
151 |                       (for some reason...) */
152 | }
153 | #define asm_sub 1
154 | 
155 | #endif /* (uECC_OPTIMIZATION_LEVEL >= 2) */
156 | 
157 | #if (uECC_OPTIMIZATION_LEVEL >= 3)
158 | 
159 | #if (uECC_PLATFORM != uECC_arm_thumb)
160 | 
161 | #if uECC_ARM_USE_UMAAL
162 |     #include "asm_arm_mult_square_umaal.inc"
163 | #else
164 |     #include "asm_arm_mult_square.inc"
165 | #endif
166 | 
167 | #if (uECC_OPTIMIZATION_LEVEL == 3)
168 | 
169 | uECC_VLI_API void uECC_vli_mult(uint32_t *result,
170 |                                 const uint32_t *left,
171 |                                 const uint32_t *right,
172 |                                 wordcount_t num_words) {
173 |     register uint32_t *r0 __asm__("r0") = result;
174 |     register const uint32_t *r1 __asm__("r1") = left;
175 |     register const uint32_t *r2 __asm__("r2") = right;
176 |     register uint32_t r3 __asm__("r3") = num_words;
177 |     
178 |     __asm__ volatile (
179 |         ".syntax unified \n\t"
180 | #if (uECC_MIN_WORDS == 5)
181 |         FAST_MULT_ASM_5
182 |     #if (uECC_MAX_WORDS > 5)
183 |         FAST_MULT_ASM_5_TO_6
184 |     #endif
185 |     #if (uECC_MAX_WORDS > 6)
186 |         FAST_MULT_ASM_6_TO_7
187 |     #endif
188 |     #if (uECC_MAX_WORDS > 7)
189 |         FAST_MULT_ASM_7_TO_8
190 |     #endif
191 | #elif (uECC_MIN_WORDS == 6)
192 |         FAST_MULT_ASM_6
193 |     #if (uECC_MAX_WORDS > 6)
194 |         FAST_MULT_ASM_6_TO_7
195 |     #endif
196 |     #if (uECC_MAX_WORDS > 7)
197 |         FAST_MULT_ASM_7_TO_8
198 |     #endif
199 | #elif (uECC_MIN_WORDS == 7)
200 |         FAST_MULT_ASM_7
201 |     #if (uECC_MAX_WORDS > 7)
202 |         FAST_MULT_ASM_7_TO_8
203 |     #endif
204 | #elif (uECC_MIN_WORDS == 8)
205 |         FAST_MULT_ASM_8
206 | #endif
207 |         "1: \n\t"
208 |         RESUME_SYNTAX
209 |         : "+r" (r0), "+r" (r1), "+r" (r2)
210 |         : "r" (r3)
211 |         : "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r12", "r14", "cc", "memory"
212 |     );
213 | }
214 | #define asm_mult 1
215 | /* Level-3 square: same layout as the level-3 multiply, built from the FAST_SQUARE_ASM_* sequences; only compiled when uECC_SQUARE_FUNC is set. */
216 | #if uECC_SQUARE_FUNC
217 | uECC_VLI_API void uECC_vli_square(uECC_word_t *result,
218 |                                   const uECC_word_t *left,
219 |                                   wordcount_t num_words) {
220 |     register uint32_t *r0 __asm__("r0") = result;
221 |     register const uint32_t *r1 __asm__("r1") = left;
222 |     register uint32_t r2 __asm__("r2") = num_words;
223 |     /* Operands are pinned to r0-r2; r3 is listed as a clobber here because no operand occupies it. Presumably the FAST_SQUARE_ASM_* macros name these registers directly -- confirm in the .inc file. */
224 |     __asm__ volatile (
225 |         ".syntax unified \n\t"
226 | #if (uECC_MIN_WORDS == 5)
227 |         FAST_SQUARE_ASM_5
228 |     #if (uECC_MAX_WORDS > 5)
229 |         FAST_SQUARE_ASM_5_TO_6
230 |     #endif
231 |     #if (uECC_MAX_WORDS > 6)
232 |         FAST_SQUARE_ASM_6_TO_7
233 |     #endif
234 |     #if (uECC_MAX_WORDS > 7)
235 |         FAST_SQUARE_ASM_7_TO_8
236 |     #endif
237 | #elif (uECC_MIN_WORDS == 6)
238 |         FAST_SQUARE_ASM_6
239 |     #if (uECC_MAX_WORDS > 6)
240 |         FAST_SQUARE_ASM_6_TO_7
241 |     #endif
242 |     #if (uECC_MAX_WORDS > 7)
243 |         FAST_SQUARE_ASM_7_TO_8
244 |     #endif
245 | #elif (uECC_MIN_WORDS == 7)
246 |         FAST_SQUARE_ASM_7
247 |     #if (uECC_MAX_WORDS > 7)
248 |         FAST_SQUARE_ASM_7_TO_8
249 |     #endif
250 | #elif (uECC_MIN_WORDS == 8)
251 |         FAST_SQUARE_ASM_8
252 | #endif
253 | 
254 |         "1: \n\t" /* presumably the exit label the _N_TO_M pieces branch to once num_words (r2) is reached -- confirm in the .inc file */
255 |         RESUME_SYNTAX
256 |         : "+r" (r0), "+r" (r1)
257 |         : "r" (r2)
258 |         : "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r12", "r14", "cc", "memory"
259 |     );
260 | }
261 | #define asm_square 1
262 | #endif /* uECC_SQUARE_FUNC */
263 | 
264 | #else /* (uECC_OPTIMIZATION_LEVEL > 3) */
265 | /* Level > 3 multiply: dispatches on num_words to one complete FAST_MULT_ASM_<n> sequence per enabled curve size. */
266 | uECC_VLI_API void uECC_vli_mult(uint32_t *result,
267 |                                 const uint32_t *left,
268 |                                 const uint32_t *right,
269 |                                 wordcount_t num_words) {
270 |     register uint32_t *r0 __asm__("r0") = result;
271 |     register const uint32_t *r1 __asm__("r1") = left;
272 |     register const uint32_t *r2 __asm__("r2") = right;
273 |     register uint32_t r3 __asm__("r3") = num_words;
274 |     /* NOTE: if num_words matches none of the compiled-in sizes, every branch is skipped and result is left unwritten. */
275 | #if uECC_SUPPORTS_secp160r1
276 |     if (num_words == 5) {
277 |         __asm__ volatile (
278 |             ".syntax unified \n\t"
279 |             FAST_MULT_ASM_5
280 |             RESUME_SYNTAX
281 |             : "+r" (r0), "+r" (r1), "+r" (r2)
282 |             : "r" (r3)
283 |             : "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r12", "r14", "cc", "memory"
284 |         );
285 |         return;
286 |     }
287 | #endif
288 | #if uECC_SUPPORTS_secp192r1
289 |     if (num_words == 6) {
290 |         __asm__ volatile (
291 |             ".syntax unified \n\t"
292 |             FAST_MULT_ASM_6
293 |             RESUME_SYNTAX
294 |             : "+r" (r0), "+r" (r1), "+r" (r2)
295 |             : "r" (r3)
296 |             : "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r12", "r14", "cc", "memory"
297 |         );
298 |         return;
299 |     }
300 | #endif
301 | #if uECC_SUPPORTS_secp224r1
302 |     if (num_words == 7) {
303 |         __asm__ volatile (
304 |             ".syntax unified \n\t"
305 |             FAST_MULT_ASM_7
306 |             RESUME_SYNTAX
307 |             : "+r" (r0), "+r" (r1), "+r" (r2)
308 |             : "r" (r3)
309 |             : "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r12", "r14", "cc", "memory"
310 |         );
311 |         return;
312 |     }
313 | #endif
314 | #if (uECC_SUPPORTS_secp256r1 || uECC_SUPPORTS_secp256k1)
315 |     if (num_words == 8) {
316 |         __asm__ volatile (
317 |             ".syntax unified \n\t"
318 |             FAST_MULT_ASM_8
319 |             RESUME_SYNTAX
320 |             : "+r" (r0), "+r" (r1), "+r" (r2)
321 |             : "r" (r3)
322 |             : "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r12", "r14", "cc", "memory"
323 |         );
324 |         return;
325 |     }
326 | #endif
327 | }
328 | #define asm_mult 1
329 | /* Level > 3 square: dispatches on num_words to one complete FAST_SQUARE_ASM_<n> sequence per enabled curve size; only compiled when uECC_SQUARE_FUNC is set. */
330 | #if uECC_SQUARE_FUNC
331 | uECC_VLI_API void uECC_vli_square(uECC_word_t *result,
332 |                                   const uECC_word_t *left,
333 |                                   wordcount_t num_words) {
334 |     register uint32_t *r0 __asm__("r0") = result;
335 |     register const uint32_t *r1 __asm__("r1") = left;
336 |     register uint32_t r2 __asm__("r2") = num_words;
337 |     /* NOTE: if num_words matches none of the compiled-in sizes, every branch is skipped and result is left unwritten. */
338 | #if uECC_SUPPORTS_secp160r1
339 |     if (num_words == 5) {
340 |         __asm__ volatile (
341 |             ".syntax unified \n\t"
342 |             FAST_SQUARE_ASM_5
343 |             RESUME_SYNTAX
344 |             : "+r" (r0), "+r" (r1)
345 |             : "r" (r2)
346 |             : "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r12", "r14", "cc", "memory"
347 |         );
348 |         return;
349 |     }
350 | #endif
351 | #if uECC_SUPPORTS_secp192r1
352 |     if (num_words == 6) {
353 |         __asm__ volatile (
354 |             ".syntax unified \n\t"
355 |             FAST_SQUARE_ASM_6
356 |             RESUME_SYNTAX
357 |             : "+r" (r0), "+r" (r1)
358 |             : "r" (r2)
359 |             : "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r12", "r14", "cc", "memory"
360 |         );
361 |         return;
362 |     }
363 | #endif
364 | #if uECC_SUPPORTS_secp224r1
365 |     if (num_words == 7) {
366 |         __asm__ volatile (
367 |             ".syntax unified \n\t"
368 |             FAST_SQUARE_ASM_7
369 |             RESUME_SYNTAX
370 |             : "+r" (r0), "+r" (r1)
371 |             : "r" (r2)
372 |             : "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r12", "r14", "cc", "memory"
373 |         );
374 |         return;
375 |     }
376 | #endif
377 | #if (uECC_SUPPORTS_secp256r1 || uECC_SUPPORTS_secp256k1)
378 |     if (num_words == 8) {
379 |         __asm__ volatile (
380 |             ".syntax unified \n\t"
381 |             FAST_SQUARE_ASM_8
382 |             RESUME_SYNTAX
383 |             : "+r" (r0), "+r" (r1)
384 |             : "r" (r2)
385 |             : "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r12", "r14", "cc", "memory"
386 |         );
387 |         return;
388 |     }
389 | #endif
390 | }
391 | #define asm_square 1
392 | #endif /* uECC_SQUARE_FUNC */
393 | 
394 | #endif /* (uECC_OPTIMIZATION_LEVEL > 3) */
395 | 
396 | #endif /* uECC_PLATFORM != uECC_arm_thumb */
397 | 
398 | #endif /* (uECC_OPTIMIZATION_LEVEL >= 3) */
399 | 
400 | /* ---- "Small" implementations ---- */
401 | /* Looping add, used when no unrolled version was defined above: result = left + right over num_words words; returns the carry out (0 or 1). */
402 | #if !asm_add
403 | uECC_VLI_API uECC_word_t uECC_vli_add(uECC_word_t *result,
404 |                                       const uECC_word_t *left,
405 |                                       const uECC_word_t *right,
406 |                                       wordcount_t num_words) {
407 |     uint32_t carry = 0;
408 |     uint32_t left_word;
409 |     uint32_t right_word;
410 |     /* The carry is kept in a register between iterations because subs (the counter decrement) overwrites the flags. The body runs before the counter test, so num_words must be at least 1. */
411 |     __asm__ volatile (
412 |         ".syntax unified \n\t"
413 |         "1: \n\t"
414 |         "ldmia %[lptr]!, {%[left]} \n\t"  /* Load left word. */
415 |         "ldmia %[rptr]!, {%[right]} \n\t" /* Load right word. */
416 |         "lsrs %[carry], #1 \n\t"          /* Set up carry flag (carry = 0 after this). */
417 |         "adcs %[left], %[left], %[right] \n\t"   /* Add with carry. */
418 |         "adcs %[carry], %[carry], %[carry] \n\t" /* Store carry bit. */
419 |         "stmia %[dptr]!, {%[left]} \n\t"  /* Store result word. */
420 |         "subs %[ctr], #1 \n\t"            /* Decrement counter. */
421 |         "bne 1b \n\t"                     /* Loop until counter == 0. */
422 |         RESUME_SYNTAX
423 |         : [dptr] REG_RW (result), [lptr] REG_RW (left), [rptr] REG_RW (right),
424 |           [ctr] REG_RW (num_words), [carry] REG_RW (carry),
425 |           [left] REG_WRITE (left_word), [right] REG_WRITE (right_word)
426 |         :
427 |         : "cc", "memory"
428 |     );
429 |     return carry;
430 | }
431 | #define asm_add 1
432 | #endif
433 | /* Looping subtract, used when no unrolled version was defined above: result = left - right over num_words words; returns 1 if the subtraction borrowed, 0 otherwise. */
434 | #if !asm_sub
435 | uECC_VLI_API uECC_word_t uECC_vli_sub(uECC_word_t *result,
436 |                                       const uECC_word_t *left,
437 |                                       const uECC_word_t *right,
438 |                                       wordcount_t num_words) {
439 |     uint32_t carry = 1; /* carry = 1 initially (means don't borrow) */
440 |     uint32_t left_word;
441 |     uint32_t right_word;
442 |     /* The carry is kept in a register between iterations because subs (the counter decrement) overwrites the flags. The body runs before the counter test, so num_words must be at least 1. */
443 |     __asm__ volatile (
444 |         ".syntax unified \n\t"
445 |         "1: \n\t"
446 |         "ldmia %[lptr]!, {%[left]} \n\t"  /* Load left word. */
447 |         "ldmia %[rptr]!, {%[right]} \n\t" /* Load right word. */
448 |         "lsrs %[carry], #1 \n\t"          /* Set up carry flag (carry = 0 after this). */
449 |         "sbcs %[left], %[left], %[right] \n\t"   /* Subtract with borrow. */
450 |         "adcs %[carry], %[carry], %[carry] \n\t" /* Store carry bit. */
451 |         "stmia %[dptr]!, {%[left]} \n\t"  /* Store result word. */
452 |         "subs %[ctr], #1 \n\t"            /* Decrement counter. */
453 |         "bne 1b \n\t"                     /* Loop until counter == 0. */
454 |         RESUME_SYNTAX
455 |         : [dptr] REG_RW (result), [lptr] REG_RW (left), [rptr] REG_RW (right),
456 |           [ctr] REG_RW (num_words), [carry] REG_RW (carry),
457 |           [left] REG_WRITE (left_word), [right] REG_WRITE (right_word)
458 |         :
459 |         : "cc", "memory"
460 |     );
461 |     return !carry; /* carry set means "no borrow", so invert it */
462 | }
463 | #define asm_sub 1
464 | #endif
465 | /* Looping multiply, used when no unrolled version was defined above: writes the 2 * num_words word product of left and right to result, one result word (column k) at a time. */
466 | #if !asm_mult
467 | uECC_VLI_API void uECC_vli_mult(uECC_word_t *result,
468 |                                 const uECC_word_t *left,
469 |                                 const uECC_word_t *right,
470 |                                 wordcount_t num_words) {
471 | #if (uECC_PLATFORM != uECC_arm_thumb)
472 |     uint32_t c0 = 0;
473 |     uint32_t c1 = 0;
474 |     uint32_t c2 = 0;
475 |     uint32_t k = 0;
476 |     uint32_t i;
477 |     uint32_t t0, t1;
478 |     /* (c2, c1, c0) is a three-word accumulator for the current column; k and i are byte offsets (word index times 4). */
479 |     __asm__ volatile (
480 |         ".syntax unified \n\t"
481 |         
482 |         "1: \n\t" /* outer loop (k < num_words) */
483 |         "movs %[i], #0 \n\t" /* i = 0 */
484 |         "b 3f \n\t"
485 |         
486 |         "2: \n\t" /* outer loop (k >= num_words) */
487 |         "movs %[i], %[k] \n\t"         /* i = k */
488 |         "subs %[i], %[last_word] \n\t" /* i = k - (num_words - 1) (times 4) */
489 |         
490 |         "3: \n\t" /* inner loop */
491 |         "subs %[t0], %[k], %[i] \n\t" /* t0 = k-i */
492 |         
493 |         "ldr %[t1], [%[right], %[t0]] \n\t" /* t1 = right[k - i] */
494 |         "ldr %[t0], [%[left], %[i]] \n\t"   /* t0 = left[i] */
495 |         
496 |         "umull %[t0], %[t1], %[t0], %[t1] \n\t" /* (t0, t1) = left[i] * right[k - i] */
497 |         
498 |         "adds %[c0], %[c0], %[t0] \n\t" /* add low word to c0 */
499 |         "adcs %[c1], %[c1], %[t1] \n\t" /* add high word to c1, including carry */
500 |         "adcs %[c2], %[c2], #0 \n\t"    /* add carry to c2 */
501 | 
502 |         "adds %[i], #4 \n\t"          /* i += 4 */
503 |         "cmp %[i], %[last_word] \n\t" /* i > (num_words - 1) (times 4)? */
504 |         "bgt 4f \n\t"                 /*   if so, exit the loop */
505 |         "cmp %[i], %[k] \n\t"         /* i <= k? */
506 |         "ble 3b \n\t"                 /*   if so, continue looping */
507 |         
508 |         "4: \n\t" /* end inner loop */
509 |         
510 |         "str %[c0], [%[result], %[k]] \n\t" /* result[k] = c0 */
511 |         "mov %[c0], %[c1] \n\t"       /* c0 = c1 */
512 |         "mov %[c1], %[c2] \n\t"       /* c1 = c2 */
513 |         "movs %[c2], #0 \n\t"         /* c2 = 0 */
514 |         "adds %[k], #4 \n\t"          /* k += 4 */
515 |         "cmp %[k], %[last_word] \n\t" /* k <= (num_words - 1) (times 4) ? */
516 |         "ble 1b \n\t"                 /*   if so, loop back, start with i = 0 */
517 |         "cmp %[k], %[last_word], lsl #1 \n\t" /* k <= (num_words * 2 - 2) (times 4) ? */
518 |         "ble 2b \n\t"                 /*   if so, loop back, start with i = (k + 1) - num_words */
519 |         /* end outer loop */
520 |         
521 |         "str %[c0], [%[result], %[k]] \n\t" /* result[num_words * 2 - 1] = c0 */
522 |         RESUME_SYNTAX
523 |         : [c0] "+r" (c0), [c1] "+r" (c1), [c2] "+r" (c2),
524 |           [k] "+r" (k), [i] "=&r" (i), [t0] "=&r" (t0), [t1] "=&r" (t1)
525 |         : [result] "r" (result), [left] "r" (left), [right] "r" (right),
526 |           [last_word] "r" ((num_words - 1) * 4)
527 |         : "cc", "memory"
528 |     );
529 |     
530 | #else /* Thumb-1 */
531 |     uint32_t r4, r5, r6, r7;
532 |     /* Same column-wise algorithm, but each 32x32 product is built from four 16x16 muls, and r8-r12/r14 hold loop bounds and saved values while the low registers are reused. */
533 |     __asm__ volatile (
534 |         ".syntax unified \n\t"
535 |         "subs %[r3], #1 \n\t" /* r3 = num_words - 1 */
536 |         "lsls %[r3], #2 \n\t" /* r3 = (num_words - 1) * 4 */
537 |         "mov r8, %[r3] \n\t"  /* r8 = (num_words - 1) * 4 */
538 |         "lsls %[r3], #1 \n\t" /* r3 = (num_words - 1) * 8 */
539 |         "mov r9, %[r3] \n\t"  /* r9 = (num_words - 1) * 8 */
540 |         "movs %[r3], #0 \n\t" /* c0 = 0 */
541 |         "movs %[r4], #0 \n\t" /* c1 = 0 */
542 |         "movs %[r5], #0 \n\t" /* c2 = 0 */
543 |         "movs %[r6], #0 \n\t" /* k = 0 */
544 |         /* NOTE(review): %[r0] is declared as an input-only operand but is used as scratch below; it is saved here and restored by the pop at the end. */
545 |         "push {%[r0]} \n\t" /* keep result on the stack */
546 |         
547 |         "1: \n\t" /* outer loop (k < num_words) */
548 |         "movs %[r7], #0 \n\t" /* r7 = i = 0 */
549 |         "b 3f \n\t"
550 |         
551 |         "2: \n\t" /* outer loop (k >= num_words) */
552 |         "movs %[r7], %[r6] \n\t" /* r7 = k */
553 |         "mov %[r0], r8 \n\t"     /* r0 = (num_words - 1) * 4 */
554 |         "subs %[r7], %[r0] \n\t" /* r7 = i = k - (num_words - 1) (times 4) */
555 |         
556 |         "3: \n\t" /* inner loop */
557 |         "mov r10, %[r3] \n\t" /* save c0 */
558 |         "mov r11, %[r4] \n\t" /* save c1 */
559 |         "mov r12, %[r5] \n\t" /* save c2 */
560 |         "mov r14, %[r6] \n\t" /* save k */
561 |         "subs %[r0], %[r6], %[r7] \n\t"          /* r0 = k - i */
562 |         
563 |         "ldr %[r4], [%[r2], %[r0]] \n\t" /* r4 = right[k - i] */
564 |         "ldr %[r0], [%[r1], %[r7]] \n\t" /* r0 = left[i] */
565 |         
566 |         "lsrs %[r3], %[r0], #16 \n\t" /* r3 = a1 */
567 |         "uxth %[r0], %[r0] \n\t"      /* r0 = a0 */
568 |         
569 |         "lsrs %[r5], %[r4], #16 \n\t" /* r5 = b1 */
570 |         "uxth %[r4], %[r4] \n\t"      /* r4 = b0 */
571 |         
572 |         "movs %[r6], %[r3] \n\t"        /* r6 = a1 */
573 |         "muls %[r6], %[r5], %[r6] \n\t" /* r6 = a1 * b1 */
574 |         "muls %[r3], %[r4], %[r3] \n\t" /* r3 = b0 * a1 */
575 |         "muls %[r5], %[r0], %[r5] \n\t" /* r5 = a0 * b1 */
576 |         "muls %[r0], %[r4], %[r0] \n\t" /* r0 = a0 * b0 */
577 |         
578 |         /* Add middle terms */
579 |         "lsls %[r4], %[r3], #16 \n\t"
580 |         "lsrs %[r3], %[r3], #16 \n\t"
581 |         "adds %[r0], %[r4] \n\t"
582 |         "adcs %[r6], %[r3] \n\t"
583 |         
584 |         "lsls %[r4], %[r5], #16 \n\t"
585 |         "lsrs %[r5], %[r5], #16 \n\t"
586 |         "adds %[r0], %[r4] \n\t"
587 |         "adcs %[r6], %[r5] \n\t"
588 |         /* (r6, r0) now holds the 64-bit product; restore the accumulator and add the product into it. */
589 |         "mov %[r3], r10\n\t"
590 |         "mov %[r4], r11\n\t"
591 |         "mov %[r5], r12\n\t"
592 |         "adds %[r3], %[r0] \n\t"         /* add low word to c0 */
593 |         "adcs %[r4], %[r6] \n\t"         /* add high word to c1, including carry */
594 |         "movs %[r0], #0 \n\t"            /* r0 = 0 (does not affect carry bit) */
595 |         "adcs %[r5], %[r0] \n\t"         /* add carry to c2 */
596 |         
597 |         "mov %[r6], r14\n\t" /* r6 = k */
598 | 
599 |         "adds %[r7], #4 \n\t"   /* i += 4 */
600 |         "cmp %[r7], r8 \n\t"    /* i > (num_words - 1) (times 4)? */
601 |         "bgt 4f \n\t"           /*   if so, exit the loop */
602 |         "cmp %[r7], %[r6] \n\t" /* i <= k? */
603 |         "ble 3b \n\t"           /*   if so, continue looping */
604 |         
605 |         "4: \n\t" /* end inner loop */
606 |         
607 |         "ldr %[r0], [sp, #0] \n\t" /* r0 = result */
608 |         
609 |         "str %[r3], [%[r0], %[r6]] \n\t" /* result[k] = c0 */
610 |         "mov %[r3], %[r4] \n\t"          /* c0 = c1 */
611 |         "mov %[r4], %[r5] \n\t"          /* c1 = c2 */
612 |         "movs %[r5], #0 \n\t"            /* c2 = 0 */
613 |         "adds %[r6], #4 \n\t"            /* k += 4 */
614 |         "cmp %[r6], r8 \n\t"             /* k <= (num_words - 1) (times 4) ? */
615 |         "ble 1b \n\t"                    /*   if so, loop back, start with i = 0 */
616 |         "cmp %[r6], r9 \n\t"             /* k <= (num_words * 2 - 2) (times 4) ? */
617 |         "ble 2b \n\t"                    /*   if so, loop back, with i = (k + 1) - num_words */
618 |         /* end outer loop */
619 |         
620 |         "str %[r3], [%[r0], %[r6]] \n\t" /* result[num_words * 2 - 1] = c0 */
621 |         "pop {%[r0]} \n\t"               /* pop result off the stack */
622 |         
623 |         RESUME_SYNTAX
624 |         : [r3] "+l" (num_words), [r4] "=&l" (r4),
625 |           [r5] "=&l" (r5), [r6] "=&l" (r6), [r7] "=&l" (r7)
626 |         : [r0] "l" (result), [r1] "l" (left), [r2] "l" (right)
627 |         : "r8", "r9", "r10", "r11", "r12", "r14", "cc", "memory"
628 |     );
629 | #endif
630 | }
631 | #define asm_mult 1
632 | #endif
633 | 
634 | #if uECC_SQUARE_FUNC /* the dedicated squaring routine is compiled only when uECC_SQUARE_FUNC is non-zero */
635 | #if !asm_square /* skipped when asm_square is already set; this block sets it after the definition */
636 | uECC_VLI_API void uECC_vli_square(uECC_word_t *result,     /* out: left * left; words 0 .. 2 * num_words - 1 are written */
637 |                                   const uECC_word_t *left, /* in: operand; words 0 .. num_words - 1 are read */
638 |                                   wordcount_t num_words) { /* operand length in words (the asm steps 4 bytes per word) */
639 | #if (uECC_PLATFORM != uECC_arm_thumb) /* variant that relies on umull and three-operand instructions */
640 |     uint32_t c0 = 0; /* column accumulator, low word */
641 |     uint32_t c1 = 0; /* column accumulator, middle word */
642 |     uint32_t c2 = 0; /* column accumulator, high word */
643 |     uint32_t k = 0;  /* byte offset of the result word currently being computed */
644 |     uint32_t i, tt;  /* byte offsets of the two operand words: i and k - i */
645 |     uint32_t t0, t1; /* the two operand words, then the low/high halves of their product */
646 |     
647 |     __asm__ volatile (
648 |         ".syntax unified \n\t"
649 |         
650 |         "1: \n\t" /* outer loop (k < num_words) */
651 |         "movs %[i], #0 \n\t" /* i = 0 */
652 |         "b 3f \n\t"
653 |         
654 |         "2: \n\t" /* outer loop (k >= num_words) */
655 |         "movs %[i], %[k] \n\t"         /* i = k */
656 |         "subs %[i], %[last_word] \n\t" /* i = k - (num_words - 1) (times 4) */
657 |         
658 |         "3: \n\t" /* inner loop: accumulate left[i] * left[k - i] into (c2, c1, c0) */
659 |         "subs %[tt], %[k], %[i] \n\t" /* tt = k-i */
660 |         
661 |         "ldr %[t1], [%[left], %[tt]] \n\t" /* t1 = left[k - i] */
662 |         "ldr %[t0], [%[left], %[i]] \n\t"  /* t0 = left[i] */
663 |         
664 |         "umull %[t0], %[t1], %[t0], %[t1] \n\t" /* (t0, t1) = left[i] * left[k - i]; t0 = low word, t1 = high word */
665 |         
666 |         "cmp %[i], %[tt] \n\t"      /* (i < k - i) ? */
667 |         "bge 4f \n\t"               /*   if i >= k - i, skip the extra addition (product counts once) */
668 |         "adds %[c0], %[c0], %[t0] \n\t" /* i < k - i: the mirrored pair is never visited, so add the product a first time here */
669 |         "adcs %[c1], %[c1], %[t1] \n\t" /* add high word to c1, including carry */
670 |         "adcs %[c2], %[c2], #0 \n\t"    /* add carry to c2 */
671 |         
672 |         "4: \n\t" /* reached for every product; for i < k - i this is the second addition */
673 |         "adds %[c0], %[c0], %[t0] \n\t" /* add low word to c0 */
674 |         "adcs %[c1], %[c1], %[t1] \n\t" /* add high word to c1, including carry */
675 |         "adcs %[c2], %[c2], #0 \n\t"    /* add carry to c2 */
676 |         
677 |         "adds %[i], #4 \n\t"          /* i += 4 */
678 |         "cmp %[i], %[k] \n\t"         /* i >= k? */
679 |         "bge 5f \n\t"                 /*   if so, exit the loop */
680 |         "subs %[tt], %[k], %[i] \n\t" /* tt = k - i */
681 |         "cmp %[i], %[tt] \n\t"        /* i <= k - i? */
682 |         "ble 3b \n\t"                 /*   if so, continue looping (pairs with i > k - i were covered by the doubling) */
683 |         
684 |         "5: \n\t" /* end inner loop */
685 |         
686 |         "str %[c0], [%[result], %[k]] \n\t" /* result[k] = c0 */
687 |         "mov %[c0], %[c1] \n\t"       /* c0 = c1 (shift the accumulator down one word) */
688 |         "mov %[c1], %[c2] \n\t"       /* c1 = c2 */
689 |         "movs %[c2], #0 \n\t"         /* c2 = 0 */
690 |         "adds %[k], #4 \n\t"          /* k += 4 */
691 |         "cmp %[k], %[last_word] \n\t" /* k <= (num_words - 1) (times 4) ? */
692 |         "ble 1b \n\t"                 /*   if so, loop back, start with i = 0 */
693 |         "cmp %[k], %[last_word], lsl #1 \n\t" /* k <= (num_words * 2 - 2) (times 4) ? */
694 |         "ble 2b \n\t"                 /*   if so, loop back, start with i = (k + 1) - num_words */
695 |         /* end outer loop */
696 |         
697 |         "str %[c0], [%[result], %[k]] \n\t" /* result[num_words * 2 - 1] = c0 */
698 |         RESUME_SYNTAX /* defined outside this view; presumably undoes the ".syntax unified" directive above */
699 |         : [c0] "+r" (c0), [c1] "+r" (c1), [c2] "+r" (c2),
700 |           [k] "+r" (k), [i] "=&r" (i), [tt] "=&r" (tt), [t0] "=&r" (t0), [t1] "=&r" (t1)
701 |         : [result] "r" (result), [left] "r" (left), [last_word] "r" ((num_words - 1) * 4) /* last_word = byte offset of the last operand word */
702 |         : "cc", "memory"
703 |     );
704 |     
705 | #else /* uECC_arm_thumb: only low registers, two-operand arithmetic and a 32-bit muls result are used */
706 |     uint32_t r3, r4, r5, r6, r7; /* scratch outputs; the rN names are operand labels, the "l" constraint lets the compiler pick the registers */
707 | 
708 |     __asm__ volatile (
709 |         ".syntax unified \n\t"
710 |         "subs %[r2], #1 \n\t" /* r2 = num_words - 1 */
711 |         "lsls %[r2], #2 \n\t" /* r2 = (num_words - 1) * 4 */
712 |         "mov r8, %[r2] \n\t"  /* r8 = (num_words - 1) * 4 */
713 |         "lsls %[r2], #1 \n\t" /* r2 = (num_words - 1) * 8 */
714 |         "mov r9, %[r2] \n\t"  /* r9 = (num_words - 1) * 8 */
715 |         "movs %[r2], #0 \n\t" /* c0 = 0 (r2 is the accumulator low word from here on) */
716 |         "movs %[r3], #0 \n\t" /* c1 = 0 */
717 |         "movs %[r4], #0 \n\t" /* c2 = 0 */
718 |         "movs %[r5], #0 \n\t" /* k = 0 */
719 |         
720 |         "push {%[r0]} \n\t" /* keep result on the stack; r0 doubles as scratch below */
721 |         
722 |         "1: \n\t" /* outer loop (k < num_words) */
723 |         "movs %[r6], #0 \n\t" /* r6 = i = 0 */
724 |         "b 3f \n\t"
725 |         
726 |         "2: \n\t" /* outer loop (k >= num_words) */
727 |         "movs %[r6], %[r5] \n\t" /* r6 = k */
728 |         "mov %[r0], r8 \n\t"     /* r0 = (num_words - 1) * 4 */
729 |         "subs %[r6], %[r0] \n\t" /* r6 = i = k - (num_words - 1) (times 4) */
730 |         
731 |         "3: \n\t" /* inner loop */
732 |         "mov r10, %[r2] \n\t" /* park c0 in r10 so r2 can be used for the multiply */
733 |         "mov r11, %[r3] \n\t" /* park c1 in r11 */
734 |         "mov r12, %[r4] \n\t" /* park c2 in r12 */
735 |         "mov r14, %[r5] \n\t" /* park k in r14 */
736 |         "subs %[r7], %[r5], %[r6] \n\t"  /* r7 = k - i */
737 |         
738 |         "ldr %[r3], [%[r1], %[r7]] \n\t" /* r3 = left[k - i] */
739 |         "ldr %[r0], [%[r1], %[r6]] \n\t" /* r0 = left[i] */
740 |         
741 |         "lsrs %[r2], %[r0], #16 \n\t" /* r2 = a1 (upper 16 bits of left[i]) */
742 |         "uxth %[r0], %[r0] \n\t"      /* r0 = a0 (lower 16 bits of left[i]) */
743 |         
744 |         "lsrs %[r4], %[r3], #16 \n\t" /* r4 = b1 (upper 16 bits of left[k - i]) */
745 |         "uxth %[r3], %[r3] \n\t"      /* r3 = b0 (lower 16 bits of left[k - i]) */
746 |         
747 |         "movs %[r5], %[r2] \n\t"        /* r5 = a1 */
748 |         "muls %[r5], %[r4], %[r5] \n\t" /* r5 = a1 * b1 */
749 |         "muls %[r2], %[r3], %[r2] \n\t" /* r2 = b0 * a1 */
750 |         "muls %[r4], %[r0], %[r4] \n\t" /* r4 = a0 * b1 */
751 |         "muls %[r0], %[r3], %[r0] \n\t" /* r0 = a0 * b0 */
752 |         
753 |         /* Add middle terms: each cross product is split at bit 16 and added into (r5, r0) = (high, low) */
754 |         "lsls %[r3], %[r2], #16 \n\t"
755 |         "lsrs %[r2], %[r2], #16 \n\t"
756 |         "adds %[r0], %[r3] \n\t"
757 |         "adcs %[r5], %[r2] \n\t"
758 |         
759 |         "lsls %[r3], %[r4], #16 \n\t"
760 |         "lsrs %[r4], %[r4], #16 \n\t"
761 |         "adds %[r0], %[r3] \n\t"
762 |         "adcs %[r5], %[r4] \n\t"
763 |         
764 |         /* Add to acc, doubling if necessary */
765 |         "mov %[r2], r10\n\t" /* r2 = c0 */
766 |         "mov %[r3], r11\n\t" /* r3 = c1 */
767 |         "mov %[r4], r12\n\t" /* r4 = c2 */
768 |         
769 |         "cmp %[r6], %[r7] \n\t"    /* (i < k - i) ? */
770 |         "bge 4f \n\t"            /*   if i >= k - i, skip */
771 |         "movs %[r7], #0 \n\t"    /* r7 = 0 */
772 |         "adds %[r2], %[r0] \n\t" /* add low word to c0 */
773 |         "adcs %[r3], %[r5] \n\t" /* add high word to c1, including carry */
774 |         "adcs %[r4], %[r7] \n\t" /* add carry to c2 */
775 |         "4: \n\t" /* reached for every product; for i < k - i this is the second addition */
776 |         "movs %[r7], #0 \n\t"    /* r7 = 0 */
777 |         "adds %[r2], %[r0] \n\t" /* add low word to c0 */
778 |         "adcs %[r3], %[r5] \n\t" /* add high word to c1, including carry */
779 |         "adcs %[r4], %[r7] \n\t" /* add carry to c2 */
780 |         
781 |         "mov %[r5], r14\n\t" /* r5 = k */
782 |         
783 |         "adds %[r6], #4 \n\t"           /* i += 4 */
784 |         "cmp %[r6], %[r5] \n\t"         /* i >= k? */
785 |         "bge 5f \n\t"                   /*   if so, exit the loop */
786 |         "subs %[r7], %[r5], %[r6] \n\t" /* r7 = k - i */
787 |         "cmp %[r6], %[r7] \n\t"         /* i <= k - i? */
788 |         "ble 3b \n\t"                   /*   if so, continue looping */
789 |         
790 |         "5: \n\t" /* end inner loop */
791 |         
792 |         "ldr %[r0], [sp, #0] \n\t" /* r0 = result (reloaded from the copy pushed earlier) */
793 |         
794 |         "str %[r2], [%[r0], %[r5]] \n\t" /* result[k] = c0 */
795 |         "mov %[r2], %[r3] \n\t"          /* c0 = c1 */
796 |         "mov %[r3], %[r4] \n\t"          /* c1 = c2 */
797 |         "movs %[r4], #0 \n\t"            /* c2 = 0 */
798 |         "adds %[r5], #4 \n\t"            /* k += 4 */
799 |         "cmp %[r5], r8 \n\t"             /* k <= (num_words - 1) (times 4) ? */
800 |         "ble 1b \n\t"                    /*   if so, loop back, start with i = 0 */
801 |         "cmp %[r5], r9 \n\t"             /* k <= (num_words * 2 - 2) (times 4) ? */
802 |         "ble 2b \n\t"                    /*   if so, loop back, with i = (k + 1) - num_words */
803 |         /* end outer loop */
804 |         
805 |         "str %[r2], [%[r0], %[r5]] \n\t" /* result[num_words * 2 - 1] = c0 */
806 |         "pop {%[r0]} \n\t"               /* pop result off the stack (also rebalances sp) */
807 | 
808 |         RESUME_SYNTAX /* defined outside this view; presumably undoes the ".syntax unified" directive above */
809 |         : [r2] "+l" (num_words), [r3] "=&l" (r3), [r4] "=&l" (r4),
810 |           [r5] "=&l" (r5), [r6] "=&l" (r6), [r7] "=&l" (r7)
811 |         : [r0] "l" (result), [r1] "l" (left) /* r0 is listed as an input although the asm overwrites it; the final pop puts result back */
812 |         : "r8", "r9", "r10", "r11", "r12", "r14", "cc", "memory" /* high registers hold loop limits and parked values */
813 |     );
814 | #endif
815 | }
816 | #define asm_square 1 /* records that uECC_vli_square now has an assembly implementation */
817 | #endif
818 | #endif /* uECC_SQUARE_FUNC */
819 | 
820 | #endif /* _UECC_ASM_ARM_H_ */
821 | 


--------------------------------------------------------------------------------
/asm_avr.inc:
--------------------------------------------------------------------------------
   1 | /* Copyright 2015, Kenneth MacKay. Licensed under the BSD 2-clause license. */
   2 | 
   3 | #ifndef _UECC_ASM_AVR_H_
   4 | #define _UECC_ASM_AVR_H_
   5 | 
   6 | #if (uECC_SUPPORTS_secp256r1 || uECC_SUPPORTS_secp256k1) /* tests run from 32 down to 20; the last one that matches wins */
   7 |     #define uECC_MIN_WORDS 32
   8 | #endif
   9 | #if uECC_SUPPORTS_secp224r1
  10 |     #undef uECC_MIN_WORDS
  11 |     #define uECC_MIN_WORDS 28
  12 | #endif
  13 | #if uECC_SUPPORTS_secp192r1
  14 |     #undef uECC_MIN_WORDS
  15 |     #define uECC_MIN_WORDS 24
  16 | #endif
  17 | #if uECC_SUPPORTS_secp160r1
  18 |     #undef uECC_MIN_WORDS
  19 |     #define uECC_MIN_WORDS 20 /* smallest value selectable here; stays undefined if none of the tests above match */
  20 | #endif
  21 | 
  22 | #if __AVR_HAVE_EIJMP_EICALL__ /* IJMP is the indirect-jump mnemonic pasted into the asm strings below */
  23 |     #define IJMP "eijmp \n\t"
  24 | #else
  25 |     #define IJMP "ijmp \n\t"
  26 | #endif
  27 | 
  28 | #if (uECC_OPTIMIZATION_LEVEL >= 2)
  29 | 
  30 | uECC_VLI_API void uECC_vli_clear(uECC_word_t *vli, wordcount_t num_words) { /* stores zero to vli[0 .. num_words - 1] */
  31 |     volatile uECC_word_t *v = vli; /* local copy bound to the X pointer; the asm post-increments it */
  32 |     __asm__ volatile (
  33 |     #if (uECC_MAX_WORDS != uECC_MIN_WORDS) /* several lengths possible: jump into the unrolled stores */
  34 |         "ldi r30, pm_lo8(1f) \n\t" /* Z = program address of label 1 (end of the stores) */
  35 |         "ldi r31, pm_hi8(1f) \n\t"
  36 |         "sub r30, %[num] \n\t" /* back up one store instruction per word to clear */
  37 |         "sbc r31, __zero_reg__ \n\t"
  38 |         IJMP /* only the last num_words stores execute */
  39 |     #endif
  40 | 
  41 |         REPEAT(uECC_MAX_WORDS, "st x+, __zero_reg__ \n\t") /* REPEAT is defined outside this view; presumably emits the string uECC_MAX_WORDS times */
  42 |         "1: \n\t"
  43 |         : "+x" (v)
  44 |         : [num] "r" (num_words)
  45 |         :
  46 |     #if (uECC_MAX_WORDS != uECC_MIN_WORDS)
  47 |           "r30", "r31", "cc" /* Z and the flags are only touched by the jump computation */
  48 |     #endif
  49 |     );
  50 | }
  51 | #define asm_clear 1 /* records that uECC_vli_clear has an assembly implementation */
  52 | 
  53 | uECC_VLI_API void uECC_vli_set(uECC_word_t *dest, const uECC_word_t *src, wordcount_t num_words) { /* copies src[0 .. num_words - 1] to dest */
  54 |     volatile uECC_word_t *d = dest; /* local copy bound to the X pointer; the asm post-increments it */
  55 |     __asm__ volatile (
  56 |     #if (uECC_MAX_WORDS != uECC_MIN_WORDS) /* several lengths possible: jump into the unrolled copy */
  57 |         "ldi r30, pm_lo8(1f) \n\t" /* Z = program address of label 1 (end of the copy) */
  58 |         "ldi r31, pm_hi8(1f) \n\t"
  59 |         "sub r30, %[num] \n\t" /* back up two instructions (ld + st) per word to copy */
  60 |         "sbc r31, __zero_reg__ \n\t"
  61 |         IJMP /* only the last num_words ld/st pairs execute */
  62 |     #endif
  63 | 
  64 |         REPEAT(uECC_MAX_WORDS, /* REPEAT is defined outside this view; presumably emits the pair uECC_MAX_WORDS times */
  65 |             "ld r0, y+ \n\t"  /* r0 = *src++ */
  66 |             "st x+, r0 \n\t") /* *dest++ = r0 */
  67 |         "1: \n\t"
  68 |         : "+x" (d), "+y" (src)
  69 |         : [num] "r" ((uint8_t)(num_words * 2)) /* jump distance in instructions: two per word */
  70 |         : "r0"
  71 |     #if (uECC_MAX_WORDS != uECC_MIN_WORDS)
  72 |           , "r30", "r31", "cc"
  73 |     #endif
  74 |     );
  75 | }
  76 | #define asm_set 1 /* records that uECC_vli_set has an assembly implementation */
  77 | 
  78 | uECC_VLI_API void uECC_vli_rshift1(uECC_word_t *vli, wordcount_t num_words) { /* shifts vli right by one bit in place; the bit shifted out is not returned */
  79 |     volatile uECC_word_t *v = vli; /* local copy bound to the X pointer; the asm moves it */
  80 |     __asm__ volatile (
  81 |     #if (uECC_MAX_WORDS != uECC_MIN_WORDS) /* several lengths possible: prepare a jump into the unrolled chain */
  82 |         "ldi r30, pm_lo8(1f) \n\t" /* Z = program address of label 1 (end of the chain) */
  83 |         "ldi r31, pm_hi8(1f) \n\t"
  84 |         "sub r30, %[jump] \n\t" /* back up three instructions for each word after the first */
  85 |         "sbc r31, __zero_reg__ \n\t"
  86 |     #endif
  87 | 
  88 |         "add r26, %[num] \n\t" /* X = vli + num_words, one past the highest word */
  89 |         "adc r27, __zero_reg__ \n\t"
  90 |         "ld r0, -x \n\t" /* highest word first */
  91 |         "lsr r0 \n\t"    /* zero enters at the top, bit 0 goes to the carry flag */
  92 |         "st x, r0 \n\t"
  93 |     #if (uECC_MAX_WORDS != uECC_MIN_WORDS)
  94 |         IJMP /* skip the chain steps this length does not need; the carry flag is kept */
  95 |     #endif
  96 | 
  97 |         REPEAT(DEC(uECC_MAX_WORDS), /* REPEAT and DEC are defined outside this view; presumably uECC_MAX_WORDS - 1 copies */
  98 |             "ld r0, -x \n\t"  /* next lower word */
  99 |             "ror r0 \n\t"     /* carry from the word above enters at the top */
 100 |             "st x, r0 \n\t")
 101 |         "1: \n\t"
 102 |         : "+x" (v)
 103 |     #if (uECC_MAX_WORDS != uECC_MIN_WORDS)
 104 |         : [num] "r" (num_words), [jump] "r" ((uint8_t)(3 * (num_words - 1))) /* jump distance in instructions */
 105 |         : "r0", "r30", "r31", "cc"
 106 |     #else
 107 |         : [num] "r" (num_words)
 108 |         : "r0", "cc"
 109 |     #endif
 110 |     );
 111 | }
 112 | #define asm_rshift1 1 /* records that uECC_vli_rshift1 has an assembly implementation */
 113 | 
 114 | #define ADD_RJPM_TABLE(N)       /* jump-table entry N: point Z at result, then enter the chain at add step N */ \
 115 |     "movw r30, %A[result] \n\t" \
 116 |     "rjmp add_%=_" #N " \n\t"
 117 | 
 118 | #define ADD_RJPM_DEST(N)     /* add step N: *z++ = *x++ + *y++ + carry flag */ \
 119 |     "add_%=_" #N ":"         \
 120 |     "ld %[clb], x+ \n\t"     \
 121 |     "ld %[rb], y+ \n\t"      \
 122 |     "adc %[clb], %[rb] \n\t" \
 123 |     "st z+, %[clb] \n\t"
 124 | 
 125 | uECC_VLI_API uECC_word_t uECC_vli_add(uECC_word_t *result,      /* out: left + right, num_words words */
 126 |                                       const uECC_word_t *left,  /* in: first operand */
 127 |                                       const uECC_word_t *right, /* in: second operand */
 128 |                                       wordcount_t num_words) {  /* returns the carry out of the top word (0 or 1) */
 129 |     volatile uECC_word_t *r = result; /* passed to the asm as a register pair and copied into Z there */
 130 |     uint8_t carry;      /* working byte inside the chain, then the returned carry */
 131 |     uint8_t right_byte; /* holds the byte loaded from right */
 132 | 
 133 |     __asm__ volatile (
 134 |     #if (uECC_MAX_WORDS != uECC_MIN_WORDS) /* several lengths possible: dispatch through the jump table */
 135 |         "ldi r30, pm_lo8(add_%=_" STR(uECC_MAX_WORDS) ") \n\t" /* Z = address of add step uECC_MAX_WORDS */
 136 |         "ldi r31, pm_hi8(add_%=_" STR(uECC_MAX_WORDS) ") \n\t"
 137 |         "sub r30, %[num] \n\t" /* back up two instructions per word: assumes REPEATM emits step uECC_MAX_WORDS first, right after the table - TODO confirm */
 138 |         "sbc r31, __zero_reg__ \n\t"
 139 |     #endif
 140 | 
 141 |         "clc \n\t" /* the adc chain starts with no carry in */
 142 |     #if (uECC_MAX_WORDS != uECC_MIN_WORDS)
 143 |         IJMP /* lands on a table entry, which loads Z with result and jumps to the matching add step */
 144 |         REPEATM(uECC_MAX_WORDS, ADD_RJPM_TABLE)
 145 |     #else
 146 |         "movw r30, %A[result] \n\t" /* no table in this build: load Z with result here (movw leaves the carry flag alone) */
 147 |     #endif
 148 |         REPEATM(uECC_MAX_WORDS, ADD_RJPM_DEST)
 149 |         "mov %[clb], __zero_reg__ \n\t"
 150 |         "adc %[clb], %[clb] \n\t" /* Store carry bit. */
 151 | 
 152 |         : "+x" (left), "+y" (right),
 153 |           [clb] "=&r" (carry), [rb] "=&r" (right_byte)
 154 |         : [result] "r" (r), [num] "r" ((uint8_t)(num_words * 2)) /* num = jump distance in instructions */
 155 |         : "r30", "r31", "cc"
 156 |     );
 157 |     return carry;
 158 | }
 159 | #define asm_add 1 /* records that uECC_vli_add has an assembly implementation */
 160 | 
 161 | #define SUB_RJPM_TABLE(N)       /* jump-table entry N: point Z at result, then enter the chain at sub step N */ \
 162 |     "movw r30, %A[result] \n\t" \
 163 |     "rjmp sub_%=_" #N " \n\t"
 164 | 
 165 | #define SUB_RJPM_DEST(N)     /* sub step N: *z++ = *x++ - *y++ - carry flag (borrow) */ \
 166 |     "sub_%=_" #N ":"         \
 167 |     "ld %[clb], x+ \n\t"     \
 168 |     "ld %[rb], y+ \n\t"      \
 169 |     "sbc %[clb], %[rb] \n\t" \
 170 |     "st z+, %[clb] \n\t"
 171 | 
 172 | uECC_VLI_API uECC_word_t uECC_vli_sub(uECC_word_t *result,      /* out: left - right, num_words words */
 173 |                                       const uECC_word_t *left,  /* in: minuend */
 174 |                                       const uECC_word_t *right, /* in: subtrahend */
 175 |                                       wordcount_t num_words) {  /* returns the borrow out of the top word (0 or 1) */
 176 |     volatile uECC_word_t *r = result; /* passed to the asm as a register pair and copied into Z there */
 177 |     uint8_t carry;      /* working byte inside the chain, then the returned borrow */
 178 |     uint8_t right_byte; /* holds the byte loaded from right */
 179 | 
 180 |     __asm__ volatile (
 181 |     #if (uECC_MAX_WORDS != uECC_MIN_WORDS) /* several lengths possible: dispatch through the jump table */
 182 |         "ldi r30, pm_lo8(sub_%=_" STR(uECC_MAX_WORDS) ") \n\t" /* Z = address of sub step uECC_MAX_WORDS */
 183 |         "ldi r31, pm_hi8(sub_%=_" STR(uECC_MAX_WORDS) ") \n\t"
 184 |         "sub r30, %[num] \n\t" /* back up two instructions per word: assumes REPEATM emits step uECC_MAX_WORDS first, right after the table - TODO confirm */
 185 |         "sbc r31, __zero_reg__ \n\t"
 186 |     #endif
 187 | 
 188 |         "clc \n\t" /* the sbc chain starts with no borrow in */
 189 |     #if (uECC_MAX_WORDS != uECC_MIN_WORDS)
 190 |         IJMP /* lands on a table entry, which loads Z with result and jumps to the matching sub step */
 191 |         REPEATM(uECC_MAX_WORDS, SUB_RJPM_TABLE)
 192 |     #else
 193 |         "movw r30, %A[result] \n\t" /* no table in this build: load Z with result here (movw leaves the carry flag alone) */
 194 |     #endif
 195 |         REPEATM(uECC_MAX_WORDS, SUB_RJPM_DEST)
 196 |         "mov %[clb], __zero_reg__ \n\t"
 197 |         "adc %[clb], %[clb] \n\t" /* Store carry bit. */
 198 | 
 199 |         : "+x" (left), "+y" (right),
 200 |           [clb] "=&r" (carry), [rb] "=&r" (right_byte)
 201 |         : [result] "r" (r), [num] "r" ((uint8_t)(num_words * 2)) /* num = jump distance in instructions */
 202 |         : "r30", "r31", "cc"
 203 |     );
 204 |     return carry;
 205 | }
 206 | #define asm_sub 1 /* records that uECC_vli_sub has an assembly implementation */
 207 | 
 208 | #if (uECC_OPTIMIZATION_LEVEL >= 3)
 209 | 
 210 | #include "asm_avr_mult_square.inc"
 211 | 
 212 | __attribute((noinline)) /* kept out of line; presumably so the register-argument assumption below holds - TODO confirm */
 213 | uECC_VLI_API void uECC_vli_mult(uECC_word_t *result,      /* out: product, bound to the Z pointer */
 214 |                                 const uECC_word_t *left,  /* in: bound to the X pointer */
 215 |                                 const uECC_word_t *right, /* in: bound to the Y pointer */
 216 |                                 wordcount_t num_words) {  /* operand length, handed to the asm in r18 */
 217 |     /* num_words should already be in r18. */
 218 |     register wordcount_t r18 __asm__("r18") = num_words;
 219 | 
 220 |     __asm__ volatile (
 221 |         "push r18 \n\t" /* saved across the base multiply; presumably that macro uses r18 as scratch */
 222 | #if (uECC_MIN_WORDS == 20) /* base multiply for the smallest enabled size, then one extension macro per larger size */
 223 |         FAST_MULT_ASM_20
 224 |         "pop r18 \n\t"
 225 |     #if (uECC_MAX_WORDS > 20)
 226 |         FAST_MULT_ASM_20_TO_24
 227 |     #endif
 228 |     #if (uECC_MAX_WORDS > 24)
 229 |         FAST_MULT_ASM_24_TO_28
 230 |     #endif
 231 |     #if (uECC_MAX_WORDS > 28)
 232 |         FAST_MULT_ASM_28_TO_32
 233 |     #endif
 234 | #elif (uECC_MIN_WORDS == 24)
 235 |         FAST_MULT_ASM_24
 236 |         "pop r18 \n\t"
 237 |     #if (uECC_MAX_WORDS > 24)
 238 |         FAST_MULT_ASM_24_TO_28
 239 |     #endif
 240 |     #if (uECC_MAX_WORDS > 28)
 241 |         FAST_MULT_ASM_28_TO_32
 242 |     #endif
 243 | #elif (uECC_MIN_WORDS == 28)
 244 |         FAST_MULT_ASM_28
 245 |         "pop r18 \n\t"
 246 |     #if (uECC_MAX_WORDS > 28)
 247 |         FAST_MULT_ASM_28_TO_32
 248 |     #endif
 249 | #elif (uECC_MIN_WORDS == 32)
 250 |         FAST_MULT_ASM_32
 251 |         "pop r18 \n\t"
 252 | #endif
 253 |         "2: \n\t" /* presumably the exit label the extension macros branch to once num_words is covered - defined in asm_avr_mult_square.inc */
 254 |         "eor r1, r1 \n\t" /* r1 = 0 again before returning to C code */
 255 |         : "+x" (left), "+y" (right), "+z" (result)
 256 |         : "r" (r18)
 257 |         : "r0", "r2", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10",
 258 |           "r11", "r12", "r13", "r14", "r15", "r16", "r17", "r19", "r20",
 259 |           "r21", "r22", "r23", "r24", "r25", "cc"
 260 |     );
 261 | }
 262 | #define asm_mult 1 /* records that uECC_vli_mult has an assembly implementation */
 263 | 
 264 | #if uECC_SQUARE_FUNC /* the dedicated squaring routine is compiled only when uECC_SQUARE_FUNC is non-zero */
 265 | __attribute((noinline)) /* kept out of line; presumably so the register-argument assumption below holds - TODO confirm */
 266 | uECC_VLI_API void uECC_vli_square(uECC_word_t *result,     /* out: square of left, bound to the Z pointer */
 267 |                                   const uECC_word_t *left, /* in: bound to the X pointer */
 268 |                                   wordcount_t num_words) { /* operand length, handed to the asm in r20 */
 269 |     /* num_words should already be in r20. */
 270 |     register wordcount_t r20 __asm__("r20") = num_words;
 271 | 
 272 |     __asm__ volatile (
 273 |         "push r20 \n\t" /* saved across the base squaring; presumably that macro uses r20 as scratch */
 274 | #if (uECC_MIN_WORDS == 20) /* base squaring for the smallest enabled size, then one extension macro per larger size */
 275 |         FAST_SQUARE_ASM_20
 276 |         "pop r20 \n\t"
 277 |     #if (uECC_MAX_WORDS > 20)
 278 |         FAST_SQUARE_ASM_20_TO_24
 279 |     #endif
 280 |     #if (uECC_MAX_WORDS > 24)
 281 |         FAST_SQUARE_ASM_24_TO_28
 282 |     #endif
 283 |     #if (uECC_MAX_WORDS > 28)
 284 |         FAST_SQUARE_ASM_28_TO_32
 285 |     #endif
 286 | #elif (uECC_MIN_WORDS == 24)
 287 |         FAST_SQUARE_ASM_24
 288 |         "pop r20 \n\t"
 289 |     #if (uECC_MAX_WORDS > 24)
 290 |         FAST_SQUARE_ASM_24_TO_28
 291 |     #endif
 292 |     #if (uECC_MAX_WORDS > 28)
 293 |         FAST_SQUARE_ASM_28_TO_32
 294 |     #endif
 295 | #elif (uECC_MIN_WORDS == 28)
 296 |         FAST_SQUARE_ASM_28
 297 |         "pop r20 \n\t"
 298 |     #if (uECC_MAX_WORDS > 28)
 299 |         FAST_SQUARE_ASM_28_TO_32
 300 |     #endif
 301 | #elif (uECC_MIN_WORDS == 32)
 302 |         FAST_SQUARE_ASM_32
 303 |         "pop r20 \n\t"
 304 | #endif
 305 |         "2: \n\t" /* presumably the exit label the extension macros branch to once num_words is covered - defined in asm_avr_mult_square.inc */
 306 |         "eor r1, r1 \n\t" /* r1 = 0 again before returning to C code */
 307 |         : "+x" (left), "+z" (result)
 308 |         : "r" (r20)
 309 |         : "r0", "r2", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10",
 310 |           "r11", "r12", "r13", "r14", "r15", "r16", "r17", "r18", "r19",
 311 |           "r21", "r22", "r23", "r24", "r25", "r28", "r29", "cc" /* unlike the multiply, Y (r28/r29) is a clobber here, not an operand */
 312 |     );
 313 | }
 314 | #define asm_square 1 /* records that uECC_vli_square has an assembly implementation */
 315 | #endif /* uECC_SQUARE_FUNC */
 316 | 
 317 | #endif /* (uECC_OPTIMIZATION_LEVEL >= 3) */
 318 | 
 319 | #if uECC_SUPPORTS_secp160r1
 320 | static const struct uECC_Curve_t curve_secp160r1; /* declared here so curve_secp160r1.p can be used below */
 321 | static void vli_mmod_fast_secp160r1(uECC_word_t *result, uECC_word_t *product) { /* reduces the 40-byte product into the 20-byte result; product is overwritten as scratch */
 322 |     uint8_t carry = 0; /* counts the carries out of the two 20-byte additions into result */
 323 |     __asm__ volatile (
 324 |         "in r30, __SP_L__ \n\t" /* Z = stack pointer */
 325 |     	"in r31, __SP_H__ \n\t"
 326 |     	"sbiw r30, 24 \n\t" /* reserve a 24-byte tmp buffer on the stack */
 327 |     	"in r0, __SREG__ \n\t" /* save the status register ... */
 328 |     	"cli \n\t"             /* ... and disable interrupts while SP is rewritten */
 329 |     	"out __SP_H__, r31 \n\t"
 330 |     	"out __SREG__, r0 \n\t"
 331 |     	"out __SP_L__, r30 \n\t"
 332 | 
 333 |     	"adiw r30, 25 \n\t" /* we are shifting by 31 bits, so shift over 4 bytes
 334 |     	                       (+ 1 since z initially points below the stack) */
 335 |         "adiw r26, 40 \n\t" /* end of product */
 336 |         "ld r18, -x \n\t"   /* Load word. (product[39]) */
 337 |         "lsr r18 \n\t"      /* Shift. */
 338 |         "st -z, r18 \n\t"   /* Store the first result word. (tmp[23]) */
 339 | 
 340 |         /* Now we just do the remaining words with the carry bit (using ROR) */
 341 |         REPEAT(19, /* product[38 .. 20] -> tmp[22 .. 4] */
 342 |             "ld r18, -x \n\t"
 343 |             "ror r18 \n\t"
 344 |             "st -z, r18 \n\t")
 345 | 
 346 |         "eor r18, r18 \n\t" /* r18 = 0 */
 347 |         "ror r18 \n\t"      /* get last bit */
 348 |         "st -z, r18 \n\t"   /* store it (tmp[3]) */
 349 | 
 350 |         "sbiw r30, 3 \n\t" /* move z back to point at tmp */
 351 |         /* now we add right (the upper 20 bytes of product, which x points at here) */
 352 |         "ld r18, x+ \n\t"
 353 |         "st z+, r18 \n\t" /* the first 3 bytes do not need to be added */
 354 |         "ld r18, x+ \n\t"
 355 |         "st z+, r18 \n\t"
 356 |         "ld r18, x+ \n\t"
 357 |         "st z+, r18 \n\t"
 358 | 
 359 |         "ld r18, x+ \n\t" /* product[23] + tmp[3] starts the carry chain */
 360 |         "ld r19, z \n\t"
 361 |         "add r18, r19 \n\t"
 362 |         "st z+, r18 \n\t"
 363 | 
 364 |         /* Now we just do the remaining words with the carry bit (using ADC) */
 365 |         REPEAT(16, /* product[24 .. 39] + tmp[4 .. 19] */
 366 |             "ld r18, x+ \n\t"
 367 |             "ld r19, z \n\t"
 368 |             "adc r18, r19 \n\t"
 369 |             "st z+, r18 \n\t")
 370 | 
 371 |         /* Propagate over the remaining bytes of result (tmp[20 .. 23]; r1 is used as zero) */
 372 |         "ld r18, z \n\t"
 373 |         "adc r18, r1 \n\t"
 374 |         "st z+, r18 \n\t"
 375 | 
 376 |         "ld r18, z \n\t"
 377 |         "adc r18, r1 \n\t"
 378 |         "st z+, r18 \n\t"
 379 | 
 380 |         "ld r18, z \n\t"
 381 |         "adc r18, r1 \n\t"
 382 |         "st z+, r18 \n\t"
 383 | 
 384 |         "ld r18, z \n\t"
 385 |         "adc r18, r1 \n\t"
 386 |         "st z+, r18 \n\t"
 387 |         /* tmp now holds (upper 20 bytes of product) * (2^31 + 1) as a 24-byte value */
 388 |         "sbiw r30, 24 \n\t" /* move z back to point at tmp */
 389 |         "sbiw r26, 40 \n\t" /* move x back to point at product */
 390 | 
 391 |         /* add low bytes of tmp to product, storing in result */
 392 |         "ld r18, z+ \n\t"
 393 |         "ld r19, x+ \n\t"
 394 |         "add r18, r19 \n\t"
 395 |         "st y+, r18 \n\t"
 396 |         REPEAT(19,
 397 |             "ld r18, z+ \n\t"
 398 |             "ld r19, x+ \n\t"
 399 |             "adc r18, r19 \n\t"
 400 |             "st y+, r18 \n\t")
 401 |         "adc %[carry], __zero_reg__ \n\t" /* Store carry bit (carry flag is cleared). */
 402 |         /* at this point x is at the end of product, y is at the end of result,
 403 |            z is 20 bytes into tmp */
 404 |         "sbiw r28, 20 \n\t" /* move y back to point at result */
 405 |         "adiw r30, 4 \n\t"  /* move z to point to the end of tmp */
 406 | 
 407 |         /* do omega_mult again with the 4 relevant bytes */
 408 |         /* z points to the end of tmp, x points to the end of product */
 409 |         "ld r18, -z \n\t" /* Load word. (tmp[23]) */
 410 |         "lsr r18 \n\t"    /* Shift. */
 411 |         "st -x, r18 \n\t" /* Store the first result word. (product[19]) */
 412 | 
 413 |         "ld r18, -z \n\t"
 414 |         "ror r18 \n\t"
 415 |         "st -x, r18 \n\t"
 416 |         "ld r18, -z \n\t"
 417 |         "ror r18 \n\t"
 418 |         "st -x, r18 \n\t"
 419 |         "ld r18, -z \n\t"
 420 |         "ror r18 \n\t"
 421 |         "st -x, r18 \n\t"
 422 | 
 423 |         "eor r18, r18 \n\t" /* r18 = 0 */
 424 |         "ror r18 \n\t"      /* get last bit */
 425 |         "st -x, r18 \n\t"   /* store it (product[15]) */
 426 | 
 427 |         "sbiw r26, 3 \n\t" /* move x back to point at beginning (product[12]) */
 428 |         /* now we add a copy of the 4 bytes */
 429 |         "ld r18, z+ \n\t"
 430 |         "st x+, r18 \n\t" /* the first 3 bytes do not need to be added */
 431 |         "ld r18, z+ \n\t"
 432 |         "st x+, r18 \n\t"
 433 |         "ld r18, z+ \n\t"
 434 |         "st x+, r18 \n\t"
 435 | 
 436 |         "ld r18, z+ \n\t" /* tmp[23] + product[15] starts the carry chain */
 437 |         "ld r19, x \n\t"
 438 |         "add r18, r19 \n\t"
 439 |         "st x+, r18 \n\t"
 440 | 
 441 |         /* Propagate over the remaining bytes (product[16 .. 19]) */
 442 |         "ld r18, x \n\t"
 443 |         "adc r18, r1 \n\t"
 444 |         "st x+, r18 \n\t"
 445 | 
 446 |         "ld r18, x \n\t"
 447 |         "adc r18, r1 \n\t"
 448 |         "st x+, r18 \n\t"
 449 | 
 450 |         "ld r18, x \n\t"
 451 |         "adc r18, r1 \n\t"
 452 |         "st x+, r18 \n\t"
 453 | 
 454 |         "ld r18, x \n\t"
 455 |         "adc r18, r1 \n\t"
 456 |         "st x+, r18 \n\t"
 457 | 
 458 |         /* now z points to the end of tmp, x points to the end of product
 459 |            (y still points at result) */
 460 |         "sbiw r26, 8 \n\t" /* move x back to point at beginning of actual data */
 461 |         /* add into result */
 462 |         "ld r18, x+ \n\t"
 463 |         "ld r19, y \n\t"
 464 |         "add r18, r19 \n\t"
 465 |         "st y+, r18 \n\t"
 466 |         REPEAT(7,
 467 |             "ld r18, x+ \n\t"
 468 |             "ld r19, y \n\t"
 469 |             "adc r18, r19 \n\t"
 470 |             "st y+, r18 \n\t")
 471 | 
 472 |         /* Done adding, now propagate carry bit */
 473 |         REPEAT(12, /* result[8 .. 19] */
 474 |             "ld r18, y \n\t"
 475 |             "adc r18, __zero_reg__ \n\t"
 476 |             "st y+, r18 \n\t")
 477 | 
 478 |         "adc %[carry], __zero_reg__ \n\t" /* Store carry bit (carry flag is cleared). */
 479 |         "sbiw r28, 20 \n\t" /* move y back to point at result */
 480 | 
 481 |         "sbiw r30, 1 \n\t" /* fix stack pointer (z is one past tmp; minus 1 gives the SP value from entry) */
 482 |     	"in r0, __SREG__ \n\t"
 483 |     	"cli \n\t"
 484 |     	"out __SP_H__, r31 \n\t"
 485 |     	"out __SREG__, r0 \n\t"
 486 |     	"out __SP_L__, r30 \n\t"
 487 | 
 488 |         : "+x" (product), [carry] "+r" (carry)
 489 |         : "y" (result)
 490 |         : "r0", "r18", "r19", "r30", "r31", "cc"
 491 |     );
 492 | 
 493 |     if (carry > 0) { /* one subtraction of p for each carry recorded by the asm (at most two) */
 494 |         --carry;
 495 |         uECC_vli_sub(result, result, curve_secp160r1.p, 20);
 496 |     }
 497 |     if (carry > 0) {
 498 |         uECC_vli_sub(result, result, curve_secp160r1.p, 20);
 499 |     }
 500 |     if (uECC_vli_cmp_unsafe(result, curve_secp160r1.p, 20) > 0) { /* NOTE(review): strict test, so a result equal to p is left as is - confirm this is intended */
 501 |         uECC_vli_sub(result, result, curve_secp160r1.p, 20);
 502 |     }
 503 | }
 504 | #define asm_mmod_fast_secp160r1 1 /* records that this reduction has an assembly implementation */
 505 | #endif /* uECC_SUPPORTS_secp160r1 */
 506 | 
 507 | #if uECC_SUPPORTS_secp256k1
 508 | static const struct uECC_Curve_t curve_secp256k1;
 509 | static void vli_mmod_fast_secp256k1(uECC_word_t *result, uECC_word_t *product) {
 510 |     uint8_t carry = 0;
 511 |     __asm__ volatile (
 512 |         "in r30, __SP_L__ \n\t"
 513 |     	"in r31, __SP_H__ \n\t"
 514 |     	"sbiw r30, 37 \n\t"
 515 |     	"in r0, __SREG__ \n\t"
 516 |     	"cli \n\t"
 517 |     	"out __SP_H__, r31 \n\t"
 518 |     	"out __SREG__, r0 \n\t"
 519 |     	"out __SP_L__, r30 \n\t"
 520 | 
 521 |     	"adiw r30, 1 \n\t"  /* add 1 since z initially points below the stack */
 522 |         "adiw r26, 32 \n\t" /* product + uECC_WORDS */
 523 |         "ldi r25, 0x03 \n\t"
 524 |         "ldi r24, 0xD1 \n\t"
 525 |         "ld r18, x+ \n\t"
 526 |         "ld r19, x+ \n\t"
 527 |         "ld r20, x+ \n\t"
 528 |         "ld r21, x+ \n\t"
 529 | 
 530 |         "mul r24, r18 \n\t"
 531 |         "st z+, r0 \n\t"
 532 |         "mov r22, r1 \n\t"
 533 |         "ldi r23, 0 \n\t"
 534 | 
 535 |         "mul r24, r19 \n\t"
 536 |         "add r22, r0 \n\t"
 537 |         "adc r23, r1 \n\t" /* can't overflow */
 538 |         "mul r25, r18 \n\t"
 539 |         "add r22, r0 \n\t"
 540 |         "adc r23, r1 \n\t" /* can't overflow */
 541 |         "st z+, r22 \n\t"
 542 |         "ldi r22, 0 \n\t"
 543 | 
 544 |         "mul r24, r20 \n\t"
 545 |         "add r23, r0 \n\t"
 546 |         "adc r22, r1 \n\t"
 547 |         "mul r25, r19 \n\t"
 548 |         "add r23, r0 \n\t"
 549 |         "adc r22, r1 \n\t"
 550 |         "st z+, r23 \n\t"
 551 |         "ldi r23, 0 \n\t"
 552 | 
 553 |         "mul r24, r21 \n\t"
 554 |         "add r22, r0 \n\t"
 555 |         "adc r23, r1 \n\t"
 556 |         "mul r25, r20 \n\t"
 557 |         "add r22, r0 \n\t"
 558 |         "adc r23, r1 \n\t"
 559 |         "st z+, r22 \n\t"
 560 |         "ldi r22, 0 \n\t"
 561 | 
 562 |         /* now we start adding the 2^32 part as well */
 563 |         "add r23, r18 \n\t" // 28
 564 |         "adc r22, r22 \n\t"
 565 |         "ld r18, x+ \n\t"
 566 |         "mul r24, r18 \n\t"
 567 |         "add r23, r0 \n\t"
 568 |         "adc r22, r1 \n\t"
 569 |         "mul r25, r21 \n\t"
 570 |         "add r23, r0 \n\t"
 571 |         "adc r22, r1 \n\t"
 572 |         "st z+, r23 \n\t"
 573 |         "ldi r23, 0 \n\t"
 574 | 
 575 |         "add r22, r19 \n\t" // 27
 576 |         "adc r23, r23 \n\t"
 577 |         "ld r19, x+ \n\t"
 578 |         "mul r24, r19 \n\t"
 579 |         "add r22, r0 \n\t"
 580 |         "adc r23, r1 \n\t"
 581 |         "mul r25, r18 \n\t"
 582 |         "add r22, r0 \n\t"
 583 |         "adc r23, r1 \n\t"
 584 |         "st z+, r22 \n\t"
 585 |         "ldi r22, 0 \n\t"
 586 | 
 587 |         REPEAT(6, // 26 - 3
 588 |             "add r23, r20 \n\t"
 589 |             "adc r22, r22 \n\t"
 590 |             "ld r20, x+ \n\t"
 591 |             "mul r24, r20 \n\t"
 592 |             "add r23, r0 \n\t"
 593 |             "adc r22, r1 \n\t"
 594 |             "mul r25, r19 \n\t"
 595 |             "add r23, r0 \n\t"
 596 |             "adc r22, r1 \n\t"
 597 |             "st z+, r23 \n\t"
 598 |             "ldi r23, 0 \n\t"
 599 | 
 600 |             "add r22, r21 \n\t"
 601 |             "adc r23, r23 \n\t"
 602 |             "ld r21, x+ \n\t"
 603 |             "mul r24, r21 \n\t"
 604 |             "add r22, r0 \n\t"
 605 |             "adc r23, r1 \n\t"
 606 |             "mul r25, r20 \n\t"
 607 |             "add r22, r0 \n\t"
 608 |             "adc r23, r1 \n\t"
 609 |             "st z+, r22 \n\t"
 610 |             "ldi r22, 0 \n\t"
 611 | 
 612 |             "add r23, r18 \n\t"
 613 |             "adc r22, r22 \n\t"
 614 |             "ld r18, x+ \n\t"
 615 |             "mul r24, r18 \n\t"
 616 |             "add r23, r0 \n\t"
 617 |             "adc r22, r1 \n\t"
 618 |             "mul r25, r21 \n\t"
 619 |             "add r23, r0 \n\t"
 620 |             "adc r22, r1 \n\t"
 621 |             "st z+, r23 \n\t"
 622 |             "ldi r23, 0 \n\t"
 623 | 
 624 |             "add r22, r19 \n\t"
 625 |             "adc r23, r23 \n\t"
 626 |             "ld r19, x+ \n\t"
 627 |             "mul r24, r19 \n\t"
 628 |             "add r22, r0 \n\t"
 629 |             "adc r23, r1 \n\t"
 630 |             "mul r25, r18 \n\t"
 631 |             "add r22, r0 \n\t"
 632 |             "adc r23, r1 \n\t"
 633 |             "st z+, r22 \n\t"
 634 |             "ldi r22, 0 \n\t")
 635 | 
 636 |         "add r23, r20 \n\t" // 2
 637 |         "adc r22, r22 \n\t"
 638 |         "ld r20, x+ \n\t"
 639 |         "mul r24, r20 \n\t"
 640 |         "add r23, r0 \n\t"
 641 |         "adc r22, r1 \n\t"
 642 |         "mul r25, r19 \n\t"
 643 |         "add r23, r0 \n\t"
 644 |         "adc r22, r1 \n\t"
 645 |         "st z+, r23 \n\t"
 646 |         "ldi r23, 0 \n\t"
 647 | 
 648 |         "add r22, r21 \n\t" // 1
 649 |         "adc r23, r23 \n\t"
 650 |         "ld r21, x+ \n\t"
 651 |         "mul r24, r21 \n\t"
 652 |         "add r22, r0 \n\t"
 653 |         "adc r23, r1 \n\t"
 654 |         "mul r25, r20 \n\t"
 655 |         "add r22, r0 \n\t"
 656 |         "adc r23, r1 \n\t"
 657 |         "st z+, r22 \n\t"
 658 |         "ldi r22, 0 \n\t"
 659 | 
 660 |         /* Now finish the carries etc */
 661 |         "add r23, r18 \n\t"
 662 |         "adc r22, r22 \n\t"
 663 |         "mul r25, r21 \n\t"
 664 |         "add r23, r0 \n\t"
 665 |         "adc r22, r1 \n\t"
 666 |         "st z+, r23 \n\t"
 667 |         "ldi r23, 0 \n\t"
 668 | 
 669 |         "add r22, r19 \n\t"
 670 |         "adc r23, r23 \n\t"
 671 |         "st z+, r22 \n\t"
 672 |         "ldi r22, 0 \n\t"
 673 | 
 674 |         "add r23, r20 \n\t"
 675 |         "adc r22, r22 \n\t"
 676 |         "st z+, r23 \n\t"
 677 |         "ldi r23, 0 \n\t"
 678 | 
 679 |         "add r22, r21 \n\t"
 680 |         "adc r23, r23 \n\t"
 681 |         "st z+, r22 \n\t"
 682 |         "st z+, r23 \n\t"
 683 |         "eor r1, r1 \n\t" /* make r1 be 0 again */
 684 | 
 685 |         "sbiw r30, 37 \n\t" /* move z back to point at tmp */
 686 |         "subi r26, 64 \n\t" /* move x back to point at product */
 687 |         "sbc r27, __zero_reg__ \n\t"
 688 | 
 689 |         /* add low bytes of tmp to product, storing in result */
 690 |         "ld r18, z+ \n\t"
 691 |         "ld r19, x+ \n\t"
 692 |         "add r18, r19 \n\t"
 693 |         "st y+, r18 \n\t"
 694 |         REPEAT(31,
 695 |             "ld r18, z+ \n\t"
 696 |             "ld r19, x+ \n\t"
 697 |             "adc r18, r19 \n\t"
 698 |             "st y+, r18 \n\t")
 699 | 
 700 |         "adc %[carry], __zero_reg__ \n\t" /* Store carry bit (carry flag is cleared). */
 701 |         /* at this point x is at the end of product, y is at the end of result,
 702 |            z is 32 bytes into tmp */
 703 |         "sbiw r28, 32 \n\t" /* move y back to point at result */
 704 | 
 705 |         /* do omega_mult again with the 5 relevant bytes */
 706 |         /* z points to tmp + uECC_WORDS, x points to the end of product */
 707 |         "sbiw r26, 32 \n\t" /* shift x back to point into the product buffer
 708 |                                (we can overwrite it now) */
 709 |         "ld r18, z+ \n\t"
 710 |         "ld r19, z+ \n\t"
 711 |         "ld r20, z+ \n\t"
 712 |         "ld r21, z+ \n\t"
 713 | 
 714 |         "mul r24, r18 \n\t"
 715 |         "st x+, r0 \n\t"
 716 |         "mov r22, r1 \n\t"
 717 |         "ldi r23, 0 \n\t"
 718 | 
 719 |         "mul r24, r19 \n\t"
 720 |         "add r22, r0 \n\t"
 721 |         "adc r23, r1 \n\t" /* can't overflow */
 722 |         "mul r25, r18 \n\t"
 723 |         "add r22, r0 \n\t"
 724 |         "adc r23, r1 \n\t" /* can't overflow */
 725 |         "st x+, r22 \n\t"
 726 |         "ldi r22, 0 \n\t"
 727 | 
 728 |         "mul r24, r20 \n\t"
 729 |         "add r23, r0 \n\t"
 730 |         "adc r22, r1 \n\t"
 731 |         "mul r25, r19 \n\t"
 732 |         "add r23, r0 \n\t"
 733 |         "adc r22, r1 \n\t"
 734 |         "st x+, r23 \n\t"
 735 |         "ldi r23, 0 \n\t"
 736 | 
 737 |         "mul r24, r21 \n\t"
 738 |         "add r22, r0 \n\t"
 739 |         "adc r23, r1 \n\t"
 740 |         "mul r25, r20 \n\t"
 741 |         "add r22, r0 \n\t"
 742 |         "adc r23, r1 \n\t"
 743 |         "st x+, r22 \n\t"
 744 |         "ldi r22, 0 \n\t"
 745 | 
 746 |         "add r23, r18 \n\t"
 747 |         "adc r22, r22 \n\t"
 748 |         "ld r18, z+ \n\t"
 749 |         "mul r24, r18 \n\t"
 750 |         "add r23, r0 \n\t"
 751 |         "adc r22, r1 \n\t"
 752 |         "mul r25, r21 \n\t"
 753 |         "add r23, r0 \n\t"
 754 |         "adc r22, r1 \n\t"
 755 |         "st x+, r23 \n\t"
 756 |         "ldi r23, 0 \n\t"
 757 | 
 758 |         /* Now finish the carries etc */
 759 |         "add r22, r19 \n\t"
 760 |         "adc r23, r23 \n\t"
 761 |         "mul r25, r18 \n\t"
 762 |         "add r22, r0 \n\t"
 763 |         "adc r23, r1 \n\t"
 764 |         "st x+, r22 \n\t"
 765 |         "ldi r22, 0 \n\t"
 766 | 
 767 |         "add r23, r20 \n\t"
 768 |         "adc r22, r22 \n\t"
 769 |         "st x+, r23 \n\t"
 770 |         "ldi r23, 0 \n\t"
 771 | 
 772 |         "add r22, r21 \n\t"
 773 |         "adc r23, r23 \n\t"
 774 |         "st x+, r22 \n\t"
 775 |         "ldi r22, 0 \n\t"
 776 | 
 777 |         "add r23, r18 \n\t"
 778 |         "adc r22, r22 \n\t"
 779 |         "st x+, r23 \n\t"
 780 |         "st x+, r22 \n\t"
 781 |         "eor r1, r1 \n\t" /* make r1 be 0 again */
 782 | 
 783 |         /* now z points to the end of tmp, x points to the end of product
 784 |            (y still points at result) */
 785 |         "sbiw r26, 10 \n\t" /* move x back to point at beginning of actual data */
 786 |         /* add into result */
 787 |         "ld r18, x+ \n\t"
 788 |         "ld r19, y \n\t"
 789 |         "add r18, r19 \n\t"
 790 |         "st y+, r18 \n\t"
 791 |         REPEAT(9,
 792 |             "ld r18, x+ \n\t"
 793 |             "ld r19, y \n\t"
 794 |             "adc r18, r19 \n\t"
 795 |             "st y+, r18 \n\t")
 796 | 
 797 |         /* Done adding, now propagate carry bit */
 798 |         REPEAT(22,
 799 |             "ld r18, y \n\t"
 800 |             "adc r18, __zero_reg__ \n\t"
 801 |             "st y+, r18 \n\t")
 802 | 
 803 |         "adc %[carry], __zero_reg__ \n\t"    /* Store carry bit (carry flag is cleared). */
 804 |         "sbiw r28, 32 \n\t" /* move y back to point at result */
 805 | 
 806 |         "sbiw r30, 1 \n\t" /* fix stack pointer */
 807 |     	"in r0, __SREG__ \n\t"
 808 |     	"cli \n\t"
 809 |     	"out __SP_H__, r31 \n\t"
 810 |     	"out __SREG__, r0 \n\t"
 811 |     	"out __SP_L__, r30 \n\t"
 812 | 
 813 |         : "+x" (product), [carry] "+r" (carry)
 814 |         : "y" (result)
 815 |         : "r0", "r18", "r19", "r20", "r21", "r22", "r23", "r24", "r25", "r30", "r31", "cc"
 816 |     );
 817 | 
 818 |     if (carry > 0) {
 819 |         --carry;
 820 |         uECC_vli_sub(result, result, curve_secp256k1.p, 32);
 821 |     }
 822 |     if (carry > 0) {
 823 |         uECC_vli_sub(result, result, curve_secp256k1.p, 32);
 824 |     }
 825 |     if (uECC_vli_cmp_unsafe(result, curve_secp256k1.p, 32) > 0) {
 826 |         uECC_vli_sub(result, result, curve_secp256k1.p, 32);
 827 |     }
 828 | }
 829 | #define asm_mmod_fast_secp256k1 1
 830 | #endif /* uECC_SUPPORTS_secp256k1 */
 831 | 
 832 | #endif /* (uECC_OPTIMIZATION_LEVEL >= 2) */
 833 | 
 834 | /* ---- "Small" implementations ---- */
 835 | 
 836 | #if !asm_add
 837 | uECC_VLI_API uECC_word_t uECC_vli_add(uECC_word_t *result,
 838 |                                       const uECC_word_t *left,
 839 |                                       const uECC_word_t *right,
 840 |                                       wordcount_t num_words) {
 841 |     volatile uECC_word_t *r = result;
 842 |     uint8_t carry = 0;
 843 |     uint8_t left_byte;
 844 |     uint8_t right_byte;
 845 | 
 846 |     __asm__ volatile (
 847 |         "clc \n\t"
 848 | 
 849 |         "1: \n\t"
 850 |         "ld %[left], x+ \n\t"  /* Load left byte. */
 851 |         "ld %[right], y+ \n\t" /* Load right byte. */
 852 |         "adc %[left], %[right] \n\t" /* Add. */
 853 |         "st z+, %[left] \n\t"  /* Store the result. */
 854 |         "dec %[i] \n\t"
 855 |         "brne 1b \n\t"
 856 | 
 857 |         "adc %[carry], %[carry] \n\t" /* Store carry bit. */
 858 | 
 859 |         : "+z" (r), "+x" (left), "+y" (right), [i] "+r" (num_words),
 860 |             [carry] "+r" (carry), [left] "=&r" (left_byte), [right] "=&r" (right_byte)
 861 |         :
 862 |         : "cc"
 863 |     );
 864 |     return carry;
 865 | }
 866 | #define asm_add 1
 867 | #endif
 868 | 
 869 | #if !asm_sub
 870 | uECC_VLI_API uECC_word_t uECC_vli_sub(uECC_word_t *result,
 871 |                                       const uECC_word_t *left,
 872 |                                       const uECC_word_t *right,
 873 |                                       wordcount_t num_words) {
 874 |     volatile uECC_word_t *r = result;
 875 |     uint8_t borrow = 0;
 876 |     uint8_t left_byte;
 877 |     uint8_t right_byte;
 878 | 
 879 |     __asm__ volatile (
 880 |         "clc \n\t"
 881 | 
 882 |         "1: \n\t"
 883 |         "ld %[left], x+ \n\t"  /* Load left byte. */
 884 |         "ld %[right], y+ \n\t" /* Load right byte. */
 885 |         "sbc %[left], %[right] \n\t" /* Subtract. */
 886 |         "st z+, %[left] \n\t"  /* Store the result. */
 887 |         "dec %[i] \n\t"
 888 |         "brne 1b \n\t"
 889 | 
 890 |         "adc %[borrow], %[borrow] \n\t" /* Store carry bit in borrow. */
 891 | 
 892 |         : "+z" (r), "+x" (left), "+y" (right), [i] "+r" (num_words),
 893 |             [borrow] "+r" (borrow), [left] "=&r" (left_byte), [right] "=&r" (right_byte)
 894 |         :
 895 |         : "cc"
 896 |     );
 897 |     return borrow;
 898 | }
 899 | #define asm_sub 1
 900 | #endif
 901 | 
 902 | #if !asm_mult
 903 | __attribute((noinline))
 904 | uECC_VLI_API void uECC_vli_mult(uECC_word_t *result,
 905 |                                 const uECC_word_t *left,
 906 |                                 const uECC_word_t *right,
 907 |                                 wordcount_t num_words) {
 908 |     volatile uECC_word_t *r = result;
 909 |     uint8_t r0 = 0;
 910 |     uint8_t r1 = 0;
 911 |     uint8_t r2 = 0;
 912 |     uint8_t zero = 0;
 913 |     uint8_t k, i;
 914 | 
 915 |     __asm__ volatile (
 916 |         "ldi %[k], 1 \n\t" /* k = 1; k < num_words; ++k */
 917 | 
 918 |         "1: \n\t"
 919 |         "ldi %[i], 0 \n\t"  /* i = 0; i < k; ++i */
 920 | 
 921 |         "add r28, %[k] \n\t" /* pre-add right ptr */
 922 |         "adc r29, %[zero] \n\t"
 923 | 
 924 |         "2: \n\t"
 925 |         "ld r0, x+ \n\t"
 926 |         "ld r1, -y \n\t"
 927 |         "mul r0, r1 \n\t"
 928 | 
 929 |         "add %[r0], r0 \n\t"
 930 |         "adc %[r1], r1 \n\t"
 931 |         "adc %[r2], %[zero] \n\t"
 932 | 
 933 |         "inc %[i] \n\t"
 934 |         "cp %[i], %[k] \n\t"
 935 |         "brlo 2b \n\t" /* loop if i < k */
 936 | 
 937 |         "sub r26, %[k] \n\t" /* fix up left ptr */
 938 |         "sbc r27, %[zero] \n\t"
 939 | 
 940 |         "st z+, %[r0] \n\t"  /* Store the result. */
 941 |         "mov %[r0], %[r1] \n\t"
 942 |         "mov %[r1], %[r2] \n\t"
 943 |         "mov %[r2], %[zero] \n\t"
 944 | 
 945 |         "inc %[k] \n\t"
 946 |         "cp %[k], %[num] \n\t"
 947 |         "brlo 1b \n\t" /* loop if k < num_words */
 948 | 
 949 |         /* second half */
 950 |         "mov %[k], %[num] \n\t" /* k = num_words; k > 0; --k */
 951 |         "add r28, %[num] \n\t" /* move right ptr to point at the end of right */
 952 |         "adc r29, %[zero] \n\t"
 953 | 
 954 |         "1: \n\t"
 955 |         "ldi %[i], 0 \n\t" /* i = 0; i < k; ++i */
 956 | 
 957 |         "2: \n\t"
 958 |         "ld r0, x+ \n\t"
 959 |         "ld r1, -y \n\t"
 960 |         "mul r0, r1 \n\t"
 961 | 
 962 |         "add %[r0], r0 \n\t"
 963 |         "adc %[r1], r1 \n\t"
 964 |         "adc %[r2], %[zero] \n\t"
 965 | 
 966 |         "inc %[i] \n\t"
 967 |         "cp %[i], %[k] \n\t"
 968 |         "brlo 2b \n\t" /* loop if i < k */
 969 | 
 970 |         "add r28, %[k] \n\t" /* fix up right ptr */
 971 |         "adc r29, %[zero] \n\t"
 972 | 
 973 |         "st z+, %[r0] \n\t"  /* Store the result. */
 974 |         "mov %[r0], %[r1] \n\t"
 975 |         "mov %[r1], %[r2] \n\t"
 976 |         "mov %[r2], %[zero] \n\t"
 977 | 
 978 |         "dec %[k] \n\t"
 979 |         "sub r26, %[k] \n\t" /* fix up left ptr (after k is decremented, so next time
 980 |                                 we start 1 higher) */
 981 |         "sbc r27, %[zero] \n\t"
 982 | 
 983 |         "cp %[k], %[zero] \n\t"
 984 |         "brne 1b \n\t" /* loop if k > 0 */
 985 | 
 986 |         "st z+, %[r0] \n\t"  /* Store last result byte. */
 987 |         "eor r1, r1 \n\t" /* fix r1 to be 0 again */
 988 | 
 989 |         : "+z" (result), "+x" (left), "+y" (right),
 990 |           [r0] "+r" (r0), [r1] "+r" (r1), [r2] "+r" (r2),
 991 |           [zero] "+r" (zero), [num] "+r" (num_words),
 992 |           [k] "=&r" (k), [i] "=&r" (i)
 993 |         :
 994 |         : "r0", "cc"
 995 |     );
 996 | }
 997 | #define asm_mult 1
 998 | #endif
 999 | 
1000 | #if (uECC_SQUARE_FUNC && !asm_square)
1001 | uECC_VLI_API void uECC_vli_square(uECC_word_t *result,
1002 |                                   const uECC_word_t *left,
1003 |                                   wordcount_t num_words) {
1004 |     volatile uECC_word_t *r = result;
1005 |     uint8_t r0 = 0;
1006 |     uint8_t r1 = 0;
1007 |     uint8_t r2 = 0;
1008 |     uint8_t zero = 0;
1009 |     uint8_t k;
1010 | 
1011 |     __asm__ volatile (
1012 |         "ldi %[k], 1 \n\t" /* k = 1; k < num_words * 2; ++k */
1013 | 
1014 |         "1: \n\t"
1015 | 
1016 |         "movw r26, %[orig] \n\t"  /* copy orig ptr to 'left' ptr */
1017 |         "movw r30, %[orig] \n\t"  /* copy orig ptr to 'right' ptr */
1018 |         "cp %[k], %[num] \n\t"
1019 |         "brlo 2f \n\t"
1020 |         "breq 2f \n\t"
1021 | 
1022 |         /* when k > num_words, we start from (k - num_words) on the 'left' ptr */
1023 |         "add r26, %[k] \n\t"
1024 |         "adc r27, %[zero] \n\t"
1025 |         "sub r26, %[num] \n\t"
1026 |         "sbc r27, %[zero] \n\t"
1027 |         "add r30, %[num] \n\t" /* move right ptr to point at the end */
1028 |         "adc r31, %[zero] \n\t"
1029 |         "rjmp 3f \n\t"
1030 | 
1031 |         "2: \n\t" /* when k <= num_words, we add k to the 'right' ptr */
1032 |         "add r30, %[k] \n\t" /* pre-add 'right' ptr */
1033 |         "adc r31, %[zero] \n\t"
1034 | 
1035 |         "3: \n\t"
1036 |         "ld r0, x+ \n\t"
1037 |         "cp r26, r30 \n\t" /* if left == right here, then we are done after this mult
1038 |                               (and we don't need to double) */
1039 |         "breq 4f \n\t"
1040 |         "ld r1, -z \n\t"
1041 |         "mul r0, r1 \n\t"
1042 | 
1043 |         /* add twice since it costs the same as doubling */
1044 |         "add %[r0], r0 \n\t"
1045 |         "adc %[r1], r1 \n\t"
1046 |         "adc %[r2], %[zero] \n\t"
1047 |         "add %[r0], r0 \n\t"
1048 |         "adc %[r1], r1 \n\t"
1049 |         "adc %[r2], %[zero] \n\t"
1050 | 
1051 |         "cpse r26, r30 \n\t" /* if left == right here, then we are done */
1052 |         "rjmp 3b \n\t"
1053 |         "rjmp 5f \n\t" /* skip code for non-doubled mult */
1054 | 
1055 |         "4: \n\t"
1056 |         "ld r1, -z \n\t"
1057 |         "mul r0, r1 \n\t"
1058 |         "add %[r0], r0 \n\t"
1059 |         "adc %[r1], r1 \n\t"
1060 |         "adc %[r2], %[zero] \n\t"
1061 | 
1062 |         "5: \n\t"
1063 |         "movw r30, %[result] \n\t" /* make z point to result */
1064 |         "st z+, %[r0] \n\t"        /* Store the result. */
1065 |         "movw %[result], r30 \n\t" /* update result ptr*/
1066 |         "mov %[r0], %[r1] \n\t"
1067 |         "mov %[r1], %[r2] \n\t"
1068 |         "mov %[r2], %[zero] \n\t"
1069 | 
1070 |         "inc %[k] \n\t"
1071 |         "cp %[k], %[max] \n\t"
1072 |         "brlo 1b \n\t" /* loop if k < num_words * 2 */
1073 | 
1074 |         "movw r30, %[result] \n\t"  /* make z point to result */
1075 |         "st z+, %[r0] \n\t"  /* Store last result byte. */
1076 |         "eor r1, r1 \n\t" /* fix r1 to be 0 again */
1077 | 
1078 |         : [result] "+r" (r),
1079 |           [r0] "+r" (r0), [r1] "+r" (r1), [r2] "+r" (r2), [zero] "+r" (zero),
1080 |           [k] "=&a" (k)
1081 |         : [orig] "r" (left), [max] "r" ((uint8_t)(2 * num_words)),
1082 |           [num] "r" (num_words)
1083 |         : "r0", "r26", "r27", "r30", "r31", "cc"
1084 |     );
1085 | }
1086 | #define asm_square 1
1087 | #endif /* uECC_SQUARE_FUNC && !asm_square */
1088 | 
1089 | #endif /* _UECC_ASM_AVR_H_ */
1090 | 


--------------------------------------------------------------------------------
/emk_project.py:
--------------------------------------------------------------------------------
  1 | import os
  2 | 
  3 | c, link, asm, utils = emk.module("c", "link", "asm", "utils")
  4 | 
  5 | default_compile_flags = ["-fvisibility=hidden", "-Wall", "-Wextra", "-Wshadow", "-Werror", "-Wno-missing-field-initializers", "-Wno-unused-parameter", \
  6 |     "-Wno-comment", "-Wno-unused", "-Wno-unknown-pragmas"]
  7 | default_link_flags = []
  8 | opt_flags = {"dbg":["-g"], "std":["-O2"], "max":["-O3"], "small":["-Os"]}
  9 | opt_link_flags = {"dbg":[], "std":[], "max":[], "small":[]}
 10 | c_flags = ["-std=c99"]
 11 | cxx_flags = ["-std=c++11", "-Wno-reorder", "-fno-rtti", "-fno-exceptions"]
 12 | c_link_flags = []
 13 | cxx_link_flags = ["-fno-rtti", "-fno-exceptions"]
 14 | 
 15 | if "root" in emk.options:
 16 |     root = emk.options["root"]
 17 | else:
 18 |     root = "/"
 19 | 
 20 | def setup_build_dir():
 21 |     build_arch = None
 22 |     if "arch" in emk.options:
 23 |         build_arch = emk.options["arch"]
 24 |     elif not emk.cleaning:
 25 |         build_arch = "osx"
 26 |     emk.options["arch"] = build_arch
 27 | 
 28 |     opt_level = None
 29 |     if "opt" in emk.options:
 30 |         level = emk.options["opt"]
 31 |         if level in opt_flags:
 32 |             opt_level = level
 33 |         else:
 34 |             emk.log.warning("Unknown optimization level '%s'" % (level))
 35 |     elif not emk.cleaning:
 36 |         opt_level = "dbg"
 37 |     emk.options["opt"] = opt_level
 38 | 
 39 |     dirs = ["__build__"]
 40 |     if build_arch:
 41 |         dirs.append(build_arch)
 42 |     if opt_level:
 43 |         dirs.append(opt_level)
 44 |     emk.build_dir = os.path.join(*dirs)
 45 | 
 46 | def setup_osx():
 47 |     global c
 48 |     global link
 49 | 
 50 |     flags = [("-arch", "x86_64"), "-fno-common", "-Wnewline-eof"]
 51 |     c.flags.extend(flags)
 52 |     c.cxx.flags += ["-stdlib=libc++"]
 53 |     link.cxx.flags += ["-stdlib=libc++"]
 54 | 
 55 |     link_flags = [("-arch", "x86_64")]
 56 |     link.local_flags.extend(link_flags)
 57 | 
 58 | def setup_avr():
 59 |     global c
 60 |     global link
 61 | 
 62 |     c.compiler = c.GccCompiler(root + "Projects/avr-tools/bin/avr-")
 63 |     c.flags += ["-mmcu=atmega256rfr2", "-ffunction-sections", "-fdata-sections"]
 64 |     link.linker = link.GccLinker(root + "Projects/avr-tools/bin/avr-")
 65 |     link.flags += ["-mmcu=atmega256rfr2", "-mrelax", "-Wl,--gc-sections"]
 66 |     link.strip = True
 67 | 
 68 | def setup_arm_thumb():
 69 |     global c
 70 |     global link
 71 |     global asm
 72 |     global utils
 73 | 
 74 |     asm.assembler = asm.GccAssembler(root + "cross/arm_cortex/bin/arm-none-eabi-")
 75 |     c.compiler = c.GccCompiler(root + "cross/arm_cortex/bin/arm-none-eabi-")
 76 |     link.linker = link.GccLinker(root + "cross/arm_cortex/bin/arm-none-eabi-")
 77 | 
 78 |     c.flags.extend(["-mcpu=cortex-m0", "-mthumb", "-ffunction-sections", "-fdata-sections", "-fno-builtin-fprintf", "-fno-builtin-printf"])
 79 |     c.defines["LPC11XX"] = 1
 80 | 
 81 |     link.local_flags.extend(["-mcpu=cortex-m0", "-mthumb", "-nostartfiles", "-nostdlib", "-Wl,--gc-sections"])
 82 |     link.local_flags.extend(["-Tflash.lds", "-L" + root + "Projects/lpc11xx/core", root + "Projects/lpc11xx/core/" + emk.build_dir + "/board_cstartup.o"])
 83 |     link.local_syslibs += ["gcc"]
 84 |     link.depdirs += [root + "Projects/lpc11xx/stdlib"]
 85 | 
 86 |     def do_objcopy(produces, requires):
 87 |         utils.call(root + "cross/arm_cortex/bin/arm-none-eabi-objcopy", "-O", "binary", requires[0], produces[0])
 88 | 
 89 |     def handle_exe(path):
 90 |         emk.depend(path, root + "Projects/lpc11xx/core/" + emk.build_dir + "/board_cstartup.o")
 91 |         emk.rule(do_objcopy, path + ".bin", path, cwd_safe=True, ex_safe=True)
 92 |         emk.autobuild(path + ".bin")
 93 | 
 94 |     link.exe_funcs.append(handle_exe)
 95 |     link.strip = True
 96 | 
 97 |     emk.recurse(root + "Projects/lpc11xx/core")
 98 | 
 99 | def setup_linux_rpi():
100 |     global c
101 |     global link
102 | 
103 |     c.compiler = c.GccCompiler("/Volumes/xtools/arm-none-linux-gnueabi/bin/arm-none-linux-gnueabi-")
104 |     link.linker = link.GccLinker("/Volumes/xtools/arm-none-linux-gnueabi/bin/arm-none-linux-gnueabi-")
105 | 
106 |     c.flags.extend(["-fomit-frame-pointer"])
107 | 
# Resolve the arch/opt options and the build directory before anything reads them.
setup_build_dir()

# Target-specific setup routines, keyed by the value of the "arch" option.
setup_funcs = {
    "osx": setup_osx,
    "avr": setup_avr,
    "arm_thumb": setup_arm_thumb,
    "rpi": setup_linux_rpi,
}

if not emk.cleaning:
    build_arch = emk.options["arch"]
    opt_level = emk.options["opt"]

    # Compiler flags: common defaults, then the optimization level, then per-language.
    c.flags.extend(default_compile_flags)
    c.flags.extend(opt_flags[opt_level])
    c.c.flags.extend(c_flags)
    c.cxx.flags.extend(cxx_flags)

    # Linker flags, in the same order.
    link.local_flags.extend(default_link_flags)
    link.local_flags.extend(opt_link_flags[opt_level])
    link.c.local_flags.extend(c_link_flags)
    link.cxx.local_flags.extend(cxx_link_flags)

    c.include_dirs.append("$:proj:$")

    # Run the setup routine for the selected target; an unlisted target is a build error.
    target_setup = setup_funcs.get(build_arch)
    if target_setup is None:
        raise emk.BuildError("Unknown target arch '%s'" % (build_arch))
    target_setup()

    # Expose the target to the code as TARGET_ARCH_<ARCH>=1.
    c.defines["TARGET_ARCH_" + build_arch.upper()] = 1
133 | 


--------------------------------------------------------------------------------
/emk_rules.py:
--------------------------------------------------------------------------------
# Bind the emk "c" and "link" modules for this directory's rules.
c, link = emk.module("c", "link")

# NOTE(review): presumably makes emk also process the "test" subdirectory -- confirm
# against the emk documentation.
emk.subdir("test")
4 | 


--------------------------------------------------------------------------------
/examples/ecc_test/ecc_test.ino:
--------------------------------------------------------------------------------
 1 | #include <uECC.h>
 2 | 
 3 | static int RNG(uint8_t *dest, unsigned size) {
 4 |   // Use the least-significant bits from the ADC for an unconnected pin (or connected to a source of 
 5 |   // random noise). This can take a long time to generate random data if the result of analogRead(0) 
 6 |   // doesn't change very frequently.
 7 |   while (size) {
 8 |     uint8_t val = 0;
 9 |     for (unsigned i = 0; i < 8; ++i) {
10 |       int init = analogRead(0);
11 |       int count = 0;
12 |       while (analogRead(0) == init) {
13 |         ++count;
14 |       }
15 |       
16 |       if (count == 0) {
17 |          val = (val << 1) | (init & 0x01);
18 |       } else {
19 |          val = (val << 1) | (count & 0x01);
20 |       }
21 |     }
22 |     *dest = val;
23 |     ++dest;
24 |     --size;
25 |   }
26 |   // NOTE: it would be a good idea to hash the resulting random data using SHA-256 or similar.
27 |   return 1;
28 | }
29 | 
30 | void setup() {
31 |   Serial.begin(115200);
32 |   Serial.print("Testing ecc\n");
33 |   uECC_set_rng(&RNG);
34 | }
35 | 
36 | void loop() {
37 |   const struct uECC_Curve_t * curve = uECC_secp160r1();
38 |   uint8_t private1[21];
39 |   uint8_t private2[21];
40 |   
41 |   uint8_t public1[40];
42 |   uint8_t public2[40];
43 |   
44 |   uint8_t secret1[20];
45 |   uint8_t secret2[20];
46 |   
47 |   unsigned long a = millis();
48 |   uECC_make_key(public1, private1, curve);
49 |   unsigned long b = millis();
50 |   
51 |   Serial.print("Made key 1 in "); Serial.println(b-a);
52 |   a = millis();
53 |   uECC_make_key(public2, private2, curve);
54 |   b = millis();
55 |   Serial.print("Made key 2 in "); Serial.println(b-a);
56 | 
57 |   a = millis();
58 |   int r = uECC_shared_secret(public2, private1, secret1, curve);
59 |   b = millis();
60 |   Serial.print("Shared secret 1 in "); Serial.println(b-a);
61 |   if (!r) {
62 |     Serial.print("shared_secret() failed (1)\n");
63 |     return;
64 |   }
65 | 
66 |   a = millis();
67 |   r = uECC_shared_secret(public1, private2, secret2, curve);
68 |   b = millis();
69 |   Serial.print("Shared secret 2 in "); Serial.println(b-a);
70 |   if (!r) {
71 |     Serial.print("shared_secret() failed (2)\n");
72 |     return;
73 |   }
74 |     
75 |   if (memcmp(secret1, secret2, 20) != 0) {
76 |     Serial.print("Shared secrets are not identical!\n");
77 |   } else {
78 |     Serial.print("Shared secrets are identical\n");
79 |   }
80 | }
81 | 


--------------------------------------------------------------------------------
/library.properties:
--------------------------------------------------------------------------------
 1 | name=micro-ecc
 2 | version=1.0.0
 3 | author=Kenneth MacKay
 4 | maintainer=Kenneth MacKay
 5 | sentence=uECC
 6 | paragraph=A small and fast ECDH and ECDSA implementation for 8-bit, 32-bit, and 64-bit processors.
 7 | category=Other
 8 | url=https://github.com/kmackay/micro-ecc
 9 | architectures=*
10 | 


--------------------------------------------------------------------------------
/platform-specific.inc:
--------------------------------------------------------------------------------
 1 | /* Copyright 2015, Kenneth MacKay. Licensed under the BSD 2-clause license. */
 2 | 
 3 | #ifndef _UECC_PLATFORM_SPECIFIC_H_
 4 | #define _UECC_PLATFORM_SPECIFIC_H_
 5 | 
 6 | #include "types.h"
 7 | 
 8 | #if (defined(_WIN32) || defined(_WIN64))
 9 | /* Windows */
10 | 
11 | // use pragma syntax to prevent tweaking the linker script for getting CryptXYZ function
12 | #pragma comment(lib, "crypt32.lib")
13 | #pragma comment(lib, "advapi32.lib")
14 | 
15 | #define WIN32_LEAN_AND_MEAN
16 | #include <windows.h>
17 | #include <wincrypt.h>
18 | 
19 | static int default_RNG(uint8_t *dest, unsigned size) {
20 |     HCRYPTPROV prov;
21 |     if (!CryptAcquireContext(&prov, NULL, NULL, PROV_RSA_FULL, CRYPT_VERIFYCONTEXT)) {
22 |         return 0;
23 |     }
24 | 
25 |     CryptGenRandom(prov, size, (BYTE *)dest);
26 |     CryptReleaseContext(prov, 0);
27 |     return 1;
28 | }
29 | #define default_RNG_defined 1
30 | 
31 | #elif defined(unix) || defined(__linux__) || defined(__unix__) || defined(__unix) || \
32 |     (defined(__APPLE__) && defined(__MACH__)) || defined(uECC_POSIX)
33 | 
34 | /* Some POSIX-like system with /dev/urandom or /dev/random. */
35 | #include <sys/types.h>
36 | #include <fcntl.h>
37 | #include <unistd.h>
38 | 
/* Fall back to no flag on systems whose <fcntl.h> does not provide O_CLOEXEC. */
#ifndef O_CLOEXEC
    #define O_CLOEXEC 0
#endif

/* POSIX default RNG: fills `dest` with `size` bytes read from /dev/urandom, or from
 * /dev/random when /dev/urandom cannot be opened.
 * Returns 1 when every byte was read, 0 if neither device opens or a read fails or
 * hits end-of-file first. */
static int default_RNG(uint8_t *dest, unsigned size) {
    int fd = open("/dev/urandom", O_RDONLY | O_CLOEXEC);
    if (fd == -1) {
        fd = open("/dev/random", O_RDONLY | O_CLOEXEC);
    }
    if (fd == -1) {
        return 0;
    }

    int ok = 1;
    char *cursor = (char *)dest;
    size_t remaining = size;
    /* read() may return fewer bytes than requested, so keep going until done. */
    while (remaining > 0) {
        ssize_t got = read(fd, cursor, remaining);
        if (got <= 0) { /* read failed */
            ok = 0;
            break;
        }
        remaining -= got;
        cursor += got;
    }

    close(fd);
    return ok;
}
67 | #define default_RNG_defined 1
68 | 
69 | #elif defined(RIOT_VERSION)
70 | 
71 | #include <random.h>
72 | 
/* RIOT default RNG: asks random_bytes() for `size` bytes into `dest` and always
 * reports success (1).
 * NOTE(review): presumably random_bytes() cannot fail -- confirm against RIOT's
 * random.h. */
static int default_RNG(uint8_t *dest, unsigned size) {
    random_bytes(dest, size);
    return 1;
}
77 | #define default_RNG_defined 1
78 | 
79 | #elif defined(NRF52_SERIES)
80 | 
81 | #include "app_error.h"
82 | #include "nrf_crypto_rng.h"
83 | 
84 | static int default_RNG(uint8_t *dest, unsigned size) 
85 | {
86 |     // make sure to call nrf_crypto_init and nrf_crypto_rng_init first
87 |     ret_code_t ret_code = nrf_crypto_rng_vector_generate(dest, size);
88 |     return (ret_code == NRF_SUCCESS) ? 1 : 0;
89 | }
90 | #define default_RNG_defined 1
91 | 
92 | #endif /* platform */
93 | 
94 | #endif /* _UECC_PLATFORM_SPECIFIC_H_ */
95 | 


--------------------------------------------------------------------------------
/scripts/mult_arm.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env python
  2 | 
  3 | import sys
  4 | 
  5 | if len(sys.argv) < 2:
  6 |     print "Provide the integer size in 32-bit words"
  7 |     sys.exit(1)
  8 | 
  9 | size = int(sys.argv[1])
 10 | 
 11 | full_rows = size // 3
 12 | init_size = size % 3
 13 | 
 14 | if init_size == 0:
 15 |     full_rows = full_rows - 1
 16 |     init_size = 3
 17 | 
 18 | def emit(line, *args):
 19 |     s = '"' + line + r' \n\t"'
 20 |     print s % args
 21 | 
 22 | rx = [3, 4, 5]
 23 | ry = [6, 7, 8]
 24 | 
 25 | #### set up registers
 26 | emit("add r0, %s", (size - init_size) * 4) # move z
 27 | emit("add r2, %s", (size - init_size) * 4) # move y
 28 | 
 29 | emit("ldmia r1!, {%s}", ", ".join(["r%s" % (rx[i]) for i in xrange(init_size)]))
 30 | emit("ldmia r2!, {%s}", ", ".join(["r%s" % (ry[i]) for i in xrange(init_size)]))
 31 | 
 32 | print ""
 33 | if init_size == 1:
 34 |     emit("umull r9, r10, r3, r6")
 35 |     emit("stmia r0!, {r9, r10}")
 36 | else:
 37 |     #### first two multiplications of initial block
 38 |     emit("umull r11, r12, r3, r6")
 39 |     emit("stmia r0!, {r11}")
 40 |     print ""
 41 |     emit("mov r10, #0")
 42 |     emit("umull r11, r9, r3, r7")
 43 |     emit("adds r12, r12, r11")
 44 |     emit("adc r9, r9, #0")
 45 |     emit("umull r11, r14, r4, r6")
 46 |     emit("adds r12, r12, r11")
 47 |     emit("adcs r9, r9, r14")
 48 |     emit("adc r10, r10, #0")
 49 |     emit("stmia r0!, {r12}")
 50 |     print ""
 51 | 
 52 |     #### rest of initial block, with moving accumulator registers
 53 |     acc = [9, 10, 11, 12, 14]
 54 |     if init_size == 3:
 55 |         emit("mov r%s, #0", acc[2])
 56 |         for i in xrange(0, 3):
 57 |             emit("umull r%s, r%s, r%s, r%s", acc[3], acc[4], rx[i], ry[2 - i])
 58 |             emit("adds r%s, r%s, r%s", acc[0], acc[0], acc[3])
 59 |             emit("adcs r%s, r%s, r%s", acc[1], acc[1], acc[4])
 60 |             emit("adc r%s, r%s, #0", acc[2], acc[2])
 61 |         emit("stmia r0!, {r%s}", acc[0])
 62 |         print ""
 63 |         acc = acc[1:] + acc[:1]
 64 | 
 65 |         emit("mov r%s, #0", acc[2])
 66 |         for i in xrange(0, 2):
 67 |             emit("umull r%s, r%s, r%s, r%s", acc[3], acc[4], rx[i + 1], ry[2 - i])
 68 |             emit("adds r%s, r%s, r%s", acc[0], acc[0], acc[3])
 69 |             emit("adcs r%s, r%s, r%s", acc[1], acc[1], acc[4])
 70 |             emit("adc r%s, r%s, #0", acc[2], acc[2])
 71 |         emit("stmia r0!, {r%s}", acc[0])
 72 |         print ""
 73 |         acc = acc[1:] + acc[:1]
 74 |     
 75 |     emit("umull r%s, r%s, r%s, r%s", acc[3], acc[4], rx[init_size-1], ry[init_size-1])
 76 |     emit("adds r%s, r%s, r%s", acc[0], acc[0], acc[3])
 77 |     emit("adc r%s, r%s, r%s", acc[1], acc[1], acc[4])
 78 |     emit("stmia r0!, {r%s}", acc[0])
 79 |     emit("stmia r0!, {r%s}", acc[1])
 80 | print ""
 81 | 
 82 | #### reset y and z pointers
 83 | emit("sub r0, %s", (2 * init_size + 3) * 4)
 84 | emit("sub r2, %s", (init_size + 3) * 4)
 85 | 
 86 | #### load y registers
 87 | emit("ldmia r2!, {%s}", ", ".join(["r%s" % (ry[i]) for i in xrange(3)]))
 88 | 
 89 | #### load additional x registers
 90 | if init_size != 3:
 91 |     emit("ldmia r1!, {%s}", ", ".join(["r%s" % (rx[i]) for i in xrange(init_size, 3)]))
 92 | print ""
 93 | 
 94 | prev_size = init_size
 95 | for row in xrange(full_rows):
 96 |     emit("umull r11, r12, r3, r6")
 97 |     emit("stmia r0!, {r11}")
 98 |     print ""
 99 |     emit("mov r10, #0")
100 |     emit("umull r11, r9, r3, r7")
101 |     emit("adds r12, r12, r11")
102 |     emit("adc r9, r9, #0")
103 |     emit("umull r11, r14, r4, r6")
104 |     emit("adds r12, r12, r11")
105 |     emit("adcs r9, r9, r14")
106 |     emit("adc r10, r10, #0")
107 |     emit("stmia r0!, {r12}")
108 |     print ""
109 | 
110 |     acc = [9, 10, 11, 12, 14]
111 |     emit("mov r%s, #0", acc[2])
112 |     for i in xrange(0, 3):
113 |         emit("umull r%s, r%s, r%s, r%s", acc[3], acc[4], rx[i], ry[2 - i])
114 |         emit("adds r%s, r%s, r%s", acc[0], acc[0], acc[3])
115 |         emit("adcs r%s, r%s, r%s", acc[1], acc[1], acc[4])
116 |         emit("adc r%s, r%s, #0", acc[2], acc[2])
117 |     emit("stmia r0!, {r%s}", acc[0])
118 |     print ""
119 |     acc = acc[1:] + acc[:1]
120 | 
121 |     #### now we need to start shifting x and loading from z
122 |     x_regs = [3, 4, 5]
123 |     for r in xrange(0, prev_size):
124 |         x_regs = x_regs[1:] + x_regs[:1]
125 |         emit("ldmia r1!, {r%s}", x_regs[2])
126 |         emit("mov r%s, #0", acc[2])
127 |         for i in xrange(0, 3):
128 |             emit("umull r%s, r%s, r%s, r%s", acc[3], acc[4], x_regs[i], ry[2 - i])
129 |             emit("adds r%s, r%s, r%s", acc[0], acc[0], acc[3])
130 |             emit("adcs r%s, r%s, r%s", acc[1], acc[1], acc[4])
131 |             emit("adc r%s, r%s, #0", acc[2], acc[2])
132 |         emit("ldr r%s, [r0]", acc[3]) # load stored value from initial block, and add to accumulator
133 |         emit("adds r%s, r%s, r%s", acc[0], acc[0], acc[3])
134 |         emit("adcs r%s, r%s, #0", acc[1], acc[1])
135 |         emit("adc r%s, r%s, #0", acc[2], acc[2])
136 |         emit("stmia r0!, {r%s}", acc[0])
137 |         print ""
138 |         acc = acc[1:] + acc[:1]
139 | 
140 |     # done shifting x, start shifting y
141 |     y_regs = [6, 7, 8]
142 |     for r in xrange(0, prev_size):
143 |         y_regs = y_regs[1:] + y_regs[:1]
144 |         emit("ldmia r2!, {r%s}", y_regs[2])
145 |         emit("mov r%s, #0", acc[2])
146 |         for i in xrange(0, 3):
147 |             emit("umull r%s, r%s, r%s, r%s", acc[3], acc[4], x_regs[i], y_regs[2 - i])
148 |             emit("adds r%s, r%s, r%s", acc[0], acc[0], acc[3])
149 |             emit("adcs r%s, r%s, r%s", acc[1], acc[1], acc[4])
150 |             emit("adc r%s, r%s, #0", acc[2], acc[2])
151 |         emit("ldr r%s, [r0]", acc[3]) # load stored value from initial block, and add to accumulator
152 |         emit("adds r%s, r%s, r%s", acc[0], acc[0], acc[3])
153 |         emit("adcs r%s, r%s, #0", acc[1], acc[1])
154 |         emit("adc r%s, r%s, #0", acc[2], acc[2])
155 |         emit("stmia r0!, {r%s}", acc[0])
156 |         print ""
157 |         acc = acc[1:] + acc[:1]
158 | 
159 |     # done both shifts, do remaining corner
160 |     emit("mov r%s, #0", acc[2])
161 |     for i in xrange(0, 2):
162 |         emit("umull r%s, r%s, r%s, r%s", acc[3], acc[4], x_regs[i + 1], y_regs[2 - i])
163 |         emit("adds r%s, r%s, r%s", acc[0], acc[0], acc[3])
164 |         emit("adcs r%s, r%s, r%s", acc[1], acc[1], acc[4])
165 |         emit("adc r%s, r%s, #0", acc[2], acc[2])
166 |     emit("stmia r0!, {r%s}", acc[0])
167 |     print ""
168 |     acc = acc[1:] + acc[:1]
169 |     
170 |     emit("umull r%s, r%s, r%s, r%s", acc[3], acc[4], x_regs[2], y_regs[2])
171 |     emit("adds r%s, r%s, r%s", acc[0], acc[0], acc[3])
172 |     emit("adc r%s, r%s, r%s", acc[1], acc[1], acc[4])
173 |     emit("stmia r0!, {r%s}", acc[0])
174 |     emit("stmia r0!, {r%s}", acc[1])
175 |     print ""
176 |     
177 |     prev_size = prev_size + 3
178 |     if row < full_rows - 1:
179 |         #### reset x, y and z pointers
180 |         emit("sub r0, %s", (2 * prev_size + 3) * 4)
181 |         emit("sub r1, %s", prev_size * 4)
182 |         emit("sub r2, %s", (prev_size + 3) * 4)
183 | 
184 |         #### load x and y registers
185 |         emit("ldmia r1!, {%s}", ",".join(["r%s" % (rx[i]) for i in xrange(3)]))
186 |         emit("ldmia r2!, {%s}", ",".join(["r%s" % (ry[i]) for i in xrange(3)]))
187 |         
188 |         print ""
189 | 


--------------------------------------------------------------------------------
/scripts/mult_avr.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env python
  2 | 
  3 | import sys
  4 | 
  5 | if len(sys.argv) < 2: # the integer size is a required command-line argument
  6 |     print "Provide the integer size in bytes"
  7 |     sys.exit(1)
  8 | 
  9 | size = int(sys.argv[1]) # operand length in bytes
 10 | 
 11 | full_rows = size // 10 # number of 10-byte rows emitted by the main loop further down
 12 | init_size = size % 10 # leftover bytes, multiplied first as the "initial block"
 13 | 
 14 | if init_size == 0: # exact multiple of 10: use a 10-byte initial block and one fewer loop row
 15 |     full_rows = full_rows - 1
 16 |     init_size = 10
 17 | 
 18 | def rx(i): # register number used for byte i of the operand read through x (r2 upward)
 19 |     return i + 2
 20 | 
 21 | def ry(i): # register number used for byte i of the operand read through y (r12 upward)
 22 |     return i + 12
 23 | 
 24 | def emit(line, *args): # print one generated instruction as a double-quoted string
 25 |     s = '"' + line + r' \n\t"' # raw string: the \n\t suffix is printed as literal backslash sequences
 26 |     print s % args # %-substitute args into the quoted template
 27 | 
 28 | #### set up registers
 29 | emit("adiw r30, %s", size - init_size) # move z
 30 | emit("adiw r28, %s", size - init_size) # move y
 31 | 
 32 | for i in xrange(init_size): # first init_size bytes read through x go to r2 upward
 33 |     emit("ld r%s, x+", rx(i))
 34 | for i in xrange(init_size): # init_size bytes read through y (after the move above) go to r12 upward
 35 |     emit("ld r%s, y+", ry(i))
 36 | 
 37 | emit("ldi r25, 0") # r25 is never written again by this script; "adc rN, r25" adds only the carry
 38 | print ""
 39 | if init_size == 1: # one byte per operand: a single mul, result bytes stored from r0 then r1
 40 |     emit("mul r2, r12")
 41 |     emit("st z+, r0")
 42 |     emit("st z+, r1")
 43 | else:
 44 |     #### first two multiplications of initial block
 45 |     emit("ldi r23, 0")
 46 |     emit("mul r2, r12")
 47 |     emit("st z+, r0")
 48 |     emit("mov r22, r1")
 49 |     print ""
 50 |     emit("ldi r24, 0")
 51 |     emit("mul r2, r13")
 52 |     emit("add r22, r0")
 53 |     emit("adc r23, r1")
 54 |     emit("mul r3, r12")
 55 |     emit("add r22, r0")
 56 |     emit("adc r23, r1")
 57 |     emit("adc r24, r25")
 58 |     emit("st z+, r22")
 59 |     print ""
 60 | 
 61 |     #### rest of initial block, with moving accumulator registers
 62 |     acc = [23, 24, 22] # accumulator register numbers, lowest byte first
 63 |     for r in xrange(2, init_size): # column r: products rx(i) * ry(r - i) for i = 0..r
 64 |         emit("ldi r%s, 0", acc[2])
 65 |         for i in xrange(0, r+1):
 66 |             emit("mul r%s, r%s", rx(i), ry(r - i))
 67 |             emit("add r%s, r0", acc[0])
 68 |             emit("adc r%s, r1", acc[1])
 69 |             emit("adc r%s, r25", acc[2])
 70 |         emit("st z+, r%s", acc[0])
 71 |         print ""
 72 |         acc = acc[1:] + acc[:1] # rotate: the register just stored becomes the new top byte
 73 |     for r in xrange(1, init_size-1): # remaining columns: products rx(r + i) * ry(init_size - 1 - i)
 74 |         emit("ldi r%s, 0", acc[2])
 75 |         for i in xrange(0, init_size-r):
 76 |             emit("mul r%s, r%s", rx(r+i), ry((init_size-1) - i))
 77 |             emit("add r%s, r0", acc[0])
 78 |             emit("adc r%s, r1", acc[1])
 79 |             emit("adc r%s, r25", acc[2])
 80 |         emit("st z+, r%s", acc[0])
 81 |         print ""
 82 |         acc = acc[1:] + acc[:1]
 83 |     emit("mul r%s, r%s", rx(init_size-1), ry(init_size-1)) # last product of the block; both remaining bytes are stored
 84 |     emit("add r%s, r0", acc[0])
 85 |     emit("adc r%s, r1", acc[1])
 86 |     emit("st z+, r%s", acc[0])
 87 |     emit("st z+, r%s", acc[1])
 88 | print ""
 89 | 
 90 | #### reset y and z pointers
 91 | emit("sbiw r30, %s", 2 * init_size + 10) # z: back over the 2 * init_size bytes just stored, plus 10 more
 92 | emit("sbiw r28, %s", init_size + 10) # y: back over the init_size bytes just loaded, plus 10 more
 93 | 
 94 | #### load y registers
 95 | for i in xrange(10): # NOTE(review): this reset/load section is emitted even when full_rows == 0 (size <= 10) - confirm that case is never used
 96 |     emit("ld r%s, y+", ry(i))
 97 | 
 98 | #### load additional x registers
 99 | for i in xrange(init_size, 10): # fill the x registers beyond the init_size already loaded
100 |     emit("ld r%s, x+", rx(i))
101 | print ""
102 | 
103 | prev_size = init_size # grows by 10 per row; sets how many shift steps each row emits
104 | for row in xrange(full_rows):
105 |     #### do x = 0-9, y = 0-9 multiplications
106 |     emit("ldi r23, 0")
107 |     emit("mul r2, r12")
108 |     emit("st z+, r0")
109 |     emit("mov r22, r1")
110 |     print ""
111 |     emit("ldi r24, 0")
112 |     emit("mul r2, r13")
113 |     emit("add r22, r0")
114 |     emit("adc r23, r1")
115 |     emit("mul r3, r12")
116 |     emit("add r22, r0")
117 |     emit("adc r23, r1")
118 |     emit("adc r24, r25")
119 |     emit("st z+, r22")
120 |     print ""
121 | 
122 |     acc = [23, 24, 22] # accumulator register numbers, lowest byte first
123 |     for r in xrange(2, 10): # column r: products rx(i) * ry(r - i) for i = 0..r
124 |         emit("ldi r%s, 0", acc[2])
125 |         for i in xrange(0, r+1):
126 |             emit("mul r%s, r%s", rx(i), ry(r - i))
127 |             emit("add r%s, r0", acc[0])
128 |             emit("adc r%s, r1", acc[1])
129 |             emit("adc r%s, r25", acc[2])
130 |         emit("st z+, r%s", acc[0])
131 |         print ""
132 |         acc = acc[1:] + acc[:1] # rotate: the register just stored becomes the new top byte
133 | 
134 |     #### now we need to start shifting x and loading from z
135 |     x_regs = [2, 3, 4, 5, 6, 7, 8, 9, 10, 11]
136 |     for r in xrange(0, prev_size):
137 |         x_regs = x_regs[1:] + x_regs[:1] # the oldest x register is reused for the newly loaded byte
138 |         emit("ld r%s, x+", x_regs[9]) # load next byte of left
139 |         emit("ldi r%s, 0", acc[2])
140 |         for i in xrange(0, 10):
141 |             emit("mul r%s, r%s", x_regs[i], ry(9 - i))
142 |             emit("add r%s, r0", acc[0])
143 |             emit("adc r%s, r1", acc[1])
144 |             emit("adc r%s, r25", acc[2])
145 |         emit("ld r0, z") # load stored value from initial block, and add to accumulator (note z does not increment)
146 |         emit("add r%s, r0", acc[0])
147 |         emit("adc r%s, r25", acc[1])
148 |         emit("adc r%s, r25", acc[2])
149 |         emit("st z+, r%s", acc[0]) # store next byte (z increments)
150 |         print ""
151 |         acc = acc[1:] + acc[:1]
152 | 
153 |     # done shifting x, start shifting y
154 |     y_regs = [12, 13, 14, 15, 16, 17, 18, 19, 20, 21]
155 |     for r in xrange(0, prev_size):
156 |         y_regs = y_regs[1:] + y_regs[:1] # the oldest y register is reused for the newly loaded byte
157 |         emit("ld r%s, y+", y_regs[9]) # load next byte of right
158 |         emit("ldi r%s, 0", acc[2])
159 |         for i in xrange(0, 10):
160 |             emit("mul r%s, r%s", x_regs[i], y_regs[9 -i])
161 |             emit("add r%s, r0", acc[0])
162 |             emit("adc r%s, r1", acc[1])
163 |             emit("adc r%s, r25", acc[2])
164 |         emit("ld r0, z") # load stored value from initial block, and add to accumulator (note z does not increment)
165 |         emit("add r%s, r0", acc[0])
166 |         emit("adc r%s, r25", acc[1])
167 |         emit("adc r%s, r25", acc[2])
168 |         emit("st z+, r%s", acc[0]) # store next byte (z increments)
169 |         print ""
170 |         acc = acc[1:] + acc[:1]
171 | 
172 |     # done both shifts, do remaining corner
173 |     for r in xrange(1, 9): # columns with 10 - r products each; nothing is read back from z here
174 |         emit("ldi r%s, 0", acc[2])
175 |         for i in xrange(0, 10-r):
176 |             emit("mul r%s, r%s", x_regs[r+i], y_regs[9 - i])
177 |             emit("add r%s, r0", acc[0])
178 |             emit("adc r%s, r1", acc[1])
179 |             emit("adc r%s, r25", acc[2])
180 |         emit("st z+, r%s", acc[0])
181 |         print ""
182 |         acc = acc[1:] + acc[:1]
183 |     emit("mul r%s, r%s", x_regs[9], y_regs[9]) # last product of the row; both remaining bytes are stored
184 |     emit("add r%s, r0", acc[0])
185 |     emit("adc r%s, r1", acc[1])
186 |     emit("st z+, r%s", acc[0])
187 |     emit("st z+, r%s", acc[1])
188 |     print ""
189 |     
190 |     prev_size = prev_size + 10
191 |     if row < full_rows - 1: # another row follows: rewind the pointers and reload the first 10 bytes of each operand window
192 |         #### reset x, y and z pointers
193 |         emit("sbiw r30, %s", 2 * prev_size + 10)
194 |         emit("sbiw r28, %s", prev_size + 10)
195 |         emit("sbiw r26, %s", prev_size)
196 | 
197 |         #### load x and y registers
198 |         for i in xrange(10):
199 |             emit("ld r%s, x+", rx(i))
200 |             emit("ld r%s, y+", ry(i))
201 |         print ""
202 | 
203 | emit("eor r1, r1") # zero r1, which the mul results above overwrote (presumably the compiler's zero-register convention - confirm)
204 | 


--------------------------------------------------------------------------------
/scripts/mult_avr_extra.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env python
  2 | 
  3 | import sys
  4 | 
  5 | if len(sys.argv) < 2: # the integer size is a required command-line argument
  6 |     print "Provide the integer size in bytes"
  7 |     sys.exit(1)
  8 | 
  9 | size = int(sys.argv[1]) # used below for the pointer rewinds and the overlapping-row count
 10 | 
 11 | def lhi(i): # register number for "high" byte i read through x (r2..r5 for i = 0..3)
 12 |     return i + 2
 13 | 
 14 | def rhi(i): # register number for "high" byte i read through y (r6..r9 for i = 0..3)
 15 |     return i + 6
 16 | 
 17 | left_lo = [10, 11, 12, 13] # registers holding the current 4-byte "low" window read through x; rotated by update_low()
 18 | right_lo = [14, 15, 16, 17] # registers holding the current 4-byte "low" window read through y; rotated by update_low()
 19 | 
 20 | def llo(i): # register currently at position i of the x-side low window
 21 |     return left_lo[i]
 22 | 
 23 | def rlo(i): # register currently at position i of the y-side low window
 24 |     return right_lo[i]
 25 | 
 26 | def emit(line, *args): # print one generated instruction as a double-quoted string
 27 |     s = '"' + line + r' \n\t"' # raw string: the \n\t suffix is printed as literal backslash sequences
 28 |     print s % args # %-substitute args into the quoted template
 29 | 
 30 | def update_low(): # slide both low windows forward by one byte
 31 |     global left_lo
 32 |     global right_lo
 33 |     left_lo = left_lo[1:] + left_lo[:1] # rotate so the oldest register moves to position 3
 34 |     right_lo = right_lo[1:] + right_lo[:1]
 35 |     emit("ld r%s, x+", left_lo[3]) # overwrite that register with the next byte from x
 36 |     emit("ld r%s, y+", right_lo[3]) # and likewise with the next byte from y
 37 | 
 38 | accum = [19, 20, 21] # accumulator register numbers, lowest byte first
 39 | 
 40 | def acc(i): # register currently serving as accumulator byte i
 41 |     return accum[i]
 42 | 
 43 | def rotate_acc(): # after byte 0 is stored, shift roles so the old byte 1 becomes byte 0
 44 |     global accum
 45 |     accum = accum[1:] + accum[:1]
 46 | 
 47 | # Load high values
 48 | for i in xrange(4): # four bytes from each of x and y, at wherever the pointers stand on entry
 49 |     emit("ld r%s, x+", lhi(i))
 50 |     emit("ld r%s, y+", rhi(i))
 51 | 
 52 | emit("sbiw r26, %s", size + 4) # x: back over the 4 bytes just read plus size more
 53 | emit("sbiw r28, %s", size + 4) # y: same rewind
 54 | emit("sbiw r30, %s", size) # z: back by size bytes
 55 | 
 56 | # Load low values
 57 | for i in xrange(4): # first four bytes at the rewound x and y positions
 58 |     emit("ld r%s, x+", llo(i))
 59 |     emit("ld r%s, y+", rlo(i))
 60 | print ""
 61 | 
 62 | # Compute initial triangles
 63 | emit("mul r%s, r%s", lhi(0), rlo(0)) # first column: the accumulator is initialised by mov rather than add
 64 | emit("mov r%s, r0", acc(0))
 65 | emit("mov r%s, r1", acc(1))
 66 | emit("ldi r%s, 0", acc(2))
 67 | emit("ld r0, z") # add the byte already stored at z (z is not incremented by this load)
 68 | emit("add r%s, r0", acc(0))
 69 | emit("adc r%s, r25", acc(1)) # NOTE(review): r25 is used as zero but this script never sets it - presumably zeroed by preceding code; confirm
 70 | emit("mul r%s, r%s", rhi(0), llo(0))
 71 | emit("add r%s, r0", acc(0))
 72 | emit("adc r%s, r1", acc(1))
 73 | emit("adc r%s, r25", acc(2))
 74 | emit("st z+, r%s", acc(0))
 75 | print ""
 76 | rotate_acc()
 77 | 
 78 | for i in xrange(1, 4): # columns 1..3: i + 1 product pairs each
 79 |     emit("ldi r%s, 0", acc(2))
 80 |     emit("ld r0, z")
 81 |     emit("add r%s, r0", acc(0))
 82 |     emit("adc r%s, r25", acc(1))
 83 |     for j in xrange(i + 1):
 84 |         emit("mul r%s, r%s", lhi(j), rlo(i-j)) # x-high byte j times y-low byte i - j
 85 |         emit("add r%s, r0", acc(0))
 86 |         emit("adc r%s, r1", acc(1))
 87 |         emit("adc r%s, r25", acc(2))
 88 |         emit("mul r%s, r%s", rhi(j), llo(i-j)) # y-high byte j times x-low byte i - j
 89 |         emit("add r%s, r0", acc(0))
 90 |         emit("adc r%s, r1", acc(1))
 91 |         emit("adc r%s, r25", acc(2))
 92 |     emit("st z+, r%s", acc(0))
 93 |     print ""
 94 |     rotate_acc()
 95 | 
 96 | # Compute rows overlapping old block
 97 | for i in xrange(4, size): # one output byte per iteration, four product pairs each
 98 |     emit("ldi r%s, 0", acc(2))
 99 |     emit("ld r0, z") # add the byte already stored at z (z is not incremented by this load)
100 |     emit("add r%s, r0", acc(0))
101 |     emit("adc r%s, r25", acc(1))
102 |     update_low() # slide the low windows forward one byte before multiplying
103 |     for j in xrange(4):
104 |         emit("mul r%s, r%s", lhi(j), rlo(3-j))
105 |         emit("add r%s, r0", acc(0))
106 |         emit("adc r%s, r1", acc(1))
107 |         emit("adc r%s, r25", acc(2))
108 |         emit("mul r%s, r%s", rhi(j), llo(3-j))
109 |         emit("add r%s, r0", acc(0))
110 |         emit("adc r%s, r1", acc(1))
111 |         emit("adc r%s, r25", acc(2))
112 |     emit("st z+, r%s", acc(0))
113 |     print ""
114 |     rotate_acc()
115 | 
116 | # Compute new triangle
117 | left_combined = [llo(1), llo(2), llo(3), lhi(0), lhi(1), lhi(2), lhi(3)] # snapshot: the last three x-side low registers followed by the four high ones
118 | right_combined = [rlo(1), rlo(2), rlo(3), rhi(0), rhi(1), rhi(2), rhi(3)] # same layout for the y side
119 | 
120 | def left(i): # register at position i of the 7-entry x-side list
121 |     return left_combined[i]
122 | 
123 | def right(i): # register at position i of the 7-entry y-side list
124 |     return right_combined[i]
125 | 
126 | for i in xrange(6): # columns with 7 - i products each; nothing is read back from z here
127 |     emit("ldi r%s, 0", acc(2))
128 |     for j in xrange(7 - i):
129 |         emit("mul r%s, r%s", left(i+j), right(6-j))
130 |         emit("add r%s, r0", acc(0))
131 |         emit("adc r%s, r1", acc(1))
132 |         emit("adc r%s, r25", acc(2))
133 |     emit("st z+, r%s", acc(0))
134 |     print ""
135 |     rotate_acc()
136 | 
137 | emit("mul r%s, r%s", left(6), right(6)) # last product; both remaining accumulator bytes are stored
138 | emit("add r%s, r0", acc(0))
139 | emit("adc r%s, r1", acc(1))
140 | emit("st z+, r%s", acc(0))
141 | emit("st z+, r%s", acc(1))
142 | emit("adiw r26, 4") # advance x by 4
143 | emit("adiw r28, 4") # advance y by 4
144 | 


--------------------------------------------------------------------------------
/scripts/square_arm.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env python
  2 | 
  3 | import sys
  4 | 
  5 | if len(sys.argv) < 2: # the integer size is a required command-line argument
  6 |     print "Provide the integer size in 32-bit words"
  7 |     sys.exit(1)
  8 | 
  9 | size = int(sys.argv[1]) # operand length in 32-bit words
 10 | 
 11 | if size > 8: # only sizes up to 8 words are handled
 12 |     print "This script doesn't work with integer size %s due to laziness" % (size)
 13 |     sys.exit(1)
 14 | 
 15 | init_size = 0 # number of words beyond the six that are held in registers at once
 16 | if size > 6: # so init_size is 1 for size 7 and 2 for size 8
 17 |     init_size = size - 6
 18 | 
 19 | def emit(line, *args): # print one generated instruction as a double-quoted string
 20 |     s = '"' + line + r' \n\t"' # local s (distinct from the module-level s); raw string keeps \n\t as literal backslash sequences
 21 |     print s % args # %-substitute args into the quoted template
 22 | 
 23 | def mulacc(acc, r1, r2): # emit code adding the product of registers r<r1> and r<r2> into the 3-register accumulator acc
 24 |     if size <= 6: # multiply into scratch registers, then add with carry propagation
 25 |         emit("umull r1, r14, r%s, r%s", r1, r2) # the literal "r1" in the template is the ARM register, not the parameter r1
 26 |         emit("adds r%s, r%s, r1", acc[0], acc[0])
 27 |         emit("adcs r%s, r%s, r14", acc[1], acc[1])
 28 |         emit("adc r%s, r%s, #0", acc[2], acc[2])
 29 |     else: # multiply-accumulate directly into acc[0]/acc[1]; only r14 is used as scratch
 30 |         emit("mov r14, r%s", acc[1]) # keep the old acc[1] for the comparison below
 31 |         emit("umlal r%s, r%s, r%s, r%s", acc[0], acc[1], r1, r2)
 32 |         emit("cmp r14, r%s", acc[1])
 33 |         emit("it hi")
 34 |         emit("adchi r%s, r%s, #0", acc[2], acc[2]) # executed only when the old acc[1] compares higher than the new one
 35 | 
 36 | r = [2, 3, 4, 5, 6, 7] # register numbers that receive the input words
 37 | 
 38 | s = size - init_size # number of words loaded into registers at once (at most 6, since size <= 8)
 39 | 
 40 | if init_size == 1: # size 7: one product of the first and last input words
 41 |     emit("ldmia r1!, {r2}")
 42 |     emit("add r1, %s", (size - init_size * 2) * 4) # skip ahead so the next load reads the last init_size words
 43 |     emit("ldmia r1!, {r5}")
 44 |     
 45 |     emit("add r0, %s", (size - init_size) * 4) # store the partial product starting at result word size - init_size
 46 |     emit("umull r8, r9, r2, r5")
 47 |     emit("stmia r0!, {r8, r9}")
 48 |     
 49 |     emit("sub r0, %s", (size + init_size) * 4) # undo the add plus the 2 * init_size words just stored
 50 |     emit("sub r1, %s", (size) * 4) # undo all r1 advances made in this branch
 51 |     print ""
 52 | elif init_size == 2: # size 8: products among the first two and last two input words
 53 |     emit("ldmia r1!, {r2, r3}")
 54 |     emit("add r1, %s", (size - init_size * 2) * 4)
 55 |     emit("ldmia r1!, {r5, r6}")
 56 |     
 57 |     emit("add r0, %s", (size - init_size) * 4)
 58 |     print ""
 59 | 
 60 |     emit("umull r8, r9, r2, r5")
 61 |     emit("stmia r0!, {r8}")
 62 |     print ""
 63 |     
 64 |     emit("umull r12, r10, r2, r6") # only r2*r6 is added in this column (no r3*r5 term is emitted)
 65 |     emit("adds r9, r9, r12")
 66 |     emit("adc r10, r10, #0")
 67 |     emit("stmia r0!, {r9}")
 68 |     print ""
 69 |     
 70 |     emit("umull r8, r9, r3, r6")
 71 |     emit("adds r10, r10, r8")
 72 |     emit("adc r11, r9, #0")
 73 |     emit("stmia r0!, {r10, r11}")
 74 |     print ""
 75 |     
 76 |     emit("sub r0, %s", (size + init_size) * 4) # undo the add plus the 2 * init_size words just stored
 77 |     emit("sub r1, %s", (size) * 4) # undo all r1 advances made in this branch
 78 | 
 79 | # load input words
 80 | emit("ldmia r1!, {%s}", ", ".join(["r%s" % (r[i]) for i in xrange(s)])) # first s words into r2 upward
 81 | print ""
 82 | 
 83 | emit("umull r11, r12, r2, r2") # column 0: square of word 0 (NOTE(review): r2, r3 and r4 are used unconditionally below, so size >= 3 looks required - confirm)
 84 | emit("stmia r0!, {r11}")
 85 | print ""
 86 | emit("mov r9, #0")
 87 | emit("umull r10, r11, r2, r3") # column 1: the r2*r3 product is added twice rather than doubled
 88 | emit("adds r12, r12, r10")
 89 | emit("adcs r8, r11, #0")
 90 | emit("adc r9, r9, #0")
 91 | emit("adds r12, r12, r10")
 92 | emit("adcs r8, r8, r11")
 93 | emit("adc r9, r9, #0")
 94 | emit("stmia r0!, {r12}")
 95 | print ""
 96 | emit("mov r10, #0")
 97 | emit("umull r11, r12, r2, r4") # column 2: r2*r4 doubled in place, then r3*r3 added once
 98 | emit("adds r11, r11, r11")
 99 | emit("adcs r12, r12, r12")
100 | emit("adc r10, r10, #0")
101 | emit("adds r8, r8, r11")
102 | emit("adcs r9, r9, r12")
103 | emit("adc r10, r10, #0")
104 | emit("umull r11, r12, r3, r3")
105 | emit("adds r8, r8, r11")
106 | emit("adcs r9, r9, r12")
107 | emit("adc r10, r10, #0")
108 | emit("stmia r0!, {r8}")
109 | print ""
110 | 
111 | acc = [8, 9, 10] # r8 was just stored above; r9 and r10 hold the carried-over upper words
112 | old_acc = [11, 12] # two spare registers swapped with acc's upper pair on every column
113 | for i in xrange(3, s): # columns 3..s-1: products r[j] * r[i - j]
114 |     emit("mov r%s, #0", old_acc[1])
115 |     tmp = [acc[1], acc[2]] # carried-over words from the previous column
116 |     acc = [acc[0], old_acc[0], old_acc[1]] # reuse the stored low register plus the two spare registers
117 |     old_acc = tmp
118 |     
119 |     # gather non-equal words
120 |     emit("umull r%s, r%s, r%s, r%s", acc[0], acc[1], r[0], r[i]) # first product initialises acc[0] and acc[1]
121 |     for j in xrange(1, (i+1)//2):
122 |         mulacc(acc, r[j], r[i-j])
123 |     # multiply by 2
124 |     emit("adds r%s, r%s, r%s", acc[0], acc[0], acc[0])
125 |     emit("adcs r%s, r%s, r%s", acc[1], acc[1], acc[1])
126 |     emit("adc r%s, r%s, r%s", acc[2], acc[2], acc[2])
127 |     
128 |     # add equal word (if any)
129 |     if ((i+1) % 2) != 0: # even i: the middle term r[i//2] squared is added once, after the doubling
130 |         mulacc(acc, r[i//2], r[i//2])
131 |     
132 |     # add old accumulator
133 |     emit("adds r%s, r%s, r%s", acc[0], acc[0], old_acc[0])
134 |     emit("adcs r%s, r%s, r%s", acc[1], acc[1], old_acc[1])
135 |     emit("adc r%s, r%s, #0", acc[2], acc[2])
136 |     
137 |     # store
138 |     emit("stmia r0!, {r%s}", acc[0])
139 |     print ""
140 | 
141 | regs = list(r) # working copy of the register list, rotated as new words are loaded
142 | for i in xrange(init_size): # runs only for sizes 7 and 8: one extra input word per iteration
143 |     regs = regs[1:] + regs[:1] # the oldest register is reused for the newly loaded word
144 |     emit("ldmia r1!, {r%s}", regs[5])
145 |     
146 |     for limit in [4, 5]: # two output columns per loaded word
147 |         emit("mov r%s, #0", old_acc[1])
148 |         tmp = [acc[1], acc[2]]
149 |         acc = [acc[0], old_acc[0], old_acc[1]]
150 |         old_acc = tmp
151 |     
152 |         # gather non-equal words
153 |         emit("umull r%s, r%s, r%s, r%s", acc[0], acc[1], regs[0], regs[limit])
154 |         for j in xrange(1, (limit+1)//2):
155 |             mulacc(acc, regs[j], regs[limit-j])
156 |     
157 |         emit("ldr r14, [r0]") # load stored value from initial block, and add to accumulator
158 |         emit("adds r%s, r%s, r14", acc[0], acc[0]) # added before the doubling below, so the stored value is doubled too
159 |         emit("adcs r%s, r%s, #0", acc[1], acc[1])
160 |         emit("adc r%s, r%s, #0", acc[2], acc[2])
161 |     
162 |         # multiply by 2
163 |         emit("adds r%s, r%s, r%s", acc[0], acc[0], acc[0])
164 |         emit("adcs r%s, r%s, r%s", acc[1], acc[1], acc[1])
165 |         emit("adc r%s, r%s, r%s", acc[2], acc[2], acc[2])
166 |     
167 |         # add equal word
168 |         if limit == 4: # only the limit == 4 column has a squared middle term, regs[2]
169 |             mulacc(acc, regs[2], regs[2])
170 |     
171 |         # add old accumulator
172 |         emit("adds r%s, r%s, r%s", acc[0], acc[0], old_acc[0])
173 |         emit("adcs r%s, r%s, r%s", acc[1], acc[1], old_acc[1])
174 |         emit("adc r%s, r%s, #0", acc[2], acc[2])
175 |     
176 |         # store
177 |         emit("stmia r0!, {r%s}", acc[0])
178 |         print ""
179 | 
180 | for i in xrange(1, s-3): # columns whose products pair regs[i + j] with regs[s - 1 - j]
181 |     emit("mov r%s, #0", old_acc[1])
182 |     tmp = [acc[1], acc[2]]
183 |     acc = [acc[0], old_acc[0], old_acc[1]]
184 |     old_acc = tmp
185 | 
186 |     # gather non-equal words
187 |     emit("umull r%s, r%s, r%s, r%s", acc[0], acc[1], regs[i], regs[s - 1]) # first product initialises acc[0] and acc[1]
188 |     for j in xrange(1, (s-i)//2):
189 |         mulacc(acc, regs[i+j], regs[s - 1 - j])
190 | 
191 |     # multiply by 2
192 |     emit("adds r%s, r%s, r%s", acc[0], acc[0], acc[0])
193 |     emit("adcs r%s, r%s, r%s", acc[1], acc[1], acc[1])
194 |     emit("adc r%s, r%s, r%s", acc[2], acc[2], acc[2])
195 | 
196 |     # add equal word (if any)
197 |     if ((s-i) % 2) != 0: # odd count of remaining words: the middle word squared is added once
198 |         mulacc(acc, regs[i + (s-i)//2], regs[i + (s-i)//2])
199 | 
200 |     # add old accumulator
201 |     emit("adds r%s, r%s, r%s", acc[0], acc[0], old_acc[0])
202 |     emit("adcs r%s, r%s, r%s", acc[1], acc[1], old_acc[1])
203 |     emit("adc r%s, r%s, #0", acc[2], acc[2])
204 | 
205 |     # store
206 |     emit("stmia r0!, {r%s}", acc[0])
207 |     print ""
208 | 
209 | acc = acc[1:] + acc[:1] # third-from-last column: 2 * regs[s-3]*regs[s-1] + regs[s-2] squared
210 | emit("mov r%s, #0", acc[2])
211 | emit("umull r1, r%s, r%s, r%s", old_acc[1], regs[s - 3], regs[s - 1]) # r1 is used as scratch from here on; no further loads through r1 are emitted
212 | emit("adds r1, r1, r1")
213 | emit("adcs r%s, r%s, r%s", old_acc[1], old_acc[1], old_acc[1])
214 | emit("adc r%s, r%s, #0", acc[2], acc[2])
215 | emit("adds r%s, r%s, r1", acc[0], acc[0])
216 | emit("adcs r%s, r%s, r%s", acc[1], acc[1], old_acc[1])
217 | emit("adc r%s, r%s, #0", acc[2], acc[2])
218 | emit("umull r1, r%s, r%s, r%s", old_acc[1], regs[s - 2], regs[s - 2])
219 | emit("adds r%s, r%s, r1", acc[0], acc[0])
220 | emit("adcs r%s, r%s, r%s", acc[1], acc[1], old_acc[1])
221 | emit("adc r%s, r%s, #0", acc[2], acc[2])
222 | emit("stmia r0!, {r%s}", acc[0])
223 | print ""
224 | 
225 | acc = acc[1:] + acc[:1] # second-from-last column: 2 * regs[s-2]*regs[s-1]
226 | emit("mov r%s, #0", acc[2])
227 | emit("umull r1, r%s, r%s, r%s", old_acc[1], regs[s - 2], regs[s - 1])
228 | emit("adds r1, r1, r1")
229 | emit("adcs r%s, r%s, r%s", old_acc[1], old_acc[1], old_acc[1])
230 | emit("adc r%s, r%s, #0", acc[2], acc[2])
231 | emit("adds r%s, r%s, r1", acc[0], acc[0])
232 | emit("adcs r%s, r%s, r%s", acc[1], acc[1], old_acc[1])
233 | emit("adc r%s, r%s, #0", acc[2], acc[2])
234 | emit("stmia r0!, {r%s}", acc[0])
235 | print ""
236 | 
237 | acc = acc[1:] + acc[:1] # last column: regs[s-1] squared; both remaining words are stored
238 | emit("umull r1, r%s, r%s, r%s", old_acc[1], regs[s - 1], regs[s - 1])
239 | emit("adds r%s, r%s, r1", acc[0], acc[0])
240 | emit("adcs r%s, r%s, r%s", acc[1], acc[1], old_acc[1])
241 | emit("stmia r0!, {r%s}", acc[0])
242 | emit("stmia r0!, {r%s}", acc[1])
243 | 


--------------------------------------------------------------------------------
/scripts/square_avr.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env python
  2 | 
  3 | import sys
  4 | 
  5 | if len(sys.argv) < 2:
  6 |     print "Provide the integer size in bytes"
  7 |     sys.exit(1)
  8 | 
  9 | size = int(sys.argv[1])
 10 | 
 11 | if size > 40:
 12 |     print "This script doesn't work with integer size %s due to laziness" % (size)
 13 |     sys.exit(1)
 14 | 
 15 | init_size = size - 20
 16 | if size < 20:
 17 |     init_size = 0
 18 | 
 19 | def rg(i): # register number for input byte i in the main squaring section (r2 upward)
 20 |     return i + 2
 21 | 
 22 | def lo(i): # register number for byte i read through x in the initial block (same mapping as rg)
 23 |     return i + 2
 24 | 
 25 | def hi(i): # register number for byte i read through y in the initial block (r12 upward)
 26 |     return i + 12
 27 | 
 28 | def emit(line, *args): # print one generated instruction as a double-quoted string
 29 |     s = '"' + line + r' \n\t"' # raw string: the \n\t suffix is printed as literal backslash sequences
 30 |     print s % args # %-substitute args into the quoted template
 31 | 
 32 | #### set up registers
 33 | zero = "r25"
 34 | emit("ldi %s, 0", zero) # zero register
 35 | 
 36 | if init_size > 0:
 37 |     emit("movw r28, r26") # y = x
 38 |     h = (init_size + 1)//2
 39 |     
 40 |     for i in xrange(h):
 41 |         emit("ld r%s, x+", lo(i))
 42 |     emit("adiw r28, %s", size - init_size) # move y to other end
 43 |     for i in xrange(h):
 44 |         emit("ld r%s, y+", hi(i))
 45 | 
 46 |     emit("adiw r30, %s", size - init_size) # move z
 47 | 
 48 |     if init_size == 1:
 49 |         emit("mul %s, %s", lo(0), hi(0))
 50 |         emit("st z+, r0")
 51 |         emit("st z+, r1")
 52 |     else:
 53 |         #### first one
 54 |         print ""
 55 |         emit("ldi r23, 0")
 56 |         emit("mul %s, %s", lo(0), hi(0))
 57 |         emit("st z+, r0")
 58 |         emit("mov r22, r1")
 59 |         print ""
 60 | 
 61 |         #### rest of initial block, with moving accumulator registers
 62 |         acc = [22, 23, 24]
 63 |         for r in xrange(1, h):
 64 |             emit("ldi r%s, 0", acc[2])
 65 |             for i in xrange(0, (r+2)//2):
 66 |                 emit("mul r%s, r%s", lo(i), hi(r - i))
 67 |                 emit("add r%s, r0", acc[0])
 68 |                 emit("adc r%s, r1", acc[1])
 69 |                 emit("adc r%s, %s", acc[2], zero)
 70 |             emit("st z+, r%s", acc[0])
 71 |             print ""
 72 |             acc = acc[1:] + acc[:1]
 73 |         
 74 |         lo_r = range(2, 2 + h)
 75 |         hi_r = range(12, 12 + h)
 76 |         
 77 |         # now we need to start loading more from the high end
 78 |         for r in xrange(h, init_size):
 79 |             hi_r = hi_r[1:] + hi_r[:1]
 80 |             emit("ld r%s, y+", hi_r[h-1])
 81 |             
 82 |             emit("ldi r%s, 0", acc[2])
 83 |             for i in xrange(0, (r+2)//2):
 84 |                 emit("mul r%s, r%s", lo(i), hi_r[h - 1 - i])
 85 |                 emit("add r%s, r0", acc[0])
 86 |                 emit("adc r%s, r1", acc[1])
 87 |                 emit("adc r%s, %s", acc[2], zero)
 88 |             emit("st z+, r%s", acc[0])
 89 |             print ""
 90 |             acc = acc[1:] + acc[:1]
 91 |             
 92 |         # loaded all of the high end bytes; now need to start loading the rest of the low end
 93 |         for r in xrange(1, init_size-h):
 94 |             lo_r = lo_r[1:] + lo_r[:1]
 95 |             emit("ld r%s, x+", lo_r[h-1])
 96 |             
 97 |             emit("ldi r%s, 0", acc[2])
 98 |             for i in xrange(0, (init_size+1 - r)//2):
 99 |                 emit("mul r%s, r%s", lo_r[i], hi_r[h - 1 - i])
100 |                 emit("add r%s, r0", acc[0])
101 |                 emit("adc r%s, r1", acc[1])
102 |                 emit("adc r%s, %s", acc[2], zero)
103 |             emit("st z+, r%s", acc[0])
104 |             print ""
105 |             acc = acc[1:] + acc[:1]
106 |         
107 |         lo_r = lo_r[1:] + lo_r[:1]
108 |         emit("ld r%s, x+", lo_r[h-1])
109 |         
110 |         # now we have loaded everything, and we just need to finish the last corner
111 |         for r in xrange(init_size-h, init_size-1):
112 |             emit("ldi r%s, 0", acc[2])
113 |             for i in xrange(0, (init_size+1 - r)//2):
114 |                 emit("mul r%s, r%s", lo_r[i], hi_r[h - 1 - i])
115 |                 emit("add r%s, r0", acc[0])
116 |                 emit("adc r%s, r1", acc[1])
117 |                 emit("adc r%s, %s", acc[2], zero)
118 |             emit("st z+, r%s", acc[0])
119 |             print ""
120 |             acc = acc[1:] + acc[:1]
121 |             lo_r = lo_r[1:] + lo_r[:1] # make the indexing easy
122 |         
123 |         emit("mul r%s, r%s", lo_r[0], hi_r[h - 1])
124 |         emit("add r%s, r0", acc[0])
125 |         emit("adc r%s, r1", acc[1])
126 |         emit("st z+, r%s", acc[0])
127 |         emit("st z+, r%s", acc[1])
128 |     print ""
129 |     emit("sbiw r26, %s", init_size) # reset x
130 |     emit("sbiw r30, %s", size + init_size) # reset z
131 | 
132 | # TODO you could do more rows of size 20 here if your integers are larger than 40 bytes
133 | 
134 | s = size - init_size
135 | 
136 | for i in xrange(s):
137 |     emit("ld r%s, x+", rg(i))
138 | 
139 | #### first few columns
140 | # NOTE: this is only valid if size >= 3
141 | print ""
142 | emit("ldi r23, 0")
143 | emit("mul r%s, r%s", rg(0), rg(0))
144 | emit("st z+, r0")
145 | emit("mov r22, r1")
146 | print ""
147 | emit("ldi r24, 0")
148 | emit("mul r%s, r%s", rg(0), rg(1))
149 | emit("add r22, r0")
150 | emit("adc r23, r1")
151 | emit("adc r24, %s", zero)
152 | emit("add r22, r0")
153 | emit("adc r23, r1")
154 | emit("adc r24, %s", zero)
155 | emit("st z+, r22")
156 | print ""
157 | emit("ldi r22, 0")
158 | emit("mul r%s, r%s", rg(0), rg(2))
159 | emit("add r23, r0")
160 | emit("adc r24, r1")
161 | emit("adc r22, %s", zero)
162 | emit("add r23, r0")
163 | emit("adc r24, r1")
164 | emit("adc r22, %s", zero)
165 | emit("mul r%s, r%s", rg(1), rg(1))
166 | emit("add r23, r0")
167 | emit("adc r24, r1")
168 | emit("adc r22, %s", zero)
169 | emit("st z+, r23")
170 | print ""
171 | 
172 | acc = [23, 24, 22]
173 | old_acc = [28, 29]
174 | for i in xrange(3, s):
175 |     emit("ldi r%s, 0", old_acc[1])
176 |     tmp = [acc[1], acc[2]]
177 |     acc = [acc[0], old_acc[0], old_acc[1]]
178 |     old_acc = tmp
179 |     
180 |     # gather non-equal words
181 |     emit("mul r%s, r%s", rg(0), rg(i))
182 |     emit("mov r%s, r0", acc[0])
183 |     emit("mov r%s, r1", acc[1])
184 |     for j in xrange(1, (i+1)//2):
185 |         emit("mul r%s, r%s", rg(j), rg(i-j))
186 |         emit("add r%s, r0", acc[0])
187 |         emit("adc r%s, r1", acc[1])
188 |         emit("adc r%s, %s", acc[2], zero)
189 |     # multiply by 2
190 |     emit("lsl r%s", acc[0])
191 |     emit("rol r%s", acc[1])
192 |     emit("rol r%s", acc[2])
193 |     
194 |     # add equal word (if any)
195 |     if ((i+1) % 2) != 0:
196 |         emit("mul r%s, r%s", rg(i//2), rg(i//2))
197 |         emit("add r%s, r0", acc[0])
198 |         emit("adc r%s, r1", acc[1])
199 |         emit("adc r%s, %s", acc[2], zero)
200 |     
201 |     # add old accumulator
202 |     emit("add r%s, r%s", acc[0], old_acc[0])
203 |     emit("adc r%s, r%s", acc[1], old_acc[1])
204 |     emit("adc r%s, %s", acc[2], zero)
205 |     
206 |     # store
207 |     emit("st z+, r%s", acc[0])
208 |     print ""
209 | 
# Window of 20 AVR register numbers (r2..r21) that holds the input bytes currently in use.
regs = range(2, 22)
for i in xrange(init_size):
    # Rotate the window left by one slot and load the next input byte (X post-increments)
    # into the register that is now last in the window.
    regs = regs[1:] + regs[:1]
    emit("ld r%s, x+", regs[19])
    
    # Each pass of the outer loop emits two result columns: products whose window indices
    # sum to 18, then those whose indices sum to 19.
    for limit in [18, 19]:
        # Zero old_acc[1]; after the swap below it is acc[2], the top byte of the new
        # 3-byte accumulator. The previous accumulator's two upper bytes become old_acc,
        # i.e. the carry into this column.
        emit("ldi r%s, 0", old_acc[1])
        tmp = [acc[1], acc[2]]
        acc = [acc[0], old_acc[0], old_acc[1]]
        old_acc = tmp
    
        # gather non-equal words
        # The first product initialises acc[0]/acc[1] directly from r0/r1 (mov, not add).
        emit("mul r%s, r%s", regs[0], regs[limit])
        emit("mov r%s, r0", acc[0])
        emit("mov r%s, r1", acc[1])
        for j in xrange(1, (limit+1)//2):
            emit("mul r%s, r%s", regs[j], regs[limit-j])
            emit("add r%s, r0", acc[0])
            emit("adc r%s, r1", acc[1])
            emit("adc r%s, %s", acc[2], zero)
    
        emit("ld r0, z") # load stored value from initial block, and add to accumulator (note z does not increment)
        emit("add r%s, r0", acc[0])
        # NOTE(review): r25 is used as the zero operand here instead of `zero`; presumably
        # it holds 0 at this point (set up outside this part of the script) -- confirm.
        emit("adc r%s, r25", acc[1])
        emit("adc r%s, r25", acc[2])
    
        # multiply by 2
        # Each cross product regs[a]*regs[b] with a != b occurs twice in a square, so the
        # gathered sum is doubled with a 24-bit left shift.
        emit("lsl r%s", acc[0])
        emit("rol r%s", acc[1])
        emit("rol r%s", acc[2])
    
        # add equal word
        # Only the even column (18 = 9 + 9) has a squared term; it is added after the
        # doubling so that it is counted once.
        if limit == 18:
            emit("mul r%s, r%s", regs[9], regs[9])
            emit("add r%s, r0", acc[0])
            emit("adc r%s, r1", acc[1])
            emit("adc r%s, %s", acc[2], zero)
    
        # add old accumulator
        emit("add r%s, r%s", acc[0], old_acc[0])
        emit("adc r%s, r%s", acc[1], old_acc[1])
        emit("adc r%s, %s", acc[2], zero)
    
        # store
        # The low accumulator byte is the finished result byte for this column (Z post-increments).
        emit("st z+, r%s", acc[0])
        print ""
256 | 
# Emit one result column per iteration. Column i combines the products
# regs[i+j] * regs[s-1-j], i.e. pairs of window indices that sum to i + s - 1.
for i in xrange(1, s-3):
    # Zero old_acc[1]; after the swap it is the top byte (acc[2]) of the new accumulator,
    # and the previous accumulator's two upper bytes become the carry-in (old_acc).
    emit("ldi r%s, 0", old_acc[1])
    tmp = [acc[1], acc[2]]
    acc = [acc[0], old_acc[0], old_acc[1]]
    old_acc = tmp

    # gather non-equal words
    # The first product initialises acc[0]/acc[1] directly from r0/r1 (mov, not add).
    emit("mul r%s, r%s", regs[i], regs[s - 1])
    emit("mov r%s, r0", acc[0])
    emit("mov r%s, r1", acc[1])
    for j in xrange(1, (s-i)//2):
        emit("mul r%s, r%s", regs[i+j], regs[s - 1 - j])
        emit("add r%s, r0", acc[0])
        emit("adc r%s, r1", acc[1])
        emit("adc r%s, %s", acc[2], zero)
    # multiply by 2
    # Cross products appear twice in a square, so the gathered sum is doubled (24-bit shift).
    emit("lsl r%s", acc[0])
    emit("rol r%s", acc[1])
    emit("rol r%s", acc[2])

    # add equal word (if any)
    # When (s - i) is odd the column has a middle element that pairs with itself;
    # its square is added once, after the doubling.
    if ((s-i) % 2) != 0:
        emit("mul r%s, r%s", regs[i + (s-i)//2], regs[i + (s-i)//2])
        emit("add r%s, r0", acc[0])
        emit("adc r%s, r1", acc[1])
        emit("adc r%s, %s", acc[2], zero)

    # add old accumulator
    emit("add r%s, r%s", acc[0], old_acc[0])
    emit("adc r%s, r%s", acc[1], old_acc[1])
    emit("adc r%s, %s", acc[2], zero)

    # store
    # The low accumulator byte is the finished result byte for this column (Z post-increments).
    emit("st z+, r%s", acc[0])
    print ""
292 | 
# Final columns, written out by hand with hard-coded indices into the 20-entry window.
#
# Column with 2 * regs[17]*regs[19] + regs[18]^2: shift the accumulator down one byte
# (the old low byte was already stored) and zero the new top byte.
acc = acc[1:] + acc[:1]
emit("ldi r%s, 0", acc[2])
emit("mul r%s, r%s", regs[17], regs[19])
# The cross product is doubled by adding r1:r0 twice instead of shifting.
emit("add r%s, r0", acc[0])
emit("adc r%s, r1", acc[1])
emit("adc r%s, %s", acc[2], zero)
emit("add r%s, r0", acc[0])
emit("adc r%s, r1", acc[1])
emit("adc r%s, %s", acc[2], zero)
emit("mul r%s, r%s", regs[18], regs[18])
emit("add r%s, r0", acc[0])
emit("adc r%s, r1", acc[1])
emit("adc r%s, %s", acc[2], zero)
emit("st z+, r%s", acc[0])
print ""

# Column with 2 * regs[18]*regs[19]: same shift-down, then the product is added twice.
acc = acc[1:] + acc[:1]
emit("ldi r%s, 0", acc[2])
emit("mul r%s, r%s", regs[18], regs[19])
emit("add r%s, r0", acc[0])
emit("adc r%s, r1", acc[1])
emit("adc r%s, %s", acc[2], zero)
emit("add r%s, r0", acc[0])
emit("adc r%s, r1", acc[1])
emit("adc r%s, %s", acc[2], zero)
emit("st z+, r%s", acc[0])
print ""

# Last term regs[19]^2 is added into the two remaining accumulator bytes, which are then
# stored as the final two result bytes.
emit("mul r%s, r%s", regs[19], regs[19])
emit("add r%s, r0", acc[1])
emit("adc r%s, r1", acc[2])
emit("st z+, r%s", acc[1])

emit("st z+, r%s", acc[2])
# Zero r1, which the mul instructions above overwrote with product high bytes.
# NOTE(review): presumably restores the compiler's "r1 is always zero" convention -- confirm.
emit("eor r1, r1")
328 | 


--------------------------------------------------------------------------------
/test/ecdsa_test_vectors.c:
--------------------------------------------------------------------------------
  1 | /* Copyright 2020, Kenneth MacKay. Licensed under the BSD 2-clause license. */
  2 | 
  3 | #include "uECC.h"
  4 | 
  5 | #include <stdio.h>
  6 | #include <stdlib.h>
  7 | #include <string.h>
  8 | 
/* One ECDSA known-answer vector. Every field is a hex string that run() decodes
   with strtobytes() before use. */
typedef struct {
  const char* private_key;  /* key passed to uECC_sign_with_k() */
  const char* public_key;   /* key passed to uECC_verify() */
  const char* k;            /* fixed k value passed to uECC_sign_with_k() */
  const char* hash;         /* message hash that is signed and verified */
  const char* r;            /* expected first half of the signature */
  const char* s;            /* expected second half of the signature */
} Test;
 17 | 
 18 | Test secp256k1_tests[] = {
 19 |     {
 20 |         "ebb2c082fd7727890a28ac82f6bdf97bad8de9f5d7c9028692de1a255cad3e0f",
 21 |         "779dd197a5df977ed2cf6cb31d82d43328b790dc6b3b7d4437a427bd5847dfcde94b724a555b6d017bb7607c3e3281daf5b1699d6ef4124975c9237b917d426f",
 22 |         "49a0d7b786ec9cde0d0721d72804befd06571c974b191efb42ecf322ba9ddd9a",
 23 |         "4b688df40bcedbe641ddb16ff0a1842d9c67ea1c3bf63f3e0471baa664531d1a",
 24 |         "241097efbf8b63bf145c8961dbdf10c310efbb3b2676bbc0f8b08505c9e2f795",
 25 |         "021006b7838609339e8b415a7f9acb1b661828131aef1ecbc7955dfb01f3ca0e"
 26 |     },
 27 | };
 28 | 
/* Declared locally so run() can sign with a caller-supplied k and compare the result
   against fixed vectors.
   NOTE(review): presumably this function is not exposed through uECC.h -- confirm. */
extern int uECC_sign_with_k(const uint8_t *private_key,
                            const uint8_t *message_hash,
                            unsigned hash_size,
                            const uint8_t *k,
                            uint8_t *signature,
                            uECC_Curve curve);
 35 | 
 36 | 
 37 | void vli_print(uint8_t *vli, unsigned int size) {
 38 |     for(unsigned i=0; i<size; ++i) {
 39 |         printf("%02X ", (unsigned)vli[i]);
 40 |     }
 41 |     printf("\n");
 42 | }
 43 | 
 44 | void strtobytes(const char* str, uint8_t* bytes, int count) {
 45 |   for (int c = 0; c < count; ++c) {
 46 |     if (sscanf(str, "%2hhx", &bytes[c]) != 1) {
 47 |       printf("Failed to read string to bytes");
 48 |       exit(1);
 49 |     }
 50 |     str += 2;
 51 |   }
 52 | }
 53 | 
 54 | int run(Test* tests, int num_tests, uECC_Curve curve) {
 55 |     uint8_t private[32] = {0};
 56 |     uint8_t public[64] = {0};
 57 |     uint8_t k[32] = {0};
 58 |     uint8_t hash[32] = {0};
 59 |     uint8_t r[32] = {0};
 60 |     uint8_t s[32] = {0};
 61 | 
 62 |     uint8_t signature[64] = {0};
 63 | 
 64 |     int result;
 65 |     int i;
 66 |     int private_key_size;
 67 |     int public_key_size;
 68 |     int all_success = 1;
 69 | 
 70 |     private_key_size = uECC_curve_private_key_size(curve);
 71 |     public_key_size = uECC_curve_public_key_size(curve);
 72 | 
 73 |     for (i = 0; i < num_tests; ++i) {
 74 |         strtobytes(tests[i].private_key, private, private_key_size);
 75 |         strtobytes(tests[i].public_key, public, public_key_size);
 76 |         strtobytes(tests[i].k, k, private_key_size);
 77 |         strtobytes(tests[i].hash, hash, private_key_size);
 78 |         strtobytes(tests[i].r, r, private_key_size);
 79 |         strtobytes(tests[i].s, s, private_key_size);
 80 | 
 81 |         result = uECC_sign_with_k(private, hash, private_key_size, k, signature, curve);
 82 |         if (!result) {
 83 |             all_success = 0;
 84 |             printf("  Sign failed for test %d\n", i);
 85 |         }
 86 |         if (result) {
 87 |             if (memcmp(signature, r, private_key_size) != 0) {
 88 |                 all_success = 0;
 89 |                 printf("  Got incorrect r for test %d\n", i);
 90 |                 printf("    Expected: ");
 91 |                 vli_print(r, private_key_size);
 92 |                 printf("    Calculated: ");
 93 |                 vli_print(signature, private_key_size);
 94 |             }
 95 |             if (memcmp(signature + private_key_size, s, private_key_size) != 0) {
 96 |                 all_success = 0;
 97 |                 printf("  Got incorrect s for test %d\n", i);
 98 |                 printf("    Expected: ");
 99 |                 vli_print(s, private_key_size);
100 |                 printf("    Calculated: ");
101 |                 vli_print(signature + private_key_size, private_key_size);
102 |             }
103 | 
104 |             result = uECC_verify(public, hash, private_key_size, signature, curve);
105 |             if (!result) {
106 |                 printf("  Verify failed for test %d\n", i);
107 |             }
108 |         }
109 |     }
110 | 
111 |     return all_success;
112 | }
113 | 
/* Runs the vector table `<curve>_tests` on uECC_<curve>() and prints the outcome.

   Expands to several statements, so it must be used as a statement of its own (no
   trailing semicolon needed) inside a scope that declares `int failed`; the macro
   sets `failed` to 1 when the curve's vectors do not all pass. */
#define RUN_TESTS(curve) \
    printf(#curve ":\n"); \
    if (run(curve##_tests, sizeof(curve##_tests) / sizeof(curve##_tests[0]), uECC_##curve()) ) { \
        printf("  All passed\n"); \
    } else { \
        printf("  Failed\n"); \
        failed = 1; \
    }

/* Runs the vectors of every curve compiled into the library.
   Returns 0 when all of them pass and 1 otherwise, so the result is visible to
   scripts that only look at the exit status. */
int main() {
    int failed = 0;

#if uECC_SUPPORTS_secp256k1
    RUN_TESTS(secp256k1)
#endif

    return failed;
}
129 | 


--------------------------------------------------------------------------------
/test/emk_rules.py:
--------------------------------------------------------------------------------
1 | c, link = emk.module("c", "link")
2 | link.depdirs += [
3 |     "$:proj:$"
4 | ]
5 | 


--------------------------------------------------------------------------------
/test/public_key_test_vectors.c:
--------------------------------------------------------------------------------
  1 | /* Copyright 2020, Kenneth MacKay. Licensed under the BSD 2-clause license. */
  2 | 
  3 | #include "uECC.h"
  4 | 
  5 | #include <stdio.h>
  6 | #include <stdlib.h>
  7 | #include <string.h>
  8 | 
/* One public-key derivation vector, consumed by run(). */
typedef struct {
  const char* k;   /* private key as a hex string, passed to uECC_compute_public_key() */
  const char* Q;   /* expected public key as a hex string; only compared when the call succeeds */
  int success;     /* expected return value of uECC_compute_public_key() */
} Test;
 14 | 
 15 | Test secp160r1_tests[] = {
 16 |     /* Note, I couldn't find any test vectors for secp160r1 online, so these are just
 17 |        generated on my desktop using uECC. */
 18 |     {
 19 |         "000000000000000000000000000000000000000000",
 20 |         "00000000000000000000000000000000000000000000000000000000000000000000000000000000",
 21 |         0
 22 |     },
 23 |     {
 24 |         "000000000000000000000000000000000000000001",
 25 |         "00000000000000000000000000000000000000000000000000000000000000000000000000000000",
 26 |         0
 27 |     },
 28 |     {
 29 |         "000000000000000000000000000000000000000002",
 30 |         "02F997F33C5ED04C55D3EDF8675D3E92E8F46686F083A323482993E9440E817E21CFB7737DF8797B",
 31 |         1
 32 |     },
 33 |     {
 34 |         "000000000000000000000000000000000000000003",
 35 |         "7B76FF541EF363F2DF13DE1650BD48DAA958BC59C915CA790D8C8877B55BE0079D12854FFE9F6F5A",
 36 |         1
 37 |     },
 38 |     {   /* n - 4 */
 39 |         "0100000000000000000001F4C8F927AED3CA752253",
 40 |         "B4041D8683BE99F0AFE01C307B1AD4C100CF2A88C0CD35127BE0F73FF99F338B350B5A42864112F7",
 41 |         1
 42 |     },
 43 |     {   /* n - 3 */
 44 |         "0100000000000000000001F4C8F927AED3CA752254",
 45 |         "7B76FF541EF363F2DF13DE1650BD48DAA958BC5936EA3586F27377884AA41FF862ED7AAF816090A5",
 46 |         1
 47 |     },
 48 |     {   /* n - 2 */
 49 |         "0100000000000000000001F4C8F927AED3CA752255",
 50 |         "00000000000000000000000000000000000000000000000000000000000000000000000000000000",
 51 |         0
 52 |     },
 53 |     {   /* n - 1 */
 54 |         "0100000000000000000001F4C8F927AED3CA752256",
 55 |         "00000000000000000000000000000000000000000000000000000000000000000000000000000000",
 56 |         0
 57 |     },
 58 |     {   /* n */
 59 |         "0100000000000000000001F4C8F927AED3CA752257",
 60 |         "00000000000000000000000000000000000000000000000000000000000000000000000000000000",
 61 |         0
 62 |     },
 63 | };
 64 | 
 65 | 
 66 | Test secp192r1_tests[] = {
 67 |     {
 68 |         "000000000000000000000000000000000000000000000000",
 69 |         "000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000",
 70 |         0
 71 |     },
 72 |     {
 73 |         "000000000000000000000000000000000000000000000001",
 74 |         "188DA80EB03090F67CBF20EB43A18800F4FF0AFD82FF101207192B95FFC8DA78631011ED6B24CDD573F977A11E794811",
 75 |         0
 76 |     },
 77 |     {
 78 |         "000000000000000000000000000000000000000000000002",
 79 |         "DAFEBF5828783F2AD35534631588A3F629A70FB16982A888DD6BDA0D993DA0FA46B27BBC141B868F59331AFA5C7E93AB",
 80 |         1
 81 |     },
 82 |     {
 83 |         "000000000000000000000000000000000000000000000003",
 84 |         "76E32A2557599E6EDCD283201FB2B9AADFD0D359CBB263DA782C37E372BA4520AA62E0FED121D49EF3B543660CFD05FD",
 85 |         1
 86 |     },
 87 |     {   /* n - 4 */
 88 |         "FFFFFFFFFFFFFFFFFFFFFFFF99DEF836146BC9B1B4D2282D",
 89 |         "35433907297CC378B0015703374729D7A4FE46647084E4BA5D9B667B0DECA3CFE15C534F88932B0DDAC764CEE24C41CD",
 90 |         1
 91 |     },
 92 |     {   /* n - 3 */
 93 |         "FFFFFFFFFFFFFFFFFFFFFFFF99DEF836146BC9B1B4D2282E",
 94 |         "76E32A2557599E6EDCD283201FB2B9AADFD0D359CBB263DA87D3C81C8D45BADF559D1F012EDE2B600C4ABC99F302FA02",
 95 |         1
 96 |     },
 97 |     {   /* n - 2 */
 98 |         "FFFFFFFFFFFFFFFFFFFFFFFF99DEF836146BC9B1B4D2282F",
 99 |         "DAFEBF5828783F2AD35534631588A3F629A70FB16982A888229425F266C25F05B94D8443EBE4796FA6CCE505A3816C54",
100 |         0
101 |     },
102 |     {   /* n - 1 */
103 |         "FFFFFFFFFFFFFFFFFFFFFFFF99DEF836146BC9B1B4D22830",
104 |         "188DA80EB03090F67CBF20EB43A18800F4FF0AFD82FF1012F8E6D46A003725879CEFEE1294DB32298C06885EE186B7EE",
105 |         0
106 |     },
107 |     {   /* n */
108 |         "FFFFFFFFFFFFFFFFFFFFFFFF99DEF836146BC9B1B4D22831",
109 |         "000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000",
110 |         0
111 |     },
112 | };
113 | 
114 | Test secp224r1_tests[] = {
115 |     {
116 |         "00000000000000000000000000000000000000000000000000000000",
117 |         "0000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000",
118 |         0
119 |     },
120 |     {
121 |         "00000000000000000000000000000000000000000000000000000001",
122 |         "B70E0CBD6BB4BF7F321390B94A03C1D356C21122343280D6115C1D21BD376388B5F723FB4C22DFE6CD4375A05A07476444D5819985007E34",
123 |         0
124 |     },
125 |     {
126 |         "00000000000000000000000000000000000000000000000000000002",
127 |         "706A46DC76DCB76798E60E6D89474788D16DC18032D268FD1A704FA61C2B76A7BC25E7702A704FA986892849FCA629487ACF3709D2E4E8BB",
128 |         1
129 |     },
130 |     {
131 |         "00000000000000000000000000000000000000000000000000000003",
132 |         "DF1B1D66A551D0D31EFF822558B9D2CC75C2180279FE0D08FD896D04A3F7F03CADD0BE444C0AA56830130DDF77D317344E1AF3591981A925",
133 |         1
134 |     },
135 |     {   /* n - 4 */
136 |         "FFFFFFFFFFFFFFFFFFFFFFFFFFFF16A2E0B8F03E13DD29455C5C2A39",
137 |         "AE99FEEBB5D26945B54892092A8AEE02912930FA41CD114E40447301FB7DA7F5F13A43B81774373C879CD32D6934C05FA758EEB14FCFAB38",
138 |         1
139 |     },
140 |     {   /* n - 3 */
141 |         "FFFFFFFFFFFFFFFFFFFFFFFFFFFF16A2E0B8F03E13DD29455C5C2A3A",
142 |         "DF1B1D66A551D0D31EFF822558B9D2CC75C2180279FE0D08FD896D045C080FC3522F41BBB3F55A97CFECF21F882CE8CBB1E50CA6E67E56DC",
143 |         1
144 |     },
145 |     {   /* n - 2 */
146 |         "FFFFFFFFFFFFFFFFFFFFFFFFFFFF16A2E0B8F03E13DD29455C5C2A3B",
147 |         "706A46DC76DCB76798E60E6D89474788D16DC18032D268FD1A704FA6E3D4895843DA188FD58FB0567976D7B50359D6B78530C8F62D1B1746",
148 |         0
149 |     },
150 |     {   /* n - 1 */
151 |         "FFFFFFFFFFFFFFFFFFFFFFFFFFFF16A2E0B8F03E13DD29455C5C2A3C",
152 |         "B70E0CBD6BB4BF7F321390B94A03C1D356C21122343280D6115C1D2142C89C774A08DC04B3DD201932BC8A5EA5F8B89BBB2A7E667AFF81CD",
153 |         0
154 |     },
155 |     {   /* n */
156 |         "FFFFFFFFFFFFFFFFFFFFFFFFFFFF16A2E0B8F03E13DD29455C5C2A3D",
157 |         "0000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000",
158 |         0
159 |     },
160 | };
161 | 
162 | Test secp256r1_tests[] = {
163 |     {
164 |         "0000000000000000000000000000000000000000000000000000000000000000",
165 |         "00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000",
166 |         0
167 |     },
168 |     {
169 |         "0000000000000000000000000000000000000000000000000000000000000001",
170 |         "6B17D1F2E12C4247F8BCE6E563A440F277037D812DEB33A0F4A13945D898C2964FE342E2FE1A7F9B8EE7EB4A7C0F9E162BCE33576B315ECECBB6406837BF51F5",
171 |         0
172 |     },
173 |     {
174 |         "0000000000000000000000000000000000000000000000000000000000000002",
175 |         "7CF27B188D034F7E8A52380304B51AC3C08969E277F21B35A60B48FC4766997807775510DB8ED040293D9AC69F7430DBBA7DADE63CE982299E04B79D227873D1",
176 |         1
177 |     },
178 |     {
179 |         "0000000000000000000000000000000000000000000000000000000000000003",
180 |         "5ECBE4D1A6330A44C8F7EF951D4BF165E6C6B721EFADA985FB41661BC6E7FD6C8734640C4998FF7E374B06CE1A64A2ECD82AB036384FB83D9A79B127A27D5032",
181 |         1
182 |     },
183 |     {   /* n - 4 */
184 |         "FFFFFFFF00000000FFFFFFFFFFFFFFFFBCE6FAADA7179E84F3B9CAC2FC63254D",
185 |         "E2534A3532D08FBBA02DDE659EE62BD0031FE2DB785596EF509302446B0308521F0EA8A4B39CC339E62011A02579D289B103693D0CF11FFAA3BD3DC0E7B12739",
186 |         1
187 |     },
188 |     {   /* n - 3 */
189 |         "FFFFFFFF00000000FFFFFFFFFFFFFFFFBCE6FAADA7179E84F3B9CAC2FC63254E",
190 |         "5ECBE4D1A6330A44C8F7EF951D4BF165E6C6B721EFADA985FB41661BC6E7FD6C78CB9BF2B6670082C8B4F931E59B5D1327D54FCAC7B047C265864ED85D82AFCD",
191 |         1
192 |     },
193 |     {   /* n - 2 */
194 |         "FFFFFFFF00000000FFFFFFFFFFFFFFFFBCE6FAADA7179E84F3B9CAC2FC63254F",
195 |         "7CF27B188D034F7E8A52380304B51AC3C08969E277F21B35A60B48FC47669978F888AAEE24712FC0D6C26539608BCF244582521AC3167DD661FB4862DD878C2E",
196 |         0
197 |     },
198 |     {   /* n - 1 */
199 |         "FFFFFFFF00000000FFFFFFFFFFFFFFFFBCE6FAADA7179E84F3B9CAC2FC632550",
200 |         "6B17D1F2E12C4247F8BCE6E563A440F277037D812DEB33A0F4A13945D898C296B01CBD1C01E58065711814B583F061E9D431CCA994CEA1313449BF97C840AE0A",
201 |         0
202 |     },
203 |     {   /* n */
204 |         "FFFFFFFF00000000FFFFFFFFFFFFFFFFBCE6FAADA7179E84F3B9CAC2FC632551",
205 |         "00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000",
206 |         0
207 |     },
208 | };
209 | 
210 | Test secp256k1_tests[] = {
211 |     {
212 |         "0000000000000000000000000000000000000000000000000000000000000000",
213 |         "00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000",
214 |         0
215 |     },
216 |     {
217 |         "0000000000000000000000000000000000000000000000000000000000000001",
218 |         "79BE667EF9DCBBAC55A06295CE870B07029BFCDB2DCE28D959F2815B16F81798483ADA7726A3C4655DA4FBFC0E1108A8FD17B448A68554199C47D08FFB10D4B8",
219 |         0
220 |     },
221 |     {
222 |         "0000000000000000000000000000000000000000000000000000000000000002",
223 |         "C6047F9441ED7D6D3045406E95C07CD85C778E4B8CEF3CA7ABAC09B95C709EE51AE168FEA63DC339A3C58419466CEAEEF7F632653266D0E1236431A950CFE52A",
224 |         1
225 |     },
226 |     {
227 |         "0000000000000000000000000000000000000000000000000000000000000003",
228 |         "F9308A019258C31049344F85F89D5229B531C845836F99B08601F113BCE036F9388F7B0F632DE8140FE337E62A37F3566500A99934C2231B6CB9FD7584B8E672",
229 |         1
230 |     },
231 |     {   /* n - 4 */
232 |         "FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFEBAAEDCE6AF48A03BBFD25E8CD036413D",
233 |         "E493DBF1C10D80F3581E4904930B1404CC6C13900EE0758474FA94ABE8C4CD13AE1266C15F2BAA48A9BD1DF6715AEBB7269851CC404201BF30168422B88C630D",
234 |         1
235 |     },
236 |     {   /* n - 3 */
237 |         "FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFEBAAEDCE6AF48A03BBFD25E8CD036413E",
238 |         "F9308A019258C31049344F85F89D5229B531C845836F99B08601F113BCE036F9C77084F09CD217EBF01CC819D5C80CA99AFF5666CB3DDCE4934602897B4715BD",
239 |         1
240 |     },
241 |     {   /* n - 2 */
242 |         "FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFEBAAEDCE6AF48A03BBFD25E8CD036413F",
243 |         "C6047F9441ED7D6D3045406E95C07CD85C778E4B8CEF3CA7ABAC09B95C709EE5E51E970159C23CC65C3A7BE6B99315110809CD9ACD992F1EDC9BCE55AF301705",
244 |         0
245 |     },
246 |     {   /* n - 1 */
247 |         "FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFEBAAEDCE6AF48A03BBFD25E8CD0364140",
248 |         "79BE667EF9DCBBAC55A06295CE870B07029BFCDB2DCE28D959F2815B16F81798B7C52588D95C3B9AA25B0403F1EEF75702E84BB7597AABE663B82F6F04EF2777",
249 |         0
250 |     },
251 |     {   /* n */
252 |         "FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFEBAAEDCE6AF48A03BBFD25E8CD0364141",
253 |         "00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000",
254 |         0
255 |     },
256 | };
257 | 
258 | 
/* Dumps `size` bytes from `vli` to stdout as space-separated two-digit uppercase hex,
   followed by a newline. */
void vli_print(uint8_t *vli, unsigned int size) {
    unsigned int remaining = size;
    const uint8_t *next = vli;

    while (remaining > 0) {
        printf("%02X ", (unsigned)*next);
        ++next;
        --remaining;
    }
    printf("\n");
}
265 | 
/* Returns the value (0-15) of one hexadecimal digit, or -1 if `c` is not a hex digit. */
static int hex_digit_value(char c) {
  if (c >= '0' && c <= '9') {
    return c - '0';
  }
  if (c >= 'a' && c <= 'f') {
    return c - 'a' + 10;
  }
  if (c >= 'A' && c <= 'F') {
    return c - 'A' + 10;
  }
  return -1;
}

/* Decodes `count` bytes from the hex string `str` (two hex digits per byte, upper or
   lower case) into `bytes`.

   Prints an error and exits with status 1 if fewer than 2 * count hex digits are
   available. Parsing is strict: each byte needs exactly two hex digits, so the
   terminating NUL is never stepped over and whitespace, signs or "0x" prefixes are
   rejected rather than silently accepted. */
void strtobytes(const char* str, uint8_t* bytes, int count) {
  for (int c = 0; c < count; ++c) {
    int hi = hex_digit_value(str[0]);
    /* Only look at the second character once the first is known not to be the NUL. */
    int lo = (hi < 0) ? -1 : hex_digit_value(str[1]);
    if (hi < 0 || lo < 0) {
      printf("Failed to read string to bytes");
      exit(1);
    }
    bytes[c] = (uint8_t)((hi << 4) | lo);
    str += 2;
  }
}
275 | 
276 | int run(Test* tests, int num_tests, uECC_Curve curve) {
277 |     uint8_t private[32] = {0};
278 |     uint8_t public[64] = {0};
279 |     uint8_t expected[64] = {0};
280 |     int result;
281 |     int i;
282 |     int private_key_size;
283 |     int public_key_size;
284 |     int all_success = 1;
285 | 
286 |     private_key_size = uECC_curve_private_key_size(curve);
287 |     public_key_size = uECC_curve_public_key_size(curve);
288 | 
289 |     for (i = 0; i < num_tests; ++i) {
290 |         strtobytes(tests[i].k, private, private_key_size);
291 |         result = uECC_compute_public_key(private, public, curve);
292 |         if (result != tests[i].success) {
293 |             all_success = 0;
294 |             printf("  Got unexpected result from test %d: %d\n", i, result);
295 |         }
296 |         if (result) {
297 |             strtobytes(tests[i].Q, expected, public_key_size);
298 |             if (memcmp(public, expected, public_key_size) != 0) {
299 |                 all_success = 0;
300 |                 printf("  Got incorrect public key for test %d\n", i);
301 |                 printf("    Expected: ");
302 |                 vli_print(expected, public_key_size);
303 |                 printf("    Calculated: ");
304 |                 vli_print(public, public_key_size);
305 |             }
306 |         }
307 |     }
308 | 
309 |     return all_success;
310 | }
311 | 
/* Runs the vector table `<curve>_tests` on uECC_<curve>() and prints the outcome.

   Expands to several statements, so it must be used as a statement of its own (no
   trailing semicolon needed) inside a scope that declares `int failed`; the macro
   sets `failed` to 1 when the curve's vectors do not all pass. */
#define RUN_TESTS(curve) \
    printf(#curve ":\n"); \
    if (run(curve##_tests, sizeof(curve##_tests) / sizeof(curve##_tests[0]), uECC_##curve()) ) { \
        printf("  All passed\n"); \
    } else { \
        printf("  Failed\n"); \
        failed = 1; \
    }

/* Runs the vectors of every curve compiled into the library.
   Returns 0 when all of them pass and 1 otherwise, so the result is visible to
   scripts that only look at the exit status. */
int main() {
    int failed = 0;

#if uECC_SUPPORTS_secp160r1
    RUN_TESTS(secp160r1)
#endif
#if uECC_SUPPORTS_secp192r1
    RUN_TESTS(secp192r1)
#endif
#if uECC_SUPPORTS_secp224r1
    RUN_TESTS(secp224r1)
#endif
#if uECC_SUPPORTS_secp256r1
    RUN_TESTS(secp256r1)
#endif
#if uECC_SUPPORTS_secp256k1
    RUN_TESTS(secp256k1)
#endif

    return failed;
}
339 | 


--------------------------------------------------------------------------------
/test/test_compress.c:
--------------------------------------------------------------------------------
 1 | /* Copyright 2014, Kenneth MacKay. Licensed under the BSD 2-clause license. */
 2 | 
 3 | #include "uECC.h"
 4 | 
 5 | #include <stdio.h>
 6 | #include <string.h>
 7 | 
 8 | #ifndef uECC_TEST_NUMBER_OF_ITERATIONS
 9 | #define uECC_TEST_NUMBER_OF_ITERATIONS   256
10 | #endif
11 | 
/* Prints the label `str` and a space, then `size` bytes from `vli` as space-separated
   two-digit uppercase hex, then a newline. */
void vli_print(char *str, uint8_t *vli, unsigned int size) {
    const uint8_t *byte = vli;
    const uint8_t *const stop = vli + size;

    printf("%s ", str);
    for (; byte != stop; ++byte) {
        printf("%02X ", (unsigned)*byte);
    }
    printf("\n");
}
19 | 
20 | int main() {
21 |     uint8_t public[64];
22 |     uint8_t private[32];
23 |     uint8_t compressed_point[33];
24 |     uint8_t decompressed_point[64];
25 | 
26 |     int i;
27 |     int c;
28 |     
29 |     const struct uECC_Curve_t * curves[5];
30 |     int num_curves = 0;
31 | #if uECC_SUPPORTS_secp160r1
32 |     curves[num_curves++] = uECC_secp160r1();
33 | #endif
34 | #if uECC_SUPPORTS_secp192r1
35 |     curves[num_curves++] = uECC_secp192r1();
36 | #endif
37 | #if uECC_SUPPORTS_secp224r1
38 |     curves[num_curves++] = uECC_secp224r1();
39 | #endif
40 | #if uECC_SUPPORTS_secp256r1
41 |     curves[num_curves++] = uECC_secp256r1();
42 | #endif
43 | #if uECC_SUPPORTS_secp256k1
44 |     curves[num_curves++] = uECC_secp256k1();
45 | #endif
46 |     
47 |     printf("Testing compression and decompression of %d random EC points\n",
48 |            uECC_TEST_NUMBER_OF_ITERATIONS);
49 | 
50 |     for (c = 0; c < num_curves; ++c) {
51 |         for (i = 0; i < uECC_TEST_NUMBER_OF_ITERATIONS; ++i) {
52 |             printf(".");
53 |             fflush(stdout);
54 |             
55 |             memset(public, 0, sizeof(public));
56 |             memset(decompressed_point, 0, sizeof(decompressed_point));
57 | 
58 |             /* Generate arbitrary EC point (public) on Curve */
59 |             if (!uECC_make_key(public, private, curves[c])) {
60 |                 printf("uECC_make_key() failed\n");
61 |                 continue;
62 |             }
63 | 
64 |             /* compress and decompress point */
65 |             uECC_compress(public, compressed_point, curves[c]);
66 |             uECC_decompress(compressed_point, decompressed_point, curves[c]);
67 | 
68 |             if (memcmp(public, decompressed_point, sizeof(public)) != 0) {
69 |                 printf("Original and decompressed points are not identical!\n");
70 |                 vli_print("Original point =     ", public, sizeof(public));
71 |                 vli_print("Compressed point =   ", compressed_point, sizeof(compressed_point));
72 |                 vli_print("Decompressed point = ", decompressed_point, sizeof(decompressed_point));
73 |             }
74 |         }
75 |         printf("\n");
76 |     }
77 | 
78 |     return 0;
79 | }
80 | 


--------------------------------------------------------------------------------
/test/test_compute.c:
--------------------------------------------------------------------------------
 1 | /* Copyright 2014, Kenneth MacKay. Licensed under the BSD 2-clause license. */
 2 | 
 3 | #include "uECC.h"
 4 | 
 5 | #include <stdio.h>
 6 | #include <string.h>
 7 | 
 8 | void vli_print(char *str, uint8_t *vli, unsigned int size) {
 9 |     printf("%s ", str);
10 |     for(unsigned i=0; i<size; ++i) {
11 |         printf("%02X ", (unsigned)vli[i]);
12 |     }
13 |     printf("\n");
14 | }
15 | 
/* For every curve compiled into the library: generates 256 random key pairs and checks
   that uECC_compute_public_key() reproduces the public key from the private key, then
   checks that an all-zero private key is rejected.

   Returns 0 when every check passed and 1 otherwise. */
int main() {
    int i;
    int success;
    int failed = 0;
    uint8_t private[32];
    uint8_t public[64];
    uint8_t public_computed[64];
    
    int c;
    
    const struct uECC_Curve_t * curves[5];
    int num_curves = 0;
#if uECC_SUPPORTS_secp160r1
    curves[num_curves++] = uECC_secp160r1();
#endif
#if uECC_SUPPORTS_secp192r1
    curves[num_curves++] = uECC_secp192r1();
#endif
#if uECC_SUPPORTS_secp224r1
    curves[num_curves++] = uECC_secp224r1();
#endif
#if uECC_SUPPORTS_secp256r1
    curves[num_curves++] = uECC_secp256r1();
#endif
#if uECC_SUPPORTS_secp256k1
    curves[num_curves++] = uECC_secp256k1();
#endif

    printf("Testing 256 random private key pairs\n");
    for (c = 0; c < num_curves; ++c) {
        for (i = 0; i < 256; ++i) {
            printf(".");
            fflush(stdout);
            
            /* Clear the full buffers: the comparison and the dumps below cover the
               whole arrays, which are larger than the keys of the smaller curves. */
            memset(private, 0, sizeof(private));
            memset(public, 0, sizeof(public));
            memset(public_computed, 0, sizeof(public_computed));
            
            if (!uECC_make_key(public, private, curves[c])) {
                printf("uECC_make_key() failed\n");
                failed = 1;
                continue;
            }

            if (!uECC_compute_public_key(private, public_computed, curves[c])) {
                printf("uECC_compute_public_key() failed\n");
                failed = 1;
            }

            if (memcmp(public, public_computed, sizeof(public)) != 0) {
                failed = 1;
                printf("Computed and provided public keys are not identical!\n");
                vli_print("Computed public key = ", public_computed, sizeof(public_computed));
                vli_print("Provided public key = ", public, sizeof(public));
                vli_print("Private key = ", private, sizeof(private));
            }
        }
        
        printf("\n");
        printf("Testing private key = 0\n");

        memset(private, 0, sizeof(private));
        success = uECC_compute_public_key(private, public_computed, curves[c]);
        if (success) {
            printf("uECC_compute_public_key() should have failed\n");
            failed = 1;
        }
        printf("\n");
    }
    
    return failed;
}
82 | 


--------------------------------------------------------------------------------
/test/test_ecdh.c:
--------------------------------------------------------------------------------
 1 | /* Copyright 2014, Kenneth MacKay. Licensed under the BSD 2-clause license. */
 2 | 
 3 | #include "uECC.h"
 4 | 
 5 | #include <stdio.h>
 6 | #include <string.h>
 7 | 
 8 | void vli_print(uint8_t *vli, unsigned int size) {
 9 |     for(unsigned i=0; i<size; ++i) {
10 |         printf("%02X ", (unsigned)vli[i]);
11 |     }
12 | }
13 | 
/* For every curve compiled into the library: generates 256 pairs of random key pairs
   and checks that both sides derive the same secret from uECC_shared_secret().

   Returns 1 immediately if key generation or secret derivation fails. A mismatch
   between the two secrets is reported with a dump of all values, the run continues,
   and the program then returns 1 at the end. Returns 0 only when everything matched. */
int main() {
    int i, c;
    int failed = 0;
    uint8_t private1[32] = {0};
    uint8_t private2[32] = {0};
    uint8_t public1[64] = {0};
    uint8_t public2[64] = {0};
    uint8_t secret1[32] = {0};
    uint8_t secret2[32] = {0};
    
    const struct uECC_Curve_t * curves[5];
    int num_curves = 0;
#if uECC_SUPPORTS_secp160r1
    curves[num_curves++] = uECC_secp160r1();
#endif
#if uECC_SUPPORTS_secp192r1
    curves[num_curves++] = uECC_secp192r1();
#endif
#if uECC_SUPPORTS_secp224r1
    curves[num_curves++] = uECC_secp224r1();
#endif
#if uECC_SUPPORTS_secp256r1
    curves[num_curves++] = uECC_secp256r1();
#endif
#if uECC_SUPPORTS_secp256k1
    curves[num_curves++] = uECC_secp256k1();
#endif
    
    printf("Testing 256 random private key pairs\n");

    for (c = 0; c < num_curves; ++c) {
        for (i = 0; i < 256; ++i) {
            printf(".");
            fflush(stdout);

            if (!uECC_make_key(public1, private1, curves[c]) ||
                !uECC_make_key(public2, private2, curves[c])) {
                printf("uECC_make_key() failed\n");
                return 1;
            }

            if (!uECC_shared_secret(public2, private1, secret1, curves[c])) {
                printf("shared_secret() failed (1)\n");
                return 1;
            }

            if (!uECC_shared_secret(public1, private2, secret2, curves[c])) {
                printf("shared_secret() failed (2)\n");
                return 1;
            }
        
            if (memcmp(secret1, secret2, sizeof(secret1)) != 0) {
                /* This is the property under test, so it must affect the exit status. */
                failed = 1;
                printf("Shared secrets are not identical!\n");
                printf("Private key 1 = ");
                vli_print(private1, 32);
                printf("\n");
                printf("Private key 2 = ");
                vli_print(private2, 32);
                printf("\n");
                printf("Public key 1 = ");
                vli_print(public1, 64);
                printf("\n");
                printf("Public key 2 = ");
                vli_print(public2, 64);
                printf("\n");
                printf("Shared secret 1 = ");
                vli_print(secret1, 32);
                printf("\n");
                printf("Shared secret 2 = ");
                vli_print(secret2, 32);
                printf("\n");
            }
        }
        printf("\n");
    }
    
    return failed;
}
91 | 


--------------------------------------------------------------------------------
/test/test_ecdsa.c:
--------------------------------------------------------------------------------
 1 | /* Copyright 2014, Kenneth MacKay. Licensed under the BSD 2-clause license. */
 2 | 
 3 | #include "uECC.h"
 4 | 
 5 | #include <stdio.h>
 6 | #include <string.h>
 7 | 
 8 | int main() {
 9 |     /* ECDSA round-trip test; 32/64-byte buffers fit the 256-bit curves listed below. */
10 |     uint8_t private_key[32] = {0};
11 |     uint8_t public_key[64] = {0};
12 |     uint8_t digest[32] = {0};
13 |     uint8_t signature[64] = {0};
14 |     int curve_index, round;
15 |     const struct uECC_Curve_t * enabled[5];
16 |     int enabled_count = 0;
17 | #if uECC_SUPPORTS_secp160r1
18 |     enabled[enabled_count++] = uECC_secp160r1();
19 | #endif
20 | #if uECC_SUPPORTS_secp192r1
21 |     enabled[enabled_count++] = uECC_secp192r1();
22 | #endif
23 | #if uECC_SUPPORTS_secp224r1
24 |     enabled[enabled_count++] = uECC_secp224r1();
25 | #endif
26 | #if uECC_SUPPORTS_secp256r1
27 |     enabled[enabled_count++] = uECC_secp256r1();
28 | #endif
29 | #if uECC_SUPPORTS_secp256k1
30 |     enabled[enabled_count++] = uECC_secp256k1();
31 | #endif
32 |     /* Per enabled curve, run 256 rounds of: make key pair, sign, verify. */
33 |     printf("Testing 256 signatures\n");
34 |     for (curve_index = 0; curve_index < enabled_count; ++curve_index) {
35 |         const struct uECC_Curve_t * curve = enabled[curve_index];
36 |         for (round = 256; round > 0; --round) {
37 |             printf(".");
38 |             fflush(stdout);
39 |             if (uECC_make_key(public_key, private_key, curve) == 0) {
40 |                 printf("uECC_make_key() failed\n");
41 |                 return 1;
42 |             }
43 |             /* The leading public-key bytes stand in for a message hash. */
44 |             memcpy(digest, public_key, sizeof(digest));
45 |             if (uECC_sign(private_key, digest, sizeof(digest), signature, curve) == 0) {
46 |                 printf("uECC_sign() failed\n");
47 |                 return 1;
48 |             }
49 |             /* The fresh signature must verify against the matching public key. */
50 |             if (uECC_verify(public_key, digest, sizeof(digest), signature, curve) == 0) {
51 |                 printf("uECC_verify() failed\n");
52 |                 return 1;
53 |             }
54 |         }
55 |         printf("\n");
56 |     }
57 |     /* Every round succeeded on every enabled curve. */
58 |     return 0;
59 | }
60 | 


--------------------------------------------------------------------------------
/test/test_ecdsa_deterministic.c.example:
--------------------------------------------------------------------------------
 1 | /* Copyright 2014, Kenneth MacKay. Licensed under the BSD 2-clause license. */
 2 | 
 3 | #include "uECC.h"
 4 | 
 5 | #include <stdio.h>
 6 | #include <string.h>
 7 | 
 8 | #define SHA256_BLOCK_LENGTH  64 /* bytes; passed as uECC_HashContext.block_size */
 9 | #define SHA256_DIGEST_LENGTH 32 /* bytes; passed as uECC_HashContext.result_size */
10 | /* NOTE(review): this layout presumably has to match the SHA-256 implementation linked in. */
11 | typedef struct SHA256_CTX {
12 | 	uint32_t	state[8];
13 | 	uint64_t	bitcount;
14 | 	uint8_t	buffer[SHA256_BLOCK_LENGTH];
15 | } SHA256_CTX;
16 | /* SHA-256 primitives; declared extern here and not defined in this file. */
17 | extern void SHA256_Init(SHA256_CTX *ctx);
18 | extern void SHA256_Update(SHA256_CTX *ctx, const uint8_t *message, size_t message_size);
19 | extern void SHA256_Final(uint8_t digest[SHA256_DIGEST_LENGTH], SHA256_CTX *ctx);
20 | /* uECC is the first member, so the callbacks can cast the base pointer back to this type. */
21 | typedef struct SHA256_HashContext {
22 |     uECC_HashContext uECC;
23 |     SHA256_CTX ctx;
24 | } SHA256_HashContext;
25 | 
26 | static void init_SHA256(const uECC_HashContext *base) {
27 |     /* base is the first member of SHA256_HashContext, so cast back to reach ctx. */
28 |     SHA256_Init(&((SHA256_HashContext *)base)->ctx);
29 | }
30 | 
31 | static void update_SHA256(const uECC_HashContext *base,
32 |                           const uint8_t *message,
33 |                           unsigned message_size) {
34 |     /* Feed message_size bytes of message into the wrapped SHA-256 state. */
35 |     SHA256_Update(&((SHA256_HashContext *)base)->ctx, message, message_size);
36 | }
37 | 
38 | static void finish_SHA256(const uECC_HashContext *base, uint8_t *hash_result) {
39 |     /* Finalize the wrapped SHA-256 state and write the digest to hash_result. */
40 |     SHA256_Final(hash_result, &((SHA256_HashContext *)base)->ctx);
41 | }
42 | 
43 | int main() {
44 |     int i, c;
45 |     uint8_t private[32] = {0};
46 |     uint8_t public[64] = {0};
47 |     uint8_t hash[32] = {0};
48 |     uint8_t sig[64] = {0};
49 |     /* Scratch buffer for uECC_HashContext.tmp: 2 * result_size + block_size bytes. */
50 |     uint8_t tmp[2 * SHA256_DIGEST_LENGTH + SHA256_BLOCK_LENGTH];
51 |     SHA256_HashContext ctx = {{
52 |         &init_SHA256,
53 |         &update_SHA256,
54 |         &finish_SHA256,
55 |         SHA256_BLOCK_LENGTH,
56 |         SHA256_DIGEST_LENGTH,
57 |         tmp
58 |     }};
59 |     /* All five curve accessors are used, so every uECC_SUPPORTS_* option must be nonzero. */
60 |     const struct uECC_Curve_t * curves[5];
61 |     curves[0] = uECC_secp160r1();
62 |     curves[1] = uECC_secp192r1();
63 |     curves[2] = uECC_secp224r1();
64 |     curves[3] = uECC_secp256r1();
65 |     curves[4] = uECC_secp256k1();
66 |     /* Run 256 key/sign/verify rounds on each curve; exit status 1 on the first failure. */
67 |     printf("Testing 256 signatures\n");
68 |     for (c = 0; c < 5; ++c) {
69 |         for (i = 0; i < 256; ++i) {
70 |             printf(".");
71 |             fflush(stdout);
72 | 
73 |             if (!uECC_make_key(public, private, curves[c])) {
74 |                 printf("uECC_make_key() failed\n");
75 |                 return 1;
76 |             }
77 |             memcpy(hash, public, sizeof(hash));
78 |             /* Sign deterministically (RFC 6979) using the SHA-256 hash context. */
79 |             if (!uECC_sign_deterministic(private, hash, sizeof(hash), &ctx.uECC, sig, curves[c])) {
80 |                 printf("uECC_sign_deterministic() failed\n");
81 |                 return 1;
82 |             }
83 |             /* The signature must verify against the matching public key. */
84 |             if (!uECC_verify(public, hash, sizeof(hash), sig, curves[c])) {
85 |                 printf("uECC_verify() failed\n");
86 |                 return 1;
87 |             }
88 |         }
89 |         printf("\n");
90 |     }
91 |     
92 |     return 0;
93 | }
94 | 


--------------------------------------------------------------------------------
/types.h:
--------------------------------------------------------------------------------
  1 | /* Copyright 2015, Kenneth MacKay. Licensed under the BSD 2-clause license. */
  2 | 
  3 | #ifndef _UECC_TYPES_H_
  4 | #define _UECC_TYPES_H_
  5 | 
  6 | #ifndef uECC_PLATFORM /* Not set by the user: guess it from compiler-defined macros. */
  7 |     #if __AVR__
  8 |         #define uECC_PLATFORM uECC_avr
  9 |     #elif defined(__thumb2__) || defined(_M_ARMT) /* I think MSVC only supports Thumb-2 targets */
 10 |         #define uECC_PLATFORM uECC_arm_thumb2
 11 |     #elif defined(__thumb__)
 12 |         #define uECC_PLATFORM uECC_arm_thumb
 13 |     #elif defined(__arm__) || defined(_M_ARM)
 14 |         #define uECC_PLATFORM uECC_arm
 15 |     #elif defined(__aarch64__)
 16 |         #define uECC_PLATFORM uECC_arm64
 17 |     #elif defined(__i386__) || defined(_M_IX86) || defined(_X86_) || defined(__I86__)
 18 |         #define uECC_PLATFORM uECC_x86
 19 |     #elif defined(__amd64__) || defined(_M_X64)
 20 |         #define uECC_PLATFORM uECC_x86_64
 21 |     #else /* none of the platform macros above matched */
 22 |         #define uECC_PLATFORM uECC_arch_other
 23 |     #endif
 24 | #endif /* uECC_PLATFORM */
 25 | 
 26 | #ifndef uECC_ARM_USE_UMAAL /* Default: 1 on ARMv6+ ARM, and on ARMv6+ Thumb-2 except ARMv7-M. */
 27 |     #if (uECC_PLATFORM == uECC_arm) && (__ARM_ARCH >= 6)
 28 |         #define uECC_ARM_USE_UMAAL 1
 29 |     #elif (uECC_PLATFORM == uECC_arm_thumb2) && (__ARM_ARCH >= 6) && !__ARM_ARCH_7M__
 30 |         #define uECC_ARM_USE_UMAAL 1
 31 |     #else
 32 |         #define uECC_ARM_USE_UMAAL 0
 33 |     #endif
 34 | #endif /* uECC_ARM_USE_UMAAL */
 35 | 
 36 | #ifndef uECC_WORD_SIZE /* Default word size in bytes: 1 on AVR, 8 on x86-64/ARM64, else 4. */
 37 |     #if uECC_PLATFORM == uECC_avr
 38 |         #define uECC_WORD_SIZE 1
 39 |     #elif (uECC_PLATFORM == uECC_x86_64 || uECC_PLATFORM == uECC_arm64)
 40 |         #define uECC_WORD_SIZE 8
 41 |     #else
 42 |         #define uECC_WORD_SIZE 4
 43 |     #endif
 44 | #endif
 45 | /* Reject any word size other than 1, 4 or 8 bytes. */
 46 | #if (uECC_WORD_SIZE != 1) && (uECC_WORD_SIZE != 4) && (uECC_WORD_SIZE != 8)
 47 |     #error "Unsupported value for uECC_WORD_SIZE"
 48 | #endif
 49 | /* AVR builds are forced to 1-byte words; a build message reports the override. */
 50 | #if ((uECC_PLATFORM == uECC_avr) && (uECC_WORD_SIZE != 1))
 51 |     #pragma message ("uECC_WORD_SIZE must be 1 for AVR")
 52 |     #undef uECC_WORD_SIZE
 53 |     #define uECC_WORD_SIZE 1
 54 | #endif
 55 | /* ARM, Thumb and Thumb-2 builds are likewise forced to 4-byte words. */
 56 | #if ((uECC_PLATFORM == uECC_arm || uECC_PLATFORM == uECC_arm_thumb || \
 57 |         uECC_PLATFORM ==  uECC_arm_thumb2) && \
 58 |      (uECC_WORD_SIZE != 4))
 59 |     #pragma message ("uECC_WORD_SIZE must be 4 for ARM")
 60 |     #undef uECC_WORD_SIZE
 61 |     #define uECC_WORD_SIZE 4
 62 | #endif
 63 | 
 64 | #if defined(__SIZEOF_INT128__) || ((__clang_major__ * 100 + __clang_minor__) >= 302)
 65 |     #define SUPPORTS_INT128 1 /* __SIZEOF_INT128__ is defined, or Clang 3.2 or newer */
 66 | #else
 67 |     #define SUPPORTS_INT128 0 /* uECC_dword_t will not exist for 8-byte words */
 68 | #endif
 69 | 
 70 | typedef int8_t wordcount_t; /* number of words in a multi-word integer */
 71 | typedef int16_t bitcount_t; /* bit index or bit count */
 72 | typedef int8_t cmpresult_t; /* result type of uECC_vli_cmp() */
 73 | /* Word type, double-width type and bit constants, selected by uECC_WORD_SIZE. */
 74 | #if (uECC_WORD_SIZE == 1)
 75 | 
 76 | typedef uint8_t uECC_word_t;
 77 | typedef uint16_t uECC_dword_t;
 78 | /* Constants for 8-bit words: top-bit mask, bits per word, log2(bits), bits - 1. */
 79 | #define HIGH_BIT_SET 0x80
 80 | #define uECC_WORD_BITS 8
 81 | #define uECC_WORD_BITS_SHIFT 3
 82 | #define uECC_WORD_BITS_MASK 0x07
 83 | 
 84 | #elif (uECC_WORD_SIZE == 4)
 85 | 
 86 | typedef uint32_t uECC_word_t;
 87 | typedef uint64_t uECC_dword_t;
 88 | /* Same four constants for 32-bit words. */
 89 | #define HIGH_BIT_SET 0x80000000
 90 | #define uECC_WORD_BITS 32
 91 | #define uECC_WORD_BITS_SHIFT 5
 92 | #define uECC_WORD_BITS_MASK 0x01F
 93 | 
 94 | #elif (uECC_WORD_SIZE == 8)
 95 | 
 96 | typedef uint64_t uECC_word_t;
 97 | #if SUPPORTS_INT128
 98 | typedef unsigned __int128 uECC_dword_t;
 99 | #endif /* without SUPPORTS_INT128, uECC_dword_t is left undefined here */
100 | /* Same four constants for 64-bit words. */
101 | #define HIGH_BIT_SET 0x8000000000000000ull
102 | #define uECC_WORD_BITS 64
103 | #define uECC_WORD_BITS_SHIFT 6
104 | #define uECC_WORD_BITS_MASK 0x03F
105 | 
106 | #endif /* uECC_WORD_SIZE */
107 | 
108 | #endif /* _UECC_TYPES_H_ */
109 | 


--------------------------------------------------------------------------------
/uECC.h:
--------------------------------------------------------------------------------
  1 | /* Copyright 2014, Kenneth MacKay. Licensed under the BSD 2-clause license. */
  2 | 
  3 | #ifndef _UECC_H_
  4 | #define _UECC_H_
  5 | 
  6 | #include <stdint.h>
  7 | 
  8 | /* Platform selection options.
  9 | If uECC_PLATFORM is not defined, the code will try to guess it based on compiler macros.
 10 | Possible values for uECC_PLATFORM are defined below: */
 11 | #define uECC_arch_other 0
 12 | #define uECC_x86        1
 13 | #define uECC_x86_64     2
 14 | #define uECC_arm        3
 15 | #define uECC_arm_thumb  4
 16 | #define uECC_arm_thumb2 5
 17 | #define uECC_arm64      6
 18 | #define uECC_avr        7
 19 | 
 20 | /* If desired, you can define uECC_WORD_SIZE as appropriate for your platform (1, 4, or 8 bytes).
 21 | If uECC_WORD_SIZE is not explicitly defined then it will be automatically set based on your
 22 | platform. */
 23 | 
 24 | /* Optimization level; trade speed for code size.
 25 |    Larger values produce code that is faster but larger.
 26 |    Currently supported values are 0 - 4; 0 is unusably slow for most applications.
 27 |    Optimization level 4 currently only has an effect on ARM platforms where more than one
 28 |    curve is enabled. */
 29 | #ifndef uECC_OPTIMIZATION_LEVEL
 30 |     #define uECC_OPTIMIZATION_LEVEL 2
 31 | #endif
 32 | 
 33 | /* uECC_SQUARE_FUNC - If enabled (defined as nonzero), this will cause a specific function to be
 34 | used for (scalar) squaring instead of the generic multiplication function. This can make things
 35 | somewhat faster, but increases the code size. */
 36 | #ifndef uECC_SQUARE_FUNC
 37 |     #define uECC_SQUARE_FUNC 0
 38 | #endif
 39 | 
 40 | /* uECC_VLI_NATIVE_LITTLE_ENDIAN - If enabled (defined as nonzero), this will switch to native
 41 | little-endian format for *all* arrays passed in and out of the public API. This includes public
 42 | and private keys, shared secrets, signatures and message hashes.
 43 | Using this switch reduces the amount of call stack memory used by uECC, since fewer intermediate
 44 | translations are required.
 45 | Note that this will *only* work on native little-endian processors and it will treat the uint8_t
 46 | arrays passed into the public API as word arrays, therefore requiring the provided byte arrays
 47 | to be word aligned on architectures that do not support unaligned accesses.
 48 | IMPORTANT: Keys and signatures generated with uECC_VLI_NATIVE_LITTLE_ENDIAN=1 are incompatible
 49 | with keys and signatures generated with uECC_VLI_NATIVE_LITTLE_ENDIAN=0; all parties must use
 50 | the same endianness. */
 51 | #ifndef uECC_VLI_NATIVE_LITTLE_ENDIAN
 52 |     #define uECC_VLI_NATIVE_LITTLE_ENDIAN 0
 53 | #endif
 54 | 
 55 | /* Curve support selection. Set to 0 to remove that curve. */
 56 | #ifndef uECC_SUPPORTS_secp160r1
 57 |     #define uECC_SUPPORTS_secp160r1 1
 58 | #endif
 59 | #ifndef uECC_SUPPORTS_secp192r1
 60 |     #define uECC_SUPPORTS_secp192r1 1
 61 | #endif
 62 | #ifndef uECC_SUPPORTS_secp224r1
 63 |     #define uECC_SUPPORTS_secp224r1 1
 64 | #endif
 65 | #ifndef uECC_SUPPORTS_secp256r1
 66 |     #define uECC_SUPPORTS_secp256r1 1
 67 | #endif
 68 | #ifndef uECC_SUPPORTS_secp256k1
 69 |     #define uECC_SUPPORTS_secp256k1 1
 70 | #endif
 71 | 
 72 | /* Specifies whether compressed point format is supported.
 73 |    Set to 0 to disable point compression/decompression functions. */
 74 | #ifndef uECC_SUPPORT_COMPRESSED_POINT
 75 |     #define uECC_SUPPORT_COMPRESSED_POINT 1
 76 | #endif
 77 | 
 78 | struct uECC_Curve_t;
 79 | typedef const struct uECC_Curve_t * uECC_Curve;
 80 | 
 81 | #ifdef __cplusplus
 82 | extern "C"
 83 | {
 84 | #endif
 85 | 
 86 | #if uECC_SUPPORTS_secp160r1
 87 | uECC_Curve uECC_secp160r1(void);
 88 | #endif
 89 | #if uECC_SUPPORTS_secp192r1
 90 | uECC_Curve uECC_secp192r1(void);
 91 | #endif
 92 | #if uECC_SUPPORTS_secp224r1
 93 | uECC_Curve uECC_secp224r1(void);
 94 | #endif
 95 | #if uECC_SUPPORTS_secp256r1
 96 | uECC_Curve uECC_secp256r1(void);
 97 | #endif
 98 | #if uECC_SUPPORTS_secp256k1
 99 | uECC_Curve uECC_secp256k1(void);
100 | #endif
101 | 
102 | /* uECC_RNG_Function type
103 | The RNG function should fill 'size' random bytes into 'dest'. It should return 1 if
104 | 'dest' was filled with random data, or 0 if the random data could not be generated.
105 | The filled-in values should be either truly random, or from a cryptographically-secure PRNG.
106 | 
107 | A correctly functioning RNG function must be set (using uECC_set_rng()) before calling
108 | uECC_make_key() or uECC_sign().
109 | 
110 | Setting a correctly functioning RNG function improves the resistance to side-channel attacks
111 | for uECC_shared_secret() and uECC_sign_deterministic().
112 | 
113 | A correct RNG function is set by default when building for Windows, Linux, or OS X.
114 | If you are building on another POSIX-compliant system that supports /dev/random or /dev/urandom,
115 | you can define uECC_POSIX to use the predefined RNG. For embedded platforms there is no predefined
116 | RNG function; you must provide your own.
117 | */
118 | typedef int (*uECC_RNG_Function)(uint8_t *dest, unsigned size);
119 | 
120 | /* uECC_set_rng() function.
121 | Set the function that will be used to generate random bytes. The RNG function should
122 | return 1 if the random data was generated, or 0 if the random data could not be generated.
123 | 
124 | On platforms where there is no predefined RNG function (eg embedded platforms), this must
125 | be called before uECC_make_key() or uECC_sign() are used.
126 | 
127 | Inputs:
128 |     rng_function - The function that will be used to generate random bytes.
129 | */
130 | void uECC_set_rng(uECC_RNG_Function rng_function);
131 | 
132 | /* uECC_get_rng() function.
133 | 
134 | Returns the function that will be used to generate random bytes.
135 | */
136 | uECC_RNG_Function uECC_get_rng(void);
137 | 
138 | /* uECC_curve_private_key_size() function.
139 | 
140 | Returns the size of a private key for the curve in bytes.
141 | */
142 | int uECC_curve_private_key_size(uECC_Curve curve);
143 | 
144 | /* uECC_curve_public_key_size() function.
145 | 
146 | Returns the size of a public key for the curve in bytes.
147 | */
148 | int uECC_curve_public_key_size(uECC_Curve curve);
149 | 
150 | /* uECC_make_key() function.
151 | Create a public/private key pair.
152 | 
153 | Outputs:
154 |     public_key  - Will be filled in with the public key. Must be at least 2 * the curve size
155 |                   (in bytes) long. For example, if the curve is secp256r1, public_key must be 64
156 |                   bytes long.
157 |     private_key - Will be filled in with the private key. Must be as long as the curve order; this
158 |                   is typically the same as the curve size, except for secp160r1. For example, if the
159 |                   curve is secp256r1, private_key must be 32 bytes long.
160 | 
161 |                   For secp160r1, private_key must be 21 bytes long! Note that the first byte will
162 |                   almost always be 0 (there is about a 1 in 2^80 chance of it being non-zero).
163 | 
164 | Returns 1 if the key pair was generated successfully, 0 if an error occurred.
165 | */
166 | int uECC_make_key(uint8_t *public_key, uint8_t *private_key, uECC_Curve curve);
167 | 
168 | /* uECC_shared_secret() function.
169 | Compute a shared secret given your secret key and someone else's public key. If the public key
170 | is not from a trusted source and has not been previously verified, you should verify it first
171 | using uECC_valid_public_key().
172 | Note: It is recommended that you hash the result of uECC_shared_secret() before using it for
173 | symmetric encryption or HMAC.
174 | 
175 | Inputs:
176 |     public_key  - The public key of the remote party.
177 |     private_key - Your private key.
178 | 
179 | Outputs:
180 |     secret - Will be filled in with the shared secret value. Must be the same size as the
181 |              curve size; for example, if the curve is secp256r1, secret must be 32 bytes long.
182 | 
183 | Returns 1 if the shared secret was generated successfully, 0 if an error occurred.
184 | */
185 | int uECC_shared_secret(const uint8_t *public_key,
186 |                        const uint8_t *private_key,
187 |                        uint8_t *secret,
188 |                        uECC_Curve curve);
189 | 
190 | #if uECC_SUPPORT_COMPRESSED_POINT
191 | /* uECC_compress() function.
192 | Compress a public key.
193 | 
194 | Inputs:
195 |     public_key - The public key to compress.
196 | 
197 | Outputs:
198 |     compressed - Will be filled in with the compressed public key. Must be at least
199 |                  (curve size + 1) bytes long; for example, if the curve is secp256r1,
200 |                  compressed must be 33 bytes long.
201 | */
202 | void uECC_compress(const uint8_t *public_key, uint8_t *compressed, uECC_Curve curve);
203 | 
204 | /* uECC_decompress() function.
205 | Decompress a compressed public key.
206 | 
207 | Inputs:
208 |     compressed - The compressed public key.
209 | 
210 | Outputs:
211 |     public_key - Will be filled in with the decompressed public key.
212 | */
213 | void uECC_decompress(const uint8_t *compressed, uint8_t *public_key, uECC_Curve curve);
214 | #endif /* uECC_SUPPORT_COMPRESSED_POINT */
215 | 
216 | /* uECC_valid_public_key() function.
217 | Check to see if a public key is valid.
218 | 
219 | Note that you are not required to check for a valid public key before using any other uECC
220 | functions. However, you may wish to avoid spending CPU time computing a shared secret or
221 | verifying a signature using an invalid public key.
222 | 
223 | Inputs:
224 |     public_key - The public key to check.
225 | 
226 | Returns 1 if the public key is valid, 0 if it is invalid.
227 | */
228 | int uECC_valid_public_key(const uint8_t *public_key, uECC_Curve curve);
229 | 
230 | /* uECC_compute_public_key() function.
231 | Compute the corresponding public key for a private key.
232 | 
233 | Inputs:
234 |     private_key - The private key to compute the public key for
235 | 
236 | Outputs:
237 |     public_key - Will be filled in with the corresponding public key
238 | 
239 | Returns 1 if the key was computed successfully, 0 if an error occurred.
240 | */
241 | int uECC_compute_public_key(const uint8_t *private_key, uint8_t *public_key, uECC_Curve curve);
242 | 
243 | /* uECC_sign() function.
244 | Generate an ECDSA signature for a given hash value.
245 | 
246 | Usage: Compute a hash of the data you wish to sign (SHA-2 is recommended) and pass it in to
247 | this function along with your private key.
248 | 
249 | Inputs:
250 |     private_key  - Your private key.
251 |     message_hash - The hash of the message to sign.
252 |     hash_size    - The size of message_hash in bytes.
253 | 
254 | Outputs:
255 |     signature - Will be filled in with the signature value. Must be at least 2 * curve size long.
256 |                 For example, if the curve is secp256r1, signature must be 64 bytes long.
257 | 
258 | Returns 1 if the signature was generated successfully, 0 if an error occurred.
259 | */
260 | int uECC_sign(const uint8_t *private_key,
261 |               const uint8_t *message_hash,
262 |               unsigned hash_size,
263 |               uint8_t *signature,
264 |               uECC_Curve curve);
265 | 
266 | /* uECC_HashContext structure.
267 | This is used to pass in an arbitrary hash function to uECC_sign_deterministic().
268 | The structure will be used for multiple hash computations; each time a new hash
269 | is computed, init_hash() will be called, followed by one or more calls to
270 | update_hash(), and finally a call to finish_hash() to produce the resulting hash.
271 | 
272 | The intention is that you will create a structure that includes uECC_HashContext
273 | followed by any hash-specific data. For example:
274 | 
275 | typedef struct SHA256_HashContext {
276 |     uECC_HashContext uECC;
277 |     SHA256_CTX ctx;
278 | } SHA256_HashContext;
279 | 
280 | void init_SHA256(const uECC_HashContext *base) {
281 |     SHA256_HashContext *context = (SHA256_HashContext *)base;
282 |     SHA256_Init(&context->ctx);
283 | }
284 | 
285 | void update_SHA256(const uECC_HashContext *base,
286 |                    const uint8_t *message,
287 |                    unsigned message_size) {
288 |     SHA256_HashContext *context = (SHA256_HashContext *)base;
289 |     SHA256_Update(&context->ctx, message, message_size);
290 | }
291 | 
292 | void finish_SHA256(const uECC_HashContext *base, uint8_t *hash_result) {
293 |     SHA256_HashContext *context = (SHA256_HashContext *)base;
294 |     SHA256_Final(hash_result, &context->ctx);
295 | }
295 | }
296 | 
297 | ... when signing ...
298 | {
299 |     uint8_t tmp[32 + 32 + 64];
300 |     SHA256_HashContext ctx = {{&init_SHA256, &update_SHA256, &finish_SHA256, 64, 32, tmp}};
301 |     uECC_sign_deterministic(key, message_hash, hash_size, &ctx.uECC, signature, curve);
302 | }
303 | */
304 | typedef struct uECC_HashContext {
305 |     void (*init_hash)(const struct uECC_HashContext *context);
306 |     void (*update_hash)(const struct uECC_HashContext *context,
307 |                         const uint8_t *message,
308 |                         unsigned message_size);
309 |     void (*finish_hash)(const struct uECC_HashContext *context, uint8_t *hash_result);
310 |     unsigned block_size; /* Hash function block size in bytes, eg 64 for SHA-256. */
311 |     unsigned result_size; /* Hash function result size in bytes, eg 32 for SHA-256. */
312 |     uint8_t *tmp; /* Must point to a buffer of at least (2 * result_size + block_size) bytes. */
313 | } uECC_HashContext;
314 | 
315 | /* uECC_sign_deterministic() function.
316 | Generate an ECDSA signature for a given hash value, using a deterministic algorithm
317 | (see RFC 6979). You do not need to set the RNG using uECC_set_rng() before calling
318 | this function; however, if the RNG is defined it will improve resistance to side-channel
319 | attacks.
320 | 
321 | Usage: Compute a hash of the data you wish to sign (SHA-2 is recommended) and pass it to
322 | this function along with your private key and a hash context. Note that the message_hash
323 | does not need to be computed with the same hash function used by hash_context.
324 | 
325 | Inputs:
326 |     private_key  - Your private key.
327 |     message_hash - The hash of the message to sign.
328 |     hash_size    - The size of message_hash in bytes.
329 |     hash_context - A hash context to use.
330 | 
331 | Outputs:
332 |     signature - Will be filled in with the signature value.
333 | 
334 | Returns 1 if the signature was generated successfully, 0 if an error occurred.
335 | */
336 | int uECC_sign_deterministic(const uint8_t *private_key,
337 |                             const uint8_t *message_hash,
338 |                             unsigned hash_size,
339 |                             const uECC_HashContext *hash_context,
340 |                             uint8_t *signature,
341 |                             uECC_Curve curve);
342 | 
343 | /* uECC_verify() function.
344 | Verify an ECDSA signature.
345 | 
346 | Usage: Compute the hash of the signed data using the same hash as the signer and
347 | pass it to this function along with the signer's public key and the signature values (r and s).
348 | 
349 | Inputs:
350 |     public_key   - The signer's public key.
351 |     message_hash - The hash of the signed data.
352 |     hash_size    - The size of message_hash in bytes.
353 |     signature    - The signature value.
354 | 
355 | Returns 1 if the signature is valid, 0 if it is invalid.
356 | */
357 | int uECC_verify(const uint8_t *public_key,
358 |                 const uint8_t *message_hash,
359 |                 unsigned hash_size,
360 |                 const uint8_t *signature,
361 |                 uECC_Curve curve);
362 | 
363 | #ifdef __cplusplus
364 | } /* end of extern "C" */
365 | #endif
366 | 
367 | #endif /* _UECC_H_ */
368 | 


--------------------------------------------------------------------------------
/uECC_vli.h:
--------------------------------------------------------------------------------
  1 | /* Copyright 2015, Kenneth MacKay. Licensed under the BSD 2-clause license. */
  2 | 
  3 | #ifndef _UECC_VLI_H_
  4 | #define _UECC_VLI_H_
  5 | 
  6 | #include "uECC.h"
  7 | #include "types.h"
  8 | 
  9 | /* Functions for raw large-integer manipulation. These are only available
 10 |    if uECC.c is compiled with uECC_ENABLE_VLI_API defined to 1. */
 11 | #ifndef uECC_ENABLE_VLI_API
 12 |     #define uECC_ENABLE_VLI_API 0
 13 | #endif
 14 | 
 15 | #ifdef __cplusplus
 16 | extern "C"
 17 | {
 18 | #endif
 19 | 
 20 | #if uECC_ENABLE_VLI_API
 21 | 
 22 | void uECC_vli_clear(uECC_word_t *vli, wordcount_t num_words);
 23 | 
 24 | /* Constant-time comparison to zero - secure way to compare long integers */
 25 | /* Returns 1 if vli == 0, 0 otherwise. */
 26 | uECC_word_t uECC_vli_isZero(const uECC_word_t *vli, wordcount_t num_words);
 27 | 
 28 | /* Returns nonzero if bit 'bit' of vli is set. */
 29 | uECC_word_t uECC_vli_testBit(const uECC_word_t *vli, bitcount_t bit);
 30 | 
 31 | /* Counts the number of bits required to represent vli. */
 32 | bitcount_t uECC_vli_numBits(const uECC_word_t *vli, const wordcount_t max_words);
 33 | 
 34 | /* Sets dest = src. */
 35 | void uECC_vli_set(uECC_word_t *dest, const uECC_word_t *src, wordcount_t num_words);
 36 | 
 37 | /* Constant-time comparison function - secure way to compare long integers */
 38 | /* Returns one if left == right, zero otherwise */
 39 | uECC_word_t uECC_vli_equal(const uECC_word_t *left,
 40 |                            const uECC_word_t *right,
 41 |                            wordcount_t num_words);
 42 | 
 43 | /* Constant-time comparison function - secure way to compare long integers */
 44 | /* Returns sign of left - right, in constant time. */
 45 | cmpresult_t uECC_vli_cmp(const uECC_word_t *left, const uECC_word_t *right, wordcount_t num_words);
 46 | 
 47 | /* Computes vli = vli >> 1. */
 48 | void uECC_vli_rshift1(uECC_word_t *vli, wordcount_t num_words);
 49 | 
 50 | /* Computes result = left + right, returning carry. Can modify in place. */
 51 | uECC_word_t uECC_vli_add(uECC_word_t *result,
 52 |                          const uECC_word_t *left,
 53 |                          const uECC_word_t *right,
 54 |                          wordcount_t num_words);
 55 | 
 56 | /* Computes result = left - right, returning borrow. Can modify in place. */
 57 | uECC_word_t uECC_vli_sub(uECC_word_t *result,
 58 |                          const uECC_word_t *left,
 59 |                          const uECC_word_t *right,
 60 |                          wordcount_t num_words);
 61 | 
 62 | /* Computes result = left * right. Result must be 2 * num_words long. */
 63 | void uECC_vli_mult(uECC_word_t *result,
 64 |                    const uECC_word_t *left,
 65 |                    const uECC_word_t *right,
 66 |                    wordcount_t num_words);
 67 | 
 68 | /* Computes result = left^2. Result must be 2 * num_words long. */
 69 | void uECC_vli_square(uECC_word_t *result, const uECC_word_t *left, wordcount_t num_words);
 70 | 
 71 | /* Computes result = (left + right) % mod.
 72 |    Assumes that left < mod and right < mod, and that result does not overlap mod. */
 73 | void uECC_vli_modAdd(uECC_word_t *result,
 74 |                      const uECC_word_t *left,
 75 |                      const uECC_word_t *right,
 76 |                      const uECC_word_t *mod,
 77 |                      wordcount_t num_words);
 78 | 
 79 | /* Computes result = (left - right) % mod.
 80 |    Assumes that left < mod and right < mod, and that result does not overlap mod. */
 81 | void uECC_vli_modSub(uECC_word_t *result,
 82 |                      const uECC_word_t *left,
 83 |                      const uECC_word_t *right,
 84 |                      const uECC_word_t *mod,
 85 |                      wordcount_t num_words);
 86 | 
 87 | /* Computes result = product % mod, where product is 2N words long.
 88 |    Currently only designed to work for mod == curve->p or curve_n. */
 89 | void uECC_vli_mmod(uECC_word_t *result,
 90 |                    uECC_word_t *product,
 91 |                    const uECC_word_t *mod,
 92 |                    wordcount_t num_words);
 93 | 
 94 | /* Calculates result = product (mod curve->p), where product is up to
 95 |    2 * curve->num_words long. */
 96 | void uECC_vli_mmod_fast(uECC_word_t *result, uECC_word_t *product, uECC_Curve curve);
 97 | 
 98 | /* Computes result = (left * right) % mod.
 99 |    Currently only designed to work for mod == curve->p or curve_n. */
100 | void uECC_vli_modMult(uECC_word_t *result,
101 |                       const uECC_word_t *left,
102 |                       const uECC_word_t *right,
103 |                       const uECC_word_t *mod,
104 |                       wordcount_t num_words);
105 | 
106 | /* Computes result = (left * right) % curve->p. */
107 | void uECC_vli_modMult_fast(uECC_word_t *result,
108 |                            const uECC_word_t *left,
109 |                            const uECC_word_t *right,
110 |                            uECC_Curve curve);
111 | 
112 | /* Computes result = left^2 % mod.
113 |    Currently only designed to work for mod == curve->p or curve_n. */
114 | void uECC_vli_modSquare(uECC_word_t *result,
115 |                         const uECC_word_t *left,
116 |                         const uECC_word_t *mod,
117 |                         wordcount_t num_words);
118 | 
119 | /* Computes result = left^2 % curve->p. */
120 | void uECC_vli_modSquare_fast(uECC_word_t *result, const uECC_word_t *left, uECC_Curve curve);
121 | 
122 | /* Computes result = (1 / input) % mod.*/
123 | void uECC_vli_modInv(uECC_word_t *result,
124 |                      const uECC_word_t *input,
125 |                      const uECC_word_t *mod,
126 |                      wordcount_t num_words);
127 | 
128 | #if uECC_SUPPORT_COMPRESSED_POINT
129 | /* Calculates a = sqrt(a) (mod curve->p) */
130 | void uECC_vli_mod_sqrt(uECC_word_t *a, uECC_Curve curve);
131 | #endif
132 | 
133 | /* Converts an integer in uECC native format to big-endian bytes. */
134 | void uECC_vli_nativeToBytes(uint8_t *bytes, int num_bytes, const uECC_word_t *native);
135 | /* Converts big-endian bytes to an integer in uECC native format. */
136 | void uECC_vli_bytesToNative(uECC_word_t *native, const uint8_t *bytes, int num_bytes);
137 | 
138 | unsigned uECC_curve_num_words(uECC_Curve curve);
139 | unsigned uECC_curve_num_bytes(uECC_Curve curve);
140 | unsigned uECC_curve_num_bits(uECC_Curve curve);
141 | unsigned uECC_curve_num_n_words(uECC_Curve curve);
142 | unsigned uECC_curve_num_n_bytes(uECC_Curve curve);
143 | unsigned uECC_curve_num_n_bits(uECC_Curve curve);
144 | 
145 | const uECC_word_t *uECC_curve_p(uECC_Curve curve);
146 | const uECC_word_t *uECC_curve_n(uECC_Curve curve);
147 | const uECC_word_t *uECC_curve_G(uECC_Curve curve);
148 | const uECC_word_t *uECC_curve_b(uECC_Curve curve);
149 | 
150 | int uECC_valid_point(const uECC_word_t *point, uECC_Curve curve);
151 | 
152 | /* Multiplies a point by a scalar. Points are represented by the X coordinate followed by
153 |    the Y coordinate in the same array, both coordinates are curve->num_words long. Note
154 |    that scalar must be curve->num_n_words long (NOT curve->num_words). */
155 | void uECC_point_mult(uECC_word_t *result,
156 |                      const uECC_word_t *point,
157 |                      const uECC_word_t *scalar,
158 |                      uECC_Curve curve);
159 | 
160 | /* Generates a random integer in the range 0 < random < top.
161 |    Both random and top have num_words words. */
162 | int uECC_generate_random_int(uECC_word_t *random,
163 |                              const uECC_word_t *top,
164 |                              wordcount_t num_words);
165 | 
166 | #endif /* uECC_ENABLE_VLI_API */
167 | 
168 | #ifdef __cplusplus
169 | } /* end of extern "C" */
170 | #endif
171 | 
172 | #endif /* _UECC_VLI_H_ */
173 | 


--------------------------------------------------------------------------------