├── .gitignore ├── LICENSE.txt ├── README.md ├── asm_arm.inc ├── asm_arm_mult_square.inc ├── asm_arm_mult_square_umaal.inc ├── asm_avr.inc ├── asm_avr_mult_square.inc ├── curve-specific.inc ├── emk_project.py ├── emk_rules.py ├── examples └── ecc_test │ └── ecc_test.ino ├── library.properties ├── platform-specific.inc ├── scripts ├── mult_arm.py ├── mult_avr.py ├── mult_avr_extra.py ├── square_arm.py └── square_avr.py ├── test ├── ecdsa_test_vectors.c ├── emk_rules.py ├── public_key_test_vectors.c ├── test_compress.c ├── test_compute.c ├── test_ecdh.c ├── test_ecdsa.c └── test_ecdsa_deterministic.c.example ├── types.h ├── uECC.c ├── uECC.h └── uECC_vli.h /.gitignore: -------------------------------------------------------------------------------- 1 | __build__/ 2 | __pycache__ 3 | *.pyc 4 | *.pyo 5 | *.pyd 6 | *.pyz 7 | *.egg-info/ 8 | *.a 9 | *.o 10 | *.so 11 | .DS_Store 12 | -------------------------------------------------------------------------------- /LICENSE.txt: -------------------------------------------------------------------------------- 1 | Copyright (c) 2014, Kenneth MacKay 2 | All rights reserved. 3 | 4 | Redistribution and use in source and binary forms, with or without modification, 5 | are permitted provided that the following conditions are met: 6 | * Redistributions of source code must retain the above copyright notice, this 7 | list of conditions and the following disclaimer. 8 | * Redistributions in binary form must reproduce the above copyright notice, 9 | this list of conditions and the following disclaimer in the documentation 10 | and/or other materials provided with the distribution. 11 | 12 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND 13 | ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 14 | WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 15 | DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR 16 | ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES 17 | (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 18 | LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON 19 | ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 20 | (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 21 | SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | micro-ecc 2 | ========== 3 | 4 | A small and fast ECDH and ECDSA implementation for 8-bit, 32-bit, and 64-bit processors. 5 | 6 | The static version of micro-ecc (i.e., where the curve is selected at compile time) can be found in the "static" branch. 7 | 8 | Features 9 | -------- 10 | 11 | * Resistant to known side-channel attacks. 12 | * Written in C, with optional GCC inline assembly for AVR, ARM and Thumb platforms. 13 | * Supports 8, 32, and 64-bit architectures. 14 | * Small code size. 15 | * No dynamic memory allocation. 16 | * Support for 5 standard curves: secp160r1, secp192r1, secp224r1, secp256r1, and secp256k1. 17 | * BSD 2-clause license. 18 | 19 | Usage Notes 20 | ----------- 21 | ### Point Representation ### 22 | Compressed points are represented in the standard format as defined in http://www.secg.org/sec1-v2.pdf; uncompressed points are represented in the standard format, but without the `0x04` prefix. All functions except `uECC_decompress()` only accept uncompressed points; use `uECC_compress()` and `uECC_decompress()` to convert between compressed and uncompressed point representations. 23 | 24 | Private keys are represented in the standard format. 
25 | 26 | ### Using the Code ### 27 | 28 | I recommend just copying (or symlinking) the uECC files into your project. Then just `#include "uECC.h"` to use the micro-ecc functions. 29 | 30 | For use with Arduino, you can use the Library Manager to download micro-ecc (**Sketch**=>**Include Library**=>**Manage Libraries**). You can then use uECC just like any other Arduino library (uECC should show up in the **Sketch**=>**Import Library** submenu). 31 | 32 | See uECC.h for documentation of each function. 33 | 34 | ### Compilation Notes ### 35 | 36 | * Should compile with any C/C++ compiler that supports stdint.h (this includes Visual Studio 2013). 37 | * If you want to change the defaults for any of the uECC compile-time options (such as `uECC_OPTIMIZATION_LEVEL`), you must change them in your Makefile or similar so that uECC.c is compiled with the desired values (i.e., compile uECC.c with `-DuECC_OPTIMIZATION_LEVEL=3` or similar). 38 | * When compiling for a Thumb-1 platform, you must use the `-fomit-frame-pointer` GCC option (this is enabled by default when compiling with `-O1` or higher). 39 | * When compiling for an ARM/Thumb-2 platform with `uECC_OPTIMIZATION_LEVEL` >= 3, you must use the `-fomit-frame-pointer` GCC option (this is enabled by default when compiling with `-O1` or higher). 40 | * When compiling for AVR, you must have optimizations enabled (compile with `-O1` or higher). 41 | * When building for Windows, you will need to link in the `advapi32.lib` system library. 42 | -------------------------------------------------------------------------------- /asm_arm.inc: -------------------------------------------------------------------------------- 1 | /* Copyright 2015, Kenneth MacKay. Licensed under the BSD 2-clause license. 
*/ 2 | 3 | #ifndef _UECC_ASM_ARM_H_ 4 | #define _UECC_ASM_ARM_H_ 5 | /* uECC_MIN_WORDS ends up as the word count of the smallest enabled curve: each smaller curve below overrides the previous definition. */ 6 | #if (uECC_SUPPORTS_secp256r1 || uECC_SUPPORTS_secp256k1) 7 | #define uECC_MIN_WORDS 8 8 | #endif 9 | #if uECC_SUPPORTS_secp224r1 10 | #undef uECC_MIN_WORDS 11 | #define uECC_MIN_WORDS 7 12 | #endif 13 | #if uECC_SUPPORTS_secp192r1 14 | #undef uECC_MIN_WORDS 15 | #define uECC_MIN_WORDS 6 16 | #endif 17 | #if uECC_SUPPORTS_secp160r1 18 | #undef uECC_MIN_WORDS 19 | #define uECC_MIN_WORDS 5 20 | #endif 21 | /* Operand constraints used by the loop-based add/sub: low registers ("l") on Thumb-1, any general register ("r") otherwise. */ 22 | #if (uECC_PLATFORM == uECC_arm_thumb) 23 | #define REG_RW "+&l" 24 | #define REG_WRITE "=&l" 25 | #else 26 | #define REG_RW "+&r" 27 | #define REG_WRITE "=&r" 28 | #endif 29 | /* Operand constraints used by the unrolled add/sub: low registers are requested on Thumb-2 as well. */ 30 | #if (uECC_PLATFORM == uECC_arm_thumb || uECC_PLATFORM == uECC_arm_thumb2) 31 | #define REG_RW_LO "+&l" 32 | #define REG_WRITE_LO "=&l" 33 | #else 34 | #define REG_RW_LO "+&r" 35 | #define REG_WRITE_LO "=&r" 36 | #endif 37 | /* Each asm block starts with ".syntax unified"; RESUME_SYNTAX is placed at its end and switches back to divided syntax, except on Thumb-2 where it is empty. */ 38 | #if (uECC_PLATFORM == uECC_arm_thumb2) 39 | #define RESUME_SYNTAX 40 | #else 41 | #define RESUME_SYNTAX ".syntax divided \n\t" 42 | #endif 43 | 44 | #if (uECC_OPTIMIZATION_LEVEL >= 2) 45 | /* Unrolled addition: result = left + right over num_words words; returns the carry out of the last word. */ 46 | uECC_VLI_API uECC_word_t uECC_vli_add(uECC_word_t *result, 47 | const uECC_word_t *left, 48 | const uECC_word_t *right, 49 | wordcount_t num_words) { /* jump = byte offset past the unrolled steps that are not needed: 4 instructions per step, counted as 2 bytes each on Thumb (the +1 is presumably the Thumb state bit expected by bx) and 4 bytes each on ARM. */ 50 | #if (uECC_MAX_WORDS != uECC_MIN_WORDS) 51 | #if (uECC_PLATFORM == uECC_arm_thumb) || (uECC_PLATFORM == uECC_arm_thumb2) 52 | uint32_t jump = (uECC_MAX_WORDS - num_words) * 4 * 2 + 1; 53 | #else /* ARM */ 54 | uint32_t jump = (uECC_MAX_WORDS - num_words) * 4 * 4; 55 | #endif 56 | #endif 57 | uint32_t carry; 58 | uint32_t left_word; 59 | uint32_t right_word; 60 | 61 | __asm__ volatile ( 62 | ".syntax unified \n\t" 63 | "movs %[carry], #0 \n\t" 64 | #if (uECC_MAX_WORDS != uECC_MIN_WORDS) 65 | "adr %[left], 1f \n\t" /* left = address of label 1 (start of the unrolled steps) */ 66 | ".align 4 \n\t" 67 | "adds %[jump], %[left] \n\t" /* jump = absolute branch target */ 68 | #endif 69 | /* first word: plain add, there is no carry in */ 70 | "ldmia %[lptr]!, {%[left]} \n\t" 71 | "ldmia %[rptr]!, {%[right]} \n\t" 72 | "adds %[left], %[right] \n\t" 73 | "stmia %[dptr]!, {%[left]} \n\t" 74 | 75 | #if 
(uECC_MAX_WORDS != uECC_MIN_WORDS) 76 | "bx %[jump] \n\t" /* enter the unrolled steps at the computed offset */ 77 | #endif 78 | "1: \n\t" /* unrolled add-with-carry steps; REPEAT and DEC are defined elsewhere (presumably uECC_MAX_WORDS - 1 copies) */ 79 | REPEAT(DEC(uECC_MAX_WORDS), 80 | "ldmia %[lptr]!, {%[left]} \n\t" 81 | "ldmia %[rptr]!, {%[right]} \n\t" 82 | "adcs %[left], %[right] \n\t" 83 | "stmia %[dptr]!, {%[left]} \n\t") 84 | 85 | "adcs %[carry], %[carry] \n\t" /* carry was zeroed above, so this leaves just the carry flag in it */ 86 | RESUME_SYNTAX 87 | : [dptr] REG_RW_LO (result), [lptr] REG_RW_LO (left), [rptr] REG_RW_LO (right), 88 | #if (uECC_MAX_WORDS != uECC_MIN_WORDS) 89 | [jump] REG_RW_LO (jump), 90 | #endif 91 | [carry] REG_WRITE_LO (carry), [left] REG_WRITE_LO (left_word), 92 | [right] REG_WRITE_LO (right_word) 93 | : 94 | : "cc", "memory" 95 | ); 96 | return carry; 97 | } 98 | #define asm_add 1 99 | /* Unrolled subtraction: result = left - right over num_words words, using the same computed-jump scheme as the unrolled addition; the return value is the inverted final carry flag. */ 100 | uECC_VLI_API uECC_word_t uECC_vli_sub(uECC_word_t *result, 101 | const uECC_word_t *left, 102 | const uECC_word_t *right, 103 | wordcount_t num_words) { 104 | #if (uECC_MAX_WORDS != uECC_MIN_WORDS) 105 | #if (uECC_PLATFORM == uECC_arm_thumb) || (uECC_PLATFORM == uECC_arm_thumb2) 106 | uint32_t jump = (uECC_MAX_WORDS - num_words) * 4 * 2 + 1; 107 | #else /* ARM */ 108 | uint32_t jump = (uECC_MAX_WORDS - num_words) * 4 * 4; 109 | #endif 110 | #endif 111 | uint32_t carry; 112 | uint32_t left_word; 113 | uint32_t right_word; 114 | 115 | __asm__ volatile ( 116 | ".syntax unified \n\t" 117 | "movs %[carry], #0 \n\t" 118 | #if (uECC_MAX_WORDS != uECC_MIN_WORDS) 119 | "adr %[left], 1f \n\t" /* left = address of label 1 */ 120 | ".align 4 \n\t" 121 | "adds %[jump], %[left] \n\t" /* jump = absolute branch target */ 122 | #endif 123 | /* first word: plain subtract, there is no borrow in */ 124 | "ldmia %[lptr]!, {%[left]} \n\t" 125 | "ldmia %[rptr]!, {%[right]} \n\t" 126 | "subs %[left], %[right] \n\t" 127 | "stmia %[dptr]!, {%[left]} \n\t" 128 | 129 | #if (uECC_MAX_WORDS != uECC_MIN_WORDS) 130 | "bx %[jump] \n\t" /* enter the unrolled steps at the computed offset */ 131 | #endif 132 | "1: \n\t" 133 | REPEAT(DEC(uECC_MAX_WORDS), 134 | "ldmia %[lptr]!, {%[left]} \n\t" 135 | "ldmia %[rptr]!, {%[right]} \n\t" 136 | "sbcs %[left], %[right] \n\t" 137 | "stmia %[dptr]!, {%[left]} \n\t") 138 | 139 | "adcs %[carry], %[carry] \n\t" /* carry was zeroed above, so this leaves just the carry flag in it */ 140 | RESUME_SYNTAX 141 | : [dptr] 
REG_RW_LO (result), [lptr] REG_RW_LO (left), [rptr] REG_RW_LO (right), 142 | #if (uECC_MAX_WORDS != uECC_MIN_WORDS) 143 | [jump] REG_RW_LO (jump), 144 | #endif 145 | [carry] REG_WRITE_LO (carry), [left] REG_WRITE_LO (left_word), 146 | [right] REG_WRITE_LO (right_word) 147 | : 148 | : "cc", "memory" 149 | ); 150 | return !carry; /* Note that on ARM, carry flag set means "no borrow" when subtracting 151 | (for some reason...) */ 152 | } 153 | #define asm_sub 1 154 | 155 | #endif /* (uECC_OPTIMIZATION_LEVEL >= 2) */ 156 | 157 | #if (uECC_OPTIMIZATION_LEVEL >= 3) 158 | 159 | #if (uECC_PLATFORM != uECC_arm_thumb) 160 | /* The FAST_MULT_ASM_* and FAST_SQUARE_ASM_* fragments used below are not defined in this file; presumably they come from whichever of these two files is included. */ 161 | #if uECC_ARM_USE_UMAAL 162 | #include "asm_arm_mult_square_umaal.inc" 163 | #else 164 | #include "asm_arm_mult_square.inc" 165 | #endif 166 | 167 | #if (uECC_OPTIMIZATION_LEVEL == 3) 168 | /* Level-3 multiply: a single asm block that starts with the fragment for the smallest enabled size and appends the N_TO_N+1 fragments up to uECC_MAX_WORDS. The operands are pinned to r0-r3, which the fragments presumably expect; how num_words (r3) selects the exit point is decided inside the fragments. */ 169 | uECC_VLI_API void uECC_vli_mult(uint32_t *result, 170 | const uint32_t *left, 171 | const uint32_t *right, 172 | wordcount_t num_words) { 173 | register uint32_t *r0 __asm__("r0") = result; 174 | register const uint32_t *r1 __asm__("r1") = left; 175 | register const uint32_t *r2 __asm__("r2") = right; 176 | register uint32_t r3 __asm__("r3") = num_words; 177 | 178 | __asm__ volatile ( 179 | ".syntax unified \n\t" 180 | #if (uECC_MIN_WORDS == 5) 181 | FAST_MULT_ASM_5 182 | #if (uECC_MAX_WORDS > 5) 183 | FAST_MULT_ASM_5_TO_6 184 | #endif 185 | #if (uECC_MAX_WORDS > 6) 186 | FAST_MULT_ASM_6_TO_7 187 | #endif 188 | #if (uECC_MAX_WORDS > 7) 189 | FAST_MULT_ASM_7_TO_8 190 | #endif 191 | #elif (uECC_MIN_WORDS == 6) 192 | FAST_MULT_ASM_6 193 | #if (uECC_MAX_WORDS > 6) 194 | FAST_MULT_ASM_6_TO_7 195 | #endif 196 | #if (uECC_MAX_WORDS > 7) 197 | FAST_MULT_ASM_7_TO_8 198 | #endif 199 | #elif (uECC_MIN_WORDS == 7) 200 | FAST_MULT_ASM_7 201 | #if (uECC_MAX_WORDS > 7) 202 | FAST_MULT_ASM_7_TO_8 203 | #endif 204 | #elif (uECC_MIN_WORDS == 8) 205 | FAST_MULT_ASM_8 206 | #endif 207 | "1: \n\t" /* presumably the common exit label the fragments branch to */ 208 | RESUME_SYNTAX 209 | : "+r" (r0), "+r" (r1), "+r" (r2) 210 | : "r" (r3) 211 | 
: "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r12", "r14", "cc", "memory" 212 | ); 213 | } 214 | #define asm_mult 1 215 | /* Level-3 square: same chained-fragment layout as the level-3 multiply, with result, left and num_words pinned to r0, r1 and r2. */ 216 | #if uECC_SQUARE_FUNC 217 | uECC_VLI_API void uECC_vli_square(uECC_word_t *result, 218 | const uECC_word_t *left, 219 | wordcount_t num_words) { 220 | register uint32_t *r0 __asm__("r0") = result; 221 | register const uint32_t *r1 __asm__("r1") = left; 222 | register uint32_t r2 __asm__("r2") = num_words; 223 | 224 | __asm__ volatile ( 225 | ".syntax unified \n\t" 226 | #if (uECC_MIN_WORDS == 5) 227 | FAST_SQUARE_ASM_5 228 | #if (uECC_MAX_WORDS > 5) 229 | FAST_SQUARE_ASM_5_TO_6 230 | #endif 231 | #if (uECC_MAX_WORDS > 6) 232 | FAST_SQUARE_ASM_6_TO_7 233 | #endif 234 | #if (uECC_MAX_WORDS > 7) 235 | FAST_SQUARE_ASM_7_TO_8 236 | #endif 237 | #elif (uECC_MIN_WORDS == 6) 238 | FAST_SQUARE_ASM_6 239 | #if (uECC_MAX_WORDS > 6) 240 | FAST_SQUARE_ASM_6_TO_7 241 | #endif 242 | #if (uECC_MAX_WORDS > 7) 243 | FAST_SQUARE_ASM_7_TO_8 244 | #endif 245 | #elif (uECC_MIN_WORDS == 7) 246 | FAST_SQUARE_ASM_7 247 | #if (uECC_MAX_WORDS > 7) 248 | FAST_SQUARE_ASM_7_TO_8 249 | #endif 250 | #elif (uECC_MIN_WORDS == 8) 251 | FAST_SQUARE_ASM_8 252 | #endif 253 | 254 | "1: \n\t" /* presumably the common exit label the fragments branch to */ 255 | RESUME_SYNTAX 256 | : "+r" (r0), "+r" (r1) 257 | : "r" (r2) 258 | : "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r12", "r14", "cc", "memory" 259 | ); 260 | } 261 | #define asm_square 1 262 | #endif /* uECC_SQUARE_FUNC */ 263 | 264 | #else /* (uECC_OPTIMIZATION_LEVEL > 3) */ 265 | /* Optimization level above 3: instead of one chained block, each enabled curve size gets its own complete asm block, selected by num_words at run time. */ 266 | uECC_VLI_API void uECC_vli_mult(uint32_t *result, 267 | const uint32_t *left, 268 | const uint32_t *right, 269 | wordcount_t num_words) { 270 | register uint32_t *r0 __asm__("r0") = result; 271 | register const uint32_t *r1 __asm__("r1") = left; 272 | register const uint32_t *r2 __asm__("r2") = right; 273 | register uint32_t r3 __asm__("r3") = num_words; 274 | 275 | #if uECC_SUPPORTS_secp160r1 276 | if (num_words == 5) { 277 | __asm__ volatile ( 278 | ".syntax unified \n\t" 279 | 
FAST_MULT_ASM_5 280 | RESUME_SYNTAX 281 | : "+r" (r0), "+r" (r1), "+r" (r2) 282 | : "r" (r3) 283 | : "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r12", "r14", "cc", "memory" 284 | ); 285 | return; 286 | } 287 | #endif 288 | #if uECC_SUPPORTS_secp192r1 289 | if (num_words == 6) { 290 | __asm__ volatile ( 291 | ".syntax unified \n\t" 292 | FAST_MULT_ASM_6 293 | RESUME_SYNTAX 294 | : "+r" (r0), "+r" (r1), "+r" (r2) 295 | : "r" (r3) 296 | : "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r12", "r14", "cc", "memory" 297 | ); 298 | return; 299 | } 300 | #endif 301 | #if uECC_SUPPORTS_secp224r1 302 | if (num_words == 7) { 303 | __asm__ volatile ( 304 | ".syntax unified \n\t" 305 | FAST_MULT_ASM_7 306 | RESUME_SYNTAX 307 | : "+r" (r0), "+r" (r1), "+r" (r2) 308 | : "r" (r3) 309 | : "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r12", "r14", "cc", "memory" 310 | ); 311 | return; 312 | } 313 | #endif 314 | #if (uECC_SUPPORTS_secp256r1 || uECC_SUPPORTS_secp256k1) 315 | if (num_words == 8) { 316 | __asm__ volatile ( 317 | ".syntax unified \n\t" 318 | FAST_MULT_ASM_8 319 | RESUME_SYNTAX 320 | : "+r" (r0), "+r" (r1), "+r" (r2) 321 | : "r" (r3) 322 | : "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r12", "r14", "cc", "memory" 323 | ); 324 | return; 325 | } 326 | #endif 327 | /* NOTE(review): when num_words matches none of the enabled sizes, control reaches this point and result is left unwritten. */ } 328 | #define asm_mult 1 329 | /* Square for optimization levels above 3: one complete asm block per enabled curve size, selected by num_words; result, left and num_words are pinned to r0, r1 and r2. */ 330 | #if uECC_SQUARE_FUNC 331 | uECC_VLI_API void uECC_vli_square(uECC_word_t *result, 332 | const uECC_word_t *left, 333 | wordcount_t num_words) { 334 | register uint32_t *r0 __asm__("r0") = result; 335 | register const uint32_t *r1 __asm__("r1") = left; 336 | register uint32_t r2 __asm__("r2") = num_words; 337 | 338 | #if uECC_SUPPORTS_secp160r1 339 | if (num_words == 5) { 340 | __asm__ volatile ( 341 | ".syntax unified \n\t" 342 | FAST_SQUARE_ASM_5 343 | RESUME_SYNTAX 344 | : "+r" (r0), "+r" (r1) 345 | : "r" (r2) 346 | : "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r12", "r14", "cc", "memory" 347 | ); 348 | return; 349 | } 350 | #endif 351 | 
#if uECC_SUPPORTS_secp192r1 352 | if (num_words == 6) { 353 | __asm__ volatile ( 354 | ".syntax unified \n\t" 355 | FAST_SQUARE_ASM_6 356 | RESUME_SYNTAX 357 | : "+r" (r0), "+r" (r1) 358 | : "r" (r2) 359 | : "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r12", "r14", "cc", "memory" 360 | ); 361 | return; 362 | } 363 | #endif 364 | #if uECC_SUPPORTS_secp224r1 365 | if (num_words == 7) { 366 | __asm__ volatile ( 367 | ".syntax unified \n\t" 368 | FAST_SQUARE_ASM_7 369 | RESUME_SYNTAX 370 | : "+r" (r0), "+r" (r1) 371 | : "r" (r2) 372 | : "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r12", "r14", "cc", "memory" 373 | ); 374 | return; 375 | } 376 | #endif 377 | #if (uECC_SUPPORTS_secp256r1 || uECC_SUPPORTS_secp256k1) 378 | if (num_words == 8) { 379 | __asm__ volatile ( 380 | ".syntax unified \n\t" 381 | FAST_SQUARE_ASM_8 382 | RESUME_SYNTAX 383 | : "+r" (r0), "+r" (r1) 384 | : "r" (r2) 385 | : "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r12", "r14", "cc", "memory" 386 | ); 387 | return; 388 | } 389 | #endif 390 | /* NOTE(review): when num_words matches none of the enabled sizes, control reaches this point and result is left unwritten. */ } 391 | #define asm_square 1 392 | #endif /* uECC_SQUARE_FUNC */ 393 | 394 | #endif /* (uECC_OPTIMIZATION_LEVEL > 3) */ 395 | 396 | #endif /* uECC_PLATFORM != uECC_arm_thumb */ 397 | 398 | #endif /* (uECC_OPTIMIZATION_LEVEL >= 3) */ 399 | 400 | /* ---- "Small" implementations ---- */ 401 | /* Loop-based addition, compiled only when no unrolled version set asm_add: result = left + right over num_words words; returns the final carry. The carry is parked in a register between iterations because the loop-counter "subs" overwrites the flags. */ 402 | #if !asm_add 403 | uECC_VLI_API uECC_word_t uECC_vli_add(uECC_word_t *result, 404 | const uECC_word_t *left, 405 | const uECC_word_t *right, 406 | wordcount_t num_words) { 407 | uint32_t carry = 0; 408 | uint32_t left_word; 409 | uint32_t right_word; 410 | 411 | __asm__ volatile ( 412 | ".syntax unified \n\t" 413 | "1: \n\t" 414 | "ldmia %[lptr]!, {%[left]} \n\t" /* Load left word. */ 415 | "ldmia %[rptr]!, {%[right]} \n\t" /* Load right word. */ 416 | "lsrs %[carry], #1 \n\t" /* Set up carry flag (carry = 0 after this). */ 417 | "adcs %[left], %[left], %[right] \n\t" /* Add with carry. 
*/ 418 | "adcs %[carry], %[carry], %[carry] \n\t" /* Store carry bit. */ 419 | "stmia %[dptr]!, {%[left]} \n\t" /* Store result word. */ 420 | "subs %[ctr], #1 \n\t" /* Decrement counter. */ 421 | "bne 1b \n\t" /* Loop until counter == 0. */ 422 | RESUME_SYNTAX 423 | : [dptr] REG_RW (result), [lptr] REG_RW (left), [rptr] REG_RW (right), 424 | [ctr] REG_RW (num_words), [carry] REG_RW (carry), 425 | [left] REG_WRITE (left_word), [right] REG_WRITE (right_word) 426 | : 427 | : "cc", "memory" 428 | ); 429 | return carry; 430 | } 431 | #define asm_add 1 432 | #endif 433 | /* Loop-based subtraction, compiled only when no unrolled version set asm_sub: result = left - right over num_words words. The carry flag is saved to and restored from a register on every iteration, as in the loop-based addition; the value returned after the loop is the inverted final carry. */ 434 | #if !asm_sub 435 | uECC_VLI_API uECC_word_t uECC_vli_sub(uECC_word_t *result, 436 | const uECC_word_t *left, 437 | const uECC_word_t *right, 438 | wordcount_t num_words) { 439 | uint32_t carry = 1; /* carry = 1 initially (means don't borrow) */ 440 | uint32_t left_word; 441 | uint32_t right_word; 442 | 443 | __asm__ volatile ( 444 | ".syntax unified \n\t" 445 | "1: \n\t" 446 | "ldmia %[lptr]!, {%[left]} \n\t" /* Load left word. */ 447 | "ldmia %[rptr]!, {%[right]} \n\t" /* Load right word. */ 448 | "lsrs %[carry], #1 \n\t" /* Set up carry flag (carry = 0 after this). */ 449 | "sbcs %[left], %[left], %[right] \n\t" /* Subtract with borrow. */ 450 | "adcs %[carry], %[carry], %[carry] \n\t" /* Store carry bit. */ 451 | "stmia %[dptr]!, {%[left]} \n\t" /* Store result word. */ 452 | "subs %[ctr], #1 \n\t" /* Decrement counter. */ 453 | "bne 1b \n\t" /* Loop until counter == 0. 
*/ 454 | RESUME_SYNTAX 455 | : [dptr] REG_RW (result), [lptr] REG_RW (left), [rptr] REG_RW (right), 456 | [ctr] REG_RW (num_words), [carry] REG_RW (carry), 457 | [left] REG_WRITE (left_word), [right] REG_WRITE (right_word) 458 | : 459 | : "cc", "memory" 460 | ); 461 | return !carry; 462 | } 463 | #define asm_sub 1 464 | #endif 465 | /* Generic multiply, compiled only when no faster version set asm_mult. It works column by column: for each result word k it sums left[i] * right[k - i] into a three-word accumulator (c0, c1, c2), stores c0 and shifts the accumulator down one word. k, i and last_word are byte offsets, hence the steps of 4. */ 466 | #if !asm_mult 467 | uECC_VLI_API void uECC_vli_mult(uECC_word_t *result, 468 | const uECC_word_t *left, 469 | const uECC_word_t *right, 470 | wordcount_t num_words) { 471 | #if (uECC_PLATFORM != uECC_arm_thumb) 472 | uint32_t c0 = 0; 473 | uint32_t c1 = 0; 474 | uint32_t c2 = 0; 475 | uint32_t k = 0; 476 | uint32_t i; 477 | uint32_t t0, t1; 478 | 479 | __asm__ volatile ( 480 | ".syntax unified \n\t" 481 | 482 | "1: \n\t" /* outer loop (k < num_words) */ 483 | "movs %[i], #0 \n\t" /* i = 0 */ 484 | "b 3f \n\t" 485 | 486 | "2: \n\t" /* outer loop (k >= num_words) */ 487 | "movs %[i], %[k] \n\t" /* i = k */ 488 | "subs %[i], %[last_word] \n\t" /* i = k - (num_words - 1) (times 4) */ 489 | 490 | "3: \n\t" /* inner loop */ 491 | "subs %[t0], %[k], %[i] \n\t" /* t0 = k-i */ 492 | 493 | "ldr %[t1], [%[right], %[t0]] \n\t" /* t1 = right[k - i] */ 494 | "ldr %[t0], [%[left], %[i]] \n\t" /* t0 = left[i] */ 495 | 496 | "umull %[t0], %[t1], %[t0], %[t1] \n\t" /* (t0, t1) = left[i] * right[k - i] */ 497 | 498 | "adds %[c0], %[c0], %[t0] \n\t" /* add low word to c0 */ 499 | "adcs %[c1], %[c1], %[t1] \n\t" /* add high word to c1, including carry */ 500 | "adcs %[c2], %[c2], #0 \n\t" /* add carry to c2 */ 501 | 502 | "adds %[i], #4 \n\t" /* i += 4 */ 503 | "cmp %[i], %[last_word] \n\t" /* i > (num_words - 1) (times 4)? */ 504 | "bgt 4f \n\t" /* if so, exit the loop */ 505 | "cmp %[i], %[k] \n\t" /* i <= k? 
*/ 506 | "ble 3b \n\t" /* if so, continue looping */ 507 | 508 | "4: \n\t" /* end inner loop */ 509 | 510 | "str %[c0], [%[result], %[k]] \n\t" /* result[k] = c0 */ 511 | "mov %[c0], %[c1] \n\t" /* c0 = c1 */ 512 | "mov %[c1], %[c2] \n\t" /* c1 = c2 */ 513 | "movs %[c2], #0 \n\t" /* c2 = 0 */ 514 | "adds %[k], #4 \n\t" /* k += 4 */ 515 | "cmp %[k], %[last_word] \n\t" /* k <= (num_words - 1) (times 4) ? */ 516 | "ble 1b \n\t" /* if so, loop back, start with i = 0 */ 517 | "cmp %[k], %[last_word], lsl #1 \n\t" /* k <= (num_words * 2 - 2) (times 4) ? */ 518 | "ble 2b \n\t" /* if so, loop back, start with i = (k + 1) - num_words */ 519 | /* end outer loop */ 520 | 521 | "str %[c0], [%[result], %[k]] \n\t" /* result[num_words * 2 - 1] = c0 */ 522 | RESUME_SYNTAX 523 | : [c0] "+r" (c0), [c1] "+r" (c1), [c2] "+r" (c2), 524 | [k] "+r" (k), [i] "=&r" (i), [t0] "=&r" (t0), [t1] "=&r" (t1) 525 | : [result] "r" (result), [left] "r" (left), [right] "r" (right), 526 | [last_word] "r" ((num_words - 1) * 4) 527 | : "cc", "memory" 528 | ); 529 | 530 | #else /* Thumb-1 */ 531 | uint32_t r4, r5, r6, r7; /* Thumb-1 variant of the same column-wise algorithm: there is no umull here, so each 32x32 product is assembled from four 16x16 "muls"; r8-r12 and r14 hold the loop bounds and temporarily parked values. Operand names (r0..r7) are symbolic names, not necessarily the physical registers. */ 532 | 533 | __asm__ volatile ( 534 | ".syntax unified \n\t" 535 | "subs %[r3], #1 \n\t" /* r3 = num_words - 1 */ 536 | "lsls %[r3], #2 \n\t" /* r3 = (num_words - 1) * 4 */ 537 | "mov r8, %[r3] \n\t" /* r8 = (num_words - 1) * 4 */ 538 | "lsls %[r3], #1 \n\t" /* r3 = (num_words - 1) * 8 */ 539 | "mov r9, %[r3] \n\t" /* r9 = (num_words - 1) * 8 */ 540 | "movs %[r3], #0 \n\t" /* c0 = 0 */ 541 | "movs %[r4], #0 \n\t" /* c1 = 0 */ 542 | "movs %[r5], #0 \n\t" /* c2 = 0 */ 543 | "movs %[r6], #0 \n\t" /* k = 0 */ 544 | 545 | "push {%[r0]} \n\t" /* keep result on the stack */ 546 | 547 | "1: \n\t" /* outer loop (k < num_words) */ 548 | "movs %[r7], #0 \n\t" /* r7 = i = 0 */ 549 | "b 3f \n\t" 550 | 551 | "2: \n\t" /* outer loop (k >= num_words) */ 552 | "movs %[r7], %[r6] \n\t" /* r7 = k */ 553 | "mov %[r0], r8 \n\t" /* r0 = (num_words - 1) * 4 */ 554 | "subs %[r7], %[r0] \n\t" /* r7 = i = k - 
(num_words - 1) (times 4) */ 555 | 556 | "3: \n\t" /* inner loop */ 557 | "mov r10, %[r3] \n\t" /* park c0 in r10 */ 558 | "mov r11, %[r4] \n\t" /* park c1 in r11 */ 559 | "mov r12, %[r5] \n\t" /* park c2 in r12 */ 560 | "mov r14, %[r6] \n\t" /* park k in r14; the low registers are needed for the multiply */ 561 | "subs %[r0], %[r6], %[r7] \n\t" /* r0 = k - i */ 562 | 563 | "ldr %[r4], [%[r2], %[r0]] \n\t" /* r4 = right[k - i] */ 564 | "ldr %[r0], [%[r1], %[r7]] \n\t" /* r0 = left[i] */ 565 | 566 | "lsrs %[r3], %[r0], #16 \n\t" /* r3 = a1 */ 567 | "uxth %[r0], %[r0] \n\t" /* r0 = a0 */ 568 | 569 | "lsrs %[r5], %[r4], #16 \n\t" /* r5 = b1 */ 570 | "uxth %[r4], %[r4] \n\t" /* r4 = b0 */ 571 | 572 | "movs %[r6], %[r3] \n\t" /* r6 = a1 */ 573 | "muls %[r6], %[r5], %[r6] \n\t" /* r6 = a1 * b1 */ 574 | "muls %[r3], %[r4], %[r3] \n\t" /* r3 = b0 * a1 */ 575 | "muls %[r5], %[r0], %[r5] \n\t" /* r5 = a0 * b1 */ 576 | "muls %[r0], %[r4], %[r0] \n\t" /* r0 = a0 * b0 */ 577 | 578 | /* Add middle terms */ 579 | "lsls %[r4], %[r3], #16 \n\t" /* low half of b0 * a1, moved into the upper 16 bits */ 580 | "lsrs %[r3], %[r3], #16 \n\t" /* high half of b0 * a1 */ 581 | "adds %[r0], %[r4] \n\t" 582 | "adcs %[r6], %[r3] \n\t" 583 | 584 | "lsls %[r4], %[r5], #16 \n\t" /* low half of a0 * b1, moved into the upper 16 bits */ 585 | "lsrs %[r5], %[r5], #16 \n\t" /* high half of a0 * b1 */ 586 | "adds %[r0], %[r4] \n\t" 587 | "adcs %[r6], %[r5] \n\t" /* (r0, r6) now hold the low and high words of the 64-bit product */ 588 | 589 | "mov %[r3], r10\n\t" /* restore c0 */ 590 | "mov %[r4], r11\n\t" /* restore c1 */ 591 | "mov %[r5], r12\n\t" /* restore c2 */ 592 | "adds %[r3], %[r0] \n\t" /* add low word to c0 */ 593 | "adcs %[r4], %[r6] \n\t" /* add high word to c1, including carry */ 594 | "movs %[r0], #0 \n\t" /* r0 = 0 (does not affect carry bit) */ 595 | "adcs %[r5], %[r0] \n\t" /* add carry to c2 */ 596 | 597 | "mov %[r6], r14\n\t" /* r6 = k */ 598 | 599 | "adds %[r7], #4 \n\t" /* i += 4 */ 600 | "cmp %[r7], r8 \n\t" /* i > (num_words - 1) (times 4)? */ 601 | "bgt 4f \n\t" /* if so, exit the loop */ 602 | "cmp %[r7], %[r6] \n\t" /* i <= k? 
*/ 603 | "ble 3b \n\t" /* if so, continue looping */ 604 | 605 | "4: \n\t" /* end inner loop */ 606 | 607 | "ldr %[r0], [sp, #0] \n\t" /* r0 = result */ 608 | 609 | "str %[r3], [%[r0], %[r6]] \n\t" /* result[k] = c0 */ 610 | "mov %[r3], %[r4] \n\t" /* c0 = c1 */ 611 | "mov %[r4], %[r5] \n\t" /* c1 = c2 */ 612 | "movs %[r5], #0 \n\t" /* c2 = 0 */ 613 | "adds %[r6], #4 \n\t" /* k += 4 */ 614 | "cmp %[r6], r8 \n\t" /* k <= (num_words - 1) (times 4) ? */ 615 | "ble 1b \n\t" /* if so, loop back, start with i = 0 */ 616 | "cmp %[r6], r9 \n\t" /* k <= (num_words * 2 - 2) (times 4) ? */ 617 | "ble 2b \n\t" /* if so, loop back, with i = (k + 1) - num_words */ 618 | /* end outer loop */ 619 | 620 | "str %[r3], [%[r0], %[r6]] \n\t" /* result[num_words * 2 - 1] = c0 */ 621 | "pop {%[r0]} \n\t" /* pop result off the stack */ 622 | /* NOTE(review): %[r0] is declared as an input operand below but is used as scratch inside the block; it is reloaded from the stack before each store and popped at the end. */ 623 | RESUME_SYNTAX 624 | : [r3] "+l" (num_words), [r4] "=&l" (r4), 625 | [r5] "=&l" (r5), [r6] "=&l" (r6), [r7] "=&l" (r7) 626 | : [r0] "l" (result), [r1] "l" (left), [r2] "l" (right) 627 | : "r8", "r9", "r10", "r11", "r12", "r14", "cc", "memory" 628 | ); 629 | #endif 630 | } 631 | #define asm_mult 1 632 | #endif 633 | /* Generic square, compiled only when uECC_SQUARE_FUNC is set and no faster version set asm_square. Same column-wise accumulation as the generic multiply, but the inner loop only visits pairs with i <= k - i and adds the product twice when i < k - i. */ 634 | #if uECC_SQUARE_FUNC 635 | #if !asm_square 636 | uECC_VLI_API void uECC_vli_square(uECC_word_t *result, 637 | const uECC_word_t *left, 638 | wordcount_t num_words) { 639 | #if (uECC_PLATFORM != uECC_arm_thumb) 640 | uint32_t c0 = 0; 641 | uint32_t c1 = 0; 642 | uint32_t c2 = 0; 643 | uint32_t k = 0; 644 | uint32_t i, tt; 645 | uint32_t t0, t1; 646 | 647 | __asm__ volatile ( 648 | ".syntax unified \n\t" 649 | 650 | "1: \n\t" /* outer loop (k < num_words) */ 651 | "movs %[i], #0 \n\t" /* i = 0 */ 652 | "b 3f \n\t" 653 | 654 | "2: \n\t" /* outer loop (k >= num_words) */ 655 | "movs %[i], %[k] \n\t" /* i = k */ 656 | "subs %[i], %[last_word] \n\t" /* i = k - (num_words - 1) (times 4) */ 657 | 658 | "3: \n\t" /* inner loop */ 659 | "subs %[tt], %[k], %[i] \n\t" /* tt = k-i */ 660 | 661 | "ldr %[t1], [%[left], %[tt]] \n\t" /* t1 = 
left[k - i] */ 662 | "ldr %[t0], [%[left], %[i]] \n\t" /* t0 = left[i] */ 663 | 664 | "umull %[t0], %[t1], %[t0], %[t1] \n\t" /* (t0, t1) = left[i] * left[k - i] */ 665 | 666 | "cmp %[i], %[tt] \n\t" /* (i < k - i) ? */ 667 | "bge 4f \n\t" /* if i >= k - i, skip */ 668 | "adds %[c0], %[c0], %[t0] \n\t" /* add low word to c0 */ 669 | "adcs %[c1], %[c1], %[t1] \n\t" /* add high word to c1, including carry */ 670 | "adcs %[c2], %[c2], #0 \n\t" /* add carry to c2 */ 671 | 672 | "4: \n\t" /* every product is added here once; products with i < k - i were already added once above, so they count twice */ 673 | "adds %[c0], %[c0], %[t0] \n\t" /* add low word to c0 */ 674 | "adcs %[c1], %[c1], %[t1] \n\t" /* add high word to c1, including carry */ 675 | "adcs %[c2], %[c2], #0 \n\t" /* add carry to c2 */ 676 | 677 | "adds %[i], #4 \n\t" /* i += 4 */ 678 | "cmp %[i], %[k] \n\t" /* i >= k? */ 679 | "bge 5f \n\t" /* if so, exit the loop */ 680 | "subs %[tt], %[k], %[i] \n\t" /* tt = k - i */ 681 | "cmp %[i], %[tt] \n\t" /* i <= k - i? */ 682 | "ble 3b \n\t" /* if so, continue looping */ 683 | 684 | "5: \n\t" /* end inner loop */ 685 | 686 | "str %[c0], [%[result], %[k]] \n\t" /* result[k] = c0 */ 687 | "mov %[c0], %[c1] \n\t" /* c0 = c1 */ 688 | "mov %[c1], %[c2] \n\t" /* c1 = c2 */ 689 | "movs %[c2], #0 \n\t" /* c2 = 0 */ 690 | "adds %[k], #4 \n\t" /* k += 4 */ 691 | "cmp %[k], %[last_word] \n\t" /* k <= (num_words - 1) (times 4) ? */ 692 | "ble 1b \n\t" /* if so, loop back, start with i = 0 */ 693 | "cmp %[k], %[last_word], lsl #1 \n\t" /* k <= (num_words * 2 - 2) (times 4) ? 
*/ 694 | "ble 2b \n\t" /* if so, loop back, start with i = (k + 1) - num_words */ 695 | /* end outer loop */ 696 | 697 | "str %[c0], [%[result], %[k]] \n\t" /* result[num_words * 2 - 1] = c0 */ 698 | RESUME_SYNTAX 699 | : [c0] "+r" (c0), [c1] "+r" (c1), [c2] "+r" (c2), 700 | [k] "+r" (k), [i] "=&r" (i), [tt] "=&r" (tt), [t0] "=&r" (t0), [t1] "=&r" (t1) 701 | : [result] "r" (result), [left] "r" (left), [last_word] "r" ((num_words - 1) * 4) 702 | : "cc", "memory" 703 | ); 704 | 705 | #else /* Thumb-1 variant of the generic square: each 32x32 product is built from four 16x16 "muls", and r8-r12 and r14 hold the loop bounds and temporarily parked values. */ 706 | uint32_t r3, r4, r5, r6, r7; 707 | 708 | __asm__ volatile ( 709 | ".syntax unified \n\t" 710 | "subs %[r2], #1 \n\t" /* r2 = num_words - 1 */ 711 | "lsls %[r2], #2 \n\t" /* r2 = (num_words - 1) * 4 */ 712 | "mov r8, %[r2] \n\t" /* r8 = (num_words - 1) * 4 */ 713 | "lsls %[r2], #1 \n\t" /* r2 = (num_words - 1) * 8 */ 714 | "mov r9, %[r2] \n\t" /* r9 = (num_words - 1) * 8 */ 715 | "movs %[r2], #0 \n\t" /* c0 = 0 */ 716 | "movs %[r3], #0 \n\t" /* c1 = 0 */ 717 | "movs %[r4], #0 \n\t" /* c2 = 0 */ 718 | "movs %[r5], #0 \n\t" /* k = 0 */ 719 | 720 | "push {%[r0]} \n\t" /* keep result on the stack */ 721 | 722 | "1: \n\t" /* outer loop (k < num_words) */ 723 | "movs %[r6], #0 \n\t" /* r6 = i = 0 */ 724 | "b 3f \n\t" 725 | 726 | "2: \n\t" /* outer loop (k >= num_words) */ 727 | "movs %[r6], %[r5] \n\t" /* r6 = k */ 728 | "mov %[r0], r8 \n\t" /* r0 = (num_words - 1) * 4 */ 729 | "subs %[r6], %[r0] \n\t" /* r6 = i = k - (num_words - 1) (times 4) */ 730 | 731 | "3: \n\t" /* inner loop */ 732 | "mov r10, %[r2] \n\t" /* park c0 in r10 */ 733 | "mov r11, %[r3] \n\t" /* park c1 in r11 */ 734 | "mov r12, %[r4] \n\t" /* park c2 in r12 */ 735 | "mov r14, %[r5] \n\t" /* park k in r14 */ 736 | "subs %[r7], %[r5], %[r6] \n\t" /* r7 = k - i */ 737 | 738 | "ldr %[r3], [%[r1], %[r7]] \n\t" /* r3 = left[k - i] */ 739 | "ldr %[r0], [%[r1], %[r6]] \n\t" /* r0 = left[i] */ 740 | 741 | "lsrs %[r2], %[r0], #16 \n\t" /* r2 = a1 */ 742 | "uxth %[r0], %[r0] \n\t" /* r0 = a0 */ 743 | 744 | "lsrs %[r4], %[r3], #16 \n\t" /* r4 = b1 */ 745 | "uxth %[r3], %[r3] \n\t" /* r3 = b0 */ 746 | 747 | 
"movs %[r5], %[r2] \n\t" /* r5 = a1 */ 748 | "muls %[r5], %[r4], %[r5] \n\t" /* r5 = a1 * b1 */ 749 | "muls %[r2], %[r3], %[r2] \n\t" /* r2 = b0 * a1 */ 750 | "muls %[r4], %[r0], %[r4] \n\t" /* r4 = a0 * b1 */ 751 | "muls %[r0], %[r3], %[r0] \n\t" /* r0 = a0 * b0 */ 752 | 753 | /* Add middle terms */ 754 | "lsls %[r3], %[r2], #16 \n\t" /* low half of b0 * a1, moved into the upper 16 bits */ 755 | "lsrs %[r2], %[r2], #16 \n\t" /* high half of b0 * a1 */ 756 | "adds %[r0], %[r3] \n\t" 757 | "adcs %[r5], %[r2] \n\t" 758 | 759 | "lsls %[r3], %[r4], #16 \n\t" /* low half of a0 * b1, moved into the upper 16 bits */ 760 | "lsrs %[r4], %[r4], #16 \n\t" /* high half of a0 * b1 */ 761 | "adds %[r0], %[r3] \n\t" 762 | "adcs %[r5], %[r4] \n\t" /* (r0, r5) now hold the low and high words of the 64-bit product */ 763 | 764 | /* Add to acc, doubling if necessary */ 765 | "mov %[r2], r10\n\t" /* restore c0 */ 766 | "mov %[r3], r11\n\t" /* restore c1 */ 767 | "mov %[r4], r12\n\t" /* restore c2 */ 768 | 769 | "cmp %[r6], %[r7] \n\t" /* (i < k - i) ? */ 770 | "bge 4f \n\t" /* if i >= k - i, skip */ 771 | "movs %[r7], #0 \n\t" /* r7 = 0 */ 772 | "adds %[r2], %[r0] \n\t" /* add low word to c0 */ 773 | "adcs %[r3], %[r5] \n\t" /* add high word to c1, including carry */ 774 | "adcs %[r4], %[r7] \n\t" /* add carry to c2 */ 775 | "4: \n\t" /* every product is added here once; products with i < k - i were already added once above */ 776 | "movs %[r7], #0 \n\t" /* r7 = 0 */ 777 | "adds %[r2], %[r0] \n\t" /* add low word to c0 */ 778 | "adcs %[r3], %[r5] \n\t" /* add high word to c1, including carry */ 779 | "adcs %[r4], %[r7] \n\t" /* add carry to c2 */ 780 | 781 | "mov %[r5], r14\n\t" /* r5 = k */ 782 | 783 | "adds %[r6], #4 \n\t" /* i += 4 */ 784 | "cmp %[r6], %[r5] \n\t" /* i >= k? */ 785 | "bge 5f \n\t" /* if so, exit the loop */ 786 | "subs %[r7], %[r5], %[r6] \n\t" /* r7 = k - i */ 787 | "cmp %[r6], %[r7] \n\t" /* i <= k - i? 
*/ 788 | "ble 3b \n\t" /* if so, continue looping */ 789 | 790 | "5: \n\t" /* end inner loop */ 791 | 792 | "ldr %[r0], [sp, #0] \n\t" /* r0 = result */ 793 | 794 | "str %[r2], [%[r0], %[r5]] \n\t" /* result[k] = c0 */ 795 | "mov %[r2], %[r3] \n\t" /* c0 = c1 */ 796 | "mov %[r3], %[r4] \n\t" /* c1 = c2 */ 797 | "movs %[r4], #0 \n\t" /* c2 = 0 */ 798 | "adds %[r5], #4 \n\t" /* k += 4 */ 799 | "cmp %[r5], r8 \n\t" /* k <= (num_words - 1) (times 4) ? */ 800 | "ble 1b \n\t" /* if so, loop back, start with i = 0 */ 801 | "cmp %[r5], r9 \n\t" /* k <= (num_words * 2 - 2) (times 4) ? */ 802 | "ble 2b \n\t" /* if so, loop back, with i = (k + 1) - num_words */ 803 | /* end outer loop */ 804 | 805 | "str %[r2], [%[r0], %[r5]] \n\t" /* result[num_words * 2 - 1] = c0 */ 806 | "pop {%[r0]} \n\t" /* pop result off the stack */ 807 | /* NOTE(review): %[r0] is declared as an input operand below but is used as scratch inside the block; it is reloaded from the stack before each store and popped at the end. */ 808 | RESUME_SYNTAX 809 | : [r2] "+l" (num_words), [r3] "=&l" (r3), [r4] "=&l" (r4), 810 | [r5] "=&l" (r5), [r6] "=&l" (r6), [r7] "=&l" (r7) 811 | : [r0] "l" (result), [r1] "l" (left) 812 | : "r8", "r9", "r10", "r11", "r12", "r14", "cc", "memory" 813 | ); 814 | #endif 815 | } 816 | #define asm_square 1 817 | #endif 818 | #endif /* uECC_SQUARE_FUNC */ 819 | 820 | #endif /* _UECC_ASM_ARM_H_ */ 821 | -------------------------------------------------------------------------------- /asm_avr.inc: -------------------------------------------------------------------------------- 1 | /* Copyright 2015, Kenneth MacKay. Licensed under the BSD 2-clause license. 
*/ 2 | 3 | #ifndef _UECC_ASM_AVR_H_ 4 | #define _UECC_ASM_AVR_H_ 5 | 6 | /* uECC_MIN_WORDS ends up as the word count of the smallest enabled curve: the checks run from 32 down to 20 and each later #if overrides the value set by an earlier one. */ #if (uECC_SUPPORTS_secp256r1 || uECC_SUPPORTS_secp256k1) 7 | #define uECC_MIN_WORDS 32 8 | #endif 9 | #if uECC_SUPPORTS_secp224r1 10 | #undef uECC_MIN_WORDS 11 | #define uECC_MIN_WORDS 28 12 | #endif 13 | #if uECC_SUPPORTS_secp192r1 14 | #undef uECC_MIN_WORDS 15 | #define uECC_MIN_WORDS 24 16 | #endif 17 | #if uECC_SUPPORTS_secp160r1 18 | #undef uECC_MIN_WORDS 19 | #define uECC_MIN_WORDS 20 20 | #endif 21 | 22 | /* IJMP is the indirect-jump instruction used to enter the unrolled sequences below part-way; the extended eijmp form is selected when __AVR_HAVE_EIJMP_EICALL__ is set. */ #if __AVR_HAVE_EIJMP_EICALL__ 23 | #define IJMP "eijmp \n\t" 24 | #else 25 | #define IJMP "ijmp \n\t" 26 | #endif 27 | 28 | #if (uECC_OPTIMIZATION_LEVEL >= 2) 29 | 30 | /* Zeroes vli by storing __zero_reg__ through X with post-increment in an unrolled run of uECC_MAX_WORDS stores. When more than one curve size is enabled, r30:r31 is loaded with the program address of label 1 minus num_words and IJMP jumps there, so only the tail of the run executes (presumably exactly num_words stores, one program word per st - TODO confirm). */ uECC_VLI_API void uECC_vli_clear(uECC_word_t *vli, wordcount_t num_words) { 31 | volatile uECC_word_t *v = vli; 32 | __asm__ volatile ( 33 | #if (uECC_MAX_WORDS != uECC_MIN_WORDS) 34 | "ldi r30, pm_lo8(1f) \n\t" 35 | "ldi r31, pm_hi8(1f) \n\t" 36 | "sub r30, %[num] \n\t" 37 | "sbc r31, __zero_reg__ \n\t" 38 | IJMP 39 | #endif 40 | 41 | REPEAT(uECC_MAX_WORDS, "st x+, __zero_reg__ \n\t") 42 | "1: \n\t" 43 | : "+x" (v) 44 | : [num] "r" (num_words) 45 | : 46 | #if (uECC_MAX_WORDS != uECC_MIN_WORDS) 47 | "r30", "r31", "cc" 48 | #endif 49 | ); 50 | } 51 | #define asm_clear 1 52 | 53 | /* Copies src to dest one byte at a time: each unrolled step loads through Y into r0 and stores through X. The jump offset passed as num is num_words * 2, matching the two instructions emitted per copied byte. */ uECC_VLI_API void uECC_vli_set(uECC_word_t *dest, const uECC_word_t *src, wordcount_t num_words) { 54 | volatile uECC_word_t *d = dest; 55 | __asm__ volatile ( 56 | #if (uECC_MAX_WORDS != uECC_MIN_WORDS) 57 | "ldi r30, pm_lo8(1f) \n\t" 58 | "ldi r31, pm_hi8(1f) \n\t" 59 | "sub r30, %[num] \n\t" 60 | "sbc r31, __zero_reg__ \n\t" 61 | IJMP 62 | #endif 63 | 64 | REPEAT(uECC_MAX_WORDS, 65 | "ld r0, y+ \n\t" 66 | "st x+, r0 \n\t") 67 | "1: \n\t" 68 | : "+x" (d), "+y" (src) 69 | : [num] "r" ((uint8_t)(num_words * 2)) 70 | : "r0" 71 | #if (uECC_MAX_WORDS != uECC_MIN_WORDS) 72 | , "r30", "r31", "cc" 73 | #endif 74 | ); 75 | } 76 | #define asm_set 1 77 | 78 | /* Shifts vli right by one bit in place. X is first advanced by num_words to the end of the buffer; the top byte is shifted with lsr and every lower byte with ror, so the bit dropped from one byte enters the byte below it. The jump offset is 3 * (num_words - 1): three instructions for each byte after the first. */ uECC_VLI_API void uECC_vli_rshift1(uECC_word_t *vli, wordcount_t num_words) { 79 | volatile 
uECC_word_t *v = vli; 80 | __asm__ volatile ( 81 | #if (uECC_MAX_WORDS != uECC_MIN_WORDS) 82 | "ldi r30, pm_lo8(1f) \n\t" 83 | "ldi r31, pm_hi8(1f) \n\t" 84 | "sub r30, %[jump] \n\t" 85 | "sbc r31, __zero_reg__ \n\t" 86 | #endif 87 | 88 | /* advance X one past the last byte, then shift the top byte with lsr; its low bit is left in the carry flag */ "add r26, %[num] \n\t" 89 | "adc r27, __zero_reg__ \n\t" 90 | "ld r0, -x \n\t" 91 | "lsr r0 \n\t" 92 | "st x, r0 \n\t" 93 | #if (uECC_MAX_WORDS != uECC_MIN_WORDS) 94 | IJMP 95 | #endif 96 | 97 | /* remaining bytes from high to low: ror rotates the carry from the byte above into bit 7 */ REPEAT(DEC(uECC_MAX_WORDS), 98 | "ld r0, -x \n\t" 99 | "ror r0 \n\t" 100 | "st x, r0 \n\t") 101 | "1: \n\t" 102 | : "+x" (v) 103 | #if (uECC_MAX_WORDS != uECC_MIN_WORDS) 104 | : [num] "r" (num_words), [jump] "r" ((uint8_t)(3 * (num_words - 1))) 105 | : "r0", "r30", "r31", "cc" 106 | #else 107 | : [num] "r" (num_words) 108 | : "r0", "cc" 109 | #endif 110 | ); 111 | } 112 | #define asm_rshift1 1 113 | 114 | /* One jump-table entry for uECC_vli_add: copies the result pointer into Z (r30:r31, which held the jump target until now) and jumps to add step N. Each entry is two instructions, which matches the num_words * 2 offset the function subtracts from the table end (entry ordering depends on REPEATM - TODO confirm). */ #define ADD_RJPM_TABLE(N) \ 115 | "movw r30, %A[result] \n\t" \ 116 | "rjmp add_%=_" #N " \n\t" 117 | 118 | /* One add step, labelled per asm instance and per N: load a byte from left (X) and right (Y), add with carry, store to result (Z). */ #define ADD_RJPM_DEST(N) \ 119 | "add_%=_" #N ":" \ 120 | "ld %[clb], x+ \n\t" \ 121 | "ld %[rb], y+ \n\t" \ 122 | "adc %[clb], %[rb] \n\t" \ 123 | "st z+, %[clb] \n\t" 124 | 125 | /* Adds left and right byte by byte with carry propagation and writes the sum to result. Returns the carry out of the last byte: after the unrolled steps the carry register is zeroed and the carry flag is added into it. With several curve sizes enabled, IJMP enters the jump table so that only part of the unrolled run executes. */ uECC_VLI_API uECC_word_t uECC_vli_add(uECC_word_t *result, 126 | const uECC_word_t *left, 127 | const uECC_word_t *right, 128 | wordcount_t num_words) { 129 | volatile uECC_word_t *r = result; 130 | uint8_t carry; 131 | uint8_t right_byte; 132 | 133 | __asm__ volatile ( 134 | #if (uECC_MAX_WORDS != uECC_MIN_WORDS) 135 | "ldi r30, pm_lo8(add_%=_" STR(uECC_MAX_WORDS) ") \n\t" 136 | "ldi r31, pm_hi8(add_%=_" STR(uECC_MAX_WORDS) ") \n\t" 137 | "sub r30, %[num] \n\t" 138 | "sbc r31, __zero_reg__ \n\t" 139 | #endif 140 | 141 | "clc \n\t" 142 | #if (uECC_MAX_WORDS != uECC_MIN_WORDS) 143 | IJMP 144 | REPEATM(uECC_MAX_WORDS, ADD_RJPM_TABLE) 145 | #endif 146 | 147 | REPEATM(uECC_MAX_WORDS, ADD_RJPM_DEST) 148 | 149 | "mov %[clb], __zero_reg__ \n\t" 150 | "adc %[clb], %[clb] \n\t" /* Store carry bit. 
*/ 151 | 152 | : "+x" (left), "+y" (right), 153 | [clb] "=&r" (carry), [rb] "=&r" (right_byte) 154 | : [result] "r" (r), [num] "r" ((uint8_t)(num_words * 2)) 155 | : "r30", "r31", "cc" 156 | ); 157 | return carry; 158 | } 159 | #define asm_add 1 160 | 161 | /* One jump-table entry for uECC_vli_sub: copies the result pointer into Z and jumps to subtract step N (two instructions per entry, matching the num_words * 2 jump offset). */ #define SUB_RJPM_TABLE(N) \ 162 | "movw r30, %A[result] \n\t" \ 163 | "rjmp sub_%=_" #N " \n\t" 164 | 165 | /* One subtract step: load a byte from left (X) and right (Y), subtract with borrow, store to result (Z). */ #define SUB_RJPM_DEST(N) \ 166 | "sub_%=_" #N ":" \ 167 | "ld %[clb], x+ \n\t" \ 168 | "ld %[rb], y+ \n\t" \ 169 | "sbc %[clb], %[rb] \n\t" \ 170 | "st z+, %[clb] \n\t" 171 | 172 | /* Subtracts right from left byte by byte with borrow propagation and writes the difference to result. Returns the final borrow: the carry register is zeroed and the carry flag left by the last sbc is added into it. Structure mirrors uECC_vli_add, with sbc in place of adc. */ uECC_VLI_API uECC_word_t uECC_vli_sub(uECC_word_t *result, 173 | const uECC_word_t *left, 174 | const uECC_word_t *right, 175 | wordcount_t num_words) { 176 | volatile uECC_word_t *r = result; 177 | uint8_t carry; 178 | uint8_t right_byte; 179 | 180 | __asm__ volatile ( 181 | #if (uECC_MAX_WORDS != uECC_MIN_WORDS) 182 | "ldi r30, pm_lo8(sub_%=_" STR(uECC_MAX_WORDS) ") \n\t" 183 | "ldi r31, pm_hi8(sub_%=_" STR(uECC_MAX_WORDS) ") \n\t" 184 | "sub r30, %[num] \n\t" 185 | "sbc r31, __zero_reg__ \n\t" 186 | #endif 187 | 188 | "clc \n\t" 189 | #if (uECC_MAX_WORDS != uECC_MIN_WORDS) 190 | IJMP 191 | REPEATM(uECC_MAX_WORDS, SUB_RJPM_TABLE) 192 | #endif 193 | 194 | REPEATM(uECC_MAX_WORDS, SUB_RJPM_DEST) 195 | 196 | "mov %[clb], __zero_reg__ \n\t" 197 | "adc %[clb], %[clb] \n\t" /* Store carry bit. */ 198 | 199 | : "+x" (left), "+y" (right), 200 | [clb] "=&r" (carry), [rb] "=&r" (right_byte) 201 | : [result] "r" (r), [num] "r" ((uint8_t)(num_words * 2)) 202 | : "r30", "r31", "cc" 203 | ); 204 | return carry; 205 | } 206 | #define asm_sub 1 207 | 208 | #if (uECC_OPTIMIZATION_LEVEL >= 3) 209 | 210 | #include "asm_avr_mult_square.inc" 211 | 212 | /* Multiplies left by right into result using the unrolled FAST_MULT_ASM_* sequences (presumably defined by the include above - TODO confirm). The base sequence is chosen by uECC_MIN_WORDS and the _TO_ extension stages are appended up to uECC_MAX_WORDS. num_words is pinned to r18 for those sequences, and r1 is cleared before returning. */ __attribute((noinline)) 213 | uECC_VLI_API void uECC_vli_mult(uECC_word_t *result, 214 | const uECC_word_t *left, 215 | const uECC_word_t *right, 216 | wordcount_t num_words) { 217 | /* num_words should already be in r18. 
*/ 218 | register wordcount_t r18 __asm__("r18") = num_words; 219 | 220 | /* r18 is pushed before the base multiply and popped straight after it, presumably so the optional extension stages can test the original num_words - TODO confirm against the FAST_MULT_ASM_* macros */ __asm__ volatile ( 221 | "push r18 \n\t" 222 | #if (uECC_MIN_WORDS == 20) 223 | FAST_MULT_ASM_20 224 | "pop r18 \n\t" 225 | #if (uECC_MAX_WORDS > 20) 226 | FAST_MULT_ASM_20_TO_24 227 | #endif 228 | #if (uECC_MAX_WORDS > 24) 229 | FAST_MULT_ASM_24_TO_28 230 | #endif 231 | #if (uECC_MAX_WORDS > 28) 232 | FAST_MULT_ASM_28_TO_32 233 | #endif 234 | #elif (uECC_MIN_WORDS == 24) 235 | FAST_MULT_ASM_24 236 | "pop r18 \n\t" 237 | #if (uECC_MAX_WORDS > 24) 238 | FAST_MULT_ASM_24_TO_28 239 | #endif 240 | #if (uECC_MAX_WORDS > 28) 241 | FAST_MULT_ASM_28_TO_32 242 | #endif 243 | #elif (uECC_MIN_WORDS == 28) 244 | FAST_MULT_ASM_28 245 | "pop r18 \n\t" 246 | #if (uECC_MAX_WORDS > 28) 247 | FAST_MULT_ASM_28_TO_32 248 | #endif 249 | #elif (uECC_MIN_WORDS == 32) 250 | FAST_MULT_ASM_32 251 | "pop r18 \n\t" 252 | #endif 253 | /* label 2 is not referenced in this block; presumably it is the exit target used inside the extension macros - TODO confirm */ "2: \n\t" 254 | /* clear r1 again before control returns to compiled code */ "eor r1, r1 \n\t" 255 | : "+x" (left), "+y" (right), "+z" (result) 256 | : "r" (r18) 257 | : "r0", "r2", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", 258 | "r11", "r12", "r13", "r14", "r15", "r16", "r17", "r19", "r20", 259 | "r21", "r22", "r23", "r24", "r25", "cc" 260 | ); 261 | } 262 | #define asm_mult 1 263 | 264 | #if uECC_SQUARE_FUNC 265 | /* Squares left into result using the unrolled FAST_SQUARE_ASM_* sequences, selected and extended the same way as in uECC_vli_mult. num_words is pinned to r20 here, and r28/r29 appear in the clobber list because no right operand occupies Y. */ __attribute((noinline)) 266 | uECC_VLI_API void uECC_vli_square(uECC_word_t *result, 267 | const uECC_word_t *left, 268 | wordcount_t num_words) { 269 | /* num_words should already be in r20. 
*/ 270 | register wordcount_t r20 __asm__("r20") = num_words; 271 | 272 | /* r20 is pushed before the base squaring sequence and popped straight after it, ahead of the optional extension stages */ __asm__ volatile ( 273 | "push r20 \n\t" 274 | #if (uECC_MIN_WORDS == 20) 275 | FAST_SQUARE_ASM_20 276 | "pop r20 \n\t" 277 | #if (uECC_MAX_WORDS > 20) 278 | FAST_SQUARE_ASM_20_TO_24 279 | #endif 280 | #if (uECC_MAX_WORDS > 24) 281 | FAST_SQUARE_ASM_24_TO_28 282 | #endif 283 | #if (uECC_MAX_WORDS > 28) 284 | FAST_SQUARE_ASM_28_TO_32 285 | #endif 286 | #elif (uECC_MIN_WORDS == 24) 287 | FAST_SQUARE_ASM_24 288 | "pop r20 \n\t" 289 | #if (uECC_MAX_WORDS > 24) 290 | FAST_SQUARE_ASM_24_TO_28 291 | #endif 292 | #if (uECC_MAX_WORDS > 28) 293 | FAST_SQUARE_ASM_28_TO_32 294 | #endif 295 | #elif (uECC_MIN_WORDS == 28) 296 | FAST_SQUARE_ASM_28 297 | "pop r20 \n\t" 298 | #if (uECC_MAX_WORDS > 28) 299 | FAST_SQUARE_ASM_28_TO_32 300 | #endif 301 | #elif (uECC_MIN_WORDS == 32) 302 | FAST_SQUARE_ASM_32 303 | "pop r20 \n\t" 304 | #endif 305 | "2: \n\t" 306 | /* clear r1 again before control returns to compiled code */ "eor r1, r1 \n\t" 307 | : "+x" (left), "+z" (result) 308 | : "r" (r20) 309 | : "r0", "r2", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", 310 | "r11", "r12", "r13", "r14", "r15", "r16", "r17", "r18", "r19", 311 | "r21", "r22", "r23", "r24", "r25", "r28", "r29", "cc" 312 | ); 313 | } 314 | #define asm_square 1 315 | #endif /* uECC_SQUARE_FUNC */ 316 | 317 | #endif /* (uECC_OPTIMIZATION_LEVEL >= 3) */ 318 | 319 | #if uECC_SUPPORTS_secp160r1 320 | /* tentative declaration so the reduction code can name curve_secp160r1.p; the initialised definition is expected elsewhere - TODO confirm */ static const struct uECC_Curve_t curve_secp160r1; 321 | /* Fast reduction for secp160r1: folds the 40-byte product (X) into the 20-byte result (Y). A 24-byte scratch buffer is carved out of the stack by moving SP directly; the high half of product is shifted and added into it, the scratch is added to the low half of product, and the overflow bytes are folded in once more. Overflows are counted in carry and resolved after the asm block by subtracting curve_secp160r1.p. */ static void vli_mmod_fast_secp160r1(uECC_word_t *result, uECC_word_t *product) { 322 | uint8_t carry = 0; 323 | __asm__ volatile ( 324 | /* reserve 24 bytes below SP; SREG is saved in r0 and interrupts are masked while the two halves of SP are rewritten */ "in r30, __SP_L__ \n\t" 325 | "in r31, __SP_H__ \n\t" 326 | "sbiw r30, 24 \n\t" 327 | "in r0, __SREG__ \n\t" 328 | "cli \n\t" 329 | "out __SP_H__, r31 \n\t" 330 | "out __SREG__, r0 \n\t" 331 | "out __SP_L__, r30 \n\t" 332 | 333 | "adiw r30, 25 \n\t" /* we are shifting by 31 bits, so shift over 4 bytes 334 | (+ 1 since z initially points below the stack) */ 335 | "adiw r26, 40 \n\t" /* end of product */ 336 | "ld r18, -x 
\n\t" /* Load word. */ 337 | "lsr r18 \n\t" /* Shift. */ 338 | "st -z, r18 \n\t" /* Store the first result word. */ 339 | 340 | /* Now we just do the remaining words with the carry bit (using ROR) */ 341 | REPEAT(19, 342 | "ld r18, -x \n\t" 343 | "ror r18 \n\t" 344 | "st -z, r18 \n\t") 345 | 346 | "eor r18, r18 \n\t" /* r18 = 0 */ 347 | "ror r18 \n\t" /* get last bit */ 348 | "st -z, r18 \n\t" /* store it */ 349 | 350 | "sbiw r30, 3 \n\t" /* move z back to point at tmp */ 351 | /* now we add right */ 352 | "ld r18, x+ \n\t" 353 | "st z+, r18 \n\t" /* the first 3 bytes do not need to be added */ 354 | "ld r18, x+ \n\t" 355 | "st z+, r18 \n\t" 356 | "ld r18, x+ \n\t" 357 | "st z+, r18 \n\t" 358 | 359 | "ld r18, x+ \n\t" 360 | "ld r19, z \n\t" 361 | "add r18, r19 \n\t" 362 | "st z+, r18 \n\t" 363 | 364 | /* Now we just do the remaining words with the carry bit (using ADC) */ 365 | REPEAT(16, 366 | "ld r18, x+ \n\t" 367 | "ld r19, z \n\t" 368 | "adc r18, r19 \n\t" 369 | "st z+, r18 \n\t") 370 | 371 | /* Propagate over the remaining bytes of result */ 372 | "ld r18, z \n\t" 373 | "adc r18, r1 \n\t" 374 | "st z+, r18 \n\t" 375 | 376 | "ld r18, z \n\t" 377 | "adc r18, r1 \n\t" 378 | "st z+, r18 \n\t" 379 | 380 | "ld r18, z \n\t" 381 | "adc r18, r1 \n\t" 382 | "st z+, r18 \n\t" 383 | 384 | "ld r18, z \n\t" 385 | "adc r18, r1 \n\t" 386 | "st z+, r18 \n\t" 387 | 388 | "sbiw r30, 24 \n\t" /* move z back to point at tmp */ 389 | "sbiw r26, 40 \n\t" /* move x back to point at product */ 390 | 391 | /* add low bytes of tmp to product, storing in result */ 392 | "ld r18, z+ \n\t" 393 | "ld r19, x+ \n\t" 394 | "add r18, r19 \n\t" 395 | "st y+, r18 \n\t" 396 | REPEAT(19, 397 | "ld r18, z+ \n\t" 398 | "ld r19, x+ \n\t" 399 | "adc r18, r19 \n\t" 400 | "st y+, r18 \n\t") 401 | "adc %[carry], __zero_reg__ \n\t" /* Store carry bit (carry flag is cleared). 
*/ 402 | /* at this point x is at the end of product, y is at the end of result, 403 | z is 20 bytes into tmp */ 404 | "sbiw r28, 20 \n\t" /* move y back to point at result */ 405 | "adiw r30, 4 \n\t" /* move z to point to the end of tmp */ 406 | 407 | /* do omega_mult again with the 4 relevant bytes */ 408 | /* z points to the end of tmp, x points to the end of product */ 409 | "ld r18, -z \n\t" /* Load word. */ 410 | "lsr r18 \n\t" /* Shift. */ 411 | "st -x, r18 \n\t" /* Store the first result word. */ 412 | 413 | "ld r18, -z \n\t" 414 | "ror r18 \n\t" 415 | "st -x, r18 \n\t" 416 | "ld r18, -z \n\t" 417 | "ror r18 \n\t" 418 | "st -x, r18 \n\t" 419 | "ld r18, -z \n\t" 420 | "ror r18 \n\t" 421 | "st -x, r18 \n\t" 422 | 423 | "eor r18, r18 \n\t" /* r18 = 0 */ 424 | "ror r18 \n\t" /* get last bit */ 425 | "st -x, r18 \n\t" /* store it */ 426 | 427 | "sbiw r26, 3 \n\t" /* move x back to point at beginning */ 428 | /* now we add a copy of the 4 bytes */ 429 | "ld r18, z+ \n\t" 430 | "st x+, r18 \n\t" /* the first 3 bytes do not need to be added */ 431 | "ld r18, z+ \n\t" 432 | "st x+, r18 \n\t" 433 | "ld r18, z+ \n\t" 434 | "st x+, r18 \n\t" 435 | 436 | "ld r18, z+ \n\t" 437 | "ld r19, x \n\t" 438 | "add r18, r19 \n\t" 439 | "st x+, r18 \n\t" 440 | 441 | /* Propagate over the remaining bytes */ 442 | "ld r18, x \n\t" 443 | "adc r18, r1 \n\t" 444 | "st x+, r18 \n\t" 445 | 446 | "ld r18, x \n\t" 447 | "adc r18, r1 \n\t" 448 | "st x+, r18 \n\t" 449 | 450 | "ld r18, x \n\t" 451 | "adc r18, r1 \n\t" 452 | "st x+, r18 \n\t" 453 | 454 | "ld r18, x \n\t" 455 | "adc r18, r1 \n\t" 456 | "st x+, r18 \n\t" 457 | 458 | /* now z points to the end of tmp, x points to the end of product 459 | (y still points at result) */ 460 | "sbiw r26, 8 \n\t" /* move x back to point at beginning of actual data */ 461 | /* add into result */ 462 | "ld r18, x+ \n\t" 463 | "ld r19, y \n\t" 464 | "add r18, r19 \n\t" 465 | "st y+, r18 \n\t" 466 | REPEAT(7, 467 | "ld r18, x+ \n\t" 468 | "ld r19, y \n\t" 
469 | "adc r18, r19 \n\t" 470 | "st y+, r18 \n\t") 471 | 472 | /* Done adding, now propagate carry bit */ 473 | REPEAT(12, 474 | "ld r18, y \n\t" 475 | "adc r18, __zero_reg__ \n\t" 476 | "st y+, r18 \n\t") 477 | 478 | "adc %[carry], __zero_reg__ \n\t" /* Store carry bit (carry flag is cleared). */ 479 | "sbiw r28, 20 \n\t" /* move y back to point at result */ 480 | 481 | "sbiw r30, 1 \n\t" /* fix stack pointer */ 482 | /* write Z back to SP to release the scratch buffer, masking interrupts the same way as on entry */ "in r0, __SREG__ \n\t" 483 | "cli \n\t" 484 | "out __SP_H__, r31 \n\t" 485 | "out __SREG__, r0 \n\t" 486 | "out __SP_L__, r30 \n\t" 487 | 488 | : "+x" (product), [carry] "+r" (carry) 489 | : "y" (result) 490 | : "r0", "r18", "r19", "r30", "r31", "cc" 491 | ); 492 | 493 | /* carry holds the number of overflows recorded by the two adc steps on it (0, 1 or 2); p is subtracted once for each */ if (carry > 0) { 494 | --carry; 495 | uECC_vli_sub(result, result, curve_secp160r1.p, 20); 496 | } 497 | if (carry > 0) { 498 | uECC_vli_sub(result, result, curve_secp160r1.p, 20); 499 | } 500 | /* NOTE(review): p is subtracted only when the comparison is > 0, so a result exactly equal to p would be returned unreduced (assuming uECC_vli_cmp_unsafe returns 0 on equality) - confirm this is intended */ if (uECC_vli_cmp_unsafe(result, curve_secp160r1.p, 20) > 0) { 501 | uECC_vli_sub(result, result, curve_secp160r1.p, 20); 502 | } 503 | } 504 | #define asm_mmod_fast_secp160r1 1 505 | #endif /* uECC_SUPPORTS_secp160r1 */ 506 | 507 | #if uECC_SUPPORTS_secp256k1 508 | /* tentative declaration so the reduction code can name curve_secp256k1.p; the initialised definition is expected elsewhere - TODO confirm */ static const struct uECC_Curve_t curve_secp256k1; 509 | /* Fast reduction for secp256k1: folds the 64-byte product (X) into the 32-byte result (Y). A 37-byte scratch buffer is carved out of the stack; the high half of product is multiplied by the reduction constant into it, the scratch is added to the low half of product, and the overflow bytes are folded in once more. Overflows are counted in carry and resolved after the asm block by subtracting curve_secp256k1.p. */ static void vli_mmod_fast_secp256k1(uECC_word_t *result, uECC_word_t *product) { 510 | uint8_t carry = 0; 511 | __asm__ volatile ( 512 | /* reserve 37 bytes below SP; SREG is saved in r0 and interrupts are masked while the two halves of SP are rewritten */ "in r30, __SP_L__ \n\t" 513 | "in r31, __SP_H__ \n\t" 514 | "sbiw r30, 37 \n\t" 515 | "in r0, __SREG__ \n\t" 516 | "cli \n\t" 517 | "out __SP_H__, r31 \n\t" 518 | "out __SREG__, r0 \n\t" 519 | "out __SP_L__, r30 \n\t" 520 | 521 | "adiw r30, 1 \n\t" /* add 1 since z initially points below the stack */ 522 | "adiw r26, 32 \n\t" /* product + uECC_WORDS */ 523 | /* r25:r24 = 0x03D1, the 16-bit multiplier part of the reduction constant; the 2^32 part is added separately further down */ "ldi r25, 0x03 \n\t" 524 | "ldi r24, 0xD1 \n\t" 525 | "ld r18, x+ \n\t" 526 | "ld r19, x+ \n\t" 527 | "ld r20, x+ \n\t" 528 | "ld r21, x+ \n\t" 529 | 530 | "mul r24, r18 \n\t" 531 | "st z+, r0 \n\t" 532 | "mov r22, r1 \n\t" 533 | "ldi r23, 0 \n\t" 534 | 535 | "mul r24, r19 \n\t" 536 | 
"add r22, r0 \n\t" 537 | "adc r23, r1 \n\t" /* can't overflow */ 538 | "mul r25, r18 \n\t" 539 | "add r22, r0 \n\t" 540 | "adc r23, r1 \n\t" /* can't overflow */ 541 | "st z+, r22 \n\t" 542 | "ldi r22, 0 \n\t" 543 | 544 | "mul r24, r20 \n\t" 545 | "add r23, r0 \n\t" 546 | "adc r22, r1 \n\t" 547 | "mul r25, r19 \n\t" 548 | "add r23, r0 \n\t" 549 | "adc r22, r1 \n\t" 550 | "st z+, r23 \n\t" 551 | "ldi r23, 0 \n\t" 552 | 553 | "mul r24, r21 \n\t" 554 | "add r22, r0 \n\t" 555 | "adc r23, r1 \n\t" 556 | "mul r25, r20 \n\t" 557 | "add r22, r0 \n\t" 558 | "adc r23, r1 \n\t" 559 | "st z+, r22 \n\t" 560 | "ldi r22, 0 \n\t" 561 | 562 | /* now we start adding the 2^32 part as well */ 563 | "add r23, r18 \n\t" // 28 564 | "adc r22, r22 \n\t" 565 | "ld r18, x+ \n\t" 566 | "mul r24, r18 \n\t" 567 | "add r23, r0 \n\t" 568 | "adc r22, r1 \n\t" 569 | "mul r25, r21 \n\t" 570 | "add r23, r0 \n\t" 571 | "adc r22, r1 \n\t" 572 | "st z+, r23 \n\t" 573 | "ldi r23, 0 \n\t" 574 | 575 | "add r22, r19 \n\t" // 27 576 | "adc r23, r23 \n\t" 577 | "ld r19, x+ \n\t" 578 | "mul r24, r19 \n\t" 579 | "add r22, r0 \n\t" 580 | "adc r23, r1 \n\t" 581 | "mul r25, r18 \n\t" 582 | "add r22, r0 \n\t" 583 | "adc r23, r1 \n\t" 584 | "st z+, r22 \n\t" 585 | "ldi r22, 0 \n\t" 586 | 587 | REPEAT(6, // 26 - 3 588 | "add r23, r20 \n\t" 589 | "adc r22, r22 \n\t" 590 | "ld r20, x+ \n\t" 591 | "mul r24, r20 \n\t" 592 | "add r23, r0 \n\t" 593 | "adc r22, r1 \n\t" 594 | "mul r25, r19 \n\t" 595 | "add r23, r0 \n\t" 596 | "adc r22, r1 \n\t" 597 | "st z+, r23 \n\t" 598 | "ldi r23, 0 \n\t" 599 | 600 | "add r22, r21 \n\t" 601 | "adc r23, r23 \n\t" 602 | "ld r21, x+ \n\t" 603 | "mul r24, r21 \n\t" 604 | "add r22, r0 \n\t" 605 | "adc r23, r1 \n\t" 606 | "mul r25, r20 \n\t" 607 | "add r22, r0 \n\t" 608 | "adc r23, r1 \n\t" 609 | "st z+, r22 \n\t" 610 | "ldi r22, 0 \n\t" 611 | 612 | "add r23, r18 \n\t" 613 | "adc r22, r22 \n\t" 614 | "ld r18, x+ \n\t" 615 | "mul r24, r18 \n\t" 616 | "add r23, r0 \n\t" 617 | "adc r22, r1 \n\t" 
618 | "mul r25, r21 \n\t" 619 | "add r23, r0 \n\t" 620 | "adc r22, r1 \n\t" 621 | "st z+, r23 \n\t" 622 | "ldi r23, 0 \n\t" 623 | 624 | "add r22, r19 \n\t" 625 | "adc r23, r23 \n\t" 626 | "ld r19, x+ \n\t" 627 | "mul r24, r19 \n\t" 628 | "add r22, r0 \n\t" 629 | "adc r23, r1 \n\t" 630 | "mul r25, r18 \n\t" 631 | "add r22, r0 \n\t" 632 | "adc r23, r1 \n\t" 633 | "st z+, r22 \n\t" 634 | "ldi r22, 0 \n\t") 635 | 636 | "add r23, r20 \n\t" // 2 637 | "adc r22, r22 \n\t" 638 | "ld r20, x+ \n\t" 639 | "mul r24, r20 \n\t" 640 | "add r23, r0 \n\t" 641 | "adc r22, r1 \n\t" 642 | "mul r25, r19 \n\t" 643 | "add r23, r0 \n\t" 644 | "adc r22, r1 \n\t" 645 | "st z+, r23 \n\t" 646 | "ldi r23, 0 \n\t" 647 | 648 | "add r22, r21 \n\t" // 1 649 | "adc r23, r23 \n\t" 650 | "ld r21, x+ \n\t" 651 | "mul r24, r21 \n\t" 652 | "add r22, r0 \n\t" 653 | "adc r23, r1 \n\t" 654 | "mul r25, r20 \n\t" 655 | "add r22, r0 \n\t" 656 | "adc r23, r1 \n\t" 657 | "st z+, r22 \n\t" 658 | "ldi r22, 0 \n\t" 659 | 660 | /* Now finish the carries etc */ 661 | "add r23, r18 \n\t" 662 | "adc r22, r22 \n\t" 663 | "mul r25, r21 \n\t" 664 | "add r23, r0 \n\t" 665 | "adc r22, r1 \n\t" 666 | "st z+, r23 \n\t" 667 | "ldi r23, 0 \n\t" 668 | 669 | "add r22, r19 \n\t" 670 | "adc r23, r23 \n\t" 671 | "st z+, r22 \n\t" 672 | "ldi r22, 0 \n\t" 673 | 674 | "add r23, r20 \n\t" 675 | "adc r22, r22 \n\t" 676 | "st z+, r23 \n\t" 677 | "ldi r23, 0 \n\t" 678 | 679 | "add r22, r21 \n\t" 680 | "adc r23, r23 \n\t" 681 | "st z+, r22 \n\t" 682 | "st z+, r23 \n\t" 683 | "eor r1, r1 \n\t" /* make r1 be 0 again */ 684 | 685 | "sbiw r30, 37 \n\t" /* move z back to point at tmp */ 686 | "subi r26, 64 \n\t" /* move x back to point at product */ 687 | "sbc r27, __zero_reg__ \n\t" 688 | 689 | /* add low bytes of tmp to product, storing in result */ 690 | "ld r18, z+ \n\t" 691 | "ld r19, x+ \n\t" 692 | "add r18, r19 \n\t" 693 | "st y+, r18 \n\t" 694 | REPEAT(31, 695 | "ld r18, z+ \n\t" 696 | "ld r19, x+ \n\t" 697 | "adc r18, r19 \n\t" 698 | 
"st y+, r18 \n\t") 699 | 700 | "adc %[carry], __zero_reg__ \n\t" /* Store carry bit (carry flag is cleared). */ 701 | /* at this point x is at the end of product, y is at the end of result, 702 | z is 32 bytes into tmp */ 703 | "sbiw r28, 32 \n\t" /* move y back to point at result */ 704 | 705 | /* do omega_mult again with the 5 relevant bytes */ 706 | /* z points to tmp + uECC_WORDS, x points to the end of product */ 707 | "sbiw r26, 32 \n\t" /* shift x back to point into the product buffer 708 | (we can overwrite it now) */ 709 | "ld r18, z+ \n\t" 710 | "ld r19, z+ \n\t" 711 | "ld r20, z+ \n\t" 712 | "ld r21, z+ \n\t" 713 | 714 | "mul r24, r18 \n\t" 715 | "st x+, r0 \n\t" 716 | "mov r22, r1 \n\t" 717 | "ldi r23, 0 \n\t" 718 | 719 | "mul r24, r19 \n\t" 720 | "add r22, r0 \n\t" 721 | "adc r23, r1 \n\t" /* can't overflow */ 722 | "mul r25, r18 \n\t" 723 | "add r22, r0 \n\t" 724 | "adc r23, r1 \n\t" /* can't overflow */ 725 | "st x+, r22 \n\t" 726 | "ldi r22, 0 \n\t" 727 | 728 | "mul r24, r20 \n\t" 729 | "add r23, r0 \n\t" 730 | "adc r22, r1 \n\t" 731 | "mul r25, r19 \n\t" 732 | "add r23, r0 \n\t" 733 | "adc r22, r1 \n\t" 734 | "st x+, r23 \n\t" 735 | "ldi r23, 0 \n\t" 736 | 737 | "mul r24, r21 \n\t" 738 | "add r22, r0 \n\t" 739 | "adc r23, r1 \n\t" 740 | "mul r25, r20 \n\t" 741 | "add r22, r0 \n\t" 742 | "adc r23, r1 \n\t" 743 | "st x+, r22 \n\t" 744 | "ldi r22, 0 \n\t" 745 | 746 | "add r23, r18 \n\t" 747 | "adc r22, r22 \n\t" 748 | "ld r18, z+ \n\t" 749 | "mul r24, r18 \n\t" 750 | "add r23, r0 \n\t" 751 | "adc r22, r1 \n\t" 752 | "mul r25, r21 \n\t" 753 | "add r23, r0 \n\t" 754 | "adc r22, r1 \n\t" 755 | "st x+, r23 \n\t" 756 | "ldi r23, 0 \n\t" 757 | 758 | /* Now finish the carries etc */ 759 | "add r22, r19 \n\t" 760 | "adc r23, r23 \n\t" 761 | "mul r25, r18 \n\t" 762 | "add r22, r0 \n\t" 763 | "adc r23, r1 \n\t" 764 | "st x+, r22 \n\t" 765 | "ldi r22, 0 \n\t" 766 | 767 | "add r23, r20 \n\t" 768 | "adc r22, r22 \n\t" 769 | "st x+, r23 \n\t" 770 | "ldi r23, 0 
\n\t" 771 | 772 | "add r22, r21 \n\t" 773 | "adc r23, r23 \n\t" 774 | "st x+, r22 \n\t" 775 | "ldi r22, 0 \n\t" 776 | 777 | "add r23, r18 \n\t" 778 | "adc r22, r22 \n\t" 779 | "st x+, r23 \n\t" 780 | "st x+, r22 \n\t" 781 | "eor r1, r1 \n\t" /* make r1 be 0 again */ 782 | 783 | /* now z points to the end of tmp, x points to the end of product 784 | (y still points at result) */ 785 | "sbiw r26, 10 \n\t" /* move x back to point at beginning of actual data */ 786 | /* add into result */ 787 | "ld r18, x+ \n\t" 788 | "ld r19, y \n\t" 789 | "add r18, r19 \n\t" 790 | "st y+, r18 \n\t" 791 | REPEAT(9, 792 | "ld r18, x+ \n\t" 793 | "ld r19, y \n\t" 794 | "adc r18, r19 \n\t" 795 | "st y+, r18 \n\t") 796 | 797 | /* Done adding, now propagate carry bit */ 798 | REPEAT(22, 799 | "ld r18, y \n\t" 800 | "adc r18, __zero_reg__ \n\t" 801 | "st y+, r18 \n\t") 802 | 803 | "adc %[carry], __zero_reg__ \n\t" /* Store carry bit (carry flag is cleared). */ 804 | "sbiw r28, 32 \n\t" /* move y back to point at result */ 805 | 806 | "sbiw r30, 1 \n\t" /* fix stack pointer */ 807 | "in r0, __SREG__ \n\t" 808 | "cli \n\t" 809 | "out __SP_H__, r31 \n\t" 810 | "out __SREG__, r0 \n\t" 811 | "out __SP_L__, r30 \n\t" 812 | 813 | : "+x" (product), [carry] "+r" (carry) 814 | : "y" (result) 815 | : "r0", "r18", "r19", "r20", "r21", "r22", "r23", "r24", "r25", "r30", "r31", "cc" 816 | ); 817 | 818 | if (carry > 0) { 819 | --carry; 820 | uECC_vli_sub(result, result, curve_secp256k1.p, 32); 821 | } 822 | if (carry > 0) { 823 | uECC_vli_sub(result, result, curve_secp256k1.p, 32); 824 | } 825 | if (uECC_vli_cmp_unsafe(result, curve_secp256k1.p, 32) > 0) { 826 | uECC_vli_sub(result, result, curve_secp256k1.p, 32); 827 | } 828 | } 829 | #define asm_mmod_fast_secp256k1 1 830 | #endif /* uECC_SUPPORTS_secp256k1 */ 831 | 832 | #endif /* (uECC_OPTIMIZATION_LEVEL >= 2) */ 833 | 834 | /* ---- "Small" implementations ---- */ 835 | 836 | #if !asm_add 837 | uECC_VLI_API uECC_word_t uECC_vli_add(uECC_word_t *result, 
838 | const uECC_word_t *left, 839 | const uECC_word_t *right, 840 | wordcount_t num_words) { 841 | volatile uECC_word_t *r = result; 842 | uint8_t carry = 0; 843 | uint8_t left_byte; 844 | uint8_t right_byte; 845 | 846 | __asm__ volatile ( 847 | /* start with the carry flag clear; the loop body runs once before the count is tested */ "clc \n\t" 848 | 849 | "1: \n\t" 850 | "ld %[left], x+ \n\t" /* Load left byte. */ 851 | "ld %[right], y+ \n\t" /* Load right byte. */ 852 | "adc %[left], %[right] \n\t" /* Add. */ 853 | "st z+, %[left] \n\t" /* Store the result. */ 854 | /* dec does not modify the carry flag, so the carry from adc survives into the next iteration */ "dec %[i] \n\t" 855 | "brne 1b \n\t" 856 | 857 | "adc %[carry], %[carry] \n\t" /* Store carry bit. */ 858 | 859 | : "+z" (r), "+x" (left), "+y" (right), [i] "+r" (num_words), 860 | [carry] "+r" (carry), [left] "=&r" (left_byte), [right] "=&r" (right_byte) 861 | : 862 | : "cc" 863 | ); 864 | return carry; 865 | } 866 | #define asm_add 1 867 | #endif 868 | 869 | #if !asm_sub 870 | /* Compact fallback used when no unrolled uECC_vli_sub was defined above. Subtracts right from left one byte per loop iteration with borrow propagation, writes the difference to result and returns the final borrow (borrow starts at 0 and the carry flag is added into it). NOTE(review): the count is decremented and tested after the first iteration, so num_words == 0 would not exit immediately - callers presumably never pass 0. */ uECC_VLI_API uECC_word_t uECC_vli_sub(uECC_word_t *result, 871 | const uECC_word_t *left, 872 | const uECC_word_t *right, 873 | wordcount_t num_words) { 874 | volatile uECC_word_t *r = result; 875 | uint8_t borrow = 0; 876 | uint8_t left_byte; 877 | uint8_t right_byte; 878 | 879 | __asm__ volatile ( 880 | "clc \n\t" 881 | 882 | "1: \n\t" 883 | "ld %[left], x+ \n\t" /* Load left byte. */ 884 | "ld %[right], y+ \n\t" /* Load right byte. */ 885 | "sbc %[left], %[right] \n\t" /* Subtract. */ 886 | "st z+, %[left] \n\t" /* Store the result. */ 887 | /* dec does not modify the carry flag, so the borrow from sbc survives into the next iteration */ "dec %[i] \n\t" 888 | "brne 1b \n\t" 889 | 890 | "adc %[borrow], %[borrow] \n\t" /* Store carry bit in borrow. 
*/ 891 | 892 | : "+z" (r), "+x" (left), "+y" (right), [i] "+r" (num_words), 893 | [borrow] "+r" (borrow), [left] "=&r" (left_byte), [right] "=&r" (right_byte) 894 | : 895 | : "cc" 896 | ); 897 | return borrow; 898 | } 899 | #define asm_sub 1 900 | #endif 901 | 902 | #if !asm_mult 903 | /* Compact fallback used when no unrolled uECC_vli_mult was defined above. Computes result = left * right one output byte at a time: for each output index it sums the byte products of left walked upward (X) and right walked downward (Y) into the three-byte accumulator held in the C variables r2:r1:r0, stores the low byte through Z and shifts the accumulator down by one byte. The first loop handles k = 1 .. num_words - 1, the second counts k down from num_words. The local r is declared but the asm binds result directly. */ __attribute((noinline)) 904 | uECC_VLI_API void uECC_vli_mult(uECC_word_t *result, 905 | const uECC_word_t *left, 906 | const uECC_word_t *right, 907 | wordcount_t num_words) { 908 | volatile uECC_word_t *r = result; 909 | uint8_t r0 = 0; 910 | uint8_t r1 = 0; 911 | uint8_t r2 = 0; 912 | uint8_t zero = 0; 913 | uint8_t k, i; 914 | 915 | __asm__ volatile ( 916 | "ldi %[k], 1 \n\t" /* k = 1; k < num_words; ++k */ 917 | 918 | "1: \n\t" 919 | "ldi %[i], 0 \n\t" /* i = 0; i < k; ++i */ 920 | 921 | "add r28, %[k] \n\t" /* pre-add right ptr */ 922 | "adc r29, %[zero] \n\t" 923 | 924 | "2: \n\t" 925 | "ld r0, x+ \n\t" 926 | "ld r1, -y \n\t" 927 | /* the product lands in hardware r1:r0; a separate zero operand is used for carries because hardware r1 is not zero inside this block */ "mul r0, r1 \n\t" 928 | 929 | "add %[r0], r0 \n\t" 930 | "adc %[r1], r1 \n\t" 931 | "adc %[r2], %[zero] \n\t" 932 | 933 | "inc %[i] \n\t" 934 | "cp %[i], %[k] \n\t" 935 | "brlo 2b \n\t" /* loop if i < k */ 936 | 937 | "sub r26, %[k] \n\t" /* fix up left ptr */ 938 | "sbc r27, %[zero] \n\t" 939 | 940 | "st z+, %[r0] \n\t" /* Store the result. 
*/ 941 | "mov %[r0], %[r1] \n\t" 942 | "mov %[r1], %[r2] \n\t" 943 | "mov %[r2], %[zero] \n\t" 944 | 945 | "inc %[k] \n\t" 946 | "cp %[k], %[num] \n\t" 947 | "brlo 1b \n\t" /* loop if k < num_words */ 948 | 949 | /* second half */ 950 | "mov %[k], %[num] \n\t" /* k = num_words; k > 0; --k */ 951 | "add r28, %[num] \n\t" /* move right ptr to point at the end of right */ 952 | "adc r29, %[zero] \n\t" 953 | 954 | "1: \n\t" 955 | "ldi %[i], 0 \n\t" /* i = 0; i < k; ++i */ 956 | 957 | "2: \n\t" 958 | "ld r0, x+ \n\t" 959 | "ld r1, -y \n\t" 960 | "mul r0, r1 \n\t" 961 | 962 | "add %[r0], r0 \n\t" 963 | "adc %[r1], r1 \n\t" 964 | "adc %[r2], %[zero] \n\t" 965 | 966 | "inc %[i] \n\t" 967 | "cp %[i], %[k] \n\t" 968 | "brlo 2b \n\t" /* loop if i < k */ 969 | 970 | "add r28, %[k] \n\t" /* fix up right ptr */ 971 | "adc r29, %[zero] \n\t" 972 | 973 | "st z+, %[r0] \n\t" /* Store the result. */ 974 | "mov %[r0], %[r1] \n\t" 975 | "mov %[r1], %[r2] \n\t" 976 | "mov %[r2], %[zero] \n\t" 977 | 978 | "dec %[k] \n\t" 979 | "sub r26, %[k] \n\t" /* fix up left ptr (after k is decremented, so next time 980 | we start 1 higher) */ 981 | "sbc r27, %[zero] \n\t" 982 | 983 | "cp %[k], %[zero] \n\t" 984 | "brne 1b \n\t" /* loop if k > 0 */ 985 | 986 | "st z+, %[r0] \n\t" /* Store last result byte. 
*/ 987 | "eor r1, r1 \n\t" /* fix r1 to be 0 again */ 988 | 989 | : "+z" (result), "+x" (left), "+y" (right), 990 | [r0] "+r" (r0), [r1] "+r" (r1), [r2] "+r" (r2), 991 | [zero] "+r" (zero), [num] "+r" (num_words), 992 | /* k and i are initialised with ldi, which only accepts r16-r31, so they use the d register class rather than plain r */ [k] "=&d" (k), [i] "=&d" (i) 993 | : 994 | : "r0", "cc" 995 | ); 996 | } 997 | #define asm_mult 1 998 | #endif 999 | 1000 | #if (uECC_SQUARE_FUNC && !asm_square) 1001 | /* Compact fallback used when no unrolled uECC_vli_square was defined above. For each output byte index k (starting at 1, bounded by 2 * num_words) it walks a left pointer (X) upward and a right pointer (Z) downward over the same operand. Each cross product is added twice into the three-byte accumulator r2:r1:r0, and the product where the two pointers meet is added once. Z is shared between the right pointer and the result pointer, so the result pointer is reloaded from its operand before each store. */ uECC_VLI_API void uECC_vli_square(uECC_word_t *result, 1002 | const uECC_word_t *left, 1003 | wordcount_t num_words) { 1004 | volatile uECC_word_t *r = result; 1005 | uint8_t r0 = 0; 1006 | uint8_t r1 = 0; 1007 | uint8_t r2 = 0; 1008 | uint8_t zero = 0; 1009 | uint8_t k; 1010 | 1011 | __asm__ volatile ( 1012 | "ldi %[k], 1 \n\t" /* k = 1; k < num_words * 2; ++k */ 1013 | 1014 | "1: \n\t" 1015 | 1016 | "movw r26, %[orig] \n\t" /* copy orig ptr to 'left' ptr */ 1017 | "movw r30, %[orig] \n\t" /* copy orig ptr to 'right' ptr */ 1018 | "cp %[k], %[num] \n\t" 1019 | "brlo 2f \n\t" 1020 | "breq 2f \n\t" 1021 | 1022 | /* when k > num_words, we start from (k - num_words) on the 'left' ptr */ 1023 | "add r26, %[k] \n\t" 1024 | "adc r27, %[zero] \n\t" 1025 | "sub r26, %[num] \n\t" 1026 | "sbc r27, %[zero] \n\t" 1027 | "add r30, %[num] \n\t" /* move right ptr to point at the end */ 1028 | "adc r31, %[zero] \n\t" 1029 | "rjmp 3f \n\t" 1030 | 1031 | "2: \n\t" /* when k <= num_words, we add k to the 'right' ptr */ 1032 | "add r30, %[k] \n\t" /* pre-add 'right' ptr */ 1033 | "adc r31, %[zero] \n\t" 1034 | 1035 | "3: \n\t" 1036 | "ld r0, x+ \n\t" 1037 | /* only the low pointer bytes are compared */ "cp r26, r30 \n\t" /* if left == right here, then we are done after this mult 1038 | (and we don't need to double) */ 1039 | "breq 4f \n\t" 1040 | "ld r1, -z \n\t" 1041 | "mul r0, r1 \n\t" 1042 | 1043 | /* add twice since it costs the same as doubling */ 1044 | "add %[r0], r0 \n\t" 1045 | "adc %[r1], r1 \n\t" 1046 | "adc %[r2], %[zero] \n\t" 1047 | "add %[r0], r0 \n\t" 1048 | "adc %[r1], r1 \n\t" 1049 | "adc %[r2], %[zero] \n\t" 1050 | 1051 | "cpse 
r26, r30 \n\t" /* if left == right here, then we are done */ 1052 | "rjmp 3b \n\t" 1053 | "rjmp 5f \n\t" /* skip code for non-doubled mult */ 1054 | 1055 | "4: \n\t" 1056 | "ld r1, -z \n\t" 1057 | "mul r0, r1 \n\t" 1058 | "add %[r0], r0 \n\t" 1059 | "adc %[r1], r1 \n\t" 1060 | "adc %[r2], %[zero] \n\t" 1061 | 1062 | "5: \n\t" 1063 | "movw r30, %[result] \n\t" /* make z point to result */ 1064 | "st z+, %[r0] \n\t" /* Store the result. */ 1065 | "movw %[result], r30 \n\t" /* update result ptr*/ 1066 | "mov %[r0], %[r1] \n\t" 1067 | "mov %[r1], %[r2] \n\t" 1068 | "mov %[r2], %[zero] \n\t" 1069 | 1070 | "inc %[k] \n\t" 1071 | "cp %[k], %[max] \n\t" 1072 | "brlo 1b \n\t" /* loop if k < num_words * 2 */ 1073 | 1074 | "movw r30, %[result] \n\t" /* make z point to result */ 1075 | "st z+, %[r0] \n\t" /* Store last result byte. */ 1076 | "eor r1, r1 \n\t" /* fix r1 to be 0 again */ 1077 | 1078 | : [result] "+r" (r), 1079 | [r0] "+r" (r0), [r1] "+r" (r1), [r2] "+r" (r2), [zero] "+r" (zero), 1080 | [k] "=&a" (k) 1081 | : [orig] "r" (left), [max] "r" ((uint8_t)(2 * num_words)), 1082 | [num] "r" (num_words) 1083 | : "r0", "r26", "r27", "r30", "r31", "cc" 1084 | ); 1085 | } 1086 | #define asm_square 1 1087 | #endif /* uECC_SQUARE_FUNC && !asm_square */ 1088 | 1089 | #endif /* _UECC_ASM_AVR_H_ */ 1090 | -------------------------------------------------------------------------------- /emk_project.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | c, link, asm, utils = emk.module("c", "link", "asm", "utils") 4 | 5 | default_compile_flags = ["-fvisibility=hidden", "-Wall", "-Wextra", "-Wshadow", "-Werror", "-Wno-missing-field-initializers", "-Wno-unused-parameter", \ 6 | "-Wno-comment", "-Wno-unused", "-Wno-unknown-pragmas"] 7 | default_link_flags = [] 8 | opt_flags = {"dbg":["-g"], "std":["-O2"], "max":["-O3"], "small":["-Os"]} 9 | opt_link_flags = {"dbg":[], "std":[], "max":[], "small":[]} 10 | c_flags = ["-std=c99"] 11 | 
cxx_flags = ["-std=c++11", "-Wno-reorder", "-fno-rtti", "-fno-exceptions"] 12 | c_link_flags = [] 13 | cxx_link_flags = ["-fno-rtti", "-fno-exceptions"] 14 | 15 | if "root" in emk.options: 16 | root = emk.options["root"] 17 | else: 18 | root = "/" 19 | 20 | def setup_build_dir(): 21 | build_arch = None 22 | if "arch" in emk.options: 23 | build_arch = emk.options["arch"] 24 | elif not emk.cleaning: 25 | build_arch = "osx" 26 | emk.options["arch"] = build_arch 27 | 28 | opt_level = None 29 | if "opt" in emk.options: 30 | level = emk.options["opt"] 31 | if level in opt_flags: 32 | opt_level = level 33 | else: 34 | emk.log.warning("Unknown optimization level '%s'" % (level)) 35 | elif not emk.cleaning: 36 | opt_level = "dbg" 37 | emk.options["opt"] = opt_level 38 | 39 | dirs = ["__build__"] 40 | if build_arch: 41 | dirs.append(build_arch) 42 | if opt_level: 43 | dirs.append(opt_level) 44 | emk.build_dir = os.path.join(*dirs) 45 | 46 | def setup_osx(): 47 | global c 48 | global link 49 | 50 | flags = [("-arch", "x86_64"), "-fno-common", "-Wnewline-eof"] 51 | c.flags.extend(flags) 52 | c.cxx.flags += ["-stdlib=libc++"] 53 | link.cxx.flags += ["-stdlib=libc++"] 54 | 55 | link_flags = [("-arch", "x86_64")] 56 | link.local_flags.extend(link_flags) 57 | 58 | def setup_avr(): 59 | global c 60 | global link 61 | 62 | c.compiler = c.GccCompiler(root + "Projects/avr-tools/bin/avr-") 63 | c.flags += ["-mmcu=atmega256rfr2", "-ffunction-sections", "-fdata-sections"] 64 | link.linker = link.GccLinker(root + "Projects/avr-tools/bin/avr-") 65 | link.flags += ["-mmcu=atmega256rfr2", "-mrelax", "-Wl,--gc-sections"] 66 | link.strip = True 67 | 68 | def setup_arm_thumb(): 69 | global c 70 | global link 71 | global asm 72 | global utils 73 | 74 | asm.assembler = asm.GccAssembler(root + "cross/arm_cortex/bin/arm-none-eabi-") 75 | c.compiler = c.GccCompiler(root + "cross/arm_cortex/bin/arm-none-eabi-") 76 | link.linker = link.GccLinker(root + "cross/arm_cortex/bin/arm-none-eabi-") 77 | 78 | 
c.flags.extend(["-mcpu=cortex-m0", "-mthumb", "-ffunction-sections", "-fdata-sections", "-fno-builtin-fprintf", "-fno-builtin-printf"]) 79 | c.defines["LPC11XX"] = 1 80 | 81 | link.local_flags.extend(["-mcpu=cortex-m0", "-mthumb", "-nostartfiles", "-nostdlib", "-Wl,--gc-sections"]) 82 | link.local_flags.extend(["-Tflash.lds", "-L" + root + "Projects/lpc11xx/core", root + "Projects/lpc11xx/core/" + emk.build_dir + "/board_cstartup.o"]) 83 | link.local_syslibs += ["gcc"] 84 | link.depdirs += [root + "Projects/lpc11xx/stdlib"] 85 | 86 | def do_objcopy(produces, requires): 87 | utils.call(root + "cross/arm_cortex/bin/arm-none-eabi-objcopy", "-O", "binary", requires[0], produces[0]) 88 | 89 | def handle_exe(path): 90 | emk.depend(path, root + "Projects/lpc11xx/core/" + emk.build_dir + "/board_cstartup.o") 91 | emk.rule(do_objcopy, path + ".bin", path, cwd_safe=True, ex_safe=True) 92 | emk.autobuild(path + ".bin") 93 | 94 | link.exe_funcs.append(handle_exe) 95 | link.strip = True 96 | 97 | emk.recurse(root + "Projects/lpc11xx/core") 98 | 99 | def setup_linux_rpi(): 100 | global c 101 | global link 102 | 103 | c.compiler = c.GccCompiler("/Volumes/xtools/arm-none-linux-gnueabi/bin/arm-none-linux-gnueabi-") 104 | link.linker = link.GccLinker("/Volumes/xtools/arm-none-linux-gnueabi/bin/arm-none-linux-gnueabi-") 105 | 106 | c.flags.extend(["-fomit-frame-pointer"]) 107 | 108 | setup_build_dir() 109 | 110 | setup_funcs = {"osx":setup_osx, "avr":setup_avr, "arm_thumb":setup_arm_thumb, "rpi": setup_linux_rpi} 111 | 112 | if not emk.cleaning: 113 | build_arch = emk.options["arch"] 114 | opt_level = emk.options["opt"] 115 | 116 | c.flags.extend(default_compile_flags) 117 | c.flags.extend(opt_flags[opt_level]) 118 | c.c.flags.extend(c_flags) 119 | c.cxx.flags.extend(cxx_flags) 120 | link.local_flags.extend(default_link_flags) 121 | link.local_flags.extend(opt_link_flags[opt_level]) 122 | link.c.local_flags.extend(c_link_flags) 123 | link.cxx.local_flags.extend(cxx_link_flags) 124 | 
125 | c.include_dirs.append("$:proj:$") 126 | 127 | if build_arch in setup_funcs: 128 | setup_funcs[build_arch]() 129 | else: 130 | raise emk.BuildError("Unknown target arch '%s'" % (build_arch)) 131 | 132 | c.defines["TARGET_ARCH_" + build_arch.upper()] = 1 133 | -------------------------------------------------------------------------------- /emk_rules.py: -------------------------------------------------------------------------------- 1 | c, link = emk.module("c", "link") 2 | 3 | emk.subdir("test") 4 | -------------------------------------------------------------------------------- /examples/ecc_test/ecc_test.ino: -------------------------------------------------------------------------------- 1 | #include <uECC.h> 2 | 3 | static int RNG(uint8_t *dest, unsigned size) { 4 | // Use the least-significant bits from the ADC for an unconnected pin (or connected to a source of 5 | // random noise). This can take a long time to generate random data if the result of analogRead(0) 6 | // doesn't change very frequently. 7 | while (size) { 8 | uint8_t val = 0; 9 | for (unsigned i = 0; i < 8; ++i) { 10 | int init = analogRead(0); 11 | int count = 0; 12 | while (analogRead(0) == init) { 13 | ++count; 14 | } 15 | 16 | if (count == 0) { 17 | val = (val << 1) | (init & 0x01); 18 | } else { 19 | val = (val << 1) | (count & 0x01); 20 | } 21 | } 22 | *dest = val; 23 | ++dest; 24 | --size; 25 | } 26 | // NOTE: it would be a good idea to hash the resulting random data using SHA-256 or similar.
27 | return 1; 28 | } 29 | 30 | void setup() { 31 | Serial.begin(115200); 32 | Serial.print("Testing ecc\n"); 33 | uECC_set_rng(&RNG); 34 | } 35 | 36 | void loop() { 37 | const struct uECC_Curve_t * curve = uECC_secp160r1(); 38 | uint8_t private1[21]; 39 | uint8_t private2[21]; 40 | 41 | uint8_t public1[40]; 42 | uint8_t public2[40]; 43 | 44 | uint8_t secret1[20]; 45 | uint8_t secret2[20]; 46 | 47 | unsigned long a = millis(); 48 | uECC_make_key(public1, private1, curve); 49 | unsigned long b = millis(); 50 | 51 | Serial.print("Made key 1 in "); Serial.println(b-a); 52 | a = millis(); 53 | uECC_make_key(public2, private2, curve); 54 | b = millis(); 55 | Serial.print("Made key 2 in "); Serial.println(b-a); 56 | 57 | a = millis(); 58 | int r = uECC_shared_secret(public2, private1, secret1, curve); 59 | b = millis(); 60 | Serial.print("Shared secret 1 in "); Serial.println(b-a); 61 | if (!r) { 62 | Serial.print("shared_secret() failed (1)\n"); 63 | return; 64 | } 65 | 66 | a = millis(); 67 | r = uECC_shared_secret(public1, private2, secret2, curve); 68 | b = millis(); 69 | Serial.print("Shared secret 2 in "); Serial.println(b-a); 70 | if (!r) { 71 | Serial.print("shared_secret() failed (2)\n"); 72 | return; 73 | } 74 | 75 | if (memcmp(secret1, secret2, 20) != 0) { 76 | Serial.print("Shared secrets are not identical!\n"); 77 | } else { 78 | Serial.print("Shared secrets are identical\n"); 79 | } 80 | } 81 | -------------------------------------------------------------------------------- /library.properties: -------------------------------------------------------------------------------- 1 | name=micro-ecc 2 | version=1.0.0 3 | author=Kenneth MacKay 4 | maintainer=Kenneth MacKay 5 | sentence=uECC 6 | paragraph=A small and fast ECDH and ECDSA implementation for 8-bit, 32-bit, and 64-bit processors. 
7 | category=Other 8 | url=https://github.com/kmackay/micro-ecc 9 | architectures=* 10 | -------------------------------------------------------------------------------- /platform-specific.inc: -------------------------------------------------------------------------------- 1 | /* Copyright 2015, Kenneth MacKay. Licensed under the BSD 2-clause license. */ 2 | 3 | #ifndef _UECC_PLATFORM_SPECIFIC_H_ 4 | #define _UECC_PLATFORM_SPECIFIC_H_ 5 | 6 | #include "types.h" 7 | 8 | #if (defined(_WIN32) || defined(_WIN64)) 9 | /* Windows */ 10 | 11 | // use pragma syntax to prevent tweaking the linker script for getting CryptXYZ function 12 | #pragma comment(lib, "crypt32.lib") 13 | #pragma comment(lib, "advapi32.lib") 14 | 15 | #define WIN32_LEAN_AND_MEAN 16 | #include <windows.h> 17 | #include <wincrypt.h> 18 | /* Default RNG for Windows: asks the CryptoAPI provider to write size random bytes into dest. Returns 1 on success, 0 if the provider cannot be acquired or the generation call fails. */ 19 | static int default_RNG(uint8_t *dest, unsigned size) { 20 | HCRYPTPROV prov; 21 | int ok; 22 | if (!CryptAcquireContext(&prov, NULL, NULL, PROV_RSA_FULL, CRYPT_VERIFYCONTEXT)) { 23 | return 0; 24 | } 25 | ok = CryptGenRandom(prov, size, (BYTE *)dest) != 0; /* propagate a failed generation instead of always reporting success */ 26 | CryptReleaseContext(prov, 0); 27 | return ok; 28 | } 29 | #define default_RNG_defined 1 30 | 31 | #elif defined(unix) || defined(__linux__) || defined(__unix__) || defined(__unix) || \ 32 | (defined(__APPLE__) && defined(__MACH__)) || defined(uECC_POSIX) 33 | 34 | /* Some POSIX-like system with /dev/urandom or /dev/random.
*/ 35 | #include <sys/types.h> 36 | #include <fcntl.h> 37 | #include <unistd.h> 38 | 39 | #ifndef O_CLOEXEC 40 | #define O_CLOEXEC 0 41 | #endif 42 | 43 | static int default_RNG(uint8_t *dest, unsigned size) { 44 | int fd = open("/dev/urandom", O_RDONLY | O_CLOEXEC); 45 | if (fd == -1) { 46 | fd = open("/dev/random", O_RDONLY | O_CLOEXEC); 47 | if (fd == -1) { 48 | return 0; 49 | } 50 | } 51 | 52 | char *ptr = (char *)dest; 53 | size_t left = size; 54 | while (left > 0) { 55 | ssize_t bytes_read = read(fd, ptr, left); 56 | if (bytes_read <= 0) { // read failed 57 | close(fd); 58 | return 0; 59 | } 60 | left -= bytes_read; 61 | ptr += bytes_read; 62 | } 63 | 64 | close(fd); 65 | return 1; 66 | } 67 | #define default_RNG_defined 1 68 | 69 | #elif defined(RIOT_VERSION) 70 | 71 | #include <random.h> 72 | 73 | static int default_RNG(uint8_t *dest, unsigned size) { 74 | random_bytes(dest, size); 75 | return 1; 76 | } 77 | #define default_RNG_defined 1 78 | 79 | #elif defined(NRF52_SERIES) 80 | 81 | #include "app_error.h" 82 | #include "nrf_crypto_rng.h" 83 | 84 | static int default_RNG(uint8_t *dest, unsigned size) 85 | { 86 | // make sure to call nrf_crypto_init and nrf_crypto_rng_init first 87 | ret_code_t ret_code = nrf_crypto_rng_vector_generate(dest, size); 88 | return (ret_code == NRF_SUCCESS) ?
1 : 0; 89 | } 90 | #define default_RNG_defined 1 91 | 92 | #endif /* platform */ 93 | 94 | #endif /* _UECC_PLATFORM_SPECIFIC_H_ */ 95 | -------------------------------------------------------------------------------- /scripts/mult_arm.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | import sys 4 | 5 | if len(sys.argv) < 2: 6 | print "Provide the integer size in 32-bit words" 7 | sys.exit(1) 8 | 9 | size = int(sys.argv[1]) 10 | 11 | full_rows = size // 3 12 | init_size = size % 3 13 | 14 | if init_size == 0: 15 | full_rows = full_rows - 1 16 | init_size = 3 17 | 18 | def emit(line, *args): 19 | s = '"' + line + r' \n\t"' 20 | print s % args 21 | 22 | rx = [3, 4, 5] 23 | ry = [6, 7, 8] 24 | 25 | #### set up registers 26 | emit("add r0, %s", (size - init_size) * 4) # move z 27 | emit("add r2, %s", (size - init_size) * 4) # move y 28 | 29 | emit("ldmia r1!, {%s}", ", ".join(["r%s" % (rx[i]) for i in xrange(init_size)])) 30 | emit("ldmia r2!, {%s}", ", ".join(["r%s" % (ry[i]) for i in xrange(init_size)])) 31 | 32 | print "" 33 | if init_size == 1: 34 | emit("umull r9, r10, r3, r6") 35 | emit("stmia r0!, {r9, r10}") 36 | else: 37 | #### first two multiplications of initial block 38 | emit("umull r11, r12, r3, r6") 39 | emit("stmia r0!, {r11}") 40 | print "" 41 | emit("mov r10, #0") 42 | emit("umull r11, r9, r3, r7") 43 | emit("adds r12, r12, r11") 44 | emit("adc r9, r9, #0") 45 | emit("umull r11, r14, r4, r6") 46 | emit("adds r12, r12, r11") 47 | emit("adcs r9, r9, r14") 48 | emit("adc r10, r10, #0") 49 | emit("stmia r0!, {r12}") 50 | print "" 51 | 52 | #### rest of initial block, with moving accumulator registers 53 | acc = [9, 10, 11, 12, 14] 54 | if init_size == 3: 55 | emit("mov r%s, #0", acc[2]) 56 | for i in xrange(0, 3): 57 | emit("umull r%s, r%s, r%s, r%s", acc[3], acc[4], rx[i], ry[2 - i]) 58 | emit("adds r%s, r%s, r%s", acc[0], acc[0], acc[3]) 59 | emit("adcs r%s, r%s, r%s", acc[1], acc[1], 
acc[4]) 60 | emit("adc r%s, r%s, #0", acc[2], acc[2]) 61 | emit("stmia r0!, {r%s}", acc[0]) 62 | print "" 63 | acc = acc[1:] + acc[:1] 64 | 65 | emit("mov r%s, #0", acc[2]) 66 | for i in xrange(0, 2): 67 | emit("umull r%s, r%s, r%s, r%s", acc[3], acc[4], rx[i + 1], ry[2 - i]) 68 | emit("adds r%s, r%s, r%s", acc[0], acc[0], acc[3]) 69 | emit("adcs r%s, r%s, r%s", acc[1], acc[1], acc[4]) 70 | emit("adc r%s, r%s, #0", acc[2], acc[2]) 71 | emit("stmia r0!, {r%s}", acc[0]) 72 | print "" 73 | acc = acc[1:] + acc[:1] 74 | 75 | emit("umull r%s, r%s, r%s, r%s", acc[3], acc[4], rx[init_size-1], ry[init_size-1]) 76 | emit("adds r%s, r%s, r%s", acc[0], acc[0], acc[3]) 77 | emit("adc r%s, r%s, r%s", acc[1], acc[1], acc[4]) 78 | emit("stmia r0!, {r%s}", acc[0]) 79 | emit("stmia r0!, {r%s}", acc[1]) 80 | print "" 81 | 82 | #### reset y and z pointers 83 | emit("sub r0, %s", (2 * init_size + 3) * 4) 84 | emit("sub r2, %s", (init_size + 3) * 4) 85 | 86 | #### load y registers 87 | emit("ldmia r2!, {%s}", ", ".join(["r%s" % (ry[i]) for i in xrange(3)])) 88 | 89 | #### load additional x registers 90 | if init_size != 3: 91 | emit("ldmia r1!, {%s}", ", ".join(["r%s" % (rx[i]) for i in xrange(init_size, 3)])) 92 | print "" 93 | 94 | prev_size = init_size 95 | for row in xrange(full_rows): 96 | emit("umull r11, r12, r3, r6") 97 | emit("stmia r0!, {r11}") 98 | print "" 99 | emit("mov r10, #0") 100 | emit("umull r11, r9, r3, r7") 101 | emit("adds r12, r12, r11") 102 | emit("adc r9, r9, #0") 103 | emit("umull r11, r14, r4, r6") 104 | emit("adds r12, r12, r11") 105 | emit("adcs r9, r9, r14") 106 | emit("adc r10, r10, #0") 107 | emit("stmia r0!, {r12}") 108 | print "" 109 | 110 | acc = [9, 10, 11, 12, 14] 111 | emit("mov r%s, #0", acc[2]) 112 | for i in xrange(0, 3): 113 | emit("umull r%s, r%s, r%s, r%s", acc[3], acc[4], rx[i], ry[2 - i]) 114 | emit("adds r%s, r%s, r%s", acc[0], acc[0], acc[3]) 115 | emit("adcs r%s, r%s, r%s", acc[1], acc[1], acc[4]) 116 | emit("adc r%s, r%s, #0", acc[2], 
acc[2]) 117 | emit("stmia r0!, {r%s}", acc[0]) 118 | print "" 119 | acc = acc[1:] + acc[:1] 120 | 121 | #### now we need to start shifting x and loading from z 122 | x_regs = [3, 4, 5] 123 | for r in xrange(0, prev_size): 124 | x_regs = x_regs[1:] + x_regs[:1] 125 | emit("ldmia r1!, {r%s}", x_regs[2]) 126 | emit("mov r%s, #0", acc[2]) 127 | for i in xrange(0, 3): 128 | emit("umull r%s, r%s, r%s, r%s", acc[3], acc[4], x_regs[i], ry[2 - i]) 129 | emit("adds r%s, r%s, r%s", acc[0], acc[0], acc[3]) 130 | emit("adcs r%s, r%s, r%s", acc[1], acc[1], acc[4]) 131 | emit("adc r%s, r%s, #0", acc[2], acc[2]) 132 | emit("ldr r%s, [r0]", acc[3]) # load stored value from initial block, and add to accumulator 133 | emit("adds r%s, r%s, r%s", acc[0], acc[0], acc[3]) 134 | emit("adcs r%s, r%s, #0", acc[1], acc[1]) 135 | emit("adc r%s, r%s, #0", acc[2], acc[2]) 136 | emit("stmia r0!, {r%s}", acc[0]) 137 | print "" 138 | acc = acc[1:] + acc[:1] 139 | 140 | # done shifting x, start shifting y 141 | y_regs = [6, 7, 8] 142 | for r in xrange(0, prev_size): 143 | y_regs = y_regs[1:] + y_regs[:1] 144 | emit("ldmia r2!, {r%s}", y_regs[2]) 145 | emit("mov r%s, #0", acc[2]) 146 | for i in xrange(0, 3): 147 | emit("umull r%s, r%s, r%s, r%s", acc[3], acc[4], x_regs[i], y_regs[2 - i]) 148 | emit("adds r%s, r%s, r%s", acc[0], acc[0], acc[3]) 149 | emit("adcs r%s, r%s, r%s", acc[1], acc[1], acc[4]) 150 | emit("adc r%s, r%s, #0", acc[2], acc[2]) 151 | emit("ldr r%s, [r0]", acc[3]) # load stored value from initial block, and add to accumulator 152 | emit("adds r%s, r%s, r%s", acc[0], acc[0], acc[3]) 153 | emit("adcs r%s, r%s, #0", acc[1], acc[1]) 154 | emit("adc r%s, r%s, #0", acc[2], acc[2]) 155 | emit("stmia r0!, {r%s}", acc[0]) 156 | print "" 157 | acc = acc[1:] + acc[:1] 158 | 159 | # done both shifts, do remaining corner 160 | emit("mov r%s, #0", acc[2]) 161 | for i in xrange(0, 2): 162 | emit("umull r%s, r%s, r%s, r%s", acc[3], acc[4], x_regs[i + 1], y_regs[2 - i]) 163 | emit("adds r%s, r%s, 
r%s", acc[0], acc[0], acc[3]) 164 | emit("adcs r%s, r%s, r%s", acc[1], acc[1], acc[4]) 165 | emit("adc r%s, r%s, #0", acc[2], acc[2]) 166 | emit("stmia r0!, {r%s}", acc[0]) 167 | print "" 168 | acc = acc[1:] + acc[:1] 169 | 170 | emit("umull r%s, r%s, r%s, r%s", acc[3], acc[4], x_regs[2], y_regs[2]) 171 | emit("adds r%s, r%s, r%s", acc[0], acc[0], acc[3]) 172 | emit("adc r%s, r%s, r%s", acc[1], acc[1], acc[4]) 173 | emit("stmia r0!, {r%s}", acc[0]) 174 | emit("stmia r0!, {r%s}", acc[1]) 175 | print "" 176 | 177 | prev_size = prev_size + 3 178 | if row < full_rows - 1: 179 | #### reset x, y and z pointers 180 | emit("sub r0, %s", (2 * prev_size + 3) * 4) 181 | emit("sub r1, %s", prev_size * 4) 182 | emit("sub r2, %s", (prev_size + 3) * 4) 183 | 184 | #### load x and y registers 185 | emit("ldmia r1!, {%s}", ",".join(["r%s" % (rx[i]) for i in xrange(3)])) 186 | emit("ldmia r2!, {%s}", ",".join(["r%s" % (ry[i]) for i in xrange(3)])) 187 | 188 | print "" 189 | -------------------------------------------------------------------------------- /scripts/mult_avr.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | import sys 4 | 5 | if len(sys.argv) < 2: 6 | print "Provide the integer size in bytes" 7 | sys.exit(1) 8 | 9 | size = int(sys.argv[1]) 10 | 11 | full_rows = size // 10 12 | init_size = size % 10 13 | 14 | if init_size == 0: 15 | full_rows = full_rows - 1 16 | init_size = 10 17 | 18 | def rx(i): 19 | return i + 2 20 | 21 | def ry(i): 22 | return i + 12 23 | 24 | def emit(line, *args): 25 | s = '"' + line + r' \n\t"' 26 | print s % args 27 | 28 | #### set up registers 29 | emit("adiw r30, %s", size - init_size) # move z 30 | emit("adiw r28, %s", size - init_size) # move y 31 | 32 | for i in xrange(init_size): 33 | emit("ld r%s, x+", rx(i)) 34 | for i in xrange(init_size): 35 | emit("ld r%s, y+", ry(i)) 36 | 37 | emit("ldi r25, 0") 38 | print "" 39 | if init_size == 1: 40 | emit("mul r2, r12") 41 | 
emit("st z+, r0") 42 | emit("st z+, r1") 43 | else: 44 | #### first two multiplications of initial block 45 | emit("ldi r23, 0") 46 | emit("mul r2, r12") 47 | emit("st z+, r0") 48 | emit("mov r22, r1") 49 | print "" 50 | emit("ldi r24, 0") 51 | emit("mul r2, r13") 52 | emit("add r22, r0") 53 | emit("adc r23, r1") 54 | emit("mul r3, r12") 55 | emit("add r22, r0") 56 | emit("adc r23, r1") 57 | emit("adc r24, r25") 58 | emit("st z+, r22") 59 | print "" 60 | 61 | #### rest of initial block, with moving accumulator registers 62 | acc = [23, 24, 22] 63 | for r in xrange(2, init_size): 64 | emit("ldi r%s, 0", acc[2]) 65 | for i in xrange(0, r+1): 66 | emit("mul r%s, r%s", rx(i), ry(r - i)) 67 | emit("add r%s, r0", acc[0]) 68 | emit("adc r%s, r1", acc[1]) 69 | emit("adc r%s, r25", acc[2]) 70 | emit("st z+, r%s", acc[0]) 71 | print "" 72 | acc = acc[1:] + acc[:1] 73 | for r in xrange(1, init_size-1): 74 | emit("ldi r%s, 0", acc[2]) 75 | for i in xrange(0, init_size-r): 76 | emit("mul r%s, r%s", rx(r+i), ry((init_size-1) - i)) 77 | emit("add r%s, r0", acc[0]) 78 | emit("adc r%s, r1", acc[1]) 79 | emit("adc r%s, r25", acc[2]) 80 | emit("st z+, r%s", acc[0]) 81 | print "" 82 | acc = acc[1:] + acc[:1] 83 | emit("mul r%s, r%s", rx(init_size-1), ry(init_size-1)) 84 | emit("add r%s, r0", acc[0]) 85 | emit("adc r%s, r1", acc[1]) 86 | emit("st z+, r%s", acc[0]) 87 | emit("st z+, r%s", acc[1]) 88 | print "" 89 | 90 | #### reset y and z pointers 91 | emit("sbiw r30, %s", 2 * init_size + 10) 92 | emit("sbiw r28, %s", init_size + 10) 93 | 94 | #### load y registers 95 | for i in xrange(10): 96 | emit("ld r%s, y+", ry(i)) 97 | 98 | #### load additional x registers 99 | for i in xrange(init_size, 10): 100 | emit("ld r%s, x+", rx(i)) 101 | print "" 102 | 103 | prev_size = init_size 104 | for row in xrange(full_rows): 105 | #### do x = 0-9, y = 0-9 multiplications 106 | emit("ldi r23, 0") 107 | emit("mul r2, r12") 108 | emit("st z+, r0") 109 | emit("mov r22, r1") 110 | print "" 111 | 
emit("ldi r24, 0") 112 | emit("mul r2, r13") 113 | emit("add r22, r0") 114 | emit("adc r23, r1") 115 | emit("mul r3, r12") 116 | emit("add r22, r0") 117 | emit("adc r23, r1") 118 | emit("adc r24, r25") 119 | emit("st z+, r22") 120 | print "" 121 | 122 | acc = [23, 24, 22] 123 | for r in xrange(2, 10): 124 | emit("ldi r%s, 0", acc[2]) 125 | for i in xrange(0, r+1): 126 | emit("mul r%s, r%s", rx(i), ry(r - i)) 127 | emit("add r%s, r0", acc[0]) 128 | emit("adc r%s, r1", acc[1]) 129 | emit("adc r%s, r25", acc[2]) 130 | emit("st z+, r%s", acc[0]) 131 | print "" 132 | acc = acc[1:] + acc[:1] 133 | 134 | #### now we need to start shifting x and loading from z 135 | x_regs = [2, 3, 4, 5, 6, 7, 8, 9, 10, 11] 136 | for r in xrange(0, prev_size): 137 | x_regs = x_regs[1:] + x_regs[:1] 138 | emit("ld r%s, x+", x_regs[9]) # load next byte of left 139 | emit("ldi r%s, 0", acc[2]) 140 | for i in xrange(0, 10): 141 | emit("mul r%s, r%s", x_regs[i], ry(9 - i)) 142 | emit("add r%s, r0", acc[0]) 143 | emit("adc r%s, r1", acc[1]) 144 | emit("adc r%s, r25", acc[2]) 145 | emit("ld r0, z") # load stored value from initial block, and add to accumulator (note z does not increment) 146 | emit("add r%s, r0", acc[0]) 147 | emit("adc r%s, r25", acc[1]) 148 | emit("adc r%s, r25", acc[2]) 149 | emit("st z+, r%s", acc[0]) # store next byte (z increments) 150 | print "" 151 | acc = acc[1:] + acc[:1] 152 | 153 | # done shifting x, start shifting y 154 | y_regs = [12, 13, 14, 15, 16, 17, 18, 19, 20, 21] 155 | for r in xrange(0, prev_size): 156 | y_regs = y_regs[1:] + y_regs[:1] 157 | emit("ld r%s, y+", y_regs[9]) # load next byte of right 158 | emit("ldi r%s, 0", acc[2]) 159 | for i in xrange(0, 10): 160 | emit("mul r%s, r%s", x_regs[i], y_regs[9 -i]) 161 | emit("add r%s, r0", acc[0]) 162 | emit("adc r%s, r1", acc[1]) 163 | emit("adc r%s, r25", acc[2]) 164 | emit("ld r0, z") # load stored value from initial block, and add to accumulator (note z does not increment) 165 | emit("add r%s, r0", acc[0]) 
166 | emit("adc r%s, r25", acc[1]) 167 | emit("adc r%s, r25", acc[2]) 168 | emit("st z+, r%s", acc[0]) # store next byte (z increments) 169 | print "" 170 | acc = acc[1:] + acc[:1] 171 | 172 | # done both shifts, do remaining corner 173 | for r in xrange(1, 9): 174 | emit("ldi r%s, 0", acc[2]) 175 | for i in xrange(0, 10-r): 176 | emit("mul r%s, r%s", x_regs[r+i], y_regs[9 - i]) 177 | emit("add r%s, r0", acc[0]) 178 | emit("adc r%s, r1", acc[1]) 179 | emit("adc r%s, r25", acc[2]) 180 | emit("st z+, r%s", acc[0]) 181 | print "" 182 | acc = acc[1:] + acc[:1] 183 | emit("mul r%s, r%s", x_regs[9], y_regs[9]) 184 | emit("add r%s, r0", acc[0]) 185 | emit("adc r%s, r1", acc[1]) 186 | emit("st z+, r%s", acc[0]) 187 | emit("st z+, r%s", acc[1]) 188 | print "" 189 | 190 | prev_size = prev_size + 10 191 | if row < full_rows - 1: 192 | #### reset x, y and z pointers 193 | emit("sbiw r30, %s", 2 * prev_size + 10) 194 | emit("sbiw r28, %s", prev_size + 10) 195 | emit("sbiw r26, %s", prev_size) 196 | 197 | #### load x and y registers 198 | for i in xrange(10): 199 | emit("ld r%s, x+", rx(i)) 200 | emit("ld r%s, y+", ry(i)) 201 | print "" 202 | 203 | emit("eor r1, r1") 204 | -------------------------------------------------------------------------------- /scripts/mult_avr_extra.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | import sys 4 | 5 | if len(sys.argv) < 2: 6 | print "Provide the integer size in bytes" 7 | sys.exit(1) 8 | 9 | size = int(sys.argv[1]) 10 | 11 | def lhi(i): 12 | return i + 2 13 | 14 | def rhi(i): 15 | return i + 6 16 | 17 | left_lo = [10, 11, 12, 13] 18 | right_lo = [14, 15, 16, 17] 19 | 20 | def llo(i): 21 | return left_lo[i] 22 | 23 | def rlo(i): 24 | return right_lo[i] 25 | 26 | def emit(line, *args): 27 | s = '"' + line + r' \n\t"' 28 | print s % args 29 | 30 | def update_low(): 31 | global left_lo 32 | global right_lo 33 | left_lo = left_lo[1:] + left_lo[:1] 34 | right_lo = 
right_lo[1:] + right_lo[:1] 35 | emit("ld r%s, x+", left_lo[3]) 36 | emit("ld r%s, y+", right_lo[3]) 37 | 38 | accum = [19, 20, 21] 39 | 40 | def acc(i): 41 | return accum[i] 42 | 43 | def rotate_acc(): 44 | global accum 45 | accum = accum[1:] + accum[:1] 46 | 47 | # Load high values 48 | for i in xrange(4): 49 | emit("ld r%s, x+", lhi(i)) 50 | emit("ld r%s, y+", rhi(i)) 51 | 52 | emit("sbiw r26, %s", size + 4) 53 | emit("sbiw r28, %s", size + 4) 54 | emit("sbiw r30, %s", size) 55 | 56 | # Load low values 57 | for i in xrange(4): 58 | emit("ld r%s, x+", llo(i)) 59 | emit("ld r%s, y+", rlo(i)) 60 | print "" 61 | 62 | # Compute initial triangles 63 | emit("mul r%s, r%s", lhi(0), rlo(0)) 64 | emit("mov r%s, r0", acc(0)) 65 | emit("mov r%s, r1", acc(1)) 66 | emit("ldi r%s, 0", acc(2)) 67 | emit("ld r0, z") 68 | emit("add r%s, r0", acc(0)) 69 | emit("adc r%s, r25", acc(1)) 70 | emit("mul r%s, r%s", rhi(0), llo(0)) 71 | emit("add r%s, r0", acc(0)) 72 | emit("adc r%s, r1", acc(1)) 73 | emit("adc r%s, r25", acc(2)) 74 | emit("st z+, r%s", acc(0)) 75 | print "" 76 | rotate_acc() 77 | 78 | for i in xrange(1, 4): 79 | emit("ldi r%s, 0", acc(2)) 80 | emit("ld r0, z") 81 | emit("add r%s, r0", acc(0)) 82 | emit("adc r%s, r25", acc(1)) 83 | for j in xrange(i + 1): 84 | emit("mul r%s, r%s", lhi(j), rlo(i-j)) 85 | emit("add r%s, r0", acc(0)) 86 | emit("adc r%s, r1", acc(1)) 87 | emit("adc r%s, r25", acc(2)) 88 | emit("mul r%s, r%s", rhi(j), llo(i-j)) 89 | emit("add r%s, r0", acc(0)) 90 | emit("adc r%s, r1", acc(1)) 91 | emit("adc r%s, r25", acc(2)) 92 | emit("st z+, r%s", acc(0)) 93 | print "" 94 | rotate_acc() 95 | 96 | # Compute rows overlapping old block 97 | for i in xrange(4, size): 98 | emit("ldi r%s, 0", acc(2)) 99 | emit("ld r0, z") 100 | emit("add r%s, r0", acc(0)) 101 | emit("adc r%s, r25", acc(1)) 102 | update_low() 103 | for j in xrange(4): 104 | emit("mul r%s, r%s", lhi(j), rlo(3-j)) 105 | emit("add r%s, r0", acc(0)) 106 | emit("adc r%s, r1", acc(1)) 107 | emit("adc 
r%s, r25", acc(2)) 108 | emit("mul r%s, r%s", rhi(j), llo(3-j)) 109 | emit("add r%s, r0", acc(0)) 110 | emit("adc r%s, r1", acc(1)) 111 | emit("adc r%s, r25", acc(2)) 112 | emit("st z+, r%s", acc(0)) 113 | print "" 114 | rotate_acc() 115 | 116 | # Compute new triangle 117 | left_combined = [llo(1), llo(2), llo(3), lhi(0), lhi(1), lhi(2), lhi(3)] 118 | right_combined = [rlo(1), rlo(2), rlo(3), rhi(0), rhi(1), rhi(2), rhi(3)] 119 | 120 | def left(i): 121 | return left_combined[i] 122 | 123 | def right(i): 124 | return right_combined[i] 125 | 126 | for i in xrange(6): 127 | emit("ldi r%s, 0", acc(2)) 128 | for j in xrange(7 - i): 129 | emit("mul r%s, r%s", left(i+j), right(6-j)) 130 | emit("add r%s, r0", acc(0)) 131 | emit("adc r%s, r1", acc(1)) 132 | emit("adc r%s, r25", acc(2)) 133 | emit("st z+, r%s", acc(0)) 134 | print "" 135 | rotate_acc() 136 | 137 | emit("mul r%s, r%s", left(6), right(6)) 138 | emit("add r%s, r0", acc(0)) 139 | emit("adc r%s, r1", acc(1)) 140 | emit("st z+, r%s", acc(0)) 141 | emit("st z+, r%s", acc(1)) 142 | emit("adiw r26, 4") 143 | emit("adiw r28, 4") 144 | -------------------------------------------------------------------------------- /scripts/square_arm.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | import sys 4 | 5 | if len(sys.argv) < 2: 6 | print "Provide the integer size in 32-bit words" 7 | sys.exit(1) 8 | 9 | size = int(sys.argv[1]) 10 | 11 | if size > 8: 12 | print "This script doesn't work with integer size %s due to laziness" % (size) 13 | sys.exit(1) 14 | 15 | init_size = 0 16 | if size > 6: 17 | init_size = size - 6 18 | 19 | def emit(line, *args): 20 | s = '"' + line + r' \n\t"' 21 | print s % args 22 | 23 | def mulacc(acc, r1, r2): 24 | if size <= 6: 25 | emit("umull r1, r14, r%s, r%s", r1, r2) 26 | emit("adds r%s, r%s, r1", acc[0], acc[0]) 27 | emit("adcs r%s, r%s, r14", acc[1], acc[1]) 28 | emit("adc r%s, r%s, #0", acc[2], acc[2]) 29 | else: 30 | 
emit("mov r14, r%s", acc[1]) 31 | emit("umlal r%s, r%s, r%s, r%s", acc[0], acc[1], r1, r2) 32 | emit("cmp r14, r%s", acc[1]) 33 | emit("it hi") 34 | emit("adchi r%s, r%s, #0", acc[2], acc[2]) 35 | 36 | r = [2, 3, 4, 5, 6, 7] 37 | 38 | s = size - init_size 39 | 40 | if init_size == 1: 41 | emit("ldmia r1!, {r2}") 42 | emit("add r1, %s", (size - init_size * 2) * 4) 43 | emit("ldmia r1!, {r5}") 44 | 45 | emit("add r0, %s", (size - init_size) * 4) 46 | emit("umull r8, r9, r2, r5") 47 | emit("stmia r0!, {r8, r9}") 48 | 49 | emit("sub r0, %s", (size + init_size) * 4) 50 | emit("sub r1, %s", (size) * 4) 51 | print "" 52 | elif init_size == 2: 53 | emit("ldmia r1!, {r2, r3}") 54 | emit("add r1, %s", (size - init_size * 2) * 4) 55 | emit("ldmia r1!, {r5, r6}") 56 | 57 | emit("add r0, %s", (size - init_size) * 4) 58 | print "" 59 | 60 | emit("umull r8, r9, r2, r5") 61 | emit("stmia r0!, {r8}") 62 | print "" 63 | 64 | emit("umull r12, r10, r2, r6") 65 | emit("adds r9, r9, r12") 66 | emit("adc r10, r10, #0") 67 | emit("stmia r0!, {r9}") 68 | print "" 69 | 70 | emit("umull r8, r9, r3, r6") 71 | emit("adds r10, r10, r8") 72 | emit("adc r11, r9, #0") 73 | emit("stmia r0!, {r10, r11}") 74 | print "" 75 | 76 | emit("sub r0, %s", (size + init_size) * 4) 77 | emit("sub r1, %s", (size) * 4) 78 | 79 | # load input words 80 | emit("ldmia r1!, {%s}", ", ".join(["r%s" % (r[i]) for i in xrange(s)])) 81 | print "" 82 | 83 | emit("umull r11, r12, r2, r2") 84 | emit("stmia r0!, {r11}") 85 | print "" 86 | emit("mov r9, #0") 87 | emit("umull r10, r11, r2, r3") 88 | emit("adds r12, r12, r10") 89 | emit("adcs r8, r11, #0") 90 | emit("adc r9, r9, #0") 91 | emit("adds r12, r12, r10") 92 | emit("adcs r8, r8, r11") 93 | emit("adc r9, r9, #0") 94 | emit("stmia r0!, {r12}") 95 | print "" 96 | emit("mov r10, #0") 97 | emit("umull r11, r12, r2, r4") 98 | emit("adds r11, r11, r11") 99 | emit("adcs r12, r12, r12") 100 | emit("adc r10, r10, #0") 101 | emit("adds r8, r8, r11") 102 | emit("adcs r9, r9, r12") 
103 | emit("adc r10, r10, #0") 104 | emit("umull r11, r12, r3, r3") 105 | emit("adds r8, r8, r11") 106 | emit("adcs r9, r9, r12") 107 | emit("adc r10, r10, #0") 108 | emit("stmia r0!, {r8}") 109 | print "" 110 | 111 | acc = [8, 9, 10] 112 | old_acc = [11, 12] 113 | for i in xrange(3, s): 114 | emit("mov r%s, #0", old_acc[1]) 115 | tmp = [acc[1], acc[2]] 116 | acc = [acc[0], old_acc[0], old_acc[1]] 117 | old_acc = tmp 118 | 119 | # gather non-equal words 120 | emit("umull r%s, r%s, r%s, r%s", acc[0], acc[1], r[0], r[i]) 121 | for j in xrange(1, (i+1)//2): 122 | mulacc(acc, r[j], r[i-j]) 123 | # multiply by 2 124 | emit("adds r%s, r%s, r%s", acc[0], acc[0], acc[0]) 125 | emit("adcs r%s, r%s, r%s", acc[1], acc[1], acc[1]) 126 | emit("adc r%s, r%s, r%s", acc[2], acc[2], acc[2]) 127 | 128 | # add equal word (if any) 129 | if ((i+1) % 2) != 0: 130 | mulacc(acc, r[i//2], r[i//2]) 131 | 132 | # add old accumulator 133 | emit("adds r%s, r%s, r%s", acc[0], acc[0], old_acc[0]) 134 | emit("adcs r%s, r%s, r%s", acc[1], acc[1], old_acc[1]) 135 | emit("adc r%s, r%s, #0", acc[2], acc[2]) 136 | 137 | # store 138 | emit("stmia r0!, {r%s}", acc[0]) 139 | print "" 140 | 141 | regs = list(r) 142 | for i in xrange(init_size): 143 | regs = regs[1:] + regs[:1] 144 | emit("ldmia r1!, {r%s}", regs[5]) 145 | 146 | for limit in [4, 5]: 147 | emit("mov r%s, #0", old_acc[1]) 148 | tmp = [acc[1], acc[2]] 149 | acc = [acc[0], old_acc[0], old_acc[1]] 150 | old_acc = tmp 151 | 152 | # gather non-equal words 153 | emit("umull r%s, r%s, r%s, r%s", acc[0], acc[1], regs[0], regs[limit]) 154 | for j in xrange(1, (limit+1)//2): 155 | mulacc(acc, regs[j], regs[limit-j]) 156 | 157 | emit("ldr r14, [r0]") # load stored value from initial block, and add to accumulator 158 | emit("adds r%s, r%s, r14", acc[0], acc[0]) 159 | emit("adcs r%s, r%s, #0", acc[1], acc[1]) 160 | emit("adc r%s, r%s, #0", acc[2], acc[2]) 161 | 162 | # multiply by 2 163 | emit("adds r%s, r%s, r%s", acc[0], acc[0], acc[0]) 164 | 
emit("adcs r%s, r%s, r%s", acc[1], acc[1], acc[1]) 165 | emit("adc r%s, r%s, r%s", acc[2], acc[2], acc[2]) 166 | 167 | # add equal word 168 | if limit == 4: 169 | mulacc(acc, regs[2], regs[2]) 170 | 171 | # add old accumulator 172 | emit("adds r%s, r%s, r%s", acc[0], acc[0], old_acc[0]) 173 | emit("adcs r%s, r%s, r%s", acc[1], acc[1], old_acc[1]) 174 | emit("adc r%s, r%s, #0", acc[2], acc[2]) 175 | 176 | # store 177 | emit("stmia r0!, {r%s}", acc[0]) 178 | print "" 179 | 180 | for i in xrange(1, s-3): 181 | emit("mov r%s, #0", old_acc[1]) 182 | tmp = [acc[1], acc[2]] 183 | acc = [acc[0], old_acc[0], old_acc[1]] 184 | old_acc = tmp 185 | 186 | # gather non-equal words 187 | emit("umull r%s, r%s, r%s, r%s", acc[0], acc[1], regs[i], regs[s - 1]) 188 | for j in xrange(1, (s-i)//2): 189 | mulacc(acc, regs[i+j], regs[s - 1 - j]) 190 | 191 | # multiply by 2 192 | emit("adds r%s, r%s, r%s", acc[0], acc[0], acc[0]) 193 | emit("adcs r%s, r%s, r%s", acc[1], acc[1], acc[1]) 194 | emit("adc r%s, r%s, r%s", acc[2], acc[2], acc[2]) 195 | 196 | # add equal word (if any) 197 | if ((s-i) % 2) != 0: 198 | mulacc(acc, regs[i + (s-i)//2], regs[i + (s-i)//2]) 199 | 200 | # add old accumulator 201 | emit("adds r%s, r%s, r%s", acc[0], acc[0], old_acc[0]) 202 | emit("adcs r%s, r%s, r%s", acc[1], acc[1], old_acc[1]) 203 | emit("adc r%s, r%s, #0", acc[2], acc[2]) 204 | 205 | # store 206 | emit("stmia r0!, {r%s}", acc[0]) 207 | print "" 208 | 209 | acc = acc[1:] + acc[:1] 210 | emit("mov r%s, #0", acc[2]) 211 | emit("umull r1, r%s, r%s, r%s", old_acc[1], regs[s - 3], regs[s - 1]) 212 | emit("adds r1, r1, r1") 213 | emit("adcs r%s, r%s, r%s", old_acc[1], old_acc[1], old_acc[1]) 214 | emit("adc r%s, r%s, #0", acc[2], acc[2]) 215 | emit("adds r%s, r%s, r1", acc[0], acc[0]) 216 | emit("adcs r%s, r%s, r%s", acc[1], acc[1], old_acc[1]) 217 | emit("adc r%s, r%s, #0", acc[2], acc[2]) 218 | emit("umull r1, r%s, r%s, r%s", old_acc[1], regs[s - 2], regs[s - 2]) 219 | emit("adds r%s, r%s, r1", acc[0], 
acc[0]) 220 | emit("adcs r%s, r%s, r%s", acc[1], acc[1], old_acc[1]) 221 | emit("adc r%s, r%s, #0", acc[2], acc[2]) 222 | emit("stmia r0!, {r%s}", acc[0]) 223 | print "" 224 | 225 | acc = acc[1:] + acc[:1] 226 | emit("mov r%s, #0", acc[2]) 227 | emit("umull r1, r%s, r%s, r%s", old_acc[1], regs[s - 2], regs[s - 1]) 228 | emit("adds r1, r1, r1") 229 | emit("adcs r%s, r%s, r%s", old_acc[1], old_acc[1], old_acc[1]) 230 | emit("adc r%s, r%s, #0", acc[2], acc[2]) 231 | emit("adds r%s, r%s, r1", acc[0], acc[0]) 232 | emit("adcs r%s, r%s, r%s", acc[1], acc[1], old_acc[1]) 233 | emit("adc r%s, r%s, #0", acc[2], acc[2]) 234 | emit("stmia r0!, {r%s}", acc[0]) 235 | print "" 236 | 237 | acc = acc[1:] + acc[:1] 238 | emit("umull r1, r%s, r%s, r%s", old_acc[1], regs[s - 1], regs[s - 1]) 239 | emit("adds r%s, r%s, r1", acc[0], acc[0]) 240 | emit("adcs r%s, r%s, r%s", acc[1], acc[1], old_acc[1]) 241 | emit("stmia r0!, {r%s}", acc[0]) 242 | emit("stmia r0!, {r%s}", acc[1]) 243 | -------------------------------------------------------------------------------- /scripts/square_avr.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | import sys 4 | 5 | if len(sys.argv) < 2: 6 | print "Provide the integer size in bytes" 7 | sys.exit(1) 8 | 9 | size = int(sys.argv[1]) 10 | 11 | if size > 40: 12 | print "This script doesn't work with integer size %s due to laziness" % (size) 13 | sys.exit(1) 14 | 15 | init_size = size - 20 16 | if size < 20: 17 | init_size = 0 18 | 19 | def rg(i): 20 | return i + 2 21 | 22 | def lo(i): 23 | return i + 2 24 | 25 | def hi(i): 26 | return i + 12 27 | 28 | def emit(line, *args): 29 | s = '"' + line + r' \n\t"' 30 | print s % args 31 | 32 | #### set up registers 33 | zero = "r25" 34 | emit("ldi %s, 0", zero) # zero register 35 | 36 | if init_size > 0: 37 | emit("movw r28, r26") # y = x 38 | h = (init_size + 1)//2 39 | 40 | for i in xrange(h): 41 | emit("ld r%s, x+", lo(i)) 42 | emit("adiw r28, 
%s", size - init_size) # move y to other end 43 | for i in xrange(h): 44 | emit("ld r%s, y+", hi(i)) 45 | 46 | emit("adiw r30, %s", size - init_size) # move z 47 | 48 | if init_size == 1: 49 | emit("mul %s, %s", lo(0), hi(0)) 50 | emit("st z+, r0") 51 | emit("st z+, r1") 52 | else: 53 | #### first one 54 | print "" 55 | emit("ldi r23, 0") 56 | emit("mul %s, %s", lo(0), hi(0)) 57 | emit("st z+, r0") 58 | emit("mov r22, r1") 59 | print "" 60 | 61 | #### rest of initial block, with moving accumulator registers 62 | acc = [22, 23, 24] 63 | for r in xrange(1, h): 64 | emit("ldi r%s, 0", acc[2]) 65 | for i in xrange(0, (r+2)//2): 66 | emit("mul r%s, r%s", lo(i), hi(r - i)) 67 | emit("add r%s, r0", acc[0]) 68 | emit("adc r%s, r1", acc[1]) 69 | emit("adc r%s, %s", acc[2], zero) 70 | emit("st z+, r%s", acc[0]) 71 | print "" 72 | acc = acc[1:] + acc[:1] 73 | 74 | lo_r = range(2, 2 + h) 75 | hi_r = range(12, 12 + h) 76 | 77 | # now we need to start loading more from the high end 78 | for r in xrange(h, init_size): 79 | hi_r = hi_r[1:] + hi_r[:1] 80 | emit("ld r%s, y+", hi_r[h-1]) 81 | 82 | emit("ldi r%s, 0", acc[2]) 83 | for i in xrange(0, (r+2)//2): 84 | emit("mul r%s, r%s", lo(i), hi_r[h - 1 - i]) 85 | emit("add r%s, r0", acc[0]) 86 | emit("adc r%s, r1", acc[1]) 87 | emit("adc r%s, %s", acc[2], zero) 88 | emit("st z+, r%s", acc[0]) 89 | print "" 90 | acc = acc[1:] + acc[:1] 91 | 92 | # loaded all of the high end bytes; now need to start loading the rest of the low end 93 | for r in xrange(1, init_size-h): 94 | lo_r = lo_r[1:] + lo_r[:1] 95 | emit("ld r%s, x+", lo_r[h-1]) 96 | 97 | emit("ldi r%s, 0", acc[2]) 98 | for i in xrange(0, (init_size+1 - r)//2): 99 | emit("mul r%s, r%s", lo_r[i], hi_r[h - 1 - i]) 100 | emit("add r%s, r0", acc[0]) 101 | emit("adc r%s, r1", acc[1]) 102 | emit("adc r%s, %s", acc[2], zero) 103 | emit("st z+, r%s", acc[0]) 104 | print "" 105 | acc = acc[1:] + acc[:1] 106 | 107 | lo_r = lo_r[1:] + lo_r[:1] 108 | emit("ld r%s, x+", lo_r[h-1]) 109 | 110 | # 
now we have loaded everything, and we just need to finish the last corner 111 | for r in xrange(init_size-h, init_size-1): 112 | emit("ldi r%s, 0", acc[2]) 113 | for i in xrange(0, (init_size+1 - r)//2): 114 | emit("mul r%s, r%s", lo_r[i], hi_r[h - 1 - i]) 115 | emit("add r%s, r0", acc[0]) 116 | emit("adc r%s, r1", acc[1]) 117 | emit("adc r%s, %s", acc[2], zero) 118 | emit("st z+, r%s", acc[0]) 119 | print "" 120 | acc = acc[1:] + acc[:1] 121 | lo_r = lo_r[1:] + lo_r[:1] # make the indexing easy 122 | 123 | emit("mul r%s, r%s", lo_r[0], hi_r[h - 1]) 124 | emit("add r%s, r0", acc[0]) 125 | emit("adc r%s, r1", acc[1]) 126 | emit("st z+, r%s", acc[0]) 127 | emit("st z+, r%s", acc[1]) 128 | print "" 129 | emit("sbiw r26, %s", init_size) # reset x 130 | emit("sbiw r30, %s", size + init_size) # reset z 131 | 132 | # TODO you could do more rows of size 20 here if your integers are larger than 40 bytes 133 | 134 | s = size - init_size 135 | 136 | for i in xrange(s): 137 | emit("ld r%s, x+", rg(i)) 138 | 139 | #### first few columns 140 | # NOTE: this is only valid if size >= 3 141 | print "" 142 | emit("ldi r23, 0") 143 | emit("mul r%s, r%s", rg(0), rg(0)) 144 | emit("st z+, r0") 145 | emit("mov r22, r1") 146 | print "" 147 | emit("ldi r24, 0") 148 | emit("mul r%s, r%s", rg(0), rg(1)) 149 | emit("add r22, r0") 150 | emit("adc r23, r1") 151 | emit("adc r24, %s", zero) 152 | emit("add r22, r0") 153 | emit("adc r23, r1") 154 | emit("adc r24, %s", zero) 155 | emit("st z+, r22") 156 | print "" 157 | emit("ldi r22, 0") 158 | emit("mul r%s, r%s", rg(0), rg(2)) 159 | emit("add r23, r0") 160 | emit("adc r24, r1") 161 | emit("adc r22, %s", zero) 162 | emit("add r23, r0") 163 | emit("adc r24, r1") 164 | emit("adc r22, %s", zero) 165 | emit("mul r%s, r%s", rg(1), rg(1)) 166 | emit("add r23, r0") 167 | emit("adc r24, r1") 168 | emit("adc r22, %s", zero) 169 | emit("st z+, r23") 170 | print "" 171 | 172 | acc = [23, 24, 22] 173 | old_acc = [28, 29] 174 | for i in xrange(3, s): 175 | 
emit("ldi r%s, 0", old_acc[1]) 176 | tmp = [acc[1], acc[2]] 177 | acc = [acc[0], old_acc[0], old_acc[1]] 178 | old_acc = tmp 179 | 180 | # gather non-equal words 181 | emit("mul r%s, r%s", rg(0), rg(i)) 182 | emit("mov r%s, r0", acc[0]) 183 | emit("mov r%s, r1", acc[1]) 184 | for j in xrange(1, (i+1)//2): 185 | emit("mul r%s, r%s", rg(j), rg(i-j)) 186 | emit("add r%s, r0", acc[0]) 187 | emit("adc r%s, r1", acc[1]) 188 | emit("adc r%s, %s", acc[2], zero) 189 | # multiply by 2 190 | emit("lsl r%s", acc[0]) 191 | emit("rol r%s", acc[1]) 192 | emit("rol r%s", acc[2]) 193 | 194 | # add equal word (if any) 195 | if ((i+1) % 2) != 0: 196 | emit("mul r%s, r%s", rg(i//2), rg(i//2)) 197 | emit("add r%s, r0", acc[0]) 198 | emit("adc r%s, r1", acc[1]) 199 | emit("adc r%s, %s", acc[2], zero) 200 | 201 | # add old accumulator 202 | emit("add r%s, r%s", acc[0], old_acc[0]) 203 | emit("adc r%s, r%s", acc[1], old_acc[1]) 204 | emit("adc r%s, %s", acc[2], zero) 205 | 206 | # store 207 | emit("st z+, r%s", acc[0]) 208 | print "" 209 | 210 | regs = range(2, 22) 211 | for i in xrange(init_size): 212 | regs = regs[1:] + regs[:1] 213 | emit("ld r%s, x+", regs[19]) 214 | 215 | for limit in [18, 19]: 216 | emit("ldi r%s, 0", old_acc[1]) 217 | tmp = [acc[1], acc[2]] 218 | acc = [acc[0], old_acc[0], old_acc[1]] 219 | old_acc = tmp 220 | 221 | # gather non-equal words 222 | emit("mul r%s, r%s", regs[0], regs[limit]) 223 | emit("mov r%s, r0", acc[0]) 224 | emit("mov r%s, r1", acc[1]) 225 | for j in xrange(1, (limit+1)//2): 226 | emit("mul r%s, r%s", regs[j], regs[limit-j]) 227 | emit("add r%s, r0", acc[0]) 228 | emit("adc r%s, r1", acc[1]) 229 | emit("adc r%s, %s", acc[2], zero) 230 | 231 | emit("ld r0, z") # load stored value from initial block, and add to accumulator (note z does not increment) 232 | emit("add r%s, r0", acc[0]) 233 | emit("adc r%s, r25", acc[1]) 234 | emit("adc r%s, r25", acc[2]) 235 | 236 | # multiply by 2 237 | emit("lsl r%s", acc[0]) 238 | emit("rol r%s", acc[1]) 239 | 
emit("rol r%s", acc[2]) 240 | 241 | # add equal word 242 | if limit == 18: 243 | emit("mul r%s, r%s", regs[9], regs[9]) 244 | emit("add r%s, r0", acc[0]) 245 | emit("adc r%s, r1", acc[1]) 246 | emit("adc r%s, %s", acc[2], zero) 247 | 248 | # add old accumulator 249 | emit("add r%s, r%s", acc[0], old_acc[0]) 250 | emit("adc r%s, r%s", acc[1], old_acc[1]) 251 | emit("adc r%s, %s", acc[2], zero) 252 | 253 | # store 254 | emit("st z+, r%s", acc[0]) 255 | print "" 256 | 257 | for i in xrange(1, s-3): 258 | emit("ldi r%s, 0", old_acc[1]) 259 | tmp = [acc[1], acc[2]] 260 | acc = [acc[0], old_acc[0], old_acc[1]] 261 | old_acc = tmp 262 | 263 | # gather non-equal words 264 | emit("mul r%s, r%s", regs[i], regs[s - 1]) 265 | emit("mov r%s, r0", acc[0]) 266 | emit("mov r%s, r1", acc[1]) 267 | for j in xrange(1, (s-i)//2): 268 | emit("mul r%s, r%s", regs[i+j], regs[s - 1 - j]) 269 | emit("add r%s, r0", acc[0]) 270 | emit("adc r%s, r1", acc[1]) 271 | emit("adc r%s, %s", acc[2], zero) 272 | # multiply by 2 273 | emit("lsl r%s", acc[0]) 274 | emit("rol r%s", acc[1]) 275 | emit("rol r%s", acc[2]) 276 | 277 | # add equal word (if any) 278 | if ((s-i) % 2) != 0: 279 | emit("mul r%s, r%s", regs[i + (s-i)//2], regs[i + (s-i)//2]) 280 | emit("add r%s, r0", acc[0]) 281 | emit("adc r%s, r1", acc[1]) 282 | emit("adc r%s, %s", acc[2], zero) 283 | 284 | # add old accumulator 285 | emit("add r%s, r%s", acc[0], old_acc[0]) 286 | emit("adc r%s, r%s", acc[1], old_acc[1]) 287 | emit("adc r%s, %s", acc[2], zero) 288 | 289 | # store 290 | emit("st z+, r%s", acc[0]) 291 | print "" 292 | 293 | acc = acc[1:] + acc[:1] 294 | emit("ldi r%s, 0", acc[2]) 295 | emit("mul r%s, r%s", regs[17], regs[19]) 296 | emit("add r%s, r0", acc[0]) 297 | emit("adc r%s, r1", acc[1]) 298 | emit("adc r%s, %s", acc[2], zero) 299 | emit("add r%s, r0", acc[0]) 300 | emit("adc r%s, r1", acc[1]) 301 | emit("adc r%s, %s", acc[2], zero) 302 | emit("mul r%s, r%s", regs[18], regs[18]) 303 | emit("add r%s, r0", acc[0]) 304 | 
emit("adc r%s, r1", acc[1]) 305 | emit("adc r%s, %s", acc[2], zero) 306 | emit("st z+, r%s", acc[0]) 307 | print "" 308 | 309 | acc = acc[1:] + acc[:1] 310 | emit("ldi r%s, 0", acc[2]) 311 | emit("mul r%s, r%s", regs[18], regs[19]) 312 | emit("add r%s, r0", acc[0]) 313 | emit("adc r%s, r1", acc[1]) 314 | emit("adc r%s, %s", acc[2], zero) 315 | emit("add r%s, r0", acc[0]) 316 | emit("adc r%s, r1", acc[1]) 317 | emit("adc r%s, %s", acc[2], zero) 318 | emit("st z+, r%s", acc[0]) 319 | print "" 320 | 321 | emit("mul r%s, r%s", regs[19], regs[19]) 322 | emit("add r%s, r0", acc[1]) 323 | emit("adc r%s, r1", acc[2]) 324 | emit("st z+, r%s", acc[1]) 325 | 326 | emit("st z+, r%s", acc[2]) 327 | emit("eor r1, r1") 328 | -------------------------------------------------------------------------------- /test/ecdsa_test_vectors.c: -------------------------------------------------------------------------------- 1 | /* Copyright 2020, Kenneth MacKay. Licensed under the BSD 2-clause license. */ 2 | 3 | #include "uECC.h" 4 | 5 | #include 6 | #include 7 | #include 8 | 9 | typedef struct { 10 | const char* private_key; 11 | const char* public_key; 12 | const char* k; 13 | const char* hash; 14 | const char* r; 15 | const char* s; 16 | } Test; 17 | 18 | Test secp256k1_tests[] = { 19 | { 20 | "ebb2c082fd7727890a28ac82f6bdf97bad8de9f5d7c9028692de1a255cad3e0f", 21 | "779dd197a5df977ed2cf6cb31d82d43328b790dc6b3b7d4437a427bd5847dfcde94b724a555b6d017bb7607c3e3281daf5b1699d6ef4124975c9237b917d426f", 22 | "49a0d7b786ec9cde0d0721d72804befd06571c974b191efb42ecf322ba9ddd9a", 23 | "4b688df40bcedbe641ddb16ff0a1842d9c67ea1c3bf63f3e0471baa664531d1a", 24 | "241097efbf8b63bf145c8961dbdf10c310efbb3b2676bbc0f8b08505c9e2f795", 25 | "021006b7838609339e8b415a7f9acb1b661828131aef1ecbc7955dfb01f3ca0e" 26 | }, 27 | }; 28 | 29 | extern int uECC_sign_with_k(const uint8_t *private_key, 30 | const uint8_t *message_hash, 31 | unsigned hash_size, 32 | const uint8_t *k, 33 | uint8_t *signature, 34 | uECC_Curve curve); 
35 | 36 | 37 | void vli_print(uint8_t *vli, unsigned int size) { 38 | for(unsigned i=0; i 6 | #include 7 | #include 8 | 9 | typedef struct { 10 | const char* k; 11 | const char* Q; 12 | int success; 13 | } Test; 14 | 15 | Test secp160r1_tests[] = { 16 | /* Note, I couldn't find any test vectors for secp160r1 online, so these are just 17 | generated on my desktop using uECC. */ 18 | { 19 | "000000000000000000000000000000000000000000", 20 | "00000000000000000000000000000000000000000000000000000000000000000000000000000000", 21 | 0 22 | }, 23 | { 24 | "000000000000000000000000000000000000000001", 25 | "00000000000000000000000000000000000000000000000000000000000000000000000000000000", 26 | 0 27 | }, 28 | { 29 | "000000000000000000000000000000000000000002", 30 | "02F997F33C5ED04C55D3EDF8675D3E92E8F46686F083A323482993E9440E817E21CFB7737DF8797B", 31 | 1 32 | }, 33 | { 34 | "000000000000000000000000000000000000000003", 35 | "7B76FF541EF363F2DF13DE1650BD48DAA958BC59C915CA790D8C8877B55BE0079D12854FFE9F6F5A", 36 | 1 37 | }, 38 | { /* n - 4 */ 39 | "0100000000000000000001F4C8F927AED3CA752253", 40 | "B4041D8683BE99F0AFE01C307B1AD4C100CF2A88C0CD35127BE0F73FF99F338B350B5A42864112F7", 41 | 1 42 | }, 43 | { /* n - 3 */ 44 | "0100000000000000000001F4C8F927AED3CA752254", 45 | "7B76FF541EF363F2DF13DE1650BD48DAA958BC5936EA3586F27377884AA41FF862ED7AAF816090A5", 46 | 1 47 | }, 48 | { /* n - 2 */ 49 | "0100000000000000000001F4C8F927AED3CA752255", 50 | "00000000000000000000000000000000000000000000000000000000000000000000000000000000", 51 | 0 52 | }, 53 | { /* n - 1 */ 54 | "0100000000000000000001F4C8F927AED3CA752256", 55 | "00000000000000000000000000000000000000000000000000000000000000000000000000000000", 56 | 0 57 | }, 58 | { /* n */ 59 | "0100000000000000000001F4C8F927AED3CA752257", 60 | "00000000000000000000000000000000000000000000000000000000000000000000000000000000", 61 | 0 62 | }, 63 | }; 64 | 65 | 66 | Test secp192r1_tests[] = { 67 | { 68 | 
"000000000000000000000000000000000000000000000000", 69 | "000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000", 70 | 0 71 | }, 72 | { 73 | "000000000000000000000000000000000000000000000001", 74 | "188DA80EB03090F67CBF20EB43A18800F4FF0AFD82FF101207192B95FFC8DA78631011ED6B24CDD573F977A11E794811", 75 | 0 76 | }, 77 | { 78 | "000000000000000000000000000000000000000000000002", 79 | "DAFEBF5828783F2AD35534631588A3F629A70FB16982A888DD6BDA0D993DA0FA46B27BBC141B868F59331AFA5C7E93AB", 80 | 1 81 | }, 82 | { 83 | "000000000000000000000000000000000000000000000003", 84 | "76E32A2557599E6EDCD283201FB2B9AADFD0D359CBB263DA782C37E372BA4520AA62E0FED121D49EF3B543660CFD05FD", 85 | 1 86 | }, 87 | { /* n - 4 */ 88 | "FFFFFFFFFFFFFFFFFFFFFFFF99DEF836146BC9B1B4D2282D", 89 | "35433907297CC378B0015703374729D7A4FE46647084E4BA5D9B667B0DECA3CFE15C534F88932B0DDAC764CEE24C41CD", 90 | 1 91 | }, 92 | { /* n - 3 */ 93 | "FFFFFFFFFFFFFFFFFFFFFFFF99DEF836146BC9B1B4D2282E", 94 | "76E32A2557599E6EDCD283201FB2B9AADFD0D359CBB263DA87D3C81C8D45BADF559D1F012EDE2B600C4ABC99F302FA02", 95 | 1 96 | }, 97 | { /* n - 2 */ 98 | "FFFFFFFFFFFFFFFFFFFFFFFF99DEF836146BC9B1B4D2282F", 99 | "DAFEBF5828783F2AD35534631588A3F629A70FB16982A888229425F266C25F05B94D8443EBE4796FA6CCE505A3816C54", 100 | 0 101 | }, 102 | { /* n - 1 */ 103 | "FFFFFFFFFFFFFFFFFFFFFFFF99DEF836146BC9B1B4D22830", 104 | "188DA80EB03090F67CBF20EB43A18800F4FF0AFD82FF1012F8E6D46A003725879CEFEE1294DB32298C06885EE186B7EE", 105 | 0 106 | }, 107 | { /* n */ 108 | "FFFFFFFFFFFFFFFFFFFFFFFF99DEF836146BC9B1B4D22831", 109 | "000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000", 110 | 0 111 | }, 112 | }; 113 | 114 | Test secp224r1_tests[] = { 115 | { 116 | "00000000000000000000000000000000000000000000000000000000", 117 | "0000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000", 118 | 0 119 | }, 120 | { 121 | 
"00000000000000000000000000000000000000000000000000000001", 122 | "B70E0CBD6BB4BF7F321390B94A03C1D356C21122343280D6115C1D21BD376388B5F723FB4C22DFE6CD4375A05A07476444D5819985007E34", 123 | 0 124 | }, 125 | { 126 | "00000000000000000000000000000000000000000000000000000002", 127 | "706A46DC76DCB76798E60E6D89474788D16DC18032D268FD1A704FA61C2B76A7BC25E7702A704FA986892849FCA629487ACF3709D2E4E8BB", 128 | 1 129 | }, 130 | { 131 | "00000000000000000000000000000000000000000000000000000003", 132 | "DF1B1D66A551D0D31EFF822558B9D2CC75C2180279FE0D08FD896D04A3F7F03CADD0BE444C0AA56830130DDF77D317344E1AF3591981A925", 133 | 1 134 | }, 135 | { /* n - 4 */ 136 | "FFFFFFFFFFFFFFFFFFFFFFFFFFFF16A2E0B8F03E13DD29455C5C2A39", 137 | "AE99FEEBB5D26945B54892092A8AEE02912930FA41CD114E40447301FB7DA7F5F13A43B81774373C879CD32D6934C05FA758EEB14FCFAB38", 138 | 1 139 | }, 140 | { /* n - 3 */ 141 | "FFFFFFFFFFFFFFFFFFFFFFFFFFFF16A2E0B8F03E13DD29455C5C2A3A", 142 | "DF1B1D66A551D0D31EFF822558B9D2CC75C2180279FE0D08FD896D045C080FC3522F41BBB3F55A97CFECF21F882CE8CBB1E50CA6E67E56DC", 143 | 1 144 | }, 145 | { /* n - 2 */ 146 | "FFFFFFFFFFFFFFFFFFFFFFFFFFFF16A2E0B8F03E13DD29455C5C2A3B", 147 | "706A46DC76DCB76798E60E6D89474788D16DC18032D268FD1A704FA6E3D4895843DA188FD58FB0567976D7B50359D6B78530C8F62D1B1746", 148 | 0 149 | }, 150 | { /* n - 1 */ 151 | "FFFFFFFFFFFFFFFFFFFFFFFFFFFF16A2E0B8F03E13DD29455C5C2A3C", 152 | "B70E0CBD6BB4BF7F321390B94A03C1D356C21122343280D6115C1D2142C89C774A08DC04B3DD201932BC8A5EA5F8B89BBB2A7E667AFF81CD", 153 | 0 154 | }, 155 | { /* n */ 156 | "FFFFFFFFFFFFFFFFFFFFFFFFFFFF16A2E0B8F03E13DD29455C5C2A3D", 157 | "0000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000", 158 | 0 159 | }, 160 | }; 161 | 162 | Test secp256r1_tests[] = { 163 | { 164 | "0000000000000000000000000000000000000000000000000000000000000000", 165 | 
"00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000", 166 | 0 167 | }, 168 | { 169 | "0000000000000000000000000000000000000000000000000000000000000001", 170 | "6B17D1F2E12C4247F8BCE6E563A440F277037D812DEB33A0F4A13945D898C2964FE342E2FE1A7F9B8EE7EB4A7C0F9E162BCE33576B315ECECBB6406837BF51F5", 171 | 0 172 | }, 173 | { 174 | "0000000000000000000000000000000000000000000000000000000000000002", 175 | "7CF27B188D034F7E8A52380304B51AC3C08969E277F21B35A60B48FC4766997807775510DB8ED040293D9AC69F7430DBBA7DADE63CE982299E04B79D227873D1", 176 | 1 177 | }, 178 | { 179 | "0000000000000000000000000000000000000000000000000000000000000003", 180 | "5ECBE4D1A6330A44C8F7EF951D4BF165E6C6B721EFADA985FB41661BC6E7FD6C8734640C4998FF7E374B06CE1A64A2ECD82AB036384FB83D9A79B127A27D5032", 181 | 1 182 | }, 183 | { /* n - 4 */ 184 | "FFFFFFFF00000000FFFFFFFFFFFFFFFFBCE6FAADA7179E84F3B9CAC2FC63254D", 185 | "E2534A3532D08FBBA02DDE659EE62BD0031FE2DB785596EF509302446B0308521F0EA8A4B39CC339E62011A02579D289B103693D0CF11FFAA3BD3DC0E7B12739", 186 | 1 187 | }, 188 | { /* n - 3 */ 189 | "FFFFFFFF00000000FFFFFFFFFFFFFFFFBCE6FAADA7179E84F3B9CAC2FC63254E", 190 | "5ECBE4D1A6330A44C8F7EF951D4BF165E6C6B721EFADA985FB41661BC6E7FD6C78CB9BF2B6670082C8B4F931E59B5D1327D54FCAC7B047C265864ED85D82AFCD", 191 | 1 192 | }, 193 | { /* n - 2 */ 194 | "FFFFFFFF00000000FFFFFFFFFFFFFFFFBCE6FAADA7179E84F3B9CAC2FC63254F", 195 | "7CF27B188D034F7E8A52380304B51AC3C08969E277F21B35A60B48FC47669978F888AAEE24712FC0D6C26539608BCF244582521AC3167DD661FB4862DD878C2E", 196 | 0 197 | }, 198 | { /* n - 1 */ 199 | "FFFFFFFF00000000FFFFFFFFFFFFFFFFBCE6FAADA7179E84F3B9CAC2FC632550", 200 | "6B17D1F2E12C4247F8BCE6E563A440F277037D812DEB33A0F4A13945D898C296B01CBD1C01E58065711814B583F061E9D431CCA994CEA1313449BF97C840AE0A", 201 | 0 202 | }, 203 | { /* n */ 204 | "FFFFFFFF00000000FFFFFFFFFFFFFFFFBCE6FAADA7179E84F3B9CAC2FC632551", 205 | 
"00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000", 206 | 0 207 | }, 208 | }; 209 | 210 | Test secp256k1_tests[] = { 211 | { 212 | "0000000000000000000000000000000000000000000000000000000000000000", 213 | "00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000", 214 | 0 215 | }, 216 | { 217 | "0000000000000000000000000000000000000000000000000000000000000001", 218 | "79BE667EF9DCBBAC55A06295CE870B07029BFCDB2DCE28D959F2815B16F81798483ADA7726A3C4655DA4FBFC0E1108A8FD17B448A68554199C47D08FFB10D4B8", 219 | 0 220 | }, 221 | { 222 | "0000000000000000000000000000000000000000000000000000000000000002", 223 | "C6047F9441ED7D6D3045406E95C07CD85C778E4B8CEF3CA7ABAC09B95C709EE51AE168FEA63DC339A3C58419466CEAEEF7F632653266D0E1236431A950CFE52A", 224 | 1 225 | }, 226 | { 227 | "0000000000000000000000000000000000000000000000000000000000000003", 228 | "F9308A019258C31049344F85F89D5229B531C845836F99B08601F113BCE036F9388F7B0F632DE8140FE337E62A37F3566500A99934C2231B6CB9FD7584B8E672", 229 | 1 230 | }, 231 | { /* n - 4 */ 232 | "FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFEBAAEDCE6AF48A03BBFD25E8CD036413D", 233 | "E493DBF1C10D80F3581E4904930B1404CC6C13900EE0758474FA94ABE8C4CD13AE1266C15F2BAA48A9BD1DF6715AEBB7269851CC404201BF30168422B88C630D", 234 | 1 235 | }, 236 | { /* n - 3 */ 237 | "FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFEBAAEDCE6AF48A03BBFD25E8CD036413E", 238 | "F9308A019258C31049344F85F89D5229B531C845836F99B08601F113BCE036F9C77084F09CD217EBF01CC819D5C80CA99AFF5666CB3DDCE4934602897B4715BD", 239 | 1 240 | }, 241 | { /* n - 2 */ 242 | "FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFEBAAEDCE6AF48A03BBFD25E8CD036413F", 243 | "C6047F9441ED7D6D3045406E95C07CD85C778E4B8CEF3CA7ABAC09B95C709EE5E51E970159C23CC65C3A7BE6B99315110809CD9ACD992F1EDC9BCE55AF301705", 244 | 0 245 | }, 246 | { /* n - 1 */ 247 | "FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFEBAAEDCE6AF48A03BBFD25E8CD0364140", 248 | 
"79BE667EF9DCBBAC55A06295CE870B07029BFCDB2DCE28D959F2815B16F81798B7C52588D95C3B9AA25B0403F1EEF75702E84BB7597AABE663B82F6F04EF2777", 249 | 0 250 | }, 251 | { /* n */ 252 | "FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFEBAAEDCE6AF48A03BBFD25E8CD0364141", 253 | "00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000", 254 | 0 255 | }, 256 | }; 257 | 258 | 259 | void vli_print(uint8_t *vli, unsigned int size) { 260 | for(unsigned i=0; i 6 | #include 7 | 8 | #ifndef uECC_TEST_NUMBER_OF_ITERATIONS 9 | #define uECC_TEST_NUMBER_OF_ITERATIONS 256 10 | #endif 11 | 12 | void vli_print(char *str, uint8_t *vli, unsigned int size) { 13 | printf("%s ", str); 14 | for(unsigned i=0; i 6 | #include 7 | 8 | void vli_print(char *str, uint8_t *vli, unsigned int size) { 9 | printf("%s ", str); 10 | for(unsigned i=0; i 6 | #include 7 | 8 | void vli_print(uint8_t *vli, unsigned int size) { 9 | for(unsigned i=0; i 6 | #include 7 | 8 | int main() { 9 | int i, c; 10 | uint8_t private[32] = {0}; 11 | uint8_t public[64] = {0}; 12 | uint8_t hash[32] = {0}; 13 | uint8_t sig[64] = {0}; 14 | 15 | const struct uECC_Curve_t * curves[5]; 16 | int num_curves = 0; 17 | #if uECC_SUPPORTS_secp160r1 18 | curves[num_curves++] = uECC_secp160r1(); 19 | #endif 20 | #if uECC_SUPPORTS_secp192r1 21 | curves[num_curves++] = uECC_secp192r1(); 22 | #endif 23 | #if uECC_SUPPORTS_secp224r1 24 | curves[num_curves++] = uECC_secp224r1(); 25 | #endif 26 | #if uECC_SUPPORTS_secp256r1 27 | curves[num_curves++] = uECC_secp256r1(); 28 | #endif 29 | #if uECC_SUPPORTS_secp256k1 30 | curves[num_curves++] = uECC_secp256k1(); 31 | #endif 32 | 33 | printf("Testing 256 signatures\n"); 34 | for (c = 0; c < num_curves; ++c) { 35 | for (i = 0; i < 256; ++i) { 36 | printf("."); 37 | fflush(stdout); 38 | 39 | if (!uECC_make_key(public, private, curves[c])) { 40 | printf("uECC_make_key() failed\n"); 41 | return 1; 42 | } 43 | memcpy(hash, public, sizeof(hash)); 44 | 45 | if 
(!uECC_sign(private, hash, sizeof(hash), sig, curves[c])) { 46 | printf("uECC_sign() failed\n"); 47 | return 1; 48 | } 49 | 50 | if (!uECC_verify(public, hash, sizeof(hash), sig, curves[c])) { 51 | printf("uECC_verify() failed\n"); 52 | return 1; 53 | } 54 | } 55 | printf("\n"); 56 | } 57 | 58 | return 0; 59 | } 60 | -------------------------------------------------------------------------------- /test/test_ecdsa_deterministic.c.example: -------------------------------------------------------------------------------- 1 | /* Copyright 2014, Kenneth MacKay. Licensed under the BSD 2-clause license. */ 2 | 3 | #include "uECC.h" 4 | 5 | #include 6 | #include 7 | 8 | #define SHA256_BLOCK_LENGTH 64 9 | #define SHA256_DIGEST_LENGTH 32 10 | 11 | typedef struct SHA256_CTX { 12 | uint32_t state[8]; 13 | uint64_t bitcount; 14 | uint8_t buffer[SHA256_BLOCK_LENGTH]; 15 | } SHA256_CTX; 16 | 17 | extern void SHA256_Init(SHA256_CTX *ctx); 18 | extern void SHA256_Update(SHA256_CTX *ctx, const uint8_t *message, size_t message_size); 19 | extern void SHA256_Final(uint8_t digest[SHA256_DIGEST_LENGTH], SHA256_CTX *ctx); 20 | 21 | typedef struct SHA256_HashContext { 22 | uECC_HashContext uECC; 23 | SHA256_CTX ctx; 24 | } SHA256_HashContext; 25 | 26 | static void init_SHA256(const uECC_HashContext *base) { 27 | SHA256_HashContext *context = (SHA256_HashContext *)base; 28 | SHA256_Init(&context->ctx); 29 | } 30 | 31 | static void update_SHA256(const uECC_HashContext *base, 32 | const uint8_t *message, 33 | unsigned message_size) { 34 | SHA256_HashContext *context = (SHA256_HashContext *)base; 35 | SHA256_Update(&context->ctx, message, message_size); 36 | } 37 | 38 | static void finish_SHA256(const uECC_HashContext *base, uint8_t *hash_result) { 39 | SHA256_HashContext *context = (SHA256_HashContext *)base; 40 | SHA256_Final(hash_result, &context->ctx); 41 | } 42 | 43 | int main() { 44 | int i, c; 45 | uint8_t private[32] = {0}; 46 | uint8_t public[64] = {0}; 47 | uint8_t hash[32] = {0}; 48 
| uint8_t sig[64] = {0}; 49 | 50 | uint8_t tmp[2 * SHA256_DIGEST_LENGTH + SHA256_BLOCK_LENGTH]; 51 | SHA256_HashContext ctx = {{ 52 | &init_SHA256, 53 | &update_SHA256, 54 | &finish_SHA256, 55 | SHA256_BLOCK_LENGTH, 56 | SHA256_DIGEST_LENGTH, 57 | tmp 58 | }}; 59 | 60 | const struct uECC_Curve_t * curves[5]; 61 | curves[0] = uECC_secp160r1(); 62 | curves[1] = uECC_secp192r1(); 63 | curves[2] = uECC_secp224r1(); 64 | curves[3] = uECC_secp256r1(); 65 | curves[4] = uECC_secp256k1(); 66 | 67 | printf("Testing 256 signatures\n"); 68 | for (c = 0; c < 5; ++c) { 69 | for (i = 0; i < 256; ++i) { 70 | printf("."); 71 | fflush(stdout); 72 | 73 | if (!uECC_make_key(public, private, curves[c])) { 74 | printf("uECC_make_key() failed\n"); 75 | return 1; 76 | } 77 | memcpy(hash, public, sizeof(hash)); 78 | 79 | if (!uECC_sign_deterministic(private, hash, sizeof(hash), &ctx.uECC, sig, curves[c])) { 80 | printf("uECC_sign() failed\n"); 81 | return 1; 82 | } 83 | 84 | if (!uECC_verify(public, hash, sizeof(hash), sig, curves[c])) { 85 | printf("uECC_verify() failed\n"); 86 | return 1; 87 | } 88 | } 89 | printf("\n"); 90 | } 91 | 92 | return 0; 93 | } 94 | -------------------------------------------------------------------------------- /types.h: -------------------------------------------------------------------------------- 1 | /* Copyright 2015, Kenneth MacKay. Licensed under the BSD 2-clause license. 
*/ 2 | 3 | #ifndef _UECC_TYPES_H_ 4 | #define _UECC_TYPES_H_ 5 | 6 | #ifndef uECC_PLATFORM 7 | #if __AVR__ 8 | #define uECC_PLATFORM uECC_avr 9 | #elif defined(__thumb2__) || defined(_M_ARMT) /* I think MSVC only supports Thumb-2 targets */ 10 | #define uECC_PLATFORM uECC_arm_thumb2 11 | #elif defined(__thumb__) 12 | #define uECC_PLATFORM uECC_arm_thumb 13 | #elif defined(__arm__) || defined(_M_ARM) 14 | #define uECC_PLATFORM uECC_arm 15 | #elif defined(__aarch64__) 16 | #define uECC_PLATFORM uECC_arm64 17 | #elif defined(__i386__) || defined(_M_IX86) || defined(_X86_) || defined(__I86__) 18 | #define uECC_PLATFORM uECC_x86 19 | #elif defined(__amd64__) || defined(_M_X64) 20 | #define uECC_PLATFORM uECC_x86_64 21 | #else 22 | #define uECC_PLATFORM uECC_arch_other 23 | #endif 24 | #endif 25 | 26 | #ifndef uECC_ARM_USE_UMAAL 27 | #if (uECC_PLATFORM == uECC_arm) && (__ARM_ARCH >= 6) 28 | #define uECC_ARM_USE_UMAAL 1 29 | #elif (uECC_PLATFORM == uECC_arm_thumb2) && (__ARM_ARCH >= 6) && !__ARM_ARCH_7M__ 30 | #define uECC_ARM_USE_UMAAL 1 31 | #else 32 | #define uECC_ARM_USE_UMAAL 0 33 | #endif 34 | #endif 35 | 36 | #ifndef uECC_WORD_SIZE 37 | #if uECC_PLATFORM == uECC_avr 38 | #define uECC_WORD_SIZE 1 39 | #elif (uECC_PLATFORM == uECC_x86_64 || uECC_PLATFORM == uECC_arm64) 40 | #define uECC_WORD_SIZE 8 41 | #else 42 | #define uECC_WORD_SIZE 4 43 | #endif 44 | #endif 45 | 46 | #if (uECC_WORD_SIZE != 1) && (uECC_WORD_SIZE != 4) && (uECC_WORD_SIZE != 8) 47 | #error "Unsupported value for uECC_WORD_SIZE" 48 | #endif 49 | 50 | #if ((uECC_PLATFORM == uECC_avr) && (uECC_WORD_SIZE != 1)) 51 | #pragma message ("uECC_WORD_SIZE must be 1 for AVR") 52 | #undef uECC_WORD_SIZE 53 | #define uECC_WORD_SIZE 1 54 | #endif 55 | 56 | #if ((uECC_PLATFORM == uECC_arm || uECC_PLATFORM == uECC_arm_thumb || \ 57 | uECC_PLATFORM == uECC_arm_thumb2) && \ 58 | (uECC_WORD_SIZE != 4)) 59 | #pragma message ("uECC_WORD_SIZE must be 4 for ARM") 60 | #undef uECC_WORD_SIZE 61 | #define uECC_WORD_SIZE 4 62 
| #endif 63 | 64 | #if defined(__SIZEOF_INT128__) || ((__clang_major__ * 100 + __clang_minor__) >= 302) 65 | #define SUPPORTS_INT128 1 66 | #else 67 | #define SUPPORTS_INT128 0 68 | #endif 69 | 70 | typedef int8_t wordcount_t; 71 | typedef int16_t bitcount_t; 72 | typedef int8_t cmpresult_t; 73 | 74 | #if (uECC_WORD_SIZE == 1) 75 | 76 | typedef uint8_t uECC_word_t; 77 | typedef uint16_t uECC_dword_t; 78 | 79 | #define HIGH_BIT_SET 0x80 80 | #define uECC_WORD_BITS 8 81 | #define uECC_WORD_BITS_SHIFT 3 82 | #define uECC_WORD_BITS_MASK 0x07 83 | 84 | #elif (uECC_WORD_SIZE == 4) 85 | 86 | typedef uint32_t uECC_word_t; 87 | typedef uint64_t uECC_dword_t; 88 | 89 | #define HIGH_BIT_SET 0x80000000 90 | #define uECC_WORD_BITS 32 91 | #define uECC_WORD_BITS_SHIFT 5 92 | #define uECC_WORD_BITS_MASK 0x01F 93 | 94 | #elif (uECC_WORD_SIZE == 8) 95 | 96 | typedef uint64_t uECC_word_t; 97 | #if SUPPORTS_INT128 98 | typedef unsigned __int128 uECC_dword_t; 99 | #endif 100 | 101 | #define HIGH_BIT_SET 0x8000000000000000ull 102 | #define uECC_WORD_BITS 64 103 | #define uECC_WORD_BITS_SHIFT 6 104 | #define uECC_WORD_BITS_MASK 0x03F 105 | 106 | #endif /* uECC_WORD_SIZE */ 107 | 108 | #endif /* _UECC_TYPES_H_ */ 109 | -------------------------------------------------------------------------------- /uECC.h: -------------------------------------------------------------------------------- 1 | /* Copyright 2014, Kenneth MacKay. Licensed under the BSD 2-clause license. */ 2 | 3 | #ifndef _UECC_H_ 4 | #define _UECC_H_ 5 | 6 | #include 7 | 8 | /* Platform selection options. 9 | If uECC_PLATFORM is not defined, the code will try to guess it based on compiler macros. 
10 | Possible values for uECC_PLATFORM are defined below: */ 11 | #define uECC_arch_other 0 12 | #define uECC_x86 1 13 | #define uECC_x86_64 2 14 | #define uECC_arm 3 15 | #define uECC_arm_thumb 4 16 | #define uECC_arm_thumb2 5 17 | #define uECC_arm64 6 18 | #define uECC_avr 7 19 | 20 | /* If desired, you can define uECC_WORD_SIZE as appropriate for your platform (1, 4, or 8 bytes). 21 | If uECC_WORD_SIZE is not explicitly defined then it will be automatically set based on your 22 | platform. */ 23 | 24 | /* Optimization level; trade speed for code size. 25 | Larger values produce code that is faster but larger. 26 | Currently supported values are 0 - 4; 0 is unusably slow for most applications. 27 | Optimization level 4 currently only has an effect on ARM platforms where more than one 28 | curve is enabled. */ 29 | #ifndef uECC_OPTIMIZATION_LEVEL 30 | #define uECC_OPTIMIZATION_LEVEL 2 31 | #endif 32 | 33 | /* uECC_SQUARE_FUNC - If enabled (defined as nonzero), this will cause a specific function to be 34 | used for (scalar) squaring instead of the generic multiplication function. This can make things 35 | somewhat faster, but increases the code size. */ 36 | #ifndef uECC_SQUARE_FUNC 37 | #define uECC_SQUARE_FUNC 0 38 | #endif 39 | 40 | /* uECC_VLI_NATIVE_LITTLE_ENDIAN - If enabled (defined as nonzero), this will switch to native 41 | little-endian format for *all* arrays passed in and out of the public API. This includes public 42 | and private keys, shared secrets, signatures and message hashes. 43 | Using this switch reduces the amount of call stack memory used by uECC, since fewer intermediate 44 | translations are required. 45 | Note that this will *only* work on native little-endian processors and it will treat the uint8_t 46 | arrays passed into the public API as word arrays, therefore requiring the provided byte arrays 47 | to be word aligned on architectures that do not support unaligned accesses.
48 | IMPORTANT: Keys and signatures generated with uECC_VLI_NATIVE_LITTLE_ENDIAN=1 are incompatible 49 | with keys and signatures generated with uECC_VLI_NATIVE_LITTLE_ENDIAN=0; all parties must use 50 | the same endianness. */ 51 | #ifndef uECC_VLI_NATIVE_LITTLE_ENDIAN 52 | #define uECC_VLI_NATIVE_LITTLE_ENDIAN 0 53 | #endif 54 | 55 | /* Curve support selection. Set to 0 to remove that curve. */ 56 | #ifndef uECC_SUPPORTS_secp160r1 57 | #define uECC_SUPPORTS_secp160r1 1 58 | #endif 59 | #ifndef uECC_SUPPORTS_secp192r1 60 | #define uECC_SUPPORTS_secp192r1 1 61 | #endif 62 | #ifndef uECC_SUPPORTS_secp224r1 63 | #define uECC_SUPPORTS_secp224r1 1 64 | #endif 65 | #ifndef uECC_SUPPORTS_secp256r1 66 | #define uECC_SUPPORTS_secp256r1 1 67 | #endif 68 | #ifndef uECC_SUPPORTS_secp256k1 69 | #define uECC_SUPPORTS_secp256k1 1 70 | #endif 71 | 72 | /* Specifies whether compressed point format is supported. 73 | Set to 0 to disable point compression/decompression functions. */ 74 | #ifndef uECC_SUPPORT_COMPRESSED_POINT 75 | #define uECC_SUPPORT_COMPRESSED_POINT 1 76 | #endif 77 | 78 | struct uECC_Curve_t; 79 | typedef const struct uECC_Curve_t * uECC_Curve; 80 | 81 | #ifdef __cplusplus 82 | extern "C" 83 | { 84 | #endif 85 | 86 | #if uECC_SUPPORTS_secp160r1 87 | uECC_Curve uECC_secp160r1(void); 88 | #endif 89 | #if uECC_SUPPORTS_secp192r1 90 | uECC_Curve uECC_secp192r1(void); 91 | #endif 92 | #if uECC_SUPPORTS_secp224r1 93 | uECC_Curve uECC_secp224r1(void); 94 | #endif 95 | #if uECC_SUPPORTS_secp256r1 96 | uECC_Curve uECC_secp256r1(void); 97 | #endif 98 | #if uECC_SUPPORTS_secp256k1 99 | uECC_Curve uECC_secp256k1(void); 100 | #endif 101 | 102 | /* uECC_RNG_Function type 103 | The RNG function should fill 'size' random bytes into 'dest'. It should return 1 if 104 | 'dest' was filled with random data, or 0 if the random data could not be generated. 105 | The filled-in values should be either truly random, or from a cryptographically-secure PRNG. 
106 | 107 | A correctly functioning RNG function must be set (using uECC_set_rng()) before calling 108 | uECC_make_key() or uECC_sign(). 109 | 110 | Setting a correctly functioning RNG function improves the resistance to side-channel attacks 111 | for uECC_shared_secret() and uECC_sign_deterministic(). 112 | 113 | A correct RNG function is set by default when building for Windows, Linux, or OS X. 114 | If you are building on another POSIX-compliant system that supports /dev/random or /dev/urandom, 115 | you can define uECC_POSIX to use the predefined RNG. For embedded platforms there is no predefined 116 | RNG function; you must provide your own. 117 | */ 118 | typedef int (*uECC_RNG_Function)(uint8_t *dest, unsigned size); 119 | 120 | /* uECC_set_rng() function. 121 | Set the function that will be used to generate random bytes. The RNG function should 122 | return 1 if the random data was generated, or 0 if the random data could not be generated. 123 | 124 | On platforms where there is no predefined RNG function (eg embedded platforms), this must 125 | be called before uECC_make_key() or uECC_sign() is used. 126 | 127 | Inputs: 128 | rng_function - The function that will be used to generate random bytes. 129 | */ 130 | void uECC_set_rng(uECC_RNG_Function rng_function); 131 | 132 | /* uECC_get_rng() function. 133 | 134 | Returns the function that will be used to generate random bytes. 135 | */ 136 | uECC_RNG_Function uECC_get_rng(void); 137 | 138 | /* uECC_curve_private_key_size() function. 139 | 140 | Returns the size of a private key for the curve in bytes. 141 | */ 142 | int uECC_curve_private_key_size(uECC_Curve curve); 143 | 144 | /* uECC_curve_public_key_size() function. 145 | 146 | Returns the size of a public key for the curve in bytes. 147 | */ 148 | int uECC_curve_public_key_size(uECC_Curve curve); 149 | 150 | /* uECC_make_key() function. 151 | Create a public/private key pair.
152 | 153 | Outputs: 154 | public_key - Will be filled in with the public key. Must be at least 2 * the curve size 155 | (in bytes) long. For example, if the curve is secp256r1, public_key must be 64 156 | bytes long. 157 | private_key - Will be filled in with the private key. Must be as long as the curve order; this 158 | is typically the same as the curve size, except for secp160r1. For example, if the 159 | curve is secp256r1, private_key must be 32 bytes long. 160 | 161 | For secp160r1, private_key must be 21 bytes long! Note that the first byte will 162 | almost always be 0 (there is about a 1 in 2^80 chance of it being non-zero). 163 | 164 | Returns 1 if the key pair was generated successfully, 0 if an error occurred. 165 | */ 166 | int uECC_make_key(uint8_t *public_key, uint8_t *private_key, uECC_Curve curve); 167 | 168 | /* uECC_shared_secret() function. 169 | Compute a shared secret given your secret key and someone else's public key. If the public key 170 | is not from a trusted source and has not been previously verified, you should verify it first 171 | using uECC_valid_public_key(). 172 | Note: It is recommended that you hash the result of uECC_shared_secret() before using it for 173 | symmetric encryption or HMAC. 174 | 175 | Inputs: 176 | public_key - The public key of the remote party. 177 | private_key - Your private key. 178 | curve - The curve to use (a handle returned by one of the uECC_secp*() functions). 179 | Outputs: 180 | secret - Will be filled in with the shared secret value. Must be as long as the 181 | curve size; for example, if the curve is secp256r1, secret must be 32 bytes long. 182 | 183 | Returns 1 if the shared secret was generated successfully, 0 if an error occurred. 184 | */ 185 | int uECC_shared_secret(const uint8_t *public_key, 186 | const uint8_t *private_key, 187 | uint8_t *secret, 188 | uECC_Curve curve); 189 | 190 | #if uECC_SUPPORT_COMPRESSED_POINT 191 | /* uECC_compress() function. 192 | Compress a public key. 193 | 194 | Inputs: 195 | public_key - The public key to compress.
196 | 197 | Outputs: 198 | compressed - Will be filled in with the compressed public key. Must be at least 199 | (curve size + 1) bytes long; for example, if the curve is secp256r1, 200 | compressed must be 33 bytes long. 201 | */ 202 | void uECC_compress(const uint8_t *public_key, uint8_t *compressed, uECC_Curve curve); 203 | 204 | /* uECC_decompress() function. 205 | Decompress a compressed public key. 206 | 207 | Inputs: 208 | compressed - The compressed public key. 209 | 210 | Outputs: 211 | public_key - Will be filled in with the decompressed public key. NOTE(review): presumably this must be at least 2 * the curve size (in bytes) long, like the public_key buffer of uECC_make_key() - confirm. 212 | */ 213 | void uECC_decompress(const uint8_t *compressed, uint8_t *public_key, uECC_Curve curve); 214 | #endif /* uECC_SUPPORT_COMPRESSED_POINT */ 215 | 216 | /* uECC_valid_public_key() function. 217 | Check to see if a public key is valid. 218 | 219 | Note that you are not required to check for a valid public key before using any other uECC 220 | functions. However, you may wish to avoid spending CPU time computing a shared secret or 221 | verifying a signature using an invalid public key. Also note that the uECC_shared_secret() documentation recommends verifying public keys that come from an untrusted source before using them. 222 | 223 | Inputs: 224 | public_key - The public key to check. 225 | 226 | Returns 1 if the public key is valid, 0 if it is invalid. 227 | */ 228 | int uECC_valid_public_key(const uint8_t *public_key, uECC_Curve curve); 229 | 230 | /* uECC_compute_public_key() function. 231 | Compute the corresponding public key for a private key. 232 | 233 | Inputs: 234 | private_key - The private key to compute the public key for. 235 | 236 | Outputs: 237 | public_key - Will be filled in with the corresponding public key. 238 | 239 | Returns 1 if the key was computed successfully, 0 if an error occurred. 240 | */ 241 | int uECC_compute_public_key(const uint8_t *private_key, uint8_t *public_key, uECC_Curve curve); 242 | 243 | /* uECC_sign() function. 244 | Generate an ECDSA signature for a given hash value.
245 | 246 | Usage: Compute a hash of the data you wish to sign (SHA-2 is recommended) and pass it in to 247 | this function along with your private key. 248 | 249 | Inputs: 250 | private_key - Your private key. 251 | message_hash - The hash of the message to sign. 252 | hash_size - The size of message_hash in bytes. 253 | 254 | Outputs: 255 | signature - Will be filled in with the signature value. Must be at least 2 * curve size long. 256 | For example, if the curve is secp256r1, signature must be 64 bytes long. 257 | 258 | Returns 1 if the signature generated successfully, 0 if an error occurred. 259 | */ 260 | int uECC_sign(const uint8_t *private_key, 261 | const uint8_t *message_hash, 262 | unsigned hash_size, 263 | uint8_t *signature, 264 | uECC_Curve curve); 265 | 266 | /* uECC_HashContext structure. 267 | This is used to pass in an arbitrary hash function to uECC_sign_deterministic(). 268 | The structure will be used for multiple hash computations; each time a new hash 269 | is computed, init_hash() will be called, followed by one or more calls to 270 | update_hash(), and finally a call to finish_hash() to produce the resulting hash. 271 | 272 | The intention is that you will create a structure that includes uECC_HashContext 273 | followed by any hash-specific data.
For example: 274 | 275 | typedef struct SHA256_HashContext { 276 | uECC_HashContext uECC; 277 | SHA256_CTX ctx; 278 | } SHA256_HashContext; 279 | 280 | void init_SHA256(const uECC_HashContext *base) { 281 | SHA256_HashContext *context = (SHA256_HashContext *)base; 282 | SHA256_Init(&context->ctx); 283 | } 284 | 285 | void update_SHA256(const uECC_HashContext *base, 286 | const uint8_t *message, 287 | unsigned message_size) { 288 | SHA256_HashContext *context = (SHA256_HashContext *)base; 289 | SHA256_Update(&context->ctx, message, message_size); 290 | } 291 | 292 | void finish_SHA256(const uECC_HashContext *base, uint8_t *hash_result) { 293 | SHA256_HashContext *context = (SHA256_HashContext *)base; 294 | SHA256_Final(hash_result, &context->ctx); 295 | } 296 | 297 | ... when signing ... 298 | { 299 | uint8_t tmp[32 + 32 + 64]; 300 | SHA256_HashContext ctx = {{&init_SHA256, &update_SHA256, &finish_SHA256, 64, 32, tmp}}; 301 | uECC_sign_deterministic(key, message_hash, hash_size, &ctx.uECC, signature, curve); 302 | } 303 | */ 304 | typedef struct uECC_HashContext { 305 | void (*init_hash)(const struct uECC_HashContext *context); 306 | void (*update_hash)(const struct uECC_HashContext *context, 307 | const uint8_t *message, 308 | unsigned message_size); 309 | void (*finish_hash)(const struct uECC_HashContext *context, uint8_t *hash_result); 310 | unsigned block_size; /* Hash function block size in bytes, eg 64 for SHA-256. */ 311 | unsigned result_size; /* Hash function result size in bytes, eg 32 for SHA-256. */ 312 | uint8_t *tmp; /* Must point to a buffer of at least (2 * result_size + block_size) bytes. */ 313 | } uECC_HashContext; 314 | 315 | /* uECC_sign_deterministic() function. 316 | Generate an ECDSA signature for a given hash value, using a deterministic algorithm 317 | (see RFC 6979). You do not need to set the RNG using uECC_set_rng() before calling 318 | this function; however, if the RNG is defined it will improve resistance to side-channel 319 | attacks.
320 | 321 | Usage: Compute a hash of the data you wish to sign (SHA-2 is recommended) and pass it to 322 | this function along with your private key and a hash context. Note that the message_hash 323 | does not need to be computed with the same hash function used by hash_context. 324 | 325 | Inputs: 326 | private_key - Your private key. 327 | message_hash - The hash of the message to sign. 328 | hash_size - The size of message_hash in bytes. 329 | hash_context - A hash context to use. 330 | 331 | Outputs: 332 | signature - Will be filled in with the signature value. 333 | 334 | Returns 1 if the signature generated successfully, 0 if an error occurred. 335 | */ 336 | int uECC_sign_deterministic(const uint8_t *private_key, 337 | const uint8_t *message_hash, 338 | unsigned hash_size, 339 | const uECC_HashContext *hash_context, 340 | uint8_t *signature, 341 | uECC_Curve curve); 342 | 343 | /* uECC_verify() function. 344 | Verify an ECDSA signature. 345 | 346 | Usage: Compute the hash of the signed data using the same hash as the signer and 347 | pass it to this function along with the signer's public key and the signature values (r and s). 348 | 349 | Inputs: 350 | public_key - The signer's public key. 351 | message_hash - The hash of the signed data. 352 | hash_size - The size of message_hash in bytes. 353 | signature - The signature value (see uECC_sign() for its size). 354 | curve - The curve to use (a handle returned by one of the uECC_secp*() functions). 355 | Returns 1 if the signature is valid, 0 if it is invalid. 356 | */ 357 | int uECC_verify(const uint8_t *public_key, 358 | const uint8_t *message_hash, 359 | unsigned hash_size, 360 | const uint8_t *signature, 361 | uECC_Curve curve); 362 | 363 | #ifdef __cplusplus 364 | } /* end of extern "C" */ 365 | #endif 366 | 367 | #endif /* _UECC_H_ */ 368 | -------------------------------------------------------------------------------- /uECC_vli.h: -------------------------------------------------------------------------------- 1 | /* Copyright 2015, Kenneth MacKay. Licensed under the BSD 2-clause license.
*/ 2 | 3 | #ifndef _UECC_VLI_H_ 4 | #define _UECC_VLI_H_ 5 | 6 | #include "uECC.h" 7 | #include "types.h" 8 | 9 | /* Functions for raw large-integer manipulation. These are only available 10 | if uECC.c is compiled with uECC_ENABLE_VLI_API defined to 1. The macro defaults to 0, in which case none of the declarations below are visible. */ 11 | #ifndef uECC_ENABLE_VLI_API 12 | #define uECC_ENABLE_VLI_API 0 13 | #endif 14 | 15 | #ifdef __cplusplus 16 | extern "C" 17 | { 18 | #endif 19 | 20 | #if uECC_ENABLE_VLI_API 21 | /* NOTE(review): undocumented in this header; presumably sets all num_words words of vli to zero - confirm against uECC.c. */ 22 | void uECC_vli_clear(uECC_word_t *vli, wordcount_t num_words); 23 | 24 | /* Constant-time comparison to zero - secure way to compare long integers */ 25 | /* Returns 1 if vli == 0, 0 otherwise. */ 26 | uECC_word_t uECC_vli_isZero(const uECC_word_t *vli, wordcount_t num_words); 27 | 28 | /* Returns nonzero if bit 'bit' of vli is set. */ 29 | uECC_word_t uECC_vli_testBit(const uECC_word_t *vli, bitcount_t bit); 30 | 31 | /* Counts the number of bits required to represent vli. */ 32 | bitcount_t uECC_vli_numBits(const uECC_word_t *vli, const wordcount_t max_words); 33 | 34 | /* Sets dest = src. */ 35 | void uECC_vli_set(uECC_word_t *dest, const uECC_word_t *src, wordcount_t num_words); 36 | 37 | /* Constant-time comparison function - secure way to compare long integers */ 38 | /* Returns one if left == right, zero otherwise. */ 39 | uECC_word_t uECC_vli_equal(const uECC_word_t *left, 40 | const uECC_word_t *right, 41 | wordcount_t num_words); 42 | 43 | /* Constant-time comparison function - secure way to compare long integers */ 44 | /* Returns sign of left - right, in constant time. */ 45 | cmpresult_t uECC_vli_cmp(const uECC_word_t *left, const uECC_word_t *right, wordcount_t num_words); 46 | 47 | /* Computes vli = vli >> 1. */ 48 | void uECC_vli_rshift1(uECC_word_t *vli, wordcount_t num_words); 49 | 50 | /* Computes result = left + right, returning carry. Can modify in place.
*/ 51 | uECC_word_t uECC_vli_add(uECC_word_t *result, 52 | const uECC_word_t *left, 53 | const uECC_word_t *right, 54 | wordcount_t num_words); 55 | 56 | /* Computes result = left - right, returning borrow. Can modify in place. */ 57 | uECC_word_t uECC_vli_sub(uECC_word_t *result, 58 | const uECC_word_t *left, 59 | const uECC_word_t *right, 60 | wordcount_t num_words); 61 | 62 | /* Computes result = left * right. Result must be 2 * num_words words long. */ 63 | void uECC_vli_mult(uECC_word_t *result, 64 | const uECC_word_t *left, 65 | const uECC_word_t *right, 66 | wordcount_t num_words); 67 | 68 | /* Computes result = left^2. Result must be 2 * num_words words long. */ 69 | void uECC_vli_square(uECC_word_t *result, const uECC_word_t *left, wordcount_t num_words); 70 | 71 | /* Computes result = (left + right) % mod. 72 | Assumes that left < mod and right < mod, and that result does not overlap mod. */ 73 | void uECC_vli_modAdd(uECC_word_t *result, 74 | const uECC_word_t *left, 75 | const uECC_word_t *right, 76 | const uECC_word_t *mod, 77 | wordcount_t num_words); 78 | 79 | /* Computes result = (left - right) % mod. 80 | Assumes that left < mod and right < mod, and that result does not overlap mod. */ 81 | void uECC_vli_modSub(uECC_word_t *result, 82 | const uECC_word_t *left, 83 | const uECC_word_t *right, 84 | const uECC_word_t *mod, 85 | wordcount_t num_words); 86 | 87 | /* Computes result = product % mod, where product is 2 * num_words words long. 88 | Currently only designed to work for mod == curve->p or curve->n. Note that product is taken as a non-const pointer, so callers should not rely on its contents afterwards. */ 89 | void uECC_vli_mmod(uECC_word_t *result, 90 | uECC_word_t *product, 91 | const uECC_word_t *mod, 92 | wordcount_t num_words); 93 | 94 | /* Calculates result = product (mod curve->p), where product is up to 95 | 2 * curve->num_words words long. */ 96 | void uECC_vli_mmod_fast(uECC_word_t *result, uECC_word_t *product, uECC_Curve curve); 97 | 98 | /* Computes result = (left * right) % mod. 99 | Currently only designed to work for mod == curve->p or curve->n.
*/ 100 | void uECC_vli_modMult(uECC_word_t *result, 101 | const uECC_word_t *left, 102 | const uECC_word_t *right, 103 | const uECC_word_t *mod, 104 | wordcount_t num_words); 105 | 106 | /* Computes result = (left * right) % curve->p. */ 107 | void uECC_vli_modMult_fast(uECC_word_t *result, 108 | const uECC_word_t *left, 109 | const uECC_word_t *right, 110 | uECC_Curve curve); 111 | 112 | /* Computes result = left^2 % mod. 113 | Currently only designed to work for mod == curve->p or curve->n. */ 114 | void uECC_vli_modSquare(uECC_word_t *result, 115 | const uECC_word_t *left, 116 | const uECC_word_t *mod, 117 | wordcount_t num_words); 118 | 119 | /* Computes result = left^2 % curve->p. */ 120 | void uECC_vli_modSquare_fast(uECC_word_t *result, const uECC_word_t *left, uECC_Curve curve); 121 | 122 | /* Computes result = (1 / input) % mod, i.e. the modular inverse of input. */ 123 | void uECC_vli_modInv(uECC_word_t *result, 124 | const uECC_word_t *input, 125 | const uECC_word_t *mod, 126 | wordcount_t num_words); 127 | 128 | #if uECC_SUPPORT_COMPRESSED_POINT 129 | /* Calculates a = sqrt(a) (mod curve->p), in place. Only declared when uECC_SUPPORT_COMPRESSED_POINT is nonzero. */ 130 | void uECC_vli_mod_sqrt(uECC_word_t *a, uECC_Curve curve); 131 | #endif 132 | 133 | /* Converts an integer in uECC native format to big-endian bytes. */ 134 | void uECC_vli_nativeToBytes(uint8_t *bytes, int num_bytes, const uECC_word_t *native); 135 | /* Converts big-endian bytes to an integer in uECC native format.
*/ 136 | void uECC_vli_bytesToNative(uECC_word_t *native, const uint8_t *bytes, int num_bytes); 137 | /* NOTE(review): the accessors below are undocumented in this header; they presumably return the size fields and parameter arrays of the curve (cf. curve->num_words, curve->num_n_words and curve->p referenced by other comments in this file) - confirm against uECC.c. */ 138 | unsigned uECC_curve_num_words(uECC_Curve curve); 139 | unsigned uECC_curve_num_bytes(uECC_Curve curve); 140 | unsigned uECC_curve_num_bits(uECC_Curve curve); 141 | unsigned uECC_curve_num_n_words(uECC_Curve curve); 142 | unsigned uECC_curve_num_n_bytes(uECC_Curve curve); 143 | unsigned uECC_curve_num_n_bits(uECC_Curve curve); 144 | 145 | const uECC_word_t *uECC_curve_p(uECC_Curve curve); 146 | const uECC_word_t *uECC_curve_n(uECC_Curve curve); 147 | const uECC_word_t *uECC_curve_G(uECC_Curve curve); 148 | const uECC_word_t *uECC_curve_b(uECC_Curve curve); 149 | 150 | int uECC_valid_point(const uECC_word_t *point, uECC_Curve curve); 151 | 152 | /* Multiplies a point by a scalar. Points are represented by the X coordinate followed by 153 | the Y coordinate in the same array; both coordinates are curve->num_words words long. Note 154 | that scalar must be curve->num_n_words words long (NOT curve->num_words). */ 155 | void uECC_point_mult(uECC_word_t *result, 156 | const uECC_word_t *point, 157 | const uECC_word_t *scalar, 158 | uECC_Curve curve); 159 | 160 | /* Generates a random integer in the range 0 < random < top. 161 | Both random and top have num_words words. */ 162 | int uECC_generate_random_int(uECC_word_t *random, 163 | const uECC_word_t *top, 164 | wordcount_t num_words); 165 | 166 | #endif /* uECC_ENABLE_VLI_API */ 167 | 168 | #ifdef __cplusplus 169 | } /* end of extern "C" */ 170 | #endif 171 | 172 | #endif /* _UECC_VLI_H_ */ 173 | --------------------------------------------------------------------------------