├── .gitignore
├── LICENSE
├── Makefile
├── README.md
├── apache-2.0.txt
├── bench.sh
├── benchmark.c
├── consttime.sh
├── coverage.sh
├── deps.sh
├── entry.c
├── gen-test-data.py
├── libc.msu
├── macros.sh
├── on-target-benchmark
    ├── .gitignore
    ├── README.md
    ├── main.cpp
    ├── mbed-os.lib
    ├── mbed_app.json
    ├── mbed_settings.py
    ├── p256-m.c
    └── p256-m.h
├── on-target-closedbox
    ├── .gitignore
    ├── README.md
    ├── main.c
    ├── mbed-os.lib
    ├── mbed_app.json
    ├── mbed_settings.py
    ├── p256-m.c
    └── p256-m.h
├── on-target-openbox
    ├── .gitignore
    ├── README.md
    ├── main.c
    ├── mbed-os.lib
    ├── mbed_app.json
    └── mbed_settings.py
├── p256-m.c
├── p256-m.h
├── p256.py
├── prof-g.sh
├── prof-gpt.sh
├── prof-vg.sh
├── prof.c
├── sizes.sh
├── stack.sh
├── test-closedbox.c
├── test-common.h
├── test-openbox.c
├── toolchain-mul64.c
├── toolchain-mul64.sh
└── wcs.py


/.gitignore:
--------------------------------------------------------------------------------
 1 | benchmark
 2 | test-openbox
 3 | test-closedbox
 4 | test-data.h
 5 | *.o
 6 | *.s
 7 | *.dump
 8 | *.sizes
 9 | *.su
10 | *.dfinish
11 | 


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | Unless specifically indicated otherwise in a file, files are licensed
2 | under the Apache 2.0 license, as can be found in apache-2.0.txt.
3 | 


--------------------------------------------------------------------------------
/Makefile:
--------------------------------------------------------------------------------
 1 | # Usage:
 2 | # - 'make' runs the test suites on the host
 3 | # - 'make all' also prints code size and stack usage (main development target)
 4 | #   it leaves a variety of dumps around for inspection if desired
 5 | # - 'make clean' removes files generated by 'make all' and coverage.sh.
 6 | 
 7 | TESTOPEN=test-openbox
 8 | TESTCLOSED=test-closedbox
 9 | TESTDATA=test-data.h
10 | TESTLIB=p256-native.o
11 | SRC=p256-m.c
12 | HDR=p256-m.h
13 | 
14 | CC=clang
15 | CFLAGS=-Werror -Weverything --std=c99 -Os
16 | CFLAGS_SAN=-fsanitize=address -fsanitize=undefined
17 | # Default goal (first rule in this file): build both test programs, then run them.
18 | runtest: $(TESTCLOSED) $(TESTOPEN)
19 | 	./$(TESTCLOSED)
20 | 	./$(TESTOPEN)
21 | # Library object used by the closed-box tests, compiled with ASan and UBSan enabled.
22 | $(TESTLIB): $(SRC) $(HDR)
23 | 	$(CC) $(CFLAGS) $(CFLAGS_SAN) $< -c -o $@
24 | # Closed-box test program: test-closedbox.c ($<) linked against $(TESTLIB).
25 | $(TESTCLOSED): test-closedbox.c $(TESTLIB) $(TESTDATA) $(HDR)
26 | 	$(CC) $(CFLAGS) $(CFLAGS_SAN) $< $(TESTLIB) -o $@
27 | # Open-box test program: only $< is compiled; $(SRC) is presumably #included by it (confirm).
28 | $(TESTOPEN): test-openbox.c $(TESTDATA) $(SRC)
29 | 	$(CC) $(CFLAGS) $(CFLAGS_SAN) $< -o $@
30 | # Generated header: stdout of gen-test-data.py ($<); a newer p256.py also triggers regeneration.
31 | $(TESTDATA): gen-test-data.py p256.py
32 | 	python3 $< > $@
33 | # Development target: run the tests, then sizes.sh and stack.sh.
34 | all: runtest
35 | 	./sizes.sh
36 | 	./stack.sh
37 | # Remove the test programs, the generated header, and the dump/coverage files listed below.
38 | clean:
39 | 	rm -f $(TESTCLOSED) $(TESTOPEN) $(TESTDATA)
40 | 	rm -f *.s *.o *.dump *.sizes *.su *.dfinish
41 | 	rm -f *.gcda *.gcno *.info *.html
42 | 	rm -rf cov-closed cov-open
43 | 
44 | .PHONY: runtest clean all
45 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
  1 | p256-m is a minimalistic implementation of ECDH and ECDSA on NIST P-256,
  2 | especially suited to constrained 32-bit environments. It's written in standard
  3 | C, with optional bits of assembly for Arm Cortex-M and Cortex-A CPUs.
  4 | 
  5 | Its design is guided by the following goals in this order:
  6 | 
  7 | 1. correctness & security;
  8 | 2. low code size & RAM usage;
  9 | 3. runtime performance.
 10 | 
 11 | Most cryptographic implementations care more about speed than footprint, and
 12 | some might even risk weakening security for more speed. p256-m was written
 13 | because I wanted to see what happened when reversing the usual emphasis.
 14 | 
 15 | The result is a full implementation of ECDH and ECDSA in **less than 3KiB of
 16 | code**, using **less than 768 bytes of RAM**, with comparable performance
 17 | to existing implementations (see below) - in less than 700 LOC.
 18 | 
 19 | _Contents of this Readme:_
 20 | 
 21 | - [Correctness](#correctness)
 22 | - [Security](#security)
 23 | - [Code size](#code-size)
 24 | - [RAM usage](#ram-usage)
 25 | - [Runtime performance](#runtime-performance)
 26 | - [Comparison with other implementations](#comparison-with-other-implementations)
 27 | - [Design overview](#design-overview)
 28 | - [Notes about other curves](#notes-about-other-curves)
 29 | - [Notes about other platforms](#notes-about-other-platforms)
 30 | 
 31 | ## Correctness
 32 | 
 33 | **API design:**
 34 | 
 35 | - The API is minimal: only 4 public functions.
 36 | - Each public function fully validates its inputs and returns specific errors.
 37 | - The API uses arrays of octets for all input and output.
 38 | 
 39 | **Testing:**
 40 | 
 41 | - p256-m is validated against multiple test vectors from various RFCs and
 42 |   NIST.
 43 | - In addition, crafted inputs are used for negative testing and to reach
 44 |   corner cases.
 45 | - Two test suites are provided: one for closed-box testing (using only the
 46 |   public API), one for open-box testing (for unit-testing internal functions,
 47 | and reaching more error cases by exploiting knowledge of how the RNG is used).
 48 | - The resulting branch coverage is maximal: closed-box testing reaches all
 49 |   branches except four; three of them are reached by open-box testing using a
 50 | rigged RNG; the last branch could only be reached by computing a discrete log
 51 | on P-256... See `coverage.sh`.
 52 | - Testing also uses dynamic analysis: valgrind, ASan, MemSan, UBSan.
 53 | 
 54 | **Code quality:**
 55 | 
 56 | - The code is standard C99; it builds without warnings with `clang
 57 |   -Weverything` and `gcc -Wall -Wextra -pedantic`.
 58 | - The code is small and well documented, including internal APIs: with the
 59 |   header file, it's less than 700 lines of code, and more lines of comments
 60 | than of code.
 61 | - However it _has not been reviewed_ independently so far, as this is a
 62 |   personal project.
 63 | 
 64 | **Short Weierstrass pitfalls:**
 65 | 
 66 | It has been [pointed out](https://safecurves.cr.yp.to/) that the NIST curves,
 67 | and indeed all Short Weierstrass curves, have a number of pitfalls including
 68 | risk for the implementation to:
 69 | 
 70 | - "produce incorrect results for some rare curve points" - this is avoided by
 71 |   carefully checking the validity domain of formulas used throughout the code;
 72 | - "leak secret data when the input isn't a curve point" - this is avoided by
 73 |   validating that points lie on the curve every time a point is deserialized.
 74 | 
 75 | ## Security
 76 | 
 77 | In addition to the above correctness claims, p256-m has the following
 78 | properties:
 79 | 
 80 | - it has no branch depending (even indirectly) on secret data;
 81 | - it has no memory access depending (even indirectly) on secret data.
 82 | 
 83 | These properties are checked using valgrind and MemSan with the ideas
 84 | behind [ctgrind](https://github.com/agl/ctgrind), see `consttime.sh`.
 85 | 
 86 | In addition to avoiding branches and memory accesses depending on secret data,
 87 | p256-m also avoids instructions (or library functions) whose execution time
 88 | depends on the value of operands on cores of interest. Namely, it never uses
 89 | integer division, and for multiplication by default it only uses 16x16->32 bit
 90 | unsigned multiplication. On cores which have a constant-time 32x32->64 bit
 91 | unsigned multiplication instruction, the symbol `MUL64_IS_CONSTANT_TIME` can
 92 | be defined by the user at compile-time to take advantage of it in order to
 93 | improve performance and code size. (On Cortex-M and Cortex-A cores with GCC or
 94 | Clang this is not necessary, since inline assembly is used instead.)
 95 | 
 96 | As a result, p256-m should be secure against the following classes of attackers:
 97 | 
 98 | 1. attackers who can only manipulate the input and observe the output;
 99 | 2. attackers who can also measure the total computation time of the operation;
100 | 3. attackers who can also observe and manipulate micro-architectural features
101 |    such as the cache or branch predictor with arbitrary precision.
102 | 
103 | However, p256-m makes no attempt to protect against:
104 | 
105 | 4. passive physical attackers who can record traces of physical emissions
106 |    (power, EM, sound) of the CPU while it manipulates secrets;
107 | 5. active physical attackers who can also inject faults in the computation.
108 | 
109 | (Note: p256-m should actually be secure against SPA, by virtue of being fully
110 | constant-flow, but is not expected to resist any other physical attack.)
111 | 
112 | **Warning:** p256-m requires an externally-provided RNG function. If that
113 | function is not cryptographically secure, then neither is p256-m's key
114 | generation or ECDSA signature generation.
115 | 
116 | _Note:_ p256-m also follows best practices such as securely erasing secret
117 | data on the stack before returning.
118 | 
119 | ## Code size
120 | 
121 | Compiled with
122 | [ARM-GCC 9](https://developer.arm.com/tools-and-software/open-source-software/developer-tools/gnu-toolchain/gnu-rm/downloads),
123 | with `-mthumb -Os`, here are samples of code sizes reached on selected cores:
124 | 
125 | - Cortex-M0: 2988 bytes
126 | - Cortex-M4: 2900 bytes
127 | - Cortex-A7: 2924 bytes
128 | 
129 | Clang was also tried but tends to generate larger code (by about 10%). For
130 | details, see `sizes.sh`.
131 | 
132 | **What's included:**
133 | 
134 | - Full input validation and (de)serialisation of input/outputs to/from bytes.
135 | - Cleaning up secret values from the stack before returning from a function.
136 | - The code has no dependency on libc functions or the toolchain's runtime
137 |   library (such as helpers for long multiply); this can be checked for the
138 | Arm-GCC toolchain with the `deps.sh` script.
139 | 
140 | **What's excluded:**
141 | 
142 | - A secure RNG function needs to be provided externally, see
143 |   `p256_generate_random()` in `p256-m.h`.
144 | 
145 | ## RAM usage
146 | 
147 | p256-m doesn't use any dynamic memory (on the heap), only the stack. Here's
148 | how much stack is used by each of its 4 public functions on selected cores:
149 | 
150 | | Function                  | Cortex-M0 | Cortex-M4 | Cortex-A7 |
151 | | ------------------------- | --------: | --------: | --------: |
152 | | `p256_gen_keypair`        |       608 |       564 |       564 |
153 | | `p256_ecdh_shared_secret` |       640 |       596 |       596 |
154 | | `p256_ecdsa_sign`         |       664 |       604 |       604 |
155 | | `p256_ecdsa_verify`       |       752 |       700 |       700 |
156 | 
157 | For details, see `stack.sh`, `wcs.py` and `libc.msu` (the above figures assume
158 | that the externally-provided RNG function uses at most 384 bytes of stack).
159 | 
160 | ## Runtime performance
161 | 
162 | Here are the timings of each public function in milliseconds measured on
163 | platforms based on a selection of cores:
164 | 
165 | - Cortex-M0 at  48 MHz: STM32F091 board running Mbed OS 6
166 | - Cortex-M4 at 100 MHz: STM32F411 board running Mbed OS 6
167 | - Cortex-A7 at 900 MHz: Raspberry Pi 2B running Raspbian Buster
168 | 
169 | | Function                  | Cortex-M0 | Cortex-M4 | Cortex-A7 |
170 | | ------------------------- | --------: | --------: | --------: |
171 | | `p256_gen_keypair`        |       921 |       145 |        11 |
172 | | `p256_ecdh_shared_secret` |       922 |       144 |        11 |
173 | | `p256_ecdsa_sign`         |       990 |       155 |        12 |
174 | | `p256_ecdsa_verify`       |      1976 |       309 |        24 |
175 | | Sum of the above          |      4809 |       753 |        59 |
176 | 
177 | The sum of these operations corresponds to a TLS handshake using ECDHE-ECDSA
178 | with mutual authentication based on raw public keys or directly-trusted
179 | certificates (otherwise, add one 'verify' for each link in the peer's
180 | certificate chain).
181 | 
182 | _Note_: the above figures were obtained by compiling with GCC, which is able
183 | to use inline assembly. Without that inline assembly (22 lines for Cortex-M0,
184 | 1 line for Cortex-M4), the code would be roughly 2 times slower on those
185 | platforms. (The effect is much less important on the Cortex-A7 core.)
186 | 
187 | For details, see `bench.sh`, `benchmark.c` and `on-target-benchmark/`.
188 | 
189 | ## Comparison with other implementations
190 | 
191 | The most relevant/convenient implementation for comparisons is
192 | [TinyCrypt](https://github.com/intel/tinycrypt), as it's also a standalone
193 | implementation of ECDH and ECDSA on P-256 only, that also targets constrained
194 | devices. Other implementations tend to implement many curves and build on a
195 | shared bignum/MPI module (possibly also supporting RSA), which makes fair
196 | comparisons less convenient.
197 | 
198 | The scripts used for TinyCrypt measurements are available in [this
199 | branch](https://github.com/mpg/tinycrypt/tree/measurements), based on version
200 | 0.2.8.
201 | 
202 | **Code size**
203 | 
204 | | Core      | p256-m | TinyCrypt |
205 | | --------- | -----: | --------: |
206 | | Cortex-M0 |   2988 |      6134 |
207 | | Cortex-M4 |   2900 |      5934 |
208 | | Cortex-A7 |   2924 |      5934 |
209 | 
210 | **RAM usage**
211 | 
212 | TinyCrypt also uses no heap, only the stack. Here's the RAM used by each
213 | operation on a Cortex-M0 core:
214 | 
215 | | operation          | p256-m | TinyCrypt |
216 | | ------------------ | -----: | --------: |
217 | | key generation     |    608 |       824 |
218 | | ECDH shared secret |    640 |       728 |
219 | | ECDSA sign         |    664 |       880 |
220 | | ECDSA verify       |    752 |       824 |
221 | 
222 | On a Cortex-M4 or Cortex-A7 core (identical numbers):
223 | 
224 | | operation          | p256-m | TinyCrypt |
225 | | ------------------ | -----: | --------: |
226 | | key generation     |    564 |       796 |
227 | | ECDH shared secret |    596 |       700 |
228 | | ECDSA sign         |    604 |       844 |
229 | | ECDSA verify       |    700 |       808 |
230 | 
231 | **Runtime performance**
232 | 
233 | Here are the timings of each operation in milliseconds measured on
234 | platforms based on a selection of cores:
235 | 
236 | _Cortex-M0_ at  48 MHz: STM32F091 board running Mbed OS 6
237 | 
238 | | Operation          | p256-m | TinyCrypt |
239 | | ------------------ | -----: | --------: |
240 | | Key generation     |    921 |       979 |
241 | | ECDH shared secret |    922 |       975 |
242 | | ECDSA sign         |    990 |      1009 |
243 | | ECDSA verify       |   1976 |      1130 |
244 | | Sum of those 4     |   4809 |      4093 |
245 | 
246 | _Cortex-M4_ at 100 MHz: STM32F411 board running Mbed OS 6
247 | 
248 | | Operation          | p256-m | TinyCrypt |
249 | | ------------------ | -----: | --------: |
250 | | Key generation     |    145 |       178 |
251 | | ECDH shared secret |    144 |       177 |
252 | | ECDSA sign         |    155 |       188 |
253 | | ECDSA verify       |    309 |       210 |
254 | | Sum of those 4     |    753 |       753 |
255 | 
256 | _Cortex-A7_ at 900 MHz: Raspberry Pi 2B running Raspbian Buster
257 | 
258 | | Operation          | p256-m | TinyCrypt |
259 | | ------------------ | -----: | --------: |
260 | | Key generation     |     11 |        13 |
261 | | ECDH shared secret |     11 |        13 |
262 | | ECDSA sign         |     12 |        14 |
263 | | ECDSA verify       |     24 |        15 |
264 | | Sum of those 4     |     59 |        55 |
265 | 
266 | _64-bit Intel_ (i7-6500U at 2.50GHz) laptop running Ubuntu 20.04
267 | 
268 | Note: results in microseconds (previous benchmarks in milliseconds)
269 | 
270 | | Operation          | p256-m | TinyCrypt |
271 | | ------------------ | -----: | --------: |
272 | | Key generation     |   1060 |      1627 |
273 | | ECDH shared secret |   1060 |      1611 |
274 | | ECDSA sign         |   1136 |      1712 |
275 | | ECDSA verify       |   2279 |      1888 |
276 | | Sum of those 4     |   5535 |      6838 |
277 | 
278 | **Other differences**
279 | 
280 | - While p256-m fully validates all inputs, TinyCrypt's ECDH shared secret
281 |   function doesn't include validation of the peer's public key, which should be
282 | done separately by the user for static ECDH (there are attacks [when users
283 | forget](https://link.springer.com/chapter/10.1007/978-3-319-24174-6_21)).
284 | - The two implementations have slightly different security characteristics:
285 |   p256-m is fully constant-time from the ground up so should be more robust
286 | than TinyCrypt against powerful local attackers (such as an untrusted OS
287 | attacking a secure enclave); on the other hand TinyCrypt includes coordinate
288 | randomisation which protects against some passive physical attacks (such as
289 | DPA, see Table 3, column C9 of [this
290 | paper](https://www.esat.kuleuven.be/cosic/publications/article-2293.pdf#page=12)),
291 | which p256-m completely ignores.
292 | - TinyCrypt's code looks like it could easily be expanded to support other
293 |   curves, while p256-m has much more hard-coded to minimize code size (see
294 | "Notes about other curves" below).
295 | - TinyCrypt uses a specialised routine for reduction modulo the curve prime,
296 |   exploiting its structure as a Solinas prime, which should be faster than the
297 | generic Montgomery reduction used by p256-m, but other factors appear to
298 | compensate for that.
299 | - TinyCrypt uses Co-Z Jacobian formulas for point operations, which should be
300 |   faster (though a bit larger) than the mixed affine-Jacobian formulas
301 | used by p256-m, but again other factors appear to compensate for that.
302 | - p256-m uses bits of inline assembly for 64-bit multiplication on the
303 |   platforms used for benchmarking, while TinyCrypt uses only C (and the
304 | compiler's runtime library).
305 | - TinyCrypt uses a specialised routine based on Shamir's trick for
306 |   ECDSA verification, which gives much better performance than the generic
307 | code that p256-m uses in order to minimize code size.
308 | 
309 | ## Design overview
310 | 
311 | The implementation is contained in a single file to keep most functions static
312 | and allow for more optimisations. It is organized in multiple layers:
313 | 
314 | - Fixed-width multi-precision arithmetic
315 | - Fixed-width modular arithmetic
316 | - Operations on curve points
317 | - Operations with scalars
318 | - The public API
319 | 
320 | **Multi-precision arithmetic.**
321 | 
322 | Large integers are represented as arrays of `uint32_t` limbs. When carries may
323 | occur, casts to `uint64_t` are used to nudge the compiler towards using the
324 | CPU's carry flag. When overflow may occur, functions return a carry flag.
325 | 
326 | This layer contains optional assembly for Cortex-M and Cortex-A cores, for the
327 | internal `u32_muladd64()` function, as well as two pure C versions of this
328 | function, depending on whether `MUL64_IS_CONSTANT_TIME` is defined.
329 | 
330 | This layer's API consists of:
331 | 
332 | - addition, subtraction;
333 | - multiply-and-add, shift by one limb (for Montgomery multiplication);
334 | - conditional assignment, assignment of a small value;
335 | - comparison of two values for equality, comparison to 0 for equality;
336 | - (de)serialization as big-endian arrays of bytes.
337 | 
338 | **Modular arithmetic.**
339 | 
340 | All modular operations are done in the Montgomery domain, that is x is
341 | represented by `x * 2^256 mod m`; integers need to be converted to that domain
342 | before computations, and back from it afterwards. Montgomery constants
343 | associated to the curve's p and n are pre-computed and stored in static
344 | structures.
345 | 
346 | Modular inversion is computed using Fermat's little theorem to get
347 | constant-time behaviour with respect to the value being inverted.
348 | 
349 | This layer's API consists of:
350 | 
351 | - the curve's constants p and n (and associated Montgomery constants);
352 | - modular addition, subtraction, multiplication, and inversion;
353 | - assignment of a small value;
354 | - conversion to/from Montgomery domain;
355 | - (de)serialization to/from bytes with integrated range checking and
356 |   Montgomery domain conversion.
357 | 
358 | **Operations on curve points.**
359 | 
360 | Curve points are represented using either affine or Jacobian coordinates;
361 | affine coordinates are extended to represent 0 as (0,0). Individual
362 | coordinates are always in the Montgomery domain.
363 | 
364 | Not all formulas associated with affine or Jacobian coordinates are complete;
365 | great care is taken to document and satisfy each function's pre-conditions.
366 | 
367 | This layer's API consists of:
368 | 
369 | - curve constants: b from the equation, the base point's coordinates;
370 | - point validity check (on the curve and not 0);
371 | - Jacobian to affine coordinate conversion;
372 | - point doubling in Jacobian coordinates (complete formulas);
373 | - point addition in mixed affine-Jacobian coordinates (P not in {0, Q, -Q});
374 | - point addition-or-doubling in affine coordinates (leaky version, only used
375 |   for ECDSA verify where all data is public);
376 | - (de)serialization to/from bytes with integrated validity checking
377 | 
378 | **Scalar operations.**
379 | 
380 | The crucial function here is scalar multiplication. It uses a signed binary
381 | ladder, which is a variant of the good old double-and-add algorithm where an
382 | addition/subtraction is performed at each step. Again, care is taken to make
383 | sure the pre-conditions for the addition formulas are always satisfied. The
384 | signed binary ladder only works if the scalar is odd; this is ensured by
385 | negating both the scalar (mod n) and the input point if necessary.
386 | 
387 | This layer's API consists of:
388 | 
389 | - scalar multiplication
390 | - de-serialization from bytes with integrated range checking
391 | - generation of a scalar and its associated public key
392 | 
393 | **Public API.**
394 | 
395 | This layer builds on the others, but unlike them, all inputs and outputs are
396 | byte arrays. Key generation and ECDH shared secret computation are thin
397 | wrappers around internal functions, just taking care of format conversions and
398 | errors. The ECDSA functions have more non-trivial logic.
399 | 
400 | This layer's API consists of:
401 | 
402 | - key-pair generation
403 | - ECDH shared secret computation
404 | - ECDSA signature creation
405 | - ECDSA signature verification
406 | 
407 | **Testing.**
408 | 
409 | A self-contained, straightforward, pure-Python implementation was first
410 | produced as a warm-up and to help check intermediate values. Test vectors from
411 | various sources are embedded and used to validate the implementation.
412 | 
413 | This implementation, `p256.py`, is used by a second Python script,
414 | `gen-test-data.py`, to generate additional data for both positive and negative
415 | testing, available from a C header file, that is then used by the closed-box
416 | and open-box test programs.
417 | 
418 | p256-m can be compiled with extra instrumentation to mark secret data and
419 | allow either valgrind or MemSan to check that no branch or memory access
420 | depends on it (even indirectly). Macros are defined for this purpose near the
421 | top of the file.
422 | 
423 | **Tested platforms.**
424 | 
425 | There are 4 versions of the internal function `u32_muladd64`: two assembly
426 | versions, for Cortex-M/A cores with or without the DSP extension, and two
427 | pure-C versions, depending on whether `MUL64_IS_CONSTANT_TIME` is defined.
428 | 
429 | Tests are run on the following platforms:
430 | 
431 | - `make` on x64 tests the pure-C version without `MUL64_IS_CONSTANT_TIME`
432 |   (with Clang).
433 | - `./consttime.sh` on x64 tests both pure-C versions (with Clang).
434 | - `make` on Arm v7-A (Raspberry Pi 2) tests the Arm-DSP assembly version (with
435 |   Clang).
436 | - `on-target-*box` on boards based on Cortex-M0 and M4 cores test both
437 |   assembly versions (with GCC).
438 | 
439 | In addition:
440 | 
441 | - `sizes.sh` builds the code for three Arm cores with GCC and Clang.
442 | - `deps.sh` checks for external dependencies with GCC.
443 | 
444 | ## Notes about other curves
445 | 
446 | It should be clear that minimal code size can only be reached by specializing
447 | the implementation to the curve at hand. Here's a list of things in the
448 | implementation that are specific to the NIST P-256 curve, and how the
449 | implementation could be changed to expand to other curves, layer by layer (see
450 | "Design Overview" above).
451 | 
452 | **Fixed-width multi-precision arithmetic:**
453 | 
454 | - The number of limbs is hard-coded to 8. For other 256-bit curves, nothing to
455 |   change. For a curve of another size, hard-code to another value. For multiple
456 | curves of various sizes, add a parameter to each function specifying the
457 | number of limbs; when declaring arrays, always use the maximum number of
458 | limbs.
459 | 
460 | **Fixed-width modular arithmetic:**
461 | 
462 | - The values of the curve's constant p and n, and their associated Montgomery
463 |   constants, are hard-coded. For another curve, just hard-code the new constants.
464 | For multiple other curves, define all the constants, and from this layer's API
465 | only keep the functions that already accept a `mod` parameter (that is, remove
466 | convenience functions `m256_xxx_p()`).
467 | - The number of limbs is again hard-coded to 8. See above, but in order to
468 |   support multiple sizes there is no need to add a new parameter to functions
469 | in this layer: the existing `mod` parameter can include the number of limbs as
470 | well.
471 | 
472 | **Operations on curve points:**
473 | 
474 | - The values of the curve's constants b (constant term from the equation) and
475 |   gx, gy (coordinates of the base point) are hard-coded. For another curve,
476 |   hard-code the other values. For multiple curves, define each curve's value and
477 | add a "curve id" parameter to all functions in this layer.
478 | - The value of the curve's constant a is implicitly hard-coded to `-3` by using
479 |   a standard optimisation to save one multiplication in the first step of
480 | `point_double()`. For curves that don't have a == -3, replace that with the
481 | normal computation.
482 | - The fact that b != 0 in the curve equation is used indirectly, to ensure
483 |   that (0, 0) is not a point on the curve and re-use that value to represent
484 | the point 0. As far as I know, all Short Weierstrass curves standardized so
485 | far have b != 0.
486 | - The shape of the curve is assumed to be Short Weierstrass. For other curve
487 |   shapes (Montgomery, (twisted) Edwards), this layer would probably look very
488 | different (both implementation and API).
489 | 
490 | **Scalar operations:**
491 | 
492 | - If multiple curves are to be supported, all functions in this layer need to
493 |   gain a new "curve id" parameter.
494 | - This layer assumes that the bit size of the curve's order n is the same as
495 |   that of the modulus p. This is true of most curves standardized so far, the
496 | only exception being secp224k1. If that curve were to be supported, the
497 | representation of `n` and scalars would need adapting to allow for an extra
498 | limb.
499 | - The bit size of the curve's order is hard-coded in `scalar_mult()`. For
500 |   multiple curves, this should be deduced from the "curve id" parameter.
501 | - The `scalar_mult()` function exploits the fact that the second least
502 |   significant bit of the curve's order n is set in order to avoid a special
503 | case. For curve orders that don't meet this criterion, we can just handle that
504 | special case (multiplication by +-2) separately (always compute that and
505 | conditionally assign it to the result).
506 | - The shape of the curve is again assumed to be Short Weierstrass. For other curve
507 |   shapes (Montgomery, (twisted) Edwards), this layer would probably have a
508 | very different implementation.
509 | 
510 | **Public API:**
511 | 
512 | - For multiple curves, all functions in this layer would need to gain a "curve
513 |   id" parameter and handle variable-sized input/output.
514 | - The shape of the curve is again assumed to be Short Weierstrass. For other curve
515 |   shapes (Montgomery, (twisted) Edwards), the ECDH API would probably look
516 | quite similar (with differences in the size of public keys), but the ECDSA API
517 | wouldn't apply and an EdDSA API would look pretty different.
518 | 
519 | ## Notes about other platforms
520 | 
521 | While p256-m is standard C99, it is written with constrained 32-bit platforms
522 | in mind and makes a few assumptions about the platform:
523 | 
524 | - The types `uint8_t`, `uint16_t`, `uint32_t` and `uint64_t` exist.
525 | - 32-bit unsigned addition and subtraction with carry are constant time.
526 | - 16x16->32-bit unsigned multiplication is available and constant time.
527 | 
528 | Also, on platforms on which 64-bit addition and subtraction with carry, or
529 | even 64x64->128-bit multiplication, are available, p256-m makes no use of
530 | them, though they could significantly improve performance.
531 | 
532 | This could be improved by replacing uses of arrays of `uint32_t` with a
533 | defined type throughout the internal APIs, and then on 64-bit platforms define
534 | that type to be an array of `uint64_t` instead, and making the obvious
535 | adaptations in the multi-precision arithmetic layer.
536 | 
537 | Finally, the optional assembly code (which boosts performance by a factor 2 on
538 | tested Cortex-M CPUs, while slightly reducing code size and stack usage) is
539 | currently only available with compilers that support GCC's extended asm
540 | syntax (which includes GCC and Clang).
541 | 


--------------------------------------------------------------------------------
/apache-2.0.txt:
--------------------------------------------------------------------------------
  1 | 
  2 |                                  Apache License
  3 |                            Version 2.0, January 2004
  4 |                         http://www.apache.org/licenses/
  5 | 
  6 |    TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
  7 | 
  8 |    1. Definitions.
  9 | 
 10 |       "License" shall mean the terms and conditions for use, reproduction,
 11 |       and distribution as defined by Sections 1 through 9 of this document.
 12 | 
 13 |       "Licensor" shall mean the copyright owner or entity authorized by
 14 |       the copyright owner that is granting the License.
 15 | 
 16 |       "Legal Entity" shall mean the union of the acting entity and all
 17 |       other entities that control, are controlled by, or are under common
 18 |       control with that entity. For the purposes of this definition,
 19 |       "control" means (i) the power, direct or indirect, to cause the
 20 |       direction or management of such entity, whether by contract or
 21 |       otherwise, or (ii) ownership of fifty percent (50%) or more of the
 22 |       outstanding shares, or (iii) beneficial ownership of such entity.
 23 | 
 24 |       "You" (or "Your") shall mean an individual or Legal Entity
 25 |       exercising permissions granted by this License.
 26 | 
 27 |       "Source" form shall mean the preferred form for making modifications,
 28 |       including but not limited to software source code, documentation
 29 |       source, and configuration files.
 30 | 
 31 |       "Object" form shall mean any form resulting from mechanical
 32 |       transformation or translation of a Source form, including but
 33 |       not limited to compiled object code, generated documentation,
 34 |       and conversions to other media types.
 35 | 
 36 |       "Work" shall mean the work of authorship, whether in Source or
 37 |       Object form, made available under the License, as indicated by a
 38 |       copyright notice that is included in or attached to the work
 39 |       (an example is provided in the Appendix below).
 40 | 
 41 |       "Derivative Works" shall mean any work, whether in Source or Object
 42 |       form, that is based on (or derived from) the Work and for which the
 43 |       editorial revisions, annotations, elaborations, or other modifications
 44 |       represent, as a whole, an original work of authorship. For the purposes
 45 |       of this License, Derivative Works shall not include works that remain
 46 |       separable from, or merely link (or bind by name) to the interfaces of,
 47 |       the Work and Derivative Works thereof.
 48 | 
 49 |       "Contribution" shall mean any work of authorship, including
 50 |       the original version of the Work and any modifications or additions
 51 |       to that Work or Derivative Works thereof, that is intentionally
 52 |       submitted to Licensor for inclusion in the Work by the copyright owner
 53 |       or by an individual or Legal Entity authorized to submit on behalf of
 54 |       the copyright owner. For the purposes of this definition, "submitted"
 55 |       means any form of electronic, verbal, or written communication sent
 56 |       to the Licensor or its representatives, including but not limited to
 57 |       communication on electronic mailing lists, source code control systems,
 58 |       and issue tracking systems that are managed by, or on behalf of, the
 59 |       Licensor for the purpose of discussing and improving the Work, but
 60 |       excluding communication that is conspicuously marked or otherwise
 61 |       designated in writing by the copyright owner as "Not a Contribution."
 62 | 
 63 |       "Contributor" shall mean Licensor and any individual or Legal Entity
 64 |       on behalf of whom a Contribution has been received by Licensor and
 65 |       subsequently incorporated within the Work.
 66 | 
 67 |    2. Grant of Copyright License. Subject to the terms and conditions of
 68 |       this License, each Contributor hereby grants to You a perpetual,
 69 |       worldwide, non-exclusive, no-charge, royalty-free, irrevocable
 70 |       copyright license to reproduce, prepare Derivative Works of,
 71 |       publicly display, publicly perform, sublicense, and distribute the
 72 |       Work and such Derivative Works in Source or Object form.
 73 | 
 74 |    3. Grant of Patent License. Subject to the terms and conditions of
 75 |       this License, each Contributor hereby grants to You a perpetual,
 76 |       worldwide, non-exclusive, no-charge, royalty-free, irrevocable
 77 |       (except as stated in this section) patent license to make, have made,
 78 |       use, offer to sell, sell, import, and otherwise transfer the Work,
 79 |       where such license applies only to those patent claims licensable
 80 |       by such Contributor that are necessarily infringed by their
 81 |       Contribution(s) alone or by combination of their Contribution(s)
 82 |       with the Work to which such Contribution(s) was submitted. If You
 83 |       institute patent litigation against any entity (including a
 84 |       cross-claim or counterclaim in a lawsuit) alleging that the Work
 85 |       or a Contribution incorporated within the Work constitutes direct
 86 |       or contributory patent infringement, then any patent licenses
 87 |       granted to You under this License for that Work shall terminate
 88 |       as of the date such litigation is filed.
 89 | 
 90 |    4. Redistribution. You may reproduce and distribute copies of the
 91 |       Work or Derivative Works thereof in any medium, with or without
 92 |       modifications, and in Source or Object form, provided that You
 93 |       meet the following conditions:
 94 | 
 95 |       (a) You must give any other recipients of the Work or
 96 |           Derivative Works a copy of this License; and
 97 | 
 98 |       (b) You must cause any modified files to carry prominent notices
 99 |           stating that You changed the files; and
100 | 
101 |       (c) You must retain, in the Source form of any Derivative Works
102 |           that You distribute, all copyright, patent, trademark, and
103 |           attribution notices from the Source form of the Work,
104 |           excluding those notices that do not pertain to any part of
105 |           the Derivative Works; and
106 | 
107 |       (d) If the Work includes a "NOTICE" text file as part of its
108 |           distribution, then any Derivative Works that You distribute must
109 |           include a readable copy of the attribution notices contained
110 |           within such NOTICE file, excluding those notices that do not
111 |           pertain to any part of the Derivative Works, in at least one
112 |           of the following places: within a NOTICE text file distributed
113 |           as part of the Derivative Works; within the Source form or
114 |           documentation, if provided along with the Derivative Works; or,
115 |           within a display generated by the Derivative Works, if and
116 |           wherever such third-party notices normally appear. The contents
117 |           of the NOTICE file are for informational purposes only and
118 |           do not modify the License. You may add Your own attribution
119 |           notices within Derivative Works that You distribute, alongside
120 |           or as an addendum to the NOTICE text from the Work, provided
121 |           that such additional attribution notices cannot be construed
122 |           as modifying the License.
123 | 
124 |       You may add Your own copyright statement to Your modifications and
125 |       may provide additional or different license terms and conditions
126 |       for use, reproduction, or distribution of Your modifications, or
127 |       for any such Derivative Works as a whole, provided Your use,
128 |       reproduction, and distribution of the Work otherwise complies with
129 |       the conditions stated in this License.
130 | 
131 |    5. Submission of Contributions. Unless You explicitly state otherwise,
132 |       any Contribution intentionally submitted for inclusion in the Work
133 |       by You to the Licensor shall be under the terms and conditions of
134 |       this License, without any additional terms or conditions.
135 |       Notwithstanding the above, nothing herein shall supersede or modify
136 |       the terms of any separate license agreement you may have executed
137 |       with Licensor regarding such Contributions.
138 | 
139 |    6. Trademarks. This License does not grant permission to use the trade
140 |       names, trademarks, service marks, or product names of the Licensor,
141 |       except as required for reasonable and customary use in describing the
142 |       origin of the Work and reproducing the content of the NOTICE file.
143 | 
144 |    7. Disclaimer of Warranty. Unless required by applicable law or
145 |       agreed to in writing, Licensor provides the Work (and each
146 |       Contributor provides its Contributions) on an "AS IS" BASIS,
147 |       WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
148 |       implied, including, without limitation, any warranties or conditions
149 |       of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
150 |       PARTICULAR PURPOSE. You are solely responsible for determining the
151 |       appropriateness of using or redistributing the Work and assume any
152 |       risks associated with Your exercise of permissions under this License.
153 | 
154 |    8. Limitation of Liability. In no event and under no legal theory,
155 |       whether in tort (including negligence), contract, or otherwise,
156 |       unless required by applicable law (such as deliberate and grossly
157 |       negligent acts) or agreed to in writing, shall any Contributor be
158 |       liable to You for damages, including any direct, indirect, special,
159 |       incidental, or consequential damages of any character arising as a
160 |       result of this License or out of the use or inability to use the
161 |       Work (including but not limited to damages for loss of goodwill,
162 |       work stoppage, computer failure or malfunction, or any and all
163 |       other commercial damages or losses), even if such Contributor
164 |       has been advised of the possibility of such damages.
165 | 
166 |    9. Accepting Warranty or Additional Liability. While redistributing
167 |       the Work or Derivative Works thereof, You may choose to offer,
168 |       and charge a fee for, acceptance of support, warranty, indemnity,
169 |       or other liability obligations and/or rights consistent with this
170 |       License. However, in accepting such obligations, You may act only
171 |       on Your own behalf and on Your sole responsibility, not on behalf
172 |       of any other Contributor, and only if You agree to indemnify,
173 |       defend, and hold each Contributor harmless for any liability
174 |       incurred by, or claims asserted against, such Contributor by reason
175 |       of your accepting any such warranty or additional liability.
176 | 
177 |    END OF TERMS AND CONDITIONS
178 | 
179 |    APPENDIX: How to apply the Apache License to your work.
180 | 
181 |       To apply the Apache License to your work, attach the following
182 |       boilerplate notice, with the fields enclosed by brackets "[]"
183 |       replaced with your own identifying information. (Don't include
184 |       the brackets!)  The text should be enclosed in the appropriate
185 |       comment syntax for the file format. We also recommend that a
186 |       file or class name and description of purpose be included on the
187 |       same "printed page" as the copyright notice for easier
188 |       identification within third-party archives.
189 | 
190 |    Copyright [yyyy] [name of copyright owner]
191 | 
192 |    Licensed under the Apache License, Version 2.0 (the "License");
193 |    you may not use this file except in compliance with the License.
194 |    You may obtain a copy of the License at
195 | 
196 |        http://www.apache.org/licenses/LICENSE-2.0
197 | 
198 |    Unless required by applicable law or agreed to in writing, software
199 |    distributed under the License is distributed on an "AS IS" BASIS,
200 |    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
201 |    See the License for the specific language governing permissions and
202 |    limitations under the License.
203 | 


--------------------------------------------------------------------------------
/bench.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/sh
 2 | 
 3 | # Build and run the on-host benchmark program.
 4 | # (See also: on-target-benchmark directory.)
 5 | #
 6 | # Author: Manuel Pégourié-Gonnard.
 7 | # SPDX-License-Identifier: Apache-2.0
 8 | 
 9 | set -eu
10 | 
11 | # Delete the binary on every exit path: with 'set -e' a failing benchmark
12 | # run would otherwise end the script before any clean-up step is reached.
13 | trap 'rm -f benchmark' EXIT
14 | 
15 | # Anything capable of running gcc has CT 64-bit mul in practice
16 | gcc --std=c99 -Werror -Wall -Wextra -pedantic \
17 |     -march=native -DMUL64_IS_CONSTANT_TIME \
18 |     -Os p256-m.c benchmark.c -o benchmark
19 | 
20 | ./benchmark
21 | 


--------------------------------------------------------------------------------
/benchmark.c:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * On-host benchmark program. See bench.sh.
 3 |  * See also: on-target-benchmark.
 4 |  *
 5 |  * Author: Manuel Pégourié-Gonnard.
 6 |  * SPDX-License-Identifier: Apache-2.0
 7 |  */
 8 | 
 9 | #include <stdio.h>
10 | #include <string.h>
11 | #include <stdlib.h>
12 | #include <sys/time.h>
13 | #include <inttypes.h>
14 | 
15 | #include "p256-m.h"
16 | 
17 | /* Test-only implementation on top of rand() - never do this in production! */
18 | int p256_generate_random(uint8_t *output, unsigned output_size)
19 | {
20 |     /* One rand() call per byte, written from the first byte to the last. */
21 |     while (output_size-- != 0) {
22 |         *output++ = (uint8_t) rand();
23 |     }
24 |     return 0;
25 | }
26 | 
27 | static uint64_t usec(void)
28 | {
29 |     struct timeval now;     /* gettimeofday() time, returned in microseconds */
30 |     gettimeofday(&now, NULL);
31 |     return (uint64_t) now.tv_usec + (uint64_t) now.tv_sec * 1000000;
32 | }
33 | 
34 | #define SUCCESS     P256_SUCCESS    /* value TIMEIT expects CODE to evaluate to */
35 | #define TIMES       100             /* timed executions of CODE per sample */
36 | #define TIMEIT(N, CODE)                                             \
37 | do {                                                                \
38 |     if (CODE != SUCCESS)                                            \
39 |         printf("%s failed\n", names[N]);                            \
40 |     const uint64_t start = usec();                                  \
41 |     for (unsigned i = 0; i < TIMES; i++) {                          \
42 |         CODE;                                                       \
43 |     }                                                               \
44 |     results[N][i] = (usec() - start) / TIMES;                       \
45 | } while (0) /* CODE is checked once untimed; results[N][i] uses the caller's i */
46 | 
47 | #define RUNS 5  /* samples per operation; main() prints the median one */
48 | 
49 | /* qsort() comparator: orders the pointed-to uint64_t values ascending. */
50 | int cmp_u64(const void *a, const void *b) {
51 |     /* Keep the const qualifier: a comparator must not modify the elements. */
52 |     const uint64_t x = *(const uint64_t *) a;
53 |     const uint64_t y = *(const uint64_t *) b;
54 |     if (x < y)
55 |         return -1;
56 |     return x > y;   /* 1 if greater, 0 if equal */
57 | }
58 | 
59 | /*
60 |  * Collect RUNS timing samples for each of the four operations, then print
61 |  * the median sample per operation and the sum of those medians.
62 |  */
63 | int main(void)
64 | {
65 |     uint8_t priv[32], pub[64], secret[32], sig[64];
66 |     /* zero-filled so that sign/verify never read indeterminate bytes */
67 |     uint8_t hash[32] = { 0 };
68 |     uint64_t results[4][RUNS], total = 0;
69 |     const char * names[4] = {"Keygen", "ECDH", "Sign", "Verify"};
70 | 
71 |     /* TIMEIT writes results[N][i]: the loop index has to be called i */
72 |     for (unsigned i = 0; i < RUNS; i++) {
73 |         TIMEIT(0, p256_gen_keypair(priv, pub));
74 |         TIMEIT(1, p256_ecdh_shared_secret(secret, priv, pub));
75 |         TIMEIT(2, p256_ecdsa_sign(sig, priv, hash, sizeof hash));
76 |         TIMEIT(3, p256_ecdsa_verify(sig, pub, hash, sizeof hash));
77 |     }
78 | 
79 |     for (unsigned n = 0; n < 4; n++) {
80 |         /* sort the samples in place so the middle one is the median */
81 |         qsort(results[n], RUNS, sizeof results[n][0], cmp_u64);
82 |         uint64_t median = results[n][RUNS / 2];
83 |         printf("%s: %"PRIu64" us\n", names[n], median);
84 |         total += median;
85 |     }
86 |     printf("%s: %"PRIu64" us\n", "Total", total);
87 | 
88 |     return 0;
89 | }
90 | 


--------------------------------------------------------------------------------
/consttime.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/sh
 2 | 
 3 | # Check constant-time behaviour using MemSan and Valgrind.
 4 | #
 5 | # Author: Manuel Pégourié-Gonnard.
 6 | # SPDX-License-Identifier: Apache-2.0
 7 | 
 8 | set -eu
 9 | 
10 | make clean
11 | # Pass 1: default 'make' goal (runs the test suites) with MemSan flags added.
12 | make CFLAGS_SAN='-DCT_MEMSAN -fsanitize=memory -g3'
13 | make clean
14 | # Pass 2: only build the two test programs, then run each under Valgrind.
15 | # valgrind is slow, save some time by using the CPU's mul64
16 | # (this also ensures the trivial definition of u32_mul64 is tested as well)
17 | make CFLAGS_SAN='-D CT_VALGRIND -g3 -D MUL64_IS_CONSTANT_TIME' test-closedbox test-openbox
18 | valgrind --track-origins=yes ./test-closedbox
19 | valgrind --track-origins=yes ./test-openbox
20 | make clean
21 | 


--------------------------------------------------------------------------------
/coverage.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/sh
 2 | 
 3 | # Measure code coverage with open/closed-box testing using gcov/lcov.
 4 | #
 5 | # Author: Manuel Pégourié-Gonnard.
 6 | # SPDX-License-Identifier: Apache-2.0
 7 | 
 8 | set -eu
 9 | 
10 | make clean
11 | make CC=gcc CFLAGS='-Werror -Wall -Wextra -O1 -g3 --coverage' test-closedbox test-openbox
12 | 
13 | # Expanded unquoted below on purpose: it holds several separate options.
14 | LCOV_FLAGS="--directory . --rc lcov_branch_coverage=1 --no-external"
15 | 
16 | # The exclude pattern is passed to lcov verbatim: the double quotes keep a
17 | # $PWD containing spaces in one word and stop the shell expanding the glob.
18 | ./test-closedbox
19 | lcov $LCOV_FLAGS --exclude "$PWD/test-*.c" --capture --output-file closed.info
20 | ./test-openbox
21 | lcov $LCOV_FLAGS --exclude "$PWD/test-*.c" --capture --output-file open.info
22 | 
23 | genhtml --branch-coverage closed.info -o cov-closed
24 | genhtml --branch-coverage open.info -o cov-open
25 | 
26 | # Leaving outputs for inspection. They're removed by 'make clean'.
27 | 


--------------------------------------------------------------------------------
/deps.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/sh
 2 | 
 3 | # Print dependencies on libc / compiler's runtime, with sizes.
 4 | #
 5 | # Author: Manuel Pégourié-Gonnard.
 6 | # SPDX-License-Identifier: Apache-2.0
 7 | 
 8 | set -eu
 9 | # Symbol-name prefixes that are filtered out of the report (presumably p256-m's own).
10 | P256_SYM_RE='(u256|u288|m256|point|scalar|ecdsa|p256)_'
11 | # For each core: link, then print "size name" of every other sized symbol + total.
12 | for CPU in m0 m4 a7; do
13 |     printf "\n*** %s ***\n" $CPU
14 |     arm-none-eabi-gcc -Os -mthumb -mcpu=cortex-$CPU p256-m.c entry.c \
15 |         --entry=p256_entry -nostartfiles -o linked.elf
16 |     arm-none-eabi-nm --print-size --radix=d linked.elf |
17 |         awk "/^[0-9]{8} [0-9]{8} . / && !/ . $P256_SYM_RE/ \
18 |             "'{print $2, $4; tot += $2} END {print "total: " tot}'
19 | done
20 | 
21 | rm linked.elf
22 | 


--------------------------------------------------------------------------------
/entry.c:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * See deps.sh - this provides an entry point for the linker.
 3 |  *
 4 |  * Author: Manuel Pégourié-Gonnard.
 5 |  * SPDX-License-Identifier: Apache-2.0
 6 |  */
 7 | 
 8 | #include "p256-m.h"
 9 | 
10 | /* Link-time stand-in for the RNG: zero-fills the buffer, nothing random. */
11 | int p256_generate_random(uint8_t *output, unsigned output_size)
12 | {
13 |     unsigned remaining = output_size;
14 |     while (remaining != 0) {
15 |         output[--remaining] = 0;    /* clear from the last byte down */
16 |     }
17 |     return 0;
18 | }
19 | 
20 | int p256_entry(void)
21 | {
22 |     uint8_t priv[32], pub[64], secret[32], sig[64], hash[32];
23 |     /* Reference the API from the entry point; return codes are ignored. */
24 |     p256_gen_keypair(priv, pub);
25 |     p256_ecdh_shared_secret(secret, priv, pub);
26 |     p256_ecdsa_sign(sig, priv, hash, sizeof hash);
27 |     p256_ecdsa_verify(sig, pub, hash, sizeof hash);
28 | 
29 |     return 0;
30 | }
31 | 


--------------------------------------------------------------------------------
/gen-test-data.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/python3
  2 | # coding: utf-8
  3 | #
  4 | # Author: Manuel Pégourié-Gonnard.
  5 | # SPDX-License-Identifier: Apache-2.0
  6 | 
  7 | """Generate test data for P-256 and related functions."""
  8 | 
  9 | from p256 import (ModInt, p256,
 10 |                   ecdsa_modint_from_hash, EcdsaSigner,
 11 |                   tv_ecdsa_rfc6979_key, tv_ecdsa_rfc6979,
 12 |                   tv_ecdh_nist)
 13 | 
 14 | base = 2**32
 15 | n_limbs = 8
 16 | top = base ** n_limbs
 17 | 
 18 | 
 19 | def montmul(x, y, m):
 20 |     """Montgomery multiplication of x, y mod m."""
 21 |     x = ModInt(x, m)
 22 |     y = ModInt(y, m)
 23 |     R = ModInt(top, m)
 24 |     return int(x * y / R)
 25 | 
 26 | 
 27 | Rp = ModInt(top, p256.p)
 28 | Rn = ModInt(top, p256.n)
 29 | 
 30 | 
 31 | def get(x, i):
 32 |     """Return i-th limb of x."""
 33 |     return (x // base**i) % base
 34 | 
 35 | 
 36 | def c_print(name, val):
 37 |     """Print 256-bit value as little-endian array of 32-bit values."""
 38 |     print('static const uint32_t', name + '[8] = {', end='')
 39 |     for i in range(8):
 40 |         sep = '\n    ' if i % 4 == 0 else ' '
 41 |         print(sep + '0x' + format(get(val, i), '08x') + ',', end='')
 42 |     print('\n};')
 43 | 
 44 | 
 45 | def c_bytes(name, val, n):
 46 |     """Print int value as big-endian array of n bytes."""
 47 |     array = name + '[' + str(n) + ']'
 48 |     print('static const uint8_t', array, '= {', end='')
 49 |     for i in range(n):
 50 |         sep = '\n    ' if i % 8 == 0 else ' '
 51 |         limb = (val // 256**(n-1-i)) % 256
 52 |         print(sep + '0x' + format(limb, '02x') + ',', end='')
 53 |     print('\n};')
 54 | 
 55 | 
 56 | def c_point(name, p):
 57 |     """Print curve point as array of bytes."""
 58 |     val = top * int(p.x()) + int(p.y())
 59 |     c_bytes(name, val, 64)
 60 | 
 61 | 
 62 | def c_bytestr(name, val):
 63 |     """Print byte string as an array of bytes."""
 64 |     array = name + '[' + str(len(val)) + ']'
 65 |     print('static const uint8_t', array, '= {', end='')
 66 |     for i, b in enumerate(val):
 67 |         sep = '\n    ' if i % 8 == 0 else ' '
 68 |         print(sep + '0x' + format(b, '02x') + ',', end='')
 69 |     print('\n};')
 70 | 
 71 | 
 72 | def c_pair(name, r, s):
 73 |     """Print a pair of 256-bit values as an array of 64 bytes."""
 74 |     val = top * r + s
 75 |     c_bytes(name, val, 64)
 76 | 
 77 | 
 78 | def print_e(name, h):
 79 |     """Print the e value (Montgomery domain) derive from hash h."""
 80 |     e = ecdsa_modint_from_hash(h, p256.n, 256)
 81 |     e_mont = int(e) * top % p256.n
 82 |     c_print(name, e_mont)
 83 | 
 84 | 
 85 | def print_val(name, x, y, m):
 86 |     """Print result of Montgomery multiplication."""
 87 |     v = montmul(x, y, m)
 88 |     c_print(name, v)
 89 | 
 90 | 
 91 | def com(msg):
 92 |     """Skip a line and print a comment."""
 93 |     print("\n/*", msg, "*/")
 94 | 
 95 | 
 96 | # These constants are not in test data but in the code itself
 97 | # This is how they were generated for reference.
 98 | #
 99 | # c_print("p256_b", int(p256.b * Rp))
100 | # c_print("p256_Gx", int(p256.gx * Rp))
101 | # c_print("p256_Gy", int(p256.gy * Rp))
102 | 
103 | print("""
104 | /*
105 |  * Test data for ECDH, ECDSA, and internal functions.
106 |  * This file was generated by gen-test-data.py
107 |  */
108 | """)
109 | 
110 | 
111 | # generated by random.randrange(2**256)
112 | r = 0x760cd745ec0db49cf76db5ed0a14613ed937cbcb9c4ecc3c7d3d0eb8dcd1d063
113 | s = 0x17380bcf120eb6d7dde65249accbcfffb3b1c6ed5444fc98c5e403b2514595c2
114 | 
115 | com("General-purpose random values")
116 | c_print('r', r)
117 | c_print('s', s)
118 | 
119 | com("r+s, r-s, s-r")
120 | c_print('rps', r + s)
121 | c_print('rms', r - s)
122 | c_print('smr', s - r)
123 | 
124 | com("Useful values for arithmetic tests")
125 | c_print('zero', 0)
126 | c_print('one', 1)
127 | c_print('word', 2**32 - 1)   # largest value that fits in one 32-bit limb
128 | c_print('b128', 2**128)      # a single bit set, in limb 4
129 | 
130 | com("n + 2**32 - 1 mod p")
131 | c_print('npwmp', (p256.n + 2**32 - 1) % p256.p)
132 | com("n + 2**128 mod p")
133 | c_print('npbmp', (p256.n + 2**128) % p256.p)
134 | com("n + n mod p")
135 | c_print('npnmp', (p256.n * 2) % p256.p)
136 | com("p - 1")
137 | c_print('pm1', p256.p - 1)
138 | com("n - 1")
139 | c_print('nm1', p256.n - 1)
140 | com("p - n")
141 | c_print('pmn', p256.p - p256.n)
142 | 
143 | com("r * 2^256 mod p and mod n")
144 | c_print('rmontp', int(r * Rp))
145 | c_print('rmontn', int(r * Rn))
146 | 
147 | com("r * s / 2^256 mod p")
148 | print_val("rsRip", r, s, p256.p)
149 | com("r * s / 2^256 mod n")
150 | print_val("rsRin", r, s, p256.n)
151 | 
152 | com("r * s mod p")
153 | c_print("rtsmp", r * s % p256.p)
154 | com("r * s mod n")
155 | c_print("rtsmn", r * s % p256.n)
156 | 
157 | com("r^-1 mod p")
158 | c_print("rip", int(ModInt(r, p256.p).inv()))
159 | com("r^-1 mod n")
160 | c_print("rin", int(ModInt(r, p256.n).inv()))
161 | 
162 | com("actual curve parameters (not in Montgomery domain)")
163 | c_print("b_raw", int(p256.b))
164 | c_print("gx_raw", int(p256.gx))
165 | c_print("gy_raw", int(p256.gy))
166 | 
167 | com("some jacobian coordinates for the base point, in Montgomery domain")
168 | z = ModInt(r*s, p256.p)
169 | c_print("jac_gx", int(p256.gx * z**2 * Rp))
170 | c_print("jac_gy", int(p256.gy * z**3 * Rp))
171 | c_print("jac_gz", int(z * Rp))
172 | 
173 | com("affine coordinates (not Montgomery) for 2 * G")
174 | g2 = 2 * p256.base_point()
175 | c_print("g2x", int(g2.x()))
176 | c_print("g2y", int(g2.y()))
177 | 
178 | com("affine coordinates (not Montgomery) for 3 * G")
179 | g3 = 3 * p256.base_point()
180 | c_print("g3x", int(g3.x()))
181 | c_print("g3y", int(g3.y()))
182 | 
183 | com("affine (non-Montgomery) y coordinates for -G, -2G, -3G")
184 | c_print("g1yn", int(-p256.base_point().y()))
185 | c_print("g2yn", int(-g2.y()))
186 | c_print("g3yn", int(-g3.y()))
187 | 
188 | com("affine (non-Montgomery) coordinates for rG, sG, and rsG")
189 | rg = r * p256.base_point()
190 | sg = s * p256.base_point()
191 | rsg = r * s * p256.base_point()
192 | c_print("rgx", int(rg.x()))
193 | c_print("rgy", int(rg.y()))
194 | c_print("sgx", int(sg.x()))
195 | c_print("sgy", int(sg.y()))
196 | c_print("rsgx", int(rsg.x()))
197 | c_print("rsgy", int(rsg.y()))
198 | 
199 | com("r and s as bytes, big-endian")
200 | c_bytes("rbytes", r, 32)
201 | c_bytes("sbytes", s, 32)
202 | 
203 | com("the curve's base point as bytes")
204 | c_point('gbytes', p256.base_point())
205 | 
206 | com("rG, sG and rsG as bytes")
207 | c_point("rgb", rg)
208 | c_point("sgb", sg)
209 | c_bytes("rsgxb", int(rsg.x()), 32)
210 | 
211 | com("ECDSA test vectors from RFC 6979 A.2.5 + integers derived from hashes")
212 | for i, tv in enumerate(tv_ecdsa_rfc6979):
213 |     h = tv['h']
214 |     bits = len(h) * 8
215 |     case = str(bits) + "ab"[i // 5]
216 | 
217 |     c_bytestr("h" + case, h)
218 |     print_e("h" + case + "_e", h)
219 | 
220 |     c_bytes("k" + case, tv['k'], 32)
221 |     c_pair("sig" + case, tv['r'], tv['s'])
222 | 
223 | com("key material from RFC A.2.5")
224 | key = tv_ecdsa_rfc6979_key
225 | c_bytes("ecdsa_priv", key['x'], 32)
226 | c_pair("ecdsa_pub", key['Ux'], key['Uy'])
227 | 
228 | com("bad key material")
229 | c_bytes("priv_bad_0", 0, 32)         # zero
230 | c_bytes("priv_bad_n", p256.n, 32)    # exactly n
231 | c_bytes("priv_bad_m", top - 1, 32)   # all 32 bytes 0xff
232 | 
233 | Ux, Uy = key['Ux'], key['Uy']
234 | c_pair("pub_bad_xp", p256.p, Uy)
235 | c_pair("pub_bad_xm", top - 1, Uy)
236 | c_pair("pub_bad_yp", Ux, p256.p)
237 | c_pair("pub_bad_ym", Ux, top - 1)
238 | 
239 | com("bad ECDSA signature (out-of-range)")
240 | tv = tv_ecdsa_rfc6979[2]
241 | sigr, sigs = tv['r'], tv['s']
242 | c_pair("sig_bad_r0", 0, sigs)
243 | c_pair("sig_bad_rn", p256.n, sigs)
244 | c_pair("sig_bad_rm", top - 1, sigs)
245 | c_pair("sig_bad_s0", sigr, 0)
246 | c_pair("sig_bad_sn", sigr, p256.n)
247 | c_pair("sig_bad_sm", sigr, top - 1)
248 | 
249 | com("ECDSA: crafted hash values to hit sign/verify special cases")
250 | # h256a_s0:
251 | #   when signing: gives s == 0
252 | #   when verifying: with dummy non-zero s, gives R == 0 (u1 G = - u2 Q)
253 | # h256a_double:
254 | #   when verifying: with dummy non-zero s, gives u1 G == u2 Q
255 | sigr = ModInt(sigr, p256.n)
256 | d = ModInt(tv_ecdsa_rfc6979_key['x'], p256.n)
257 | # 0 == s == e + rd / k  <=>  e = -rd
258 | e = - sigr * d
259 | c_bytes("h256a_s0", int(e), 32)
260 | # u1 G == u2 Q <=> e = rd
261 | e = sigr * d
262 | c_bytes("h256a_double", int(e), 32)
263 | 
264 | com("ECDSA: signature on all-0 hash")
265 | key = tv_ecdsa_rfc6979_key
266 | signer = EcdsaSigner(p256, key['x'])
267 | sig = signer.sign(b"\0" * 32)
268 | c_pair("sig_h0", sig[0], sig[1])
269 | c_bytes("h0", 0, 32)
270 | 
271 | com("ECDH test vectors from NIST")
272 | for i, tv in enumerate(tv_ecdh_nist):
273 |     base = "ecdh" + str(i) + "_"
274 |     c_pair(base + "o", tv['ox'], tv['oy'])
275 |     c_bytes(base + "d", tv['d'], 32)
276 |     c_pair(base + "q", tv['Qx'], tv['Qy'])
277 |     c_bytes(base + "z", tv['Z'], 32)
278 | 


--------------------------------------------------------------------------------
/libc.msu:
--------------------------------------------------------------------------------
1 | p256_generate_random 384
2 | 


--------------------------------------------------------------------------------
/macros.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/sh
 2 | 
 3 | # Print the values of pre-defined macros of interest on a selection of cores.
 4 | # This is a development helper to investigate how to detect cores/features.
 5 | #
 6 | # Author: Manuel Pégourié-Gonnard.
 7 | # SPDX-License-Identifier: Apache-2.0
 8 | 
 9 | set -eu
10 | 
11 | CPU_LIST='m0 m0plus m3 m4 m7 m23 m33'
12 | # v7-A cores
13 | #CPU_LIST='a5 a7 a8 a9 a12 a15 a17'
14 | # v8-A cores
15 | #CPU_LIST='a32 a35 a53 a55 a57 a72 a73 a75 a76'
16 | # pre-cortex cores
17 | #CPU_LIST='arm1176jzf-s arm10tdmi arm10e arm9tdmi arm9'
18 | 
19 | for CPU in $CPU_LIST; do   # one sorted macro dump per compiler and core
20 |     case $CPU in
21 |         arm*)   FULL_CPU="$CPU";;          # name already starts with "arm": use as is
22 |         *)      FULL_CPU="cortex-$CPU";;   # short name: add the cortex- prefix
23 |     esac
24 |     arm-none-eabi-gcc -mcpu=$FULL_CPU -mthumb -dM -E - </dev/null |
25 |         sort > macros-gcc-$CPU.txt
26 |     clang --target=arm-none-eabi -mcpu=$FULL_CPU -dM -E - </dev/null |
27 |         sort > macros-clang-$CPU.txt
28 | done
29 | 
30 | get_macro() {   # usage: get_macro NAME_REGEX COMPILER CPU
31 |     RE=$1
32 |     CC=$2
33 |     CPU=$3      # plain globals; the caller below runs this inside $(...)
34 |     # Print the value of each matching #define in the dump; nothing if absent.
35 |     sed -n "s/^#define $RE \(.*\)/\1/p" macros-$CC-$CPU.txt
36 | }
37 | 
38 | for MACRO_RE in __GNUC__ __ARM_ARCH __ARM_ARCH_PROFILE __ARM_FEATURE_DSP; do
39 |     printf "\n%s\n      " "$MACRO_RE"   # title, then padding before the header row
40 |     for CPU in $CPU_LIST; do            # header row: one column per core
41 |         printf "%7s " $CPU
42 |     done
43 |     printf "\n"
44 |     for CC in gcc clang; do             # one table row per compiler
45 |         printf "%5s " $CC
46 |         for CPU in $CPU_LIST; do
47 |             printf "%7s " $(get_macro "$MACRO_RE" $CC $CPU)   # empty cell if undefined
48 |         done
49 |         printf "\n"
50 |     done
51 | done
52 | 
53 | # comment out for manual exploration
54 | rm macros-*.txt
55 | 


--------------------------------------------------------------------------------
/on-target-benchmark/.gitignore:
--------------------------------------------------------------------------------
1 | mbed-os
2 | BUILD
3 | .mbed
4 | 


--------------------------------------------------------------------------------
/on-target-benchmark/README.md:
--------------------------------------------------------------------------------
 1 | How to build and run the p256-m benchmark on Mbed-enabled targets
 2 | =================================================================
 3 | 
 4 | First time
 5 | ----------
 6 | 
 7 | - Make sure you have the `arm-none-eabi` GCC-based toolchain installed:
 8 |   https://developer.arm.com/tools-and-software/open-source-software/developer-tools/gnu-toolchain/gnu-rm/downloads
 9 | - Install mbed-cli:
10 |   https://os.mbed.com/docs/mbed-os/v6.2/quick-start/build-with-mbed-cli.html
11 | - Give your user permission to access the serial port associated to your
12 |   device (on my Ubuntu 20.04 machine, that's `/dev/ttyACM0` and I needed to
13 |   add myself to the `dialout` group and log in again).
14 | - Run `mbed deploy` in this directory.
15 | 
16 | Every time / for each target
17 | ----------------------------
18 | 
19 | - Run `make test-data.h` in the parent directory.
20 | - Connect your Mbed-enabled board to your computer.
21 | - Run `mbed compile -m <your_target> -t GCC_ARM --profile release --flash --sterm`.
22 | - If the `--flash` or `--sterm` options didn't work, you can manually:
23 |   - Copy the generated `.bin` file whose location was shown by `mbed compile`
24 |     to the directory where your target is mounted.
25 |   - Open your favourite serial terminal emulator to the connected device.
26 |   - Hit the reset button on your board if necessary.
27 | 
28 | The benchmark program should complete in a few (dozen) seconds.
29 | 


--------------------------------------------------------------------------------
/on-target-benchmark/main.cpp:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * On-target benchmark program for p256-m using Mbed OS.
 3 |  */
 4 | #include "mbed.h"
 5 | 
 6 | extern "C" {
 7 | #include "p256-m.h"
 8 | }
 9 | 
10 | /* Benchmark stub built on rand(): NOT a secure RNG, never use in production! */
11 | int p256_generate_random(uint8_t *output, unsigned output_size)
12 | {
13 |     uint8_t *cursor = output;
14 |     for (unsigned left = output_size; left > 0; --left) {
15 |         *cursor++ = (uint8_t) rand();
16 |     }
17 |     return 0;
18 | }
19 | 
20 | Timer t;
21 | int total_ms = 0;
22 | 
23 | #define FMT "%10s: %5d ms\n"
24 | 
/*
 * Run CODE once, print how long it took under the label NAME (using FMT),
 * and add that duration to the running total_ms. Uses the global Timer t.
 *
 * The body is wrapped in do { ... } while (0) so that the macro expands to
 * exactly one statement: `TIMEIT(...);` then stays correct when used as the
 * body of an unbraced if/else or loop. The elapsed time is read once so the
 * printed value and the accumulated value are guaranteed to match.
 */
#define TIMEIT(NAME, CODE)                          \
    do {                                            \
        t.reset();                                  \
        t.start();                                  \
        CODE;                                       \
        t.stop();                                   \
        const int timeit_elapsed_ms_ = t.read_ms(); \
        total_ms += timeit_elapsed_ms_;             \
        printf(FMT, NAME, timeit_elapsed_ms_);      \
    } while (0)
32 | 
33 | int main()
34 | {
35 |     uint8_t priv[32], pub[64], secret[32], sig[64], hash[32];
36 | 
37 |     puts("\np256-m benchmark");
38 |     TIMEIT("Keygen", p256_gen_keypair(priv, pub));
39 |     TIMEIT("ECDH", p256_ecdh_shared_secret(secret, priv, pub));
40 |     TIMEIT("Sign", p256_ecdsa_sign(sig, priv, hash, sizeof hash));
41 |     TIMEIT("Verify", p256_ecdsa_verify(sig, pub, hash, sizeof hash));
42 | 
43 |     /* The total is useful for quick comparisons.
44 |      *
45 |      * It also happens to represent the computation time for a mutually
46 |      * authenticated TLS handshake with directly-trusted certs or raw public
47 |      * keys (with actual cert chains there are extra signature verifications).
48 |      */
49 |     printf(FMT, "Total", total_ms);
50 | }
51 | 


--------------------------------------------------------------------------------
/on-target-benchmark/mbed-os.lib:
--------------------------------------------------------------------------------
1 | https://github.com/ARMmbed/mbed-os/#e4b81f67f939a0c0b11c147ce74aa367271e1279
2 | 


--------------------------------------------------------------------------------
/on-target-benchmark/mbed_app.json:
--------------------------------------------------------------------------------
1 | {
2 |     "target_overrides": {
3 |         "K64F": {
4 |             "platform.stdio-baud-rate": 9600
5 |         }
6 |     }
7 | }


--------------------------------------------------------------------------------
/on-target-benchmark/mbed_settings.py:
--------------------------------------------------------------------------------
 1 | """
 2 | mbed SDK
 3 | Copyright (c) 2016 ARM Limited
 4 | 
 5 | Licensed under the Apache License, Version 2.0 (the "License");
 6 | you may not use this file except in compliance with the License.
 7 | You may obtain a copy of the License at
 8 | 
 9 | http://www.apache.org/licenses/LICENSE-2.0
10 | 
11 | Unless required by applicable law or agreed to in writing, software
12 | distributed under the License is distributed on an "AS IS" BASIS,
13 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 | See the License for the specific language governing permissions and
15 | limitations under the License.
16 | """
17 | 
18 | from os.path import join, abspath, dirname
19 | 
20 | #ROOT = abspath(join(dirname(__file__), "."))
21 | 
22 | ##############################################################################
23 | # Build System Settings
24 | ##############################################################################
25 | #BUILD_DIR = abspath(join(ROOT, "build"))
26 | 
27 | # ARM
28 | #ARM_PATH = "C:/Program Files/ARM"
29 | 
30 | # GCC ARM
31 | #GCC_ARM_PATH = ""
32 | 
33 | # IAR
34 | #IAR_PATH = "C:/Program Files (x86)/IAR Systems/Embedded Workbench 7.0/arm"
35 | 
36 | # Goanna static analyser. Please overload it in private_settings.py
37 | #GOANNA_PATH = "c:/Program Files (x86)/RedLizards/Goanna Central 3.2.3/bin"
38 | 
39 | #BUILD_OPTIONS = []
40 | 
41 | # mbed.org username
42 | #MBED_ORG_USER = ""
43 | 
44 | # Print compiler warnings and errors as link format
45 | #PRINT_COMPILER_OUTPUT_AS_LINK = False
46 | 


--------------------------------------------------------------------------------
/on-target-benchmark/p256-m.c:
--------------------------------------------------------------------------------
1 | ../p256-m.c


--------------------------------------------------------------------------------
/on-target-benchmark/p256-m.h:
--------------------------------------------------------------------------------
1 | ../p256-m.h


--------------------------------------------------------------------------------
/on-target-closedbox/.gitignore:
--------------------------------------------------------------------------------
1 | mbed-os
2 | BUILD
3 | .mbed
4 | 


--------------------------------------------------------------------------------
/on-target-closedbox/README.md:
--------------------------------------------------------------------------------
 1 | How to build and run p256-m test suites on Mbed-enabled targets
 2 | ===============================================================
 3 | 
 4 | First time
 5 | ----------
 6 | 
 7 | - Make sure you have the `arm-none-eabi` GCC-based toolchain installed:
 8 |   https://developer.arm.com/tools-and-software/open-source-software/developer-tools/gnu-toolchain/gnu-rm/downloads
 9 | - Install mbed-cli:
10 |   https://os.mbed.com/docs/mbed-os/v6.2/quick-start/build-with-mbed-cli.html
11 | - Give your user permission to access the serial port associated to your
12 |   device (on my Ubuntu 20.04 machine, that's `/dev/ttyACM0` and I needed to
13 |   add myself to the `dialout` group and log in again).
14 | - Run `mbed deploy` in this directory.
15 | 
16 | Every time / for each target
17 | ----------------------------
18 | 
19 | - Run `make test-data.h` in the parent directory.
20 | - Connect your Mbed-enabled board to your computer.
21 | - Run `mbed compile -m <your_target> -t GCC_ARM --flash --sterm`.
22 | - If the `--flash` or `--sterm` options didn't work, you can manually:
23 |   - Copy the generated `.bin` file whose location was shown by `mbed compile`
24 |     to the directory where your target is mounted.
25 |   - Open your favourite serial terminal emulator to the connected device.
26 |   - Hit the reset button on your board if necessary.
27 | 
28 | Be patient; the test suite can take several minutes to complete.
29 | 
30 | 


--------------------------------------------------------------------------------
/on-target-closedbox/main.c:
--------------------------------------------------------------------------------
1 | /* Show some progress, as the tests are much slower on target */
2 | #define TEST_VERBOSE
3 | /* Using #include rather than a symlink has two purposes:
4 |  * 1. make further #include directives from the included file succeed;
5 |  * 2. allow to tune compile options here rather than on the command line. */
6 | #include "../test-closedbox.c"
7 | 


--------------------------------------------------------------------------------
/on-target-closedbox/mbed-os.lib:
--------------------------------------------------------------------------------
1 | https://github.com/ARMmbed/mbed-os/#e4b81f67f939a0c0b11c147ce74aa367271e1279
2 | 


--------------------------------------------------------------------------------
/on-target-closedbox/mbed_app.json:
--------------------------------------------------------------------------------
1 | {
2 |     "target_overrides": {
3 |         "K64F": {
4 |             "platform.stdio-baud-rate": 9600
5 |         }
6 |     }
7 | }


--------------------------------------------------------------------------------
/on-target-closedbox/mbed_settings.py:
--------------------------------------------------------------------------------
 1 | """
 2 | mbed SDK
 3 | Copyright (c) 2016 ARM Limited
 4 | 
 5 | Licensed under the Apache License, Version 2.0 (the "License");
 6 | you may not use this file except in compliance with the License.
 7 | You may obtain a copy of the License at
 8 | 
 9 | http://www.apache.org/licenses/LICENSE-2.0
10 | 
11 | Unless required by applicable law or agreed to in writing, software
12 | distributed under the License is distributed on an "AS IS" BASIS,
13 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 | See the License for the specific language governing permissions and
15 | limitations under the License.
16 | """
17 | 
18 | from os.path import join, abspath, dirname
19 | 
20 | #ROOT = abspath(join(dirname(__file__), "."))
21 | 
22 | ##############################################################################
23 | # Build System Settings
24 | ##############################################################################
25 | #BUILD_DIR = abspath(join(ROOT, "build"))
26 | 
27 | # ARM
28 | #ARM_PATH = "C:/Program Files/ARM"
29 | 
30 | # GCC ARM
31 | #GCC_ARM_PATH = ""
32 | 
33 | # IAR
34 | #IAR_PATH = "C:/Program Files (x86)/IAR Systems/Embedded Workbench 7.0/arm"
35 | 
36 | # Goanna static analyser. Please overload it in private_settings.py
37 | #GOANNA_PATH = "c:/Program Files (x86)/RedLizards/Goanna Central 3.2.3/bin"
38 | 
39 | #BUILD_OPTIONS = []
40 | 
41 | # mbed.org username
42 | #MBED_ORG_USER = ""
43 | 
44 | # Print compiler warnings and errors as link format
45 | #PRINT_COMPILER_OUTPUT_AS_LINK = False
46 | 


--------------------------------------------------------------------------------
/on-target-closedbox/p256-m.c:
--------------------------------------------------------------------------------
1 | ../p256-m.c


--------------------------------------------------------------------------------
/on-target-closedbox/p256-m.h:
--------------------------------------------------------------------------------
1 | ../p256-m.h


--------------------------------------------------------------------------------
/on-target-openbox/.gitignore:
--------------------------------------------------------------------------------
1 | mbed-os
2 | BUILD
3 | .mbed
4 | 


--------------------------------------------------------------------------------
/on-target-openbox/README.md:
--------------------------------------------------------------------------------
 1 | How to build and run p256-m test suites on Mbed-enabled targets
 2 | ===============================================================
 3 | 
 4 | First time
 5 | ----------
 6 | 
 7 | - Make sure you have the `arm-none-eabi` GCC-based toolchain installed:
 8 |   https://developer.arm.com/tools-and-software/open-source-software/developer-tools/gnu-toolchain/gnu-rm/downloads
 9 | - Install mbed-cli:
10 |   https://os.mbed.com/docs/mbed-os/v6.2/quick-start/build-with-mbed-cli.html
11 | - Give your user permission to access the serial port associated to your
12 |   device (on my Ubuntu 20.04 machine, that's `/dev/ttyACM0` and I needed to
13 |   add myself to the `dialout` group and log in again).
14 | - Run `mbed deploy` in this directory.
15 | 
16 | Every time / for each target
17 | ----------------------------
18 | 
19 | - Run `make test-data.h` in the parent directory.
20 | - Connect your Mbed-enabled board to your computer.
21 | - Run `mbed compile -m <your_target> -t GCC_ARM --flash --sterm`.
22 | - If the `--flash` or `--sterm` options didn't work, you can manually:
23 |   - Copy the generated `.bin` file whose location was shown by `mbed compile`
24 |     to the directory where your target is mounted.
25 |   - Open your favourite serial terminal emulator to the connected device.
26 |   - Hit the reset button on your board if necessary.
27 | 
28 | Be patient; the test suite can take several minutes to complete.
29 | 


--------------------------------------------------------------------------------
/on-target-openbox/main.c:
--------------------------------------------------------------------------------
1 | /* Show some progress, as the tests are much slower on target */
2 | #define TEST_VERBOSE
3 | /* Using #include rather than a symlink has two purposes:
4 |  * 1. make further #include directives from the included file succeed;
5 |  * 2. allow to tune compile options here rather than on the command line. */
6 | #include "../test-openbox.c"
7 | 


--------------------------------------------------------------------------------
/on-target-openbox/mbed-os.lib:
--------------------------------------------------------------------------------
1 | https://github.com/ARMmbed/mbed-os/#e4b81f67f939a0c0b11c147ce74aa367271e1279
2 | 


--------------------------------------------------------------------------------
/on-target-openbox/mbed_app.json:
--------------------------------------------------------------------------------
1 | {
2 |     "target_overrides": {
3 |         "K64F": {
4 |             "platform.stdio-baud-rate": 9600
5 |         }
6 |     }
7 | }


--------------------------------------------------------------------------------
/on-target-openbox/mbed_settings.py:
--------------------------------------------------------------------------------
 1 | """
 2 | mbed SDK
 3 | Copyright (c) 2016 ARM Limited
 4 | 
 5 | Licensed under the Apache License, Version 2.0 (the "License");
 6 | you may not use this file except in compliance with the License.
 7 | You may obtain a copy of the License at
 8 | 
 9 | http://www.apache.org/licenses/LICENSE-2.0
10 | 
11 | Unless required by applicable law or agreed to in writing, software
12 | distributed under the License is distributed on an "AS IS" BASIS,
13 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 | See the License for the specific language governing permissions and
15 | limitations under the License.
16 | """
17 | 
18 | from os.path import join, abspath, dirname
19 | 
20 | #ROOT = abspath(join(dirname(__file__), "."))
21 | 
22 | ##############################################################################
23 | # Build System Settings
24 | ##############################################################################
25 | #BUILD_DIR = abspath(join(ROOT, "build"))
26 | 
27 | # ARM
28 | #ARM_PATH = "C:/Program Files/ARM"
29 | 
30 | # GCC ARM
31 | #GCC_ARM_PATH = ""
32 | 
33 | # IAR
34 | #IAR_PATH = "C:/Program Files (x86)/IAR Systems/Embedded Workbench 7.0/arm"
35 | 
36 | # Goanna static analyser. Please overload it in private_settings.py
37 | #GOANNA_PATH = "c:/Program Files (x86)/RedLizards/Goanna Central 3.2.3/bin"
38 | 
39 | #BUILD_OPTIONS = []
40 | 
41 | # mbed.org username
42 | #MBED_ORG_USER = ""
43 | 
44 | # Print compiler warnings and errors as link format
45 | #PRINT_COMPILER_OUTPUT_AS_LINK = False
46 | 


--------------------------------------------------------------------------------
/p256-m.c:
--------------------------------------------------------------------------------
   1 | /*
   2 |  * Implementation of curve P-256 (ECDH and ECDSA)
   3 |  *
   4 |  * Author: Manuel Pégourié-Gonnard.
   5 |  * SPDX-License-Identifier: Apache-2.0
   6 |  */
   7 | 
   8 | #include "p256-m.h"
   9 | 
  10 | /*
  11 |  * Zeroize memory - this should not be optimized away
  12 |  */
  13 | static void zeroize(void *d, size_t n)
  14 | {
  15 |     volatile char *p = d;
  16 |     while( n-- )
  17 |         *p++ = 0;
  18 | }
  19 | 
/*
 * Helpers to test constant-time behaviour with valgrind or MemSan.
 *
 * CT_POISON() is used for secret data. It marks the memory area as
 * uninitialised, so that any branch or pointer dereference that depends on it
 * (even indirectly) triggers a warning.
 * CT_UNPOISON() is used for public data; it marks the area as initialised.
 *
 * Both take (pointer, size). The backend is selected at compile time by
 * defining CT_MEMSAN or CT_VALGRIND; CT_MEMSAN takes precedence if both are
 * defined.
 *
 * These are macros in order to avoid interfering with origin tracking.
 */
#if defined(CT_MEMSAN)

#include <sanitizer/msan_interface.h>
#define CT_POISON   __msan_allocated_memory
// void __msan_allocated_memory(const volatile void* data, size_t size);
#define CT_UNPOISON __msan_unpoison
// void __msan_unpoison(const volatile void *a, size_t size);

#elif defined(CT_VALGRIND)

#include <valgrind/memcheck.h>
#define CT_POISON   VALGRIND_MAKE_MEM_UNDEFINED
// VALGRIND_MAKE_MEM_UNDEFINED(_qzz_addr,_qzz_len)
#define CT_UNPOISON VALGRIND_MAKE_MEM_DEFINED
// VALGRIND_MAKE_MEM_DEFINED(_qzz_addr,_qzz_len)

#else
/* Neither backend selected: both macros expand to nothing. */
#define CT_POISON(p, sz)
#define CT_UNPOISON(p, sz)
#endif
  50 | 
  51 | /**********************************************************************
  52 |  *
  53 |  * Operations on fixed-width unsigned integers
  54 |  *
  55 |  * Represented using 32-bit limbs, least significant limb first.
  56 |  * That is: x = x[0] + 2^32 x[1] + ... + 2^224 x[7] for 256-bit.
  57 |  *
  58 |  **********************************************************************/
  59 | 
  60 | /*
  61 |  * 256-bit set to 32-bit value
  62 |  *
  63 |  * in: x in [0, 2^32)
  64 |  * out: z = x
  65 |  */
  66 | static void u256_set32(uint32_t z[8], uint32_t x)
  67 | {
  68 |     z[0] = x;
  69 |     for (unsigned i = 1; i < 8; i++) {
  70 |         z[i] = 0;
  71 |     }
  72 | }
  73 | 
  74 | /*
  75 |  * 256-bit addition
  76 |  *
  77 |  * in: x, y in [0, 2^256)
  78 |  * out: z = (x + y) mod 2^256
  79 |  *      c = (x + y) div 2^256
  80 |  * That is, z + c * 2^256 = x + y
  81 |  *
  82 |  * Note: as a memory area, z must be either equal to x or y, or not overlap.
  83 |  */
  84 | static uint32_t u256_add(uint32_t z[8],
  85 |                          const uint32_t x[8], const uint32_t y[8])
  86 | {
  87 |     uint32_t carry = 0;
  88 | 
  89 |     for (unsigned i = 0; i < 8; i++) {
  90 |         uint64_t sum = (uint64_t) carry + x[i] + y[i];
  91 |         z[i] = (uint32_t) sum;
  92 |         carry = (uint32_t) (sum >> 32);
  93 |     }
  94 | 
  95 |     return carry;
  96 | }
  97 | 
  98 | /*
  99 |  * 256-bit subtraction
 100 |  *
 101 |  * in: x, y in [0, 2^256)
 102 |  * out: z = (x - y) mod 2^256
 103 |  *      c = 0 if x >=y, 1 otherwise
 104 |  * That is, z = c * 2^256 + x - y
 105 |  *
 106 |  * Note: as a memory area, z must be either equal to x or y, or not overlap.
 107 |  */
 108 | static uint32_t u256_sub(uint32_t z[8],
 109 |                          const uint32_t x[8], const uint32_t y[8])
 110 | {
 111 |     uint32_t carry = 0;
 112 | 
 113 |     for (unsigned i = 0; i < 8; i++) {
 114 |         uint64_t diff = (uint64_t) x[i] - y[i] - carry;
 115 |         z[i] = (uint32_t) diff;
 116 |         carry = -(uint32_t) (diff >> 32);
 117 |     }
 118 | 
 119 |     return carry;
 120 | }
 121 | 
 122 | /*
 123 |  * 256-bit conditional assignment
 124 |  *
 125 |  * in: x in [0, 2^256)
 126 |  *     c in [0, 1]
 127 |  * out: z = x if c == 1, z unchanged otherwise
 128 |  *
 129 |  * Note: as a memory area, z must be either equal to x, or not overlap.
 130 |  */
 131 | static void u256_cmov(uint32_t z[8], const uint32_t x[8], uint32_t c)
 132 | {
 133 |     const uint32_t x_mask = -c;
 134 |     for (unsigned i = 0; i < 8; i++) {
 135 |         z[i] = (z[i] & ~x_mask) | (x[i] & x_mask);
 136 |     }
 137 | }
 138 | 
 139 | /*
 140 |  * 256-bit compare for equality
 141 |  *
 142 |  * in: x in [0, 2^256)
 143 |  *     y in [0, 2^256)
 144 |  * out: 0 if x == y, unspecified non-zero otherwise
 145 |  */
 146 | static uint32_t u256_diff(const uint32_t x[8], const uint32_t y[8])
 147 | {
 148 |     uint32_t diff = 0;
 149 |     for (unsigned i = 0; i < 8; i++) {
 150 |         diff |= x[i] ^ y[i];
 151 |     }
 152 |     return diff;
 153 | }
 154 | 
 155 | /*
 156 |  * 256-bit compare to zero
 157 |  *
 158 |  * in: x in [0, 2^256)
 159 |  * out: 0 if x == 0, unspecified non-zero otherwise
 160 |  */
 161 | static uint32_t u256_diff0(const uint32_t x[8])
 162 | {
 163 |     uint32_t diff = 0;
 164 |     for (unsigned i = 0; i < 8; i++) {
 165 |         diff |= x[i];
 166 |     }
 167 |     return diff;
 168 | }
 169 | 
 170 | /*
 171 |  * 32 x 32 -> 64-bit multiply-and-accumulate
 172 |  *
 173 |  * in: x, y, z, t in [0, 2^32)
 174 |  * out: x * y + z + t in [0, 2^64)
 175 |  *
 176 |  * Note: this computation cannot overflow.
 177 |  *
 178 |  * Note: this function has two pure-C implementations (depending on whether
 179 |  * MUL64_IS_CONSTANT_TIME), and possibly optimised asm implementations.
 180 |  * Start with the potential asm definitions, and use the C definition only if
 181 |  * we no have no asm for the current toolchain & CPU.
 182 |  */
 183 | static uint64_t u32_muladd64(uint32_t x, uint32_t y, uint32_t z, uint32_t t);
 184 | 
  185 | /* This macro is used to mark whether an asm implementation is found */
 186 | #undef MULADD64_ASM
 187 | /* This macro is used to mark whether the implementation has a small
 188 |  * code size (ie, it can be inlined even in an unrolled loop) */
 189 | #undef MULADD64_SMALL
 190 | 
 191 | /*
 192 |  * Currently assembly optimisations are only supported with GCC/Clang for
 193 |  * Arm's Cortex-A and Cortex-M lines of CPUs, which start with the v6-M and
 194 |  * v7-M architectures. __ARM_ARCH_PROFILE is not defined for v6 and earlier.
 195 |  */
 196 | #if defined(__GNUC__) &&\
 197 |     defined(__ARM_ARCH) && __ARM_ARCH >= 6 && defined(__ARM_ARCH_PROFILE) && \
 198 |     ( __ARM_ARCH_PROFILE == 77 || __ARM_ARCH_PROFILE == 65 ) /* 'M' or 'A' */
 199 | 
 200 | /*
 201 |  * This set of CPUs is conveniently partitioned as follows:
 202 |  *
 203 |  * 1. Cores that have the DSP extension, which includes a 1-cycle UMAAL
 204 |  *    instruction: M4, M7, M33, all A-class cores.
 205 |  * 2. Cores that don't have the DSP extension, and also lack a constant-time
 206 |  *    64-bit multiplication instruction:
 207 |  *    - M0, M0+, M23: 32-bit multiplication only;
 208 |  *    - M3: 64-bit multiplication is not constant-time.
 209 |  */
 210 | #if defined(__ARM_FEATURE_DSP)
 211 | 
/*
 * 32 x 32 -> 64-bit multiply-and-accumulate, UMAAL version
 * (selected when __ARM_FEATURE_DSP is defined).
 *
 * in: x, y, z, t in [0, 2^32)
 * out: x * y + z + t in [0, 2^64)
 *
 * z and t are read-write operands of the single UMAAL instruction:
 * afterwards z is used as the low word and t as the high word of the
 * 64-bit result.
 */
static uint64_t u32_muladd64(uint32_t x, uint32_t y, uint32_t z, uint32_t t)
{
    __asm__(
        /* UMAAL <RdLo>, <RdHi>, <Rn>, <Rm> */
        "umaal   %[z], %[t], %[x], %[y]"
        : [z] "+l" (z), [t] "+l" (t)
        : [x] "l" (x), [y] "l" (y)
    );
    /* Reassemble RdHi:RdLo into one 64-bit value. */
    return ((uint64_t) t << 32) | z;
}
 222 | #define MULADD64_ASM
 223 | #define MULADD64_SMALL
 224 | 
 225 | #else /* __ARM_FEATURE_DSP */
 226 | 
/*
 * 32 x 32 -> 64-bit multiply-and-accumulate, version for cores without
 * the DSP extension.
 *
 * in: x, y, z, t in [0, 2^32)
 * out: x * y + z + t in [0, 2^64)
 *
 * This implementation only uses 16x16->32 bit multiplication.
 *
 * It decomposes the multiplicands as:
 *      x = xh:xl = 2^16 * xh + xl
 *      y = yh:yl = 2^16 * yh + yl
 * and computes their product as:
 *      x*y = xl*yl + 2**16 (xh*yl + xl*yh) + 2**32 xh*yh
 * then adds z and t to the result.
 */
static uint64_t u32_muladd64(uint32_t x, uint32_t y, uint32_t z, uint32_t t)
{
    /* First compute x*y, using 3 temporary registers */
    uint32_t tmp1, tmp2, tmp3;
    __asm__(
        ".syntax unified\n\t"
        /* start by splitting the inputs into halves */
        "lsrs    %[u], %[x], #16\n\t"
        "lsrs    %[v], %[y], #16\n\t"
        "uxth    %[x], %[x]\n\t"
        "uxth    %[y], %[y]\n\t"
        /* now we have %[x], %[y], %[u], %[v] = xl, yl, xh, yh */
        /* let's compute the 4 products we can form with those */
        "movs    %[w], %[v]\n\t"
        "muls    %[w], %[u]\n\t"
        "muls    %[v], %[x]\n\t"
        "muls    %[x], %[y]\n\t"
        "muls    %[y], %[u]\n\t"
        /* now we have %[x], %[y], %[v], %[w] = xl*yl, xh*yl, xl*yh, xh*yh */
        /* let's split and add the first middle product */
        "lsls    %[u], %[y], #16\n\t"
        "lsrs    %[y], %[y], #16\n\t"
        "adds    %[x], %[u]\n\t"
        "adcs    %[y], %[w]\n\t"
        /* let's finish with the second middle product */
        "lsls    %[u], %[v], #16\n\t"
        "lsrs    %[v], %[v], #16\n\t"
        "adds    %[x], %[u]\n\t"
        "adcs    %[y], %[v]\n\t"
        /* x and y are updated in place and now hold the low and high words
         * of the product; the temporaries are early-clobber outputs */
        : [x] "+l" (x), [y] "+l" (y),
          [u] "=&l" (tmp1), [v] "=&l" (tmp2), [w] "=&l" (tmp3)
        : /* no read-only inputs */
        : "cc"
    );
    /* The temporaries only exist to reserve registers for the asm. */
    (void) tmp1;
    (void) tmp2;
    (void) tmp3;

    /* Add z and t, using one temporary register */
    __asm__(
        ".syntax unified\n\t"
        /* %[u] = 0, so each adcs below only propagates the carry into y */
        "movs    %[u], #0\n\t"
        "adds    %[x], %[z]\n\t"
        "adcs    %[y], %[u]\n\t"
        "adds    %[x], %[t]\n\t"
        "adcs    %[y], %[u]\n\t"
        : [x] "+l" (x), [y] "+l" (y), [u] "=&l" (tmp1)
        : [z] "l" (z), [t] "l" (t)
        : "cc"
    );
    (void) tmp1;

    /* y:x is the 64-bit result */
    return ((uint64_t) y << 32) | x;
}
 291 | #define MULADD64_ASM
 292 | 
 293 | #endif /* __ARM_FEATURE_DSP */
 294 | 
 295 | #endif /* GCC/Clang with Cortex-M/A CPU */
 296 | 
 297 | #if !defined(MULADD64_ASM)
 298 | #if defined(MUL64_IS_CONSTANT_TIME)
 299 | static uint64_t u32_muladd64(uint32_t x, uint32_t y, uint32_t z, uint32_t t)
 300 | {
 301 |     return (uint64_t) x * y + z + t;
 302 | }
 303 | #define MULADD64_SMALL
 304 | #else
 305 | static uint64_t u32_muladd64(uint32_t x, uint32_t y, uint32_t z, uint32_t t)
 306 | {
 307 |     /* x = xl + 2**16 xh, y = yl + 2**16 yh */
 308 |     const uint16_t xl = (uint16_t) x;
 309 |     const uint16_t yl = (uint16_t) y;
 310 |     const uint16_t xh = x >> 16;
 311 |     const uint16_t yh = y >> 16;
 312 | 
 313 |     /* x*y = xl*yl + 2**16 (xh*yl + yl*yh) + 2**32 xh*yh
 314 |      *     = lo    + 2**16 (m1    + m2   ) + 2**32 hi    */
 315 |     const uint32_t lo = (uint32_t) xl * yl;
 316 |     const uint32_t m1 = (uint32_t) xh * yl;
 317 |     const uint32_t m2 = (uint32_t) xl * yh;
 318 |     const uint32_t hi = (uint32_t) xh * yh;
 319 | 
 320 |     uint64_t acc = lo + ((uint64_t) (hi + (m1 >> 16) + (m2 >> 16)) << 32);
 321 |     acc += m1 << 16;
 322 |     acc += m2 << 16;
 323 |     acc += z;
 324 |     acc += t;
 325 | 
 326 |     return acc;
 327 | }
 328 | #endif /* MUL64_IS_CONSTANT_TIME */
 329 | #endif /* MULADD64_ASM */
 330 | 
 331 | /*
 332 |  * 288 + 32 x 256 -> 288-bit multiply and add
 333 |  *
 334 |  * in: x in [0, 2^32)
 335 |  *     y in [0, 2^256)
 336 |  *     z in [0, 2^288)
 337 |  * out: z_out = z_in + x * y mod 2^288
 338 |  *      c     = z_in + x * y div 2^288
 339 |  * That is, z_out + c * 2^288 = z_in + x * y
 340 |  *
 341 |  * Note: as a memory area, z must be either equal to y, or not overlap.
 342 |  *
 343 |  * This is a helper for Montgomery multiplication.
 344 |  */
 345 | static uint32_t u288_muladd(uint32_t z[9], uint32_t x, const uint32_t y[8])
 346 | {
 347 |     uint32_t carry = 0;
 348 | 
 349 | #define U288_MULADD_STEP(i) \
 350 |     do { \
 351 |         uint64_t prod = u32_muladd64(x, y[i], z[i], carry); \
 352 |         z[i] = (uint32_t) prod; \
 353 |         carry = (uint32_t) (prod >> 32); \
 354 |     } while( 0 )
 355 | 
 356 | #if defined(MULADD64_SMALL)
 357 |     U288_MULADD_STEP(0);
 358 |     U288_MULADD_STEP(1);
 359 |     U288_MULADD_STEP(2);
 360 |     U288_MULADD_STEP(3);
 361 |     U288_MULADD_STEP(4);
 362 |     U288_MULADD_STEP(5);
 363 |     U288_MULADD_STEP(6);
 364 |     U288_MULADD_STEP(7);
 365 | #else
 366 |     for (unsigned i = 0; i < 8; i++) {
 367 |         U288_MULADD_STEP(i);
 368 |     }
 369 | #endif
 370 | 
 371 |     uint64_t sum = (uint64_t) z[8] + carry;
 372 |     z[8] = (uint32_t) sum;
 373 |     carry = (uint32_t) (sum >> 32);
 374 | 
 375 |     return carry;
 376 | }
 377 | 
 378 | /*
 379 |  * 288-bit in-place right shift by 32 bits
 380 |  *
 381 |  * in: z in [0, 2^288)
 382 |  *     c in [0, 2^32)
 383 |  * out: z_out = z_in div 2^32 + c * 2^256
 384 |  *            = (z_in + c * 2^288) div 2^32
 385 |  *
 386 |  * This is a helper for Montgomery multiplication.
 387 |  */
 388 | static void u288_rshift32(uint32_t z[9], uint32_t c)
 389 | {
 390 |     for (unsigned i = 0; i < 8; i++) {
 391 |         z[i] = z[i + 1];
 392 |     }
 393 |     z[8] = c;
 394 | }
 395 | 
 396 | /*
 397 |  * 256-bit import from big-endian bytes
 398 |  *
 399 |  * in: p = p0, ..., p31
 400 |  * out: z = p0 * 2^248 + p1 * 2^240 + ... + p30 * 2^8 + p31
 401 |  */
 402 | static void u256_from_bytes(uint32_t z[8], const uint8_t p[32])
 403 | {
 404 |     for (unsigned i = 0; i < 8; i++) {
 405 |         unsigned j = 4 * (7 - i);
 406 |         z[i] = ((uint32_t) p[j + 0] << 24) |
 407 |                ((uint32_t) p[j + 1] << 16) |
 408 |                ((uint32_t) p[j + 2] <<  8) |
 409 |                ((uint32_t) p[j + 3] <<  0);
 410 |     }
 411 | }
 412 | 
 413 | /*
 414 |  * 256-bit export to big-endian bytes
 415 |  *
 416 |  * in: z in [0, 2^256)
 417 |  * out: p = p0, ..., p31 such that
 418 |  *      z = p0 * 2^248 + p1 * 2^240 + ... + p30 * 2^8 + p31
 419 |  */
 420 | static void u256_to_bytes(uint8_t p[32], const uint32_t z[8])
 421 | {
 422 |     for (unsigned i = 0; i < 8; i++) {
 423 |         unsigned j = 4 * (7 - i);
 424 |         p[j + 0] = (uint8_t) (z[i] >> 24);
 425 |         p[j + 1] = (uint8_t) (z[i] >> 16);
 426 |         p[j + 2] = (uint8_t) (z[i] >>  8);
 427 |         p[j + 3] = (uint8_t) (z[i] >>  0);
 428 |     }
 429 | }
 430 | 
 431 | /**********************************************************************
 432 |  *
 433 |  * Operations modulo a 256-bit prime m
 434 |  *
 435 |  * These are done in the Montgomery domain, that is x is represented by
 436 |  *  x * 2^256 mod m
 437 |  * Numbers need to be converted to that domain before computations,
 438 |  * and back from it afterwards.
 439 |  *
 440 |  * Inversion is computed using Fermat's little theorem.
 441 |  *
 442 |  * Assumptions on m:
 443 |  * - Montgomery operations require that m is odd.
  444 |  * - Fermat's little theorem requires it to be a prime.
 445 |  * - m256_inv() further requires that m % 2^32 >= 2.
 446 |  * - m256_inv() also assumes that the value of m is not a secret.
 447 |  *
 448 |  * In practice operations are done modulo the curve's p and n,
 449 |  * both of which satisfy those assumptions.
 450 |  *
 451 |  **********************************************************************/
 452 | 
/*
 * Data associated to a modulus for Montgomery operations.
 *
 * m in [0, 2^256) - the modulus itself, must be odd
 * R2 = 2^512 mod m
 * ni = -m^-1 mod 2^32
 *
 * m and R2 are stored as 32-bit limbs, least significant limb first.
 */
typedef struct {
    uint32_t m[8];      /* the modulus */
    uint32_t R2[8];     /* 2^512 mod m */
    uint32_t ni;        /* -m^-1 mod 2^32 */
}
m256_mod;
 466 | 
/*
 * Data for Montgomery operations modulo the curve's p
 *
 * Limbs are listed least significant first.
 */
static const m256_mod p256_p = {
    {   /* the curve's p */
        0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0x00000000,
        0x00000000, 0x00000000, 0x00000001, 0xFFFFFFFF,
    },
    {   /* 2^512 mod p */
        0x00000003, 0x00000000, 0xffffffff, 0xfffffffb,
        0xfffffffe, 0xffffffff, 0xfffffffd, 0x00000004,
    },
    0x00000001, /* -p^-1 mod 2^32 */
};
 481 | 
/*
 * Data for Montgomery operations modulo the curve's n
 *
 * Limbs are listed least significant first.
 */
static const m256_mod p256_n = {
    {   /* the curve's n */
        0xFC632551, 0xF3B9CAC2, 0xA7179E84, 0xBCE6FAAD,
        0xFFFFFFFF, 0xFFFFFFFF, 0x00000000, 0xFFFFFFFF,
    },
    {   /* 2^512 mod n */
        0xbe79eea2, 0x83244c95, 0x49bd6fa6, 0x4699799c,
        0x2b6bec59, 0x2845b239, 0xf3d95620, 0x66e12d94,
    },
    0xee00bc4f, /* -n^-1 mod 2^32 */
};
 496 | 
 497 | /*
 498 |  * Modular addition
 499 |  *
 500 |  * in: x, y in [0, m)
 501 |  *     mod must point to a valid m256_mod structure
 502 |  * out: z = (x + y) mod m, in [0, m)
 503 |  *
 504 |  * Note: as a memory area, z must be either equal to x or y, or not overlap.
 505 |  */
 506 | static void m256_add(uint32_t z[8],
 507 |                      const uint32_t x[8], const uint32_t y[8],
 508 |                      const m256_mod *mod)
 509 | {
 510 |     uint32_t r[8];
 511 |     uint32_t carry_add = u256_add(z, x, y);
 512 |     uint32_t carry_sub = u256_sub(r, z, mod->m);
 513 |     /* Need to subract m if:
 514 |      *      x+y >= 2^256 > m (that is, carry_add == 1)
 515 |      *   OR z >= m (that is, carry_sub == 0) */
 516 |     uint32_t use_sub = carry_add | (1 - carry_sub);
 517 |     u256_cmov(z, r, use_sub);
 518 | }
 519 | 
 520 | /*
 521 |  * Modular addition mod p
 522 |  *
 523 |  * in: x, y in [0, p)
 524 |  * out: z = (x + y) mod p, in [0, p)
 525 |  *
 526 |  * Note: as a memory area, z must be either equal to x or y, or not overlap.
 527 |  */
 528 | static void m256_add_p(uint32_t z[8],
 529 |                        const uint32_t x[8], const uint32_t y[8])
 530 | {
 531 |     m256_add(z, x, y, &p256_p);
 532 | }
 533 | 
 534 | /*
 535 |  * Modular subtraction
 536 |  *
 537 |  * in: x, y in [0, m)
 538 |  *     mod must point to a valid m256_mod structure
 539 |  * out: z = (x - y) mod m, in [0, m)
 540 |  *
 541 |  * Note: as a memory area, z must be either equal to x or y, or not overlap.
 542 |  */
 543 | static void m256_sub(uint32_t z[8],
 544 |                      const uint32_t x[8], const uint32_t y[8],
 545 |                      const m256_mod *mod)
 546 | {
 547 |     uint32_t r[8];
 548 |     uint32_t carry = u256_sub(z, x, y);
 549 |     (void) u256_add(r, z, mod->m);
 550 |     /* Need to add m if and only if x < y, that is carry == 1.
 551 |      * In that case z is in [2^256 - m + 1, 2^256 - 1], so the
 552 |      * addition will have a carry as well, which cancels out. */
 553 |     u256_cmov(z, r, carry);
 554 | }
 555 | 
 556 | /*
 557 |  * Modular subtraction mod p
 558 |  *
 559 |  * in: x, y in [0, p)
 560 |  * out: z = (x + y) mod p, in [0, p)
 561 |  *
 562 |  * Note: as a memory area, z must be either equal to x or y, or not overlap.
 563 |  */
 564 | static void m256_sub_p(uint32_t z[8],
 565 |                        const uint32_t x[8], const uint32_t y[8])
 566 | {
 567 |     m256_sub(z, x, y, &p256_p);
 568 | }
 569 | 
 570 | /*
 571 |  * Montgomery modular multiplication
 572 |  *
 573 |  * in: x, y in [0, m)
 574 |  *     mod must point to a valid m256_mod structure
 575 |  * out: z = (x * y) / 2^256 mod m, in [0, m)
 576 |  *
 577 |  * Note: as a memory area, z may overlap with x or y.
 578 |  */
 579 | static void m256_mul(uint32_t z[8],
 580 |                      const uint32_t x[8], const uint32_t y[8],
 581 |                      const m256_mod *mod)
 582 | {
 583 |     /*
 584 |      * Algorithm 14.36 in Handbook of Applied Cryptography with:
 585 |      * b = 2^32, n = 8, R = 2^256
 586 |      */
 587 |     uint32_t m_prime = mod->ni;
 588 |     uint32_t a[9];
 589 | 
 590 |     for (unsigned i = 0; i < 9; i++) {
 591 |         a[i] = 0;
 592 |     }
 593 | 
 594 |     for (unsigned i = 0; i < 8; i++) {
 595 |         /* the "mod 2^32" is implicit from the type */
 596 |         uint32_t u = (a[0] + x[i] * y[0]) * m_prime;
 597 | 
 598 |         /* a = (a + x[i] * y + u * m) div b */
 599 |         uint32_t c = u288_muladd(a, x[i], y);
 600 |         c += u288_muladd(a, u, mod->m);
 601 |         u288_rshift32(a, c);
 602 |     }
 603 | 
 604 |     /* a = a > m ? a - m : a */
 605 |     uint32_t carry_add = a[8];  // 0 or 1 since a < 2m, see HAC Note 14.37
 606 |     uint32_t carry_sub = u256_sub(z, a, mod->m);
 607 |     uint32_t use_sub = carry_add | (1 - carry_sub);     // see m256_add()
 608 |     u256_cmov(z, a, 1 - use_sub);
 609 | }
 610 | 
 611 | /*
 612 |  * Montgomery modular multiplication modulo p.
 613 |  *
 614 |  * in: x, y in [0, p)
 615 |  * out: z = (x * y) / 2^256 mod p, in [0, p)
 616 |  *
 617 |  * Note: as a memory area, z may overlap with x or y.
 618 |  */
 619 | static void m256_mul_p(uint32_t z[8],
 620 |                        const uint32_t x[8], const uint32_t y[8])
 621 | {
 622 |     m256_mul(z, x, y, &p256_p);
 623 | }
 624 | 
 625 | /*
 626 |  * In-place conversion to Montgomery form
 627 |  *
 628 |  * in: z in [0, m)
 629 |  *     mod must point to a valid m256_mod structure
 630 |  * out: z_out = z_in * 2^256 mod m, in [0, m)
 631 |  */
 632 | static void m256_prep(uint32_t z[8], const m256_mod *mod)
 633 | {
 634 |     m256_mul(z, z, mod->R2, mod);
 635 | }
 636 | 
 637 | /*
 638 |  * In-place conversion from Montgomery form
 639 |  *
 640 |  * in: z in [0, m)
 641 |  *     mod must point to a valid m256_mod structure
 642 |  * out: z_out = z_in / 2^256 mod m, in [0, m)
 643 |  * That is, z_in was z_actual * 2^256 mod m, and z_out is z_actual
 644 |  */
 645 | static void m256_done(uint32_t z[8], const m256_mod *mod)
 646 | {
 647 |     uint32_t one[8];
 648 |     u256_set32(one, 1);
 649 |     m256_mul(z, z, one, mod);
 650 | }
 651 | 
 652 | /*
 653 |  * Set to 32-bit value
 654 |  *
 655 |  * in: x in [0, 2^32)
 656 |  *     mod must point to a valid m256_mod structure
 657 |  * out: z = x * 2^256 mod m, in [0, m)
 658 |  * That is, z is set to the image of x in the Montgomery domain.
 659 |  */
 660 | static void m256_set32(uint32_t z[8], uint32_t x, const m256_mod *mod)
 661 | {
 662 |     u256_set32(z, x);
 663 |     m256_prep(z, mod);
 664 | }
 665 | 
 666 | /*
 667 |  * Modular inversion in Montgomery form
 668 |  *
 669 |  * in: x in [0, m)
 670 |  *     mod must point to a valid m256_mod structure
 671 |  *     such that mod->m % 2^32 >= 2, assumed to be public.
 672 |  * out: z = x^-1 * 2^512 mod m if x != 0,
 673 |  *      z = 0 if x == 0
 674 |  * That is, if x = x_actual    * 2^256 mod m, then
 675 |  *             z = x_actual^-1 * 2^256 mod m
 676 |  *
 677 |  * Note: as a memory area, z may overlap with x.
 678 |  */
 679 | static void m256_inv(uint32_t z[8], const uint32_t x[8],
 680 |                      const m256_mod *mod)
 681 | {
 682 |     /*
 683 |      * Use Fermat's little theorem to compute x^-1 as x^(m-2).
 684 |      *
 685 |      * Take advantage of the fact that both p's and n's least significant limb
 686 |      * is at least 2 to perform the subtraction on the flight (no carry).
 687 |      *
 688 |      * Use plain right-to-left binary exponentiation;
 689 |      * branches are OK as the exponent is not a secret.
 690 |      */
 691 |     uint32_t bitval[8];
 692 |     u256_cmov(bitval, x, 1);    /* copy x before writing to z */
 693 | 
 694 |     m256_set32(z, 1, mod);
 695 | 
 696 |     unsigned i = 0;
 697 |     uint32_t limb = mod->m[i] - 2;
 698 |     while (1) {
 699 |         for (unsigned j = 0; j < 32; j++) {
 700 |             if ((limb & 1) != 0) {
 701 |                 m256_mul(z, z, bitval, mod);
 702 |             }
 703 |             m256_mul(bitval, bitval, bitval, mod);
 704 |             limb >>= 1;
 705 |         }
 706 | 
 707 |         if (i == 7)
 708 |             break;
 709 | 
 710 |         i++;
 711 |         limb = mod->m[i];
 712 |     }
 713 | }
 714 | 
 715 | /*
 716 |  * Import modular integer from bytes to Montgomery domain
 717 |  *
 718 |  * in: p = p0, ..., p32
 719 |  *     mod must point to a valid m256_mod structure
 720 |  * out: z = (p0 * 2^248 + ... + p31) * 2^256 mod m, in [0, m)
 721 |  *      return 0 if the number was already in [0, m), or -1.
 722 |  *      z may be incorrect and must be discared when -1 is returned.
 723 |  */
 724 | static int m256_from_bytes(uint32_t z[8],
 725 |                            const uint8_t p[32], const m256_mod *mod)
 726 | {
 727 |     u256_from_bytes(z, p);
 728 | 
 729 |     uint32_t t[8];
 730 |     uint32_t lt_m = u256_sub(t, z, mod->m);
 731 |     if (lt_m != 1)
 732 |         return -1;
 733 | 
 734 |     m256_prep(z, mod);
 735 |     return 0;
 736 | }
 737 | 
 738 | /*
 739 |  * Export modular integer from Montgomery domain to bytes
 740 |  *
 741 |  * in: z in [0, 2^256)
 742 |  *     mod must point to a valid m256_mod structure
 743 |  * out: p = p0, ..., p31 such that
 744 |  *      z = (p0 * 2^248 + ... + p31) * 2^256 mod m
 745 |  */
 746 | static void m256_to_bytes(uint8_t p[32],
 747 |                           const uint32_t z[8], const m256_mod *mod)
 748 | {
 749 |     uint32_t zi[8];
 750 |     u256_cmov(zi, z, 1);
 751 |     m256_done(zi, mod);
 752 | 
 753 |     u256_to_bytes(p, zi);
 754 | }
 755 | 
 756 | /**********************************************************************
 757 |  *
 758 |  * Operations on curve points
 759 |  *
 760 |  * Points are represented in two coordinates system:
 761 |  *  - affine (x, y) - extended to represent 0 (see below)
 762 |  *  - jacobian (x:y:z)
 763 |  * In either case, coordinates are integers modulo p256_p and
 764 |  * are always represented in the Montgomery domain.
 765 |  *
 766 |  * For background on jacobian coordinates, see for example [GECC] 3.2.2:
 767 |  * - conversions go (x, y) -> (x:y:1) and (x:y:z) -> (x/z^2, y/z^3)
 768 |  * - the curve equation becomes y^2 = x^3 - 3 x z^4 + b z^6
 769 |  * - 0 (aka the origin aka point at infinity) is (x:y:0) with y^2 = x^3.
 770 |  * - point negation goes -(x:y:z) = (x:-y:z)
 771 |  *
 772 |  * Normally 0 (the point at infinity) can't be represented in affine
 773 |  * coordinates. However we extend affine coordinates with the convention that
 774 |  * (0, 0) (which is normally not a point on the curve) is interpreted as 0.
 775 |  *
 776 |  * References:
 777 |  * - [GECC]: Guide to Elliptic Curve Cryptography; Hankerson, Menezes,
 778 |  *   Vanstone; Springer, 2004.
 779 |  * - [CMO98]: Efficient Elliptic Curve Exponentiation Using Mixed Coordinates;
 780 |  *   Cohen, Miyaji, Ono; Springer, ASIACRYPT 1998.
 781 |  *   https://link.springer.com/content/pdf/10.1007/3-540-49649-1_6.pdf
 782 |  * - [RCB15]: Complete addition formulas for prime order elliptic curves;
 783 |  *   Renes, Costello, Batina; IACR e-print 2015-1060.
 784 |  *   https://eprint.iacr.org/2015/1060.pdf
 785 |  *
 786 |  **********************************************************************/
 787 | 
 788 | /*
 789 |  * The curve's b parameter in the Short Weierstrass equation
 790 |  *  y^2 = x^3 - 3*x + b
 791 |  * Compared to the standard, this is converted to the Montgomery domain.
 792 |  */
 793 | static const uint32_t p256_b[8] = { /* b * 2^256 mod p */
 794 |     0x29c4bddf, 0xd89cdf62, 0x78843090, 0xacf005cd,
 795 |     0xf7212ed6, 0xe5a220ab, 0x04874834, 0xdc30061d,
 796 | };
 797 | 
 798 | /*
 799 |  * The curve's conventional base point G.
 800 |  * Compared to the standard, coordinates converted to the Montgomery domain.
 801 |  */
 802 | static const uint32_t p256_gx[8] = { /* G_x * 2^256 mod p */
 803 |     0x18a9143c, 0x79e730d4, 0x5fedb601, 0x75ba95fc,
 804 |     0x77622510, 0x79fb732b, 0xa53755c6, 0x18905f76,
 805 | };
 806 | static const uint32_t p256_gy[8] = { /* G_y * 2^256 mod p */
 807 |     0xce95560a, 0xddf25357, 0xba19e45c, 0x8b4ab8e4,
 808 |     0xdd21f325, 0xd2e88688, 0x25885d85, 0x8571ff18,
 809 | };
 810 | 
 811 | /*
 812 |  * Point-on-curve check - do the coordinates satisfy the curve's equation?
 813 |  *
 814 |  * in: x, y in [0, p)   (Montgomery domain)
 815 |  * out: 0 if the point lies on the curve and is not 0,
 816 |  *      unspecified non-zero otherwise
 817 |  */
 818 | static uint32_t point_check(const uint32_t x[8], const uint32_t y[8])
 819 | {
 820 |     uint32_t lhs[8], rhs[8];
 821 | 
 822 |     /* lhs = y^2 */
 823 |     m256_mul_p(lhs, y, y);
 824 | 
 825 |     /* rhs = x^3 - 3x + b */
 826 |     m256_mul_p(rhs, x,   x);      /* x^2 */
 827 |     m256_mul_p(rhs, rhs, x);      /* x^3 */
 828 |     for (unsigned i = 0; i < 3; i++)
 829 |         m256_sub_p(rhs, rhs, x);  /* x^3 - 3x */
 830 |     m256_add_p(rhs, rhs, p256_b); /* x^3 - 3x + b */
 831 | 
 832 |     return u256_diff(lhs, rhs);
 833 | }
 834 | 
 835 | /*
 836 |  * In-place jacobian to affine coordinate conversion
 837 |  *
 838 |  * in: (x:y:z) must be on the curve (coordinates in Montegomery domain)
 839 |  * out: x_out = x_in / z_in^2   (Montgomery domain)
 840 |  *      y_out = y_in / z_in^3   (Montgomery domain)
 841 |  *      z_out unspecified, must be disregarded
 842 |  *
 843 |  * Note: if z is 0 (that is, the input point is 0), x_out = y_out = 0.
 844 |  */
 845 | static void point_to_affine(uint32_t x[8], uint32_t y[8], uint32_t z[8])
 846 | {
 847 |     uint32_t t[8];
 848 | 
 849 |     m256_inv(z, z, &p256_p);    /* z = z^-1 */
 850 | 
 851 |     m256_mul_p(t, z, z);        /* t = z^-2 */
 852 |     m256_mul_p(x, x, t);        /* x = x * z^-2 */
 853 | 
 854 |     m256_mul_p(t, t, z);        /* t = z^-3 */
 855 |     m256_mul_p(y, y, t);        /* y = y * z^-3 */
 856 | }
 857 | 
 858 | /*
 859 |  * In-place point doubling in jacobian coordinates (Montgomery domain)
 860 |  *
 861 |  * in: P_in = (x:y:z), must be on the curve
 862 |  * out: (x:y:z) = P_out = 2 * P_in
 863 |  */
 864 | static void point_double(uint32_t x[8], uint32_t y[8], uint32_t z[8])
 865 | {
 866 |     /*
 867 |      * This is formula 6 from [CMO98], cited as complete in [RCB15] (table 1).
 868 |      * Notations as in the paper, except u added and t ommited (it's x3).
 869 |      */
 870 |     uint32_t m[8], s[8], u[8];
 871 | 
 872 |     /* m = 3 * x^2 + a * z^4 = 3 * (x + z^2) * (x - z^2) */
 873 |     m256_mul_p(s, z, z);
 874 |     m256_add_p(m, x, s);
 875 |     m256_sub_p(u, x, s);
 876 |     m256_mul_p(s, m, u);
 877 |     m256_add_p(m, s, s);
 878 |     m256_add_p(m, m, s);
 879 | 
 880 |     /* s = 4 * x * y^2 */
 881 |     m256_mul_p(u, y, y);
 882 |     m256_add_p(u, u, u); /* u = 2 * y^2 (used below) */
 883 |     m256_mul_p(s, x, u);
 884 |     m256_add_p(s, s, s);
 885 | 
 886 |     /* u = 8 * y^4 (not named in the paper, first term of y3) */
 887 |     m256_mul_p(u, u, u);
 888 |     m256_add_p(u, u, u);
 889 | 
 890 |     /* x3 = t = m^2 - 2 * s */
 891 |     m256_mul_p(x, m, m);
 892 |     m256_sub_p(x, x, s);
 893 |     m256_sub_p(x, x, s);
 894 | 
 895 |     /* z3 = 2 * y * z */
 896 |     m256_mul_p(z, y, z);
 897 |     m256_add_p(z, z, z);
 898 | 
 899 |     /* y3 = -u + m * (s - t) */
 900 |     m256_sub_p(y, s, x);
 901 |     m256_mul_p(y, y, m);
 902 |     m256_sub_p(y, y, u);
 903 | }
 904 | 
 905 | /*
 906 |  * In-place point addition in jacobian-affine coordinates (Montgomery domain)
 907 |  *
 908 |  * in: P_in = (x1:y1:z1), must be on the curve and not 0
 909 |  *     Q = (x2, y2), must be on the curve and not P_in or -P_in or 0
 910 |  * out: P_out = (x1:y1:z1) = P_in + Q
 911 |  */
 912 | static void point_add(uint32_t x1[8], uint32_t y1[8], uint32_t z1[8],
 913 |                       const uint32_t x2[8], const uint32_t y2[8])
 914 | {
 915 |     /*
 916 |      * This is formula 5 from [CMO98], with z2 == 1 substituted. We use
 917 |      * intermediates with neutral names, and names from the paper in comments.
 918 |      */
 919 |     uint32_t t1[8], t2[8], t3[8];
 920 | 
 921 |     /* u1 = x1 and s1 = y1 (no computations) */
 922 | 
 923 |     /* t1 = u2 = x2 z1^2 */
 924 |     m256_mul_p(t1, z1, z1);
 925 |     m256_mul_p(t2, t1, z1);
 926 |     m256_mul_p(t1, t1, x2);
 927 | 
 928 |     /* t2 = s2 = y2 z1^3 */
 929 |     m256_mul_p(t2, t2, y2);
 930 | 
 931 |     /* t1 = h = u2 - u1 */
 932 |     m256_sub_p(t1, t1, x1); /* t1 = x2 * z1^2 - x1 */
 933 | 
 934 |     /* t2 = r = s2 - s1 */
 935 |     m256_sub_p(t2, t2, y1);
 936 | 
 937 |     /* z3 = z1 * h */
 938 |     m256_mul_p(z1, z1, t1);
 939 | 
 940 |     /* t1 = h^3 */
 941 |     m256_mul_p(t3, t1, t1);
 942 |     m256_mul_p(t1, t3, t1);
 943 | 
 944 |     /* t3 = x1 * h^2 */
 945 |     m256_mul_p(t3, t3, x1);
 946 | 
 947 |     /* x3 = r^2 - 2 * x1 * h^2 - h^3 */
 948 |     m256_mul_p(x1, t2, t2);
 949 |     m256_sub_p(x1, x1, t3);
 950 |     m256_sub_p(x1, x1, t3);
 951 |     m256_sub_p(x1, x1, t1);
 952 | 
 953 |     /* y3 = r * (x1 * h^2 - x3) - y1 h^3 */
 954 |     m256_sub_p(t3, t3, x1);
 955 |     m256_mul_p(t3, t3, t2);
 956 |     m256_mul_p(t1, t1, y1);
 957 |     m256_sub_p(y1, t3, t1);
 958 | }
 959 | 
 960 | /*
 961 |  * Point addition or doubling (affine to jacobian, Montgomery domain)
 962 |  *
 963 |  * in: P = (x1, y1) - must be on the curve and not 0
 964 |  *     Q = (x2, y2) - must be on the curve and not 0
 965 |  * out: (x3, y3) = R = P + Q
 966 |  *
 967 |  * Note: unlike point_add(), this function works if P = +- Q;
 968 |  * however it leaks information on its input through timing,
 969 |  * branches taken and memory access patterns (if observable).
 970 |  */
 971 | static void point_add_or_double_leaky(
 972 |                         uint32_t x3[8], uint32_t y3[8],
 973 |                         const uint32_t x1[8], const uint32_t y1[8],
 974 |                         const uint32_t x2[8], const uint32_t y2[8])
 975 | {
 976 | 
 977 |     uint32_t z3[8];
 978 |     u256_cmov(x3, x1, 1);
 979 |     u256_cmov(y3, y1, 1);
 980 |     m256_set32(z3, 1, &p256_p);
 981 | 
 982 |     if (u256_diff(x1, x2) != 0) {
 983 |         // P != +- Q -> generic addition
 984 |         point_add(x3, y3, z3, x2, y2);
 985 |         point_to_affine(x3, y3, z3);
 986 |     }
 987 |     else if (u256_diff(y1, y2) == 0) {
 988 |         // P == Q -> double
 989 |         point_double(x3, y3, z3);
 990 |         point_to_affine(x3, y3, z3);
 991 |     } else {
 992 |         // P == -Q -> zero
 993 |         m256_set32(x3, 0, &p256_p);
 994 |         m256_set32(y3, 0, &p256_p);
 995 |     }
 996 | }
 997 | 
 998 | /*
 999 |  * Import curve point from bytes
1000 |  *
1001 |  * in: p = (x, y) concatenated, fixed-width 256-bit big-endian integers
1002 |  * out: x, y in Mongomery domain
1003 |  *      return 0 if x and y are both in [0, p)
1004 |  *                  and (x, y) is on the curve and not 0
1005 |  *             unspecified non-zero otherwise.
1006 |  *      x and y are unspecified and must be discarded if returning non-zero.
1007 |  */
1008 | static int point_from_bytes(uint32_t x[8], uint32_t y[8], const uint8_t p[64])
1009 | {
1010 |     int ret;
1011 | 
1012 |     ret = m256_from_bytes(x, p, &p256_p);
1013 |     if (ret != 0)
1014 |         return ret;
1015 | 
1016 |     ret = m256_from_bytes(y, p + 32, &p256_p);
1017 |     if (ret != 0)
1018 |         return ret;
1019 | 
1020 |     return (int) point_check(x, y);
1021 | }
1022 | 
1023 | /*
1024 |  * Export curve point to bytes
1025 |  *
1026 |  * in: x, y affine coordinates of a point (Montgomery domain)
1027 |  *     must be on the curve and not 0
1028 |  * out: p = (x, y) concatenated, fixed-width 256-bit big-endian integers
1029 |  */
1030 | static void point_to_bytes(uint8_t p[64],
1031 |                            const uint32_t x[8], const uint32_t y[8])
1032 | {
1033 |     m256_to_bytes(p,        x, &p256_p);
1034 |     m256_to_bytes(p + 32,   y, &p256_p);
1035 | }
1036 | 
1037 | /**********************************************************************
1038 |  *
1039 |  * Scalar multiplication and other scalar-related operations
1040 |  *
1041 |  **********************************************************************/
1042 | 
1043 | /*
1044 |  * Scalar multiplication
1045 |  *
1046 |  * in: P = (px, py), affine (Montgomery), must be on the curve and not 0
1047 |  *     s in [1, n-1]
1048 |  * out: R = s * P = (rx, ry), affine coordinates (Montgomery).
1049 |  *
1050 |  * Note: as memory areas, none of the parameters may overlap.
1051 |  */
1052 | static void scalar_mult(uint32_t rx[8], uint32_t ry[8],
1053 |                         const uint32_t px[8], const uint32_t py[8],
1054 |                         const uint32_t s[8])
1055 | {
1056 |     /*
1057 |      * We use a signed binary ladder, see for example slides 10-14 of
1058 |      * http://ecc2015.math.u-bordeaux1.fr/documents/hamburg.pdf but with
1059 |      * implicit recoding, and a different loop initialisation to avoid feeding
1060 |      * 0 to our addition formulas, as they don't support it.
1061 |      */
1062 |     uint32_t s_odd[8], py_neg[8], py_use[8], rz[8];
1063 | 
1064 |     /*
1065 |      * Make s odd by replacing it with n - s if necessary.
1066 |      *
1067 |      * If s was odd, we'll have s_odd = s, and define P' = P.
1068 |      * Otherwise, we'll have s_odd = n - s and define P' = -P.
1069 |      *
1070 |      * Either way, we can compute s * P as s_odd * P'.
1071 |      */
1072 |     u256_sub(s_odd, p256_n.m, s); /* no carry, result still in [1, n-1] */
1073 |     uint32_t negate = ~s[0] & 1;
1074 |     u256_cmov(s_odd, s, 1 - negate);
1075 | 
1076 |     /* Compute py_neg = - py mod p (that's the y coordinate of -P) */
1077 |     u256_set32(py_use, 0);
1078 |     m256_sub_p(py_neg, py_use, py);
1079 | 
1080 |     /* Initialize R = P' = (x:(-1)^negate * y:1) */
1081 |     u256_cmov(rx, px, 1);
1082 |     u256_cmov(ry, py, 1);
1083 |     m256_set32(rz, 1, &p256_p);
1084 |     u256_cmov(ry, py_neg, negate);
1085 | 
1086 |     /*
1087 |      * For any odd number s_odd = b255 ... b1 1, we have
1088 |      *      s_odd = 2^255 + 2^254 sbit(b255) + ... + 2 sbit(b2) + sbit(b1)
1089 |      * writing
1090 |      *      sbit(b) = 2 * b - 1 = b ? 1 : -1
1091 |      *
1092 |      * Use that to compute s_odd * P' by repeating R = 2 * R +- P':
1093 |      *      s_odd * P' = 2 * ( ... (2 * P' + sbit(b255) P') ... ) + sbit(b1) P'
1094 |      *
1095 |      * The loop invariant is that when beginning an iteration we have
1096 |      *      R = s_i P'
1097 |      * with
1098 |      *      s_i = 2^(255-i) + 2^(254-i) sbit(b_255) + ...
1099 |      * where the sum has 256 - i terms.
1100 |      *
1101 |      * When updating R we need to make sure the input to point_add() is
1102 |      * neither 0 not +-P'. Since that input is 2 s_i P', it is sufficient to
1103 |      * see that 1 < 2 s_i < n-1. The lower bound is obvious since s_i is a
1104 |      * positive integer, and for the upper bound we distinguish three cases.
1105 |      *
1106 |      * If i > 1, then s_i < 2^254, so 2 s_i < 2^255 < n-1.
1107 |      * Otherwise, i == 1 and we have 2 s_i = s_odd - sbit(b1).
1108 |      *      If s_odd <= n-4, then 2 s_1 <= n-3.
1109 |      *      Otherwise, s_odd = n-2, and for this curve's value of n,
1110 |      *      we have b1 == 1, so sbit(b1) = 1 and 2 s_1 <= n-3.
1111 |      */
1112 |     for (unsigned i = 255; i > 0; i--) {
1113 |         uint32_t bit = (s_odd[i / 32] >> i % 32) & 1;
1114 | 
1115 |         /* set (px, py_use) = sbit(bit) P' = sbit(bit) * (-1)^negate P */
1116 |         u256_cmov(py_use, py, bit ^ negate);
1117 |         u256_cmov(py_use, py_neg, (1 - bit) ^ negate);
1118 | 
1119 |         /* Update R = 2 * R +- P' */
1120 |         point_double(rx, ry, rz);
1121 |         point_add(rx, ry, rz, px, py_use);
1122 |     }
1123 | 
1124 |     point_to_affine(rx, ry, rz);
1125 | }
1126 | 
1127 | /*
1128 |  * Scalar import from big-endian bytes
1129 |  *
1130 |  * in: p = p0, ..., p31
1131 |  * out: s = p0 * 2^248 + p1 * 2^240 + ... + p30 * 2^8 + p31
1132 |  *      return 0 if s in [1, n-1],
1133 |  *            -1 otherwise.
1134 |  */
1135 | static int scalar_from_bytes(uint32_t s[8], const uint8_t p[32])
1136 | {
1137 |     u256_from_bytes(s, p);
1138 | 
1139 |     uint32_t r[8];
1140 |     uint32_t lt_n = u256_sub(r, s, p256_n.m);
1141 | 
1142 |     u256_set32(r, 1);
1143 |     uint32_t lt_1 = u256_sub(r, s, r);
1144 | 
1145 |     if (lt_n && !lt_1)
1146 |         return 0;
1147 | 
1148 |     return -1;
1149 | }
1150 | 
1151 | /*
1152 |  * Scalar generation, with public key
1153 |  *
1154 |  * out: sbytes the big-endian bytes representation of the scalar
1155 |  *      s its u256 representation
1156 |  *      x, y the affine coordinates of s * G (Montgomery domain)
1157 |  *      return 0 if OK, -1 on failure
1158 |  *      sbytes, s, x, y must be discarded when returning non-zero.
1159 |  */
1160 | static int scalar_gen_with_pub(uint8_t sbytes[32], uint32_t s[8],
1161 |                                uint32_t x[8], uint32_t y[8])
1162 | {
1163 |     /* generate a random valid scalar */
1164 |     int ret;
1165 |     unsigned nb_tried = 0;
1166 |     do {
1167 |         if (nb_tried++ >= 4)
1168 |             return -1;
1169 | 
1170 |         ret = p256_generate_random(sbytes, 32);
1171 |         CT_POISON(sbytes, 32);
1172 |         if (ret != 0)
1173 |             return -1;
1174 | 
1175 |         ret = scalar_from_bytes(s, sbytes);
1176 |         CT_UNPOISON(&ret, sizeof ret);
1177 |     }
1178 |     while (ret != 0);
1179 | 
1180 |     /* compute and ouput the associated public key */
1181 |     scalar_mult(x, y, p256_gx, p256_gy, s);
1182 | 
1183 |     /* the associated public key is not a secret */
1184 |     CT_UNPOISON(x, 32);
1185 |     CT_UNPOISON(y, 32);
1186 | 
1187 |     return 0;
1188 | }
1189 | 
1190 | /*
1191 |  * ECDH/ECDSA generate pair
1192 |  */
1193 | int p256_gen_keypair(uint8_t priv[32], uint8_t pub[64])
1194 | {
1195 |     uint32_t s[8], x[8], y[8];
1196 |     int ret = scalar_gen_with_pub(priv, s, x, y);
1197 |     zeroize(s, sizeof s);
1198 |     if (ret != 0)
1199 |         return P256_RANDOM_FAILED;
1200 | 
1201 |     point_to_bytes(pub, x, y);
1202 |     return 0;
1203 | }
1204 | 
1205 | /**********************************************************************
1206 |  *
1207 |  * ECDH
1208 |  *
1209 |  **********************************************************************/
1210 | 
1211 | /*
1212 |  * ECDH compute shared secret
1213 |  */
1214 | int p256_ecdh_shared_secret(uint8_t secret[32],
1215 |                             const uint8_t priv[32], const uint8_t peer[64])
1216 | {
1217 |     CT_POISON(priv, 32);
1218 | 
1219 |     uint32_t s[8], px[8], py[8], x[8], y[8];
1220 |     int ret;
1221 | 
1222 |     ret = scalar_from_bytes(s, priv);
1223 |     CT_UNPOISON(&ret, sizeof ret);
1224 |     if (ret != 0) {
1225 |         ret = P256_INVALID_PRIVKEY;
1226 |         goto cleanup;
1227 |     }
1228 | 
1229 |     ret = point_from_bytes(px, py, peer);
1230 |     if (ret != 0) {
1231 |         ret = P256_INVALID_PUBKEY;
1232 |         goto cleanup;
1233 |     }
1234 | 
1235 |     scalar_mult(x, y, px, py, s);
1236 | 
1237 |     m256_to_bytes(secret, x, &p256_p);
1238 |     CT_UNPOISON(secret, 32);
1239 | 
1240 | cleanup:
1241 |     zeroize(s, sizeof s);
1242 |     return ret;
1243 | }
1244 | 
1245 | /**********************************************************************
1246 |  *
1247 |  * ECDSA
1248 |  *
1249 |  * Reference:
1250 |  * [SEC1] SEC 1: Elliptic Curve Cryptography, Certicom research, 2009.
1251 |  *        http://www.secg.org/sec1-v2.pdf
1252 |  **********************************************************************/
1253 | 
1254 | /*
1255 |  * Reduction mod n of a small number
1256 |  *
1257 |  * in: x in [0, 2^256)
1258 |  * out: x_out = x_in mod n in [0, n)
1259 |  */
1260 | static void ecdsa_m256_mod_n(uint32_t x[8])
1261 | {
1262 |     uint32_t t[8];
1263 |     uint32_t c = u256_sub(t, x, p256_n.m);
1264 |     u256_cmov(x, t, 1 - c);
1265 | }
1266 | 
1267 | /*
1268 |  * Import integer mod n (Montgomery domain) from hash
1269 |  *
1270 |  * in: h = h0, ..., h_hlen
1271 |  *     hlen the length of h in bytes
1272 |  * out: z = (h0 * 2^l-8 + ... + h_l) * 2^256 mod n
1273 |  *      with l = min(32, hlen)
1274 |  *
1275 |  * Note: in [SEC1] this is step 5 of 4.1.3 (sign) or step 3 or 4.1.4 (verify),
1276 |  * with obvious simplications since n's bit-length is a multiple of 8.
1277 |  */
1278 | static void ecdsa_m256_from_hash(uint32_t z[8],
1279 |                                  const uint8_t *h, size_t hlen)
1280 | {
1281 |     /* convert from h (big-endian) */
1282 |     /* hlen is public data so it's OK to branch on it */
1283 |     if (hlen < 32) {
1284 |         uint8_t p[32];
1285 |         for (unsigned i = 0; i < 32; i++)
1286 |             p[i] = 0;
1287 |         for (unsigned i = 0; i < hlen; i++)
1288 |             p[32 - hlen + i] = h[i];
1289 |         u256_from_bytes(z, p);
1290 |     } else {
1291 |         u256_from_bytes(z, h);
1292 |     }
1293 | 
1294 |     /* ensure the result is in [0, n) */
1295 |     ecdsa_m256_mod_n(z);
1296 | 
1297 |     /* map to Montgomery domain */
1298 |     m256_prep(z, &p256_n);
1299 | }
1300 | 
1301 | /*
1302 |  * ECDSA sign
1303 |  */
1304 | int p256_ecdsa_sign(uint8_t sig[64], const uint8_t priv[32],
1305 |                     const uint8_t *hash, size_t hlen)
1306 | {
1307 |     CT_POISON(priv, 32);
1308 | 
1309 |     /*
1310 |      * Steps and notations from [SEC1] 4.1.3
1311 |      *
1312 |      * Instead of retrying on r == 0 or s == 0, just abort,
1313 |      * as those events have negligible probability.
1314 |      */
1315 |     int ret;
1316 | 
1317 |     /* Temporary buffers - the first two are mostly stable, so have names */
1318 |     uint32_t xr[8], k[8], t3[8], t4[8];
1319 | 
1320 |     /* 1. Set ephemeral keypair */
1321 |     uint8_t *kb = (uint8_t *) t4;
1322 |     /* kb will be erased by re-using t4 for dU - if we exit before that, we
1323 |      * haven't read the private key yet so we kb isn't sensitive yet */
1324 |     ret = scalar_gen_with_pub(kb, k, xr, t3);   /* xr = x_coord(k * G) */
1325 |     if (ret != 0)
1326 |         return P256_RANDOM_FAILED;
1327 |     m256_prep(k, &p256_n);
1328 | 
1329 |     /* 2. Convert xr to an integer */
1330 |     m256_done(xr, &p256_p);
1331 | 
1332 |     /* 3. Reduce xr mod n (extra: output it while at it) */
1333 |     ecdsa_m256_mod_n(xr);    /* xr = int(xr) mod n */
1334 | 
1335 |     /* xr is public data so it's OK to use a branch */
1336 |     if (u256_diff0(xr) == 0)
1337 |         return P256_RANDOM_FAILED;
1338 | 
1339 |     u256_to_bytes(sig, xr);
1340 | 
1341 |     m256_prep(xr, &p256_n);
1342 | 
1343 |     /* 4. Skipped - we take the hash as an input, not the message */
1344 | 
1345 |     /* 5. Derive an integer from the hash */
1346 |     ecdsa_m256_from_hash(t3, hash, hlen);   /* t3 = e */
1347 | 
1348 |     /* 6. Compute s = k^-1 * (e + r * dU) */
1349 | 
1350 |     /* Note: dU will be erased by re-using t4 for the value of s (public) */
1351 |     ret = scalar_from_bytes(t4, priv);   /* t4 = dU (integer domain) */
1352 |     CT_UNPOISON(&ret, sizeof ret); /* Result of input validation */
1353 |     if (ret != 0)
1354 |         return P256_INVALID_PRIVKEY;
1355 |     m256_prep(t4, &p256_n);         /* t4 = dU (Montgomery domain) */
1356 | 
1357 |     m256_inv(k, k, &p256_n);        /* k^-1 */
1358 |     m256_mul(t4, xr, t4, &p256_n);  /* t4 = r * dU */
1359 |     m256_add(t4, t3, t4, &p256_n);  /* t4 = e + r * dU */
1360 |     m256_mul(t4, k, t4, &p256_n);   /* t4 = s = k^-1 * (e + r * dU) */
1361 |     zeroize(k, sizeof k);
1362 | 
1363 |     /* 7. Output s (r already outputed at step 3) */
1364 |     CT_UNPOISON(t4, 32);
1365 |     if (u256_diff0(t4) == 0) {
1366 |         /* undo early output of r */
1367 |         u256_to_bytes(sig, t4);
1368 |         return P256_RANDOM_FAILED;
1369 |     }
1370 |     m256_to_bytes(sig + 32, t4, &p256_n);
1371 | 
1372 |     return P256_SUCCESS;
1373 | }
1374 | 
/*
 * ECDSA verify
 *
 * Check that sig = (r, s), two 32-byte values, is a valid signature of hash
 * under the public key pub.
 *
 * Returns P256_SUCCESS when xR mod n equals r, P256_INVALID_PUBKEY when
 * point_from_bytes() rejects pub, and P256_INVALID_SIGNATURE in every other
 * case. The range check on (r, s) runs first, so a bad signature is reported
 * as such even if pub is invalid too.
 */
int p256_ecdsa_verify(const uint8_t sig[64], const uint8_t pub[64],
                      const uint8_t *hash, size_t hlen)
{
    /*
     * Steps and notations from [SEC1] 4.1.4 (Verifying Operation)
     *
     * Note: we're using public data only, so branches are OK
     */
    int ret;

    /* 1. Validate range of r and s : [1, n-1] */
    uint32_t r[8], s[8];
    ret = scalar_from_bytes(r, sig);
    if (ret != 0)
        return P256_INVALID_SIGNATURE;
    ret = scalar_from_bytes(s, sig + 32);
    if (ret != 0)
        return P256_INVALID_SIGNATURE;

    /* 2. Skipped - we take the hash as an input, not the message */

    /* 3. Derive an integer from the hash */
    uint32_t e[8];
    ecdsa_m256_from_hash(e, hash, hlen);

    /* 4. Compute u1 = e * s^-1 and u2 = r * s^-1 */
    uint32_t u1[8], u2[8];
    m256_prep(s, &p256_n);           /* s in Montgomery domain */
    m256_inv(s, s, &p256_n);         /* s = s^-1 mod n */
    m256_mul(u1, e, s, &p256_n);     /* u1 = e * s^-1 mod n */
    m256_done(u1, &p256_n);          /* u1 out of Montgomery domain */

    /* NOTE(review): u256_cmov() with a constant 1 appears to be used as a
     * plain copy (u2 = r) - confirm against its definition. Working on a copy
     * matters because r itself is needed again, unmodified, at step 8. */
    u256_cmov(u2, r, 1);
    m256_prep(u2, &p256_n);          /* r in Montgomery domain */
    m256_mul(u2, u2, s, &p256_n);    /* u2 = r * s^-1 mod n */
    m256_done(u2, &p256_n);          /* u2 out of Montgomery domain */

    /* 5. Compute R (and re-use (u1, u2) to store its coordinates) */
    uint32_t px[8], py[8];
    ret = point_from_bytes(px, py, pub);
    if (ret != 0)
        return P256_INVALID_PUBKEY;

    /* e and s are no longer needed as scalars: recycle them as the
     * coordinates of R2 */
    scalar_mult(e, s, px, py, u2);      /* (e, s) = R2 = u2 * Qu */

    if (u256_diff0(u1) == 0) {
        /* u1 out of range for scalar_mult() - just skip it */
        /* (u1 == 0 means R1 is the point at infinity, so R = R2 and only
         * its x coordinate, held in e, has to end up in u1) */
        u256_cmov(u1, e, 1);
        /* we don't care about the y coordinate */
    } else {
        /* px, py held the public key, already consumed above: re-use them */
        scalar_mult(px, py, p256_gx, p256_gy, u1); /* (px, py) = R1 = u1 * G */

        /* (u1, u2) = R = R1 + R2 */
        point_add_or_double_leaky(u1, u2, px, py, e, s);
        /* No need to check if R == 0 here: if that's the case, it will be
         * caught when comparing rx (which will be 0) to r (which isn't). */
    }

    /* 6. Convert xR to an integer */
    m256_done(u1, &p256_p);

    /* 7. Reduce xR mod n */
    ecdsa_m256_mod_n(u1);

    /* 8. Compare xR mod n to r */
    uint32_t diff = u256_diff(u1, r);
    if (diff == 0)
        return P256_SUCCESS;

    return P256_INVALID_SIGNATURE;
}
1449 | 


--------------------------------------------------------------------------------
/p256-m.h:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * Interface of curve P-256 (ECDH and ECDSA)
 3 |  *
 4 |  * Author: Manuel Pégourié-Gonnard.
 5 |  * SPDX-License-Identifier: Apache-2.0
 6 |  */
 7 | #ifndef P256_M_H
 8 | #define P256_M_H
 9 | 
10 | #include <stdint.h>
11 | #include <stddef.h>
12 | 
13 | /* Status codes */
14 | #define P256_SUCCESS            0
15 | #define P256_RANDOM_FAILED      -1
16 | #define P256_INVALID_PUBKEY     -2
17 | #define P256_INVALID_PRIVKEY    -3
18 | #define P256_INVALID_SIGNATURE  -4
19 | 
20 | #ifdef __cplusplus
21 | extern "C" {
22 | #endif
23 | 
24 | /*
25 |  * RNG function - must be provided externally and be cryptographically secure.
26 |  *
27 |  * in: output - must point to a writable buffer of at least output_size bytes.
28 |  *     output_size - the number of random bytes to write to output.
29 |  * out: output is filled with output_size random bytes.
30 |  *      return 0 on success, non-zero on errors.
31 |  */
32 | extern int p256_generate_random(uint8_t * output, unsigned output_size);
33 | 
34 | /*
35 |  * ECDH/ECDSA generate key pair
36 |  *
37 |  * [in] draws from p256_generate_random()
38 |  * [out] priv: on success, holds the private key, as a big-endian integer
39 |  * [out] pub: on success, holds the public key, as two big-endian integers
40 |  *
41 |  * return:  P256_SUCCESS on success
42 |  *          P256_RANDOM_FAILED on failure
43 |  */
44 | int p256_gen_keypair(uint8_t priv[32], uint8_t pub[64]);
45 | 
46 | /*
47 |  * ECDH compute shared secret
48 |  *
49 |  * [out] secret: on success, holds the shared secret, as a big-endian integer
50 |  * [in] priv: our private key as a big-endian integer
51 |  * [in] pub: the peer's public key, as two big-endian integers
52 |  *
53 |  * return:  P256_SUCCESS on success
54 |  *          P256_INVALID_PRIVKEY if priv is invalid
55 |  *          P256_INVALID_PUBKEY if pub is invalid
56 |  */
57 | int p256_ecdh_shared_secret(uint8_t secret[32],
58 |                             const uint8_t priv[32], const uint8_t pub[64]);
59 | 
60 | /*
61 |  * ECDSA sign
62 |  *
63 |  * [in] draws from p256_generate_random()
64 |  * [out] sig: on success, holds the signature, as two big-endian integers
65 |  * [in] priv: our private key as a big-endian integer
66 |  * [in] hash: the hash of the message to be signed
67 |  * [in] hlen: the size of hash in bytes
68 |  *
69 |  * return:  P256_SUCCESS on success
70 |  *          P256_RANDOM_FAILED on failure
71 |  *          P256_INVALID_PRIVKEY if priv is invalid
72 |  */
73 | int p256_ecdsa_sign(uint8_t sig[64], const uint8_t priv[32],
74 |                     const uint8_t *hash, size_t hlen);
75 | 
76 | /*
77 |  * ECDSA verify
78 |  *
79 |  * [in] sig: the signature to be verified, as two big-endian integers
80 |  * [in] pub: the associated public key, as two big-endian integers
81 |  * [in] hash: the hash of the message that was signed
82 |  * [in] hlen: the size of hash in bytes
83 |  *
84 |  * return:  P256_SUCCESS on success - the signature was verified as valid
85 |  *          P256_INVALID_PUBKEY if pub is invalid
86 |  *          P256_INVALID_SIGNATURE if the signature was found to be invalid
87 |  */
88 | int p256_ecdsa_verify(const uint8_t sig[64], const uint8_t pub[64],
89 |                       const uint8_t *hash, size_t hlen);
90 | 
91 | #ifdef __cplusplus
92 | }
93 | #endif
94 | 
95 | #endif /* P256_M_H */
96 | 


--------------------------------------------------------------------------------
/p256.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/python3
  2 | # coding: utf-8
  3 | #
  4 | # Author: Manuel Pégourié-Gonnard.
  5 | # SPDX-License-Identifier: Apache-2.0
  6 | 
  7 | """A simple implementation of P-256 (ECDH, ECDSA) for tests."""
  8 | 
  9 | import secrets
 10 | import hashlib
 11 | 
 12 | 
 13 | class ModInt:
 14 |     """Modular integer."""
 15 | 
 16 |     def __init__(self, x, n):
 17 |         """Build x mod n."""
 18 |         self.x = x % n
 19 |         self.n = n
 20 | 
 21 |     def __repr__(self):
 22 |         """Represent self."""
 23 |         return "ModInt({}, {})".format(self.x, self.n)
 24 | 
 25 |     def __int__(self):
 26 |         """Return the representant in [0, n)."""
 27 |         return self.x
 28 | 
 29 |     def __eq__(self, other):
 30 |         """Compare to another ModInt."""
 31 |         return self.x == other.x and self.n == other.n
 32 | 
 33 |     def __add__(self, other):
 34 |         """Add to another ModInt."""
 35 |         return ModInt(self.x + other.x, self.n)
 36 | 
 37 |     def __sub__(self, other):
 38 |         """Subtract another ModInt."""
 39 |         return ModInt(self.x - other.x, self.n)
 40 | 
 41 |     def __neg__(self):
 42 |         """Negate self."""
 43 |         return ModInt(-self.x, self.n)
 44 | 
 45 |     def __mul__(self, other):
 46 |         """Multiply by another ModInt."""
 47 |         return ModInt(self.x * other.x, self.n)
 48 | 
 49 |     def __rmul__(self, other):
 50 |         """Multiply self by an integer."""
 51 |         return ModInt(self.x * other, self.n)
 52 | 
 53 |     def __pow__(self, other):
 54 |         """Elevate to an integer power."""
 55 |         return ModInt(pow(self.x, other, self.n), self.n)
 56 | 
 57 |     def inv(self):
 58 |         """Return modular inverse as a ModInt or raise ZeroDivisionError."""
 59 |         a, b, u, s = self.x, self.n, 1, 0
 60 |         # invariants: a < b and a == u*x mod n and b == s*x mod n
 61 |         while a > 1:
 62 |             q, r = divmod(b, a)  # r = b - q*a
 63 |             a, b, u, s = r, a, s - q*u, u
 64 |         if a != 1:
 65 |             raise ZeroDivisionError
 66 |         return ModInt(u, self.n)
 67 | 
 68 |     def __truediv__(self, other):
 69 |         """Divide by another ModInt or raise ZeroDivisionError."""
 70 |         return self * other.inv()
 71 | 
 72 |     def is_zero(self):
 73 |         """Tell if we're 0."""
 74 |         return self.x == 0
 75 | 
 76 | 
 77 | class Curve:
 78 |     """Curve parameters - Short Weierstrass curves over GF(p), p > 3."""
 79 | 
 80 |     # assuming cofactor of 1 (true for NIST and Brainpool curves),
 81 |     # so n is the order of the curve and of the base point G
 82 | 
 83 |     def __init__(self, name, *, p, a, b, gx, gy, n):
 84 |         """Build a Curve from the given int parameters."""
 85 |         self.name = name
 86 |         self.p = p
 87 |         self.a = ModInt(a, p)
 88 |         self.b = ModInt(b, p)
 89 |         self.gx = ModInt(gx, p)
 90 |         self.gy = ModInt(gy, p)
 91 |         self.n = n
 92 | 
 93 |         self.p_bits = p.bit_length()
 94 |         self.p_bytes = (self.p_bits + 7) // 8
 95 | 
 96 |         self.n_bits = n.bit_length()
 97 |         self.n_bytes = (self.n_bits + 7) // 8
 98 | 
 99 |     def __str__(self):
100 |         """Human-friendly name."""
101 |         return self.name
102 | 
103 |     def zero(self):
104 |         """Return the origin (point at infinity)."""
105 |         return CurvePoint(None, self)
106 | 
107 |     def base_point(self):
108 |         """Return this curve's conventional base point."""
109 |         return CurvePoint((self.gx, self.gy), self)
110 | 
111 | 
# Domain parameters of NIST P-256 (a.k.a. secp256r1 / prime256v1), see any of:
# rfc 6090 app. D, or rfc 5903 3.1, or sec2-v2 2.4.2, or FIPS 186-4 D.1.2.3
# (a is given as -3; Curve() reduces it modulo p)
p256 = Curve(
    "P-256",
    p=0xFFFFFFFF00000001000000000000000000000000FFFFFFFFFFFFFFFFFFFFFFFF,
    a=-3,
    b=0x5AC635D8AA3A93E7B3EBBD55769886BC651D06B0CC53B0F63BCE3C3E27D2604B,
    n=0xFFFFFFFF00000000FFFFFFFFFFFFFFFFBCE6FAADA7179E84F3B9CAC2FC632551,
    gx=0x6B17D1F2E12C4247F8BCE6E563A440F277037D812DEB33A0F4A13945D898C296,
    gy=0x4FE342E2FE1A7F9B8EE7EB4A7C0F9E162BCE33576B315ECECBB6406837BF51F5
)
122 | 
123 | 
class CurvePoint:
    """Point on a Curve, in affine coordinates (or the point at infinity)."""

    def __init__(self, coordinates, curve):
        """Coordinates is either a pair of ModInt or None for 0."""
        self.coord = coordinates
        self.curve = curve

    def is_zero(self):
        """Tell if this is 0 (aka the origin aka the point at infinity)."""
        return self.coord is None

    def x(self):
        """Return the x coordinate as a ModInt (not valid for 0)."""
        return self.coord[0]

    def y(self):
        """Return the y coordinate as a ModInt (not valid for 0)."""
        return self.coord[1]

    def __eq__(self, other):
        """Compare to another point on the curve."""
        if self.is_zero() and other.is_zero():
            return True

        if self.is_zero() or other.is_zero():
            return False

        return self.x() == other.x() and self.y() == other.y()

    def __add__(self, other):
        """Add to another point - RFC 6090 Appendix F.1.

        Handles every case of the group law: either operand being 0,
        opposite points (result is 0), distinct points (chord) and equal
        points (tangent).
        """
        if self.is_zero():
            return other

        if other.is_zero():
            return self

        x1, y1, x2, y2 = self.x(), self.y(), other.x(), other.y()

        if self != other:
            if x1 == x2:
                # Same x but different y: the points are opposite, so their
                # sum is 0. This must be caught here, the chord formula
                # below would divide by x2 - x1 == 0.
                return CurvePoint(None, self.curve)

            # Distinct, non-opposite points: chord through both of them.
            slope = (y2 - y1) / (x2 - x1)
            x3 = slope**2 - x1 - x2
            y3 = (x1 - x3) * slope - y1
            return CurvePoint((x3, y3), self.curve)

        # Doubling from here on.
        # y1 == 0 can't happen with curves of large prime order,
        # but let's just follow the formulas in the RFC
        if y1.is_zero():
            return CurvePoint(None, self.curve)

        # Tangent at the point.
        a = self.curve.a
        slope = (3*x1**2 + a) / (2*y1)
        x3 = slope**2 - 2*x1
        y3 = (x1 - x3) * slope - y1
        return CurvePoint((x3, y3), self.curve)

    def __rmul__(self, other):
        """Multiply self by a non-negative integer (scalar multiplication)."""
        # Right-to-left double-and-add.
        # invariant: result + scalar * scale = other * self
        result = self.curve.zero()
        scale = self
        scalar = other
        while scalar != 0:
            if scalar % 2 != 0:
                result += scale
            scale += scale
            scalar //= 2

        return result
195 | 
196 | 
def ecdsa_modint_from_hash(msg_hash, n, nbits):
    """Derive an integer mod n from a message hash for ECDSA.

    msg_hash is the hash as bytes, n the order of the base point and nbits
    the bit length of n. Only the leftmost nbits // 8 bytes of the hash are
    used (all of it if it is shorter); the result is returned as a ModInt.
    """
    # This is Sec1 4.1.3 step 5 or 4.1.4 step 3
    # Substeps 1-3: simplify when nbits is a multiple of 8
    assert(nbits % 8 == 0)
    # Truncate to the size of n, whatever the curve (32 bytes for P-256)
    use_len = min(nbits // 8, len(msg_hash))
    msg_hash = msg_hash[:use_len]
    # Substep 4: 2.3.8 says big endian
    e = int.from_bytes(msg_hash, 'big')
    # Extra: mod n
    return ModInt(e, n)
208 | 
209 | 
class EcdsaSigner:
    """An ECDSA private key, which can produce signatures."""

    def __init__(self, curve, d=None):
        """Load private scalar d for curve, or draw a fresh one if None."""
        self.curve = curve
        if d is None:
            d = self._gen_scalar()
        self.d = d

    def _gen_scalar(self):
        # sec1 3.2.1 wants a scalar in [1, n-1]: draw from [0, n-1), shift by 1
        upper = self.curve.n - 1
        return 1 + secrets.randbelow(upper)

    def _gen_public(self, d):
        # public point matching scalar d, that is d * G
        base = self.curve.base_point()
        return d * base

    def public_key(self):
        """Return the public key matching self.d, as a CurvePoint."""
        return self._gen_public(self.d)

    def sign(self, msg_hash, k=None):
        """Sign a message hash (bytes), returning the int pair (r, s).

        A fixed ephemeral scalar k may be passed in (as done for the test
        vectors); when it is None a random one is drawn.
        """
        # Follows sec1 4.1.3, except that we abort instead of retrying.
        order = self.curve.n
        order_bits = self.curve.n_bits

        # 1. Ephemeral key pair (k, R)
        nonce = self._gen_scalar() if k is None else k
        ephemeral = self._gen_public(nonce)
        k_mod = ModInt(nonce, order)

        # 2, 3. r is the x coordinate of R, taken mod n
        r = ModInt(int(ephemeral.x()), order)
        assert(not r.is_zero())

        # 4. Skipped: the caller already hashed the message
        # 5. Integer derived from the hash
        e = ecdsa_modint_from_hash(msg_hash, order, order_bits)

        # 6. s = k^-1 * (e + r * d)
        priv = ModInt(self.d, order)
        s = (e + r * priv) * k_mod.inv()
        assert(not s.is_zero())

        # 7. Hand back plain integers
        return (int(r), int(s))
251 | 
252 | 
class EcdsaVerifier:
    """An ECDSA public key, which can check signatures."""

    def __init__(self, curve, public_key):
        """Wrap a public key (CurvePoint) belonging to curve."""
        self.curve = curve
        self.Q = public_key

    def is_valid(self, sig, msg_hash):
        """Tell if signature (int pair) is valid for that hash (bytes)."""
        # Follows sec1 4.1.4
        order = self.curve.n
        order_bits = self.curve.n_bits
        r, s = sig

        # 1. Both halves must lie in [1, n-1]
        if r <= 0 or r >= order or s <= 0 or s >= order:
            return False

        # 2. Skipped: the caller already hashed the message
        # 3. Integer derived from the hash
        e = ecdsa_modint_from_hash(msg_hash, order, order_bits)

        # 4. u1 = e / s and u2 = r / s, sharing a single inversion of s
        r_mod = ModInt(r, order)
        s_inv = ModInt(s, order).inv()
        u1 = e * s_inv
        u2 = r_mod * s_inv

        # 5. R = u1 * G + u2 * Q, which must not be the point at infinity
        from_base = int(u1) * self.curve.base_point()
        from_key = int(u2) * self.Q
        R = from_base + from_key
        if R.is_zero():
            return False

        # 6, 7. v is the x coordinate of R, taken mod n
        v = ModInt(int(R.x()), order)

        # 8. The signature is valid iff v matches r
        return v == r_mod
286 | 
287 | 
288 | # Section 8.1 of RFC 5903
289 | tv_ecdh_rfc5903 = dict(
290 |     i=0xC88F01F510D9AC3F70A292DAA2316DE544E9AAB8AFE84049C62A9C57862D1433,
291 |     gix=0xDAD0B65394221CF9B051E1FECA5787D098DFE637FC90B9EF945D0C3772581180,
292 |     giy=0x5271A0461CDB8252D61F1C456FA3E59AB1F45B33ACCF5F58389E0577B8990BB3,
293 |     r=0xC6EF9C5D78AE012A011164ACB397CE2088685D8F06BF9BE0B283AB46476BEE53,
294 |     grx=0xD12DFB5289C8D4F81208B70270398C342296970A0BCCB74C736FC7554494BF63,
295 |     gry=0x56FBF3CA366CC23E8157854C13C58D6AAC23F046ADA30F8353E74F33039872AB,
296 |     girx=0xD6840F6B42F6EDAFD13116E0E12565202FEF8E9ECE7DCE03812464D04B9442DE,
297 |     giry=0x522BDE0AF0D8585B8DEF9C183B5AE38F50235206A8674ECB5D98EDB20EB153A2,
298 | )
299 | 
300 | # NIST KAS_ECC_CDH_PrimitiveTest.txt P-256 counts 0 to 9
301 | tv_ecdh_nist = (
302 |     dict(
303 |         ox=0x700c48f77f56584c5cc632ca65640db91b6bacce3a4df6b42ce7cc838833d287,
304 |         oy=0xdb71e509e3fd9b060ddb20ba5c51dcc5948d46fbf640dfe0441782cab85fa4ac,
305 |         d=0x7d7dc5f71eb29ddaf80d6214632eeae03d9058af1fb6d22ed80badb62bc1a534,
306 |         Qx=0xead218590119e8876b29146ff89ca61770c4edbbf97d38ce385ed281d8a6b230,
307 |         Qy=0x28af61281fd35e2fa7002523acc85a429cb06ee6648325389f59edfce1405141,
308 |         Z=0x46fc62106420ff012e54a434fbdd2d25ccc5852060561e68040dd7778997bd7b,
309 |     ), dict(
310 |         ox=0x809f04289c64348c01515eb03d5ce7ac1a8cb9498f5caa50197e58d43a86a7ae,
311 |         oy=0xb29d84e811197f25eba8f5194092cb6ff440e26d4421011372461f579271cda3,
312 |         d=0x38f65d6dce47676044d58ce5139582d568f64bb16098d179dbab07741dd5caf5,
313 |         Qx=0x119f2f047902782ab0c9e27a54aff5eb9b964829ca99c06b02ddba95b0a3f6d0,
314 |         Qy=0x8f52b726664cac366fc98ac7a012b2682cbd962e5acb544671d41b9445704d1d,
315 |         Z=0x057d636096cb80b67a8c038c890e887d1adfa4195e9b3ce241c8a778c59cda67,
316 |     ), dict(
317 |         ox=0xa2339c12d4a03c33546de533268b4ad667debf458b464d77443636440ee7fec3,
318 |         oy=0xef48a3ab26e20220bcda2c1851076839dae88eae962869a497bf73cb66faf536,
319 |         d=0x1accfaf1b97712b85a6f54b148985a1bdc4c9bec0bd258cad4b3d603f49f32c8,
320 |         Qx=0xd9f2b79c172845bfdb560bbb01447ca5ecc0470a09513b6126902c6b4f8d1051,
321 |         Qy=0xf815ef5ec32128d3487834764678702e64e164ff7315185e23aff5facd96d7bc,
322 |         Z=0x2d457b78b4614132477618a5b077965ec90730a8c81a1c75d6d4ec68005d67ec,
323 |     ), dict(
324 |         ox=0xdf3989b9fa55495719b3cf46dccd28b5153f7808191dd518eff0c3cff2b705ed,
325 |         oy=0x422294ff46003429d739a33206c8752552c8ba54a270defc06e221e0feaf6ac4,
326 |         d=0x207c43a79bfee03db6f4b944f53d2fb76cc49ef1c9c4d34d51b6c65c4db6932d,
327 |         Qx=0x24277c33f450462dcb3d4801d57b9ced05188f16c28eda873258048cd1607e0d,
328 |         Qy=0xc4789753e2b1f63b32ff014ec42cd6a69fac81dfe6d0d6fd4af372ae27c46f88,
329 |         Z=0x96441259534b80f6aee3d287a6bb17b5094dd4277d9e294f8fe73e48bf2a0024,
330 |     ), dict(
331 |         ox=0x41192d2813e79561e6a1d6f53c8bc1a433a199c835e141b05a74a97b0faeb922,
332 |         oy=0x1af98cc45e98a7e041b01cf35f462b7562281351c8ebf3ffa02e33a0722a1328,
333 |         d=0x59137e38152350b195c9718d39673d519838055ad908dd4757152fd8255c09bf,
334 |         Qx=0xa8c5fdce8b62c5ada598f141adb3b26cf254c280b2857a63d2ad783a73115f6b,
335 |         Qy=0x806e1aafec4af80a0d786b3de45375b517a7e5b51ffb2c356537c9e6ef227d4a,
336 |         Z=0x19d44c8d63e8e8dd12c22a87b8cd4ece27acdde04dbf47f7f27537a6999a8e62,
337 |     ), dict(
338 |         ox=0x33e82092a0f1fb38f5649d5867fba28b503172b7035574bf8e5b7100a3052792,
339 |         oy=0xf2cf6b601e0a05945e335550bf648d782f46186c772c0f20d3cd0d6b8ca14b2f,
340 |         d=0xf5f8e0174610a661277979b58ce5c90fee6c9b3bb346a90a7196255e40b132ef,
341 |         Qx=0x7b861dcd2844a5a8363f6b8ef8d493640f55879217189d80326aad9480dfc149,
342 |         Qy=0xc4675b45eeb306405f6c33c38bc69eb2bdec9b75ad5af4706aab84543b9cc63a,
343 |         Z=0x664e45d5bba4ac931cd65d52017e4be9b19a515f669bea4703542a2c525cd3d3,
344 |     ), dict(
345 |         ox=0x6a9e0c3f916e4e315c91147be571686d90464e8bf981d34a90b6353bca6eeba7,
346 |         oy=0x40f9bead39c2f2bcc2602f75b8a73ec7bdffcbcead159d0174c6c4d3c5357f05,
347 |         d=0x3b589af7db03459c23068b64f63f28d3c3c6bc25b5bf76ac05f35482888b5190,
348 |         Qx=0x9fb38e2d58ea1baf7622e96720101cae3cde4ba6c1e9fa26d9b1de0899102863,
349 |         Qy=0xd5561b900406edf50802dd7d73e89395f8aed72fba0e1d1b61fe1d22302260f0,
350 |         Z=0xca342daa50dc09d61be7c196c85e60a80c5cb04931746820be548cdde055679d,
351 |     ), dict(
352 |         ox=0xa9c0acade55c2a73ead1a86fb0a9713223c82475791cd0e210b046412ce224bb,
353 |         oy=0xf6de0afa20e93e078467c053d241903edad734c6b403ba758c2b5ff04c9d4229,
354 |         d=0xd8bf929a20ea7436b2461b541a11c80e61d826c0a4c9d322b31dd54e7f58b9c8,
355 |         Qx=0x20f07631e4a6512a89ad487c4e9d63039e579cb0d7a556cb9e661cd59c1e7fa4,
356 |         Qy=0x6de91846b3eee8a5ec09c2ab1f41e21bd83620ccdd1bdce3ab7ea6e02dd274f5,
357 |         Z=0x35aa9b52536a461bfde4e85fc756be928c7de97923f0416c7a3ac8f88b3d4489,
358 |     ), dict(
359 |         ox=0x94e94f16a98255fff2b9ac0c9598aac35487b3232d3231bd93b7db7df36f9eb9,
360 |         oy=0xd8049a43579cfa90b8093a94416cbefbf93386f15b3f6e190b6e3455fedfe69a,
361 |         d=0x0f9883ba0ef32ee75ded0d8bda39a5146a29f1f2507b3bd458dbea0b2bb05b4d,
362 |         Qx=0xabb61b423be5d6c26e21c605832c9142dc1dfe5a5fff28726737936e6fbf516d,
363 |         Qy=0x733d2513ef58beab202090586fac91bf0fee31e80ab33473ab23a2d89e58fad6,
364 |         Z=0x605c16178a9bc875dcbff54d63fe00df699c03e8a888e9e94dfbab90b25f39b4,
365 |     ), dict(
366 |         ox=0xe099bf2a4d557460b5544430bbf6da11004d127cb5d67f64ab07c94fcdf5274f,
367 |         oy=0xd9c50dbe70d714edb5e221f4e020610eeb6270517e688ca64fb0e98c7ef8c1c5,
368 |         d=0x2beedb04b05c6988f6a67500bb813faf2cae0d580c9253b6339e4a3337bb6c08,
369 |         Qx=0x3d63e429cb5fa895a9247129bf4e48e89f35d7b11de8158efeb3e106a2a87395,
370 |         Qy=0x0cae9e477ef41e7c8c1064379bb7b554ddcbcae79f9814281f1e50f0403c61f3,
371 |         Z=0xf96e40a1b72840854bb62bc13c40cc2795e373d4e715980b261476835a092e0b,
372 |     )
373 | )
374 | 
375 | # Section 8.1 of RFC 4754
376 | tv_ecdsa_rfc4754 = dict(
377 |     w=0xDC51D3866A15BACDE33D96F992FCA99DA7E6EF0934E7097559C27F1614C88A7F,
378 |     gwx=0x2442A5CC0ECD015FA3CA31DC8E2BBC70BF42D60CBCA20085E0822CB04235E970,
379 |     gwy=0x6FC98BD7E50211A4A27102FA3549DF79EBCB4BF246B80945CDDFE7D509BBFD7D,
380 |     k=0x9E56F509196784D963D1C0A401510EE7ADA3DCC5DEE04B154BF61AF1D5A6DECE,
381 |     r=0xCB28E0999B9C7715FD0A80D8E47A77079716CBBF917DD72E97566EA1C066957C,
382 |     s=0x86FA3BB4E26CAD5BF90B7F81899256CE7594BB1EA0C89212748BFF3B3D5B0315,
383 |     h="BA7816BF8F01CFEA414140DE5DAE2223B00361A396177A9CB410FF61F20015AD",
384 | )
385 | 
386 | # Section A.2.5 of RFC 6979
387 | tv_ecdsa_rfc6979_key = dict(
388 |     x=0xC9AFA9D845BA75166B5C215767B1D6934E50C3DB36E89B127B8A622B120F6721,
389 |     Ux=0x60FED4BA255A9D31C961EB74C6356D68C049B8923B61FA6CE669622E60F29FB6,
390 |     Uy=0x7903FE1008B8BC99A41AE9E95628BC64F2F1B20C2D7E9F5177A3C294D4462299,
391 | )
392 | tv_ecdsa_rfc6979 = (
393 |     dict(
394 |         h=hashlib.sha1(b"sample").digest(),
395 |         k=0x882905F1227FD620FBF2ABF21244F0BA83D0DC3A9103DBBEE43A1FB858109DB4,
396 |         r=0x61340C88C3AAEBEB4F6D667F672CA9759A6CCAA9FA8811313039EE4A35471D32,
397 |         s=0x6D7F147DAC089441BB2E2FE8F7A3FA264B9C475098FDCF6E00D7C996E1B8B7EB,
398 |     ), dict(
399 |         h=hashlib.sha224(b"sample").digest(),
400 |         k=0x103F90EE9DC52E5E7FB5132B7033C63066D194321491862059967C715985D473,
401 |         r=0x53B2FFF5D1752B2C689DF257C04C40A587FABABB3F6FC2702F1343AF7CA9AA3F,
402 |         s=0xB9AFB64FDC03DC1A131C7D2386D11E349F070AA432A4ACC918BEA988BF75C74C,
403 |     ), dict(
404 |         h=hashlib.sha256(b"sample").digest(),
405 |         k=0xA6E3C57DD01ABE90086538398355DD4C3B17AA873382B0F24D6129493D8AAD60,
406 |         r=0xEFD48B2AACB6A8FD1140DD9CD45E81D69D2C877B56AAF991C34D0EA84EAF3716,
407 |         s=0xF7CB1C942D657C41D436C7A1B6E29F65F3E900DBB9AFF4064DC4AB2F843ACDA8,
408 |     ), dict(
409 |         h=hashlib.sha384(b"sample").digest(),
410 |         k=0x09F634B188CEFD98E7EC88B1AA9852D734D0BC272F7D2A47DECC6EBEB375AAD4,
411 |         r=0x0EAFEA039B20E9B42309FB1D89E213057CBF973DC0CFC8F129EDDDC800EF7719,
412 |         s=0x4861F0491E6998B9455193E34E7B0D284DDD7149A74B95B9261F13ABDE940954,
413 |     ), dict(
414 |         h=hashlib.sha512(b"sample").digest(),
415 |         k=0x5FA81C63109BADB88C1F367B47DA606DA28CAD69AA22C4FE6AD7DF73A7173AA5,
416 |         r=0x8496A60B5E9B47C825488827E0495B0E3FA109EC4568FD3F8D1097678EB97F00,
417 |         s=0x2362AB1ADBE2B8ADF9CB9EDAB740EA6049C028114F2460F96554F61FAE3302FE,
418 |     ), dict(
419 |         h=hashlib.sha1(b"test").digest(),
420 |         k=0x8C9520267C55D6B980DF741E56B4ADEE114D84FBFA2E62137954164028632A2E,
421 |         r=0x0CBCC86FD6ABD1D99E703E1EC50069EE5C0B4BA4B9AC60E409E8EC5910D81A89,
422 |         s=0x01B9D7B73DFAA60D5651EC4591A0136F87653E0FD780C3B1BC872FFDEAE479B1,
423 |     ), dict(
424 |         h=hashlib.sha224(b"test").digest(),
425 |         k=0x669F4426F2688B8BE0DB3A6BD1989BDAEFFF84B649EEB84F3DD26080F667FAA7,
426 |         r=0xC37EDB6F0AE79D47C3C27E962FA269BB4F441770357E114EE511F662EC34A692,
427 |         s=0xC820053A05791E521FCAAD6042D40AEA1D6B1A540138558F47D0719800E18F2D,
428 |     ), dict(
429 |         h=hashlib.sha256(b"test").digest(),
430 |         k=0xD16B6AE827F17175E040871A1C7EC3500192C4C92677336EC2537ACAEE0008E0,
431 |         r=0xF1ABB023518351CD71D881567B1EA663ED3EFCF6C5132B354F28D3B0B7D38367,
432 |         s=0x019F4113742A2B14BD25926B49C649155F267E60D3814B4C0CC84250E46F0083,
433 |     ), dict(
434 |         h=hashlib.sha384(b"test").digest(),
435 |         k=0x16AEFFA357260B04B1DD199693960740066C1A8F3E8EDD79070AA914D361B3B8,
436 |         r=0x83910E8B48BB0C74244EBDF7F07A1C5413D61472BD941EF3920E623FBCCEBEB6,
437 |         s=0x8DDBEC54CF8CD5874883841D712142A56A8D0F218F5003CB0296B6B509619F2C,
438 |     ), dict(
439 |         h=hashlib.sha512(b"test").digest(),
440 |         k=0x6915D11632ACA3C40D5D51C08DAF9C555933819548784480E93499000D9F0B7F,
441 |         r=0x461D93F31B6540894788FD206C07CFA0CC35F46FA3C91816FFF1040AD1581A04,
442 |         s=0x39AF9F15DE0DB8D97E72719C74820D304CE5226E32DEDAE67519E840D1194E55,
443 |     ),
444 | )
445 | 
446 | 
if __name__ == '__main__':
    # Self-test: check the implementation above against published vectors.

    def _announce(title):
        """Print a test title and stay on the same line for the verdict."""
        print(title, end=' ', flush=True)

    def _xy(point):
        """Return the affine coordinates of a point as a pair of ints."""
        return (int(point.x()), int(point.y()))

    G = p256.base_point()

    _announce("P-256 ECDH test vectors from RFC 5903 Sec. 8.1...")
    tv = tv_ecdh_rfc5903

    # public keys of both parties
    gi = tv['i'] * G
    assert(_xy(gi) == (tv['gix'], tv['giy']))

    gr = tv['r'] * G
    assert(_xy(gr) == (tv['grx'], tv['gry']))

    # shared secret, computed from each side
    shared = (tv['girx'], tv['giry'])

    si = tv['i'] * gr
    assert(_xy(si) == shared)

    sr = tv['r'] * gi
    assert(_xy(sr) == shared)

    print("OK")

    _announce("P-256 ECDH test vectors from NIST...")

    for tv in tv_ecdh_nist:
        # our public key
        Q = tv['d'] * G
        assert(_xy(Q) == (tv['Qx'], tv['Qy']))

        # shared secret: x coordinate of d * (peer's public key)
        peer = CurvePoint((ModInt(tv['ox'], p256.p),
                           ModInt(tv['oy'], p256.p)), p256)
        do = tv['d'] * peer
        assert(int(do.x()) == tv['Z'])

    print("OK")

    _announce("P-256 ECDSA test vectors from RFC 4754 Sec. 8.1...")
    tv = tv_ecdsa_rfc4754
    h = bytes.fromhex(tv['h'])
    good_r, good_s = tv['r'], tv['s']

    # signature generation
    signer = EcdsaSigner(p256, tv['w'])
    sig = signer.sign(h, tv['k'])
    assert(sig == (good_r, good_s))

    # key generation
    pub = signer.public_key()
    assert(int(pub.x()) == tv['gwx'])
    assert(int(pub.y()) == tv['gwy'])

    # signature verification: genuine signature, then one field altered
    verif = EcdsaVerifier(p256, pub)
    assert(verif.is_valid((good_r, good_s), h) is True)

    bad_r = good_r + 1
    bad_s = good_s + 1
    bad_h = h[::-1]
    assert(verif.is_valid((bad_r, good_s), h) is False)
    assert(verif.is_valid((good_r, bad_s), h) is False)
    assert(verif.is_valid((good_r, good_s), bad_h) is False)

    print("OK")

    _announce("P-256 ECDSA test vectors from RFC 6979 A.2.5...")

    # key generation
    tv = tv_ecdsa_rfc6979_key
    signer = EcdsaSigner(p256, tv['x'])
    pub = signer.public_key()
    assert(int(pub.x()) == tv['Ux'])
    assert(int(pub.y()) == tv['Uy'])
    verif = EcdsaVerifier(p256, pub)

    # signature generation and verification
    for tv in tv_ecdsa_rfc6979:
        h, k = tv['h'], tv['k']
        r, s = tv['r'], tv['s']
        sig = signer.sign(h, k)
        assert(sig == (r, s))
        assert(verif.is_valid((r, s), h) is True)
        assert(verif.is_valid((r+1, s), h) is False)
        assert(verif.is_valid((r, s+1), h) is False)
        assert(verif.is_valid((r, s), h[::-1]) is False)

    print("OK")
531 | 


--------------------------------------------------------------------------------
/prof-g.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/sh
 2 | 
 3 | # On-host profiling using Gprof
 4 | #
 5 | # Author: Manuel Pégourié-Gonnard.
 6 | # SPDX-License-Identifier: Apache-2.0
 7 | 
 8 | set -eu
 9 | 
10 | # value chosen so that the program runs for at least 5 sec on my laptop,
11 | # resulting in at least 500 samples being collected.
12 | : ${TIMES:=500}
13 | 
14 | # Anything capable of running gcc has CT 64-bit mul in practice
15 | gcc --std=c99 -Werror -Wall -Wextra -pedantic \
16 |     -march=native -DMUL64_IS_CONSTANT_TIME \
17 |     -Os -g -pg --static -DTIMES=$TIMES p256-m.c prof.c -o prof-g
18 | 
19 | ./prof-g
20 | 
21 | gprof -p -b ./prof-g
22 | 
23 | rm prof-g gmon.out
24 | 


--------------------------------------------------------------------------------
/prof-gpt.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/sh
 2 | 
 3 | # On-host profiling using gperftools
 4 | #
 5 | # Author: Manuel Pégourié-Gonnard.
 6 | # SPDX-License-Identifier: Apache-2.0
 7 | 
 8 | set -eu
 9 | 
10 | # value chosen so that the program runs for at least 5 sec on my laptop,
11 | # resulting in at least 500 samples being collected.
12 | : ${TIMES:=1000}
13 | 
14 | # adjust for your platform
15 | : ${TRIPLET:=x86_64-linux-gnu}
16 | 
17 | # Anything capable of running gcc has CT 64-bit mul in practice
18 | gcc --std=c99 -Werror -Wall -Wextra -pedantic \
19 |     -march=native -DMUL64_IS_CONSTANT_TIME \
20 |     -Os -g -DTIMES=$TIMES p256-m.c prof.c -o prof-gpt
21 | 
22 | # for some reason compiling with -lprofile doesn't seem to work for me, so
23 | # using LD_PRELOAD instead
24 | CPUPROFILE=gpt.out LD_PRELOAD=/usr/lib/$TRIPLET/libprofiler.so ./prof-gpt
25 | 
26 | google-pprof -text ./prof-gpt gpt.out
27 | 
28 | rm prof-gpt gpt.out
29 | 


--------------------------------------------------------------------------------
/prof-vg.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/sh
 2 | 
 3 | # On-host profiling using Valgrind
 4 | #
 5 | # Author: Manuel Pégourié-Gonnard.
 6 | # SPDX-License-Identifier: Apache-2.0
 7 | 
 8 | set -eu
 9 | 
10 | # Anything capable of running gcc has CT 64-bit mul in practice
11 | gcc --std=c99 -Werror -Wall -Wextra -pedantic \
12 |     -march=native -DMUL64_IS_CONSTANT_TIME \
13 |     -Os -g p256-m.c prof.c -o prof
14 | 
15 | OUTFILE=prof.callgrind.$$
16 | valgrind -q --tool=callgrind --collect-atstart=no --toggle-collect=main \
17 |     --callgrind-out-file=$OUTFILE ./prof
18 | 
19 | callgrind_annotate --show-percs=yes $OUTFILE |
20 |     sed -n '/file:function/,$ p'
21 | 
22 | rm $OUTFILE prof
23 | 


--------------------------------------------------------------------------------
/prof.c:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * Dummy program for on-host profiling.
 3 |  * See prof-vg.sh, prof-g.sh, prof-gpt.sh
 4 |  *
 5 |  * Author: Manuel Pégourié-Gonnard.
 6 |  * SPDX-License-Identifier: Apache-2.0
 7 |  */
 8 | 
 9 | #include <stdlib.h>
10 | 
11 | #include "p256-m.h"
12 | 
/*
 * Implementation of p256_generate_random() based on rand() from stdlib.
 * Test version only - never do this in production!
 *
 * Writes output_size bytes to output and always returns 0.
 */
int p256_generate_random(uint8_t *output, unsigned output_size)
{
    uint8_t *cursor = output;

    for (unsigned left = output_size; left != 0; left--)
        *cursor++ = (uint8_t) rand();

    return 0;
}
22 | 
/* Number of iterations of the profiled loop; can be set with -DTIMES=n */
#if !defined(TIMES)
#define TIMES 1
#endif

/*
 * Call each public function of p256-m TIMES times, in an order where each
 * call consumes the outputs of the previous ones (keypair, then ECDH, then
 * sign, then verify).
 *
 * Returns the bitwise OR of all return values.
 */
int main(void)
{
    uint8_t priv[32], pub[64], secret[32], sig[64], hash[32];
    int ret = 0;

    /* The hash is only ever read, so give it a defined (arbitrary) value */
    for (unsigned i = 0; i < sizeof hash; i++)
        hash[i] = (uint8_t) (i + 1);

    for (unsigned i = 0; i < TIMES; i++)
    {
        ret |= p256_gen_keypair(priv, pub);
        ret |= p256_ecdh_shared_secret(secret, priv, pub);
        ret |= p256_ecdsa_sign(sig, priv, hash, sizeof hash);
        ret |= p256_ecdsa_verify(sig, pub, hash, sizeof hash);
    }

    return ret;
}
42 | 


--------------------------------------------------------------------------------
/sizes.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/sh
 2 | 
 3 | # Print the size of p256-m.o with GCC and Clang on selected cores.
 4 | #
 5 | # Author: Manuel Pégourié-Gonnard.
 6 | # SPDX-License-Identifier: Apache-2.0
 7 | 
 8 | set -eu
 9 | 
10 | SRC=p256-m.c
11 | CFLAGS_COMMON="-Werror --std=c99 -fomit-frame-pointer -mthumb $SRC"
12 | 
13 | gcc() {
14 |     arm-none-eabi-gcc -Wall -Wextra -pedantic \
15 |         -Os $CFLAGS_COMMON "$@"
16 | }
17 | 
18 | clang() {
19 |     env clang --target=arm-none-eabi -Weverything \
20 |         -Oz $CFLAGS_COMMON "$@"
21 | }
22 | 
23 | OBJECTS=''
24 | 
25 | for CC in gcc clang; do
26 |     for CPU in m0 m4 a7; do
27 |         NAME="${CC}-${CPU}"
28 |         $CC -mcpu=cortex-$CPU -S -fverbose-asm -o ${NAME}.s
29 |         $CC -mcpu=cortex-$CPU -c -o ${NAME}.o
30 |         arm-none-eabi-objdump -d ${NAME}.o > ${NAME}.dump
31 |         nm --radix=d --size-sort ${NAME}.o > ${NAME}.sizes
32 |         OBJECTS="$OBJECTS ${NAME}.o"
33 |     done
34 | done
35 | 
36 | arm-none-eabi-size $OBJECTS
37 | 


--------------------------------------------------------------------------------
/stack.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/sh
 2 | 
 3 | # Compute maximum stack usage on selected cores with GCC.
 4 | #
 5 | # Author: Manuel Pégourié-Gonnard.
 6 | # SPDX-License-Identifier: Apache-2.0
 7 | 
 8 | for CPU in m0 m4 a7; do
 9 |     echo "*** $CPU ***"
10 |     arm-none-eabi-gcc -c -fdump-rtl-dfinish -fstack-usage \
11 |         -Os -fomit-frame-pointer -mthumb -mcpu=cortex-$CPU \
12 |         p256-m.c
13 |     python3 wcs.py | sed -n 's/^..p256-m.c *p256_/p256_/p'
14 | done
15 | 


--------------------------------------------------------------------------------
/test-closedbox.c:
--------------------------------------------------------------------------------
  1 | /*
  2 |  * Closed-box testing of curve P-256 (ECDH and ECDSA)
  3 |  *
 * - validate deterministic public functions against standard test vectors
  5 |  * - validate non-deterministic public functions against other functions
  6 |  * - exercise error cases that can be reached in a deterministic way
  7 |  *
  8 |  * Author: Manuel Pégourié-Gonnard.
  9 |  * SPDX-License-Identifier: Apache-2.0
 10 |  */
 11 | 
 12 | #include "p256-m.h"
 13 | #include "test-data.h"
 14 | #include "test-common.h"
 15 | 
 16 | #include <stdio.h>
 17 | #include <assert.h>
 18 | #include <string.h>
 19 | #include <stdlib.h>
 20 | 
 21 | static int rng_ret = 0;
 22 | 
 23 | /* test version based on stdlib - never do this in production! */
 24 | int p256_generate_random(uint8_t *output, unsigned output_size)
 25 | {
 26 |     for (unsigned i = 0; i < output_size; i++) {
 27 |         output[i] = (uint8_t) rand();
 28 |     }
 29 | 
 30 |     int ret = rng_ret;
 31 |     rng_ret = 0;
 32 |     return ret;
 33 | }
 34 | 
 35 | /* validate ecdsa_verify against one set of test vectors */
 36 | static void assert_ecdsa_verify_one(const uint8_t sig[64],
 37 |                                     const uint8_t *hash, size_t hlen)
 38 | {
 39 |     int ret;
 40 | 
 41 |     /* valid */
 42 |     ret = p256_ecdsa_verify(sig, ecdsa_pub, hash, hlen);
 43 |     assert(ret == P256_SUCCESS);
 44 | 
 45 |     /* corrupt the first or last bit of r or s */
 46 |     uint8_t bad_sig[64];
 47 | 
 48 |     memcpy(bad_sig, sig, sizeof bad_sig);
 49 |     bad_sig[0] ^= 0x80;
 50 |     ret = p256_ecdsa_verify(bad_sig, ecdsa_pub, hash, hlen);
 51 |     assert(ret == P256_INVALID_SIGNATURE);
 52 | 
 53 |     memcpy(bad_sig, sig, sizeof bad_sig);
 54 |     bad_sig[31] ^= 0x01;
 55 |     ret = p256_ecdsa_verify(bad_sig, ecdsa_pub, hash, hlen);
 56 |     assert(ret == P256_INVALID_SIGNATURE);
 57 | 
 58 |     memcpy(bad_sig, sig, sizeof bad_sig);
 59 |     bad_sig[32] ^= 0x80;
 60 |     ret = p256_ecdsa_verify(bad_sig, ecdsa_pub, hash, hlen);
 61 |     assert(ret == P256_INVALID_SIGNATURE);
 62 | 
 63 |     memcpy(bad_sig, sig, sizeof bad_sig);
 64 |     bad_sig[63] ^= 0x01;
 65 |     ret = p256_ecdsa_verify(bad_sig, ecdsa_pub, hash, hlen);
 66 |     assert(ret == P256_INVALID_SIGNATURE);
 67 | 
 68 |     /* corrupt the first bit of hash (the last one may be truncated away) */
 69 |     uint8_t bad_hash[64];
 70 | 
 71 |     memcpy(bad_hash, hash, hlen);
 72 |     bad_hash[0] ^= 0x80;
 73 |     ret = p256_ecdsa_verify(sig, ecdsa_pub, bad_hash, hlen);
 74 |     assert(ret == P256_INVALID_SIGNATURE);
 75 | }
 76 | 
/*
 * Validate p256_ecdsa_verify(): first on every reference (signature, hash)
 * pair, then on inputs that must be rejected with a specific error code.
 * The vectors are not defined in this file (presumably in test-data.h).
 */
static void assert_ecdsa_verify(void)
{
    /* known-good values */
    assert_ecdsa_verify_one(sig160a, h160a, sizeof h160a);
    assert_ecdsa_verify_one(sig224a, h224a, sizeof h224a);
    assert_ecdsa_verify_one(sig256a, h256a, sizeof h256a);
    assert_ecdsa_verify_one(sig384a, h384a, sizeof h384a);
    assert_ecdsa_verify_one(sig512a, h512a, sizeof h512a);
    assert_ecdsa_verify_one(sig160b, h160b, sizeof h160b);
    assert_ecdsa_verify_one(sig224b, h224b, sizeof h224b);
    assert_ecdsa_verify_one(sig256b, h256b, sizeof h256b);
    assert_ecdsa_verify_one(sig384b, h384b, sizeof h384b);
    assert_ecdsa_verify_one(sig512b, h512b, sizeof h512b);
    assert_ecdsa_verify_one(sig_h0, h0, sizeof h0);

    /* r, s out of range */
    const size_t hlen = sizeof h256a;
    assert(P256_INVALID_SIGNATURE == p256_ecdsa_verify(sig_bad_r0, ecdsa_pub, h256a, hlen));
    assert(P256_INVALID_SIGNATURE == p256_ecdsa_verify(sig_bad_rn, ecdsa_pub, h256a, hlen));
    assert(P256_INVALID_SIGNATURE == p256_ecdsa_verify(sig_bad_rm, ecdsa_pub, h256a, hlen));
    assert(P256_INVALID_SIGNATURE == p256_ecdsa_verify(sig_bad_s0, ecdsa_pub, h256a, hlen));
    assert(P256_INVALID_SIGNATURE == p256_ecdsa_verify(sig_bad_sn, ecdsa_pub, h256a, hlen));
    assert(P256_INVALID_SIGNATURE == p256_ecdsa_verify(sig_bad_sm, ecdsa_pub, h256a, hlen));

    /* pub invalid (coordinates out of range) */
    assert(P256_INVALID_PUBKEY == p256_ecdsa_verify(sig256a, pub_bad_xp, h256a, hlen));
    assert(P256_INVALID_PUBKEY == p256_ecdsa_verify(sig256a, pub_bad_xm, h256a, hlen));
    assert(P256_INVALID_PUBKEY == p256_ecdsa_verify(sig256a, pub_bad_yp, h256a, hlen));
    assert(P256_INVALID_PUBKEY == p256_ecdsa_verify(sig256a, pub_bad_ym, h256a, hlen));

    /* invalid signature for crafted hash that gives u1 G + u2 Q == 0 */
    assert(P256_INVALID_SIGNATURE == p256_ecdsa_verify(sig256a, ecdsa_pub, h256a_s0, hlen));

    /* invalid signature for crafted hash that gives u1 G == u2 Q */
    assert(P256_INVALID_SIGNATURE == p256_ecdsa_verify(sig256a, ecdsa_pub, h256a_double, hlen));
}
113 | 
/*
 * Validate sign against verify: sign (hash, hlen) with ecdsa_priv, then
 * check that the resulting signature verifies under ecdsa_pub.
 * Both calls must return P256_SUCCESS.
 */
static void assert_ecdsa_sign_one(const uint8_t *hash, size_t hlen)
{
    int ret;
    uint8_t sig[64];

    ret = p256_ecdsa_sign(sig, ecdsa_priv, hash, hlen);
    assert(ret == P256_SUCCESS);
    assert(p256_ecdsa_verify(sig, ecdsa_pub, hash, hlen) == P256_SUCCESS);
}
124 | 
/*
 * Validate p256_ecdsa_sign(): sign-then-verify for every reference hash,
 * then the error cases (rejected private keys, failing RNG).
 */
static void assert_ecdsa_sign(void)
{
    assert_ecdsa_sign_one(h160a, sizeof h160a);
    assert_ecdsa_sign_one(h224a, sizeof h224a);
    assert_ecdsa_sign_one(h256a, sizeof h256a);
    assert_ecdsa_sign_one(h384a, sizeof h384a);
    assert_ecdsa_sign_one(h512a, sizeof h512a);
    assert_ecdsa_sign_one(h160b, sizeof h160b);
    assert_ecdsa_sign_one(h224b, sizeof h224b);
    assert_ecdsa_sign_one(h256b, sizeof h256b);
    assert_ecdsa_sign_one(h384b, sizeof h384b);
    assert_ecdsa_sign_one(h512b, sizeof h512b);
    assert_ecdsa_sign_one(h0, sizeof h0);

    /* bad priv (out-of-range) */
    uint8_t sig[64];
    assert(P256_INVALID_PRIVKEY == p256_ecdsa_sign(sig, priv_bad_0, h256a, sizeof h256a));
    assert(P256_INVALID_PRIVKEY == p256_ecdsa_sign(sig, priv_bad_n, h256a, sizeof h256a));
    assert(P256_INVALID_PRIVKEY == p256_ecdsa_sign(sig, priv_bad_m, h256a, sizeof h256a));

    /* failing RNG */
    /* rng_ret makes the next p256_generate_random() call return 42 */
    rng_ret = 42;
    assert(P256_RANDOM_FAILED == p256_ecdsa_sign(sig, ecdsa_priv, h256a, sizeof h256a));
}
149 | 
150 | /* validate ecdh_shared_secret() against one test vector */
151 | static void assert_ecdh_shared_one(const uint8_t refsec[32],
152 |                                    const uint8_t priv[32],
153 |                                    const uint8_t pub[64])
154 | {
155 |     uint8_t sec[32];
156 |     int ret = p256_ecdh_shared_secret(sec, priv, pub);
157 |     assert(ret == P256_SUCCESS);
158 |     assert(memcmp(sec, refsec, sizeof sec) == P256_SUCCESS);
159 | }
160 | 
/*
 * Validate p256_ecdh_shared_secret(): ten reference vectors (expected
 * secret ecdhN_z, private key ecdhN_d, peer public key ecdhN_o), then the
 * error cases (rejected private keys, rejected peer public keys).
 */
static void assert_ecdh_shared(void)
{
    assert_ecdh_shared_one(ecdh0_z, ecdh0_d, ecdh0_o);
    assert_ecdh_shared_one(ecdh1_z, ecdh1_d, ecdh1_o);
    assert_ecdh_shared_one(ecdh2_z, ecdh2_d, ecdh2_o);
    assert_ecdh_shared_one(ecdh3_z, ecdh3_d, ecdh3_o);
    assert_ecdh_shared_one(ecdh4_z, ecdh4_d, ecdh4_o);
    assert_ecdh_shared_one(ecdh5_z, ecdh5_d, ecdh5_o);
    assert_ecdh_shared_one(ecdh6_z, ecdh6_d, ecdh6_o);
    assert_ecdh_shared_one(ecdh7_z, ecdh7_d, ecdh7_o);
    assert_ecdh_shared_one(ecdh8_z, ecdh8_d, ecdh8_o);
    assert_ecdh_shared_one(ecdh9_z, ecdh9_d, ecdh9_o);

    /* bad priv (out-of-range) */
    uint8_t sec[32];
    assert(P256_INVALID_PRIVKEY == p256_ecdh_shared_secret(sec, priv_bad_0, ecdh0_o));
    assert(P256_INVALID_PRIVKEY == p256_ecdh_shared_secret(sec, priv_bad_n, ecdh0_o));
    assert(P256_INVALID_PRIVKEY == p256_ecdh_shared_secret(sec, priv_bad_m, ecdh0_o));

    /* bad peer (out-of-range coordinates) */
    assert(P256_INVALID_PUBKEY == p256_ecdh_shared_secret(sec, ecdh0_d, pub_bad_xp));
    assert(P256_INVALID_PUBKEY == p256_ecdh_shared_secret(sec, ecdh0_d, pub_bad_xm));
    assert(P256_INVALID_PUBKEY == p256_ecdh_shared_secret(sec, ecdh0_d, pub_bad_yp));
    assert(P256_INVALID_PUBKEY == p256_ecdh_shared_secret(sec, ecdh0_d, pub_bad_ym));
}
186 | 
187 | /* validate gen_keypair() against ecdh_shared_secret() */
188 | static void assert_gen_keypair_one(void)
189 | {
190 |     int ret;
191 |     uint8_t a_priv[32], a_pub[64], a_sec[32];
192 |     uint8_t b_priv[32], b_pub[64], b_sec[32];
193 | 
194 |     ret = p256_gen_keypair(a_priv, a_pub);
195 |     assert(ret == P256_SUCCESS);
196 | 
197 |     ret = p256_gen_keypair(b_priv, b_pub);
198 |     assert(ret == P256_SUCCESS);
199 | 
200 |     ret = p256_ecdh_shared_secret(a_sec, a_priv, b_pub);
201 |     assert(ret == P256_SUCCESS);
202 | 
203 |     ret = p256_ecdh_shared_secret(b_sec, b_priv, a_pub);
204 |     assert(ret == P256_SUCCESS);
205 | 
206 |     assert(memcmp(a_sec, b_sec, 32) == P256_SUCCESS);
207 | }
208 | 
/*
 * Validate p256_gen_keypair(): five rounds of generate-then-ECDH, then the
 * case where the RNG reports a failure.
 */
static void assert_gen_keypair(void)
{
    for (unsigned i = 0; i < 5; i++)
        assert_gen_keypair_one();

    /* failing RNG */
    uint8_t priv[32], pub[64];
    /* rng_ret makes the next p256_generate_random() call return 42 */
    rng_ret = 42;
    assert(P256_RANDOM_FAILED == p256_gen_keypair(priv, pub));
}
219 | 
/*
 * Run all closed-box tests. A failing test aborts through assert(), so
 * reaching the end means that everything passed. Progress messages are only
 * printed when TEST_VERBOSE is defined (see test-common.h).
 */
int main(void)
{
    PUTS("\np256-m closed-box test suite");

    RUN(assert_ecdsa_verify());
    RUN(assert_ecdsa_sign());

    RUN(assert_ecdh_shared());
    RUN(assert_gen_keypair());

    PUTS("PASSED");
}
232 | 


--------------------------------------------------------------------------------
/test-common.h:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * Common macros for both test-closedbox.c and test-openbox.c.
 3 |  *
 4 |  * Author: Manuel Pégourié-Gonnard.
 5 |  * SPDX-License-Identifier: Apache-2.0
 6 |  */
 7 | #if defined(TEST_VERBOSE)
 8 | #define PUTS    puts
 9 | #else
10 | #define PUTS(s)
11 | #endif
12 | 
13 | #define RUN( code ) \
14 |     PUTS(#code);    \
15 |     code
16 | 


--------------------------------------------------------------------------------
/test-openbox.c:
--------------------------------------------------------------------------------
  1 | /*
  2 |  * Open-box testing of curve P-256 (ECDH and ECDSA)
  3 |  *
  4 |  * - unit-tests for static functions (by including the C file)
  5 |  * - tests using a fixed RNG (and knowledge of how it's used)
  6 |  *
  7 |  * Author: Manuel Pégourié-Gonnard.
  8 |  * SPDX-License-Identifier: Apache-2.0
  9 |  */
 10 | 
 11 | #include "p256-m.c"
 12 | #include "test-data.h"
 13 | #include "test-common.h"
 14 | 
 15 | #include <stdio.h>
 16 | #include <assert.h>
 17 | #include <string.h>
 18 | #include <stdlib.h>
 19 | 
/*
 * Check u256_add() on one test vector: the 8-word result for x and y must
 * be equal to z, and the returned value (presumably the carry out) to c.
 */
static void assert_add(const uint32_t x[8], const uint32_t y[8],
                       const uint32_t z[8], uint32_t c)
{
    uint32_t myz[8];
    uint32_t myc = u256_add(myz, x, y);
    assert(memcmp(myz, z, sizeof myz) == 0);
    assert(myc == c);
}
 28 | 
/*
 * Check u256_sub() on one test vector: the 8-word result for x and y must
 * be equal to z, and the returned value (presumably the borrow out) to c.
 */
static void assert_sub(const uint32_t x[8], const uint32_t y[8],
                       const uint32_t z[8], uint32_t c)
{
    uint32_t myz[8];
    uint32_t myc = u256_sub(myz, x, y);
    assert(memcmp(myz, z, sizeof myz) == 0);
    assert(myc == c);
}
 37 | 
 38 | static void assert_cmov()
 39 | {
 40 |     uint32_t z[8];
 41 |     memcpy(z, r, sizeof z);
 42 |     u256_cmov(z, s, 0u);
 43 |     assert(memcmp(z, r, sizeof z) == 0);
 44 |     u256_cmov(z, s, 1u);
 45 |     assert(memcmp(z, s, sizeof z) == 0);
 46 | }
 47 | 
/*
 * Check u256_from_bytes() and u256_to_bytes() in both directions on the
 * reference pair r (8 words) / rbytes (32 bytes).
 */
static void assert_ubytes(void)
{
    uint32_t z[8];
    u256_from_bytes(z, rbytes);
    assert(memcmp(z, r, sizeof z) == 0);

    uint8_t p[32];
    u256_to_bytes(p, r);
    assert(memcmp(p, rbytes, sizeof p) == 0);
}
 58 | 
 59 | static const uint16_t ma64_half[] = {
 60 |     0x0000,
 61 |     0x0001,
 62 |     0xfffe,
 63 |     0xffff,
 64 | };
 65 | 
 66 | static const uint32_t ma64_full[] = {
 67 |     0x00000000,
 68 |     0x00000001,
 69 |     0xfffffffe,
 70 |     0xffffffff,
 71 | };
 72 | 
 73 | #define ARRLEN(x)       (sizeof x / sizeof x[0])
 74 | 
 75 | static void assert_muladd64()
 76 | {
 77 |     for (unsigned i = 0; i < ARRLEN(ma64_half); i++)
 78 |         for (unsigned j = 0; j < ARRLEN(ma64_half); j++)
 79 |             for (unsigned k = 0; k < ARRLEN(ma64_half); k++)
 80 |                 for (unsigned l = 0; l < ARRLEN(ma64_half); l++)
 81 |                     for (unsigned m = 0; m < ARRLEN(ma64_full); m++)
 82 |                         for (unsigned n = 0; n < ARRLEN(ma64_full); n++)
 83 |                         {
 84 |                             uint32_t x = ((uint32_t) ma64_half[i] << 16)
 85 |                                                    + ma64_half[j];
 86 |                             uint32_t y = ((uint32_t) ma64_half[k] << 16)
 87 |                                                    + ma64_half[l];
 88 |                             uint32_t z = ma64_full[m];
 89 |                             uint32_t t = ma64_full[n];
 90 | 
 91 |                             uint64_t u = u32_muladd64(x, y, z, t);
 92 |                             uint64_t v = (uint64_t) x * y + z + t;
 93 |                             assert(u == v);
 94 |                         }
 95 | }
 96 | 
 97 | static void assert_madd()
 98 | {
 99 |     uint32_t z[8];
100 | 
101 |     /* x + y < p */
102 |     m256_add(z, p256_n.m, word, &p256_p);
103 |     assert(memcmp(z, npwmp, sizeof z) == 0);
104 | 
105 |     /* p <= x + y < 2^256 */
106 |     m256_add(z, p256_n.m, b128, &p256_p);
107 |     assert(memcmp(z, npbmp, sizeof z) == 0);
108 | 
109 |     /* x + y >= 2^256 */
110 |     m256_add(z, p256_n.m, p256_n.m, &p256_p);
111 |     assert(memcmp(z, npnmp, sizeof z) == 0);
112 | }
113 | 
114 | static void assert_msub()
115 | {
116 |     uint32_t z[8];
117 | 
118 |     /* x > y */
119 |     m256_sub(z, one, zero, &p256_p);
120 |     assert(memcmp(z, one, sizeof z) == 0);
121 | 
122 |     /* x == y */
123 |     m256_sub(z, one, one, &p256_p);
124 |     assert(memcmp(z, zero, sizeof z) == 0);
125 | 
126 |     /* x < y by few */
127 |     m256_sub(z, zero, one, &p256_p);
128 |     assert(memcmp(z, pm1, sizeof z) == 0);
129 | 
130 |     /* x < y by far */
131 |     m256_sub(z, zero, pm1, &p256_p);
132 |     assert(memcmp(z, one, sizeof z) == 0);
133 | }
134 | 
/*
 * Check m256_mul() of r and s against the reference values rsRip (modulus
 * p256_p) and rsRin (modulus p256_n). The inputs are passed as is, without
 * a prior m256_prep().
 */
static void assert_mmul(void)
{
    uint32_t z[8];

    m256_mul(z, r, s, &p256_p);
    assert(memcmp(z, rsRip, sizeof z) == 0);

    m256_mul(z, r, s, &p256_n);
    assert(memcmp(z, rsRin, sizeof z) == 0);
}
145 | 
146 | static void assert_prep_mul_done(void)
147 | {
148 |     uint32_t rm[8], sm[8], z[8];
149 | 
150 |     /* mod p */
151 |     memcpy(rm, r, sizeof rm);
152 |     memcpy(sm, s, sizeof rm);
153 | 
154 |     m256_prep(rm, &p256_p);
155 |     m256_prep(sm, &p256_p);
156 | 
157 |     m256_mul(z, rm, sm, &p256_p);
158 | 
159 |     m256_done(z, &p256_p);
160 | 
161 |     assert(memcmp(z, rtsmp, sizeof z) == 0);
162 | 
163 |     /* mod n */
164 |     memcpy(rm, r, sizeof rm);
165 |     memcpy(sm, s, sizeof rm);
166 | 
167 |     m256_prep(rm, &p256_n);
168 |     m256_prep(sm, &p256_n);
169 | 
170 |     m256_mul(z, rm, sm, &p256_n);
171 | 
172 |     m256_done(z, &p256_n);
173 | 
174 |     assert(memcmp(z, rtsmn, sizeof z) == 0);
175 | }
176 | 
/*
 * Check m256_inv() on r with both moduli against the reference values rip
 * and rin, then the special case of a zero input, for which the expected
 * output is zero.
 */
static void assert_inv(void)
{
    uint32_t rm[8], z[8];

    memcpy(rm, r, sizeof rm);
    m256_prep(rm, &p256_p);
    m256_inv(z, rm, &p256_p);
    m256_done(z, &p256_p);
    assert(memcmp(z, rip, sizeof z) == 0);

    memcpy(rm, r, sizeof rm);
    m256_prep(rm, &p256_n);
    m256_inv(z, rm, &p256_n);
    m256_done(z, &p256_n);
    assert(memcmp(z, rin, sizeof z) == 0);

    /* Special case: rm == 0 */
    m256_set32(rm, 0, &p256_p);
    m256_inv(z, rm, &p256_p);
    m256_done(z, &p256_p);
    assert(memcmp(z, zero, sizeof z) == 0);
}
199 | 
200 | static void assert_mbytes()
201 | {
202 |     int ret;
203 |     uint32_t z[8];
204 |     uint8_t p[32];
205 | 
206 |     /* mod p */
207 |     ret = m256_from_bytes(z, rbytes, &p256_p);
208 |     assert(ret == 0);
209 |     assert(memcmp(z, rmontp, sizeof z) == 0);
210 | 
211 |     m256_to_bytes(p, z, &p256_p);
212 |     assert(memcmp(p, rbytes, sizeof p) == 0);
213 | 
214 |     /* mod n */
215 |     ret = m256_from_bytes(z, rbytes, &p256_n);
216 |     assert(ret == 0);
217 |     assert(memcmp(z, rmontn, sizeof z) == 0);
218 | 
219 |     m256_to_bytes(p, z, &p256_n);
220 |     assert(memcmp(p, rbytes, sizeof p) == 0);
221 | 
222 |     /* too large by one, mod p and n */
223 |     u256_to_bytes(p, p256_p.m);
224 |     ret = m256_from_bytes(z, p, &p256_p);
225 |     assert(ret == -1);
226 | 
227 |     u256_to_bytes(p, p256_n.m);
228 |     ret = m256_from_bytes(z, p, &p256_n);
229 |     assert(ret == -1);
230 | }
231 | 
/*
 * Check the curve constants p256_b, p256_gx and p256_gy: after m256_done()
 * each must be equal to its reference value b_raw, gx_raw, gy_raw.
 */
static void assert_pt_params(void)
{
    uint32_t z[8];

    /* u256_cmov() with a flag of 1 is used as a plain copy into z */
    u256_cmov(z, p256_b, 1);
    m256_done(z, &p256_p);
    assert(memcmp(z, b_raw, sizeof z) == 0);

    u256_cmov(z, p256_gx, 1);
    m256_done(z, &p256_p);
    assert(memcmp(z, gx_raw, sizeof z) == 0);

    u256_cmov(z, p256_gy, 1);
    m256_done(z, &p256_p);
    assert(memcmp(z, gy_raw, sizeof z) == 0);
}
248 | 
/*
 * Check point_check(): it must return 0 for (gx, gy) and a non-zero value
 * for the three other pairings of those two coordinates.
 */
static void assert_pt_check(void)
{
    assert(point_check(p256_gx, p256_gy) == 0);

    assert(point_check(p256_gx, p256_gx) != 0);
    assert(point_check(p256_gy, p256_gx) != 0);
    assert(point_check(p256_gy, p256_gy) != 0);
}
257 | 
/*
 * Check point_to_affine(): the reference coordinates (jac_gx, jac_gy,
 * jac_gz) must convert to (p256_gx, p256_gy), and an input with z == 0 must
 * convert to x == y == 0.
 */
static void assert_pt_affine(void)
{
    uint32_t x[8], y[8], z[8];

    /* u256_cmov() with a flag of 1 is used as a plain copy */
    u256_cmov(x, jac_gx, 1);
    u256_cmov(y, jac_gy, 1);
    u256_cmov(z, jac_gz, 1);

    point_to_affine(x, y, z);

    assert(memcmp(x, p256_gx, sizeof x) == 0);
    assert(memcmp(y, p256_gy, sizeof y) == 0);

    /* Special case: z == 0 (that is, input point is 0) */
    m256_set32(x, 1, &p256_p);
    m256_set32(y, 1, &p256_p);
    m256_set32(z, 0, &p256_p);

    point_to_affine(x, y, z);

    assert(memcmp(x, zero, sizeof x) == 0);
    assert(memcmp(y, zero, sizeof y) == 0);
}
281 | 
/*
 * Check point_double(): doubling (jac_gx, jac_gy, jac_gz), then converting
 * with point_to_affine() and m256_done(), must give (g2x, g2y).
 */
static void assert_pt_double(void)
{
    uint32_t dx[8], dy[8], dz[8];

    /* u256_cmov() with a flag of 1 is used as a plain copy */
    u256_cmov(dx, jac_gx, 1);
    u256_cmov(dy, jac_gy, 1);
    u256_cmov(dz, jac_gz, 1);

    point_double(dx, dy, dz);

    point_to_affine(dx, dy, dz);
    m256_done(dx, &p256_p);
    m256_done(dy, &p256_p);

    assert(memcmp(dx, g2x, sizeof dx) == 0);
    assert(memcmp(dy, g2y, sizeof dy) == 0);
}
299 | 
/*
 * Check point_add(): adding the point (g2x, g2y), passed through
 * m256_prep(), to (jac_gx, jac_gy, jac_gz) must give (g3x, g3y) once
 * converted with point_to_affine() and m256_done().
 */
static void assert_pt_add(void)
{
    uint32_t tx[8], ty[8], tz[8], mg2x[8], mg2y[8];

    /* u256_cmov() with a flag of 1 is used as a plain copy */
    u256_cmov(mg2x, g2x, 1);
    u256_cmov(mg2y, g2y, 1);
    m256_prep(mg2x, &p256_p);
    m256_prep(mg2y, &p256_p);

    u256_cmov(tx, jac_gx, 1);
    u256_cmov(ty, jac_gy, 1);
    u256_cmov(tz, jac_gz, 1);

    point_add(tx, ty, tz, mg2x, mg2y);

    point_to_affine(tx, ty, tz);
    m256_done(tx, &p256_p);
    m256_done(ty, &p256_p);

    assert(memcmp(tx, g3x, sizeof tx) == 0);
    assert(memcmp(ty, g3y, sizeof ty) == 0);
}
322 | 
323 | static void assert_pt_add_or_double(void)
324 | {
325 |     uint32_t rx[8], ry[8], mx[8], my[8];
326 | 
327 |     /* r = 2G + G (generic addition) */
328 |     u256_cmov(mx, g2x, 1);
329 |     u256_cmov(my, g2y, 1);
330 |     m256_prep(mx, &p256_p);
331 |     m256_prep(my, &p256_p);
332 | 
333 |     point_add_or_double_leaky(rx, ry, mx, my, p256_gx, p256_gy);
334 | 
335 |     m256_done(rx, &p256_p);
336 |     m256_done(ry, &p256_p);
337 | 
338 |     assert(memcmp(rx, g3x, sizeof rx) == 0);
339 |     assert(memcmp(ry, g3y, sizeof ry) == 0);
340 | 
341 |     /* r = G + G (double) */
342 |     point_add_or_double_leaky(rx, ry, p256_gx, p256_gy, p256_gx, p256_gy);
343 | 
344 |     m256_done(rx, &p256_p);
345 |     m256_done(ry, &p256_p);
346 | 
347 |     assert(memcmp(rx, g2x, sizeof rx) == 0);
348 |     assert(memcmp(ry, g2y, sizeof ry) == 0);
349 | 
350 |     /* r = (-G) + G (zero) */
351 |     u256_cmov(my, g1yn, 1);
352 |     m256_prep(my, &p256_p);
353 | 
354 |     point_add_or_double_leaky(rx, ry, p256_gx, my, p256_gx, p256_gy);
355 | 
356 |     m256_done(rx, &p256_p);
357 |     m256_done(ry, &p256_p);
358 | 
359 |     assert(memcmp(rx, zero, sizeof rx) == 0);
360 |     assert(memcmp(ry, zero, sizeof rx) == 0);
361 | }
362 | 
/*
 * Check point_from_bytes() and point_to_bytes(): gbytes must import
 * successfully as (p256_gx, p256_gy) and export back to gbytes; inputs with
 * a coordinate equal to p, or with coordinates (1, 1), must be rejected
 * with a non-zero return value.
 */
static void assert_pt_bytes(void)
{
    uint8_t p[64];
    uint32_t x[8], y[8];
    int ret;

    /* valid */
    ret = point_from_bytes(x, y, gbytes);
    assert(ret == 0);
    assert(memcmp(x, p256_gx, sizeof x) == 0);
    assert(memcmp(y, p256_gy, sizeof y) == 0);

    point_to_bytes(p, x, y);
    assert(memcmp(p, gbytes, sizeof p) == 0);

    /* invalid: x or y too big, (x, y) not on curve */
    /* only the first 32 bytes (x) are overwritten here */
    u256_to_bytes(p, p256_p.m);
    ret = point_from_bytes(x, y, p);
    assert(ret != 0);

    u256_to_bytes(p, one);
    u256_to_bytes(p + 32, p256_p.m);
    ret = point_from_bytes(x, y, p);
    assert(ret != 0);

    u256_to_bytes(p, one);
    u256_to_bytes(p + 32, one);
    ret = point_from_bytes(x, y, p);
    assert(ret != 0);
}
393 | 
/*
 * Check scalar_mult() with the base point (p256_gx, p256_gy) and scalars
 * 1, 2, 3, n-1, n-2, n-3, then check that multiplying by r then s gives the
 * same point as multiplying by s then r (reference values rsgx, rsgy).
 *
 * Except in the first case, where the output is compared directly to the
 * base point, outputs go through m256_done() before being compared to the
 * reference values.
 */
static void assert_scalar_mult(void)
{
    uint32_t x[8], y[8], k[8], xx[8], yy[8];

    /* 1 * g */
    u256_set32(k, 1);
    scalar_mult(x, y, p256_gx, p256_gy, k);
    assert(memcmp(x, p256_gx, sizeof x) == 0);
    assert(memcmp(y, p256_gy, sizeof y) == 0);

    /* 2 * g */
    u256_set32(k, 2);
    scalar_mult(x, y, p256_gx, p256_gy, k);
    m256_done(x, &p256_p);
    m256_done(y, &p256_p);
    assert(memcmp(x, g2x, sizeof x) == 0);
    assert(memcmp(y, g2y, sizeof y) == 0);

    /* 3 * g */
    u256_set32(k, 3);
    scalar_mult(x, y, p256_gx, p256_gy, k);
    m256_done(x, &p256_p);
    m256_done(y, &p256_p);
    assert(memcmp(x, g3x, sizeof x) == 0);
    assert(memcmp(y, g3y, sizeof y) == 0);

    /* (n-1) * g */
    u256_sub(k, p256_n.m, one);
    scalar_mult(x, y, p256_gx, p256_gy, k);
    m256_done(x, &p256_p);
    m256_done(y, &p256_p);
    assert(memcmp(x, gx_raw, sizeof x) == 0);
    assert(memcmp(y, g1yn, sizeof y) == 0);

    /* (n-2) * g */
    u256_sub(k, k, one);
    scalar_mult(x, y, p256_gx, p256_gy, k);
    m256_done(x, &p256_p);
    m256_done(y, &p256_p);
    assert(memcmp(x, g2x, sizeof x) == 0);
    assert(memcmp(y, g2yn, sizeof y) == 0);

    /* (n-3) * g */
    u256_sub(k, k, one);
    scalar_mult(x, y, p256_gx, p256_gy, k);
    m256_done(x, &p256_p);
    m256_done(y, &p256_p);
    assert(memcmp(x, g3x, sizeof x) == 0);
    assert(memcmp(y, g3yn, sizeof y) == 0);

    /* rG then s(rG) */
    scalar_mult(x, y, p256_gx, p256_gy, r);
    /* keep a copy of the raw output: it is the input of the next call */
    u256_cmov(xx, x, 1);
    u256_cmov(yy, y, 1);
    m256_done(x, &p256_p);
    m256_done(y, &p256_p);
    assert(memcmp(x, rgx, sizeof x) == 0);
    assert(memcmp(y, rgy, sizeof y) == 0);

    scalar_mult(x, y, xx, yy, s);
    m256_done(x, &p256_p);
    m256_done(y, &p256_p);
    assert(memcmp(x, rsgx, sizeof x) == 0);
    assert(memcmp(y, rsgy, sizeof y) == 0);

    /* sG then r(sG) */
    scalar_mult(x, y, p256_gx, p256_gy, s);
    /* keep a copy of the raw output: it is the input of the next call */
    u256_cmov(xx, x, 1);
    u256_cmov(yy, y, 1);
    m256_done(x, &p256_p);
    m256_done(y, &p256_p);
    assert(memcmp(x, sgx, sizeof x) == 0);
    assert(memcmp(y, sgy, sizeof y) == 0);

    scalar_mult(x, y, xx, yy, r);
    m256_done(x, &p256_p);
    m256_done(y, &p256_p);
    assert(memcmp(x, rsgx, sizeof x) == 0);
    assert(memcmp(y, rsgy, sizeof y) == 0);
}
474 | 
/*
 * Check scalar_from_bytes(): 0 and n must be rejected (return -1), while
 * 1, n-1 and the reference value rbytes must be accepted (return 0). The
 * imported value is also checked for 1 and for rbytes.
 */
static void assert_sbytes(void)
{
    uint32_t z[8];

    uint8_t p[32] = { 0 };
    assert(scalar_from_bytes(z, p) == -1);

    p[31] = 1;
    assert(scalar_from_bytes(z, p) == 0);
    assert(memcmp(z, one, sizeof z) == 0);

    /* u256_cmov() with a flag of 1 is used as a plain copy */
    u256_cmov(z, p256_n.m, 1);
    u256_to_bytes(p, z);
    assert(scalar_from_bytes(z, p) == -1);

    u256_sub(z, p256_n.m, one);
    u256_to_bytes(p, z);
    assert(scalar_from_bytes(z, p) == 0);

    assert(scalar_from_bytes(z, rbytes) == 0);
    assert(memcmp(z, r, sizeof z) == 0);
}
497 | 
/*
 * RNG for testing - may optionally return fixed bytes at the beginning
 *
 * fixed      bytes to hand out before falling back to rand()
 * nb_fixed   how many bytes of fixed[] are in use
 * nb_drawn   how many bytes have been output since the last (un)fix
 * fixed_ret  value returned by p256_generate_random()
 */
static uint8_t fixed[128];
static unsigned nb_fixed, nb_drawn;
static int fixed_ret;

/*
 * Make the RNG output the nb_bytes bytes at bytes (or that many zero bytes
 * if bytes is NULL) before anything else, and return retval on each call.
 * Also resets the count of bytes drawn.
 */
static void fix_rng(const uint8_t *bytes, unsigned nb_bytes, int retval)
{
    assert(nb_bytes <= sizeof fixed);

    if (bytes == NULL)
        memset(fixed, 0, nb_bytes);
    else
        memcpy(fixed, bytes, nb_bytes);

    fixed_ret = retval;
    nb_drawn = 0;
    nb_fixed = nb_bytes;
}
517 | 
518 | static void unfix_rng(void)
519 | {
520 |     nb_fixed = 0;
521 |     nb_drawn = 0;
522 |     fixed_ret = 0;
523 | }
524 | 
525 | int p256_generate_random(uint8_t *output, unsigned output_size)
526 | {
527 |     unsigned output_offset = 0;
528 | 
529 |     while (output_offset < output_size && nb_drawn < nb_fixed) {
530 |         output[output_offset++] = fixed[nb_drawn++];
531 |     }
532 | 
533 |     while (output_offset < output_size) {
534 |         output[output_offset++] = (uint8_t) rand();
535 |         nb_drawn++;
536 |     }
537 | 
538 |     return fixed_ret;
539 | }
540 | 
/*
 * Debugging helper: print one line of the form
 *   name: <len bytes of p in lowercase hex> (drawn, ret)
 * on standard output.
 */
static void printout(const char *name, const uint8_t *p, unsigned len,
                     unsigned drawn, int ret)
{
    printf("%s: ", name);
    for (unsigned i = 0; i < len; i++)
        printf("%02x", (unsigned) p[i]);
    /* drawn is unsigned, so it needs %u rather than %d */
    printf(" (%u, %d)\n", drawn, ret);
}
549 | 
550 | static void assert_rng_for_tests(void)
551 | {
552 |     uint8_t out[80], fix[64];
553 |     int ret;
554 | 
555 |     for (uint8_t i = 0; i < 64; i++)
556 |         fix[i] = i;
557 | 
558 |     ret = p256_generate_random(out, 80);
559 |     printout("rnd", out, 32, nb_drawn, ret);
560 |     assert(ret == 0);
561 | 
562 |     fix_rng(fix, 32, -1);
563 |     ret = p256_generate_random(out, 80);
564 |     //printout("f32", out, 80, nb_drawn, ret);
565 |     assert(memcmp(fix, out, 32) == 0);
566 |     assert(ret == -1);
567 | 
568 |     unfix_rng();
569 |     ret = p256_generate_random(out, 80);
570 |     //printout("rnd", out, 80, nb_drawn, ret);
571 |     assert(ret == 0);
572 | 
573 |     fix_rng(fix, 64, 0);
574 |     ret = p256_generate_random(out, 32);
575 |     ret = p256_generate_random(out + 32, 32);
576 |     ret = p256_generate_random(out + 64, 16);
577 |     //printout("f64", out, 80, nb_drawn, ret);
578 |     assert(memcmp(fix, out, 32) == 0);
579 | 
580 |     unfix_rng();
581 |     ret = p256_generate_random(out, 80);
582 |     //printout("rnd", out, 80, nb_drawn, ret);
583 |     assert(ret == 0);
584 | }
585 | 
586 | /*
587 |  * ECDH functions
588 |  */
589 | 
/*
 * Check how p256_gen_keypair() deals with unsuitable RNG output, using
 * the fixed RNG:
 * - when the RNG only outputs zeros (for 128 bytes), it must return -1;
 * - when the RNG outputs 32 zero bytes, then the bytes of n, then random
 *   bytes, it must succeed after drawing 96 bytes in total, that is after
 *   three draws of 32 bytes (presumably the first two candidates were
 *   rejected as private keys - confirm in p256-m.c).
 */
static void assert_gen_keypair(void)
{
    int ret;
    uint8_t priv[32], pub[64];

    /* non-random RNG - always zero */
    fix_rng(NULL, 128, 0);
    ret = p256_gen_keypair(priv, pub);
    assert(ret == -1);

    /* unlucky RNG, need to retry */
    /* pub is only used as scratch space to build the fixed RNG output */
    memset(pub, 0, 32);
    u256_to_bytes(pub + 32, p256_n.m);
    fix_rng(pub, 64, 0);
    ret = p256_gen_keypair(priv, pub);
    assert(ret == 0);
    assert(nb_drawn == 96);
}
608 | 
609 | /*
610 |  * ECDSA
611 |  */
612 | 
613 | static void assert_ecdsa_mod_n(void)
614 | {
615 |     uint32_t z[8];
616 | 
617 |     /* less than n */
618 |     u256_cmov(z, r, 1);
619 |     ecdsa_m256_mod_n(z);
620 |     assert(memcmp(z, r, sizeof z) == 0);
621 | 
622 |     /* just less than n: equal to n-1 */
623 |     u256_cmov(z, nm1, 1);
624 |     ecdsa_m256_mod_n(z);
625 |     assert(memcmp(z, nm1, sizeof z) == 0);
626 | 
627 |     /* equal to n */
628 |     u256_cmov(z, p256_n.m, 1);
629 |     ecdsa_m256_mod_n(z);
630 |     assert(memcmp(z, zero, sizeof z) == 0);
631 | 
632 |     /* larger than n */
633 |     u256_cmov(z, p256_p.m, 1);
634 |     ecdsa_m256_mod_n(z);
635 |     assert(memcmp(z, pmn, sizeof z) == 0);
636 | }
637 | 
638 | static void assert_ecdsa_from_hash(void)
639 | {
640 |     uint32_t z[8];
641 | 
642 |     ecdsa_m256_from_hash(z, h160a, sizeof h160a);
643 |     assert(memcmp(z, h160a_e, sizeof z) == 0);
644 | 
645 |     ecdsa_m256_from_hash(z, h224a, sizeof h224a);
646 |     assert(memcmp(z, h224a_e, sizeof z) == 0);
647 | 
648 |     ecdsa_m256_from_hash(z, h256a, sizeof h256a);
649 |     assert(memcmp(z, h256a_e, sizeof z) == 0);
650 | 
651 |     ecdsa_m256_from_hash(z, h384a, sizeof h384a);
652 |     assert(memcmp(z, h384a_e, sizeof z) == 0);
653 | 
654 |     ecdsa_m256_from_hash(z, h512a, sizeof h512a);
655 |     assert(memcmp(z, h512a_e, sizeof z) == 0);
656 | 
657 |     ecdsa_m256_from_hash(z, h160b, sizeof h160b);
658 |     assert(memcmp(z, h160b_e, sizeof z) == 0);
659 | 
660 |     ecdsa_m256_from_hash(z, h224b, sizeof h224b);
661 |     assert(memcmp(z, h224b_e, sizeof z) == 0);
662 | 
663 |     ecdsa_m256_from_hash(z, h256b, sizeof h256b);
664 |     assert(memcmp(z, h256b_e, sizeof z) == 0);
665 | 
666 |     ecdsa_m256_from_hash(z, h384b, sizeof h384b);
667 |     assert(memcmp(z, h384b_e, sizeof z) == 0);
668 | 
669 |     ecdsa_m256_from_hash(z, h512b, sizeof h512b);
670 |     assert(memcmp(z, h512b_e, sizeof z) == 0);
671 | }
672 | 
673 | static void assert_ecdsa_sign_one(const uint8_t k[32], const uint8_t sigref[64],
674 |                                   const uint8_t *hash, size_t hlen)
675 | {
676 |     int ret;
677 |     uint8_t sig[64];
678 | 
679 |     fix_rng(k, 32, 0);
680 |     ret = p256_ecdsa_sign(sig, ecdsa_priv, hash, hlen);
681 |     assert(ret == 0);
682 |     assert(memcmp(sig, sigref, sizeof sig) == 0);
683 | }
684 | 
685 | static void assert_ecdsa_sign(void)
686 | {
687 |     /* known values */
688 |     assert_ecdsa_sign_one(k160a, sig160a, h160a, sizeof h160a);
689 |     assert_ecdsa_sign_one(k224a, sig224a, h224a, sizeof h224a);
690 |     assert_ecdsa_sign_one(k256a, sig256a, h256a, sizeof h256a);
691 |     assert_ecdsa_sign_one(k384a, sig384a, h384a, sizeof h384a);
692 |     assert_ecdsa_sign_one(k512a, sig512a, h512a, sizeof h512a);
693 |     assert_ecdsa_sign_one(k160b, sig160b, h160b, sizeof h160b);
694 |     assert_ecdsa_sign_one(k224b, sig224b, h224b, sizeof h224b);
695 |     assert_ecdsa_sign_one(k256b, sig256b, h256b, sizeof h256b);
696 |     assert_ecdsa_sign_one(k384b, sig384b, h384b, sizeof h384b);
697 |     assert_ecdsa_sign_one(k512b, sig512b, h512b, sizeof h512b);
698 | 
699 |     uint8_t sig[64];
700 |     int ret;
701 | 
702 |     /* non-random RNG */
703 |     fix_rng(NULL, 128, 0);
704 |     ret = p256_ecdsa_sign(sig, ecdsa_priv, h256a, sizeof h256a);
705 |     assert(ret == -1);
706 | 
707 |     /* unlucky RNG, need to retry */
708 |     memset(sig, 0, 32);
709 |     u256_to_bytes(sig + 32, p256_n.m);
710 |     fix_rng(sig, 64, 0);
711 |     ret = p256_ecdsa_sign(sig, ecdsa_priv, h256a, sizeof h256a);
712 |     assert(ret == 0);
713 |     assert(nb_drawn == 96);
714 | 
715 |     /* crafted hash value to reach s == 0 */
716 |     memset(sig, 42, sizeof sig);
717 |     fix_rng(k256a, 32, 0);
718 |     ret = p256_ecdsa_sign(sig, ecdsa_priv, h256a_s0, sizeof h256a_s0);
719 |     assert(ret == P256_RANDOM_FAILED);
720 |     for (unsigned i = 0; i < 32; i++) {
721 |         assert(sig[i] == 0 && sig[i+32] == 42);
722 |     }
723 | }
724 | 
/*
 * Entry point of the open-box test suite: prints a banner, passes every
 * assert_* test group to RUN() (grouped by layer, from the test RNG and
 * u256 helpers up to ECDH and ECDSA), then prints "PASSED".
 *
 * NOTE(review): RUN() and PUTS() are macros defined outside this section
 * (presumably in test-common.h) - confirm their exact behaviour there.
 */
int main(void)
{
    PUTS("\np256-m open-box test suite");

    /* testing the test RNG */
    RUN(assert_rng_for_tests());

    /* u256 */
    RUN(assert_add(r, s, rps, 0u));
    RUN(assert_sub(r, s, rms, 0u));
    RUN(assert_sub(s, r, smr, 1u));
    RUN(assert_cmov());
    RUN(assert_ubytes());

    /* 64-bit multiply */
    RUN(assert_muladd64());

    /* m256 */
    RUN(assert_madd());
    RUN(assert_msub());
    RUN(assert_mmul());
    RUN(assert_prep_mul_done());
    RUN(assert_inv());
    RUN(assert_mbytes());

    /* point */
    RUN(assert_pt_params());
    RUN(assert_pt_check());
    RUN(assert_pt_affine());
    RUN(assert_pt_double());
    RUN(assert_pt_add());
    RUN(assert_pt_add_or_double());
    RUN(assert_pt_bytes());

    /* scalar */
    RUN(assert_scalar_mult());
    RUN(assert_sbytes());

    /* ecdh */
    RUN(assert_gen_keypair());

    /* ecdsa */
    RUN(assert_ecdsa_mod_n());
    RUN(assert_ecdsa_from_hash());
    RUN(assert_ecdsa_sign());

    PUTS("PASSED");
    /* no explicit return: since C99, reaching the end of main() returns 0 */
}
773 | 


--------------------------------------------------------------------------------
/toolchain-mul64.c:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * See toolchain-mul64.sh.
 3 |  *
 4 |  * Author: Manuel Pégourié-Gonnard.
 5 |  * SPDX-License-Identifier: Apache-2.0
 6 |  */
 7 | #include <stdint.h>
 8 | 
/*
 * 32x32->64 bit unsigned multiplication followed by two 32-bit additions.
 *
 * Returns x * y + z + t computed in 64 bits; the result always fits, since
 * (2^32 - 1)^2 + 2 * (2^32 - 1) == 2^64 - 1.
 *
 * This function only exists so that toolchain-mul64.sh can disassemble the
 * code the compiler generates for this exact expression - keep its shape.
 */
uint64_t mul64(uint32_t x, uint32_t y, uint32_t z, uint32_t t) {
    /* the cast on x makes the multiplication (and the sums) 64-bit wide */
    return (uint64_t) x * y + z + t;
}
12 | 


--------------------------------------------------------------------------------
/toolchain-mul64.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/sh
 2 | 
 3 | # Print generated assembly for 32x32->64 bit unsigned multiplication.
 4 | #
 5 | # This is a development helper to:
 6 | # 1. Check it the toolchain-provided __aeabi_lmul is contant-time (it isn't).
 7 | # 2. Check if the compiler uses the UMAAL instruction (it doesn't).
 8 | #
 9 | # Author: Manuel Pégourié-Gonnard.
10 | # SPDX-License-Identifier: Apache-2.0
11 | 
12 | set -eu
13 | 
14 | for CPU in m0 m0plus m3 m4 m7 m23 m33 a7; do
15 |     printf "\n***** %s *****\n" $CPU
16 |     arm-none-eabi-gcc -Os -mthumb -mcpu=cortex-$CPU toolchain-mul64.c \
17 |         --entry=mul64 -nostartfiles -o linked.elf
18 |     arm-none-eabi-objdump -d linked.elf
19 | done
20 | 
21 | rm linked.elf
22 | 


--------------------------------------------------------------------------------
/wcs.py:
--------------------------------------------------------------------------------
  1 | # Copyright (c) 2016, Peter McKinnis
  2 | # All rights reserved.
  3 | # 
  4 | # Redistribution and use in source and binary forms, with or without
  5 | # modification, are permitted provided that the following conditions are met:
  6 | #     * Redistributions of source code must retain the above copyright
  7 | #       notice, this list of conditions and the following disclaimer.
  8 | #     * Redistributions in binary form must reproduce the above copyright
  9 | #       notice, this list of conditions and the following disclaimer in the
 10 | #       documentation and/or other materials provided with the distribution.
 11 | #     * Neither the name of the <organization> nor the
 12 | #       names of its contributors may be used to endorse or promote products
 13 | #       derived from this software without specific prior written permission.
 14 | # 
 15 | # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
 16 | # ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
 17 | # WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
 18 | # DISCLAIMED. IN NO EVENT SHALL <COPYRIGHT HOLDER> BE LIABLE FOR ANY
 19 | # DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
 20 | # (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 21 | # LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
 22 | # ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 23 | # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
 24 | # SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 25 | #
 26 | # https://github.com/PeterMcKinnis/WorstCaseStack
 27 | 
 28 | import re
 29 | import pprint
 30 | import os
 31 | from subprocess import check_output
 32 | from optparse import OptionParser
 33 | 
# Constants

# Suffix shared by all RTL dump files; find_rtl_ext looks for file names ending with it.
rtl_ext_end = ".dfinish"
# Full RTL extension, e.g. '.270r.dfinish'. The number '270' will change with gcc version and is auto-detected by
# the function find_rtl_ext, which keeps everything from the last '.' preceding '.dfinish'.
rtl_ext = None
# Working directory. Shadows the builtin dir() and is not referenced anywhere else in this file.
dir = None
# Extension of the stack usage files read by read_su.
su_ext = '.su'
# Extension of the object files handed to readelf by read_obj.
obj_ext = '.o'
# Extension of the manual stack usage files read by read_manual.
manual_ext = '.msu'
# readelf executable; you may need to enter the full path here.
read_elf_path = "arm-none-eabi-readelf"
# Encoding used to decode readelf's output; system dependent.
stdout_encoding = "utf-8"
 44 | 
 45 | 
 46 | class Printable:
 47 |     def __repr__(self):
 48 |         return "<" + type(self).__name__ + "> " + pprint.pformat(vars(self), indent=4, width=1)
 49 | 
 50 | 
 51 | class Symbol(Printable):
 52 |     pass
 53 | 
 54 | 
 55 | def read_symbols(file):
 56 |     from subprocess import check_output
 57 | 
 58 |     def to_symbol(read_elf_line):
 59 |         v = read_elf_line.split()
 60 | 
 61 |         s2 = Symbol()
 62 |         s2.value = int(v[1], 16)
 63 |         s2.size = int(v[2])
 64 |         s2.type = v[3]
 65 |         s2.binding = v[4]
 66 |         if len(v) >= 8:
 67 |             s2.name = v[7]
 68 |         else:
 69 |             s2.name = ""
 70 | 
 71 |         return s2
 72 | 
 73 |     output = check_output([read_elf_path, "-s", "-W", file]).decode(stdout_encoding)
 74 |     lines = output.splitlines()[3:]
 75 |     return [to_symbol(line) for line in lines]
 76 | 
 77 | 
 78 | def read_obj(tu, call_graph):
 79 |     """
 80 |     Reads the file tu.o and gets the binding (global or local) for each function
 81 |     :param tu: name of the translation unit (e.g. for main.c, this would be 'main')
 82 |     :param call_graph: a object used to store information about each function, results go here
 83 |     """
 84 |     symbols = read_symbols(tu[0:tu.rindex(".")] + obj_ext)
 85 | 
 86 |     for s in symbols:
 87 | 
 88 |         if s.type == 'FUNC':
 89 |             if s.binding == 'GLOBAL':
 90 |                 # Check for multiple declarations
 91 |                 if s.name in call_graph['globals'] or s.name in call_graph['locals']:
 92 |                     raise Exception('Multiple declarations of {}'.format(s.name))
 93 |                 call_graph['globals'][s.name] = {'tu': tu, 'name': s.name, 'binding': s.binding}
 94 |             elif s.binding == 'LOCAL':
 95 |                 # Check for multiple declarations
 96 |                 if s.name in call_graph['locals'] and tu in call_graph['locals'][s.name]:
 97 |                     raise Exception('Multiple declarations of {}'.format(s.name))
 98 | 
 99 |                 if s.name not in call_graph['locals']:
100 |                     call_graph['locals'][s.name] = {}
101 | 
102 |                 call_graph['locals'][s.name][tu] = {'tu': tu, 'name': s.name, 'binding': s.binding}
103 |             elif s.binding == 'WEAK':
104 |                 if s.name in call_graph['weak']:
105 |                     raise Exception('Multiple declarations of {}'.format(s.name))
106 |                 call_graph['weak'][s.name] = {'tu': tu, 'name': s.name, 'binding': s.binding}
107 |             else:
108 |                 raise Exception('Error Unknown Binding "{}" for symbol: {}'.format(s.binding, s.name))
109 | 
110 | 
def find_fxn(tu, fxn, call_graph):
    """
    Finds the entry of a function by its symbol name; globals take precedence over locals.
    :param tu: The translation unit whose local functions are searched if no global matches
    :param fxn: The function name
    :param call_graph: a object used to store information about each function
    :return: the dictionary for the given function or None
    """
    global_fxns = call_graph['globals']
    if fxn in global_fxns:
        return global_fxns[fxn]

    try:
        per_tu = call_graph['locals'][fxn]
        return per_tu[tu]
    except KeyError:
        # No local of that name at all, or none in this translation unit
        return None
127 | 
128 | 
def find_demangled_fxn(tu, fxn, call_graph):
    """
    Finds the entry of a function by its demangled name; globals are searched before locals.
    Entries that have no 'demangledName' yet are ignored.
    :param tu: The translation unit whose local functions are searched
    :param fxn: The demangled function name
    :param call_graph: a object used to store information about each function
    :return: the dictionary for the given function or None
    """

    def is_match(entry):
        return 'demangledName' in entry and entry['demangledName'] == fxn

    for entry in call_graph['globals'].values():
        if is_match(entry):
            return entry

    for per_tu in call_graph['locals'].values():
        if tu in per_tu and is_match(per_tu[tu]):
            return per_tu[tu]

    return None
147 | 
148 | 
def read_rtl(tu, call_graph):
    """
    Read an RTL file and finds callees for each function and if there are calls via function pointer.
    Every function header found sets 'demangledName', 'calls' and 'has_ptr_call' on that function's entry.
    :param tu: the translation unit; the dump is read from tu + rtl_ext
    :param call_graph: a object used to store information about each function, results go here
    :raises Exception: if a function header names a function that find_fxn cannot locate
    """

    # Construct A Call Graph
    function = re.compile(r'^;; Function (.*) \((\S+), funcdef_no=\d+(, [a-z_]+=\d+)*\)( \([a-z ]+\))?$')
    static_call = re.compile(r'^.*\(call.*"(.*)".*$')
    other_call = re.compile(r'^.*call .*$')

    # Entry of the function whose body is currently being scanned; None until the first header is seen
    current = None

    # The context manager closes the dump even if locating a function fails
    with open(tu + rtl_ext) as rtl_file:
        for line_ in rtl_file:
            m = function.match(line_)
            if m:
                fxn_name = m.group(2)
                current = find_fxn(tu, fxn_name, call_graph)
                if not current:
                    pprint.pprint(call_graph)
                    raise Exception("Error locating function {} in {}".format(fxn_name, tu))

                current['demangledName'] = m.group(1)
                current['calls'] = set()
                current['has_ptr_call'] = False
                continue

            if current is None:
                # Call-like text ahead of the first function header belongs to no function
                continue

            m = static_call.match(line_)
            if m:
                # Call to a named symbol
                current['calls'].add(m.group(1))
                continue

            m = other_call.match(line_)
            if m:
                # A call without a quoted symbol name is treated as a call through a pointer
                current['has_ptr_call'] = True
                continue
185 | 
186 | 
def read_su(tu, call_graph):
    """
    Reads the 'local_stack' for each function.  Local stack ignores stack used by callees.
    Lines that do not look like stack usage records are reported on stdout and skipped.
    :param tu: the translation unit; the file read is tu with its last extension replaced by su_ext
    :param call_graph: a object used to store information about each function, results go here
    :raises Exception: if a function listed in the file has no entry in call_graph
    :return:
    """

    # Fields: file:line:column:function <TAB> stack size <TAB> qualifier
    su_line = re.compile(r'^([^ :]+):([\d]+):([\d]+):(.+)\t(\d+)\t(\S+)$')
    su_path = tu[0:tu.rindex(".")] + su_ext

    # The context manager closes the file on every path, including errors
    with open(su_path) as su_file:
        for i, line in enumerate(su_file, start=1):
            m = su_line.match(line)
            if not m:
                print("error parsing line {} in file {}".format(i, tu))
                continue

            fxn = m.group(4)
            fxn_dict2 = find_demangled_fxn(tu, fxn, call_graph)
            if fxn_dict2 is None:
                # Fail with the same kind of message as read_rtl instead of an opaque TypeError
                raise Exception("Error locating function {} in {}".format(fxn, tu))
            fxn_dict2['local_stack'] = int(m.group(5))
207 | 
208 | 
def read_manual(file, call_graph):
    """
    reads the manual stack usage files.
    Each non-blank line holds a function name and its worst case stack size, separated by whitespace.
    The function is registered as a global with no callees, so the given size is used as is.
    :param file: the file name
    :param call_graph: a object used to store information about each function, results go here
    :raises Exception: if a function is already registered as a global
    """

    # The context manager closes the file on every path, including errors
    with open(file) as manual_file:
        for line in manual_file:
            if not line.strip():
                # Tolerate blank lines, e.g. a trailing newline at the end of the file
                continue

            fxn, stack_sz = line.split()
            # Functions live in call_graph['globals']; call_graph itself only has the keys
            # 'locals', 'globals' and 'weak', so testing it directly would never detect a clash
            if fxn in call_graph['globals']:
                raise Exception("Redeclared Function {}".format(fxn))
            call_graph['globals'][fxn] = {'wcs': int(stack_sz),
                                          'calls': set(),
                                          'has_ptr_call': False,
                                          'local_stack': int(stack_sz),
                                          'is_manual': True,
                                          'name': fxn,
                                          'demangledName': fxn,
                                          'tu': '#MANUAL',
                                          'binding': 'GLOBAL'}
229 | 
230 | 
def validate_all_data(call_graph):
    """
    Check that every global and local entry in the call graph has the following fields:
    calls, has_ptr_call, local_stack, name, tu
    Incomplete entries are reported on stdout; nothing is raised or returned.
    """
    required = ('calls', 'has_ptr_call', 'local_stack', 'name', 'tu')

    def report_if_incomplete(entry):
        if not all(field in entry for field in required):
            print("Error data is missing in fxn dictionary {}".format(entry))

    # Globals first, then every local of every translation unit
    for entry in call_graph['globals'].values():
        report_if_incomplete(entry)

    for per_tu in call_graph['locals'].values():
        for entry in per_tu.values():
            report_if_incomplete(entry)
250 | 
def resolve_all_calls(call_graph):
    """
    Turns the callee names of every global and local function into references to call graph entries.
    Afterwards each entry has 'r_calls' (list of callee entries that were found) and
    'unresolved_calls' (set of callee names that were not).
    :param call_graph: a object used to store information about each function, results go here
    """

    def every_function():
        # Globals first, then every local of every translation unit
        yield from call_graph['globals'].values()
        for per_tu in call_graph['locals'].values():
            yield from per_tu.values()

    for fxn_dict in every_function():
        resolved = fxn_dict['r_calls'] = []
        unresolved = fxn_dict['unresolved_calls'] = set()

        for callee_name in fxn_dict['calls']:
            callee = find_fxn(fxn_dict['tu'], callee_name, call_graph)
            if callee:
                resolved.append(callee)
            else:
                unresolved.add(callee_name)
271 | 
272 | 
def calc_all_wcs(call_graph):
    """
    Computes the worst case stack ('wcs') of every global and local function in the call graph.
    The wcs is the function's own 'local_stack' plus the largest wcs among its resolved callees, or the
    string 'unbounded' when that cannot be bounded. Unresolved calls of callees are merged into the caller.
    :param call_graph: a object used to store information about each function, results go here
    """

    def visit(node, ancestors):
        """
        Depth-first computation of node['wcs'].
        :param ancestors: the functions on the current call chain above node; finding node among them
        means the chain is recursive.
        """
        # Already computed, or given by a manual file
        if 'wcs' in node:
            return

        # Calls through pointers and recursion cannot be bounded
        if node['has_ptr_call'] or node in ancestors:
            node['wcs'] = 'unbounded'
            return

        chain = ancestors + [node]
        deepest_callee = 0
        for callee in node['r_calls']:
            visit(callee, chain)

            callee_wcs = callee['wcs']
            if callee_wcs == 'unbounded':
                # One unbounded callee makes the caller unbounded too
                node['wcs'] = 'unbounded'
                return

            deepest_callee = max(deepest_callee, callee_wcs)

            # Whatever the callee could not resolve is also missing from the caller's total
            node['unresolved_calls'].update(callee['unresolved_calls'])

        node['wcs'] = deepest_callee + node['local_stack']

    # Globals first, then every local of every translation unit
    for entry in call_graph['globals'].values():
        visit(entry, [])

    for per_tu in call_graph['locals'].values():
        for entry in per_tu.values():
            visit(entry, [])
328 | 
329 | 
def print_all_fxns(call_graph):
    """
    Prints a table with one row per global and local function: translation unit, demangled name,
    worst case stack and unresolved dependencies. Rows are ordered by decreasing stack, with
    'unbounded' functions last.
    :param call_graph: a object used to store information about each function; every entry must already
    have 'wcs' and 'unresolved_calls' set
    """

    def print_fxn(row_format, fxn_dict2):
        unresolved = fxn_dict2['unresolved_calls']
        stack = str(fxn_dict2['wcs'])
        if unresolved:
            # Sorted so that the output does not depend on set iteration order
            unresolved_str = '({})'.format(', '.join(sorted(unresolved)))
            # A numeric total that misses some callees is only a lower bound
            if stack != 'unbounded':
                stack = "unbounded:" + stack
        else:
            unresolved_str = ''

        print(row_format.format(fxn_dict2['tu'], fxn_dict2['demangledName'], stack, unresolved_str))

    def get_order(val):
        # Largest stack first; 'unbounded' sorts after every number
        if val == 'unbounded':
            return 1
        else:
            return -val

    # Collect every global and local function
    d_list = list(call_graph['globals'].values())
    for l_dict in call_graph['locals'].values():
        d_list.extend(l_dict.values())

    d_list.sort(key=lambda item: get_order(item['wcs']))

    # Calculate table width: at least as wide as the column titles, which also covers an empty table.
    # The name column is sized from 'demangledName' because that is what print_fxn prints.
    tu_width = max([len(d['tu']) for d in d_list] + [16])
    name_width = max([len(d['demangledName']) for d in d_list] + [13])
    row_format = "{:<" + str(tu_width + 2) + "}  {:<" + str(name_width + 2) + "}  {:>14}  {:<17}"

    # Print out the table
    print("")
    print(row_format.format('Translation Unit', 'Function Name', 'Stack', 'Unresolved Dependencies'))
    for d in d_list:
        print_fxn(row_format, d)
372 | 
373 | 
def find_rtl_ext():
    """
    Detects the RTL dump extension and stores it in the global rtl_ext.
    The tree below the current directory is searched for the first file name ending with rtl_ext_end;
    rtl_ext is then everything from the last '.' that precedes that ending. Exits with -1 if there is none.
    """
    global rtl_ext

    for _root, _directories, filenames in os.walk('.'):
        for fname in filenames:
            if not fname.endswith(rtl_ext_end):
                continue
            # Strip the fixed ending, then locate the dot that starts the pass-number component
            stem = fname[:-len(rtl_ext_end)]
            rtl_ext = fname[stem.rindex("."):]
            print("rtl_ext = " + rtl_ext)
            return

    print("Could not find any files ending with '.dfinish'.  Check that the script is being run from the correct "
          "directory.  Check that the code was compiled with the correct flags")
    exit(-1)
388 | 
389 | 
def find_files():
    """
    Collects the input files found below the current directory.
    A translation unit is kept when its RTL dump has a matching stack usage file and object file;
    manual stack usage files are all kept. Exits with -1 if no translation unit is found.
    :return: a tuple (translation units, manual files); a translation unit is the RTL file path without rtl_ext
    """
    all_files = [os.path.join(root, filename)
                 for root, _directories, filenames in os.walk('.')
                 for filename in filenames]

    tu = []
    for f in all_files:
        if not (os.path.isfile(f) and f.endswith(rtl_ext)):
            continue
        # base still carries the source extension; short_base is shared with the .su and .o files
        base = f[0:-len(rtl_ext)]
        short_base = base[0:base.rindex(".")]
        if short_base + su_ext in all_files and short_base + obj_ext in all_files:
            tu.append(base)
            print('Reading: {}{}, {}{}, {}{}'.format(base, rtl_ext, short_base, su_ext, short_base, obj_ext))

    manual = []
    for f in all_files:
        if os.path.isfile(f) and f.endswith(manual_ext):
            manual.append(f)
            print('Reading: {}'.format(f))

    # Print some diagnostic messages
    if not tu:
        print("Could not find any translation units to analyse")
        exit(-1)

    return tu, manual
417 | 
418 | 
def main():
    """
    Runs the whole analysis on the files below the current directory and prints the resulting table:
    detect the RTL extension, find the inputs, read them into the call graph, validate it, resolve the
    calls, compute the worst case stack of every function and print the result.
    """

    # Find the appropriate RTL extension
    find_rtl_ext()

    # Find all input files
    call_graph = {'locals': {}, 'globals': {}, 'weak': {}}
    tu_list, manual_list = find_files()

    # Read the input files
    for tu in tu_list:
        read_obj(tu, call_graph)  # This must be first

    # A weak definition is only used when no global of the same name exists
    for fxn in call_graph['weak'].values():
        if fxn['name'] not in call_graph['globals'].keys():
            call_graph['globals'][fxn['name']] = fxn

    for tu in tu_list:
        read_rtl(tu, call_graph)
    for tu in tu_list:
        read_su(tu, call_graph)

    # Read manual files
    for m in manual_list:
        read_manual(m, call_graph)

    # Validate Data
    validate_all_data(call_graph)

    # Resolve All Function Calls
    resolve_all_calls(call_graph)

    # Calculate Worst Case Stack For Each Function
    calc_all_wcs(call_graph)

    # Print A Nice Message With Each Function and the WCS
    print_all_fxns(call_graph)


# Only run the analysis when executed as a script, so that importing this module has no side effects
if __name__ == '__main__':
    main()
459 | 


--------------------------------------------------------------------------------