├── Cargo.toml ├── LICENSE ├── README.md ├── benches ├── blake2s.rs ├── ed25519.rs ├── ed448.rs ├── gf25519.rs ├── gf255e.rs ├── gf448.rs ├── gls254.rs ├── jq255e.rs ├── jq255s.rs ├── modint.rs ├── p256.rs ├── ristretto255.rs ├── sc448.rs ├── secp256k1.rs ├── util.rs ├── x25519.rs └── x448.rs ├── extra ├── frost-sample.rs ├── jq255-formulas.txt ├── mkuxcomp.sage └── truncsig.pdf └── src ├── backend ├── mod.rs ├── w32 │ ├── gf255.rs │ ├── gf448.rs │ ├── gfb254_m32.rs │ ├── gfgen.rs │ ├── gfsecp256k1.rs │ ├── lagrange.rs │ ├── mod.rs │ ├── modint.rs │ └── zz.rs └── w64 │ ├── gf255_m51.rs │ ├── gf255_m64.rs │ ├── gf448.rs │ ├── gfb254_arm64pmull.rs │ ├── gfb254_m64.rs │ ├── gfb254_x86clmul.rs │ ├── gfgen.rs │ ├── gfp256.rs │ ├── gfsecp256k1.rs │ ├── lagrange.rs │ ├── mod.rs │ ├── modint.rs │ ├── modint32.rs │ ├── util32.rs │ ├── zz.rs │ └── zz32.rs ├── blake2s.rs ├── decaf448.rs ├── ed25519.rs ├── ed448.rs ├── field.rs ├── frost.rs ├── gls254.rs ├── jq255e.rs ├── jq255s.rs ├── lib.rs ├── lms.rs ├── p256.rs ├── ristretto255.rs ├── secp256k1.rs ├── sha2.rs ├── sha3.rs ├── x25519.rs └── x448.rs /Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "crrl" 3 | version = "0.9.0" 4 | authors = ["Thomas Pornin "] 5 | edition = "2018" 6 | license = "MIT" 7 | description = "Library for cryptographic research" 8 | repository = "https://github.com/pornin/crrl" 9 | readme = "README.md" 10 | categories = ["cryptography", "no-std"] 11 | keywords = ["cryptography", "ed25519", "ristretto255", "p256", "jq255e"] 12 | exclude = ["extra/*"] 13 | 14 | # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html 15 | 16 | [dependencies] 17 | rand_core = "0.6" 18 | 19 | [dev-dependencies] 20 | hex = "0.4.3" 21 | num-bigint = "0.4.3" 22 | 23 | # Set feature "w32_backend" or "w64_backend" to force use of the 32-bit or 24 | # 64-bit backend explicitly. 
If neither is set then the backend will be 25 | # selected automatically (architectures with 64-bit pointers get the 64-bit 26 | # backend, while architectures with 32-bit pointers get the 32-bit backend). 27 | # The "64-bit backend on 32-bit arch" combination might not be fully 28 | # constant-time (depending on how the compiler implements multiplications 29 | # on multi-limb integers) and thus should be avoided. 30 | # 31 | # Default feature 'std' enables uses of heap allocation, which is used by 32 | # some functions. By disabling it, a core-only library can be obtained. 33 | [features] 34 | default = [ "std", "omnes" ] 35 | std = [ "alloc" ] 36 | alloc = [] 37 | w32_backend = [] 38 | w64_backend = [] 39 | gf255_m51 = [] 40 | gf255_m64 = [] 41 | gfb254_m64 = [] 42 | gfb254_x86clmul = [] 43 | gfb254_arm64pmull = [] 44 | omnes = [ "decaf448", "ed25519", "ed448", "frost", "jq255e", "jq255s", "lms", "p256", "ristretto255", "secp256k1", "gls254", "x25519", "x448", "modint256", "gf255", "gfgen" ] 45 | decaf448 = [ "ed448" ] 46 | ed25519 = [ "gf25519", "modint256" ] 47 | ed448 = [ "gf448", "gfgen" ] 48 | frost = [ "alloc" ] 49 | jq255e = [ "gf255e", "modint256", "blake2s" ] 50 | jq255s = [ "gf255s", "modint256", "blake2s" ] 51 | lms = [] 52 | p256 = [ "gfp256", "modint256" ] 53 | ristretto255 = [ "ed25519" ] 54 | secp256k1 = [ "gfsecp256k1", "modint256" ] 55 | gls254 = [ "gfb254", "modint256", "blake2s" ] 56 | x25519 = [ "ed25519" ] 57 | x448 = [ "ed448" ] 58 | gfgen = [] 59 | gf255 = [] 60 | gf255e = [] 61 | gf255s = [] 62 | gf25519 = [] 63 | gfp256 = [] 64 | gfsecp256k1 = [] 65 | gf448 = [] 66 | modint256 = [] 67 | gfb254 = [] 68 | gls254bench = [] 69 | zz32 = [] 70 | zz64 = [] 71 | blake2s = [] 72 | 73 | [[bench]] 74 | name = "modint" 75 | path = "benches/modint.rs" 76 | harness = false 77 | required-features = [ "modint256" ] 78 | 79 | [[bench]] 80 | name = "gf255e" 81 | path = "benches/gf255e.rs" 82 | harness = false 83 | required-features = [ "gf255e" ] 84 | 85 | 
[[bench]] 86 | name = "gf25519" 87 | path = "benches/gf25519.rs" 88 | harness = false 89 | required-features = [ "gf25519" ] 90 | 91 | [[bench]] 92 | name = "ed25519" 93 | path = "benches/ed25519.rs" 94 | harness = false 95 | required-features = [ "ed25519" ] 96 | 97 | [[bench]] 98 | name = "x25519" 99 | path = "benches/x25519.rs" 100 | harness = false 101 | required-features = [ "x25519" ] 102 | 103 | [[bench]] 104 | name = "p256" 105 | path = "benches/p256.rs" 106 | harness = false 107 | required-features = [ "p256" ] 108 | 109 | [[bench]] 110 | name = "ristretto255" 111 | path = "benches/ristretto255.rs" 112 | harness = false 113 | required-features = [ "ristretto255" ] 114 | 115 | [[bench]] 116 | name = "jq255e" 117 | path = "benches/jq255e.rs" 118 | harness = false 119 | required-features = [ "jq255e" ] 120 | 121 | [[bench]] 122 | name = "jq255s" 123 | path = "benches/jq255s.rs" 124 | harness = false 125 | required-features = [ "jq255s" ] 126 | 127 | [[bench]] 128 | name = "secp256k1" 129 | path = "benches/secp256k1.rs" 130 | harness = false 131 | required-features = [ "secp256k1" ] 132 | 133 | [[bench]] 134 | name = "gls254" 135 | path = "benches/gls254.rs" 136 | harness = false 137 | required-features = [ "gls254" ] 138 | 139 | [[bench]] 140 | name = "gf448" 141 | path = "benches/gf448.rs" 142 | harness = false 143 | required-features = [ "gf448" ] 144 | 145 | [[bench]] 146 | name = "sc448" 147 | path = "benches/sc448.rs" 148 | harness = false 149 | required-features = [ "ed448" ] 150 | 151 | [[bench]] 152 | name = "ed448" 153 | path = "benches/ed448.rs" 154 | harness = false 155 | required-features = [ "ed448" ] 156 | 157 | [[bench]] 158 | name = "x448" 159 | path = "benches/x448.rs" 160 | harness = false 161 | required-features = [ "x448" ] 162 | 163 | [[bench]] 164 | name = "blake2s" 165 | path = "benches/blake2s.rs" 166 | harness = false 167 | required-features = [ "blake2s" ] 168 | 
-------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2022 Thomas Pornin 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # crrl 2 | 3 | This library implements some primitives for purposes of cryptographic 4 | research. Its point is to provide efficient, optimized and constant-time 5 | implementations that are supposed to be representative of 6 | production-ready code, so that realistic performance benchmarks may be 7 | performed. Thus, while meant primarily for research, the code here 8 | *should* be fine for production use (though of course I offer no such 9 | guarantee; use at your own risks). 
10 | 11 | So far, only some primitives related to elliptic curve cryptography 12 | are implemented: 13 | 14 | - A generic type `GF255` for finite fields of integers modulo a 15 | prime 2^255-`MQ` (for a value of `MQ` between 1 and 32767). The `MQ` 16 | value is provided as a type parameter, i.e. the exact field is known 17 | at compile time. This type covers the usual modulus 2^255-19 (used 18 | in Curve25519) as well as 2^255-18651 and 2^255-3957 (used in 19 | [double-odd curves do255e and do255s](https://doubleodd.group/)). 20 | 21 | - A generic type `ModInt256` for arbitrary finite 22 | fields of integers modulo a prime between 2^192 and 2^256. 23 | Montgomery representation is internally used. The modulus is 24 | provided as type parameters, allowing the compiler to apply 25 | optimizations when some parts of the modulus allow them (in 26 | particular with the modulus used for NIST curve P-256). 27 | 28 | - Type `GFsecp256k1` implements the specific base field for curve 29 | secp256k1 (integers modulo 2^256-4294968273). The 64-bit backend 30 | has a dedicated implementation, while the 32-bit version of this 31 | type uses `ModInt256`. 32 | 33 | - The macro `define_gfgen` allows defining arbitrary finite fields 34 | of integers modulo a prime, with a large range of modulus size. 35 | It uses Montgomery representation internally. 36 | 37 | - Type `GF448` implements the specific base field for Curve448. 38 | The 64-bit backend has a dedicated implementation, while the 32-bit 39 | backend uses `define_gfgen`. 40 | 41 | - Type `ed25519::Point` provides generic group operations in the 42 | twisted Edwards curve Curve25519. Ed25519 signatures (as per [RFC 43 | 8032](https://datatracker.ietf.org/doc/html/rfc8032)) are 44 | implemented. Type `ed25519::Scalar` implements operations on 45 | integers modulo the curve subgroup order. 
46 | 47 | - Type `ristretto255::Point` provides generic group operations in the 48 | [ristretto255 group](https://ristretto.group/), whose prime order is 49 | exactly the size of the interesting subgroup of Curve25519. 50 | 51 | - Type `ed448::Point` provides generic group operations in the 52 | Edwards curve edwards448. Ed448 signatures (as per [RFC 53 | 8032](https://datatracker.ietf.org/doc/html/rfc8032)) are 54 | implemented. Type `ed448::Scalar` implements operations on 55 | integers modulo the curve subgroup order. 56 | 57 | - Type `decaf448::Point` provides generic group operations in the 58 | [decaf448 group](https://ristretto.group/), whose prime order is 59 | exactly the size of the interesting subgroup of Curve448. 60 | 61 | - Type `p256::Point` provides generic group operations in the NIST 62 | P-256 curve (aka "secp256r1" aka "prime256v1"). ECDSA signatures are 63 | supported. The `p256::Scalar` type implements the corresponding 64 | scalars (integers modulo the curve order). 65 | 66 | - Type `secp256k1::Point` provides generic group operations in the 67 | secp256k1 curve (aka "the Bitcoin curve"). ECDSA signatures are 68 | supported. The `secp256k1::Scalar` type implements the corresponding 69 | scalars (integers modulo the curve order). The GLV endomorphism is 70 | leveraged to speed-up point multiplication (key exchange) and 71 | signature verification. 72 | 73 | - Types `jq255e::Point` and `jq255s::Point` implement the 74 | [double-odd curves](https://doubleodd.group/) jq255e and jq255s 75 | (along with the corresponding scalar types `jq255e::Scalar` and 76 | `jq255s::Scalar`). Key exchange and Schnorr signatures are 77 | implemented. These curves provide a prime-order group abstraction, 78 | similar to ristretto255, but with somewhat better performance at the 79 | same security level. Moreover, the relevant signatures are both 80 | shorter (48 bytes instead of 64) and faster than the usual Ed25519 81 | signatures. 
82 | 83 | - Function `x25519::x25519()` implements the 84 | [X25519 function](https://datatracker.ietf.org/doc/html/rfc7748#section-5). 85 | An optimized `x25519::x25519_base()` function is provided when X25519 86 | is applied to the conventional base point. Similarly, `x448::x448()` 87 | and `x448::x448_base()` provide the same functionality for the 88 | X448 function. 89 | 90 | - Type `gls254::Point` implements the GLS254 curve (or, more precisely, 91 | a prime-order group homomorphic to a subgroup of that curve), which is 92 | defined over a binary field. `gls254::Scalar` is the type for integers 93 | modulo the curve order. `gls254::PrivateKey` and `gls254::PublicKey` 94 | implement high-level operations such as key exchange and signatures, 95 | using that group. 96 | 97 | - Module `blake2s` contains some BLAKE2s implementations, with 98 | optional SSE2 and AVX2 optimizations. 99 | 100 | Types `GF255` and `ModInt256` have 32-bit and 64-bit implementations 101 | each (actually two 64-bit implementations, see later the discussion 102 | about the `gf255_m51` feature). The code is portable (it was tested on 103 | 32-bit and 64-bit x86, 64-bit aarch64, and 64-bit riscv64). Performance 104 | is quite decent; e.g. Ed25519 signatures are computed in about 51500 105 | cycles, and verified in about 114000 cycles, on an Intel "Coffee Lake" 106 | CPU; this is not too far from the best assembly-optimized 107 | implementations. At the same time, use of operator overloading allows one to 108 | express formulas on points and scalars with about the same syntax as 109 | their mathematical description. 
For instance, the core of the X25519 110 | implementation looks like this: 111 | 112 | ``` 113 | let A = x2 + z2; 114 | let B = x2 - z2; 115 | let AA = A.square(); 116 | let BB = B.square(); 117 | let C = x3 + z3; 118 | let D = x3 - z3; 119 | let E = AA - BB; 120 | let DA = D * A; 121 | let CB = C * B; 122 | x3 = (DA + CB).square(); 123 | z3 = x1 * (DA - CB).square(); 124 | x2 = AA * BB; 125 | z2 = E * (AA + E.mul_small(121665)); 126 | ``` 127 | 128 | which is quite close to the corresponding description in RFC 7748: 129 | 130 | ``` 131 | A = x_2 + z_2 132 | AA = A^2 133 | B = x_2 - z_2 134 | BB = B^2 135 | E = AA - BB 136 | C = x_3 + z_3 137 | D = x_3 - z_3 138 | DA = D * A 139 | CB = C * B 140 | x_3 = (DA + CB)^2 141 | z_3 = x_1 * (DA - CB)^2 142 | x_2 = AA * BB 143 | z_2 = E * (AA + a24 * E) 144 | ``` 145 | 146 | # Optional Features 147 | 148 | By default, everything in crrl is compiled, which unfortunately makes for 149 | a relatively long compilation time, especially on not-so-fast systems. 150 | To only compile support for some primitives, use `--no-default-features` 151 | then add selectively the features you are interested in with `-F`; e.g. 152 | use `cargo build --no-default-features -F ed25519` to only compile the 153 | Ed25519 support (and the primitives that it needs, such as its base 154 | field). The defined primitive-controlling features are the following: 155 | 156 | - `omnes`: enables all of the following. 
157 | 158 | - `decaf448`: decaf448 prime-order group (based on edwards448) 159 | 160 | - `ed25519`: edwards25519 curve and signatures (RFC 8032: Ed25519) 161 | 162 | - `ed448`: edwards448 curve and signatures (RFC 8032: Ed448) 163 | 164 | - `frost`: FROST threshold signatures (support macros + standard 165 | ciphersuites, but only for the curves which are also enabled in 166 | this build) 167 | 168 | - `jq255e`: jq255e prime-order group and signatures 169 | 170 | - `jq255s`: jq255s prime-order group and signatures 171 | 172 | - `lms`: LMS support (hash-based signatures) 173 | 174 | - `p256`: NIST P-256 curve and signatures (ECDSA) 175 | 176 | - `ristretto255`: ristretto255 prime-order group (based on edwards25519) 177 | 178 | - `secp256k1`: secp256k1 curve and signatures (ECDSA) 179 | 180 | - `x25519`: X25519 key exchange primitive (RFC 7748) 181 | 182 | - `x448`: X448 key exchange primitive (RFC 7748) 183 | 184 | - `modint256`: generic finite field implementation (prime order of up to 185 | 256 bits) 186 | 187 | - `gf255`: generic finite field implementation (for prime order 188 | `q = 2^255 - MQ` with `MQ < 2^15`) 189 | 190 | - `gfgen`: generic finite field implementation (generating macro; prime 191 | modulus of arbitrary length) 192 | 193 | - `gls254`: GLS254 prime-order group and signatures 194 | 195 | - `gls254bench`: additional benchmarking code for GLS254 196 | 197 | - `blake2s`: BLAKE2s hash function 198 | 199 | Some operations have multiple backends. An appropriate backend is selected 200 | at compile-time, but this can be overridden by enabling some features: 201 | 202 | - `w32_backend`: enforce use of the 32-bit code, even on 64-bit systems. 203 | 204 | - `w64_backend`: enforce use of the 64-bit code, even on 32-bit systems. 205 | 206 | - `gf255_m64`: enforce use of 64-bit limbs for `GF255`; this is the 207 | default on 64-bit machines, except RISC-V (riscv64) where 51-bit 208 | limbs are used by default. 
This feature has no effect if the 32-bit code 209 | is used. 210 | 211 | - `gf255_m51`: enforce use of 51-bit limbs for `GF255`; this is the 212 | default on 64-bit RISC-V targets (riscv64), but not on other 64-bit 213 | architectures where 64-bit limbs are normally preferred. This feature 214 | has no effect if the 32-bit code is used. 215 | 216 | - `gfb254_m64`: enforce use of the generic implementation of the 217 | binary field GF(2^254). This feature has no effect if the 32-bit code 218 | is used. 219 | 220 | - `gfb254_x86clmul`: enforce use of the AVX2+pclmulqdq implementation of 221 | the binary field GF(2^254). This code is used automatically if the 222 | compilation target is an x86 with the relevant hardware support; this 223 | feature bypasses the automatic detection. This feature has no effect 224 | if the 32-bit code is used. 225 | 226 | - `gfb254_arm64pmull`: enforce use of the NEON+pmull implementation of 227 | the binary field GF(2^254). This code is used automatically if the 228 | compilation target is an aarch64 system; this feature bypasses the 229 | automatic detection. This feature has no effect if the 32-bit code 230 | is used. 231 | 232 | # Security and Compliance 233 | 234 | All the code is strict, both in terms of timing-based side-channels 235 | (everything is constant-time, except if explicitly stated otherwise, 236 | e.g. in a function whose name includes `vartime`) and in compliance with 237 | relevant standards. For instance, the Ed25519 signature support applies 238 | and enforces canonical encodings of both points and scalars. 239 | 240 | There is no attempt at "zeroizing memory" anywhere in the code. In 241 | general, such memory cleansing is a fool's quest. Note that since most 242 | of the library uses `no_std` rules, dynamic allocation happens only on 243 | the stack, thereby limiting the risk of leaving secret information 244 | lingering all over the RAM. The only functions that use heap allocation 245 | only store public data there. 
246 | 247 | **WARNING:** I reiterate what was written above: although all of the 248 | code aims at being representative of optimized production-ready code, it 249 | is still fairly recent and some bugs might still lurk, however careful I 250 | am when writing code. Any assertion of suitability to any purpose is 251 | explicitly denied. The primary purpose is to help with "trying out stuff" 252 | in cryptographic research, by offering an easy-to-use API backed by 253 | performance close enough to what can be done in actual applications. 254 | 255 | # Truncated Signatures 256 | 257 | Support for truncated signatures is implemented for Ed25519 and 258 | ECDSA/P-256. Standard signatures can be shortened by 8 to 32 bits (i.e. 259 | the size may shrink from 64 down to 60 bytes), and the verifier rebuilds 260 | the original signature during verification (at some computational cost). 261 | This is not a ground-breaking feature, but it can be very convenient in 262 | some situations with tight constraints on bandwidth and a requirement to 263 | work with standard signature formats. See 264 | `ed25519::PublicKey::verify_trunc_raw()` and 265 | `p256::PublicKey::verify_trunc_hash()` for details. 266 | 267 | # FROST Threshold Schnorr Signatures 268 | 269 | The FROST protocol for a distributed Schnorr signature generation scheme 270 | has been implemented, as per the v14 draft specification: 271 | [draft-irtf-cfrg-frost-14](https://datatracker.ietf.org/doc/html/draft-irtf-cfrg-frost-14). 272 | Five ciphersuites are provided, with similar APIs, in the 273 | `frost::ed25519`, `frost::ristretto255`, `frost::ed448`, `frost::p256` and 274 | `frost::secp256k1` modules. Sample code showing how to use the API is 275 | provided in the [frost-sample.rs](extra/frost-sample.rs) file. 276 | 277 | While FROST is inherently a distributed scheme, the implementation can 278 | also be used in a single signer mode by using the "group" private key 279 | directly. 
280 | 281 | # Benchmarks 282 | 283 | `cargo bench` runs some benchmarks, but there are a few caveats: 284 | 285 | - The cycle counter is used on x86. If frequency scaling ("TurboBoost") 286 | is not disabled, then you'll get wrong and meaningless results. 287 | 288 | - On aarch64, the cycle counter is also accessed directly, which will 289 | in general fail with some CPU exception. Access to the counter must 290 | first be enabled, which requires (on Linux) a specific kernel 291 | module. [This 292 | one](https://github.com/jerinjacobk/armv8_pmu_cycle_counter_el0) 293 | works for me. 294 | 295 | - On riscv64gc, the cycle counter is accessed directly. In general, 296 | that counter is not enabled and all benches return zero; to enable 297 | the cycle counter, run the benchmark binary inside the `perf` 298 | tool (which comes with the `linux-tools`). 299 | 300 | - On architectures other than i386, x86-64, aarch64 and riscv64gc, 301 | benchmark code will simply not compile. 302 | 303 | # TODO 304 | 305 | In general, about anything related to cryptography may show up here, 306 | if there is a use case for it. 307 | -------------------------------------------------------------------------------- /benches/blake2s.rs: -------------------------------------------------------------------------------- 1 | #![allow(non_snake_case)] 2 | #![cfg(feature = "blake2s")] 3 | 4 | mod util; 5 | use util::core_cycles; 6 | 7 | use crrl::blake2s::Blake2s256; 8 | 9 | fn bench_blake2s_short() -> (f64, u8) { 10 | let z = core_cycles(); 11 | let mut seed = [0u8; 32]; 12 | seed[ 0.. 
8].copy_from_slice(&z.to_le_bytes()); 13 | seed[ 8..16].copy_from_slice(&z.to_le_bytes()); 14 | seed[16..24].copy_from_slice(&z.to_le_bytes()); 15 | seed[24..32].copy_from_slice(&z.to_le_bytes()); 16 | let mut tt = [0; 100]; 17 | let mut sh = Blake2s256::new(); 18 | for i in 0..(tt.len() + 1000) { 19 | let begin = core_cycles(); 20 | for _ in 0..100 { 21 | sh.update(&seed); 22 | sh.finalize_reset_write(&mut seed); 23 | } 24 | let end = core_cycles(); 25 | if i >= 1000 { 26 | tt[i - 1000] = end.wrapping_sub(begin); 27 | } 28 | } 29 | tt.sort(); 30 | ((tt[tt.len() >> 1] as f64) / 100.0, seed[0]) 31 | } 32 | 33 | fn bench_blake2s_4096() -> (f64, u8) { 34 | let z = core_cycles(); 35 | let mut sh = Blake2s256::new(); 36 | let mut buf = [0u8; 4096]; 37 | for i in 0..(buf.len() >> 5) { 38 | sh.update(&z.to_le_bytes()); 39 | sh.update(&(i as u64).to_le_bytes()); 40 | sh.finalize_reset_write(&mut buf[(i << 5)..]); 41 | } 42 | let mut tt = [0; 100]; 43 | for i in 0..tt.len() { 44 | let begin = core_cycles(); 45 | for _ in 0..(buf.len() >> 5) { 46 | sh.update(&buf); 47 | sh.finalize_reset_write(&mut buf[(i << 5)..]); 48 | } 49 | let end = core_cycles(); 50 | tt[i] = end.wrapping_sub(begin); 51 | } 52 | tt.sort(); 53 | ((tt[tt.len() >> 1] as f64) / ((buf.len() >> 5) as f64), buf[0]) 54 | } 55 | 56 | fn main() { 57 | let mut bx = 0u8; 58 | 59 | let (v, x) = bench_blake2s_short(); 60 | bx ^= x; 61 | println!("BLAKE2s (short): {:13.2}", v); 62 | let (v, x) = bench_blake2s_4096(); 63 | bx ^= x; 64 | println!("BLAKE2s (4096 bytes): {:13.2}", v); 65 | 66 | println!("{}", bx); 67 | } 68 | -------------------------------------------------------------------------------- /benches/ed25519.rs: -------------------------------------------------------------------------------- 1 | #![allow(non_snake_case)] 2 | #![cfg(feature = "ed25519")] 3 | 4 | mod util; 5 | use util::core_cycles; 6 | 7 | use crrl::ed25519::{PrivateKey, Point, Scalar}; 8 | use sha2::{Sha256, Digest}; 9 | 10 | fn 
bench_mulgen() -> (f64, u8) { 11 | let z = core_cycles(); 12 | let mut seed = [0u8; 32]; 13 | seed[ 0.. 8].copy_from_slice(&z.to_le_bytes()); 14 | seed[ 8..16].copy_from_slice(&z.to_le_bytes()); 15 | seed[16..24].copy_from_slice(&z.to_le_bytes()); 16 | seed[24..32].copy_from_slice(&z.to_le_bytes()); 17 | let mut s = Scalar::decode_reduce(&seed); 18 | let mut tt = [0; 100]; 19 | for i in 0..tt.len() { 20 | let begin = core_cycles(); 21 | for _ in 0..100 { 22 | let P = Point::mulgen(&s); 23 | if P.isneutral() != 0 { 24 | s += Scalar::ZERO; 25 | } else { 26 | s += Scalar::ONE; 27 | } 28 | } 29 | let end = core_cycles(); 30 | tt[i] = end.wrapping_sub(begin); 31 | } 32 | tt.sort(); 33 | ((tt[tt.len() >> 1] as f64) / 100.0, s.encode32()[0]) 34 | } 35 | 36 | fn bench_mul() -> (f64, u8) { 37 | let z = core_cycles(); 38 | let mut seed = [0u8; 32]; 39 | seed[ 0.. 8].copy_from_slice(&z.to_le_bytes()); 40 | seed[ 8..16].copy_from_slice(&z.to_le_bytes()); 41 | seed[16..24].copy_from_slice(&z.to_le_bytes()); 42 | seed[24..32].copy_from_slice(&z.to_le_bytes()); 43 | let mut s = Scalar::decode_reduce(&seed); 44 | let mut P = Point::mulgen(&s); 45 | let mut tt = [0; 100]; 46 | for i in 0..tt.len() { 47 | let begin = core_cycles(); 48 | for _ in 0..100 { 49 | P *= s; 50 | if P.isneutral() != 0 { 51 | s += Scalar::ZERO; 52 | } else { 53 | s += Scalar::ONE; 54 | } 55 | } 56 | let end = core_cycles(); 57 | tt[i] = end.wrapping_sub(begin); 58 | } 59 | tt.sort(); 60 | ((tt[tt.len() >> 1] as f64) / 100.0, s.encode32()[0]) 61 | } 62 | 63 | fn bench_mul_add_mulgen() -> (f64, u8) { 64 | let z = core_cycles(); 65 | let mut uu = [Scalar::ZERO; 128]; 66 | let mut vv = [Scalar::ZERO; 128]; 67 | let mut sh = Sha256::new(); 68 | for i in 0..128 { 69 | sh.update(z.to_le_bytes()); 70 | sh.update(((2 * i + 0) as u64).to_le_bytes()); 71 | let b1 = sh.finalize_reset(); 72 | sh.update(z.to_le_bytes()); 73 | sh.update(((2 * i + 1) as u64).to_le_bytes()); 74 | let b2 = sh.finalize_reset(); 75 | uu[i] = 
Scalar::decode_reduce(&b1); 76 | vv[i] = Scalar::decode_reduce(&b2); 77 | } 78 | let mut tt = [0; 100]; 79 | let mut P = Point::mulgen(&uu[127]); 80 | for i in 0..tt.len() { 81 | let begin = core_cycles(); 82 | for j in 0..128 { 83 | let ku = (i + j) & 127; 84 | let kv = i.wrapping_sub(j) & 127; 85 | let Q = P.mul_add_mulgen_vartime(&uu[ku], &vv[kv]); 86 | P += Q; 87 | } 88 | let end = core_cycles(); 89 | tt[i] = end.wrapping_sub(begin); 90 | } 91 | tt.sort(); 92 | ((tt[tt.len() >> 1] as f64) / 128.0, P.encode()[0]) 93 | } 94 | 95 | fn bench_skey_load() -> (f64, u8) { 96 | let z = core_cycles(); 97 | let mut seed = [0u8; 32]; 98 | seed[0..8].copy_from_slice(&z.to_le_bytes()); 99 | let mut tt = [0; 100]; 100 | for i in 0..tt.len() { 101 | let begin = core_cycles(); 102 | for _ in 0..100 { 103 | let skey = PrivateKey::from_seed(&seed); 104 | seed[..].copy_from_slice(&skey.public_key.encode()); 105 | } 106 | let end = core_cycles(); 107 | tt[i] = end.wrapping_sub(begin); 108 | } 109 | tt.sort(); 110 | ((tt[tt.len() >> 1] as f64) / 100.0, seed[0]) 111 | } 112 | 113 | fn bench_skey_sign() -> (f64, u8) { 114 | let z = core_cycles(); 115 | let mut seed = [0u8; 32]; 116 | seed[0..8].copy_from_slice(&z.to_le_bytes()); 117 | let skey = PrivateKey::from_seed(&seed); 118 | let mut tt = [0; 100]; 119 | let mut msg = [0u8; 32]; 120 | for i in 0..tt.len() { 121 | let begin = core_cycles(); 122 | for _ in 0..100 { 123 | let sig = skey.sign_raw(&msg); 124 | msg[..].copy_from_slice(&sig[0..32]); 125 | } 126 | let end = core_cycles(); 127 | tt[i] = end.wrapping_sub(begin); 128 | } 129 | tt.sort(); 130 | ((tt[tt.len() >> 1] as f64) / 100.0, msg[0]) 131 | } 132 | 133 | fn bench_pkey_verify() -> (f64, u8) { 134 | let z = core_cycles(); 135 | let mut seed = [0u8; 32]; 136 | seed[0..8].copy_from_slice(&z.to_le_bytes()); 137 | let skey = PrivateKey::from_seed(&seed); 138 | let pkey = skey.public_key; 139 | let mut sigs = [[0u8; 64]; 128]; 140 | for i in 0..128 { 141 | let msg = [i as u8; 
32]; 142 | let sig = skey.sign_raw(&msg); 143 | sigs[i][..].copy_from_slice(&sig); 144 | } 145 | let mut tt = [0; 100]; 146 | let mut msg = [0u8; 32]; 147 | for i in 0..tt.len() { 148 | let begin = core_cycles(); 149 | for j in 0..128 { 150 | let ff = pkey.verify_raw(&sigs[j], &msg); 151 | sigs[j][40] ^= 1u8.wrapping_add(ff as u8); 152 | msg[3] ^= 3u8.wrapping_sub(ff as u8); 153 | } 154 | let end = core_cycles(); 155 | tt[i] = end.wrapping_sub(begin); 156 | } 157 | tt.sort(); 158 | ((tt[tt.len() >> 1] as f64) / 128.0, msg[0]) 159 | } 160 | 161 | fn bench_decode() -> (f64, u8) { 162 | let z = core_cycles(); 163 | let mut buf = [0u8; 32]; 164 | buf[ 0.. 8].copy_from_slice(&z.to_le_bytes()); 165 | buf[ 8..16].copy_from_slice(&z.to_le_bytes()); 166 | buf[16..24].copy_from_slice(&z.to_le_bytes()); 167 | buf[24..32].copy_from_slice(&z.to_le_bytes()); 168 | let mut tt = [0; 10]; 169 | let mut P = Point::NEUTRAL; 170 | let Q = Point::BASE * z; 171 | for i in 0..10 { 172 | let begin = core_cycles(); 173 | for _ in 0..100 { 174 | let r = P.set_decode(&buf); 175 | buf[0] = buf[0].wrapping_add(1); 176 | buf[1] = buf[1].wrapping_add(r as u8); 177 | buf[2] = buf[2].wrapping_add(P.equals(Q) as u8); 178 | } 179 | let end = core_cycles(); 180 | tt[i] = end.wrapping_sub(begin); 181 | } 182 | tt.sort(); 183 | ((tt[4] as f64) / 100.0, buf[0]) 184 | } 185 | 186 | fn bench_encode() -> (f64, u8) { 187 | let z = core_cycles(); 188 | let mut P = Point::BASE * z; 189 | let mut tt = [0; 10]; 190 | for i in 0..10 { 191 | let begin = core_cycles(); 192 | for _ in 0..100 { 193 | let x = P.encode()[0]; 194 | if x & 1 == 0 { 195 | P = -P; 196 | } 197 | } 198 | let end = core_cycles(); 199 | tt[i] = end.wrapping_sub(begin); 200 | } 201 | tt.sort(); 202 | ((tt[4] as f64) / 100.0, P.encode()[0]) 203 | } 204 | 205 | /* 206 | * Old benchmark for the old is_in_subgroup() implementation. 
207 | fn bench_subgroup_old() -> (f64, u8) { 208 | let z = core_cycles(); 209 | let Q = Point::BASE * z; 210 | let mut P = Point::NEUTRAL; 211 | let mut tt = [0; 10]; 212 | for i in 0..10 { 213 | let begin = core_cycles(); 214 | for _ in 0..100 { 215 | let r = P.old_is_in_subgroup(); 216 | P.set_cond(&(P + Q), r); 217 | } 218 | let end = core_cycles(); 219 | tt[i] = end.wrapping_sub(begin); 220 | } 221 | tt.sort(); 222 | ((tt[4] as f64) / 100.0, P.encode()[0]) 223 | } 224 | */ 225 | 226 | fn bench_subgroup() -> (f64, u8) { 227 | let z = core_cycles(); 228 | let Q = Point::BASE * z; 229 | let mut P = Point::NEUTRAL; 230 | let mut tt = [0; 10]; 231 | for i in 0..10 { 232 | let begin = core_cycles(); 233 | for _ in 0..100 { 234 | let r = P.is_in_subgroup(); 235 | P.set_cond(&(P + Q), r); 236 | } 237 | let end = core_cycles(); 238 | tt[i] = end.wrapping_sub(begin); 239 | } 240 | tt.sort(); 241 | ((tt[4] as f64) / 100.0, P.encode()[0]) 242 | } 243 | 244 | fn bench_pkey_verify_trunc(rm: usize) -> (f64, f64, u8) { 245 | let z = core_cycles(); 246 | let mut seed = [0u8; 32]; 247 | seed[0..8].copy_from_slice(&z.to_le_bytes()); 248 | let skey = PrivateKey::from_seed(&seed); 249 | let pkey = skey.public_key; 250 | let mut sigs = [[0u8; 64]; 256]; 251 | for i in 0..256 { 252 | let msg = [i as u8; 32]; 253 | let sig = skey.sign_raw(&msg); 254 | sigs[i][..].copy_from_slice(&sig); 255 | } 256 | let mut x = 0; 257 | 258 | // Phase 1: all signatures are correct. 259 | let mut tt = [0; 2048]; 260 | for i in 0..tt.len() { 261 | let msg = [i as u8; 32]; 262 | let begin = core_cycles(); 263 | x ^= (pkey.verify_trunc_raw(&sigs[i % 256], rm, &msg).is_some()) as u8; 264 | let end = core_cycles(); 265 | tt[i] = end.wrapping_sub(begin); 266 | } 267 | tt.sort(); 268 | // Remove 10% slowest and 10% fastest, make an average of the rest. 
269 | let n10 = tt.len() / 10; 270 | let n80 = tt.len() - 2 * n10; 271 | let mut s = 0u64; 272 | for i in n10..(tt.len() - n10) { 273 | s += tt[i]; 274 | } 275 | let res1 = (s as f64) / (n80 as f64); 276 | 277 | // Phase 2: all signatures are incorrect. 278 | // We expect much lower variance in that case. 279 | let mut tt = [0; 128]; 280 | for i in 0..tt.len() { 281 | let msg = [(i + 1) as u8; 32]; 282 | let begin = core_cycles(); 283 | x ^= (pkey.verify_trunc_raw(&sigs[i % 256], rm, &msg).is_some()) as u8; 284 | let end = core_cycles(); 285 | tt[i] = end.wrapping_sub(begin); 286 | } 287 | tt.sort(); 288 | // Remove 10% slowest and 10% fastest, make an average of the rest. 289 | let n10 = tt.len() / 10; 290 | let n80 = tt.len() - 2 * n10; 291 | let mut s = 0u64; 292 | for i in n10..(tt.len() - n10) { 293 | s += tt[i]; 294 | } 295 | let res2 = (s as f64) / (n80 as f64); 296 | 297 | (res1, res2, x) 298 | } 299 | 300 | fn main() { 301 | let mut bx = 0u8; 302 | 303 | let (v, x) = bench_mul(); 304 | bx ^= x; 305 | println!("Ed25519 point mul: {:13.2}", v); 306 | let (v, x) = bench_mulgen(); 307 | bx ^= x; 308 | println!("Ed25519 point mulgen: {:13.2}", v); 309 | let (v, x) = bench_mul_add_mulgen(); 310 | bx ^= x; 311 | println!("Ed25519 point mul_add_mulgen: {:13.2}", v); 312 | let (v, x) = bench_skey_load(); 313 | bx ^= x; 314 | println!("Ed25519 skey_load: {:13.2}", v); 315 | let (v, x) = bench_skey_sign(); 316 | bx ^= x; 317 | println!("Ed25519 sign: {:13.2}", v); 318 | let (v, x) = bench_pkey_verify(); 319 | bx ^= x; 320 | println!("Ed25519 verify: {:13.2}", v); 321 | let (v, x) = bench_decode(); 322 | bx ^= x; 323 | println!("Ed25519 decode: {:13.2}", v); 324 | let (v, x) = bench_encode(); 325 | bx ^= x; 326 | println!("Ed25519 encode: {:13.2}", v); 327 | let (v, x) = bench_subgroup(); 328 | bx ^= x; 329 | println!("Ed25519 subgroup: {:13.2}", v); 330 | 331 | let (v1, v2, x) = bench_pkey_verify_trunc(8); 332 | bx ^= x; 333 | println!("Ed25519 verify_trunc8: {:13.2} 
{:13.2}", v1, v2); 334 | let (v1, v2, x) = bench_pkey_verify_trunc(16); 335 | bx ^= x; 336 | println!("Ed25519 verify_trunc16: {:13.2} {:13.2}", v1, v2); 337 | /* 338 | let (v1, v2, x) = bench_pkey_verify_trunc(24); 339 | bx ^= x; 340 | println!("Ed25519 verify_trunc24: {:13.2} {:13.2}", v1, v2); 341 | let (v1, v2, x) = bench_pkey_verify_trunc(28); 342 | bx ^= x; 343 | println!("Ed25519 verify_trunc28: {:13.2} {:13.2}", v1, v2); 344 | let (v1, v2, x) = bench_pkey_verify_trunc(32); 345 | bx ^= x; 346 | println!("Ed25519 verify_trunc32: {:13.2} {:13.2}", v1, v2); 347 | */ 348 | 349 | println!("{}", bx); 350 | } 351 | -------------------------------------------------------------------------------- /benches/ed448.rs: -------------------------------------------------------------------------------- 1 | #![allow(non_snake_case)] 2 | #![cfg(feature = "ed448")] 3 | 4 | mod util; 5 | use util::core_cycles; 6 | 7 | use crrl::ed448::{PrivateKey, Point, Scalar}; 8 | use sha2::{Sha512, Digest}; 9 | 10 | fn bench_mulgen() -> (f64, u8) { 11 | let z = core_cycles(); 12 | let mut seed = [0u8; 64]; 13 | seed[ 0.. 
8].copy_from_slice(&z.to_le_bytes()); 14 | seed[ 8..16].copy_from_slice(&z.to_le_bytes()); 15 | seed[16..24].copy_from_slice(&z.to_le_bytes()); 16 | seed[24..32].copy_from_slice(&z.to_le_bytes()); 17 | seed[32..40].copy_from_slice(&z.to_le_bytes()); 18 | seed[40..48].copy_from_slice(&z.to_le_bytes()); 19 | seed[48..56].copy_from_slice(&z.to_le_bytes()); 20 | seed[56..64].copy_from_slice(&z.to_le_bytes()); 21 | let mut s = Scalar::decode_reduce(&seed); 22 | let mut tt = [0; 100]; 23 | for i in 0..tt.len() { 24 | let begin = core_cycles(); 25 | for _ in 0..100 { 26 | let P = Point::mulgen(&s); 27 | if P.isneutral() != 0 { 28 | s += Scalar::ZERO; 29 | } else { 30 | s += Scalar::ONE; 31 | } 32 | } 33 | let end = core_cycles(); 34 | tt[i] = end.wrapping_sub(begin); 35 | } 36 | tt.sort(); 37 | ((tt[tt.len() >> 1] as f64) / 100.0, s.encode()[0]) 38 | } 39 | 40 | fn bench_mul() -> (f64, u8) { 41 | let z = core_cycles(); 42 | let mut seed = [0u8; 64]; 43 | seed[ 0.. 8].copy_from_slice(&z.to_le_bytes()); 44 | seed[ 8..16].copy_from_slice(&z.to_le_bytes()); 45 | seed[16..24].copy_from_slice(&z.to_le_bytes()); 46 | seed[24..32].copy_from_slice(&z.to_le_bytes()); 47 | seed[32..40].copy_from_slice(&z.to_le_bytes()); 48 | seed[40..48].copy_from_slice(&z.to_le_bytes()); 49 | seed[48..56].copy_from_slice(&z.to_le_bytes()); 50 | seed[56..64].copy_from_slice(&z.to_le_bytes()); 51 | let mut s = Scalar::decode_reduce(&seed); 52 | let mut P = Point::mulgen(&s); 53 | let mut tt = [0; 100]; 54 | for i in 0..tt.len() { 55 | let begin = core_cycles(); 56 | for _ in 0..100 { 57 | P *= s; 58 | if P.isneutral() != 0 { 59 | s += Scalar::ZERO; 60 | } else { 61 | s += Scalar::ONE; 62 | } 63 | } 64 | let end = core_cycles(); 65 | tt[i] = end.wrapping_sub(begin); 66 | } 67 | tt.sort(); 68 | ((tt[tt.len() >> 1] as f64) / 100.0, s.encode()[0]) 69 | } 70 | 71 | fn bench_mul_add_mulgen() -> (f64, u8) { 72 | let z = core_cycles(); 73 | let mut uu = [Scalar::ZERO; 128]; 74 | let mut vv = [Scalar::ZERO; 
128]; 75 | let mut sh = Sha512::new(); 76 | for i in 0..128 { 77 | sh.update(z.to_le_bytes()); 78 | sh.update(((2 * i + 0) as u64).to_le_bytes()); 79 | let b1 = sh.finalize_reset(); 80 | sh.update(z.to_le_bytes()); 81 | sh.update(((2 * i + 1) as u64).to_le_bytes()); 82 | let b2 = sh.finalize_reset(); 83 | uu[i] = Scalar::decode_reduce(&b1); 84 | vv[i] = Scalar::decode_reduce(&b2); 85 | } 86 | let mut tt = [0; 100]; 87 | let mut P = Point::mulgen(&uu[127]); 88 | for i in 0..tt.len() { 89 | let begin = core_cycles(); 90 | for j in 0..128 { 91 | let ku = (i + j) & 127; 92 | let kv = i.wrapping_sub(j) & 127; 93 | let Q = P.mul_add_mulgen_vartime(&uu[ku], &vv[kv]); 94 | P += Q; 95 | } 96 | let end = core_cycles(); 97 | tt[i] = end.wrapping_sub(begin); 98 | } 99 | tt.sort(); 100 | ((tt[tt.len() >> 1] as f64) / 128.0, P.encode()[0]) 101 | } 102 | 103 | fn bench_skey_load() -> (f64, u8) { 104 | let z = core_cycles(); 105 | let mut seed = [0u8; 57]; 106 | seed[0..8].copy_from_slice(&z.to_le_bytes()); 107 | let mut tt = [0; 100]; 108 | for i in 0..tt.len() { 109 | let begin = core_cycles(); 110 | for _ in 0..100 { 111 | let skey = PrivateKey::from_seed(&seed); 112 | seed[..].copy_from_slice(&skey.public_key.encode()); 113 | } 114 | let end = core_cycles(); 115 | tt[i] = end.wrapping_sub(begin); 116 | } 117 | tt.sort(); 118 | ((tt[tt.len() >> 1] as f64) / 100.0, seed[0]) 119 | } 120 | 121 | fn bench_skey_sign() -> (f64, u8) { 122 | let z = core_cycles(); 123 | let mut seed = [0u8; 57]; 124 | seed[0..8].copy_from_slice(&z.to_le_bytes()); 125 | let skey = PrivateKey::from_seed(&seed); 126 | let mut tt = [0; 100]; 127 | let mut msg = [0u8; 32]; 128 | for i in 0..tt.len() { 129 | let begin = core_cycles(); 130 | for _ in 0..100 { 131 | let sig = skey.sign_raw(&msg); 132 | msg[..].copy_from_slice(&sig[0..32]); 133 | } 134 | let end = core_cycles(); 135 | tt[i] = end.wrapping_sub(begin); 136 | } 137 | tt.sort(); 138 | ((tt[tt.len() >> 1] as f64) / 100.0, msg[0]) 139 | } 140 | 141 | 
/// Measures raw signature verification (over a batch of 128 signatures).
fn bench_pkey_verify() -> (f64, u8) {
    let z = core_cycles();
    let mut seed = [0u8; 57];
    seed[0..8].copy_from_slice(&z.to_le_bytes());
    let skey = PrivateKey::from_seed(&seed);
    let pkey = skey.public_key;
    // Pre-generate 128 signatures over distinct messages.
    let mut sigs = [[0u8; 114]; 128];
    for i in 0..128 {
        let msg = [i as u8; 32];
        let sig = skey.sign_raw(&msg);
        sigs[i][..].copy_from_slice(&sig);
    }
    let mut tt = [0; 100];
    let mut msg = [0u8; 32];
    for i in 0..tt.len() {
        let begin = core_cycles();
        for j in 0..128 {
            let ff = pkey.verify_raw(&sigs[j], &msg);
            // Perturb signature and message based on the outcome so the
            // verification result is actually used.
            sigs[j][40] ^= 1u8.wrapping_add(ff as u8);
            msg[3] ^= 3u8.wrapping_sub(ff as u8);
        }
        let end = core_cycles();
        tt[i] = end.wrapping_sub(begin);
    }
    tt.sort();
    ((tt[tt.len() >> 1] as f64) / 128.0, msg[0])
}

/// Measures point decoding from its 57-byte representation.
fn bench_decode() -> (f64, u8) {
    let z = core_cycles();
    let mut buf = [0u8; 57];
    for k in 0..7 {
        buf[(8 * k)..(8 * k + 8)].copy_from_slice(&z.to_le_bytes());
    }
    buf[56] = z as u8;
    let mut tt = [0; 10];
    let mut P = Point::NEUTRAL;
    let Q = Point::BASE * z;
    for i in 0..10 {
        let begin = core_cycles();
        for _ in 0..100 {
            let r = P.set_decode(&buf);
            buf[0] = buf[0].wrapping_add(1);
            buf[1] = buf[1].wrapping_add(r as u8);
            buf[2] = buf[2].wrapping_add(P.equals(Q) as u8);
        }
        let end = core_cycles();
        tt[i] = end.wrapping_sub(begin);
    }
    tt.sort();
    ((tt[4] as f64) / 100.0, buf[0])
}

/// Measures point encoding.
fn bench_encode() -> (f64, u8) {
    let z = core_cycles();
    let mut P = Point::BASE * z;
    let mut tt = [0; 10];
    for i in 0..10 {
        let begin = core_cycles();
        for _ in 0..100 {
            let x = P.encode()[0];
            // Conditionally negate so successive encodings differ.
            if x & 1 == 0 {
                P = -P;
            }
        }
        let end = core_cycles();
        tt[i] = end.wrapping_sub(begin);
    }
    tt.sort();
    ((tt[4] as f64) / 100.0, P.encode()[0])
}

/*
 * Old benchmark for the old is_in_subgroup() implementation.
fn bench_subgroup_old() -> (f64, u8) {
    let z = core_cycles();
    let Q = Point::BASE * z;
    let mut P = Point::NEUTRAL;
    let mut tt = [0; 10];
    for i in 0..10 {
        let begin = core_cycles();
        for _ in 0..100 {
            let r = P.old_is_in_subgroup();
            P.set_cond(&(P + Q), r);
        }
        let end = core_cycles();
        tt[i] = end.wrapping_sub(begin);
    }
    tt.sort();
    ((tt[4] as f64) / 100.0, P.encode()[0])
}
*/

/// Measures the prime-order subgroup membership test.
fn bench_subgroup() -> (f64, u8) {
    let z = core_cycles();
    let Q = Point::BASE * z;
    let mut P = Point::NEUTRAL;
    let mut tt = [0; 10];
    for i in 0..10 {
        let begin = core_cycles();
        for _ in 0..100 {
            let r = P.is_in_subgroup();
            P.set_cond(&(P + Q), r);
        }
        let end = core_cycles();
        tt[i] = end.wrapping_sub(begin);
    }
    tt.sort();
    ((tt[4] as f64) / 100.0, P.encode()[0])
}

fn main() {
    // 'bx' accumulates one byte from every benchmark so the measured
    // computations cannot be optimized away.
    let mut bx = 0u8;

    let (v, x) = bench_mul();
    bx ^= x;
    println!("Ed448 point mul: {:13.2}", v);
    let (v, x) = bench_mulgen();
    bx ^= x;
    println!("Ed448 point mulgen: {:13.2}", v);
    let (v, x) = bench_mul_add_mulgen();
    bx ^= x;
    println!("Ed448 point mul_add_mulgen: {:13.2}", v);
    let (v, x) = bench_skey_load();
    bx ^= x;
    println!("Ed448 skey_load: {:13.2}", v);
    let (v, x) = bench_skey_sign();
    bx ^= x;
    println!("Ed448 sign: {:13.2}", v);
    let (v, x) = bench_pkey_verify();
    bx ^= x;
    println!("Ed448 verify: {:13.2}", v);
277 | let (v, x) = bench_decode(); 278 | bx ^= x; 279 | println!("Ed448 decode: {:13.2}", v); 280 | let (v, x) = bench_encode(); 281 | bx ^= x; 282 | println!("Ed448 encode: {:13.2}", v); 283 | let (v, x) = bench_subgroup(); 284 | bx ^= x; 285 | println!("Ed448 subgroup: {:13.2}", v); 286 | 287 | println!("{}", bx); 288 | } 289 | -------------------------------------------------------------------------------- /benches/gf25519.rs: -------------------------------------------------------------------------------- 1 | #![cfg(feature = "gf25519")] 2 | 3 | mod util; 4 | use util::core_cycles; 5 | 6 | use crrl::field::GF25519; 7 | 8 | fn bench_gf25519_add() { 9 | let z = core_cycles(); 10 | let mut x = GF25519::w64le(z, z.wrapping_mul(3), 11 | z.wrapping_mul(5), z.wrapping_mul(7)); 12 | let mut y = x + GF25519::ONE; 13 | let mut tt = [0; 10]; 14 | for i in 0..10 { 15 | let begin = core_cycles(); 16 | for _ in 0..1000 { 17 | x += y; 18 | y += x; 19 | x += y; 20 | y += x; 21 | x += y; 22 | y += x; 23 | } 24 | let end = core_cycles(); 25 | tt[i] = end.wrapping_sub(begin); 26 | } 27 | tt.sort(); 28 | println!("GF25519 add: {:11.2} ({})", (tt[4] as f64) / 6000.0, x.encode32()[0]); 29 | } 30 | 31 | fn bench_gf25519_sub() { 32 | let z = core_cycles(); 33 | let mut x = GF25519::w64le(z, z.wrapping_mul(3), 34 | z.wrapping_mul(5), z.wrapping_mul(7)); 35 | let mut y = x + GF25519::ONE; 36 | let mut tt = [0; 10]; 37 | for i in 0..10 { 38 | let begin = core_cycles(); 39 | for _ in 0..1000 { 40 | x -= y; 41 | y -= x; 42 | x -= y; 43 | y -= x; 44 | x -= y; 45 | y -= x; 46 | } 47 | let end = core_cycles(); 48 | tt[i] = end.wrapping_sub(begin); 49 | } 50 | tt.sort(); 51 | println!("GF25519 sub: {:11.2} ({})", (tt[4] as f64) / 6000.0, x.encode32()[0]); 52 | } 53 | 54 | fn bench_gf25519_mul() { 55 | let z = core_cycles(); 56 | let mut x = GF25519::w64le(z, z.wrapping_mul(3), 57 | z.wrapping_mul(5), z.wrapping_mul(7)); 58 | let mut y = x + GF25519::ONE; 59 | let mut tt = [0; 10]; 60 | for i 
in 0..10 { 61 | let begin = core_cycles(); 62 | for _ in 0..1000 { 63 | x *= y; 64 | y *= x; 65 | x *= y; 66 | y *= x; 67 | x *= y; 68 | y *= x; 69 | } 70 | let end = core_cycles(); 71 | tt[i] = end.wrapping_sub(begin); 72 | } 73 | tt.sort(); 74 | println!("GF25519 mul: {:11.2} ({})", (tt[4] as f64) / 6000.0, x.encode32()[0]); 75 | } 76 | 77 | fn bench_gf25519_square() { 78 | let z = core_cycles(); 79 | let mut x = GF25519::w64le(z, z.wrapping_mul(3), 80 | z.wrapping_mul(5), z.wrapping_mul(7)); 81 | let mut tt = [0; 10]; 82 | for i in 0..10 { 83 | let begin = core_cycles(); 84 | x = x.xsquare(6000); 85 | let end = core_cycles(); 86 | tt[i] = end.wrapping_sub(begin); 87 | } 88 | tt.sort(); 89 | println!("GF25519 square: {:11.2} ({})", (tt[4] as f64) / 6000.0, x.encode32()[0]); 90 | } 91 | 92 | fn bench_gf25519_div() { 93 | let z = core_cycles(); 94 | let mut x = GF25519::w64le(z, z.wrapping_mul(3), 95 | z.wrapping_mul(5), z.wrapping_mul(7)); 96 | let mut y = x + GF25519::ONE; 97 | let mut tt = [0; 10]; 98 | for i in 0..10 { 99 | let begin = core_cycles(); 100 | for _ in 0..1000 { 101 | x /= y; 102 | y /= x; 103 | x /= y; 104 | y /= x; 105 | x /= y; 106 | y /= x; 107 | } 108 | let end = core_cycles(); 109 | tt[i] = end.wrapping_sub(begin); 110 | } 111 | tt.sort(); 112 | println!("GF25519 div: {:11.2} ({})", (tt[4] as f64) / 6000.0, x.encode32()[0]); 113 | } 114 | 115 | fn bench_gf25519_sqrt() { 116 | let z = core_cycles(); 117 | let mut x = GF25519::w64le(z, z.wrapping_mul(3), 118 | z.wrapping_mul(5), z.wrapping_mul(7)); 119 | let mut tt = [0; 10]; 120 | for i in 0..10 { 121 | let begin = core_cycles(); 122 | for _ in 0..6000 { 123 | let (x2, _) = x.sqrt(); 124 | x = x2 + GF25519::ONE; 125 | } 126 | let end = core_cycles(); 127 | tt[i] = end.wrapping_sub(begin); 128 | } 129 | tt.sort(); 130 | println!("GF25519 sqrt: {:11.2} ({})", (tt[4] as f64) / 6000.0, x.encode32()[0]); 131 | } 132 | 133 | fn bench_gf25519_legendre() { 134 | let z = core_cycles(); 135 | let mut x 
= GF25519::w64le(z, z.wrapping_mul(3), 136 | z.wrapping_mul(5), z.wrapping_mul(7)); 137 | let mut tt = [0; 10]; 138 | for i in 0..10 { 139 | let begin = core_cycles(); 140 | for _ in 0..6000 { 141 | let ls = x.legendre(); 142 | x += GF25519::w64le(ls as u64, ls as u64, ls as u64, ls as u64); 143 | } 144 | let end = core_cycles(); 145 | tt[i] = end.wrapping_sub(begin); 146 | } 147 | tt.sort(); 148 | println!("GF25519 legendre: {:11.2} ({})", (tt[4] as f64) / 6000.0, x.encode32()[0]); 149 | } 150 | 151 | fn main() { 152 | bench_gf25519_add(); 153 | bench_gf25519_sub(); 154 | bench_gf25519_mul(); 155 | bench_gf25519_square(); 156 | bench_gf25519_div(); 157 | bench_gf25519_sqrt(); 158 | bench_gf25519_legendre(); 159 | } 160 | -------------------------------------------------------------------------------- /benches/gf255e.rs: -------------------------------------------------------------------------------- 1 | #![cfg(feature = "gf255e")] 2 | 3 | mod util; 4 | use util::core_cycles; 5 | 6 | use crrl::field::GF255e; 7 | 8 | fn bench_gf255e_add() { 9 | let z = core_cycles(); 10 | let mut x = GF255e::w64le(z, z.wrapping_mul(3), 11 | z.wrapping_mul(5), z.wrapping_mul(7)); 12 | let mut y = x + GF255e::ONE; 13 | let mut tt = [0; 10]; 14 | for i in 0..10 { 15 | let begin = core_cycles(); 16 | for _ in 0..1000 { 17 | x += y; 18 | y += x; 19 | x += y; 20 | y += x; 21 | x += y; 22 | y += x; 23 | } 24 | let end = core_cycles(); 25 | tt[i] = end.wrapping_sub(begin); 26 | } 27 | tt.sort(); 28 | println!("GF255e add: {:11.2} ({})", (tt[4] as f64) / 6000.0, x.encode32()[0]); 29 | } 30 | 31 | fn bench_gf255e_sub() { 32 | let z = core_cycles(); 33 | let mut x = GF255e::w64le(z, z.wrapping_mul(3), 34 | z.wrapping_mul(5), z.wrapping_mul(7)); 35 | let mut y = x + GF255e::ONE; 36 | let mut tt = [0; 10]; 37 | for i in 0..10 { 38 | let begin = core_cycles(); 39 | for _ in 0..1000 { 40 | x -= y; 41 | y -= x; 42 | x -= y; 43 | y -= x; 44 | x -= y; 45 | y -= x; 46 | } 47 | let end = core_cycles(); 
48 | tt[i] = end.wrapping_sub(begin); 49 | } 50 | tt.sort(); 51 | println!("GF255e sub: {:11.2} ({})", (tt[4] as f64) / 6000.0, x.encode32()[0]); 52 | } 53 | 54 | fn bench_gf255e_mul() { 55 | let z = core_cycles(); 56 | let mut x = GF255e::w64le(z, z.wrapping_mul(3), 57 | z.wrapping_mul(5), z.wrapping_mul(7)); 58 | let mut y = x + GF255e::ONE; 59 | let mut tt = [0; 10]; 60 | for i in 0..10 { 61 | let begin = core_cycles(); 62 | for _ in 0..1000 { 63 | x *= y; 64 | y *= x; 65 | x *= y; 66 | y *= x; 67 | x *= y; 68 | y *= x; 69 | } 70 | let end = core_cycles(); 71 | tt[i] = end.wrapping_sub(begin); 72 | } 73 | tt.sort(); 74 | println!("GF255e mul: {:11.2} ({})", (tt[4] as f64) / 6000.0, x.encode32()[0]); 75 | } 76 | 77 | fn bench_gf255e_square() { 78 | let z = core_cycles(); 79 | let mut x = GF255e::w64le(z, z.wrapping_mul(3), 80 | z.wrapping_mul(5), z.wrapping_mul(7)); 81 | let mut tt = [0; 10]; 82 | for i in 0..10 { 83 | let begin = core_cycles(); 84 | x = x.xsquare(6000); 85 | let end = core_cycles(); 86 | tt[i] = end.wrapping_sub(begin); 87 | } 88 | tt.sort(); 89 | println!("GF255e square: {:11.2} ({})", (tt[4] as f64) / 6000.0, x.encode32()[0]); 90 | } 91 | 92 | fn bench_gf255e_div() { 93 | let z = core_cycles(); 94 | let mut x = GF255e::w64le(z, z.wrapping_mul(3), 95 | z.wrapping_mul(5), z.wrapping_mul(7)); 96 | let mut y = x + GF255e::ONE; 97 | let mut tt = [0; 10]; 98 | for i in 0..10 { 99 | let begin = core_cycles(); 100 | for _ in 0..1000 { 101 | x /= y; 102 | y /= x; 103 | x /= y; 104 | y /= x; 105 | x /= y; 106 | y /= x; 107 | } 108 | let end = core_cycles(); 109 | tt[i] = end.wrapping_sub(begin); 110 | } 111 | tt.sort(); 112 | println!("GF255e div: {:11.2} ({})", (tt[4] as f64) / 6000.0, x.encode32()[0]); 113 | } 114 | 115 | fn bench_gf255e_sqrt() { 116 | let z = core_cycles(); 117 | let mut x = GF255e::w64le(z, z.wrapping_mul(3), 118 | z.wrapping_mul(5), z.wrapping_mul(7)); 119 | let mut tt = [0; 10]; 120 | for i in 0..10 { 121 | let begin = 
core_cycles(); 122 | for _ in 0..6000 { 123 | let (x2, _) = x.sqrt(); 124 | x = x2 + GF255e::ONE; 125 | } 126 | let end = core_cycles(); 127 | tt[i] = end.wrapping_sub(begin); 128 | } 129 | tt.sort(); 130 | println!("GF255e sqrt: {:11.2} ({})", (tt[4] as f64) / 6000.0, x.encode32()[0]); 131 | } 132 | 133 | fn bench_gf255e_legendre() { 134 | let z = core_cycles(); 135 | let mut x = GF255e::w64le(z, z.wrapping_mul(3), 136 | z.wrapping_mul(5), z.wrapping_mul(7)); 137 | let mut tt = [0; 10]; 138 | for i in 0..10 { 139 | let begin = core_cycles(); 140 | for _ in 0..6000 { 141 | let ls = x.legendre(); 142 | x += GF255e::w64le(ls as u64, ls as u64, ls as u64, ls as u64); 143 | } 144 | let end = core_cycles(); 145 | tt[i] = end.wrapping_sub(begin); 146 | } 147 | tt.sort(); 148 | println!("GF255e legendre: {:11.2} ({})", (tt[4] as f64) / 6000.0, x.encode32()[0]); 149 | } 150 | 151 | fn main() { 152 | bench_gf255e_add(); 153 | bench_gf255e_sub(); 154 | bench_gf255e_mul(); 155 | bench_gf255e_square(); 156 | bench_gf255e_div(); 157 | bench_gf255e_sqrt(); 158 | bench_gf255e_legendre(); 159 | } 160 | -------------------------------------------------------------------------------- /benches/gf448.rs: -------------------------------------------------------------------------------- 1 | #![cfg(feature = "gf448")] 2 | 3 | mod util; 4 | use util::core_cycles; 5 | 6 | use crrl::field::GF448; 7 | 8 | fn bench_gf448_add() { 9 | let z = core_cycles(); 10 | let mut x = GF448::from_w64le([ z, z.wrapping_mul(3), z.wrapping_mul(5), 11 | z.wrapping_mul(7), z.wrapping_mul(9), z.wrapping_mul(11), 12 | z.wrapping_mul(13) ]); 13 | let mut y = x + GF448::ONE; 14 | let mut tt = [0; 10]; 15 | for i in 0..30 { 16 | let begin = core_cycles(); 17 | for _ in 0..1000 { 18 | x += y; 19 | y += x; 20 | x += y; 21 | y += x; 22 | x += y; 23 | y += x; 24 | } 25 | let end = core_cycles(); 26 | if i >= 20 { 27 | tt[i - 20] = end.wrapping_sub(begin); 28 | } 29 | } 30 | tt.sort(); 31 | println!("GF448 add: {:11.2} 
({})", (tt[4] as f64) / 6000.0, x.encode()[0]); 32 | } 33 | 34 | fn bench_gf448_sub() { 35 | let z = core_cycles(); 36 | let mut x = GF448::from_w64le([ z, z.wrapping_mul(3), z.wrapping_mul(5), 37 | z.wrapping_mul(7), z.wrapping_mul(9), z.wrapping_mul(11), 38 | z.wrapping_mul(13) ]); 39 | let mut y = x + GF448::ONE; 40 | let mut tt = [0; 10]; 41 | for i in 0..30 { 42 | let begin = core_cycles(); 43 | for _ in 0..1000 { 44 | x -= y; 45 | y -= x; 46 | x -= y; 47 | y -= x; 48 | x -= y; 49 | y -= x; 50 | } 51 | let end = core_cycles(); 52 | if i >= 20 { 53 | tt[i - 20] = end.wrapping_sub(begin); 54 | } 55 | } 56 | tt.sort(); 57 | println!("GF448 sub: {:11.2} ({})", (tt[4] as f64) / 6000.0, x.encode()[0]); 58 | } 59 | 60 | fn bench_gf448_mul() { 61 | let z = core_cycles(); 62 | let mut x = GF448::from_w64le([ z, z.wrapping_mul(3), z.wrapping_mul(5), 63 | z.wrapping_mul(7), z.wrapping_mul(9), z.wrapping_mul(11), 64 | z.wrapping_mul(13) ]); 65 | let mut y = x + GF448::ONE; 66 | let mut tt = [0; 10]; 67 | for i in 0..30 { 68 | let begin = core_cycles(); 69 | for _ in 0..1000 { 70 | x *= y; 71 | y *= x; 72 | x *= y; 73 | y *= x; 74 | x *= y; 75 | y *= x; 76 | } 77 | let end = core_cycles(); 78 | if i >= 20 { 79 | tt[i - 20] = end.wrapping_sub(begin); 80 | } 81 | } 82 | tt.sort(); 83 | println!("GF448 mul: {:11.2} ({})", (tt[4] as f64) / 6000.0, x.encode()[0]); 84 | } 85 | 86 | fn bench_gf448_square() { 87 | let z = core_cycles(); 88 | let mut x = GF448::from_w64le([ z, z.wrapping_mul(3), z.wrapping_mul(5), 89 | z.wrapping_mul(7), z.wrapping_mul(9), z.wrapping_mul(11), 90 | z.wrapping_mul(13) ]); 91 | let mut tt = [0; 10]; 92 | for i in 0..30 { 93 | let begin = core_cycles(); 94 | x = x.xsquare(6000); 95 | let end = core_cycles(); 96 | if i >= 20 { 97 | tt[i - 20] = end.wrapping_sub(begin); 98 | } 99 | } 100 | tt.sort(); 101 | println!("GF448 square: {:11.2} ({})", (tt[4] as f64) / 6000.0, x.encode()[0]); 102 | } 103 | 104 | fn bench_gf448_div() { 105 | let z = 
core_cycles(); 106 | let mut x = GF448::from_w64le([ z, z.wrapping_mul(3), z.wrapping_mul(5), 107 | z.wrapping_mul(7), z.wrapping_mul(9), z.wrapping_mul(11), 108 | z.wrapping_mul(13) ]); 109 | let mut y = x + GF448::ONE; 110 | let mut tt = [0; 10]; 111 | for i in 0..30 { 112 | let begin = core_cycles(); 113 | for _ in 0..1000 { 114 | x /= y; 115 | y /= x; 116 | x /= y; 117 | y /= x; 118 | x /= y; 119 | y /= x; 120 | } 121 | let end = core_cycles(); 122 | if i >= 20 { 123 | tt[i - 20] = end.wrapping_sub(begin); 124 | } 125 | } 126 | tt.sort(); 127 | println!("GF448 div: {:11.2} ({})", (tt[4] as f64) / 6000.0, x.encode()[0]); 128 | } 129 | 130 | fn bench_gf448_sqrt() { 131 | let z = core_cycles(); 132 | let mut x = GF448::from_w64le([ z, z.wrapping_mul(3), z.wrapping_mul(5), 133 | z.wrapping_mul(7), z.wrapping_mul(9), z.wrapping_mul(11), 134 | z.wrapping_mul(13) ]); 135 | let mut tt = [0; 10]; 136 | for i in 0..30 { 137 | let begin = core_cycles(); 138 | for _ in 0..6000 { 139 | let (x2, _) = x.sqrt(); 140 | x += x2 + GF448::ONE; 141 | } 142 | let end = core_cycles(); 143 | if i >= 20 { 144 | tt[i - 20] = end.wrapping_sub(begin); 145 | } 146 | } 147 | tt.sort(); 148 | println!("GF448 sqrt: {:11.2} ({})", (tt[4] as f64) / 6000.0, x.encode()[0]); 149 | } 150 | 151 | fn bench_gf448_legendre() { 152 | let z = core_cycles(); 153 | let mut x = GF448::from_w64le([ z, z.wrapping_mul(3), z.wrapping_mul(5), 154 | z.wrapping_mul(7), z.wrapping_mul(9), z.wrapping_mul(11), 155 | z.wrapping_mul(13) ]); 156 | let mut tt = [0; 10]; 157 | for i in 0..30 { 158 | let begin = core_cycles(); 159 | for _ in 0..6000 { 160 | let ls = x.legendre(); 161 | let ls2 = ls as u64; 162 | x += GF448::w64le([ ls2, ls2, ls2, ls2, ls2, ls2, ls2 ]); 163 | } 164 | let end = core_cycles(); 165 | if i >= 20 { 166 | tt[i - 20] = end.wrapping_sub(begin); 167 | } 168 | } 169 | tt.sort(); 170 | println!("GF448 legendre: {:11.2} ({})", (tt[4] as f64) / 6000.0, x.encode()[0]); 171 | } 172 | 173 | fn main() { 174 
| bench_gf448_add(); 175 | bench_gf448_sub(); 176 | bench_gf448_mul(); 177 | bench_gf448_square(); 178 | bench_gf448_div(); 179 | bench_gf448_sqrt(); 180 | bench_gf448_legendre(); 181 | 182 | /* 183 | bench_fiat_add(); 184 | bench_fiat_sub(); 185 | bench_fiat_mul(); 186 | bench_fiat_square(); 187 | */ 188 | } 189 | 190 | /* 191 | extern crate fiat_crypto; 192 | use fiat_crypto::p448_solinas_64::*; 193 | 194 | fn bench_fiat_add() { 195 | let z = core_cycles(); 196 | let mut x: fiat_p448_tight_field_element = [ 197 | z & 0x00FFFFFFFFFFFFFF, 198 | z.wrapping_mul(3) & 0x00FFFFFFFFFFFFFF, 199 | z.wrapping_mul(5) & 0x00FFFFFFFFFFFFFF, 200 | z.wrapping_mul(7) & 0x00FFFFFFFFFFFFFF, 201 | z.wrapping_mul(9) & 0x00FFFFFFFFFFFFFF, 202 | z.wrapping_mul(11) & 0x00FFFFFFFFFFFFFF, 203 | z.wrapping_mul(13) & 0x00FFFFFFFFFFFFFF, 204 | z.wrapping_mul(15) & 0x00FFFFFFFFFFFFFF, 205 | ]; 206 | let mut y = x; 207 | y[0] += 1; 208 | let mut tt = [0; 10]; 209 | for i in 0..30 { 210 | let mut z: fiat_p448_loose_field_element = [0u64; 8]; 211 | let begin = core_cycles(); 212 | for _ in 0..1000 { 213 | fiat_p448_add(&mut z, &x, &y); fiat_p448_carry(&mut x, &z); 214 | fiat_p448_add(&mut z, &y, &x); fiat_p448_carry(&mut y, &z); 215 | fiat_p448_add(&mut z, &x, &y); fiat_p448_carry(&mut x, &z); 216 | fiat_p448_add(&mut z, &y, &x); fiat_p448_carry(&mut y, &z); 217 | fiat_p448_add(&mut z, &x, &y); fiat_p448_carry(&mut x, &z); 218 | fiat_p448_add(&mut z, &y, &x); fiat_p448_carry(&mut y, &z); 219 | } 220 | let end = core_cycles(); 221 | if i >= 20 { 222 | tt[i - 20] = end.wrapping_sub(begin); 223 | } 224 | } 225 | tt.sort(); 226 | println!("fc448 add: {:11.2} ({})", (tt[4] as f64) / 6000.0, x[0] as u8); 227 | } 228 | 229 | fn bench_fiat_sub() { 230 | let z = core_cycles(); 231 | let mut x: fiat_p448_tight_field_element = [ 232 | z & 0x00FFFFFFFFFFFFFF, 233 | z.wrapping_mul(3) & 0x00FFFFFFFFFFFFFF, 234 | z.wrapping_mul(5) & 0x00FFFFFFFFFFFFFF, 235 | z.wrapping_mul(7) & 0x00FFFFFFFFFFFFFF, 236 | 
z.wrapping_mul(9) & 0x00FFFFFFFFFFFFFF, 237 | z.wrapping_mul(11) & 0x00FFFFFFFFFFFFFF, 238 | z.wrapping_mul(13) & 0x00FFFFFFFFFFFFFF, 239 | z.wrapping_mul(15) & 0x00FFFFFFFFFFFFFF, 240 | ]; 241 | let mut y = x; 242 | y[0] += 1; 243 | let mut tt = [0; 10]; 244 | for i in 0..30 { 245 | let mut z: fiat_p448_loose_field_element = [0u64; 8]; 246 | let begin = core_cycles(); 247 | for _ in 0..1000 { 248 | fiat_p448_sub(&mut z, &x, &y); fiat_p448_carry(&mut x, &z); 249 | fiat_p448_sub(&mut z, &y, &x); fiat_p448_carry(&mut y, &z); 250 | fiat_p448_sub(&mut z, &x, &y); fiat_p448_carry(&mut x, &z); 251 | fiat_p448_sub(&mut z, &y, &x); fiat_p448_carry(&mut y, &z); 252 | fiat_p448_sub(&mut z, &x, &y); fiat_p448_carry(&mut x, &z); 253 | fiat_p448_sub(&mut z, &y, &x); fiat_p448_carry(&mut y, &z); 254 | } 255 | let end = core_cycles(); 256 | if i >= 20 { 257 | tt[i - 20] = end.wrapping_sub(begin); 258 | } 259 | } 260 | tt.sort(); 261 | println!("fc448 sub: {:11.2} ({})", (tt[4] as f64) / 6000.0, x[0] as u8); 262 | } 263 | 264 | fn bench_fiat_mul() { 265 | let z = core_cycles(); 266 | let mut x: fiat_p448_loose_field_element = [ 267 | z & 0x00FFFFFFFFFFFFFF, 268 | z.wrapping_mul(3) & 0x00FFFFFFFFFFFFFF, 269 | z.wrapping_mul(5) & 0x00FFFFFFFFFFFFFF, 270 | z.wrapping_mul(7) & 0x00FFFFFFFFFFFFFF, 271 | z.wrapping_mul(9) & 0x00FFFFFFFFFFFFFF, 272 | z.wrapping_mul(11) & 0x00FFFFFFFFFFFFFF, 273 | z.wrapping_mul(13) & 0x00FFFFFFFFFFFFFF, 274 | z.wrapping_mul(15) & 0x00FFFFFFFFFFFFFF, 275 | ]; 276 | let mut y = x; 277 | y[0] += 1; 278 | let mut tt = [0; 10]; 279 | for i in 0..30 { 280 | let mut z: fiat_p448_tight_field_element = [0u64; 8]; 281 | let begin = core_cycles(); 282 | for _ in 0..1000 { 283 | fiat_p448_carry_mul(&mut z, &x, &y); fiat_p448_relax(&mut x, &z); 284 | fiat_p448_carry_mul(&mut z, &y, &x); fiat_p448_relax(&mut y, &z); 285 | fiat_p448_carry_mul(&mut z, &x, &y); fiat_p448_relax(&mut x, &z); 286 | fiat_p448_carry_mul(&mut z, &y, &x); fiat_p448_relax(&mut y, &z); 287 | 
fiat_p448_carry_mul(&mut z, &x, &y); fiat_p448_relax(&mut x, &z); 288 | fiat_p448_carry_mul(&mut z, &y, &x); fiat_p448_relax(&mut y, &z); 289 | } 290 | let end = core_cycles(); 291 | if i >= 20 { 292 | tt[i - 20] = end.wrapping_sub(begin); 293 | } 294 | } 295 | tt.sort(); 296 | println!("fc448 mul: {:11.2} ({})", (tt[4] as f64) / 6000.0, x[0] as u8); 297 | } 298 | 299 | fn bench_fiat_square() { 300 | let z = core_cycles(); 301 | let mut x: fiat_p448_loose_field_element = [ 302 | z & 0x00FFFFFFFFFFFFFF, 303 | z.wrapping_mul(3) & 0x00FFFFFFFFFFFFFF, 304 | z.wrapping_mul(5) & 0x00FFFFFFFFFFFFFF, 305 | z.wrapping_mul(7) & 0x00FFFFFFFFFFFFFF, 306 | z.wrapping_mul(9) & 0x00FFFFFFFFFFFFFF, 307 | z.wrapping_mul(11) & 0x00FFFFFFFFFFFFFF, 308 | z.wrapping_mul(13) & 0x00FFFFFFFFFFFFFF, 309 | z.wrapping_mul(15) & 0x00FFFFFFFFFFFFFF, 310 | ]; 311 | let mut tt = [0; 10]; 312 | for i in 0..30 { 313 | let mut z: fiat_p448_tight_field_element = [0u64; 8]; 314 | let begin = core_cycles(); 315 | for _ in 0..1000 { 316 | fiat_p448_carry_square(&mut z, &x); fiat_p448_relax(&mut x, &z); 317 | fiat_p448_carry_square(&mut z, &x); fiat_p448_relax(&mut x, &z); 318 | fiat_p448_carry_square(&mut z, &x); fiat_p448_relax(&mut x, &z); 319 | fiat_p448_carry_square(&mut z, &x); fiat_p448_relax(&mut x, &z); 320 | fiat_p448_carry_square(&mut z, &x); fiat_p448_relax(&mut x, &z); 321 | fiat_p448_carry_square(&mut z, &x); fiat_p448_relax(&mut x, &z); 322 | } 323 | let end = core_cycles(); 324 | if i >= 20 { 325 | tt[i - 20] = end.wrapping_sub(begin); 326 | } 327 | } 328 | tt.sort(); 329 | println!("fc448 square: {:11.2} ({})", (tt[4] as f64) / 6000.0, x[0] as u8); 330 | } 331 | */ 332 | -------------------------------------------------------------------------------- /benches/gls254.rs: -------------------------------------------------------------------------------- 1 | #![allow(non_snake_case)] 2 | #![cfg(feature = "gls254")] 3 | 4 | mod util; 5 | use util::core_cycles; 6 | 7 | use 
crrl::gls254::{Point, Scalar, PrivateKey};
use sha2::{Sha256, Digest};

/// Measures a generator multiplication (cycles per operation); the returned
/// byte keeps the computation observable.
fn bench_mulgen() -> (f64, u8) {
    let z = core_cycles();
    let mut seed = [0u8; 32];
    // Fill the seed with four copies of the cycle counter.
    for k in 0..4 {
        seed[(8 * k)..(8 * k + 8)].copy_from_slice(&z.to_le_bytes());
    }
    let mut s = Scalar::decode_reduce(&seed);
    let mut tt = [0; 100];
    for i in 0..tt.len() {
        let begin = core_cycles();
        for _ in 0..100 {
            let P = Point::mulgen(&s);
            // Data-dependent update to chain the iterations.
            if P.isneutral() != 0 {
                s += Scalar::ZERO;
            } else {
                s += Scalar::ONE;
            }
        }
        let end = core_cycles();
        tt[i] = end.wrapping_sub(begin);
    }
    tt.sort();
    // Report the median of the per-batch timings.
    ((tt[tt.len() >> 1] as f64) / 100.0, s.encode32()[0])
}

/// Measures a generic point multiplication (cycles per operation).
fn bench_mul() -> (f64, u8) {
    let z = core_cycles();
    let mut seed = [0u8; 32];
    for k in 0..4 {
        seed[(8 * k)..(8 * k + 8)].copy_from_slice(&z.to_le_bytes());
    }
    let mut s = Scalar::decode_reduce(&seed);
    let mut P = Point::mulgen(&s);
    let mut tt = [0; 100];
    for i in 0..tt.len() {
        let begin = core_cycles();
        for _ in 0..100 {
            P *= s;
            if P.isneutral() != 0 {
                s += Scalar::ZERO;
            } else {
                s += Scalar::ONE;
            }
        }
        let end = core_cycles();
        tt[i] = end.wrapping_sub(begin);
    }
    tt.sort();
    ((tt[tt.len() >> 1] as f64) / 100.0, s.encode32()[0])
}

/// Measures private-key decoding; the top bits of the seed are masked
/// so the candidate stays in the valid range.
fn bench_skey_load() -> (f64, u8) {
    let z = core_cycles();
    let mut seed = [0u8; 32];
    seed[0..8].copy_from_slice(&z.to_le_bytes());
    let mut tt = [0; 100];
    for i in 0..tt.len() {
        let begin = core_cycles();
        for _ in 0..100 {
            let skey = PrivateKey::decode(&seed).unwrap();
            seed[..].copy_from_slice(&skey.public_key.encode());
            seed[31] &= 0x1Fu8;
        }
        let end = core_cycles();
        tt[i] = end.wrapping_sub(begin);
    }
    tt.sort();
    ((tt[tt.len() >> 1] as f64) / 100.0, seed[0])
}

/// Measures signature generation (empty domain separation string).
fn bench_skey_sign() -> (f64, u8) {
    let z = core_cycles();
    let mut seed = [0u8; 32];
    seed[0..8].copy_from_slice(&z.to_le_bytes());
    let mut sh = Sha256::new();
    sh.update(&seed);
    seed[..].copy_from_slice(&sh.finalize());
    seed[31] &= 0x1Fu8;
    let skey = PrivateKey::decode(&seed).unwrap();
    let mut tt = [0; 100];
    let mut msg = [0u8; 32];
    for i in 0..tt.len() {
        let begin = core_cycles();
        for _ in 0..100 {
            let sig = skey.sign("", &msg);
            // Chain: next message depends on the previous signature.
            msg[..].copy_from_slice(&sig[0..32]);
        }
        let end = core_cycles();
        tt[i] = end.wrapping_sub(begin);
    }
    tt.sort();
    ((tt[tt.len() >> 1] as f64) / 100.0, msg[0])
}

/// Measures signature verification (over a batch of 128 signatures).
fn bench_pkey_verify() -> (f64, u8) {
    let z = core_cycles();
    let mut seed = [0u8; 32];
    seed[0..8].copy_from_slice(&z.to_le_bytes());
    let mut sh = Sha256::new();
    sh.update(&seed);
    seed[..].copy_from_slice(&sh.finalize());
    seed[31] &= 0x1Fu8;
    let skey = PrivateKey::decode(&seed).unwrap();
    let pkey = skey.public_key;
    // Pre-generate 128 signatures over distinct messages.
    let mut sigs = [[0u8; 48]; 128];
    for i in 0..128 {
        let msg = [i as u8; 32];
        let sig = skey.sign("", &msg);
        sigs[i][..].copy_from_slice(&sig);
    }
    let mut tt = [0; 100];
    let mut msg = [0u8; 32];
    for i in 0..tt.len() {
        let begin = core_cycles();
        for j in 0..128 {
            let ff = pkey.verify(&sigs[j], "", &msg);
            // Perturb signature and message based on the outcome so the
            // verification result is actually used.
            sigs[j][40] ^= 1u8.wrapping_add(ff as u8);
            msg[3] ^= 3u8.wrapping_sub(ff as u8);
        }
        let end = core_cycles();
        tt[i] = end.wrapping_sub(begin);
    }
    tt.sort();
    ((tt[tt.len() >> 1] as f64) / 128.0, msg[0])
}

/// Measures point decoding from its 32-byte representation.
fn bench_decode() -> (f64, u8) {
    let z = core_cycles();
    let mut buf = [0u8; 32];
    for k in 0..4 {
        buf[(8 * k)..(8 * k + 8)].copy_from_slice(&z.to_le_bytes());
    }
    let mut tt = [0; 10];
    let mut P = Point::NEUTRAL;
    let Q = Point::BASE * z;
    for i in 0..10 {
        let begin = core_cycles();
        for _ in 0..100 {
            let r = P.set_decode(&buf);
            buf[0] = buf[0].wrapping_add(1);
            buf[1] = buf[1].wrapping_add(r as u8);
            buf[2] = buf[2].wrapping_add(P.equals(Q) as u8);
        }
        let end = core_cycles();
        tt[i] = end.wrapping_sub(begin);
    }
    tt.sort();
    ((tt[4] as f64) / 100.0, buf[0])
}

/// Measures point encoding.
fn bench_encode() -> (f64, u8) {
    let z = core_cycles();
    let mut P = Point::BASE * z;
    let mut tt = [0; 10];
    for i in 0..10 {
        let begin = core_cycles();
        for _ in 0..100 {
            let x = P.encode()[0];
            // Conditionally negate so successive encodings differ.
            if x & 1 == 0 {
                P = -P;
            }
        }
        let end = core_cycles();
        tt[i] = end.wrapping_sub(begin);
    }
    tt.sort();
    ((tt[4] as f64) / 100.0, P.encode()[0])
}

/// Measures hash-to-curve (empty domain separation string).
fn bench_hash_to_curve() -> (f64, u8) {
    let mut buf = [0u8; 32];
    for i in 0..4 {
        let z = core_cycles();
        buf[(8 * i)..(8 * i + 8)].copy_from_slice(&z.to_le_bytes());
    }
    let mut tt = [0; 10];
    for i in 0..10 {
        let begin = core_cycles();
        for _ in 0..100 {
            let P = Point::hash_to_curve("", &buf);
            buf[0] += P.isneutral() as u8;
            buf[1] += 3;
            buf[2] += 5;
        }
        let end = core_cycles();
        tt[i] = end.wrapping_sub(begin);
    }
    tt.sort();
    ((tt[4] as f64) / 100.0, buf[0])
}

fn bench_split_mu() -> (f64, u8) {
    let z = core_cycles();
    let mut x = Scalar::from_u64(z);
    x.set_xsquare(5);
    let mut tt = [0; 10];
    for i in 0..10 {
        let begin = core_cycles();
        for _ in 0..1000
{ 212 | let (k0, s0, k1, s1) = Point::split_mu(&x); 213 | let mut buf = [0u8; 24]; 214 | buf[..16].copy_from_slice(&(k0 ^ k1).to_le_bytes()); 215 | buf[16..20].copy_from_slice(&s0.to_le_bytes()); 216 | buf[20..24].copy_from_slice(&s1.to_le_bytes()); 217 | x.set_decode_reduce(&buf); 218 | } 219 | let end = core_cycles(); 220 | tt[i] = end.wrapping_sub(begin); 221 | } 222 | tt.sort(); 223 | ((tt[4] as f64) / 1000.0, x.encode()[0]) 224 | } 225 | 226 | #[cfg(feature = "gls254bench")] 227 | fn bench_raw_ecdh_1dt_3() -> (f64, u8) { 228 | let z = core_cycles(); 229 | let mut seed = [0u8; 32]; 230 | seed[ 0.. 8].copy_from_slice(&z.to_le_bytes()); 231 | seed[ 8..16].copy_from_slice(&z.to_le_bytes()); 232 | seed[16..24].copy_from_slice(&z.to_le_bytes()); 233 | seed[24..32].copy_from_slice(&z.to_le_bytes()); 234 | let mut sk = Scalar::decode_reduce(&seed).encode(); 235 | let mut pp: [u8; 64] = [ 236 | 0x80, 0xAE, 0xB8, 0xED, 0x53, 0x59, 0xFF, 0x2D, 237 | 0xD0, 0x77, 0x45, 0x61, 0xF9, 0x22, 0xE4, 0x63, 238 | 0x9C, 0xEE, 0x3A, 0xF1, 0xE8, 0xF7, 0x23, 0x80, 239 | 0x74, 0x5A, 0x57, 0x29, 0xC5, 0xAA, 0xF5, 0x02, 240 | 0xA7, 0x52, 0x43, 0xDF, 0xCA, 0xE4, 0x13, 0x95, 241 | 0xD8, 0x49, 0xE7, 0xC8, 0x52, 0x6E, 0x4D, 0x6E, 242 | 0x03, 0x34, 0x21, 0x67, 0x21, 0x47, 0x37, 0xA4, 243 | 0x0C, 0x67, 0x34, 0x13, 0xF3, 0x48, 0x4B, 0x7D, 244 | ]; 245 | pp = Point::for_benchmarks_only_1dt_3(&pp, &sk).unwrap(); 246 | let mut tt = [0; 100]; 247 | for i in 0..tt.len() { 248 | let begin = core_cycles(); 249 | for _ in 0..100 { 250 | sk[..].copy_from_slice(&pp[..32]); 251 | sk[31] &= 0x1F; 252 | pp = Point::for_benchmarks_only_1dt_3(&pp, &sk).unwrap(); 253 | } 254 | let end = core_cycles(); 255 | tt[i] = end.wrapping_sub(begin); 256 | } 257 | tt.sort(); 258 | ((tt[tt.len() >> 1] as f64) / 100.0, pp[0]) 259 | } 260 | 261 | #[cfg(feature = "gls254bench")] 262 | fn bench_raw_ecdh_1dt_4() -> (f64, u8) { 263 | let z = core_cycles(); 264 | let mut seed = [0u8; 32]; 265 | seed[ 0.. 
8].copy_from_slice(&z.to_le_bytes()); 266 | seed[ 8..16].copy_from_slice(&z.to_le_bytes()); 267 | seed[16..24].copy_from_slice(&z.to_le_bytes()); 268 | seed[24..32].copy_from_slice(&z.to_le_bytes()); 269 | let mut sk = Scalar::decode_reduce(&seed).encode(); 270 | let mut pp: [u8; 64] = [ 271 | 0x80, 0xAE, 0xB8, 0xED, 0x53, 0x59, 0xFF, 0x2D, 272 | 0xD0, 0x77, 0x45, 0x61, 0xF9, 0x22, 0xE4, 0x63, 273 | 0x9C, 0xEE, 0x3A, 0xF1, 0xE8, 0xF7, 0x23, 0x80, 274 | 0x74, 0x5A, 0x57, 0x29, 0xC5, 0xAA, 0xF5, 0x02, 275 | 0xA7, 0x52, 0x43, 0xDF, 0xCA, 0xE4, 0x13, 0x95, 276 | 0xD8, 0x49, 0xE7, 0xC8, 0x52, 0x6E, 0x4D, 0x6E, 277 | 0x03, 0x34, 0x21, 0x67, 0x21, 0x47, 0x37, 0xA4, 278 | 0x0C, 0x67, 0x34, 0x13, 0xF3, 0x48, 0x4B, 0x7D, 279 | ]; 280 | pp = Point::for_benchmarks_only_1dt_4(&pp, &sk).unwrap(); 281 | let mut tt = [0; 100]; 282 | for i in 0..tt.len() { 283 | let begin = core_cycles(); 284 | for _ in 0..100 { 285 | sk[..].copy_from_slice(&pp[..32]); 286 | sk[31] &= 0x1F; 287 | pp = Point::for_benchmarks_only_1dt_4(&pp, &sk).unwrap(); 288 | } 289 | let end = core_cycles(); 290 | tt[i] = end.wrapping_sub(begin); 291 | } 292 | tt.sort(); 293 | ((tt[tt.len() >> 1] as f64) / 100.0, pp[0]) 294 | } 295 | 296 | #[cfg(feature = "gls254bench")] 297 | fn bench_raw_ecdh_1dt_5() -> (f64, u8) { 298 | let z = core_cycles(); 299 | let mut seed = [0u8; 32]; 300 | seed[ 0.. 
8].copy_from_slice(&z.to_le_bytes()); 301 | seed[ 8..16].copy_from_slice(&z.to_le_bytes()); 302 | seed[16..24].copy_from_slice(&z.to_le_bytes()); 303 | seed[24..32].copy_from_slice(&z.to_le_bytes()); 304 | let mut sk = Scalar::decode_reduce(&seed).encode(); 305 | let mut pp: [u8; 64] = [ 306 | 0x80, 0xAE, 0xB8, 0xED, 0x53, 0x59, 0xFF, 0x2D, 307 | 0xD0, 0x77, 0x45, 0x61, 0xF9, 0x22, 0xE4, 0x63, 308 | 0x9C, 0xEE, 0x3A, 0xF1, 0xE8, 0xF7, 0x23, 0x80, 309 | 0x74, 0x5A, 0x57, 0x29, 0xC5, 0xAA, 0xF5, 0x02, 310 | 0xA7, 0x52, 0x43, 0xDF, 0xCA, 0xE4, 0x13, 0x95, 311 | 0xD8, 0x49, 0xE7, 0xC8, 0x52, 0x6E, 0x4D, 0x6E, 312 | 0x03, 0x34, 0x21, 0x67, 0x21, 0x47, 0x37, 0xA4, 313 | 0x0C, 0x67, 0x34, 0x13, 0xF3, 0x48, 0x4B, 0x7D, 314 | ]; 315 | pp = Point::for_benchmarks_only_1dt_5(&pp, &sk).unwrap(); 316 | let mut tt = [0; 100]; 317 | for i in 0..tt.len() { 318 | let begin = core_cycles(); 319 | for _ in 0..100 { 320 | sk[..].copy_from_slice(&pp[..32]); 321 | sk[31] &= 0x1F; 322 | pp = Point::for_benchmarks_only_1dt_5(&pp, &sk).unwrap(); 323 | } 324 | let end = core_cycles(); 325 | tt[i] = end.wrapping_sub(begin); 326 | } 327 | tt.sort(); 328 | ((tt[tt.len() >> 1] as f64) / 100.0, pp[0]) 329 | } 330 | 331 | #[cfg(feature = "gls254bench")] 332 | fn bench_raw_ecdh_2dt_2() -> (f64, u8) { 333 | let z = core_cycles(); 334 | let mut seed = [0u8; 32]; 335 | seed[ 0.. 
8].copy_from_slice(&z.to_le_bytes()); 336 | seed[ 8..16].copy_from_slice(&z.to_le_bytes()); 337 | seed[16..24].copy_from_slice(&z.to_le_bytes()); 338 | seed[24..32].copy_from_slice(&z.to_le_bytes()); 339 | let mut sk = Scalar::decode_reduce(&seed).encode(); 340 | let mut pp: [u8; 64] = [ 341 | 0x80, 0xAE, 0xB8, 0xED, 0x53, 0x59, 0xFF, 0x2D, 342 | 0xD0, 0x77, 0x45, 0x61, 0xF9, 0x22, 0xE4, 0x63, 343 | 0x9C, 0xEE, 0x3A, 0xF1, 0xE8, 0xF7, 0x23, 0x80, 344 | 0x74, 0x5A, 0x57, 0x29, 0xC5, 0xAA, 0xF5, 0x02, 345 | 0xA7, 0x52, 0x43, 0xDF, 0xCA, 0xE4, 0x13, 0x95, 346 | 0xD8, 0x49, 0xE7, 0xC8, 0x52, 0x6E, 0x4D, 0x6E, 347 | 0x03, 0x34, 0x21, 0x67, 0x21, 0x47, 0x37, 0xA4, 348 | 0x0C, 0x67, 0x34, 0x13, 0xF3, 0x48, 0x4B, 0x7D, 349 | ]; 350 | pp = Point::for_benchmarks_only_2dt_2(&pp, &sk).unwrap(); 351 | let mut tt = [0; 100]; 352 | for i in 0..tt.len() { 353 | let begin = core_cycles(); 354 | for _ in 0..100 { 355 | sk[..].copy_from_slice(&pp[..32]); 356 | sk[31] &= 0x1F; 357 | pp = Point::for_benchmarks_only_2dt_2(&pp, &sk).unwrap(); 358 | } 359 | let end = core_cycles(); 360 | tt[i] = end.wrapping_sub(begin); 361 | } 362 | tt.sort(); 363 | ((tt[tt.len() >> 1] as f64) / 100.0, pp[0]) 364 | } 365 | 366 | #[cfg(feature = "gls254bench")] 367 | fn bench_raw_ecdh_2dt_3() -> (f64, u8) { 368 | let z = core_cycles(); 369 | let mut seed = [0u8; 32]; 370 | seed[ 0.. 
8].copy_from_slice(&z.to_le_bytes()); 371 | seed[ 8..16].copy_from_slice(&z.to_le_bytes()); 372 | seed[16..24].copy_from_slice(&z.to_le_bytes()); 373 | seed[24..32].copy_from_slice(&z.to_le_bytes()); 374 | let mut sk = Scalar::decode_reduce(&seed).encode(); 375 | let mut pp: [u8; 64] = [ 376 | 0x80, 0xAE, 0xB8, 0xED, 0x53, 0x59, 0xFF, 0x2D, 377 | 0xD0, 0x77, 0x45, 0x61, 0xF9, 0x22, 0xE4, 0x63, 378 | 0x9C, 0xEE, 0x3A, 0xF1, 0xE8, 0xF7, 0x23, 0x80, 379 | 0x74, 0x5A, 0x57, 0x29, 0xC5, 0xAA, 0xF5, 0x02, 380 | 0xA7, 0x52, 0x43, 0xDF, 0xCA, 0xE4, 0x13, 0x95, 381 | 0xD8, 0x49, 0xE7, 0xC8, 0x52, 0x6E, 0x4D, 0x6E, 382 | 0x03, 0x34, 0x21, 0x67, 0x21, 0x47, 0x37, 0xA4, 383 | 0x0C, 0x67, 0x34, 0x13, 0xF3, 0x48, 0x4B, 0x7D, 384 | ]; 385 | pp = Point::for_benchmarks_only_2dt_3(&pp, &sk).unwrap(); 386 | let mut tt = [0; 100]; 387 | for i in 0..tt.len() { 388 | let begin = core_cycles(); 389 | for _ in 0..100 { 390 | sk[..].copy_from_slice(&pp[..32]); 391 | sk[31] &= 0x1F; 392 | pp = Point::for_benchmarks_only_2dt_3(&pp, &sk).unwrap(); 393 | } 394 | let end = core_cycles(); 395 | tt[i] = end.wrapping_sub(begin); 396 | } 397 | tt.sort(); 398 | ((tt[tt.len() >> 1] as f64) / 100.0, pp[0]) 399 | } 400 | 401 | fn main() { 402 | let mut bx = 0u8; 403 | 404 | let (v, x) = bench_mul(); 405 | bx ^= x; 406 | println!("GLS254 point mul: {:13.2}", v); 407 | #[cfg(feature = "gls254bench")] 408 | { 409 | let (v, x) = bench_raw_ecdh_1dt_3(); 410 | bx ^= x; 411 | println!("GLS254 raw_ECDH 1DT-3: {:13.2}", v); 412 | let (v, x) = bench_raw_ecdh_1dt_4(); 413 | bx ^= x; 414 | println!("GLS254 raw_ECDH 1DT-4: {:13.2}", v); 415 | let (v, x) = bench_raw_ecdh_1dt_5(); 416 | bx ^= x; 417 | println!("GLS254 raw_ECDH 1DT-5: {:13.2}", v); 418 | let (v, x) = bench_raw_ecdh_2dt_2(); 419 | bx ^= x; 420 | println!("GLS254 raw_ECDH 2DT-2: {:13.2}", v); 421 | let (v, x) = bench_raw_ecdh_2dt_3(); 422 | bx ^= x; 423 | println!("GLS254 raw_ECDH 2DT-3: {:13.2}", v); 424 | } 425 | let (v, x) = bench_mulgen(); 426 | bx 
^= x; 427 | println!("GLS254 point mulgen: {:13.2}", v); 428 | let (v, x) = bench_skey_load(); 429 | bx ^= x; 430 | println!("GLS254 skey_load: {:13.2}", v); 431 | let (v, x) = bench_skey_sign(); 432 | bx ^= x; 433 | println!("GLS254 sign: {:13.2}", v); 434 | let (v, x) = bench_pkey_verify(); 435 | bx ^= x; 436 | println!("GLS254 verify: {:13.2}", v); 437 | let (v, x) = bench_decode(); 438 | bx ^= x; 439 | println!("GLS254 decode: {:13.2}", v); 440 | let (v, x) = bench_encode(); 441 | bx ^= x; 442 | println!("GLS254 encode: {:13.2}", v); 443 | let (v, x) = bench_hash_to_curve(); 444 | bx ^= x; 445 | println!("GLS254 hash-to-curve: {:13.2}", v); 446 | let (v, x) = bench_split_mu(); 447 | bx ^= x; 448 | println!("GLS254 split_mu: {:13.2}", v); 449 | 450 | println!("{}", bx); 451 | } 452 | -------------------------------------------------------------------------------- /benches/jq255e.rs: -------------------------------------------------------------------------------- 1 | #![allow(non_snake_case)] 2 | #![cfg(feature = "jq255e")] 3 | 4 | mod util; 5 | use util::core_cycles; 6 | 7 | use crrl::jq255e::{Point, Scalar, PrivateKey}; 8 | use sha2::{Sha256, Digest}; 9 | 10 | fn bench_mulgen() -> (f64, u8) { 11 | let z = core_cycles(); 12 | let mut seed = [0u8; 32]; 13 | seed[ 0.. 
8].copy_from_slice(&z.to_le_bytes()); 14 | seed[ 8..16].copy_from_slice(&z.to_le_bytes()); 15 | seed[16..24].copy_from_slice(&z.to_le_bytes()); 16 | seed[24..32].copy_from_slice(&z.to_le_bytes()); 17 | let mut s = Scalar::decode_reduce(&seed); 18 | let mut tt = [0; 100]; 19 | for i in 0..tt.len() { 20 | let begin = core_cycles(); 21 | for _ in 0..100 { 22 | let P = Point::mulgen(&s); 23 | if P.isneutral() != 0 { 24 | s += Scalar::ZERO; 25 | } else { 26 | s += Scalar::ONE; 27 | } 28 | } 29 | let end = core_cycles(); 30 | tt[i] = end.wrapping_sub(begin); 31 | } 32 | tt.sort(); 33 | ((tt[tt.len() >> 1] as f64) / 100.0, s.encode32()[0]) 34 | } 35 | 36 | fn bench_mul() -> (f64, u8) { 37 | let z = core_cycles(); 38 | let mut seed = [0u8; 32]; 39 | seed[ 0.. 8].copy_from_slice(&z.to_le_bytes()); 40 | seed[ 8..16].copy_from_slice(&z.to_le_bytes()); 41 | seed[16..24].copy_from_slice(&z.to_le_bytes()); 42 | seed[24..32].copy_from_slice(&z.to_le_bytes()); 43 | let mut s = Scalar::decode_reduce(&seed); 44 | let mut P = Point::mulgen(&s); 45 | let mut tt = [0; 100]; 46 | for i in 0..tt.len() { 47 | let begin = core_cycles(); 48 | for _ in 0..100 { 49 | P *= s; 50 | if P.isneutral() != 0 { 51 | s += Scalar::ZERO; 52 | } else { 53 | s += Scalar::ONE; 54 | } 55 | } 56 | let end = core_cycles(); 57 | tt[i] = end.wrapping_sub(begin); 58 | } 59 | tt.sort(); 60 | ((tt[tt.len() >> 1] as f64) / 100.0, s.encode32()[0]) 61 | } 62 | 63 | fn bench_mul_add_mulgen() -> (f64, u8) { 64 | let z = core_cycles(); 65 | let mut uu = [Scalar::ZERO; 128]; 66 | let mut vv = [Scalar::ZERO; 128]; 67 | let mut sh = Sha256::new(); 68 | for i in 0..128 { 69 | sh.update(z.to_le_bytes()); 70 | sh.update(((2 * i + 0) as u64).to_le_bytes()); 71 | let b1 = sh.finalize_reset(); 72 | sh.update(z.to_le_bytes()); 73 | sh.update(((2 * i + 1) as u64).to_le_bytes()); 74 | let b2 = sh.finalize_reset(); 75 | uu[i] = Scalar::decode_reduce(&b1); 76 | vv[i] = Scalar::decode_reduce(&b2); 77 | } 78 | let mut tt = [0; 100]; 79 
| let mut P = Point::mulgen(&uu[127]); 80 | for i in 0..tt.len() { 81 | let begin = core_cycles(); 82 | for j in 0..128 { 83 | let ku = (i + j) & 127; 84 | let kv = i.wrapping_sub(j) & 127; 85 | let Q = P.mul_add_mulgen_vartime(&uu[ku], &vv[kv]); 86 | P += Q; 87 | } 88 | let end = core_cycles(); 89 | tt[i] = end.wrapping_sub(begin); 90 | } 91 | tt.sort(); 92 | ((tt[tt.len() >> 1] as f64) / 128.0, P.encode()[0]) 93 | } 94 | 95 | fn bench_skey_load() -> (f64, u8) { 96 | let z = core_cycles(); 97 | let mut seed = [0u8; 32]; 98 | seed[0..8].copy_from_slice(&z.to_le_bytes()); 99 | let mut tt = [0; 100]; 100 | for i in 0..tt.len() { 101 | let begin = core_cycles(); 102 | for _ in 0..100 { 103 | let skey = PrivateKey::decode(&seed).unwrap(); 104 | seed[..].copy_from_slice(&skey.public_key.encode()); 105 | seed[31] &= 0x1Fu8; 106 | } 107 | let end = core_cycles(); 108 | tt[i] = end.wrapping_sub(begin); 109 | } 110 | tt.sort(); 111 | ((tt[tt.len() >> 1] as f64) / 100.0, seed[0]) 112 | } 113 | 114 | fn bench_skey_sign() -> (f64, u8) { 115 | let z = core_cycles(); 116 | let mut seed = [0u8; 32]; 117 | seed[0..8].copy_from_slice(&z.to_le_bytes()); 118 | let mut sh = Sha256::new(); 119 | sh.update(&seed); 120 | seed[..].copy_from_slice(&sh.finalize()); 121 | seed[31] &= 0x1Fu8; 122 | let skey = PrivateKey::decode(&seed).unwrap(); 123 | let mut tt = [0; 100]; 124 | let mut msg = [0u8; 32]; 125 | for i in 0..tt.len() { 126 | let begin = core_cycles(); 127 | for _ in 0..100 { 128 | let sig = skey.sign("", &msg); 129 | msg[..].copy_from_slice(&sig[0..32]); 130 | } 131 | let end = core_cycles(); 132 | tt[i] = end.wrapping_sub(begin); 133 | } 134 | tt.sort(); 135 | ((tt[tt.len() >> 1] as f64) / 100.0, msg[0]) 136 | } 137 | 138 | fn bench_pkey_verify() -> (f64, u8) { 139 | let z = core_cycles(); 140 | let mut seed = [0u8; 32]; 141 | seed[0..8].copy_from_slice(&z.to_le_bytes()); 142 | let mut sh = Sha256::new(); 143 | sh.update(&seed); 144 | seed[..].copy_from_slice(&sh.finalize()); 
145 | seed[31] &= 0x1Fu8; 146 | let skey = PrivateKey::decode(&seed).unwrap(); 147 | let pkey = skey.public_key; 148 | let mut sigs = [[0u8; 48]; 128]; 149 | for i in 0..128 { 150 | let msg = [i as u8; 32]; 151 | let sig = skey.sign("", &msg); 152 | sigs[i][..].copy_from_slice(&sig); 153 | } 154 | let mut tt = [0; 100]; 155 | let mut msg = [0u8; 32]; 156 | for i in 0..tt.len() { 157 | let begin = core_cycles(); 158 | for j in 0..128 { 159 | let ff = pkey.verify(&sigs[j], "", &msg); 160 | sigs[j][40] ^= 1u8.wrapping_add(ff as u8); 161 | msg[3] ^= 3u8.wrapping_sub(ff as u8); 162 | } 163 | let end = core_cycles(); 164 | tt[i] = end.wrapping_sub(begin); 165 | } 166 | tt.sort(); 167 | ((tt[tt.len() >> 1] as f64) / 128.0, msg[0]) 168 | } 169 | 170 | fn bench_decode() -> (f64, u8) { 171 | let z = core_cycles(); 172 | let mut buf = [0u8; 32]; 173 | buf[ 0.. 8].copy_from_slice(&z.to_le_bytes()); 174 | buf[ 8..16].copy_from_slice(&z.to_le_bytes()); 175 | buf[16..24].copy_from_slice(&z.to_le_bytes()); 176 | buf[24..32].copy_from_slice(&z.to_le_bytes()); 177 | let mut tt = [0; 10]; 178 | let mut P = Point::NEUTRAL; 179 | let Q = Point::BASE * z; 180 | for i in 0..10 { 181 | let begin = core_cycles(); 182 | for _ in 0..100 { 183 | let r = P.set_decode(&buf); 184 | buf[0] = buf[0].wrapping_add(1); 185 | buf[1] = buf[1].wrapping_add(r as u8); 186 | buf[2] = buf[2].wrapping_add(P.equals(Q) as u8); 187 | } 188 | let end = core_cycles(); 189 | tt[i] = end.wrapping_sub(begin); 190 | } 191 | tt.sort(); 192 | ((tt[4] as f64) / 100.0, buf[0]) 193 | } 194 | 195 | fn bench_encode() -> (f64, u8) { 196 | let z = core_cycles(); 197 | let mut P = Point::BASE * z; 198 | let mut tt = [0; 10]; 199 | for i in 0..10 { 200 | let begin = core_cycles(); 201 | for _ in 0..100 { 202 | let x = P.encode()[0]; 203 | if x & 1 == 0 { 204 | P = -P; 205 | } 206 | } 207 | let end = core_cycles(); 208 | tt[i] = end.wrapping_sub(begin); 209 | } 210 | tt.sort(); 211 | ((tt[4] as f64) / 100.0, P.encode()[0]) 212 | 
} 213 | 214 | fn main() { 215 | let mut bx = 0u8; 216 | 217 | let (v, x) = bench_mul(); 218 | bx ^= x; 219 | println!("Jq255e point mul: {:13.2}", v); 220 | let (v, x) = bench_mulgen(); 221 | bx ^= x; 222 | println!("Jq255e point mulgen: {:13.2}", v); 223 | let (v, x) = bench_mul_add_mulgen(); 224 | bx ^= x; 225 | println!("Jq255e point mul_add_mulgen: {:13.2}", v); 226 | let (v, x) = bench_skey_load(); 227 | bx ^= x; 228 | println!("Jq255e skey_load: {:13.2}", v); 229 | let (v, x) = bench_skey_sign(); 230 | bx ^= x; 231 | println!("Jq255e sign: {:13.2}", v); 232 | let (v, x) = bench_pkey_verify(); 233 | bx ^= x; 234 | println!("Jq255e verify: {:13.2}", v); 235 | let (v, x) = bench_decode(); 236 | bx ^= x; 237 | println!("Jq255e decode: {:13.2}", v); 238 | let (v, x) = bench_encode(); 239 | bx ^= x; 240 | println!("Jq255e encode: {:13.2}", v); 241 | 242 | println!("{}", bx); 243 | } 244 | -------------------------------------------------------------------------------- /benches/jq255s.rs: -------------------------------------------------------------------------------- 1 | #![allow(non_snake_case)] 2 | #![cfg(feature = "jq255s")] 3 | 4 | mod util; 5 | use util::core_cycles; 6 | 7 | use crrl::jq255s::{Point, Scalar, PrivateKey}; 8 | use sha2::{Sha256, Digest}; 9 | 10 | fn bench_mulgen() -> (f64, u8) { 11 | let z = core_cycles(); 12 | let mut seed = [0u8; 32]; 13 | seed[ 0.. 
8].copy_from_slice(&z.to_le_bytes()); 14 | seed[ 8..16].copy_from_slice(&z.to_le_bytes()); 15 | seed[16..24].copy_from_slice(&z.to_le_bytes()); 16 | seed[24..32].copy_from_slice(&z.to_le_bytes()); 17 | let mut s = Scalar::decode_reduce(&seed); 18 | let mut tt = [0; 100]; 19 | for i in 0..tt.len() { 20 | let begin = core_cycles(); 21 | for _ in 0..100 { 22 | let P = Point::mulgen(&s); 23 | if P.isneutral() != 0 { 24 | s += Scalar::ZERO; 25 | } else { 26 | s += Scalar::ONE; 27 | } 28 | } 29 | let end = core_cycles(); 30 | tt[i] = end.wrapping_sub(begin); 31 | } 32 | tt.sort(); 33 | ((tt[tt.len() >> 1] as f64) / 100.0, s.encode32()[0]) 34 | } 35 | 36 | fn bench_mul() -> (f64, u8) { 37 | let z = core_cycles(); 38 | let mut seed = [0u8; 32]; 39 | seed[ 0.. 8].copy_from_slice(&z.to_le_bytes()); 40 | seed[ 8..16].copy_from_slice(&z.to_le_bytes()); 41 | seed[16..24].copy_from_slice(&z.to_le_bytes()); 42 | seed[24..32].copy_from_slice(&z.to_le_bytes()); 43 | let mut s = Scalar::decode_reduce(&seed); 44 | let mut P = Point::mulgen(&s); 45 | let mut tt = [0; 100]; 46 | for i in 0..tt.len() { 47 | let begin = core_cycles(); 48 | for _ in 0..100 { 49 | P *= s; 50 | if P.isneutral() != 0 { 51 | s += Scalar::ZERO; 52 | } else { 53 | s += Scalar::ONE; 54 | } 55 | } 56 | let end = core_cycles(); 57 | tt[i] = end.wrapping_sub(begin); 58 | } 59 | tt.sort(); 60 | ((tt[tt.len() >> 1] as f64) / 100.0, s.encode32()[0]) 61 | } 62 | 63 | fn bench_mul_add_mulgen() -> (f64, u8) { 64 | let z = core_cycles(); 65 | let mut uu = [Scalar::ZERO; 128]; 66 | let mut vv = [Scalar::ZERO; 128]; 67 | let mut sh = Sha256::new(); 68 | for i in 0..128 { 69 | sh.update(z.to_le_bytes()); 70 | sh.update(((2 * i + 0) as u64).to_le_bytes()); 71 | let b1 = sh.finalize_reset(); 72 | sh.update(z.to_le_bytes()); 73 | sh.update(((2 * i + 1) as u64).to_le_bytes()); 74 | let b2 = sh.finalize_reset(); 75 | uu[i] = Scalar::decode_reduce(&b1); 76 | vv[i] = Scalar::decode_reduce(&b2); 77 | } 78 | let mut tt = [0; 100]; 79 
| let mut P = Point::mulgen(&uu[127]); 80 | for i in 0..tt.len() { 81 | let begin = core_cycles(); 82 | for j in 0..128 { 83 | let ku = (i + j) & 127; 84 | let kv = i.wrapping_sub(j) & 127; 85 | let Q = P.mul_add_mulgen_vartime(&uu[ku], &vv[kv]); 86 | P += Q; 87 | } 88 | let end = core_cycles(); 89 | tt[i] = end.wrapping_sub(begin); 90 | } 91 | tt.sort(); 92 | ((tt[tt.len() >> 1] as f64) / 128.0, P.encode()[0]) 93 | } 94 | 95 | fn bench_skey_load() -> (f64, u8) { 96 | let z = core_cycles(); 97 | let mut seed = [0u8; 32]; 98 | seed[0..8].copy_from_slice(&z.to_le_bytes()); 99 | let mut tt = [0; 100]; 100 | for i in 0..tt.len() { 101 | let begin = core_cycles(); 102 | for _ in 0..100 { 103 | let skey = PrivateKey::decode(&seed).unwrap(); 104 | seed[..].copy_from_slice(&skey.public_key.encode()); 105 | seed[31] &= 0x1Fu8; 106 | } 107 | let end = core_cycles(); 108 | tt[i] = end.wrapping_sub(begin); 109 | } 110 | tt.sort(); 111 | ((tt[tt.len() >> 1] as f64) / 100.0, seed[0]) 112 | } 113 | 114 | fn bench_skey_sign() -> (f64, u8) { 115 | let z = core_cycles(); 116 | let mut seed = [0u8; 32]; 117 | seed[0..8].copy_from_slice(&z.to_le_bytes()); 118 | let mut sh = Sha256::new(); 119 | sh.update(&seed); 120 | seed[..].copy_from_slice(&sh.finalize()); 121 | seed[31] &= 0x1Fu8; 122 | let skey = PrivateKey::decode(&seed).unwrap(); 123 | let mut tt = [0; 100]; 124 | let mut msg = [0u8; 32]; 125 | for i in 0..tt.len() { 126 | let begin = core_cycles(); 127 | for _ in 0..100 { 128 | let sig = skey.sign("", &msg); 129 | msg[..].copy_from_slice(&sig[0..32]); 130 | } 131 | let end = core_cycles(); 132 | tt[i] = end.wrapping_sub(begin); 133 | } 134 | tt.sort(); 135 | ((tt[tt.len() >> 1] as f64) / 100.0, msg[0]) 136 | } 137 | 138 | fn bench_pkey_verify() -> (f64, u8) { 139 | let z = core_cycles(); 140 | let mut seed = [0u8; 32]; 141 | seed[0..8].copy_from_slice(&z.to_le_bytes()); 142 | let mut sh = Sha256::new(); 143 | sh.update(&seed); 144 | seed[..].copy_from_slice(&sh.finalize()); 
145 | seed[31] &= 0x1Fu8; 146 | let skey = PrivateKey::decode(&seed).unwrap(); 147 | let pkey = skey.public_key; 148 | let mut sigs = [[0u8; 48]; 128]; 149 | for i in 0..128 { 150 | let msg = [i as u8; 32]; 151 | let sig = skey.sign("", &msg); 152 | sigs[i][..].copy_from_slice(&sig); 153 | } 154 | let mut tt = [0; 100]; 155 | let mut msg = [0u8; 32]; 156 | for i in 0..tt.len() { 157 | let begin = core_cycles(); 158 | for j in 0..128 { 159 | let ff = pkey.verify(&sigs[j], "", &msg); 160 | sigs[j][40] ^= 1u8.wrapping_add(ff as u8); 161 | msg[3] ^= 3u8.wrapping_sub(ff as u8); 162 | } 163 | let end = core_cycles(); 164 | tt[i] = end.wrapping_sub(begin); 165 | } 166 | tt.sort(); 167 | ((tt[tt.len() >> 1] as f64) / 128.0, msg[0]) 168 | } 169 | 170 | fn bench_decode() -> (f64, u8) { 171 | let z = core_cycles(); 172 | let mut buf = [0u8; 32]; 173 | buf[ 0.. 8].copy_from_slice(&z.to_le_bytes()); 174 | buf[ 8..16].copy_from_slice(&z.to_le_bytes()); 175 | buf[16..24].copy_from_slice(&z.to_le_bytes()); 176 | buf[24..32].copy_from_slice(&z.to_le_bytes()); 177 | let mut tt = [0; 10]; 178 | let mut P = Point::NEUTRAL; 179 | let Q = Point::BASE * z; 180 | for i in 0..10 { 181 | let begin = core_cycles(); 182 | for _ in 0..100 { 183 | let r = P.set_decode(&buf); 184 | buf[0] = buf[0].wrapping_add(1); 185 | buf[1] = buf[1].wrapping_add(r as u8); 186 | buf[2] = buf[2].wrapping_add(P.equals(Q) as u8); 187 | } 188 | let end = core_cycles(); 189 | tt[i] = end.wrapping_sub(begin); 190 | } 191 | tt.sort(); 192 | ((tt[4] as f64) / 100.0, buf[0]) 193 | } 194 | 195 | fn bench_encode() -> (f64, u8) { 196 | let z = core_cycles(); 197 | let mut P = Point::BASE * z; 198 | let mut tt = [0; 10]; 199 | for i in 0..10 { 200 | let begin = core_cycles(); 201 | for _ in 0..100 { 202 | let x = P.encode()[0]; 203 | if x & 1 == 0 { 204 | P = -P; 205 | } 206 | } 207 | let end = core_cycles(); 208 | tt[i] = end.wrapping_sub(begin); 209 | } 210 | tt.sort(); 211 | ((tt[4] as f64) / 100.0, P.encode()[0]) 212 | 
} 213 | 214 | fn main() { 215 | let mut bx = 0u8; 216 | 217 | let (v, x) = bench_mul(); 218 | bx ^= x; 219 | println!("Jq255s point mul: {:13.2}", v); 220 | let (v, x) = bench_mulgen(); 221 | bx ^= x; 222 | println!("Jq255s point mulgen: {:13.2}", v); 223 | let (v, x) = bench_mul_add_mulgen(); 224 | bx ^= x; 225 | println!("Jq255s point mul_add_mulgen: {:13.2}", v); 226 | let (v, x) = bench_skey_load(); 227 | bx ^= x; 228 | println!("Jq255s skey_load: {:13.2}", v); 229 | let (v, x) = bench_skey_sign(); 230 | bx ^= x; 231 | println!("Jq255s sign: {:13.2}", v); 232 | let (v, x) = bench_pkey_verify(); 233 | bx ^= x; 234 | println!("Jq255s verify: {:13.2}", v); 235 | let (v, x) = bench_decode(); 236 | bx ^= x; 237 | println!("Jq255s decode: {:13.2}", v); 238 | let (v, x) = bench_encode(); 239 | bx ^= x; 240 | println!("Jq255s encode: {:13.2}", v); 241 | 242 | println!("{}", bx); 243 | } 244 | -------------------------------------------------------------------------------- /benches/modint.rs: -------------------------------------------------------------------------------- 1 | #![cfg(feature = "modint256")] 2 | 3 | mod util; 4 | use util::core_cycles; 5 | 6 | use crrl::field::ModInt256; 7 | use sha2::{Sha256, Digest}; 8 | 9 | fn bench_modint256_add() -> (f64, u8) 11 | { 12 | let z = core_cycles(); 13 | let mut x = ModInt256::::w64le(z, z.wrapping_mul(3), 14 | z.wrapping_mul(5), z.wrapping_mul(7)); 15 | let mut y = x + ModInt256::::ONE; 16 | let mut tt = [0; 10]; 17 | for i in 0..10 { 18 | let begin = core_cycles(); 19 | for _ in 0..10000 { 20 | x += y; 21 | y += x; 22 | x += y; 23 | y += x; 24 | x += y; 25 | y += x; 26 | } 27 | let end = core_cycles(); 28 | tt[i] = end.wrapping_sub(begin); 29 | } 30 | tt.sort(); 31 | ((tt[4] as f64) / 60000.0, x.encode32()[0]) 32 | } 33 | 34 | fn bench_modint256_sub() -> (f64, u8) 36 | { 37 | let z = core_cycles(); 38 | let mut x = ModInt256::::w64le(z, z.wrapping_mul(3), 39 | z.wrapping_mul(5), z.wrapping_mul(7)); 40 | let mut y = x + 
ModInt256::::ONE; 41 | let mut tt = [0; 10]; 42 | for i in 0..10 { 43 | let begin = core_cycles(); 44 | for _ in 0..10000 { 45 | x -= y; 46 | y -= x; 47 | x -= y; 48 | y -= x; 49 | x -= y; 50 | y -= x; 51 | } 52 | let end = core_cycles(); 53 | tt[i] = end.wrapping_sub(begin); 54 | } 55 | tt.sort(); 56 | ((tt[4] as f64) / 60000.0, x.encode32()[0]) 57 | } 58 | 59 | fn bench_modint256_mul() -> (f64, u8) 61 | { 62 | let z = core_cycles(); 63 | let mut x = ModInt256::::w64le(z, z.wrapping_mul(3), 64 | z.wrapping_mul(5), z.wrapping_mul(7)); 65 | let mut y = x + ModInt256::::ONE; 66 | let mut tt = [0; 10]; 67 | for i in 0..10 { 68 | let begin = core_cycles(); 69 | for _ in 0..10000 { 70 | x *= y; 71 | y *= x; 72 | x *= y; 73 | y *= x; 74 | x *= y; 75 | y *= x; 76 | } 77 | let end = core_cycles(); 78 | tt[i] = end.wrapping_sub(begin); 79 | } 80 | tt.sort(); 81 | ((tt[4] as f64) / 60000.0, x.encode32()[0]) 82 | } 83 | 84 | fn bench_modint256_square() -> (f64, u8) 86 | { 87 | let z = core_cycles(); 88 | let mut x = ModInt256::::w64le(z, z.wrapping_mul(3), 89 | z.wrapping_mul(5), z.wrapping_mul(7)); 90 | let mut tt = [0; 10]; 91 | for i in 0..10 { 92 | let begin = core_cycles(); 93 | x = x.xsquare(60000); 94 | let end = core_cycles(); 95 | tt[i] = end.wrapping_sub(begin); 96 | } 97 | tt.sort(); 98 | ((tt[4] as f64) / 60000.0, x.encode32()[0]) 99 | } 100 | 101 | fn bench_modint256_div() -> (f64, u8) 103 | { 104 | let z = core_cycles(); 105 | let mut x = ModInt256::::w64le(z, z.wrapping_mul(3), 106 | z.wrapping_mul(5), z.wrapping_mul(7)); 107 | let mut y = x + ModInt256::::ONE; 108 | let mut tt = [0; 10]; 109 | for i in 0..10 { 110 | let begin = core_cycles(); 111 | for _ in 0..1000 { 112 | x /= y; 113 | y /= x; 114 | x /= y; 115 | y /= x; 116 | x /= y; 117 | y /= x; 118 | } 119 | let end = core_cycles(); 120 | tt[i] = end.wrapping_sub(begin); 121 | } 122 | tt.sort(); 123 | ((tt[4] as f64) / 6000.0, x.encode32()[0]) 124 | } 125 | 126 | fn bench_modint256_sqrt() -> (f64, u8) 128 
{
    let z = core_cycles();
    let mut x = ModInt256::<M0, M1, M2, M3>::w64le(z, z.wrapping_mul(3),
        z.wrapping_mul(5), z.wrapping_mul(7));
    let mut tt = [0; 10];
    for i in 0..10 {
        let begin = core_cycles();
        for _ in 0..1000 {
            // Chain the output back into the input so that the loop
            // cannot be optimized away.
            let (x2, _) = x.sqrt();
            x = x2 + ModInt256::<M0, M1, M2, M3>::ONE;
        }
        let end = core_cycles();
        tt[i] = end.wrapping_sub(begin);
    }
    tt.sort();
    // Median of the ten measurements; one output byte defeats dead-code
    // elimination in the caller.
    ((tt[4] as f64) / 1000.0, x.encode32()[0])
}

/// Benchmark for the Legendre symbol computation on `ModInt256` with
/// modulus `M3:M2:M1:M0` (64-bit limbs, little-endian order).
///
/// Returns `(cycles_per_op, byte)` where `byte` is one byte of the final
/// accumulator (used by the caller to prevent dead-code elimination).
fn bench_modint256_legendre<const M0: u64, const M1: u64,
                            const M2: u64, const M3: u64>() -> (f64, u8)
{
    let z = core_cycles();
    let mut x = ModInt256::<M0, M1, M2, M3>::w64le(z, z.wrapping_mul(3),
        z.wrapping_mul(5), z.wrapping_mul(7));
    let mut tt = [0; 10];
    for i in 0..10 {
        let begin = core_cycles();
        for _ in 0..1000 {
            let ls = x.legendre();
            x += ModInt256::<M0, M1, M2, M3>::w64le(
                ls as u64, ls as u64, ls as u64, ls as u64);
        }
        let end = core_cycles();
        tt[i] = end.wrapping_sub(begin);
    }
    tt.sort();
    ((tt[4] as f64) / 1000.0, x.encode32()[0])
}

/// Benchmark for the (variable-time) lattice basis splitting on
/// `ModInt256` with modulus `M3:M2:M1:M0`.
fn bench_modint256_split<const M0: u64, const M1: u64,
                         const M2: u64, const M3: u64>() -> (f64, u8)
{
    let z = core_cycles();

    // Generate 512 pseudorandom elements. Number 512 was chosen so that
    // the total in-RAM size is 16 kB, which should fit in L1 cache with
    // enough room.
    let mut vv = [ModInt256::<M0, M1, M2, M3>::ZERO; 512];
    let mut sh = Sha256::new();
    for i in 0..512 {
        sh.update(z.to_le_bytes());
        sh.update((i as u64).to_le_bytes());
        let bb = sh.finalize_reset();
        vv[i] = ModInt256::<M0, M1, M2, M3>::decode_reduce(&bb);
    }

    let mut tt = [0; 10];
    for i in 0..10 {
        let begin = core_cycles();
        for j in 0..512 {
            let (c0, c1) = vv[j].split_vartime();
            let x = c0.wrapping_add(c1);
            vv[(j + 1) & 511] += ModInt256::<M0, M1, M2, M3>::from_i128(x);
        }
        let end = core_cycles();
        tt[i] = end.wrapping_sub(begin);
    }
    tt.sort();
    ((tt[4] as f64) / 512.0, vv[0].encode32()[0])
}

/// Benchmark for decode-and-reduce of a 48-byte input into `ModInt256`
/// with a generic (non-special) modulus `M3:M2:M1:M0`.
fn bench_modint256_reduce<const M0: u64, const M1: u64,
                          const M2: u64, const M3: u64>() -> (f64, u8)
{
    let mut x = ModInt256::<M0, M1, M2, M3>::ZERO;
    let mut buf = [0u8; 48];
    for i in 0..12 {
        buf[(4 * i)..(4 * i + 4)].copy_from_slice(
            &(core_cycles() as u32).to_le_bytes());
    }
    let mut tt = [0; 10];
    for i in 0..10 {
        let begin = core_cycles();
        for _ in 0..10000 {
            x.set_decode_reduce(&buf);
            let xe = x.encode32();
            // Recycle the output into the next input.
            buf[16..].copy_from_slice(&xe);
            buf[..16].copy_from_slice(&xe[8..24]);
        }
        let end = core_cycles();
        tt[i] = end.wrapping_sub(begin);
    }
    tt.sort();
    ((tt[4] as f64) / 10000.0, buf[0])
}

fn main() {
    let mut bx = 0u8;

    // The three moduli used below are: the P-256 base field prime,
    // 2^255-19, and 2^256-189.
    let (f1, b1) = bench_modint256_add::<0xFFFFFFFFFFFFFFFF, 0x00000000FFFFFFFF, 0x0000000000000000, 0xFFFFFFFF00000001>();
    let (f2, b2) = bench_modint256_add::<0xFFFFFFFFFFFFFFED, 0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF, 0x7FFFFFFFFFFFFFFF>();
    let (f3, b3) = bench_modint256_add::<0xFFFFFFFFFFFFFF43, 0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF>();
    bx ^= b1 ^ b2 ^ b3;
    println!("ModInt256 add: {:11.2} {:11.2} {:11.2}", f1, f2, f3);

    let (f1, b1) = bench_modint256_sub::<0xFFFFFFFFFFFFFFFF, 0x00000000FFFFFFFF, 0x0000000000000000, 0xFFFFFFFF00000001>();
    let (f2, b2) = bench_modint256_sub::<0xFFFFFFFFFFFFFFED, 0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF, 0x7FFFFFFFFFFFFFFF>();
    let (f3, b3) = bench_modint256_sub::<0xFFFFFFFFFFFFFF43, 0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF>();
    bx ^= b1 ^ b2 ^ b3;
    println!("ModInt256 sub: {:11.2} {:11.2} {:11.2}", f1, f2, f3);

    let (f1, b1) = bench_modint256_mul::<0xFFFFFFFFFFFFFFFF, 0x00000000FFFFFFFF, 0x0000000000000000, 0xFFFFFFFF00000001>();
    let (f2, b2) = bench_modint256_mul::<0xFFFFFFFFFFFFFFED, 0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF, 0x7FFFFFFFFFFFFFFF>();
    let (f3, b3) = bench_modint256_mul::<0xFFFFFFFFFFFFFF43, 0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF>();
    bx ^= b1 ^ b2 ^ b3;
    println!("ModInt256 mul: {:11.2} {:11.2} {:11.2}", f1, f2, f3);

    let (f1, b1) = bench_modint256_square::<0xFFFFFFFFFFFFFFFF, 0x00000000FFFFFFFF, 0x0000000000000000, 0xFFFFFFFF00000001>();
    let (f2, b2) = bench_modint256_square::<0xFFFFFFFFFFFFFFED, 0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF, 0x7FFFFFFFFFFFFFFF>();
    let (f3, b3) = bench_modint256_square::<0xFFFFFFFFFFFFFF43, 0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF>();
    bx ^= b1 ^ b2 ^ b3;
    println!("ModInt256 square: {:11.2} {:11.2} {:11.2}", f1, f2, f3);

    let (f1, b1) = bench_modint256_div::<0xFFFFFFFFFFFFFFFF, 0x00000000FFFFFFFF, 0x0000000000000000, 0xFFFFFFFF00000001>();
    let (f2, b2) = bench_modint256_div::<0xFFFFFFFFFFFFFFED, 0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF, 0x7FFFFFFFFFFFFFFF>();
    let (f3, b3) = bench_modint256_div::<0xFFFFFFFFFFFFFF43, 0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF>();
    bx ^= b1 ^ b2 ^ b3;
    println!("ModInt256 div: {:11.2} {:11.2} {:11.2}", f1, f2, f3);

    let (f1, b1) = bench_modint256_sqrt::<0xFFFFFFFFFFFFFFFF, 0x00000000FFFFFFFF, 0x0000000000000000, 0xFFFFFFFF00000001>();
    let (f2, b2) = bench_modint256_sqrt::<0xFFFFFFFFFFFFFFED, 0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF, 0x7FFFFFFFFFFFFFFF>();
    let (f3, b3) = bench_modint256_sqrt::<0xFFFFFFFFFFFFFF43, 0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF>();
    bx ^= b1 ^ b2 ^ b3;
    println!("ModInt256 sqrt: {:11.2} {:11.2} {:11.2}", f1, f2, f3);

    let (f1, b1) = bench_modint256_legendre::<0xFFFFFFFFFFFFFFFF, 0x00000000FFFFFFFF, 0x0000000000000000, 0xFFFFFFFF00000001>();
    let (f2, b2) = bench_modint256_legendre::<0xFFFFFFFFFFFFFFED, 0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF, 0x7FFFFFFFFFFFFFFF>();
    let (f3, b3) = bench_modint256_legendre::<0xFFFFFFFFFFFFFF43, 0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF>();
    bx ^= b1 ^ b2 ^ b3;
    println!("ModInt256 legendre: {:11.2} {:11.2} {:11.2}", f1, f2, f3);

    let (f1, b1) = bench_modint256_split::<0xFFFFFFFFFFFFFFFF, 0x00000000FFFFFFFF, 0x0000000000000000, 0xFFFFFFFF00000001>();
    let (f2, b2) = bench_modint256_split::<0xFFFFFFFFFFFFFFED, 0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF, 0x7FFFFFFFFFFFFFFF>();
    let (f3, b3) = bench_modint256_split::<0xFFFFFFFFFFFFFF43, 0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF>();
    bx ^= b1 ^ b2 ^ b3;
    println!("ModInt256 split (var) {:11.2} {:11.2} {:11.2}", f1, f2, f3);

    // Generic reduction benchmark, with the BN254 scalar field modulus.
    let (f1, b1) = bench_modint256_reduce::<0x43E1F593F0000001, 0x2833E84879B97091, 0xB85045B68181585D, 0x30644E72E131A029>();
    println!("ModInt256 reduce (gen) {:11.2}", f1);
    bx ^= b1;

    println!("{}", bx);
}

// ===== benches/p256.rs =====

#![allow(non_snake_case)]
#![cfg(feature = "p256")]

mod util;
use util::core_cycles;

use crrl::p256::{Point, Scalar, PrivateKey};
use sha2::{Sha256, Digest};

/// Benchmark for the fixed-base (generator) point multiplication.
fn bench_mulgen() -> (f64, u8) {
    let z =
core_cycles();
    let mut seed = [0u8; 32];
    seed[ 0.. 8].copy_from_slice(&z.to_le_bytes());
    seed[ 8..16].copy_from_slice(&z.to_le_bytes());
    seed[16..24].copy_from_slice(&z.to_le_bytes());
    seed[24..32].copy_from_slice(&z.to_le_bytes());
    let mut s = Scalar::decode_reduce(&seed);
    let mut tt = [0; 100];
    for i in 0..tt.len() {
        let begin = core_cycles();
        for _ in 0..100 {
            let P = Point::mulgen(&s);
            // Data-dependent update so the multiplication is not elided.
            if P.isneutral() != 0 {
                s += Scalar::ZERO;
            } else {
                s += Scalar::ONE;
            }
        }
        let end = core_cycles();
        tt[i] = end.wrapping_sub(begin);
    }
    tt.sort();
    ((tt[tt.len() >> 1] as f64) / 100.0, s.encode32()[0])
}

/// Benchmark for the generic point multiplication.
fn bench_mul() -> (f64, u8) {
    let z = core_cycles();
    let mut seed = [0u8; 32];
    seed[ 0.. 8].copy_from_slice(&z.to_le_bytes());
    seed[ 8..16].copy_from_slice(&z.to_le_bytes());
    seed[16..24].copy_from_slice(&z.to_le_bytes());
    seed[24..32].copy_from_slice(&z.to_le_bytes());
    let mut s = Scalar::decode_reduce(&seed);
    let mut P = Point::mulgen(&s);
    let mut tt = [0; 100];
    for i in 0..tt.len() {
        let begin = core_cycles();
        for _ in 0..100 {
            P *= s;
            if P.isneutral() != 0 {
                s += Scalar::ZERO;
            } else {
                s += Scalar::ONE;
            }
        }
        let end = core_cycles();
        tt[i] = end.wrapping_sub(begin);
    }
    tt.sort();
    ((tt[tt.len() >> 1] as f64) / 100.0, s.encode32()[0])
}

/// Benchmark for the combined u*P + v*G computation (variable-time),
/// as used in ECDSA signature verification.
fn bench_mul_add_mulgen() -> (f64, u8) {
    let z = core_cycles();
    let mut uu = [Scalar::ZERO; 128];
    let mut vv = [Scalar::ZERO; 128];
    let mut sh = Sha256::new();
    for i in 0..128 {
        sh.update(z.to_le_bytes());
        sh.update(((2 * i + 0) as u64).to_le_bytes());
        let b1 = sh.finalize_reset();
        sh.update(z.to_le_bytes());
        sh.update(((2 * i + 1) as u64).to_le_bytes());
        let b2 = sh.finalize_reset();
        uu[i] = Scalar::decode_reduce(&b1);
        vv[i] = Scalar::decode_reduce(&b2);
    }
    let mut tt = [0; 100];
    let mut P = Point::mulgen(&uu[127]);
    for i in 0..tt.len() {
        let begin = core_cycles();
        for j in 0..128 {
            let ku = (i + j) & 127;
            let kv = i.wrapping_sub(j) & 127;
            let Q = P.mul_add_mulgen_vartime(&uu[ku], &vv[kv]);
            P += Q;
        }
        let end = core_cycles();
        tt[i] = end.wrapping_sub(begin);
    }
    tt.sort();
    ((tt[tt.len() >> 1] as f64) / 128.0, P.encode_compressed()[0])
}

/// Benchmark for ECDSA signature generation over a pre-hashed message.
fn bench_skey_sign() -> (f64, u8) {
    let z = core_cycles();
    let mut sh = Sha256::new();
    sh.update(&z.to_le_bytes());
    sh.update(&[0x00u8]);
    let s1 = sh.finalize_reset();
    sh.update(&z.to_le_bytes());
    sh.update(&[0x01u8]);
    let s2 = sh.finalize_reset();
    let mut seed = [0u8; 48];
    seed[..32].copy_from_slice(&s1);
    seed[32..].copy_from_slice(&s2[..16]);
    let skey = PrivateKey::from_seed(&seed);
    let mut tt = [0; 100];
    let mut msg = [0u8; 32];
    for i in 0..tt.len() {
        let begin = core_cycles();
        for _ in 0..100 {
            let sig = skey.sign_hash(&msg, &[]);
            // Feed part of the signature back as the next message.
            msg[..].copy_from_slice(&sig[0..32]);
        }
        let end = core_cycles();
        tt[i] = end.wrapping_sub(begin);
    }
    tt.sort();
    ((tt[tt.len() >> 1] as f64) / 100.0, msg[0])
}

/// Benchmark for ECDSA signature verification over a pre-hashed message.
fn bench_pkey_verify() -> (f64, u8) {
    let z = core_cycles();
    let mut sh = Sha256::new();
    sh.update(&z.to_le_bytes());
    sh.update(&[0x00u8]);
    let s1 = sh.finalize_reset();
    sh.update(&z.to_le_bytes());
    sh.update(&[0x01u8]);
    let s2 = sh.finalize_reset();
    let mut seed = [0u8; 48];
    seed[..32].copy_from_slice(&s1);
    seed[32..].copy_from_slice(&s2[..16]);
    let skey = PrivateKey::from_seed(&seed);
    let pkey = skey.to_public_key();
    let mut sigs = [[0u8; 64]; 128];
    for i in 0..128 {
        let msg = [i as u8; 32];
        let sig = skey.sign_hash(&msg, &[]);
        sigs[i][..].copy_from_slice(&sig);
    }
    let mut tt = [0; 100];
    let mut msg = [0u8; 32];
    for i in 0..tt.len() {
        let begin = core_cycles();
        for j in 0..128 {
            let ff = pkey.verify_hash(&sigs[j], &msg);
            // Mangle the inputs depending on the verification outcome.
            sigs[j][40] ^= 1u8.wrapping_add(ff as u8);
            msg[3] ^= 3u8.wrapping_sub(ff as u8);
        }
        let end = core_cycles();
        tt[i] = end.wrapping_sub(begin);
    }
    tt.sort();
    ((tt[tt.len() >> 1] as f64) / 128.0, msg[0])
}

/// Benchmark for verification of truncated signatures (`rm` removed bits).
/// Returns (valid-signature cost, invalid-signature cost, check byte).
#[cfg(feature = "alloc")]
fn bench_pkey_verify_trunc(rm: usize) -> (f64, f64, u8) {
    let z = core_cycles();
    let mut sh = Sha256::new();
    sh.update(&z.to_le_bytes());
    sh.update(&[0x00u8]);
    let s1 = sh.finalize_reset();
    sh.update(&z.to_le_bytes());
    sh.update(&[0x01u8]);
    let s2 = sh.finalize_reset();
    let mut seed = [0u8; 48];
    seed[..32].copy_from_slice(&s1);
    seed[32..].copy_from_slice(&s2[..16]);
    let skey = PrivateKey::from_seed(&seed);
    let pkey = skey.to_public_key();
    let mut sigs = [[0u8; 64]; 256];
    for i in 0..256 {
        let hv = [i as u8; 32];
        let sig = skey.sign_hash(&hv, &[]);
        let sig = PrivateKey::prepare_truncate(&sig).unwrap();
        sigs[i][..].copy_from_slice(&sig);
    }
    let mut x = 0;

    // Phase 1: all signatures are correct.
    let mut tt = [0; 2048];
    for i in 0..tt.len() {
        let hv = [i as u8; 32];
        let begin = core_cycles();
        x ^= (pkey.verify_trunc_hash(&sigs[i % 256], rm, &hv).is_some()) as u8;
        let end = core_cycles();
        tt[i] = end.wrapping_sub(begin);
    }
    tt.sort();
    // Remove 10% slowest and 10% fastest, make an average of the rest.
    let n10 = tt.len() / 10;
    let n80 = tt.len() - 2 * n10;
    let mut s = 0u64;
    for i in n10..(tt.len() - n10) {
        s += tt[i];
    }
    let res1 = (s as f64) / (n80 as f64);

    // Phase 2: all signatures are invalid.
    // Much less variance is expected in that case; we can use a lower
    // number of samples.
    let mut tt = [0; 128];
    for i in 0..tt.len() {
        let hv = [(i + 1) as u8; 32];
        let begin = core_cycles();
        x ^= (pkey.verify_trunc_hash(&sigs[i % 256], rm, &hv).is_some()) as u8;
        let end = core_cycles();
        tt[i] = end.wrapping_sub(begin);
    }
    tt.sort();
    // Remove 10% slowest and 10% fastest, make an average of the rest.
    let n10 = tt.len() / 10;
    let n80 = tt.len() - 2 * n10;
    let mut s = 0u64;
    for i in n10..(tt.len() - n10) {
        s += tt[i];
    }
    let res2 = (s as f64) / (n80 as f64);

    (res1, res2, x)
}

fn main() {
    let mut bx = 0u8;

    let (v, x) = bench_mul();
    bx ^= x;
    println!("P-256 point mul: {:13.2}", v);
    let (v, x) = bench_mulgen();
    bx ^= x;
    println!("P-256 point mulgen: {:13.2}", v);
    let (v, x) = bench_mul_add_mulgen();
    bx ^= x;
    println!("P-256 point mul_add_mulgen: {:13.2}", v);
    let (v, x) = bench_skey_sign();
    bx ^= x;
    println!("P-256 sign: {:13.2}", v);
    let (v, x) = bench_pkey_verify();
    bx ^= x;
    println!("P-256 verify: {:13.2}", v);

    #[cfg(feature = "alloc")]
    {
        let (v1, v2, x) = bench_pkey_verify_trunc(8);
        bx ^= x;
        println!("P-256 verify_trunc8: {:13.2} {:13.2}", v1, v2);
        let (v1, v2, x) = bench_pkey_verify_trunc(16);
        bx ^= x;
        println!("P-256 verify_trunc16: {:13.2} {:13.2}", v1, v2);
        /*
        let (v1, v2, x) = bench_pkey_verify_trunc(24);
        bx ^= x;
        println!("P-256 verify_trunc24: {:13.2} {:13.2}", v1, v2);
        let (v1, v2, x) =
bench_pkey_verify_trunc(28); 258 | bx ^= x; 259 | println!("P-256 verify_trunc28: {:13.2} {:13.2}", v1, v2); 260 | let (v1, v2, x) = bench_pkey_verify_trunc(32); 261 | bx ^= x; 262 | println!("P-256 verify_trunc32: {:13.2} {:13.2}", v1, v2); 263 | */ 264 | } 265 | 266 | println!("{}", bx); 267 | } 268 | -------------------------------------------------------------------------------- /benches/ristretto255.rs: -------------------------------------------------------------------------------- 1 | #![allow(non_snake_case)] 2 | #![cfg(feature = "ristretto255")] 3 | 4 | mod util; 5 | use util::core_cycles; 6 | 7 | use crrl::ristretto255::Point; 8 | 9 | fn bench_decode() -> (f64, u8) { 10 | let z = core_cycles(); 11 | let mut buf = [0u8; 32]; 12 | buf[ 0.. 8].copy_from_slice(&z.to_le_bytes()); 13 | buf[ 8..16].copy_from_slice(&z.to_le_bytes()); 14 | buf[16..24].copy_from_slice(&z.to_le_bytes()); 15 | buf[24..32].copy_from_slice(&z.to_le_bytes()); 16 | let mut tt = [0; 10]; 17 | let mut P = Point::NEUTRAL; 18 | let Q = Point::BASE * z; 19 | for i in 0..10 { 20 | let begin = core_cycles(); 21 | for _ in 0..100 { 22 | let r = P.set_decode(&buf); 23 | buf[0] = buf[0].wrapping_add(1); 24 | buf[1] = buf[1].wrapping_add(r as u8); 25 | buf[2] = buf[2].wrapping_add(P.equals(Q) as u8); 26 | } 27 | let end = core_cycles(); 28 | tt[i] = end.wrapping_sub(begin); 29 | } 30 | tt.sort(); 31 | ((tt[4] as f64) / 100.0, buf[0]) 32 | } 33 | 34 | fn bench_encode() -> (f64, u8) { 35 | let z = core_cycles(); 36 | let mut P = Point::BASE * z; 37 | let mut tt = [0; 10]; 38 | for i in 0..10 { 39 | let begin = core_cycles(); 40 | for _ in 0..100 { 41 | let x = P.encode()[0]; 42 | if x & 1 == 0 { 43 | P = -P; 44 | } 45 | } 46 | let end = core_cycles(); 47 | tt[i] = end.wrapping_sub(begin); 48 | } 49 | tt.sort(); 50 | ((tt[4] as f64) / 100.0, P.encode()[0]) 51 | } 52 | 53 | fn main() { 54 | let mut bx = 0u8; 55 | 56 | let (v, x) = bench_decode(); 57 | bx ^= x; 58 | println!("Ristretto255 decode: 
{:13.2}", v); 59 | let (v, x) = bench_encode(); 60 | bx ^= x; 61 | println!("Ristretto255 encode: {:13.2}", v); 62 | 63 | println!("{}", bx); 64 | } 65 | -------------------------------------------------------------------------------- /benches/sc448.rs: -------------------------------------------------------------------------------- 1 | #![cfg(feature = "ed448")] 2 | 3 | mod util; 4 | use util::core_cycles; 5 | 6 | use crrl::ed448::Scalar; 7 | 8 | fn bench_sc448_add() { 9 | let z = core_cycles(); 10 | let mut x = Scalar::from_w64le([ z, z.wrapping_mul(3), z.wrapping_mul(5), 11 | z.wrapping_mul(7), z.wrapping_mul(9), z.wrapping_mul(11), 12 | z.wrapping_mul(13) ]); 13 | let mut y = x + Scalar::ONE; 14 | let mut tt = [0; 10]; 15 | for i in 0..10 { 16 | let begin = core_cycles(); 17 | for _ in 0..1000 { 18 | x += y; 19 | y += x; 20 | x += y; 21 | y += x; 22 | x += y; 23 | y += x; 24 | } 25 | let end = core_cycles(); 26 | tt[i] = end.wrapping_sub(begin); 27 | } 28 | tt.sort(); 29 | println!("sc448 add: {:11.2} ({})", (tt[4] as f64) / 6000.0, x.encode()[0]); 30 | } 31 | 32 | fn bench_sc448_sub() { 33 | let z = core_cycles(); 34 | let mut x = Scalar::from_w64le([ z, z.wrapping_mul(3), z.wrapping_mul(5), 35 | z.wrapping_mul(7), z.wrapping_mul(9), z.wrapping_mul(11), 36 | z.wrapping_mul(13) ]); 37 | let mut y = x + Scalar::ONE; 38 | let mut tt = [0; 10]; 39 | for i in 0..10 { 40 | let begin = core_cycles(); 41 | for _ in 0..1000 { 42 | x -= y; 43 | y -= x; 44 | x -= y; 45 | y -= x; 46 | x -= y; 47 | y -= x; 48 | } 49 | let end = core_cycles(); 50 | tt[i] = end.wrapping_sub(begin); 51 | } 52 | tt.sort(); 53 | println!("sc448 sub: {:11.2} ({})", (tt[4] as f64) / 6000.0, x.encode()[0]); 54 | } 55 | 56 | fn bench_sc448_mul() { 57 | let z = core_cycles(); 58 | let mut x = Scalar::from_w64le([ z, z.wrapping_mul(3), z.wrapping_mul(5), 59 | z.wrapping_mul(7), z.wrapping_mul(9), z.wrapping_mul(11), 60 | z.wrapping_mul(13) ]); 61 | let mut y = x + Scalar::ONE; 62 | let mut tt = [0; 
10]; 63 | for i in 0..10 { 64 | let begin = core_cycles(); 65 | for _ in 0..1000 { 66 | x *= y; 67 | y *= x; 68 | x *= y; 69 | y *= x; 70 | x *= y; 71 | y *= x; 72 | } 73 | let end = core_cycles(); 74 | tt[i] = end.wrapping_sub(begin); 75 | } 76 | tt.sort(); 77 | println!("sc448 mul: {:11.2} ({})", (tt[4] as f64) / 6000.0, x.encode()[0]); 78 | } 79 | 80 | fn bench_sc448_square() { 81 | let z = core_cycles(); 82 | let mut x = Scalar::from_w64le([ z, z.wrapping_mul(3), z.wrapping_mul(5), 83 | z.wrapping_mul(7), z.wrapping_mul(9), z.wrapping_mul(11), 84 | z.wrapping_mul(13) ]); 85 | let mut tt = [0; 10]; 86 | for i in 0..10 { 87 | let begin = core_cycles(); 88 | x = x.xsquare(6000); 89 | let end = core_cycles(); 90 | tt[i] = end.wrapping_sub(begin); 91 | } 92 | tt.sort(); 93 | println!("sc448 square: {:11.2} ({})", (tt[4] as f64) / 6000.0, x.encode()[0]); 94 | } 95 | 96 | fn bench_sc448_div() { 97 | let z = core_cycles(); 98 | let mut x = Scalar::from_w64le([ z, z.wrapping_mul(3), z.wrapping_mul(5), 99 | z.wrapping_mul(7), z.wrapping_mul(9), z.wrapping_mul(11), 100 | z.wrapping_mul(13) ]); 101 | let mut y = x + Scalar::ONE; 102 | let mut tt = [0; 10]; 103 | for i in 0..10 { 104 | let begin = core_cycles(); 105 | for _ in 0..1000 { 106 | x /= y; 107 | y /= x; 108 | x /= y; 109 | y /= x; 110 | x /= y; 111 | y /= x; 112 | } 113 | let end = core_cycles(); 114 | tt[i] = end.wrapping_sub(begin); 115 | } 116 | tt.sort(); 117 | println!("sc448 div: {:11.2} ({})", (tt[4] as f64) / 6000.0, x.encode()[0]); 118 | } 119 | 120 | fn bench_sc448_sqrt() { 121 | let z = core_cycles(); 122 | let mut x = Scalar::from_w64le([ z, z.wrapping_mul(3), z.wrapping_mul(5), 123 | z.wrapping_mul(7), z.wrapping_mul(9), z.wrapping_mul(11), 124 | z.wrapping_mul(13) ]); 125 | let mut tt = [0; 10]; 126 | for i in 0..10 { 127 | let begin = core_cycles(); 128 | for _ in 0..6000 { 129 | let (x2, _) = x.sqrt(); 130 | x += x2 + Scalar::ONE; 131 | } 132 | let end = core_cycles(); 133 | tt[i] = 
end.wrapping_sub(begin); 134 | } 135 | tt.sort(); 136 | println!("sc448 sqrt: {:11.2} ({})", (tt[4] as f64) / 6000.0, x.encode()[0]); 137 | } 138 | 139 | fn bench_sc448_legendre() { 140 | let z = core_cycles(); 141 | let mut x = Scalar::from_w64le([ z, z.wrapping_mul(3), z.wrapping_mul(5), 142 | z.wrapping_mul(7), z.wrapping_mul(9), z.wrapping_mul(11), 143 | z.wrapping_mul(13) ]); 144 | let mut tt = [0; 10]; 145 | for i in 0..10 { 146 | let begin = core_cycles(); 147 | for _ in 0..6000 { 148 | let ls = x.legendre(); 149 | x += Scalar::from_w64le([ ls as u64, ls as u64, ls as u64, 150 | ls as u64, ls as u64, ls as u64, ls as u64 ]); 151 | } 152 | let end = core_cycles(); 153 | tt[i] = end.wrapping_sub(begin); 154 | } 155 | tt.sort(); 156 | println!("sc448 legendre: {:11.2} ({})", (tt[4] as f64) / 6000.0, x.encode()[0]); 157 | } 158 | 159 | fn main() { 160 | bench_sc448_add(); 161 | bench_sc448_sub(); 162 | bench_sc448_mul(); 163 | bench_sc448_square(); 164 | bench_sc448_div(); 165 | bench_sc448_sqrt(); 166 | bench_sc448_legendre(); 167 | } 168 | -------------------------------------------------------------------------------- /benches/secp256k1.rs: -------------------------------------------------------------------------------- 1 | #![allow(non_snake_case)] 2 | #![cfg(feature = "secp256k1")] 3 | 4 | mod util; 5 | use util::core_cycles; 6 | 7 | use crrl::secp256k1::{Point, Scalar, PrivateKey}; 8 | use sha2::{Sha256, Digest}; 9 | 10 | fn bench_mulgen() -> (f64, u8) { 11 | let z = core_cycles(); 12 | let mut seed = [0u8; 32]; 13 | seed[ 0.. 
8].copy_from_slice(&z.to_le_bytes()); 14 | seed[ 8..16].copy_from_slice(&z.to_le_bytes()); 15 | seed[16..24].copy_from_slice(&z.to_le_bytes()); 16 | seed[24..32].copy_from_slice(&z.to_le_bytes()); 17 | let mut s = Scalar::decode_reduce(&seed); 18 | let mut tt = [0; 100]; 19 | for i in 0..tt.len() { 20 | let begin = core_cycles(); 21 | for _ in 0..100 { 22 | let P = Point::mulgen(&s); 23 | if P.isneutral() != 0 { 24 | s += Scalar::ZERO; 25 | } else { 26 | s += Scalar::ONE; 27 | } 28 | } 29 | let end = core_cycles(); 30 | tt[i] = end.wrapping_sub(begin); 31 | } 32 | tt.sort(); 33 | ((tt[tt.len() >> 1] as f64) / 100.0, s.encode32()[0]) 34 | } 35 | 36 | fn bench_mul() -> (f64, u8) { 37 | let z = core_cycles(); 38 | let mut seed = [0u8; 32]; 39 | seed[ 0.. 8].copy_from_slice(&z.to_le_bytes()); 40 | seed[ 8..16].copy_from_slice(&z.to_le_bytes()); 41 | seed[16..24].copy_from_slice(&z.to_le_bytes()); 42 | seed[24..32].copy_from_slice(&z.to_le_bytes()); 43 | let mut s = Scalar::decode_reduce(&seed); 44 | let mut P = Point::mulgen(&s); 45 | let mut tt = [0; 100]; 46 | for i in 0..tt.len() { 47 | let begin = core_cycles(); 48 | for _ in 0..100 { 49 | P *= s; 50 | if P.isneutral() != 0 { 51 | s += Scalar::ZERO; 52 | } else { 53 | s += Scalar::ONE; 54 | } 55 | } 56 | let end = core_cycles(); 57 | tt[i] = end.wrapping_sub(begin); 58 | } 59 | tt.sort(); 60 | ((tt[tt.len() >> 1] as f64) / 100.0, s.encode32()[0]) 61 | } 62 | 63 | fn bench_mul_add_mulgen() -> (f64, u8) { 64 | let z = core_cycles(); 65 | let mut uu = [Scalar::ZERO; 128]; 66 | let mut vv = [Scalar::ZERO; 128]; 67 | let mut sh = Sha256::new(); 68 | for i in 0..128 { 69 | sh.update(z.to_le_bytes()); 70 | sh.update(((2 * i + 0) as u64).to_le_bytes()); 71 | let b1 = sh.finalize_reset(); 72 | sh.update(z.to_le_bytes()); 73 | sh.update(((2 * i + 1) as u64).to_le_bytes()); 74 | let b2 = sh.finalize_reset(); 75 | uu[i] = Scalar::decode_reduce(&b1); 76 | vv[i] = Scalar::decode_reduce(&b2); 77 | } 78 | let mut tt = [0; 100]; 79 
| let mut P = Point::mulgen(&uu[127]); 80 | for i in 0..tt.len() { 81 | let begin = core_cycles(); 82 | for j in 0..128 { 83 | let ku = (i + j) & 127; 84 | let kv = i.wrapping_sub(j) & 127; 85 | let Q = P.mul_add_mulgen_vartime(&uu[ku], &vv[kv]); 86 | P += Q; 87 | } 88 | let end = core_cycles(); 89 | tt[i] = end.wrapping_sub(begin); 90 | } 91 | tt.sort(); 92 | ((tt[tt.len() >> 1] as f64) / 128.0, P.encode_compressed()[0]) 93 | } 94 | 95 | fn bench_skey_sign() -> (f64, u8) { 96 | let z = core_cycles(); 97 | let mut sh = Sha256::new(); 98 | sh.update(&z.to_le_bytes()); 99 | sh.update(&[0x00u8]); 100 | let s1 = sh.finalize_reset(); 101 | sh.update(&z.to_le_bytes()); 102 | sh.update(&[0x01u8]); 103 | let s2 = sh.finalize_reset(); 104 | let mut seed = [0u8; 48]; 105 | seed[..32].copy_from_slice(&s1); 106 | seed[32..].copy_from_slice(&s2[..16]); 107 | let skey = PrivateKey::from_seed(&seed); 108 | let mut tt = [0; 100]; 109 | let mut msg = [0u8; 32]; 110 | for i in 0..tt.len() { 111 | let begin = core_cycles(); 112 | for _ in 0..100 { 113 | let sig = skey.sign_hash(&msg, &[]); 114 | msg[..].copy_from_slice(&sig[0..32]); 115 | } 116 | let end = core_cycles(); 117 | tt[i] = end.wrapping_sub(begin); 118 | } 119 | tt.sort(); 120 | ((tt[tt.len() >> 1] as f64) / 100.0, msg[0]) 121 | } 122 | 123 | fn bench_pkey_verify() -> (f64, u8) { 124 | let z = core_cycles(); 125 | let mut sh = Sha256::new(); 126 | sh.update(&z.to_le_bytes()); 127 | sh.update(&[0x00u8]); 128 | let s1 = sh.finalize_reset(); 129 | sh.update(&z.to_le_bytes()); 130 | sh.update(&[0x01u8]); 131 | let s2 = sh.finalize_reset(); 132 | let mut seed = [0u8; 48]; 133 | seed[..32].copy_from_slice(&s1); 134 | seed[32..].copy_from_slice(&s2[..16]); 135 | let skey = PrivateKey::from_seed(&seed); 136 | let pkey = skey.to_public_key(); 137 | let mut sigs = [[0u8; 64]; 128]; 138 | for i in 0..128 { 139 | let msg = [i as u8; 32]; 140 | let sig = skey.sign_hash(&msg, &[]); 141 | sigs[i][..].copy_from_slice(&sig); 142 | } 143 | 
let mut tt = [0; 100]; 144 | let mut msg = [0u8; 32]; 145 | for i in 0..tt.len() { 146 | let begin = core_cycles(); 147 | for j in 0..128 { 148 | let ff = pkey.verify_hash(&sigs[j], &msg); 149 | sigs[j][40] ^= 1u8.wrapping_add(ff as u8); 150 | msg[3] ^= 3u8.wrapping_sub(ff as u8); 151 | } 152 | let end = core_cycles(); 153 | tt[i] = end.wrapping_sub(begin); 154 | } 155 | tt.sort(); 156 | ((tt[tt.len() >> 1] as f64) / 128.0, msg[0]) 157 | } 158 | 159 | fn main() { 160 | let mut bx = 0u8; 161 | 162 | let (v, x) = bench_mul(); 163 | bx ^= x; 164 | println!("secp256k1 point mul: {:13.2}", v); 165 | let (v, x) = bench_mulgen(); 166 | bx ^= x; 167 | println!("secp256k1 point mulgen: {:13.2}", v); 168 | let (v, x) = bench_mul_add_mulgen(); 169 | bx ^= x; 170 | println!("secp256k1 point mul_add_mulgen:{:13.2}", v); 171 | let (v, x) = bench_skey_sign(); 172 | bx ^= x; 173 | println!("secp256k1 sign: {:13.2}", v); 174 | let (v, x) = bench_pkey_verify(); 175 | bx ^= x; 176 | println!("secp256k1 verify: {:13.2}", v); 177 | 178 | println!("{}", bx); 179 | } 180 | -------------------------------------------------------------------------------- /benches/util.rs: -------------------------------------------------------------------------------- 1 | #[cfg(target_arch = "x86")] 2 | pub fn core_cycles() -> u64 { 3 | use core::arch::x86::{_mm_lfence, _rdtsc}; 4 | unsafe { 5 | _mm_lfence(); 6 | _rdtsc() 7 | } 8 | } 9 | 10 | #[cfg(target_arch = "x86_64")] 11 | pub fn core_cycles() -> u64 { 12 | use core::arch::x86_64::{_mm_lfence, _rdtsc}; 13 | unsafe { 14 | _mm_lfence(); 15 | _rdtsc() 16 | } 17 | } 18 | 19 | #[cfg(target_arch = "aarch64")] 20 | pub fn core_cycles() -> u64 { 21 | use core::arch::asm; 22 | let mut x: u64; 23 | unsafe { 24 | asm!("dsb sy", "mrs {}, pmccntr_el0", out(reg) x); 25 | } 26 | x 27 | } 28 | 29 | #[cfg(target_arch = "riscv64")] 30 | pub fn core_cycles() -> u64 { 31 | use core::arch::asm; 32 | let mut x: u64; 33 | unsafe { 34 | asm!("rdcycle {}", out(reg) x); 35 | } 
36 | x 37 | } 38 | -------------------------------------------------------------------------------- /benches/x25519.rs: -------------------------------------------------------------------------------- 1 | #![allow(non_snake_case)] 2 | #![cfg(feature = "x25519")] 3 | 4 | mod util; 5 | use util::core_cycles; 6 | 7 | use crrl::x25519::{x25519, x25519_base}; 8 | 9 | fn bench_x25519() -> (f64, u8) { 10 | let z = core_cycles(); 11 | let mut b = [0u8; 32]; 12 | b[ 0.. 8].copy_from_slice(&z.to_le_bytes()); 13 | b[ 8..16].copy_from_slice(&z.to_le_bytes()); 14 | b[16..24].copy_from_slice(&z.to_le_bytes()); 15 | b[24..32].copy_from_slice(&z.to_le_bytes()); 16 | let mut tt = [0; 100]; 17 | for i in 0..tt.len() { 18 | let begin = core_cycles(); 19 | for _ in 0..100 { 20 | b = x25519(&b, &b); 21 | } 22 | let end = core_cycles(); 23 | tt[i] = end.wrapping_sub(begin); 24 | } 25 | tt.sort(); 26 | ((tt[tt.len() >> 1] as f64) / 100.0, b[0]) 27 | } 28 | 29 | fn bench_x25519_base() -> (f64, u8) { 30 | let z = core_cycles(); 31 | let mut b = [0u8; 32]; 32 | b[ 0.. 
8].copy_from_slice(&z.to_le_bytes()); 33 | b[ 8..16].copy_from_slice(&z.to_le_bytes()); 34 | b[16..24].copy_from_slice(&z.to_le_bytes()); 35 | b[24..32].copy_from_slice(&z.to_le_bytes()); 36 | let mut tt = [0; 100]; 37 | for i in 0..tt.len() { 38 | let begin = core_cycles(); 39 | for _ in 0..100 { 40 | b = x25519_base(&b); 41 | } 42 | let end = core_cycles(); 43 | tt[i] = end.wrapping_sub(begin); 44 | } 45 | tt.sort(); 46 | ((tt[tt.len() >> 1] as f64) / 100.0, b[0]) 47 | } 48 | 49 | fn main() { 50 | let mut bx = 0u8; 51 | 52 | let (v, x) = bench_x25519(); 53 | bx ^= x; 54 | println!("X25519 (generic): {:13.2}", v); 55 | let (v, x) = bench_x25519_base(); 56 | bx ^= x; 57 | println!("X25519 (base point): {:13.2}", v); 58 | 59 | println!("{}", bx); 60 | } 61 | -------------------------------------------------------------------------------- /benches/x448.rs: -------------------------------------------------------------------------------- 1 | #![allow(non_snake_case)] 2 | #![cfg(feature = "x448")] 3 | 4 | mod util; 5 | use util::core_cycles; 6 | 7 | use crrl::x448::{x448, x448_base}; 8 | 9 | fn bench_x448() -> (f64, u8) { 10 | let z = core_cycles(); 11 | let mut b = [0u8; 56]; 12 | b[ 0.. 8].copy_from_slice(&z.to_le_bytes()); 13 | b[ 8..16].copy_from_slice(&z.to_le_bytes()); 14 | b[16..24].copy_from_slice(&z.to_le_bytes()); 15 | b[24..32].copy_from_slice(&z.to_le_bytes()); 16 | b[32..40].copy_from_slice(&z.to_le_bytes()); 17 | b[40..48].copy_from_slice(&z.to_le_bytes()); 18 | b[48..56].copy_from_slice(&z.to_le_bytes()); 19 | let mut tt = [0; 100]; 20 | for i in 0..tt.len() { 21 | let begin = core_cycles(); 22 | for _ in 0..100 { 23 | b = x448(&b, &b); 24 | } 25 | let end = core_cycles(); 26 | tt[i] = end.wrapping_sub(begin); 27 | } 28 | tt.sort(); 29 | ((tt[tt.len() >> 1] as f64) / 100.0, b[0]) 30 | } 31 | 32 | fn bench_x448_base() -> (f64, u8) { 33 | let z = core_cycles(); 34 | let mut b = [0u8; 56]; 35 | b[ 0.. 
8].copy_from_slice(&z.to_le_bytes()); 36 | b[ 8..16].copy_from_slice(&z.to_le_bytes()); 37 | b[16..24].copy_from_slice(&z.to_le_bytes()); 38 | b[24..32].copy_from_slice(&z.to_le_bytes()); 39 | b[32..40].copy_from_slice(&z.to_le_bytes()); 40 | b[40..48].copy_from_slice(&z.to_le_bytes()); 41 | b[48..56].copy_from_slice(&z.to_le_bytes()); 42 | let mut tt = [0; 100]; 43 | for i in 0..tt.len() { 44 | let begin = core_cycles(); 45 | for _ in 0..100 { 46 | b = x448_base(&b); 47 | } 48 | let end = core_cycles(); 49 | tt[i] = end.wrapping_sub(begin); 50 | } 51 | tt.sort(); 52 | ((tt[tt.len() >> 1] as f64) / 100.0, b[0]) 53 | } 54 | 55 | fn main() { 56 | let mut bx = 0u8; 57 | 58 | let (v, x) = bench_x448(); 59 | bx ^= x; 60 | println!("X448 (generic): {:13.2}", v); 61 | let (v, x) = bench_x448_base(); 62 | bx ^= x; 63 | println!("X448 (base point): {:13.2}", v); 64 | 65 | println!("{}", bx); 66 | } 67 | -------------------------------------------------------------------------------- /extra/frost-sample.rs: -------------------------------------------------------------------------------- 1 | // This sample code shows how to use the crrl FROST implementation. 2 | 3 | use crrl::frost::ristretto255::{ 4 | GroupPrivateKey, 5 | SignerPrivateKeyShare, 6 | SignerPublicKey, 7 | KeySplitter, 8 | VSSElement, 9 | SignatureShare, 10 | Commitment, 11 | Nonce, 12 | Coordinator, 13 | }; 14 | use rand::RngCore; 15 | use rand::rngs::OsRng; 16 | use std::vec::Vec; 17 | 18 | fn main() { 19 | 20 | // We want `max_signers` individual signers, such that a threshold 21 | // of `min_signers` of them is required to compute a signature. 22 | // Rules: 2 <= min_signers <= max_signers <= 65535 23 | let max_signers = 5; 24 | let min_signers = 3; 25 | 26 | // ==================================================================== 27 | // KEY GENERATION 28 | // 29 | // This step happens once. A trusted dealer generates the group private 30 | // key and splits it into individual key shares. 
Each signer receives 31 | // one key share. The signers can verify a VSS commitment by the dealer 32 | // to validate that the split was performed correctly (though the trusted 33 | // dealer is still trusted with using a proper entropy source for the 34 | // private key, and not remembering any secret afterwards). 35 | 36 | // =========== trusted dealer =========== 37 | 38 | // Generate a group private key. 39 | let mut rng = OsRng::default(); 40 | let group_sk = GroupPrivateKey::generate(&mut rng); 41 | 42 | // Split the key into individual signer key shares. 43 | let (sk_share, vss) = KeySplitter::trusted_split( 44 | &mut rng, group_sk, min_signers, max_signers); 45 | 46 | // Send its key share to each signer. 47 | // Optionally: also send the VSS commitment that allows each signer 48 | // to verify that the share was properly generated. 49 | let mut enc_sk_share: Vec<[u8; SignerPrivateKeyShare::ENC_LEN]> = 50 | Vec::new(); 51 | for sks in sk_share.iter() { 52 | enc_sk_share.push(sks.encode()); 53 | } 54 | let enc_vss = VSSElement::encode_list(&vss); 55 | 56 | // Also extract the group public key and each individual signer public 57 | // key; they should be "published" (everybody knows them). 58 | let group_pk = group_sk.get_public_key(); 59 | let mut signer_pk: Vec = Vec::new(); 60 | for sks in sk_share.iter() { 61 | signer_pk.push(sks.get_public_key()); 62 | } 63 | 64 | // =========== signers =========== 65 | 66 | // Each signer receives its private key share, decodes it, and 67 | // optionally verifies the VSS commitment that demonstrates proper 68 | // generation of the share. 69 | // In this example code we simulate all signers in a loop. 70 | 71 | let mut signer_sk_share: Vec = Vec::new(); 72 | for esks in enc_sk_share.iter() { 73 | // All decoding operations return Option so that None 74 | // is obtained on decoding failure. In this example we use unwrap(), 75 | // but this is where some error handling should happen. 
76 | let sks = SignerPrivateKeyShare::decode(esks).unwrap(); 77 | 78 | // Verify the VSS commitment (optional; needed only if the dealing 79 | // process is such that accidental or malicious alteration of shares 80 | // may happen). 81 | let vss = VSSElement::decode_list(&enc_vss).unwrap(); 82 | if !sks.verify_split(&vss) { 83 | panic!("invalid key share"); 84 | } 85 | 86 | // The signer stores its private key share (securely! It's secret). 87 | // As shown above, it can be encoded and decoded, for storage in 88 | // a file or equivalent. In this example, we keep an in-RAM 89 | // structure. 90 | signer_sk_share.push(sks); 91 | } 92 | 93 | // ==================================================================== 94 | // SIGNATURE GENERATION 95 | // 96 | // Whenever a signature must be computed, over a given message, a 97 | // two-round protocol happens: 98 | // 99 | // Round 1: each signer generates a per-signature nonce and associated 100 | // commitment; the commitments are sent to the coordinator. Each signer 101 | // remembers its nonce and commitment. 102 | // 103 | // Round 2: the coordinator selects enough signers (among received 104 | // commitments) to meet the threshold. The corresponding list of 105 | // commitments is sent to the signers, along with the message. Each 106 | // signer computes and sends back to the coordinator a signature 107 | // share. The coordinator assembles the signature shares into the 108 | // signature value. 109 | 110 | // =========== signers =========== 111 | 112 | // Each signer generates a nonce and a commitment. The commitment is 113 | // sent to the coordinator. 
114 | let mut signer_nonce: Vec = Vec::new(); 115 | let mut signer_comm: Vec = Vec::new(); 116 | let mut enc_signer_comm: Vec<[u8; Commitment::ENC_LEN]> = Vec::new(); 117 | for sks in signer_sk_share.iter() { 118 | let (nonce, comm) = sks.commit(&mut rng); 119 | signer_nonce.push(nonce); 120 | signer_comm.push(comm); 121 | enc_signer_comm.push(comm.encode()); 122 | } 123 | 124 | // =========== coordinator =========== 125 | 126 | // The coordinator knows the group public key and the signature 127 | // threshold. 128 | let coordinator = Coordinator::new(min_signers, group_pk).unwrap(); 129 | 130 | // This is the message to sign. 131 | let msg: &[u8] = b"sample"; 132 | 133 | // The coordinator receives _some_ commitments. The commitments may 134 | // be obtained in any order; some may be missing; duplicates are tolerated 135 | // (they are automatically ignored). 136 | // In this example, we apply a random permutation to the 137 | // encoded commitments to simulate some network-induced shuffling. 138 | for i in 0..enc_signer_comm.len() - 1 { 139 | let j = i + (rng.next_u64() as usize) % (enc_signer_comm.len() - i); 140 | if i != j { 141 | let t = enc_signer_comm[i]; 142 | enc_signer_comm[i] = enc_signer_comm[j]; 143 | enc_signer_comm[j] = t; 144 | } 145 | } 146 | 147 | // Decode the commitments and use them to select a proper subset. 148 | // The encoded commitments are sent to the selected signers (the 149 | // selected signers are identified by the 'ident' fields of the 150 | // commitments that have been chosen). 151 | let mut received_signer_comm: Vec = Vec::new(); 152 | for esc in enc_signer_comm.iter() { 153 | let sc = Commitment::decode(esc).unwrap(); 154 | received_signer_comm.push(sc); 155 | } 156 | let chosen_comm = coordinator.choose(&received_signer_comm).unwrap(); 157 | let enc_chosen_comm = Commitment::encode_list(&chosen_comm); 158 | 159 | // =========== signers =========== 160 | 161 | // The selected signers receive the encoded commitments.
The coordinator 162 | // may know who are the selected signers by looking at the identifiers 163 | // (the `Commitment`, `SignerPublicKey` and `SignerPrivateKeyShare` 164 | // all have matching public `ident` fields). Another option (which is 165 | // used below) is to send the encoded commitments to everybody and see 166 | // what they answer; only actually selected signers will respond. 167 | let mut enc_sig_share: Vec<[u8; SignatureShare::ENC_LEN]> = Vec::new(); 168 | for (sks, (nonce, comm)) in signer_sk_share.iter().zip( 169 | signer_nonce.iter().zip(signer_comm)) 170 | { 171 | // The signer knows its private key share (sks), nonce, 172 | // and commitment. 173 | // Note: the commitment could also be recomputed from the nonce, 174 | // using `nonce.get_commitment()`. Remembering the commitment 175 | // saves a few clock cycles. 176 | 177 | // Decode the received commitment list. 178 | let comm_list = Commitment::decode_list(&enc_chosen_comm).unwrap(); 179 | 180 | // Compute the signature share from this signer. This may fail 181 | // if the commitment list is incorrect, but also if this signer 182 | // was not actually selected in the list. 183 | match sks.sign(*nonce, comm, msg, &comm_list) { 184 | Some(ss) => { enc_sig_share.push(ss.encode()); } 185 | None => { } 186 | } 187 | } 188 | 189 | // =========== coordinator =========== 190 | 191 | // The coordinator receives the encoded signature shares (in any order), 192 | // decodes them, then assembles them into the signature. We again 193 | // (for this example) randomly shuffle the list of encoded shares. 194 | for i in 0..enc_sig_share.len() - 1 { 195 | let j = i + (rng.next_u64() as usize) % (enc_sig_share.len() - i); 196 | if i != j { 197 | let t = enc_sig_share[i]; 198 | enc_sig_share[i] = enc_sig_share[j]; 199 | enc_sig_share[j] = t; 200 | } 201 | } 202 | 203 | // Decode the encoded signature shares. 
204 | let mut sig_share: Vec = Vec::new(); 205 | for ess in enc_sig_share.iter() { 206 | sig_share.push(SignatureShare::decode(ess).unwrap()); 207 | } 208 | 209 | // Assemble the signature. This also verifies each share, _and_ checks 210 | // that the assembled signature is valid. 211 | // The coordinator uses the known signer public keys (signer_pk list); 212 | // that list can be provided in any order and also contain public keys of 213 | // signers that were not selected for this signature generation. 214 | let sig = coordinator.assemble_signature( 215 | &sig_share, &chosen_comm, &signer_pk, msg).unwrap(); 216 | 217 | // The signature can be encoded into bytes. 218 | let esig = sig.encode(); 219 | 220 | // ==================================================================== 221 | // SIGNATURE VERIFICATION 222 | 223 | // Generated signatures can be verified against the group public key. 224 | if !group_pk.verify_esig(&esig, msg) { 225 | panic!("signature verification failed"); 226 | } 227 | println!("OK"); 228 | } 229 | -------------------------------------------------------------------------------- /extra/mkuxcomp.sage: -------------------------------------------------------------------------------- 1 | #! /usr/bin/env sage 2 | 3 | # This Sage script computes and prints the UX_COMP[] array of precomputed 4 | # values that are used to support efficient verification of truncated 5 | # Ed25519 signatures. 6 | # 7 | # We work in the Montgomery domain of Curve25519: y^2 = x^3 + 486662*x^2 + x 8 | # B = conventional generator 9 | # For i = 0 to 16384, let U_i = i*(2^240)*B, and x_i = x coordinate of U_i. 10 | # We consider x_i as an integer (with 0 <= x_i < 2^255-19) and define: 11 | # z_i = (x_i % 2^48)*2^16 + i (as an unsigned 64-bit integer) 12 | # The produced UX_COMP[] array contains the 16385 values z_i in ascending 13 | # numerical order. 
14 | 15 | import importlib 16 | import hashlib 17 | 18 | def mkuxcomp(): 19 | p = 2**255 - 19 20 | K = Zmod(p) 21 | E = EllipticCurve(K, [0, 486662, 0, 1, 0]) 22 | B = E.point([9, 14781619447589544791020593568409986887264606134616475288964881837755586237401]) 23 | tt = [] 24 | T = E.point([0, 1, 0]) 25 | P = (2**240)*B 26 | for i in range(0, 16385): 27 | if T.is_zero(): 28 | x = K(0) 29 | else: 30 | x = T.xy()[0] 31 | tt.append(int(i) + ((int(x) % 2**48) << 16)) 32 | T = T + P 33 | tt.sort() 34 | print('static UX_COMP: [u64; 16385] = [', end='') 35 | for i in range(0, len(tt)): 36 | if (i % 3) == 0: 37 | print() 38 | print(' ', end='') 39 | else: 40 | print(' ', end='') 41 | print('0x%016X,' % int(tt[i]), end='') 42 | print() 43 | print('];') 44 | 45 | mkuxcomp() 46 | -------------------------------------------------------------------------------- /extra/truncsig.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pornin/crrl/4cc7cbbe8796ee8d459b815d81318603279879e4/extra/truncsig.pdf -------------------------------------------------------------------------------- /src/backend/w32/gf448.rs: -------------------------------------------------------------------------------- 1 | // TODO: make a dedicated GF448 implementation, leveraging the special 2 | // modulus format. For now, we use the generic code. 
3 | 4 | use super::gfgen::{define_gfgen, define_gfgen_tests}; 5 | 6 | struct GF448Params; 7 | impl GF448Params { 8 | 9 | const MODULUS: [u64; 7] = [ 10 | 0xFFFFFFFFFFFFFFFF, 11 | 0xFFFFFFFFFFFFFFFF, 12 | 0xFFFFFFFFFFFFFFFF, 13 | 0xFFFFFFFEFFFFFFFF, 14 | 0xFFFFFFFFFFFFFFFF, 15 | 0xFFFFFFFFFFFFFFFF, 16 | 0xFFFFFFFFFFFFFFFF, 17 | ]; 18 | } 19 | 20 | define_gfgen!(GF448, GF448Params, gf448mod, false); 21 | define_gfgen_tests!(GF448, 7, test_gf448mod); 22 | -------------------------------------------------------------------------------- /src/backend/w32/gfsecp256k1.rs: -------------------------------------------------------------------------------- 1 | pub type GFsecp256k1 = super::modint::ModInt256< 2 | 0xFFFFFFFEFFFFFC2F, 0xFFFFFFFFFFFFFFFF, 3 | 0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF>; 4 | 5 | impl GFsecp256k1 { 6 | 7 | // TODO: replace these functions with set_mul_small(), when the latter 8 | // is implemented. 9 | pub fn set_mul21(&mut self) { 10 | *self *= Self::w64be(0, 0, 0, 21); 11 | } 12 | pub fn mul21(self) -> Self { 13 | self * Self::w64be(0, 0, 0, 21) 14 | } 15 | 16 | #[inline(always)] 17 | pub fn encode(self) -> [u8; Self::ENC_LEN] { 18 | self.encode32() 19 | } 20 | } 21 | -------------------------------------------------------------------------------- /src/backend/w32/mod.rs: -------------------------------------------------------------------------------- 1 | mod zz; 2 | pub use zz::{Zu128, Zu256, Zu384}; 3 | 4 | #[cfg(any( 5 | feature = "gf255", 6 | feature = "gf255e", 7 | feature = "gf255s", 8 | feature = "gf25519", 9 | ))] 10 | pub mod gf255; 11 | 12 | #[cfg(any( 13 | feature = "gf255", 14 | feature = "gf255e", 15 | feature = "gf255s", 16 | feature = "gf25519", 17 | ))] 18 | pub use gf255::GF255; 19 | 20 | #[cfg(feature = "gf255e")] 21 | pub type GF255e = GF255<18651>; 22 | 23 | #[cfg(feature = "gf255s")] 24 | pub type GF255s = GF255<3957>; 25 | 26 | #[cfg(feature = "gf25519")] 27 | pub type GF25519 = GF255<19>; 28 | 29 | #[cfg(any( 30 | feature = 
"modint256", 31 | feature = "gfp256", 32 | ))] 33 | pub mod modint; 34 | 35 | #[cfg(feature = "modint256")] 36 | pub use modint::ModInt256; 37 | 38 | #[cfg(feature = "modint256")] 39 | pub type ModInt256ct = ModInt256; 40 | 41 | #[cfg(feature = "gfp256")] 42 | pub type GFp256 = modint::ModInt256< 43 | 0xFFFFFFFFFFFFFFFF, 0x00000000FFFFFFFF, 44 | 0x0000000000000000, 0xFFFFFFFF00000001>; 45 | 46 | #[cfg(feature = "gfp256")] 47 | impl GFp256 { 48 | /// Encodes a scalar element into bytes (little-endian). 49 | pub fn encode(self) -> [u8; 32] { 50 | self.encode32() 51 | } 52 | } 53 | 54 | #[cfg(feature = "secp256k1")] 55 | pub mod gfsecp256k1; 56 | 57 | #[cfg(feature = "secp256k1")] 58 | pub use gfsecp256k1::GFsecp256k1; 59 | 60 | #[cfg(feature = "gf448")] 61 | pub mod gf448; 62 | 63 | #[cfg(feature = "gf448")] 64 | pub use gf448::GF448; 65 | 66 | pub mod lagrange; 67 | 68 | #[cfg(feature = "gfgen")] 69 | pub mod gfgen; 70 | 71 | #[cfg(feature = "gfb254")] 72 | pub mod gfb254_m32; 73 | 74 | #[cfg(feature = "gfb254")] 75 | pub use gfb254_m32::{GFb127, GFb254}; 76 | 77 | // Carrying addition and subtraction should use u32::carrying_add() 78 | // and u32::borrowing_sub(), but these functions are currently only 79 | // experimental. 80 | 81 | // Add with carry; carry is 0 or 1. 82 | // (x, y, c_in) -> x + y + c_in mod 2^32, c_out 83 | 84 | #[cfg(target_arch = "x86")] 85 | #[allow(dead_code)] 86 | #[inline(always)] 87 | pub(crate) fn addcarry_u32(x: u32, y: u32, c: u8) -> (u32, u8) { 88 | use core::arch::x86::_addcarry_u32; 89 | unsafe { 90 | let mut d = 0u32; 91 | let cc = _addcarry_u32(c, x, y, &mut d); 92 | (d, cc) 93 | } 94 | } 95 | 96 | #[cfg(not(target_arch = "x86"))] 97 | #[allow(dead_code)] 98 | #[inline(always)] 99 | pub(crate) const fn addcarry_u32(x: u32, y: u32, c: u8) -> (u32, u8) { 100 | let z = (x as u64).wrapping_add(y as u64).wrapping_add(c as u64); 101 | (z as u32, (z >> 32) as u8) 102 | } 103 | 104 | // Subtract with borrow; borrow is 0 or 1. 
105 | // (x, y, c_in) -> x - y - c_in mod 2^32, c_out 106 | 107 | #[cfg(target_arch = "x86")] 108 | #[allow(dead_code)] 109 | #[inline(always)] 110 | pub(crate) fn subborrow_u32(x: u32, y: u32, c: u8) -> (u32, u8) { 111 | use core::arch::x86::_subborrow_u32; 112 | unsafe { 113 | let mut d = 0u32; 114 | let cc = _subborrow_u32(c, x, y, &mut d); 115 | (d, cc) 116 | } 117 | } 118 | 119 | #[cfg(not(target_arch = "x86"))] 120 | #[allow(dead_code)] 121 | #[inline(always)] 122 | pub(crate) const fn subborrow_u32(x: u32, y: u32, c: u8) -> (u32, u8) { 123 | let z = (x as u64).wrapping_sub(y as u64).wrapping_sub(c as u64); 124 | (z as u32, (z >> 63) as u8) 125 | } 126 | 127 | // Compute x*y over 64 bits, returned as two 32-bit words (lo, hi) 128 | #[allow(dead_code)] 129 | #[inline(always)] 130 | pub(crate) const fn umull(x: u32, y: u32) -> (u32, u32) { 131 | let z = (x as u64) * (y as u64); 132 | (z as u32, (z >> 32) as u32) 133 | } 134 | 135 | // Compute x*y+z over 64 bits, returned as two 32-bit words (lo, hi) 136 | #[allow(dead_code)] 137 | #[inline(always)] 138 | pub(crate) const fn umull_add(x: u32, y: u32, z: u32) -> (u32, u32) { 139 | let t = ((x as u64) * (y as u64)).wrapping_add(z as u64); 140 | (t as u32, (t >> 32) as u32) 141 | } 142 | 143 | // Compute x*y+z1+z2 over 64 bits, returned as two 32-bit words (lo, hi) 144 | #[allow(dead_code)] 145 | #[inline(always)] 146 | pub(crate) const fn umull_add2(x: u32, y: u32, z1: u32, z2: u32) -> (u32, u32) { 147 | let t = ((x as u64) * (y as u64)) 148 | .wrapping_add(z1 as u64).wrapping_add(z2 as u64); 149 | (t as u32, (t >> 32) as u32) 150 | } 151 | 152 | // Compute x1*y1+x2*y2 over 64 bits, returned as two 32-bit words (lo, hi) 153 | #[allow(dead_code)] 154 | #[inline(always)] 155 | pub(crate) const fn umull_x2(x1: u32, y1: u32, x2: u32, y2: u32) -> (u32, u32) { 156 | let z1 = (x1 as u64) * (y1 as u64); 157 | let z2 = (x2 as u64) * (y2 as u64); 158 | let z = z1.wrapping_add(z2); 159 | (z as u32, (z >> 32) as u32) 160 | } 
161 | 162 | // Compute x1*y1+x2*y2+z3 over 64 bits, returned as two 32-bit words (lo, hi) 163 | #[allow(dead_code)] 164 | #[inline(always)] 165 | pub(crate) const fn umull_x2_add(x1: u32, y1: u32, x2: u32, y2: u32, z3: u32) -> (u32, u32) { 166 | let z1 = (x1 as u64) * (y1 as u64); 167 | let z2 = (x2 as u64) * (y2 as u64); 168 | let z = z1.wrapping_add(z2).wrapping_add(z3 as u64); 169 | (z as u32, (z >> 32) as u32) 170 | } 171 | 172 | // Return 0xFFFFFFFF if x >= 0x80000000, 0 otherwise (i.e. take the sign 173 | // bit of the signed interpretation, and expand it to 32 bits). 174 | #[allow(dead_code)] 175 | #[inline(always)] 176 | pub(crate) const fn sgnw(x: u32) -> u32 { 177 | ((x as i32) >> 31) as u32 178 | } 179 | 180 | // Get the number of leading zeros in a 32-bit value. 181 | // On some platforms, u32::leading_zeros() performs the computation with 182 | // a code sequence that will be constant-time on most/all CPUs 183 | // compatible with that platforms (e.g. any 32-bit x86 with support for 184 | // the LZCNT opcode); on others, a non-constant-time sequence would be 185 | // used, and we must instead rely on a safe (but slower) routine. 186 | // 187 | // On x86 without LZCNT, u32::leading_zeros() uses a BSR opcode, but since 188 | // BSR yields an undefined result on an input of value 0, u32::leading_zeros() 189 | // includes an explicit test and a conditional jump for that case, and that 190 | // is not (in general) constant-time. 
191 | #[cfg(any( 192 | all(target_arch = "x86", target_feature = "lzcnt"), 193 | ))] 194 | #[allow(dead_code)] 195 | #[inline(always)] 196 | pub(crate) const fn lzcnt(x: u32) -> u32 { 197 | x.leading_zeros() 198 | } 199 | 200 | #[cfg(not(any( 201 | all(target_arch = "x86", target_feature = "lzcnt"), 202 | )))] 203 | #[allow(dead_code)] 204 | pub(crate) const fn lzcnt(x: u32) -> u32 { 205 | let m = sgnw((x >> 16).wrapping_sub(1)); 206 | let s = m & 16; 207 | let x = (x >> 16) ^ (m & (x ^ (x >> 16))); 208 | 209 | let m = sgnw((x >> 8).wrapping_sub(1)); 210 | let s = s | (m & 8); 211 | let x = (x >> 8) ^ (m & (x ^ (x >> 8))); 212 | 213 | let m = sgnw((x >> 4).wrapping_sub(1)); 214 | let s = s | (m & 4); 215 | let x = (x >> 4) ^ (m & (x ^ (x >> 4))); 216 | 217 | let m = sgnw((x >> 2).wrapping_sub(1)); 218 | let s = s | (m & 2); 219 | let x = (x >> 2) ^ (m & (x ^ (x >> 2))); 220 | 221 | // At this point, x fits on 2 bits. Number of leading zeros is then: 222 | // x = 0 -> 2 223 | // x = 1 -> 1 224 | // x = 2 -> 0 225 | // x = 3 -> 0 226 | let s = s.wrapping_add(2u32.wrapping_sub(x) & ((x.wrapping_sub(3) >> 2))); 227 | 228 | s as u32 229 | } 230 | -------------------------------------------------------------------------------- /src/backend/w32/zz.rs: -------------------------------------------------------------------------------- 1 | use core::convert::TryFrom; 2 | 3 | use super::{addcarry_u32, subborrow_u32, umull, umull_add, umull_add2, sgnw}; 4 | 5 | /// A custom 128-bit integer with some constant-time operations. 
6 | #[derive(Clone, Copy, Debug)] 7 | pub struct Zu128([u32; 4]); 8 | 9 | impl Zu128 { 10 | 11 | pub const ZERO: Self = Self([0; 4]); 12 | 13 | #[inline(always)] 14 | pub const fn w64le(x0: u64, x1: u64) -> Self { 15 | Self([ x0 as u32, (x0 >> 32) as u32, x1 as u32, (x1 >> 32) as u32 ]) 16 | } 17 | 18 | #[inline(always)] 19 | pub fn decode(buf: &[u8]) -> Option { 20 | if buf.len() != 16 { 21 | None 22 | } else { 23 | let mut x = Self::ZERO; 24 | for i in 0..4 { 25 | x.0[i] = u32::from_le_bytes(*<&[u8; 4]>::try_from( 26 | &buf[(4 * i)..(4 * i + 4)]).unwrap()); 27 | } 28 | Some(x) 29 | } 30 | } 31 | 32 | #[inline(always)] 33 | pub fn mul128x128(self, b: &Self) -> Zu256 { 34 | let mut d = [0u32; 8]; 35 | for i in 0..4 { 36 | let f = self.0[i]; 37 | let mut hi = 0; 38 | for j in 0..4 { 39 | (d[i + j], hi) = umull_add2(f, b.0[j], d[i + j], hi); 40 | } 41 | d[i + 4] = hi; 42 | } 43 | Zu256(d) 44 | } 45 | 46 | #[inline(always)] 47 | pub fn mul128x128trunc(self, b: &Self) -> Self { 48 | let f = self.0[0]; 49 | let (d0, hi) = umull(f, b.0[0]); 50 | let (d1, hi) = umull_add(f, b.0[1], hi); 51 | let (d2, hi) = umull_add(f, b.0[2], hi); 52 | let d3 = f.wrapping_mul(b.0[3]).wrapping_add(hi); 53 | let f = self.0[1]; 54 | let (d1, hi) = umull_add(f, b.0[0], d1); 55 | let (d2, hi) = umull_add2(f, b.0[1], d2, hi); 56 | let d3 = f.wrapping_mul(b.0[2]).wrapping_add(d3).wrapping_add(hi); 57 | let f = self.0[2]; 58 | let (d2, hi) = umull_add(f, b.0[0], d2); 59 | let d3 = f.wrapping_mul(b.0[1]).wrapping_add(d3).wrapping_add(hi); 60 | let f = self.0[3]; 61 | let d3 = f.wrapping_mul(b.0[0]).wrapping_add(d3); 62 | Self([ d0, d1, d2, d3 ]) 63 | } 64 | 65 | /// Interpreting this value as a signed 128-bit integer, return its 66 | /// absolute value (in a `u128` type) and the original sign (0xFFFFFFFF 67 | /// for negative, 0x00000000 for non-negative). 
68 | #[inline(always)] 69 | pub fn abs(self) -> (u128, u32) { 70 | let s = sgnw(self.0[3]); 71 | let (d0, cc) = subborrow_u32(self.0[0] ^ s, s, 0); 72 | let (d1, cc) = subborrow_u32(self.0[1] ^ s, s, cc); 73 | let (d2, cc) = subborrow_u32(self.0[2] ^ s, s, cc); 74 | let (d3, _) = subborrow_u32(self.0[3] ^ s, s, cc); 75 | ((d0 as u128) | ((d1 as u128) << 32) 76 | | ((d2 as u128) << 64) | ((d3 as u128) << 96), s) 77 | } 78 | 79 | /// Interpreting this value as a signed 128-bit integer `x`, return 80 | /// the absolute value of `2*x+1` (as a `u128` type) and the original 81 | /// sign (0xFFFFFFFF for negative, 0x00000000 for non-negative). 82 | #[inline(always)] 83 | pub fn double_inc_abs(self) -> (u128, u32) { 84 | let s = sgnw(self.0[3]); 85 | let b0 = (self.0[0] << 1) | 1; 86 | let b1 = (self.0[0] >> 31) | (self.0[1] << 1); 87 | let b2 = (self.0[1] >> 31) | (self.0[2] << 1); 88 | let b3 = (self.0[2] >> 31) | (self.0[3] << 1); 89 | let (d0, cc) = subborrow_u32(b0 ^ s, s, 0); 90 | let (d1, cc) = subborrow_u32(b1 ^ s, s, cc); 91 | let (d2, cc) = subborrow_u32(b2 ^ s, s, cc); 92 | let (d3, _) = subborrow_u32(b3 ^ s, s, cc); 93 | ((d0 as u128) | ((d1 as u128) << 32) 94 | | ((d2 as u128) << 64) | ((d3 as u128) << 96), s) 95 | } 96 | 97 | #[inline(always)] 98 | pub fn set_sub(&mut self, b: &Self) { 99 | let mut cc = 0; 100 | for i in 0..4 { 101 | (self.0[i], cc) = subborrow_u32(self.0[i], b.0[i], cc); 102 | } 103 | } 104 | 105 | #[inline(always)] 106 | pub fn set_sub_u32(&mut self, b: u32) { 107 | let mut cc; 108 | (self.0[0], cc) = subborrow_u32(self.0[0], b, 0); 109 | for i in 1..4 { 110 | (self.0[i], cc) = subborrow_u32(self.0[i], 0, cc); 111 | } 112 | } 113 | } 114 | 115 | /// A custom 256-bit integer with some constant-time operations. 
116 | #[derive(Clone, Copy, Debug)] 117 | pub struct Zu256([u32; 8]); 118 | 119 | impl Zu256 { 120 | 121 | pub const ZERO: Self = Self([0; 8]); 122 | 123 | #[inline(always)] 124 | pub const fn w64le(x0: u64, x1: u64, x2: u64, x3: u64) -> Self { 125 | Self([ 126 | x0 as u32, (x0 >> 32) as u32, 127 | x1 as u32, (x1 >> 32) as u32, 128 | x2 as u32, (x2 >> 32) as u32, 129 | x3 as u32, (x3 >> 32) as u32, 130 | ]) 131 | } 132 | 133 | #[inline(always)] 134 | pub fn decode(buf: &[u8]) -> Option { 135 | if buf.len() != 32 { 136 | None 137 | } else { 138 | let mut x = Self::ZERO; 139 | for i in 0..8 { 140 | x.0[i] = u32::from_le_bytes(*<&[u8; 4]>::try_from( 141 | &buf[(4 * i)..(4 * i + 4)]).unwrap()); 142 | } 143 | Some(x) 144 | } 145 | } 146 | 147 | #[inline(always)] 148 | pub fn trunc128(self) -> Zu128 { 149 | Zu128([ self.0[0], self.0[1], self.0[2], self.0[3] ]) 150 | } 151 | 152 | #[inline(always)] 153 | pub fn mul256x128(self, b: &Zu128) -> Zu384 { 154 | let mut d = [0u32; 12]; 155 | for i in 0..8 { 156 | let f = self.0[i]; 157 | let mut hi = 0; 158 | for j in 0..4 { 159 | (d[i + j], hi) = umull_add2(f, b.0[j], d[i + j], hi); 160 | } 161 | d[i + 4] = hi; 162 | } 163 | Zu384(d) 164 | } 165 | 166 | /// Return `floor((self + b)/2^224) mod 2^32` (i.e. addition truncated 167 | /// to 256 bits, then return the high 32 bits of the 256-bit result). 168 | #[inline(always)] 169 | pub fn add_rsh224(self, b: &Self) -> u32 { 170 | let mut cc; 171 | (_, cc) = addcarry_u32(self.0[0], b.0[0], 0); 172 | for i in 1..7 { 173 | (_, cc) = addcarry_u32(self.0[i], b.0[i], cc); 174 | } 175 | let (w, _) = addcarry_u32(self.0[7], b.0[7], cc); 176 | w 177 | } 178 | 179 | /// Return the borrow resulting from the subtraction of `b` from `self`; 180 | /// returned value is 1 in case of borrow, 0 otherwise. The subtraction 181 | /// result itself is discarded. 
182 | #[inline(always)] 183 | pub fn borrow(self, b: &Self) -> u32 { 184 | let mut cc; 185 | (_, cc) = subborrow_u32(self.0[0], b.0[0], 0); 186 | for i in 1..8 { 187 | (_, cc) = subborrow_u32(self.0[i], b.0[i], cc); 188 | } 189 | cc as u32 190 | } 191 | } 192 | 193 | /// A custom 384-bit integer with some constant-time operations. 194 | #[derive(Clone, Copy, Debug)] 195 | pub struct Zu384([u32; 12]); 196 | 197 | impl Zu384 { 198 | 199 | pub const ZERO: Self = Self([0; 12]); 200 | 201 | #[inline(always)] 202 | pub const fn w64le(x0: u64, x1: u64, x2: u64, x3: u64, x4: u64, x5: u64) 203 | -> Self 204 | { 205 | Self([ 206 | x0 as u32, (x0 >> 32) as u32, 207 | x1 as u32, (x1 >> 32) as u32, 208 | x2 as u32, (x2 >> 32) as u32, 209 | x3 as u32, (x3 >> 32) as u32, 210 | x4 as u32, (x4 >> 32) as u32, 211 | x5 as u32, (x5 >> 32) as u32, 212 | ]) 213 | } 214 | 215 | #[inline(always)] 216 | pub fn set_add(&mut self, b: &Self) { 217 | let mut cc = 0; 218 | for i in 0..12 { 219 | (self.0[i], cc) = addcarry_u32(self.0[i], b.0[i], cc); 220 | } 221 | } 222 | 223 | /// Returns `self mod 2^n` and `(floor(self/2^n) + b) mod 2^128`. 224 | /// Shift count `n` MUST be between 225 and 255 (inclusive).
225 | #[inline(always)] 226 | pub fn trunc_and_rsh_cc(&mut self, b: u32, n: u32) -> (Zu256, Zu128) { 227 | let n1 = n - 224; 228 | let n2 = 32 - n1; 229 | let (d0, cc) = addcarry_u32( 230 | (self.0[7] >> n1) | (self.0[8] << n2), b, 0); 231 | let (d1, cc) = addcarry_u32( 232 | (self.0[8] >> n1) | (self.0[9] << n2), 0, cc); 233 | let (d2, cc) = addcarry_u32( 234 | (self.0[9] >> n1) | (self.0[10] << n2), 0, cc); 235 | let (d3, _) = addcarry_u32( 236 | (self.0[10] >> n1) | (self.0[11] << n2), 0, cc); 237 | let mut e = [0u32; 8]; 238 | e[..].copy_from_slice(&self.0[..8]); 239 | e[7] &= (!0u32) >> n2; 240 | (Zu256(e), Zu128([ d0, d1, d2, d3 ])) 241 | } 242 | } 243 | -------------------------------------------------------------------------------- /src/backend/w64/mod.rs: -------------------------------------------------------------------------------- 1 | // The zz module defines the Zu* type (custom non-modular integers with 2 | // sizes of 128, 256 or 384 bits, with some constant-time operation to 3 | // support scalar splitting in GLV and GLS curves). On aarch64 we use a 4 | // 32-bit version, because the Arm Cortex-A55 has non-constant-time 64-bit 5 | // multiplies (but 32-bit multiplies are constant-time). 6 | 7 | #[cfg(any( 8 | feature = "zz32", 9 | all( 10 | not(feature = "zz64"), 11 | target_arch = "aarch64")))] 12 | mod zz32; 13 | 14 | #[cfg(any( 15 | feature = "zz32", 16 | all( 17 | not(feature = "zz64"), 18 | target_arch = "aarch64")))] 19 | pub use zz32::{Zu128, Zu256, Zu384}; 20 | 21 | #[cfg(any( 22 | feature = "zz64", 23 | all( 24 | not(feature = "zz32"), 25 | not(target_arch = "aarch64"))))] 26 | mod zz; 27 | 28 | #[cfg(any( 29 | feature = "zz64", 30 | all( 31 | not(feature = "zz32"), 32 | not(target_arch = "aarch64"))))] 33 | pub use zz::{Zu128, Zu256, Zu384}; 34 | 35 | // Module gf255 defines the generic GF255 type, with 64-bit limbs. 36 | // It is used for GF255e and GF255s. 
For GF25519, an alternate implementation 37 | // with 51-bit limbs is provided (in module gf255_m51) and used in some cases. 38 | // - If feature gf255_m64 is set, then GF255<19> is used. 39 | // - If feature gf255_m51 is set, then the alternate implementation is used. 40 | // - If neither gf255_m64 nor gf255_m51 is set, then the selected 41 | // implementation depends on the target architecture. 42 | // - Features gf255_m51 and gf255_m64 are mutually incompatible; they 43 | // cannot be both set at the same time. 44 | #[cfg(all( 45 | feature = "gf255_m51", 46 | feature = "gf255_m64", 47 | ))] 48 | compile_error!("cannot use m51 and m64 GF255 implementations simultaneously"); 49 | 50 | #[cfg(all( 51 | any( 52 | feature = "gf255", 53 | feature = "gf255e", 54 | feature = "gf255s", 55 | feature = "gf25519"), 56 | not(feature = "gf255_m51"), 57 | any( 58 | feature = "gf255_m64", 59 | not(target_arch = "riscv64")), 60 | ))] 61 | pub mod gf255_m64; 62 | 63 | #[cfg(all( 64 | any( 65 | feature = "gf255", 66 | feature = "gf255e", 67 | feature = "gf255s", 68 | feature = "gf25519"), 69 | not(feature = "gf255_m51"), 70 | any( 71 | feature = "gf255_m64", 72 | not(target_arch = "riscv64")), 73 | ))] 74 | pub use gf255_m64::GF255; 75 | 76 | #[cfg(all( 77 | any( 78 | feature = "gf255", 79 | feature = "gf255e", 80 | feature = "gf255s", 81 | feature = "gf25519"), 82 | not(feature = "gf255_m64"), 83 | any( 84 | feature = "gf255_m51", 85 | target_arch = "riscv64"), 86 | ))] 87 | pub mod gf255_m51; 88 | 89 | #[cfg(all( 90 | any( 91 | feature = "gf255", 92 | feature = "gf255e", 93 | feature = "gf255s", 94 | feature = "gf25519"), 95 | not(feature = "gf255_m64"), 96 | any( 97 | feature = "gf255_m51", 98 | target_arch = "riscv64"), 99 | ))] 100 | pub use gf255_m51::GF255; 101 | 102 | #[cfg(feature = "gf255e")] 103 | pub type GF255e = GF255<18651>; 104 | 105 | #[cfg(feature = "gf255s")] 106 | pub type GF255s = GF255<3957>; 107 | 108 | #[cfg(feature = "gf25519")] 109 | pub type
GF25519 = GF255<19>; 110 | 111 | #[cfg(any( 112 | feature = "modint256", 113 | feature = "gfp256", 114 | ))] 115 | pub mod modint; 116 | 117 | #[cfg(feature = "modint256")] 118 | pub use modint::ModInt256; 119 | 120 | #[cfg(all( 121 | feature = "modint256", 122 | not(target_arch = "aarch64")))] 123 | pub type ModInt256ct = ModInt256; 124 | 125 | #[cfg(all( 126 | feature = "modint256", 127 | target_arch = "aarch64"))] 128 | pub mod modint32; 129 | 130 | #[cfg(all( 131 | feature = "modint256", 132 | target_arch = "aarch64"))] 133 | pub use modint32::ModInt256ct; 134 | 135 | /* disabled -- not faster than the generic code 136 | #[cfg(feature = "gfp256")] 137 | pub mod gfp256; 138 | 139 | #[cfg(feature = "gfp256")] 140 | pub use gfp256::GFp256; 141 | */ 142 | 143 | #[cfg(feature = "gfp256")] 144 | pub type GFp256 = modint::ModInt256< 145 | 0xFFFFFFFFFFFFFFFF, 0x00000000FFFFFFFF, 146 | 0x0000000000000000, 0xFFFFFFFF00000001>; 147 | 148 | #[cfg(feature = "gfp256")] 149 | impl GFp256 { 150 | /// Encodes a scalar element into bytes (little-endian). 
151 | pub fn encode(self) -> [u8; 32] { 152 | self.encode32() 153 | } 154 | } 155 | 156 | #[cfg(feature = "secp256k1")] 157 | pub mod gfsecp256k1; 158 | 159 | #[cfg(feature = "secp256k1")] 160 | pub use gfsecp256k1::GFsecp256k1; 161 | 162 | #[cfg(feature = "gf448")] 163 | pub mod gf448; 164 | 165 | #[cfg(feature = "gf448")] 166 | pub use gf448::GF448; 167 | 168 | pub mod lagrange; 169 | 170 | #[cfg(feature = "gfgen")] 171 | pub mod gfgen; 172 | 173 | #[cfg(all( 174 | feature = "gfb254", 175 | not(any( 176 | feature = "gfb254_m64", 177 | feature = "gfb254_arm64pmull")), 178 | any( 179 | feature = "gfb254_x86clmul", 180 | all( 181 | target_arch = "x86_64", 182 | target_feature = "sse4.1", 183 | target_feature = "pclmulqdq"))))] 184 | pub mod gfb254_x86clmul; 185 | 186 | #[cfg(all( 187 | feature = "gfb254", 188 | not(any( 189 | feature = "gfb254_m64", 190 | feature = "gfb254_arm64pmull")), 191 | any( 192 | feature = "gfb254_x86clmul", 193 | all( 194 | target_arch = "x86_64", 195 | target_feature = "sse4.1", 196 | target_feature = "pclmulqdq"))))] 197 | pub use gfb254_x86clmul::{GFb127, GFb254}; 198 | 199 | #[cfg(all( 200 | feature = "gfb254", 201 | not(any( 202 | feature = "gfb254_x86clmul", 203 | feature = "gfb254_m64")), 204 | any( 205 | feature = "gfb254_arm64pmull", 206 | all( 207 | target_arch = "aarch64", 208 | target_feature = "aes"))))] 209 | pub mod gfb254_arm64pmull; 210 | 211 | #[cfg(all( 212 | feature = "gfb254", 213 | not(any( 214 | feature = "gfb254_x86clmul", 215 | feature = "gfb254_m64")), 216 | any( 217 | feature = "gfb254_arm64pmull", 218 | all( 219 | target_arch = "aarch64", 220 | target_feature = "aes"))))] 221 | pub use gfb254_arm64pmull::{GFb127, GFb254}; 222 | 223 | #[cfg(all( 224 | feature = "gfb254", 225 | not(any( 226 | feature = "gfb254_x86clmul", 227 | feature = "gfb254_arm64pmull")), 228 | any( 229 | feature = "gfb254_m64", 230 | not(any( 231 | all( 232 | target_arch = "x86_64", 233 | target_feature = "sse4.1", 234 | target_feature = 
"pclmulqdq"), 235 | all( 236 | target_arch = "aarch64", 237 | target_feature = "aes"))))))] 238 | pub mod gfb254_m64; 239 | 240 | #[cfg(all( 241 | feature = "gfb254", 242 | not(any( 243 | feature = "gfb254_x86clmul", 244 | feature = "gfb254_arm64pmull")), 245 | any( 246 | feature = "gfb254_m64", 247 | not(any( 248 | all( 249 | target_arch = "x86_64", 250 | target_feature = "sse4.1", 251 | target_feature = "pclmulqdq"), 252 | all( 253 | target_arch = "aarch64", 254 | target_feature = "aes"))))))] 255 | pub use gfb254_m64::{GFb127, GFb254}; 256 | 257 | // The 32-bit variants of the addcarry, umull,... functions. 258 | pub(crate) mod util32; 259 | 260 | // Carrying addition and subtraction should use u64::carrying_add() 261 | // and u64::borrowing_sub(), but these functions are currently only 262 | // experimental. 263 | 264 | // Add with carry; carry is 0 or 1. 265 | // (x, y, c_in) -> x + y + c_in mod 2^64, c_out 266 | 267 | #[cfg(target_arch = "x86_64")] 268 | #[allow(dead_code)] 269 | #[inline(always)] 270 | pub fn addcarry_u64(x: u64, y: u64, c: u8) -> (u64, u8) { 271 | use core::arch::x86_64::_addcarry_u64; 272 | unsafe { 273 | let mut d = 0u64; 274 | let cc = _addcarry_u64(c, x, y, &mut d); 275 | (d, cc) 276 | } 277 | } 278 | 279 | #[cfg(not(target_arch = "x86_64"))] 280 | #[allow(dead_code)] 281 | #[inline(always)] 282 | pub const fn addcarry_u64(x: u64, y: u64, c: u8) -> (u64, u8) { 283 | let z = (x as u128).wrapping_add(y as u128).wrapping_add(c as u128); 284 | (z as u64, (z >> 64) as u8) 285 | } 286 | 287 | // Subtract with borrow; borrow is 0 or 1. 
288 | // (x, y, c_in) -> x - y - c_in mod 2^64, c_out 289 | 290 | #[cfg(target_arch = "x86_64")] 291 | #[allow(dead_code)] 292 | #[inline(always)] 293 | pub fn subborrow_u64(x: u64, y: u64, c: u8) -> (u64, u8) { 294 | use core::arch::x86_64::_subborrow_u64; 295 | unsafe { 296 | let mut d = 0u64; 297 | let cc = _subborrow_u64(c, x, y, &mut d); 298 | (d, cc) 299 | } 300 | } 301 | 302 | #[cfg(not(target_arch = "x86_64"))] 303 | #[allow(dead_code)] 304 | #[inline(always)] 305 | pub const fn subborrow_u64(x: u64, y: u64, c: u8) -> (u64, u8) { 306 | let z = (x as u128).wrapping_sub(y as u128).wrapping_sub(c as u128); 307 | (z as u64, (z >> 127) as u8) 308 | } 309 | 310 | // Compute x*y over 128 bits, returned as two 64-bit words (lo, hi) 311 | #[allow(dead_code)] 312 | #[inline(always)] 313 | pub const fn umull(x: u64, y: u64) -> (u64, u64) { 314 | let z = (x as u128) * (y as u128); 315 | (z as u64, (z >> 64) as u64) 316 | } 317 | 318 | // Compute x*y+z over 128 bits, returned as two 64-bit words (lo, hi) 319 | #[allow(dead_code)] 320 | #[inline(always)] 321 | pub const fn umull_add(x: u64, y: u64, z: u64) -> (u64, u64) { 322 | let t = ((x as u128) * (y as u128)).wrapping_add(z as u128); 323 | (t as u64, (t >> 64) as u64) 324 | } 325 | 326 | // Compute x*y+z1+z2 over 128 bits, returned as two 64-bit words (lo, hi) 327 | #[allow(dead_code)] 328 | #[inline(always)] 329 | pub const fn umull_add2(x: u64, y: u64, z1: u64, z2: u64) -> (u64, u64) { 330 | let t = ((x as u128) * (y as u128)) 331 | .wrapping_add(z1 as u128).wrapping_add(z2 as u128); 332 | (t as u64, (t >> 64) as u64) 333 | } 334 | 335 | // Compute x1*y1+x2*y2 over 128 bits, returned as two 64-bit words (lo, hi) 336 | #[allow(dead_code)] 337 | #[inline(always)] 338 | pub const fn umull_x2(x1: u64, y1: u64, x2: u64, y2: u64) -> (u64, u64) { 339 | let z1 = (x1 as u128) * (y1 as u128); 340 | let z2 = (x2 as u128) * (y2 as u128); 341 | let z = z1.wrapping_add(z2); 342 | (z as u64, (z >> 64) as u64) 343 | } 344 | 345 | 
// Compute x1*y1+x2*y2+z3 over 128 bits, returned as two 64-bit words (lo, hi)
// (the sum is taken modulo 2^128)
#[allow(dead_code)]
#[inline(always)]
pub const fn umull_x2_add(x1: u64, y1: u64, x2: u64, y2: u64, z3: u64) -> (u64, u64) {
    let w = ((x1 as u128) * (y1 as u128))
        .wrapping_add((x2 as u128) * (y2 as u128))
        .wrapping_add(z3 as u128);
    (w as u64, (w >> 64) as u64)
}

// Return 0xFFFFFFFFFFFFFFFF if x >= 0x8000000000000000, 0 otherwise
// (i.e. take the sign bit of the signed interpretation, and expand it
// to 64 bits).
#[allow(dead_code)]
#[inline(always)]
pub const fn sgnw(x: u64) -> u64 {
    // Arithmetic right shift replicates the sign bit over the whole word.
    ((x as i64) >> 63) as u64
}

// Get the number of leading zeros in a 64-bit value.
// On some platforms, u64::leading_zeros() performs the computation with
// a code sequence that will be constant-time on most/all CPUs
// compatible with that platform (e.g. any 64-bit x86 with support for
// the LZCNT opcode); on others, a non-constant-time sequence would be
// used, and we must instead rely on a safe (but slower) routine.
//
// On x86 without LZCNT, u64::leading_zeros() uses a BSR opcode, but since
// BSR yields an undefined result on an input of value 0, u64::leading_zeros()
// includes an explicit test and a conditional jump for that case, and that
// is not (in general) constant-time.
#[cfg(any(
    all(target_arch = "x86_64", target_feature = "lzcnt"),
    target_arch = "aarch64",
))]
#[allow(dead_code)]
#[inline(always)]
pub const fn lzcnt(x: u64) -> u32 {
    // On these targets, leading_zeros() maps to a dedicated opcode
    // (LZCNT / CLZ) with no data-dependent branch.
    x.leading_zeros()
}

#[cfg(not(any(
    all(target_arch = "x86_64", target_feature = "lzcnt"),
    target_arch = "aarch64",
)))]
#[allow(dead_code)]
pub const fn lzcnt(x: u64) -> u32 {
    // Local sign-extension helper (same semantics as sgnw()).
    const fn ext(v: u64) -> u64 {
        ((v as i64) >> 63) as u64
    }

    // Constant-time binary search: at each step, m is all-ones exactly
    // when the upper half of the current window is zero; in that case we
    // add the half-width to the count and keep the lower half, otherwise
    // we keep the upper half.
    let m = ext((x >> 32).wrapping_sub(1));
    let s = m & 32;
    let x = (x >> 32) ^ (m & (x ^ (x >> 32)));

    let m = ext((x >> 16).wrapping_sub(1));
    let s = s | (m & 16);
    let x = (x >> 16) ^ (m & (x ^ (x >> 16)));

    let m = ext((x >> 8).wrapping_sub(1));
    let s = s | (m & 8);
    let x = (x >> 8) ^ (m & (x ^ (x >> 8)));

    let m = ext((x >> 4).wrapping_sub(1));
    let s = s | (m & 4);
    let x = (x >> 4) ^ (m & (x ^ (x >> 4)));

    let m = ext((x >> 2).wrapping_sub(1));
    let s = s | (m & 2);
    let x = (x >> 2) ^ (m & (x ^ (x >> 2)));

    // At this point, x fits on 2 bits. Number of leading zeros is then:
    //   x = 0 -> 2
    //   x = 1 -> 1
    //   x = 2 -> 0
    //   x = 3 -> 0
    let s = s.wrapping_add(2u64.wrapping_sub(x) & (x.wrapping_sub(3) >> 2));

    s as u32
}

// ----- /src/backend/w64/util32.rs: -----

// We define here the 32-bit variants of addcarry, umull,...
// They are meant to be used by code that requires 32-bit computations
// even on 64-bit platforms, because the architecture's 64-bit multiplication
// opcode is not constant-time.

// Carrying addition and subtraction should use u32::carrying_add()
// and u32::borrowing_sub(), but these functions are currently only
// experimental.
// Add with carry; carry is 0 or 1.
// (x, y, c_in) -> x + y + c_in mod 2^32, c_out

// x86-64 path: use the ADC-based intrinsic.
#[cfg(target_arch = "x86_64")]
#[allow(dead_code)]
#[inline(always)]
pub(crate) fn addcarry_u32(x: u32, y: u32, c: u8) -> (u32, u8) {
    use core::arch::x86_64::_addcarry_u32;
    unsafe {
        let mut out = 0u32;
        let cc = _addcarry_u32(c, x, y, &mut out);
        (out, cc)
    }
}

// Portable path: a 64-bit sum cannot overflow here; the outgoing carry
// is bit 32 of that sum.
#[cfg(not(target_arch = "x86_64"))]
#[allow(dead_code)]
#[inline(always)]
pub(crate) const fn addcarry_u32(x: u32, y: u32, c: u8) -> (u32, u8) {
    let w = (x as u64).wrapping_add(y as u64).wrapping_add(c as u64);
    (w as u32, (w >> 32) as u8)
}

// Subtract with borrow; borrow is 0 or 1.
// (x, y, c_in) -> x - y - c_in mod 2^32, c_out

// x86-64 path: use the SBB-based intrinsic.
#[cfg(target_arch = "x86_64")]
#[allow(dead_code)]
#[inline(always)]
pub(crate) fn subborrow_u32(x: u32, y: u32, c: u8) -> (u32, u8) {
    use core::arch::x86_64::_subborrow_u32;
    unsafe {
        let mut out = 0u32;
        let bb = _subborrow_u32(c, x, y, &mut out);
        (out, bb)
    }
}

// Portable path: when a borrow occurs, the 64-bit difference wraps
// around and its top bit (bit 63) is set.
#[cfg(not(target_arch = "x86_64"))]
#[allow(dead_code)]
#[inline(always)]
pub(crate) const fn subborrow_u32(x: u32, y: u32, c: u8) -> (u32, u8) {
    let w = (x as u64).wrapping_sub(y as u64).wrapping_sub(c as u64);
    (w as u32, (w >> 63) as u8)
}

// Compute x*y over 64 bits, returned as two 32-bit words (lo, hi)
#[allow(dead_code)]
#[inline(always)]
pub(crate) const fn umull(x: u32, y: u32) -> (u32, u32) {
    let w = (x as u64) * (y as u64);
    (w as u32, (w >> 32) as u32)
}

// Compute x*y+z over 64 bits, returned as two 32-bit words (lo, hi)
// (this cannot overflow: (2^32-1)^2 + (2^32-1) < 2^64)
#[allow(dead_code)]
#[inline(always)]
pub(crate) const fn umull_add(x: u32, y: u32, z: u32) -> (u32, u32) {
    let w = ((x as u64) * (y as u64)).wrapping_add(z as u64);
    (w as u32, (w >> 32) as u32)
}

// Compute x*y+z1+z2
// over 64 bits, returned as two 32-bit words (lo, hi)
// (this cannot overflow: (2^32-1)^2 + 2*(2^32-1) < 2^64)
#[allow(dead_code)]
#[inline(always)]
pub(crate) const fn umull_add2(x: u32, y: u32, z1: u32, z2: u32) -> (u32, u32) {
    let w = ((x as u64) * (y as u64))
        .wrapping_add(z1 as u64)
        .wrapping_add(z2 as u64);
    (w as u32, (w >> 32) as u32)
}

// Compute x1*y1+x2*y2 over 64 bits, returned as two 32-bit words (lo, hi)
// (the sum is taken modulo 2^64)
#[allow(dead_code)]
#[inline(always)]
pub(crate) const fn umull_x2(x1: u32, y1: u32, x2: u32, y2: u32) -> (u32, u32) {
    let w = ((x1 as u64) * (y1 as u64))
        .wrapping_add((x2 as u64) * (y2 as u64));
    (w as u32, (w >> 32) as u32)
}

// Compute x1*y1+x2*y2+z3 over 64 bits, returned as two 32-bit words (lo, hi)
// (the sum is taken modulo 2^64)
#[allow(dead_code)]
#[inline(always)]
pub(crate) const fn umull_x2_add(x1: u32, y1: u32, x2: u32, y2: u32, z3: u32) -> (u32, u32) {
    let w = ((x1 as u64) * (y1 as u64))
        .wrapping_add((x2 as u64) * (y2 as u64))
        .wrapping_add(z3 as u64);
    (w as u32, (w >> 32) as u32)
}

// Return 0xFFFFFFFF if x >= 0x80000000, 0 otherwise (i.e. take the sign
// bit of the signed interpretation, and expand it to 32 bits).
#[allow(dead_code)]
#[inline(always)]
pub(crate) const fn sgnw(x: u32) -> u32 {
    // Arithmetic right shift replicates the sign bit over the whole word.
    ((x as i32) >> 31) as u32
}

// Get the number of leading zeros in a 32-bit value.
// On some platforms, u32::leading_zeros() performs the computation with
// a code sequence that will be constant-time on most/all CPUs
// compatible with that platform (e.g. any 32-bit x86 with support for
// the LZCNT opcode); on others, a non-constant-time sequence would be
// used, and we must instead rely on a safe (but slower) routine.
115 | // 116 | // On x86 without LZCNT, u32::leading_zeros() uses a BSR opcode, but since 117 | // BSR yields an undefined result on an input of value 0, u32::leading_zeros() 118 | // includes an explicit test and a conditional jump for that case, and that 119 | // is not (in general) constant-time. 120 | #[cfg(any( 121 | all(target_arch = "x86_64", target_feature = "lzcnt"), 122 | target_arch = "aarch64", 123 | ))] 124 | #[allow(dead_code)] 125 | #[inline(always)] 126 | pub(crate) const fn lzcnt(x: u32) -> u32 { 127 | x.leading_zeros() 128 | } 129 | 130 | #[cfg(not(any( 131 | all(target_arch = "x86_64", target_feature = "lzcnt"), 132 | target_arch = "aarch64", 133 | )))] 134 | #[allow(dead_code)] 135 | pub(crate) const fn lzcnt(x: u32) -> u32 { 136 | let m = sgnw((x >> 16).wrapping_sub(1)); 137 | let s = m & 16; 138 | let x = (x >> 16) ^ (m & (x ^ (x >> 16))); 139 | 140 | let m = sgnw((x >> 8).wrapping_sub(1)); 141 | let s = s | (m & 8); 142 | let x = (x >> 8) ^ (m & (x ^ (x >> 8))); 143 | 144 | let m = sgnw((x >> 4).wrapping_sub(1)); 145 | let s = s | (m & 4); 146 | let x = (x >> 4) ^ (m & (x ^ (x >> 4))); 147 | 148 | let m = sgnw((x >> 2).wrapping_sub(1)); 149 | let s = s | (m & 2); 150 | let x = (x >> 2) ^ (m & (x ^ (x >> 2))); 151 | 152 | // At this point, x fits on 2 bits. Number of leading zeros is then: 153 | // x = 0 -> 2 154 | // x = 1 -> 1 155 | // x = 2 -> 0 156 | // x = 3 -> 0 157 | let s = s.wrapping_add(2u32.wrapping_sub(x) & ((x.wrapping_sub(3) >> 2))); 158 | 159 | s as u32 160 | } 161 | -------------------------------------------------------------------------------- /src/backend/w64/zz.rs: -------------------------------------------------------------------------------- 1 | use core::convert::TryFrom; 2 | 3 | use super::{addcarry_u64, subborrow_u64, umull, umull_add, umull_add2, sgnw}; 4 | 5 | /// A custom 128-bit integer with some constant-time operations. 
#[derive(Clone, Copy, Debug)]
// Two 64-bit limbs, little-endian limb order.
pub struct Zu128([u64; 2]);

impl Zu128 {

    pub const ZERO: Self = Self([0; 2]);

    // Build a value from its two 64-bit limbs (little-endian order).
    #[inline(always)]
    pub const fn w64le(x0: u64, x1: u64) -> Self {
        Self([ x0, x1 ])
    }

    // Decode exactly 16 little-endian bytes into a value; returns None
    // if the source slice does not have length 16.
    // (NOTE(review): the return type reads `Option` in the flattened dump;
    // the `<Self>` parameter was evidently stripped by the export tool and
    // is restored here.)
    #[inline(always)]
    pub fn decode(buf: &[u8]) -> Option<Self> {
        if buf.len() != 16 {
            None
        } else {
            let mut x = Self::ZERO;
            for i in 0..2 {
                // try_from cannot fail here: each chunk is exactly 8 bytes.
                x.0[i] = u64::from_le_bytes(*<&[u8; 8]>::try_from(
                    &buf[(8 * i)..(8 * i + 8)]).unwrap());
            }
            Some(x)
        }
    }

    // Full 128x128->256 unsigned product (schoolbook over 64-bit limbs).
    #[inline(always)]
    pub fn mul128x128(self, b: &Self) -> Zu256 {
        let (a0, a1) = (self.0[0], self.0[1]);
        let (b0, b1) = (b.0[0], b.0[1]);
        let mut d = [0u64; 4];
        let mut hi;
        (d[0], hi) = umull(a0, b0);
        (d[1], d[2]) = umull_add(a1, b0, hi);
        (d[1], hi) = umull_add(a0, b1, d[1]);
        (d[2], d[3]) = umull_add2(a1, b1, d[2], hi);
        Zu256(d)
    }

    // 128x128 product truncated to its low 128 bits; the a1*b1 term only
    // contributes at bit 128 and above, so it is omitted, and the cross
    // products are accumulated with plain wrapping arithmetic.
    #[inline(always)]
    pub fn mul128x128trunc(self, b: &Self) -> Self {
        let (a0, a1) = (self.0[0], self.0[1]);
        let (b0, b1) = (b.0[0], b.0[1]);
        let (d0, hi) = umull(a0, b0);
        let d1 = a0.wrapping_mul(b1)
            .wrapping_add(a1.wrapping_mul(b0))
            .wrapping_add(hi);
        Self([ d0, d1 ])
    }

    /// Interpreting this value as a signed 128-bit integer, return its
    /// absolute value (in a `u128` type) and the original sign (0xFFFFFFFF
    /// for negative, 0x00000000 for non-negative).
    #[inline(always)]
    pub fn abs(self) -> (u128, u32) {
        let (a0, a1) = (self.0[0], self.0[1]);
        // s is all-ones if the value is negative, all-zeros otherwise;
        // (x ^ s) - s is then a constant-time conditional negation.
        let s = sgnw(a1);
        let (d0, cc) = subborrow_u64(a0 ^ s, s, 0);
        let (d1, _) = subborrow_u64(a1 ^ s, s, cc);
        ((d0 as u128) | ((d1 as u128) << 64), s as u32)
    }

    /// Interpreting this value as a signed 128-bit integer `x`, return
    /// the absolute value of `2*x+1` (as a `u128` type) and the original
    /// sign (0xFFFFFFFF for negative, 0x00000000 for non-negative).
71 | #[inline(always)] 72 | pub fn double_inc_abs(self) -> (u128, u32) { 73 | let (a0, a1) = (self.0[0], self.0[1]); 74 | let s = sgnw(a1); 75 | let b0 = (a0 << 1) | 1; 76 | let b1 = (a0 >> 63) | (a1 << 1); 77 | let (d0, cc) = subborrow_u64(b0 ^ s, s, 0); 78 | let (d1, _) = subborrow_u64(b1 ^ s, s, cc); 79 | ((d0 as u128) | ((d1 as u128) << 64), s as u32) 80 | } 81 | 82 | #[inline(always)] 83 | pub fn set_sub(&mut self, b: &Self) { 84 | let cc; 85 | (self.0[0], cc) = subborrow_u64(self.0[0], b.0[0], 0); 86 | (self.0[1], _) = subborrow_u64(self.0[1], b.0[1], cc); 87 | } 88 | 89 | #[inline(always)] 90 | pub fn set_sub_u32(&mut self, b: u32) { 91 | let cc; 92 | (self.0[0], cc) = subborrow_u64(self.0[0], b as u64, 0); 93 | (self.0[1], _) = subborrow_u64(self.0[1], 0, cc); 94 | } 95 | } 96 | 97 | /// A custom 256-bit integer with some constant-time operations. 98 | #[derive(Clone, Copy, Debug)] 99 | pub struct Zu256([u64; 4]); 100 | 101 | impl Zu256 { 102 | 103 | pub const ZERO: Self = Self([0; 4]); 104 | 105 | #[inline(always)] 106 | pub const fn w64le(x0: u64, x1: u64, x2: u64, x3: u64) -> Self { 107 | Self([ x0, x1, x2, x3 ]) 108 | } 109 | 110 | #[inline(always)] 111 | pub fn decode(buf: &[u8]) -> Option { 112 | if buf.len() != 32 { 113 | None 114 | } else { 115 | let mut x = Self::ZERO; 116 | for i in 0..4 { 117 | x.0[i] = u64::from_le_bytes(*<&[u8; 8]>::try_from( 118 | &buf[(8 * i)..(8 * i + 8)]).unwrap()); 119 | } 120 | Some(x) 121 | } 122 | } 123 | 124 | #[inline(always)] 125 | pub fn trunc128(self) -> Zu128 { 126 | Zu128([ self.0[0], self.0[1] ]) 127 | } 128 | 129 | #[inline(always)] 130 | pub fn mul256x128(self, b: &Zu128) -> Zu384 { 131 | let (a0, a1, a2, a3) = (self.0[0], self.0[1], self.0[2], self.0[3]); 132 | let (b0, b1) = (b.0[0], b.0[1]); 133 | let mut d = [0u64; 6]; 134 | let mut hi; 135 | (d[0], hi) = umull(a0, b0); 136 | (d[1], hi) = umull_add(a1, b0, hi); 137 | (d[2], hi) = umull_add(a2, b0, hi); 138 | (d[3], d[4]) = umull_add(a3, b0, hi); 139 | 
(d[1], hi) = umull_add(a0, b1, d[1]); 140 | (d[2], hi) = umull_add2(a1, b1, d[2], hi); 141 | (d[3], hi) = umull_add2(a2, b1, d[3], hi); 142 | (d[4], d[5]) = umull_add2(a3, b1, d[4], hi); 143 | Zu384(d) 144 | } 145 | 146 | /// Return `floor((self + b)/2^224) mod 2^32` (i.e. addition truncated 147 | /// to 256 bits, then return the high 32 bits of the 256-bit result). 148 | #[inline(always)] 149 | pub fn add_rsh224(self, b: &Self) -> u32 { 150 | let mut cc; 151 | (_, cc) = addcarry_u64(self.0[0], b.0[0], 0); 152 | for i in 1..3 { 153 | (_, cc) = addcarry_u64(self.0[i], b.0[i], cc); 154 | } 155 | let (w, _) = addcarry_u64(self.0[3], b.0[3], cc); 156 | (w >> 32) as u32 157 | } 158 | 159 | /// Return the borrow resulting from the subtraction of `b` from `self`; 160 | /// returned value is 1 in case of borrow, 0 otherwise. The subtraction 161 | /// result itself is discarded. 162 | #[inline(always)] 163 | pub fn borrow(self, b: &Self) -> u32 { 164 | let mut cc; 165 | (_, cc) = subborrow_u64(self.0[0], b.0[0], 0); 166 | for i in 1..4 { 167 | (_, cc) = subborrow_u64(self.0[i], b.0[i], cc); 168 | } 169 | cc as u32 170 | } 171 | } 172 | 173 | /// A custom 384-bit integer with some constant-time operations. 174 | #[derive(Clone, Copy, Debug)] 175 | pub struct Zu384([u64; 6]); 176 | 177 | impl Zu384 { 178 | 179 | pub const ZERO: Self = Self([0; 6]); 180 | 181 | #[inline(always)] 182 | pub const fn w64le(x0: u64, x1: u64, x2: u64, x3: u64, x4: u64, x5: u64) 183 | -> Self 184 | { 185 | Self([ x0, x1, x2, x3, x4, x5 ]) 186 | } 187 | 188 | #[inline(always)] 189 | pub fn set_add(&mut self, b: &Self) { 190 | let mut cc = 0; 191 | for i in 0..6 { 192 | (self.0[i], cc) = addcarry_u64(self.0[i], b.0[i], cc); 193 | } 194 | } 195 | 196 | /// Returns `self mod 2^n` and `(floor(self/2^n) + cc) mod 2^128`. 197 | /// Shift count `n` MUST be between 225 and 255 (inclusive). 
198 | #[inline(always)] 199 | pub fn trunc_and_rsh_cc(&mut self, b: u32, n: u32) -> (Zu256, Zu128) { 200 | let n1 = n - 192; 201 | let n2 = 64 - n1; 202 | let (d0, cc) = addcarry_u64( 203 | (self.0[3] >> n1) | (self.0[4] << n2), b as u64, 0); 204 | let (d1, _) = addcarry_u64( 205 | (self.0[4] >> n1) | (self.0[5] << n2), 0, cc); 206 | let c0 = self.0[0]; 207 | let c1 = self.0[1]; 208 | let c2 = self.0[2]; 209 | let c3 = self.0[3] & ((!0u64) >> n2); 210 | (Zu256([ c0, c1, c2, c3 ]), Zu128([ d0, d1 ])) 211 | } 212 | } 213 | -------------------------------------------------------------------------------- /src/backend/w64/zz32.rs: -------------------------------------------------------------------------------- 1 | // This file is used for the Zu* types (used in splitting scalars for some 2 | // GLV and GLS curves) on architectures where 32x32->64 multiplications are 3 | // constant-time, but 64x64->128 multiplications are not (e.g. the ARM 4 | // Cortex-A55). 5 | 6 | use core::convert::TryFrom; 7 | 8 | use super::util32::{addcarry_u32, subborrow_u32, umull, umull_add, umull_add2, sgnw}; 9 | 10 | /// A custom 128-bit integer with some constant-time operations. 
11 | #[derive(Clone, Copy, Debug)] 12 | pub struct Zu128([u32; 4]); 13 | 14 | impl Zu128 { 15 | 16 | pub const ZERO: Self = Self([0; 4]); 17 | 18 | #[inline(always)] 19 | pub const fn w64le(x0: u64, x1: u64) -> Self { 20 | Self([ x0 as u32, (x0 >> 32) as u32, x1 as u32, (x1 >> 32) as u32 ]) 21 | } 22 | 23 | #[inline(always)] 24 | pub fn decode(buf: &[u8]) -> Option { 25 | if buf.len() != 16 { 26 | None 27 | } else { 28 | let mut x = Self::ZERO; 29 | for i in 0..4 { 30 | x.0[i] = u32::from_le_bytes(*<&[u8; 4]>::try_from( 31 | &buf[(4 * i)..(4 * i + 4)]).unwrap()); 32 | } 33 | Some(x) 34 | } 35 | } 36 | 37 | #[inline(always)] 38 | pub fn mul128x128(self, b: &Self) -> Zu256 { 39 | let mut d = [0u32; 8]; 40 | for i in 0..4 { 41 | let f = self.0[i]; 42 | let mut hi = 0; 43 | for j in 0..4 { 44 | (d[i + j], hi) = umull_add2(f, b.0[j], d[i + j], hi); 45 | } 46 | d[i + 4] = hi; 47 | } 48 | Zu256(d) 49 | } 50 | 51 | #[inline(always)] 52 | pub fn mul128x128trunc(self, b: &Self) -> Self { 53 | let f = self.0[0]; 54 | let (d0, hi) = umull(f, b.0[0]); 55 | let (d1, hi) = umull_add(f, b.0[1], hi); 56 | let (d2, hi) = umull_add(f, b.0[2], hi); 57 | let d3 = f.wrapping_mul(b.0[3]).wrapping_add(hi); 58 | let f = self.0[1]; 59 | let (d1, hi) = umull_add(f, b.0[0], d1); 60 | let (d2, hi) = umull_add2(f, b.0[1], d2, hi); 61 | let d3 = f.wrapping_mul(b.0[2]).wrapping_add(d3).wrapping_add(hi); 62 | let f = self.0[2]; 63 | let (d2, hi) = umull_add(f, b.0[0], d2); 64 | let d3 = f.wrapping_mul(b.0[1]).wrapping_add(d3).wrapping_add(hi); 65 | let f = self.0[3]; 66 | let d3 = f.wrapping_mul(b.0[0]).wrapping_add(d3); 67 | Self([ d0, d1, d2, d3 ]) 68 | } 69 | 70 | /// Interpreting this value as a signed 128-bit integer, return its 71 | /// absolute value (in a `u128` type) and the original sign (0xFFFFFFFF 72 | /// for negative, 0x00000000 for non-negative). 
73 | #[inline(always)] 74 | pub fn abs(self) -> (u128, u32) { 75 | let s = sgnw(self.0[3]); 76 | let (d0, cc) = subborrow_u32(self.0[0] ^ s, s, 0); 77 | let (d1, cc) = subborrow_u32(self.0[1] ^ s, s, cc); 78 | let (d2, cc) = subborrow_u32(self.0[2] ^ s, s, cc); 79 | let (d3, _) = subborrow_u32(self.0[3] ^ s, s, cc); 80 | ((d0 as u128) | ((d1 as u128) << 32) 81 | | ((d2 as u128) << 64) | ((d3 as u128) << 96), s) 82 | } 83 | 84 | /// Interpreting this value as a signed 128-bit integer `x`, return 85 | /// the absolute value of `2*x+1` (as a `u128` type) and the original 86 | /// sign (0xFFFFFFFF for negative, 0x00000000 for non-negative). 87 | #[inline(always)] 88 | pub fn double_inc_abs(self) -> (u128, u32) { 89 | let s = sgnw(self.0[3]); 90 | let b0 = (self.0[0] << 1) | 1; 91 | let b1 = (self.0[0] >> 31) | (self.0[1] << 1); 92 | let b2 = (self.0[1] >> 31) | (self.0[2] << 1); 93 | let b3 = (self.0[2] >> 31) | (self.0[3] << 1); 94 | let (d0, cc) = subborrow_u32(b0 ^ s, s, 0); 95 | let (d1, cc) = subborrow_u32(b1 ^ s, s, cc); 96 | let (d2, cc) = subborrow_u32(b2 ^ s, s, cc); 97 | let (d3, _) = subborrow_u32(b3 ^ s, s, cc); 98 | ((d0 as u128) | ((d1 as u128) << 32) 99 | | ((d2 as u128) << 64) | ((d3 as u128) << 96), s) 100 | } 101 | 102 | #[inline(always)] 103 | pub fn set_sub(&mut self, b: &Self) { 104 | let mut cc = 0; 105 | for i in 0..4 { 106 | (self.0[i], cc) = subborrow_u32(self.0[i], b.0[i], cc); 107 | } 108 | } 109 | 110 | #[inline(always)] 111 | pub fn set_sub_u32(&mut self, b: u32) { 112 | let mut cc; 113 | (self.0[0], cc) = subborrow_u32(self.0[0], b, 0); 114 | for i in 1..4 { 115 | (self.0[i], cc) = subborrow_u32(self.0[i], 0, cc); 116 | } 117 | } 118 | } 119 | 120 | /// A custom 256-bit integer with some constant-time operations. 
121 | #[derive(Clone, Copy, Debug)] 122 | pub struct Zu256([u32; 8]); 123 | 124 | impl Zu256 { 125 | 126 | pub const ZERO: Self = Self([0; 8]); 127 | 128 | #[inline(always)] 129 | pub const fn w64le(x0: u64, x1: u64, x2: u64, x3: u64) -> Self { 130 | Self([ 131 | x0 as u32, (x0 >> 32) as u32, 132 | x1 as u32, (x1 >> 32) as u32, 133 | x2 as u32, (x2 >> 32) as u32, 134 | x3 as u32, (x3 >> 32) as u32, 135 | ]) 136 | } 137 | 138 | #[inline(always)] 139 | pub fn decode(buf: &[u8]) -> Option { 140 | if buf.len() != 32 { 141 | None 142 | } else { 143 | let mut x = Self::ZERO; 144 | for i in 0..8 { 145 | x.0[i] = u32::from_le_bytes(*<&[u8; 4]>::try_from( 146 | &buf[(4 * i)..(4 * i + 4)]).unwrap()); 147 | } 148 | Some(x) 149 | } 150 | } 151 | 152 | #[inline(always)] 153 | pub fn trunc128(self) -> Zu128 { 154 | Zu128([ self.0[0], self.0[1], self.0[2], self.0[3] ]) 155 | } 156 | 157 | #[inline(always)] 158 | pub fn mul256x128(self, b: &Zu128) -> Zu384 { 159 | let mut d = [0u32; 12]; 160 | for i in 0..8 { 161 | let f = self.0[i]; 162 | let mut hi = 0; 163 | for j in 0..4 { 164 | (d[i + j], hi) = umull_add2(f, b.0[j], d[i + j], hi); 165 | } 166 | d[i + 4] = hi; 167 | } 168 | Zu384(d) 169 | } 170 | 171 | /// Return `floor((self + b)/2^224) mod 2^32` (i.e. addition truncated 172 | /// to 256 bits, then return the high 32 bits of the 256-bit result). 173 | #[inline(always)] 174 | pub fn add_rsh224(self, b: &Self) -> u32 { 175 | let mut cc; 176 | (_, cc) = addcarry_u32(self.0[0], b.0[0], 0); 177 | for i in 1..7 { 178 | (_, cc) = addcarry_u32(self.0[i], b.0[i], cc); 179 | } 180 | let (w, _) = addcarry_u32(self.0[7], b.0[7], cc); 181 | w 182 | } 183 | 184 | /// Return the borrow resulting from the subtraction of `b` from `self`; 185 | /// returned value is 1 in case of borrow, 0 otherwise. The subtraction 186 | /// result itself is discarded. 
187 | #[inline(always)] 188 | pub fn borrow(self, b: &Self) -> u32 { 189 | let mut cc; 190 | (_, cc) = subborrow_u32(self.0[0], b.0[0], 0); 191 | for i in 1..8 { 192 | (_, cc) = subborrow_u32(self.0[i], b.0[i], cc); 193 | } 194 | cc as u32 195 | } 196 | } 197 | 198 | /// A custom 384-bit integer with some constant-time operations. 199 | #[derive(Clone, Copy, Debug)] 200 | pub struct Zu384([u32; 12]); 201 | 202 | impl Zu384 { 203 | 204 | pub const ZERO: Self = Self([0; 12]); 205 | 206 | #[inline(always)] 207 | pub const fn w64le(x0: u64, x1: u64, x2: u64, x3: u64, x4: u64, x5: u64) 208 | -> Self 209 | { 210 | Self([ 211 | x0 as u32, (x0 >> 32) as u32, 212 | x1 as u32, (x1 >> 32) as u32, 213 | x2 as u32, (x2 >> 32) as u32, 214 | x3 as u32, (x3 >> 32) as u32, 215 | x4 as u32, (x4 >> 32) as u32, 216 | x5 as u32, (x5 >> 32) as u32, 217 | ]) 218 | } 219 | 220 | #[inline(always)] 221 | pub fn set_add(&mut self, b: &Self) { 222 | let mut cc = 0; 223 | for i in 0..12 { 224 | (self.0[i], cc) = addcarry_u32(self.0[i], b.0[i], cc); 225 | } 226 | } 227 | 228 | /// Returns `self mod 2^n` and `(floor(self/2^n) + cc) mod 2^128`. 229 | /// Shift count `n` MUST be between 225 and 255 (inclusive). 
230 | #[inline(always)] 231 | pub fn trunc_and_rsh_cc(&mut self, b: u32, n: u32) -> (Zu256, Zu128) { 232 | let n1 = n - 224; 233 | let n2 = 32 - n1; 234 | let (d0, cc) = addcarry_u32( 235 | (self.0[7] >> n1) | (self.0[8] << n2), b, 0); 236 | let (d1, cc) = addcarry_u32( 237 | (self.0[8] >> n1) | (self.0[9] << n2), 0, cc); 238 | let (d2, cc) = addcarry_u32( 239 | (self.0[9] >> n1) | (self.0[10] << n2), 0, cc); 240 | let (d3, _) = addcarry_u32( 241 | (self.0[10] >> n1) | (self.0[11] << n2), 0, cc); 242 | let mut e = [0u32; 8]; 243 | e[..].copy_from_slice(&self.0[..8]); 244 | e[7] &= (!0u32) >> n2; 245 | (Zu256(e), Zu128([ d0, d1, d2, d3 ])) 246 | } 247 | } 248 | -------------------------------------------------------------------------------- /src/field.rs: -------------------------------------------------------------------------------- 1 | //! Finite fields. 2 | //! 3 | //! This module defines a few specific finite fields, used as base fields 4 | //! by various curves. These are merely specializations of the 5 | //! backend-provided `GF255` and `ModInt256` types. 6 | 7 | #[cfg(feature = "gf255e")] 8 | pub use crate::backend::GF255e; 9 | 10 | #[cfg(feature = "gf255s")] 11 | pub use crate::backend::GF255s; 12 | 13 | #[cfg(feature = "gf25519")] 14 | pub use crate::backend::GF25519; 15 | 16 | #[cfg(feature = "modint256")] 17 | pub use crate::backend::ModInt256; 18 | 19 | #[cfg(feature = "modint256")] 20 | pub use crate::backend::ModInt256ct; 21 | 22 | #[cfg(feature = "gfsecp256k1")] 23 | pub use crate::backend::GFsecp256k1; 24 | 25 | #[cfg(feature = "gfp256")] 26 | pub use crate::backend::GFp256; 27 | 28 | #[cfg(feature = "gf448")] 29 | pub use crate::backend::GF448; 30 | 31 | #[cfg(feature = "gfb254")] 32 | pub use crate::backend::{GFb127, GFb254}; 33 | -------------------------------------------------------------------------------- /src/lib.rs: -------------------------------------------------------------------------------- 1 | //! 
Crrl is a Rust library for cryptographic research. 2 | //! 3 | //! This library implements computations in some finite fields and 4 | //! elliptic curves. It aims at providing efficient and secure 5 | //! (constant-time) implementations, but with portable code, and with a 6 | //! convenient API so that scalars, curve points, and other field 7 | //! elements may be used in straightforward expressions with normal 8 | //! arithmetic operators. 9 | //! 10 | //! Finite fields are implemented through some customizable types defined 11 | //! in `backend` (a 32-bit and a 64-bit backend are provided, the "right 12 | //! one" is automatically selected, unless overridden by a compile-time 13 | //! feature). The types may support several distinct moduli, chosen 14 | //! through a compile-time type parameter. 15 | //! 16 | //! Curve edwards25519 is implemented in the `ed25519` module. The 17 | //! specialized X25519 function is in `x25519`. The prime-order group 18 | //! ristretto255 (internally based on edwards25519) is defined in the 19 | //! `ristretto255` module. NIST curve P-256 (aka "secp256r1" and 20 | //! "prime256v1") is implemented in the `p256` module (with the ECDSA 21 | //! signature algorithm). Double-odd curves jq255e and jq255s are 22 | //! implemented by `jq255e` and `jq255s`, respectively (including 23 | //! signature and key exchange schemes). Secp256k1 is implemented in 24 | //! `secp256k1`. Edwards448 is in `ed448`, while the specialized X448 25 | //! function is in `x448`. The prime-order decaf448 group is implemented 26 | //! in `decaf448`. 27 | //! 28 | //! # Usage 29 | //! 30 | //! The library is "mostly `no_std`". By default, it compiles against the 31 | //! standard library. It can be compiled in `no_std` mode, in which case 32 | //! all functionality is still available, except verification of truncated 33 | //! ECDSA signatures with curve P-256. 34 | //! 35 | //! # Conventions 36 | //! 37 | //!
All implemented functions should be strictly constant-time, unless 38 | //! explicitly documented otherwise (non-constant-time functions normally 39 | //! have "vartime" in their name). In order to avoid unwanted side-channel 40 | //! leaks, Booleans are avoided (compilers tend to "optimize" things a bit 41 | //! too eagerly when handling `bool` values). All functions that return or 42 | //! use a potentially secret Boolean value use the `u32` type; the convention 43 | //! is that 0xFFFFFFFF means "true", and 0x00000000 means "false". No other 44 | //! value shall be used, for they would lead to unpredictable results. 45 | //! Similarly, the `Eq` or `PartialEq` traits are not implemented. 46 | //! 47 | //! Algebraic operations on field elements and curve points are performed 48 | //! with the usual operators (e.g. `+`); appropriate traits are defined 49 | //! so that structure types and pointers to structure types can be used 50 | //! more or less interchangeably. Throughout the code, functions that 51 | //! modify the object on which they are called tend to have a name in 52 | //! `set_*()` (e.g. for a curve point `P`, if we want to compute the 53 | //! double of that point, then `P.set_double()` modifies the point 54 | //! structure in place, while `P.double()` leaves `P` unmodified and 55 | //! returns the double as a new structure instance). 56 | //! 57 | //! # Truncated Signatures 58 | //! 59 | //! Apart from standard support for curve operations and signature 60 | //! algorithms, _truncated signatures_ are implemented for both Ed25519 61 | //! (Schnorr signatures over edwards25519) and ECDSA (over P-256). A 62 | //! truncated signature is a shrunk version, by up to 32 bits, of a 63 | //! normal signature; the verification process is then more expensive, 64 | //! though not necessarily intolerably expensive, depending on usage 65 | //! context (the most expensive verification function is for ECDSA on 66 | //! 
P-256, with maximal 32-bit truncation; in that case, verification 67 | //! cost can be up to about 0.65 seconds on a 500 MHz ARM Cortex A53; but 68 | //! Ed25519 signatures with 32-bit truncation can be verified in less 69 | //! than 0.05 seconds on the same hardware). Signature truncation can be 70 | //! useful in situations with strong I/O constraints, where every data 71 | //! bit counts, but where use of fully standard Ed25519 or ECDSA 72 | //! signature generators is made mandatory because of some regulatory or 73 | //! physical constraints of the signing hardware. 74 | //! 75 | //! # Performance 76 | //! 77 | //! On an Intel i5-8259U CPU (Coffee Lake core), Ed25519 signatures have 78 | //! been benchmarked at about 51600 cycles for signing, 111000 cycles for 79 | //! verification; these are not bad values, and are competitive or at 80 | //! least within 30% of performance obtained from assembly-optimized 81 | //! implementations on the same hardware. For P-256, signing time is 82 | //! about 125000 cycles, verification is 256000 cycles. For the jq255e 83 | //! curve, signatures are generated in about 54700 cycles, and verified 84 | //! in only 82800 cycles (56200 and 86800, respectively, for jq255s). 85 | //! These figures have been obtained by compiling with Rust 1.59 in 86 | //! release mode, with the flags `-C target-cpu=native`. 87 | //! 88 | //! On an ARM Cortex A53 (RaspberryPi Model 3B), Ed25519 signing was 89 | //! measured at 213000 cycles, verification at 479000 cycles; for P-256, 90 | //! the figures were 389000 and 991000, respectively. With jq255e, 91 | //! signature generation and verification use 241000 and 358000 cycles, 92 | //! respectively (248000 and 369000 for jq255s). 93 | //! 94 | //! No inline assembly is used. On x86-64 architectures, the 95 | //! `_addcarry_u64()` and `_subborrow_u64()` intrinsics are used 96 | //! (from `core::arch::x86_64`); however, plain implementations with 97 | //! 
no intrinsics are available (and used on, for instance, aarch64). 98 | 99 | #![no_std] 100 | 101 | #[cfg(all(feature = "alloc", not(feature = "std")))] 102 | #[macro_use] 103 | #[allow(unused_imports)] 104 | extern crate alloc; 105 | 106 | #[cfg(feature = "std")] 107 | #[macro_use] 108 | #[allow(unused_imports)] 109 | extern crate std; 110 | 111 | #[cfg(all(feature = "alloc", not(feature = "std")))] 112 | #[allow(unused_imports)] 113 | pub(crate) use alloc::vec::Vec; 114 | 115 | #[cfg(feature = "std")] 116 | #[allow(unused_imports)] 117 | pub(crate) use std::vec::Vec; 118 | 119 | /// The `rand_core` types are re-exported so that users of crrl do not 120 | /// have to worry about using the exact correct version of `rand_core`. 121 | pub use rand_core::{CryptoRng, RngCore, Error as RngError}; 122 | 123 | #[allow(unused_macros)] 124 | macro_rules! static_assert { 125 | ($condition:expr) => { 126 | let _ = &[()][1 - ($condition) as usize]; 127 | } 128 | } 129 | 130 | pub mod backend; 131 | pub mod field; 132 | 133 | pub use backend::{Zu128, Zu256, Zu384}; 134 | 135 | #[cfg(feature = "ed25519")] 136 | pub mod ed25519; 137 | 138 | #[cfg(feature = "x25519")] 139 | pub mod x25519; 140 | 141 | #[cfg(feature = "ristretto255")] 142 | pub mod ristretto255; 143 | 144 | #[cfg(feature = "jq255e")] 145 | pub mod jq255e; 146 | 147 | #[cfg(feature = "jq255s")] 148 | pub mod jq255s; 149 | 150 | #[cfg(feature = "p256")] 151 | pub mod p256; 152 | 153 | #[cfg(feature = "secp256k1")] 154 | pub mod secp256k1; 155 | 156 | #[cfg(feature = "gls254")] 157 | pub mod gls254; 158 | 159 | #[cfg(feature = "ed448")] 160 | pub mod ed448; 161 | 162 | #[cfg(feature = "x448")] 163 | pub mod x448; 164 | 165 | #[cfg(feature = "decaf448")] 166 | pub mod decaf448; 167 | 168 | #[cfg(all(feature = "alloc", feature = "frost"))] 169 | pub mod frost; 170 | 171 | #[cfg(feature = "lms")] 172 | pub mod lms; 173 | 174 | #[cfg(feature = "blake2s")] 175 | pub mod blake2s; 176 | 177 | pub mod sha2; 178 | pub mod sha3; 
179 | -------------------------------------------------------------------------------- /src/x25519.rs: -------------------------------------------------------------------------------- 1 | //! X25519 key-exchange algorithm. 2 | //! 3 | //! This module implements the X25519 primitive, as defined by [RFC 4 | //! 7748]. The primitive takes as input two 32-byte values, the first 5 | //! being the representation of a point on Curve25519 (a Montgomery 6 | //! curve) or on the quadratic twist of Curve25519, and the second being 7 | //! a scalar (a big integer). The scalar is internally "clamped" (some 8 | //! bits are set to specific values), then the point is multiplied by the 9 | //! scalar, and the output point is reencoded into 32 bytes. 10 | //! 11 | //! The `x25519()` function implements exactly the process described in 12 | //! RFC 7748 (section 5). The `x25519_base()` function is an optimization 13 | //! of the specific case of the input point being the conventional 14 | //! generator point on Curve25519; `x25519_base()` is fully compatible 15 | //! with `x25519()`, but also substantially faster. 16 | //! 17 | //! The `x25519()` function does NOT filter out any value from its input; 18 | //! any input sequence of 32 bytes is accepted, even if it encodes a 19 | //! low-order curve point. As per RFC 7748 requirements, the top point 20 | //! bit (most significant bit of the last byte) is ignored. As for 21 | //! scalars, the clamping process ensures that the integer used for the 22 | //! multiplication is a multiple of 8, at least 2^254, and lower than 23 | //! 2^255; the three least significant bits of the first byte, and two 24 | //! most significant bits of the last byte, are ignored. 25 | //! 26 | //! [RFC 7748]: https://datatracker.ietf.org/doc/html/rfc7748 27 | 28 | // Projective/fractional coordinates traditionally use uppercase letters, 29 | // using lowercase only for affine coordinates. 
30 | #![allow(non_snake_case)] 31 | 32 | use super::field::GF25519; 33 | use super::ed25519::{Point, Scalar}; 34 | 35 | /// X25519 function (from RFC 7748), general case. 36 | /// 37 | /// The source point is provided as an array of 32 bytes (`point`), as 38 | /// well as the scalar (`scalar`). In RFC 7748 terminology, the `point` 39 | /// parameter is the little-endian encoding of the u coordinate of a 40 | /// point on the Montgomery curve or on its quadratic twist, and the 41 | /// `scalar` parameter is the little-endian encoding of the scalar. The 42 | /// function "clamps" the scalar (bits 0, 1, 2 and 255 are cleared, bit 43 | /// 254 is set) then interprets the clamped scalar as an integer 44 | /// (little-endian convention), with which the provided curve point is 45 | /// multiplied; the u coordinate of the resulting point is then encoded 46 | /// and returned. 47 | pub fn x25519(point: &[u8; 32], scalar: &[u8; 32]) -> [u8; 32] { 48 | // Make clamped scalar. 49 | let mut s = *scalar; 50 | s[0] &= 248; 51 | s[31] &= 127; 52 | s[31] |= 64; 53 | 54 | // Decode the source point. As per RFC 7748 rules, the top bit is 55 | // ignored, and non-canonical values are acceptable. 56 | let mut u = *point; 57 | u[31] &= 127; 58 | let x1 = GF25519::decode_reduce(&u[..]); 59 | 60 | // Apply the RFC 7748 section 5 algorithm. 
61 | let mut x2 = GF25519::ONE; 62 | let mut z2 = GF25519::ZERO; 63 | let mut x3 = x1; 64 | let mut z3 = GF25519::ONE; 65 | let mut swap = 0u32; 66 | 67 | for t in (0..255).rev() { 68 | let kt = (((s[t >> 3] >> (t & 7)) & 1) as u32).wrapping_neg(); 69 | swap ^= kt; 70 | GF25519::cswap(&mut x2, &mut x3, swap); 71 | GF25519::cswap(&mut z2, &mut z3, swap); 72 | swap = kt; 73 | 74 | let A = x2 + z2; 75 | let B = x2 - z2; 76 | let AA = A.square(); 77 | let BB = B.square(); 78 | let C = x3 + z3; 79 | let D = x3 - z3; 80 | let E = AA - BB; 81 | let DA = D * A; 82 | let CB = C * B; 83 | x3 = (DA + CB).square(); 84 | z3 = x1 * (DA - CB).square(); 85 | x2 = AA * BB; 86 | z2 = E * (AA + E.mul_small(121665)); 87 | } 88 | GF25519::cswap(&mut x2, &mut x3, swap); 89 | GF25519::cswap(&mut z2, &mut z3, swap); 90 | 91 | (x2 / z2).encode() 92 | } 93 | 94 | /// Specialized version of X25519, when applied to the conventional 95 | /// generator point (u = 9). 96 | /// 97 | /// See `x25519()` for details. This function is significantly faster than 98 | /// the general `x25519()` function. 99 | pub fn x25519_base(scalar: &[u8; 32]) -> [u8; 32] { 100 | // Make clamped scalar, and decode it as an integer modulo L. 101 | let mut sb = *scalar; 102 | sb[0] &= 248; 103 | sb[31] &= 127; 104 | sb[31] |= 64; 105 | let s = Scalar::decode_reduce(&sb[..]); 106 | 107 | // Perform the multiplication on the Edwards curve. 108 | let P = Point::mulgen(&s); 109 | 110 | // Apply the birational map to get the Montgomery point (u coordinate 111 | // only). When the point is the neutral, we want to return 0. 
112 | let u = P.to_montgomery_u(); 113 | u.encode() 114 | } 115 | 116 | // ======================================================================== 117 | 118 | #[cfg(test)] 119 | mod tests { 120 | 121 | use super::{x25519, x25519_base}; 122 | use crate::sha2::Sha256; 123 | 124 | #[test] 125 | fn x25519_mc() { 126 | let mut k = [0u8; 32]; 127 | k[0] = 9; 128 | let mut u = k; 129 | let mut ref1 = [0u8; 32]; 130 | hex::decode_to_slice("422c8e7a6227d7bca1350b3e2bb7279f7897b87bb6854b783c60e80311ae3079", &mut ref1[..]).unwrap(); 131 | let mut ref1000 = [0u8; 32]; 132 | hex::decode_to_slice("684cf59ba83309552800ef566f2f4d3c1c3887c49360e3875f2eb94d99532c51", &mut ref1000[..]).unwrap(); 133 | for i in 0..1000 { 134 | let old_k = k; 135 | k = x25519(&u, &k); 136 | u = old_k; 137 | if i == 0 { 138 | assert!(k == ref1); 139 | } 140 | } 141 | assert!(k == ref1000); 142 | } 143 | 144 | #[test] 145 | fn x25519_basepoint() { 146 | let mut sh = Sha256::new(); 147 | let mut b = [0u8; 32]; 148 | b[0] = 9; 149 | for i in 0..20 { 150 | sh.update(&(i as u64).to_le_bytes()); 151 | let v = sh.finalize_reset(); 152 | let mut k = [0u8; 32]; 153 | k[..].copy_from_slice(&v); 154 | assert!(x25519(&b, &k) == x25519_base(&k)); 155 | } 156 | } 157 | } 158 | -------------------------------------------------------------------------------- /src/x448.rs: -------------------------------------------------------------------------------- 1 | //! X448 key-exchange algorithm. 2 | //! 3 | //! This module implements the X448 primitive, as defined by [RFC 7748]. 4 | //! The primitive takes as input two 56-byte values, the first 5 | //! being the representation of a point on Curve448 (a Montgomery 6 | //! curve) or on the quadratic twist of Curve448, and the second being 7 | //! a scalar (a big integer). The scalar is internally "clamped" (some 8 | //! bits are set to specific values), then the point is multiplied by the 9 | //! scalar, and the output point is reencoded into 56 bytes. 10 | //! 11 | //! 
//! The `x448()` function implements exactly the process described in
//! RFC 7748 (section 5). The `x448_base()` function is an optimization
//! of the specific case of the input point being the conventional
//! generator point on Curve448; `x448_base()` is fully compatible
//! with `x448()`, but also substantially faster.
//!
//! The `x448()` function does NOT filter out any value from its input;
//! any input sequence of 56 bytes is accepted, even if it encodes a
//! low-order curve point. As per RFC 7748 rules, no bit of the input
//! point is masked for X448 (unlike X25519): non-canonical u values
//! (not lower than the field modulus) are accepted and implicitly
//! reduced. As for scalars, the clamping process ensures that the
//! integer used for the multiplication is a multiple of 4, at least
//! 2^447, and lower than 2^448; the two least significant bits of the
//! first byte, and the most significant bit of the last byte, are
//! ignored.
//!
//! [RFC 7748]: https://datatracker.ietf.org/doc/html/rfc7748

// Projective/fractional coordinates traditionally use uppercase letters,
// using lowercase only for affine coordinates.
#![allow(non_snake_case)]

use super::field::GF448;
use super::ed448::{Point, Scalar};

/// X448 function (from RFC 7748), general case.
///
/// The source point is provided as an array of 56 bytes (`point`), as
/// well as the scalar (`scalar`). In RFC 7748 terminology, the `point`
/// parameter is the little-endian encoding of the u coordinate of a
/// point on the Montgomery curve or on its quadratic twist, and the
/// `scalar` parameter is the little-endian encoding of the scalar.
The 42 | /// function "clamps" the scalar (bits 0 and 1 are cleared, bit 447 is 43 | /// set) then interprets the clamped scalar as an integer (little-endian 44 | /// convention), with which the provided curve point is multiplied; the u 45 | /// coordinate of the resulting point is then encoded and returned. 46 | pub fn x448(point: &[u8; 56], scalar: &[u8; 56]) -> [u8; 56] { 47 | // Make clamped scalar. 48 | let mut s = *scalar; 49 | s[0] &= 252; 50 | s[55] |= 128; 51 | 52 | // Decode the source point. As per RFC 7748 rules, non-canonical 53 | // values are acceptable. 54 | let x1 = GF448::decode_reduce(point); 55 | 56 | // Apply the RFC 7748 section 5 algorithm. 57 | let mut x2 = GF448::ONE; 58 | let mut z2 = GF448::ZERO; 59 | let mut x3 = x1; 60 | let mut z3 = GF448::ONE; 61 | let mut swap = 0u32; 62 | 63 | for t in (0..448).rev() { 64 | let kt = (((s[t >> 3] >> (t & 7)) & 1) as u32).wrapping_neg(); 65 | swap ^= kt; 66 | GF448::cswap(&mut x2, &mut x3, swap); 67 | GF448::cswap(&mut z2, &mut z3, swap); 68 | swap = kt; 69 | 70 | let A = x2 + z2; 71 | let B = x2 - z2; 72 | let AA = A.square(); 73 | let BB = B.square(); 74 | let C = x3 + z3; 75 | let D = x3 - z3; 76 | let E = AA - BB; 77 | let DA = D * A; 78 | let CB = C * B; 79 | x3 = (DA + CB).square(); 80 | z3 = x1 * (DA - CB).square(); 81 | x2 = AA * BB; 82 | z2 = E * (AA + E.mul_small(39081)); 83 | } 84 | GF448::cswap(&mut x2, &mut x3, swap); 85 | GF448::cswap(&mut z2, &mut z3, swap); 86 | 87 | (x2 / z2).encode() 88 | } 89 | 90 | /// Specialized version of X448, when applied to the conventional 91 | /// generator point (u = 9). 92 | /// 93 | /// See `x448()` for details. This function is significantly faster than 94 | /// the general `x448()` function. 95 | pub fn x448_base(scalar: &[u8; 56]) -> [u8; 56] { 96 | // Make clamped scalar, and decode it as an integer modulo L. 
97 | let mut sb = *scalar; 98 | sb[0] &= 252; 99 | sb[55] |= 128; 100 | let s = Scalar::decode_reduce(&sb[..]); 101 | 102 | // Perform the multiplication on the Edwards curve. 103 | let P = Point::mulgen(&s); 104 | 105 | // Apply the birational map to get the Montgomery point (u coordinate 106 | // only). When the point is the neutral, we want to return 0. 107 | let u = P.to_montgomery_u(); 108 | u.encode() 109 | } 110 | 111 | // ======================================================================== 112 | 113 | #[cfg(test)] 114 | mod tests { 115 | 116 | use super::{x448, x448_base}; 117 | use crate::sha2::Sha512; 118 | 119 | #[test] 120 | fn x448_mc() { 121 | let mut k = [0u8; 56]; 122 | k[0] = 5; 123 | let mut u = k; 124 | let mut ref1 = [0u8; 56]; 125 | hex::decode_to_slice("3f482c8a9f19b01e6c46ee9711d9dc14fd4bf67af30765c2ae2b846a4d23a8cd0db897086239492caf350b51f833868b9bc2b3bca9cf4113", &mut ref1[..]).unwrap(); 126 | let mut ref1000 = [0u8; 56]; 127 | hex::decode_to_slice("aa3b4749d55b9daf1e5b00288826c467274ce3ebbdd5c17b975e09d4af6c67cf10d087202db88286e2b79fceea3ec353ef54faa26e219f38", &mut ref1000[..]).unwrap(); 128 | for i in 0..1000 { 129 | let old_k = k; 130 | k = x448(&u, &k); 131 | u = old_k; 132 | if i == 0 { 133 | assert!(k == ref1); 134 | } 135 | } 136 | assert!(k == ref1000); 137 | } 138 | 139 | #[test] 140 | fn x448_basepoint() { 141 | let mut sh = Sha512::new(); 142 | let mut b = [0u8; 56]; 143 | b[0] = 5; 144 | for i in 0..20 { 145 | sh.update(&(i as u64).to_le_bytes()); 146 | let v = sh.finalize_reset(); 147 | let mut k = [0u8; 56]; 148 | k[..].copy_from_slice(&v[..56]); 149 | assert!(x448(&b, &k) == x448_base(&k)); 150 | } 151 | } 152 | } 153 | --------------------------------------------------------------------------------